{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.12655344351919814, "eval_steps": 500, "global_step": 45000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.8122987448710702e-05, "grad_norm": 26.75, "learning_rate": 1.999999999648735e-05, "loss": 9.4046, "step": 10 }, { "epoch": 5.6245974897421405e-05, "grad_norm": 30.25, "learning_rate": 1.9999999984344847e-05, "loss": 9.1867, "step": 20 }, { "epoch": 8.436896234613211e-05, "grad_norm": 30.875, "learning_rate": 1.999999996352913e-05, "loss": 8.6504, "step": 30 }, { "epoch": 0.00011249194979484281, "grad_norm": 31.5, "learning_rate": 1.9999999934040198e-05, "loss": 9.4269, "step": 40 }, { "epoch": 0.0001406149372435535, "grad_norm": 62.5, "learning_rate": 1.999999989587805e-05, "loss": 9.6804, "step": 50 }, { "epoch": 0.00016873792469226421, "grad_norm": 30.375, "learning_rate": 1.9999999849042684e-05, "loss": 9.0471, "step": 60 }, { "epoch": 0.0001968609121409749, "grad_norm": 27.375, "learning_rate": 1.9999999793534105e-05, "loss": 8.7142, "step": 70 }, { "epoch": 0.00022498389958968562, "grad_norm": 38.75, "learning_rate": 1.999999972935231e-05, "loss": 8.8189, "step": 80 }, { "epoch": 0.0002531068870383963, "grad_norm": 31.5, "learning_rate": 1.99999996564973e-05, "loss": 9.1094, "step": 90 }, { "epoch": 0.000281229874487107, "grad_norm": 22.375, "learning_rate": 1.9999999574969077e-05, "loss": 8.7019, "step": 100 }, { "epoch": 0.00030935286193581774, "grad_norm": 29.625, "learning_rate": 1.9999999484767634e-05, "loss": 9.7579, "step": 110 }, { "epoch": 0.00033747584938452843, "grad_norm": 29.75, "learning_rate": 1.999999938589298e-05, "loss": 8.6248, "step": 120 }, { "epoch": 0.0003655988368332391, "grad_norm": 31.875, "learning_rate": 1.9999999278345105e-05, "loss": 8.2482, "step": 130 }, { "epoch": 0.0003937218242819498, "grad_norm": 35.5, "learning_rate": 1.999999916212402e-05, "loss": 8.621, "step": 140 }, { "epoch": 0.00042184481173066055, "grad_norm": 37.75, "learning_rate": 1.999999903722972e-05, "loss": 9.3644, "step": 150 }, { "epoch": 0.00044996779917937124, "grad_norm": 50.75, "learning_rate": 1.9999998903662202e-05, "loss": 8.8872, "step": 160 }, { "epoch": 0.0004780907866280819, "grad_norm": 27.625, "learning_rate": 1.9999998761421473e-05, "loss": 9.3207, "step": 170 }, { "epoch": 0.0005062137740767926, "grad_norm": 45.5, "learning_rate": 1.999999861050753e-05, "loss": 8.9391, "step": 180 }, { "epoch": 0.0005343367615255033, "grad_norm": 41.75, "learning_rate": 1.999999845092037e-05, "loss": 7.9287, "step": 190 }, { "epoch": 0.000562459748974214, "grad_norm": 46.0, "learning_rate": 1.9999998282659996e-05, "loss": 8.0963, "step": 200 }, { "epoch": 0.0005905827364229248, "grad_norm": 33.75, "learning_rate": 1.999999810572641e-05, "loss": 8.2999, "step": 210 }, { "epoch": 0.0006187057238716355, "grad_norm": 24.625, "learning_rate": 1.999999792011961e-05, "loss": 9.307, "step": 220 }, { "epoch": 0.0006468287113203462, "grad_norm": 63.0, "learning_rate": 1.9999997725839592e-05, "loss": 9.2622, "step": 230 }, { "epoch": 0.0006749516987690569, "grad_norm": 27.875, "learning_rate": 1.9999997522886364e-05, "loss": 8.0718, "step": 240 }, { "epoch": 0.0007030746862177675, "grad_norm": 61.5, "learning_rate": 1.9999997311259922e-05, "loss": 8.9239, "step": 250 }, { "epoch": 0.0007311976736664782, "grad_norm": 24.125, "learning_rate": 1.9999997090960266e-05, "loss": 8.4316, "step": 260 }, { "epoch": 0.0007593206611151889, "grad_norm": 36.75, "learning_rate": 1.9999996861987396e-05, "loss": 7.9428, "step": 270 }, { "epoch": 0.0007874436485638996, "grad_norm": 62.75, "learning_rate": 1.9999996624341315e-05, "loss": 8.4613, "step": 280 }, { "epoch": 0.0008155666360126103, "grad_norm": 26.75, "learning_rate": 1.9999996378022023e-05, "loss": 9.8768, "step": 290 }, { "epoch": 0.0008436896234613211, "grad_norm": 26.125, "learning_rate": 1.9999996123029514e-05, "loss": 8.7369, "step": 300 }, { "epoch": 0.0008718126109100318, "grad_norm": 53.5, "learning_rate": 1.9999995859363797e-05, "loss": 8.1794, "step": 310 }, { "epoch": 0.0008999355983587425, "grad_norm": 63.75, "learning_rate": 1.9999995587024866e-05, "loss": 8.6823, "step": 320 }, { "epoch": 0.0009280585858074532, "grad_norm": 47.5, "learning_rate": 1.9999995306012724e-05, "loss": 8.809, "step": 330 }, { "epoch": 0.0009561815732561639, "grad_norm": 27.125, "learning_rate": 1.9999995016327368e-05, "loss": 8.9519, "step": 340 }, { "epoch": 0.0009843045607048745, "grad_norm": 46.0, "learning_rate": 1.99999947179688e-05, "loss": 9.0159, "step": 350 }, { "epoch": 0.0010124275481535852, "grad_norm": 44.5, "learning_rate": 1.9999994410937027e-05, "loss": 8.0437, "step": 360 }, { "epoch": 0.001040550535602296, "grad_norm": 58.25, "learning_rate": 1.9999994095232038e-05, "loss": 7.9795, "step": 370 }, { "epoch": 0.0010686735230510066, "grad_norm": 46.75, "learning_rate": 1.9999993770853842e-05, "loss": 8.9127, "step": 380 }, { "epoch": 0.0010967965104997173, "grad_norm": 43.25, "learning_rate": 1.9999993437802436e-05, "loss": 8.5466, "step": 390 }, { "epoch": 0.001124919497948428, "grad_norm": 22.375, "learning_rate": 1.9999993096077818e-05, "loss": 8.608, "step": 400 }, { "epoch": 0.0011530424853971387, "grad_norm": 32.5, "learning_rate": 1.999999274567999e-05, "loss": 8.3159, "step": 410 }, { "epoch": 0.0011811654728458496, "grad_norm": 36.0, "learning_rate": 1.9999992386608954e-05, "loss": 8.198, "step": 420 }, { "epoch": 0.0012092884602945603, "grad_norm": 35.75, "learning_rate": 1.9999992018864708e-05, "loss": 8.959, "step": 430 }, { "epoch": 0.001237411447743271, "grad_norm": 28.25, "learning_rate": 1.9999991642447254e-05, "loss": 8.2303, "step": 440 }, { "epoch": 0.0012655344351919816, "grad_norm": 27.375, "learning_rate": 1.999999125735659e-05, "loss": 8.5069, "step": 450 }, { "epoch": 0.0012936574226406923, "grad_norm": 39.75, "learning_rate": 1.999999086359272e-05, "loss": 7.9837, "step": 460 }, { "epoch": 0.001321780410089403, "grad_norm": 52.0, "learning_rate": 1.999999046115564e-05, "loss": 8.5483, "step": 470 }, { "epoch": 0.0013499033975381137, "grad_norm": 41.75, "learning_rate": 1.9999990050045354e-05, "loss": 8.9188, "step": 480 }, { "epoch": 0.0013780263849868244, "grad_norm": 27.375, "learning_rate": 1.9999989630261863e-05, "loss": 8.6146, "step": 490 }, { "epoch": 0.001406149372435535, "grad_norm": 29.0, "learning_rate": 1.9999989201805164e-05, "loss": 8.4098, "step": 500 }, { "epoch": 0.0014342723598842458, "grad_norm": 24.25, "learning_rate": 1.999998876467526e-05, "loss": 8.5872, "step": 510 }, { "epoch": 0.0014623953473329565, "grad_norm": 26.375, "learning_rate": 1.9999988318872145e-05, "loss": 8.7304, "step": 520 }, { "epoch": 0.0014905183347816672, "grad_norm": 22.75, "learning_rate": 1.999998786439583e-05, "loss": 8.2715, "step": 530 }, { "epoch": 0.0015186413222303778, "grad_norm": 32.25, "learning_rate": 1.9999987401246306e-05, "loss": 8.472, "step": 540 }, { "epoch": 0.0015467643096790885, "grad_norm": 21.75, "learning_rate": 1.999998692942358e-05, "loss": 7.9425, "step": 550 }, { "epoch": 0.0015748872971277992, "grad_norm": 37.75, "learning_rate": 1.9999986448927652e-05, "loss": 9.0206, "step": 560 }, { "epoch": 0.00160301028457651, "grad_norm": 39.5, "learning_rate": 1.9999985959758515e-05, "loss": 8.0803, "step": 570 }, { "epoch": 0.0016311332720252206, "grad_norm": 37.25, "learning_rate": 1.9999985461916178e-05, "loss": 8.3754, "step": 580 }, { "epoch": 0.0016592562594739315, "grad_norm": 24.25, "learning_rate": 1.9999984955400636e-05, "loss": 9.2416, "step": 590 }, { "epoch": 0.0016873792469226422, "grad_norm": 42.5, "learning_rate": 1.9999984440211894e-05, "loss": 8.3339, "step": 600 }, { "epoch": 0.0017155022343713529, "grad_norm": 35.25, "learning_rate": 1.9999983916349948e-05, "loss": 8.1455, "step": 610 }, { "epoch": 0.0017436252218200636, "grad_norm": 51.0, "learning_rate": 1.99999833838148e-05, "loss": 7.6625, "step": 620 }, { "epoch": 0.0017717482092687743, "grad_norm": 34.5, "learning_rate": 1.999998284260645e-05, "loss": 8.1291, "step": 630 }, { "epoch": 0.001799871196717485, "grad_norm": 56.5, "learning_rate": 1.9999982292724902e-05, "loss": 8.5348, "step": 640 }, { "epoch": 0.0018279941841661956, "grad_norm": 27.5, "learning_rate": 1.9999981734170154e-05, "loss": 7.475, "step": 650 }, { "epoch": 0.0018561171716149063, "grad_norm": 25.0, "learning_rate": 1.9999981166942204e-05, "loss": 9.086, "step": 660 }, { "epoch": 0.001884240159063617, "grad_norm": 47.25, "learning_rate": 1.9999980591041054e-05, "loss": 8.1769, "step": 670 }, { "epoch": 0.0019123631465123277, "grad_norm": 44.5, "learning_rate": 1.999998000646671e-05, "loss": 8.5343, "step": 680 }, { "epoch": 0.0019404861339610384, "grad_norm": 26.375, "learning_rate": 1.9999979413219163e-05, "loss": 8.3249, "step": 690 }, { "epoch": 0.001968609121409749, "grad_norm": 29.5, "learning_rate": 1.999997881129842e-05, "loss": 8.9132, "step": 700 }, { "epoch": 0.0019967321088584598, "grad_norm": 31.25, "learning_rate": 1.999997820070448e-05, "loss": 8.0182, "step": 710 }, { "epoch": 0.0020248550963071705, "grad_norm": 29.625, "learning_rate": 1.9999977581437342e-05, "loss": 8.1667, "step": 720 }, { "epoch": 0.002052978083755881, "grad_norm": 27.5, "learning_rate": 1.9999976953497012e-05, "loss": 8.2431, "step": 730 }, { "epoch": 0.002081101071204592, "grad_norm": 68.5, "learning_rate": 1.999997631688348e-05, "loss": 9.0525, "step": 740 }, { "epoch": 0.0021092240586533025, "grad_norm": 30.375, "learning_rate": 1.9999975671596757e-05, "loss": 7.8491, "step": 750 }, { "epoch": 0.002137347046102013, "grad_norm": 31.5, "learning_rate": 1.9999975017636835e-05, "loss": 8.1601, "step": 760 }, { "epoch": 0.002165470033550724, "grad_norm": 38.25, "learning_rate": 1.9999974355003726e-05, "loss": 8.5655, "step": 770 }, { "epoch": 0.0021935930209994346, "grad_norm": 51.25, "learning_rate": 1.999997368369742e-05, "loss": 8.4149, "step": 780 }, { "epoch": 0.0022217160084481453, "grad_norm": 33.25, "learning_rate": 1.999997300371792e-05, "loss": 8.2946, "step": 790 }, { "epoch": 0.002249838995896856, "grad_norm": 34.0, "learning_rate": 1.999997231506523e-05, "loss": 7.6409, "step": 800 }, { "epoch": 0.0022779619833455667, "grad_norm": 27.25, "learning_rate": 1.999997161773935e-05, "loss": 8.7731, "step": 810 }, { "epoch": 0.0023060849707942773, "grad_norm": 25.625, "learning_rate": 1.9999970911740277e-05, "loss": 8.0884, "step": 820 }, { "epoch": 0.002334207958242988, "grad_norm": 45.0, "learning_rate": 1.9999970197068012e-05, "loss": 8.8899, "step": 830 }, { "epoch": 0.002362330945691699, "grad_norm": 42.0, "learning_rate": 1.999996947372256e-05, "loss": 8.7895, "step": 840 }, { "epoch": 0.00239045393314041, "grad_norm": 33.5, "learning_rate": 1.9999968741703917e-05, "loss": 8.2149, "step": 850 }, { "epoch": 0.0024185769205891205, "grad_norm": 30.75, "learning_rate": 1.9999968001012083e-05, "loss": 8.2464, "step": 860 }, { "epoch": 0.0024466999080378312, "grad_norm": 37.75, "learning_rate": 1.9999967251647066e-05, "loss": 8.2323, "step": 870 }, { "epoch": 0.002474822895486542, "grad_norm": 34.5, "learning_rate": 1.999996649360886e-05, "loss": 8.3075, "step": 880 }, { "epoch": 0.0025029458829352526, "grad_norm": 42.75, "learning_rate": 1.9999965726897467e-05, "loss": 7.8954, "step": 890 }, { "epoch": 0.0025310688703839633, "grad_norm": 22.75, "learning_rate": 1.999996495151289e-05, "loss": 8.7934, "step": 900 }, { "epoch": 0.002559191857832674, "grad_norm": 28.5, "learning_rate": 1.9999964167455127e-05, "loss": 7.965, "step": 910 }, { "epoch": 0.0025873148452813847, "grad_norm": 25.25, "learning_rate": 1.999996337472418e-05, "loss": 8.4655, "step": 920 }, { "epoch": 0.0026154378327300954, "grad_norm": 28.375, "learning_rate": 1.9999962573320045e-05, "loss": 7.8498, "step": 930 }, { "epoch": 0.002643560820178806, "grad_norm": 45.25, "learning_rate": 1.9999961763242732e-05, "loss": 7.3685, "step": 940 }, { "epoch": 0.0026716838076275167, "grad_norm": 39.25, "learning_rate": 1.9999960944492235e-05, "loss": 8.991, "step": 950 }, { "epoch": 0.0026998067950762274, "grad_norm": 33.5, "learning_rate": 1.9999960117068555e-05, "loss": 8.6423, "step": 960 }, { "epoch": 0.002727929782524938, "grad_norm": 37.0, "learning_rate": 1.9999959280971697e-05, "loss": 8.3628, "step": 970 }, { "epoch": 0.002756052769973649, "grad_norm": 29.25, "learning_rate": 1.9999958436201656e-05, "loss": 8.3249, "step": 980 }, { "epoch": 0.0027841757574223595, "grad_norm": 29.125, "learning_rate": 1.9999957582758435e-05, "loss": 8.2939, "step": 990 }, { "epoch": 0.00281229874487107, "grad_norm": 40.5, "learning_rate": 1.9999956720642036e-05, "loss": 8.7847, "step": 1000 }, { "epoch": 0.002840421732319781, "grad_norm": 25.0, "learning_rate": 1.999995584985246e-05, "loss": 8.669, "step": 1010 }, { "epoch": 0.0028685447197684916, "grad_norm": 37.0, "learning_rate": 1.9999954970389705e-05, "loss": 8.0318, "step": 1020 }, { "epoch": 0.0028966677072172022, "grad_norm": 27.125, "learning_rate": 1.9999954082253773e-05, "loss": 8.5188, "step": 1030 }, { "epoch": 0.002924790694665913, "grad_norm": 32.5, "learning_rate": 1.9999953185444667e-05, "loss": 9.3141, "step": 1040 }, { "epoch": 0.0029529136821146236, "grad_norm": 46.75, "learning_rate": 1.9999952279962387e-05, "loss": 8.1453, "step": 1050 }, { "epoch": 0.0029810366695633343, "grad_norm": 53.5, "learning_rate": 1.999995136580693e-05, "loss": 8.3799, "step": 1060 }, { "epoch": 0.003009159657012045, "grad_norm": 43.75, "learning_rate": 1.99999504429783e-05, "loss": 7.6179, "step": 1070 }, { "epoch": 0.0030372826444607557, "grad_norm": 44.5, "learning_rate": 1.99999495114765e-05, "loss": 7.5147, "step": 1080 }, { "epoch": 0.0030654056319094664, "grad_norm": 32.75, "learning_rate": 1.999994857130153e-05, "loss": 8.6252, "step": 1090 }, { "epoch": 0.003093528619358177, "grad_norm": 32.5, "learning_rate": 1.9999947622453383e-05, "loss": 8.4846, "step": 1100 }, { "epoch": 0.0031216516068068877, "grad_norm": 31.375, "learning_rate": 1.9999946664932065e-05, "loss": 8.3329, "step": 1110 }, { "epoch": 0.0031497745942555984, "grad_norm": 31.5, "learning_rate": 1.9999945698737585e-05, "loss": 7.5005, "step": 1120 }, { "epoch": 0.003177897581704309, "grad_norm": 29.0, "learning_rate": 1.9999944723869934e-05, "loss": 8.5725, "step": 1130 }, { "epoch": 0.00320602056915302, "grad_norm": 42.75, "learning_rate": 1.999994374032911e-05, "loss": 9.1367, "step": 1140 }, { "epoch": 0.0032341435566017305, "grad_norm": 34.75, "learning_rate": 1.9999942748115125e-05, "loss": 8.0121, "step": 1150 }, { "epoch": 0.003262266544050441, "grad_norm": 27.0, "learning_rate": 1.999994174722797e-05, "loss": 8.2077, "step": 1160 }, { "epoch": 0.003290389531499152, "grad_norm": 30.75, "learning_rate": 1.9999940737667652e-05, "loss": 9.0612, "step": 1170 }, { "epoch": 0.003318512518947863, "grad_norm": 36.75, "learning_rate": 1.999993971943417e-05, "loss": 8.223, "step": 1180 }, { "epoch": 0.0033466355063965737, "grad_norm": 39.0, "learning_rate": 1.9999938692527526e-05, "loss": 7.9979, "step": 1190 }, { "epoch": 0.0033747584938452844, "grad_norm": 33.0, "learning_rate": 1.9999937656947718e-05, "loss": 8.5902, "step": 1200 }, { "epoch": 0.003402881481293995, "grad_norm": 21.75, "learning_rate": 1.9999936612694747e-05, "loss": 7.9853, "step": 1210 }, { "epoch": 0.0034310044687427058, "grad_norm": 28.375, "learning_rate": 1.999993555976862e-05, "loss": 8.129, "step": 1220 }, { "epoch": 0.0034591274561914164, "grad_norm": 34.0, "learning_rate": 1.999993449816933e-05, "loss": 7.971, "step": 1230 }, { "epoch": 0.003487250443640127, "grad_norm": 26.875, "learning_rate": 1.999993342789688e-05, "loss": 8.9445, "step": 1240 }, { "epoch": 0.003515373431088838, "grad_norm": 30.75, "learning_rate": 1.9999932348951275e-05, "loss": 7.3252, "step": 1250 }, { "epoch": 0.0035434964185375485, "grad_norm": 34.0, "learning_rate": 1.999993126133251e-05, "loss": 8.2313, "step": 1260 }, { "epoch": 0.003571619405986259, "grad_norm": 40.5, "learning_rate": 1.9999930165040592e-05, "loss": 8.6545, "step": 1270 }, { "epoch": 0.00359974239343497, "grad_norm": 30.875, "learning_rate": 1.9999929060075518e-05, "loss": 7.8905, "step": 1280 }, { "epoch": 0.0036278653808836806, "grad_norm": 27.5, "learning_rate": 1.999992794643729e-05, "loss": 7.475, "step": 1290 }, { "epoch": 0.0036559883683323913, "grad_norm": 30.25, "learning_rate": 1.999992682412591e-05, "loss": 8.5303, "step": 1300 }, { "epoch": 0.003684111355781102, "grad_norm": 31.375, "learning_rate": 1.9999925693141375e-05, "loss": 8.0424, "step": 1310 }, { "epoch": 0.0037122343432298126, "grad_norm": 25.125, "learning_rate": 1.9999924553483688e-05, "loss": 8.4007, "step": 1320 }, { "epoch": 0.0037403573306785233, "grad_norm": 35.5, "learning_rate": 1.9999923405152855e-05, "loss": 8.7585, "step": 1330 }, { "epoch": 0.003768480318127234, "grad_norm": 30.875, "learning_rate": 1.999992224814887e-05, "loss": 8.2205, "step": 1340 }, { "epoch": 0.0037966033055759447, "grad_norm": 36.25, "learning_rate": 1.9999921082471738e-05, "loss": 7.3258, "step": 1350 }, { "epoch": 0.0038247262930246554, "grad_norm": 71.0, "learning_rate": 1.9999919908121458e-05, "loss": 8.4646, "step": 1360 }, { "epoch": 0.003852849280473366, "grad_norm": 23.375, "learning_rate": 1.9999918725098032e-05, "loss": 8.2898, "step": 1370 }, { "epoch": 0.0038809722679220768, "grad_norm": 40.5, "learning_rate": 1.9999917533401466e-05, "loss": 8.3837, "step": 1380 }, { "epoch": 0.0039090952553707875, "grad_norm": 20.75, "learning_rate": 1.999991633303175e-05, "loss": 8.3823, "step": 1390 }, { "epoch": 0.003937218242819498, "grad_norm": 28.5, "learning_rate": 1.9999915123988892e-05, "loss": 7.7302, "step": 1400 }, { "epoch": 0.003965341230268209, "grad_norm": 38.75, "learning_rate": 1.9999913906272894e-05, "loss": 9.2115, "step": 1410 }, { "epoch": 0.0039934642177169195, "grad_norm": 36.75, "learning_rate": 1.999991267988375e-05, "loss": 8.3871, "step": 1420 }, { "epoch": 0.00402158720516563, "grad_norm": 47.0, "learning_rate": 1.9999911444821468e-05, "loss": 8.1022, "step": 1430 }, { "epoch": 0.004049710192614341, "grad_norm": 37.0, "learning_rate": 1.9999910201086054e-05, "loss": 8.6464, "step": 1440 }, { "epoch": 0.004077833180063052, "grad_norm": 35.0, "learning_rate": 1.9999908948677494e-05, "loss": 8.2938, "step": 1450 }, { "epoch": 0.004105956167511762, "grad_norm": 29.0, "learning_rate": 1.99999076875958e-05, "loss": 7.6254, "step": 1460 }, { "epoch": 0.004134079154960473, "grad_norm": 36.0, "learning_rate": 1.999990641784097e-05, "loss": 7.8324, "step": 1470 }, { "epoch": 0.004162202142409184, "grad_norm": 36.0, "learning_rate": 1.999990513941301e-05, "loss": 7.6072, "step": 1480 }, { "epoch": 0.004190325129857894, "grad_norm": 58.0, "learning_rate": 1.999990385231191e-05, "loss": 8.1061, "step": 1490 }, { "epoch": 0.004218448117306605, "grad_norm": 23.5, "learning_rate": 1.9999902556537682e-05, "loss": 8.0443, "step": 1500 }, { "epoch": 0.004246571104755316, "grad_norm": 56.5, "learning_rate": 1.999990125209032e-05, "loss": 7.6882, "step": 1510 }, { "epoch": 0.004274694092204026, "grad_norm": 22.375, "learning_rate": 1.9999899938969834e-05, "loss": 8.3186, "step": 1520 }, { "epoch": 0.004302817079652737, "grad_norm": 37.75, "learning_rate": 1.9999898617176214e-05, "loss": 8.4319, "step": 1530 }, { "epoch": 0.004330940067101448, "grad_norm": 48.75, "learning_rate": 1.9999897286709465e-05, "loss": 7.6039, "step": 1540 }, { "epoch": 0.0043590630545501585, "grad_norm": 32.25, "learning_rate": 1.9999895947569593e-05, "loss": 8.9144, "step": 1550 }, { "epoch": 0.004387186041998869, "grad_norm": 42.0, "learning_rate": 1.9999894599756598e-05, "loss": 7.8979, "step": 1560 }, { "epoch": 0.00441530902944758, "grad_norm": 41.0, "learning_rate": 1.9999893243270474e-05, "loss": 7.9144, "step": 1570 }, { "epoch": 0.0044434320168962906, "grad_norm": 24.75, "learning_rate": 1.999989187811123e-05, "loss": 7.8997, "step": 1580 }, { "epoch": 0.004471555004345001, "grad_norm": 47.75, "learning_rate": 1.999989050427886e-05, "loss": 8.5669, "step": 1590 }, { "epoch": 0.004499677991793712, "grad_norm": 26.75, "learning_rate": 1.999988912177337e-05, "loss": 8.1317, "step": 1600 }, { "epoch": 0.004527800979242423, "grad_norm": 23.625, "learning_rate": 1.9999887730594766e-05, "loss": 8.7139, "step": 1610 }, { "epoch": 0.004555923966691133, "grad_norm": 36.5, "learning_rate": 1.999988633074304e-05, "loss": 8.8209, "step": 1620 }, { "epoch": 0.004584046954139844, "grad_norm": 24.125, "learning_rate": 1.9999884922218195e-05, "loss": 8.7102, "step": 1630 }, { "epoch": 0.004612169941588555, "grad_norm": 32.25, "learning_rate": 1.999988350502024e-05, "loss": 8.3722, "step": 1640 }, { "epoch": 0.004640292929037265, "grad_norm": 23.5, "learning_rate": 1.9999882079149166e-05, "loss": 8.12, "step": 1650 }, { "epoch": 0.004668415916485976, "grad_norm": 38.5, "learning_rate": 1.999988064460498e-05, "loss": 7.867, "step": 1660 }, { "epoch": 0.004696538903934687, "grad_norm": 42.25, "learning_rate": 1.999987920138768e-05, "loss": 8.528, "step": 1670 }, { "epoch": 0.004724661891383398, "grad_norm": 37.5, "learning_rate": 1.999987774949727e-05, "loss": 8.0916, "step": 1680 }, { "epoch": 0.004752784878832109, "grad_norm": 30.625, "learning_rate": 1.9999876288933752e-05, "loss": 7.4676, "step": 1690 }, { "epoch": 0.00478090786628082, "grad_norm": 26.0, "learning_rate": 1.9999874819697128e-05, "loss": 7.8035, "step": 1700 }, { "epoch": 0.00480903085372953, "grad_norm": 22.25, "learning_rate": 1.999987334178739e-05, "loss": 7.8775, "step": 1710 }, { "epoch": 0.004837153841178241, "grad_norm": 25.75, "learning_rate": 1.999987185520455e-05, "loss": 9.0641, "step": 1720 }, { "epoch": 0.004865276828626952, "grad_norm": 67.5, "learning_rate": 1.999987035994861e-05, "loss": 8.6583, "step": 1730 }, { "epoch": 0.0048933998160756624, "grad_norm": 34.5, "learning_rate": 1.999986885601956e-05, "loss": 7.6526, "step": 1740 }, { "epoch": 0.004921522803524373, "grad_norm": 38.0, "learning_rate": 1.999986734341741e-05, "loss": 8.4408, "step": 1750 }, { "epoch": 0.004949645790973084, "grad_norm": 23.875, "learning_rate": 1.999986582214216e-05, "loss": 8.9989, "step": 1760 }, { "epoch": 0.0049777687784217945, "grad_norm": 28.25, "learning_rate": 1.9999864292193813e-05, "loss": 8.1971, "step": 1770 }, { "epoch": 0.005005891765870505, "grad_norm": 29.875, "learning_rate": 1.9999862753572366e-05, "loss": 7.9196, "step": 1780 }, { "epoch": 0.005034014753319216, "grad_norm": 34.75, "learning_rate": 1.9999861206277823e-05, "loss": 7.9966, "step": 1790 }, { "epoch": 0.005062137740767927, "grad_norm": 32.25, "learning_rate": 1.999985965031018e-05, "loss": 8.5606, "step": 1800 }, { "epoch": 0.005090260728216637, "grad_norm": 21.375, "learning_rate": 1.999985808566945e-05, "loss": 8.1658, "step": 1810 }, { "epoch": 0.005118383715665348, "grad_norm": 36.0, "learning_rate": 1.9999856512355628e-05, "loss": 8.6702, "step": 1820 }, { "epoch": 0.005146506703114059, "grad_norm": 38.0, "learning_rate": 1.999985493036871e-05, "loss": 7.7636, "step": 1830 }, { "epoch": 0.005174629690562769, "grad_norm": 27.875, "learning_rate": 1.9999853339708702e-05, "loss": 8.1655, "step": 1840 }, { "epoch": 0.00520275267801148, "grad_norm": 36.25, "learning_rate": 1.999985174037561e-05, "loss": 8.1779, "step": 1850 }, { "epoch": 0.005230875665460191, "grad_norm": 30.75, "learning_rate": 1.9999850132369428e-05, "loss": 8.1862, "step": 1860 }, { "epoch": 0.005258998652908901, "grad_norm": 23.875, "learning_rate": 1.999984851569016e-05, "loss": 8.5442, "step": 1870 }, { "epoch": 0.005287121640357612, "grad_norm": 24.125, "learning_rate": 1.999984689033781e-05, "loss": 8.0296, "step": 1880 }, { "epoch": 0.005315244627806323, "grad_norm": 44.75, "learning_rate": 1.9999845256312374e-05, "loss": 7.9315, "step": 1890 }, { "epoch": 0.0053433676152550335, "grad_norm": 28.25, "learning_rate": 1.9999843613613858e-05, "loss": 8.4237, "step": 1900 }, { "epoch": 0.005371490602703744, "grad_norm": 29.375, "learning_rate": 1.9999841962242262e-05, "loss": 9.3751, "step": 1910 }, { "epoch": 0.005399613590152455, "grad_norm": 40.0, "learning_rate": 1.9999840302197592e-05, "loss": 8.5878, "step": 1920 }, { "epoch": 0.0054277365776011655, "grad_norm": 29.375, "learning_rate": 1.999983863347984e-05, "loss": 7.7537, "step": 1930 }, { "epoch": 0.005455859565049876, "grad_norm": 27.625, "learning_rate": 1.9999836956089015e-05, "loss": 8.2422, "step": 1940 }, { "epoch": 0.005483982552498587, "grad_norm": 45.25, "learning_rate": 1.9999835270025115e-05, "loss": 8.5899, "step": 1950 }, { "epoch": 0.005512105539947298, "grad_norm": 24.75, "learning_rate": 1.999983357528814e-05, "loss": 8.1926, "step": 1960 }, { "epoch": 0.005540228527396008, "grad_norm": 52.0, "learning_rate": 1.9999831871878093e-05, "loss": 8.2475, "step": 1970 }, { "epoch": 0.005568351514844719, "grad_norm": 33.5, "learning_rate": 1.9999830159794978e-05, "loss": 8.4705, "step": 1980 }, { "epoch": 0.00559647450229343, "grad_norm": 33.0, "learning_rate": 1.9999828439038797e-05, "loss": 7.8861, "step": 1990 }, { "epoch": 0.00562459748974214, "grad_norm": 35.75, "learning_rate": 1.9999826709609545e-05, "loss": 8.3294, "step": 2000 }, { "epoch": 0.005652720477190851, "grad_norm": 24.25, "learning_rate": 1.999982497150723e-05, "loss": 7.7923, "step": 2010 }, { "epoch": 0.005680843464639562, "grad_norm": 31.875, "learning_rate": 1.9999823224731855e-05, "loss": 7.5634, "step": 2020 }, { "epoch": 0.005708966452088272, "grad_norm": 48.0, "learning_rate": 1.9999821469283414e-05, "loss": 8.2047, "step": 2030 }, { "epoch": 0.005737089439536983, "grad_norm": 26.875, "learning_rate": 1.999981970516191e-05, "loss": 7.9333, "step": 2040 }, { "epoch": 0.005765212426985694, "grad_norm": 35.75, "learning_rate": 1.999981793236735e-05, "loss": 8.4141, "step": 2050 }, { "epoch": 0.0057933354144344045, "grad_norm": 36.5, "learning_rate": 1.999981615089973e-05, "loss": 8.2208, "step": 2060 }, { "epoch": 0.005821458401883115, "grad_norm": 37.0, "learning_rate": 1.9999814360759057e-05, "loss": 8.2442, "step": 2070 }, { "epoch": 0.005849581389331826, "grad_norm": 33.75, "learning_rate": 1.9999812561945327e-05, "loss": 8.892, "step": 2080 }, { "epoch": 0.0058777043767805365, "grad_norm": 28.375, "learning_rate": 1.9999810754458547e-05, "loss": 8.0526, "step": 2090 }, { "epoch": 0.005905827364229247, "grad_norm": 26.75, "learning_rate": 1.999980893829871e-05, "loss": 8.3751, "step": 2100 }, { "epoch": 0.005933950351677958, "grad_norm": 32.0, "learning_rate": 1.9999807113465827e-05, "loss": 7.9085, "step": 2110 }, { "epoch": 0.005962073339126669, "grad_norm": 41.5, "learning_rate": 1.9999805279959895e-05, "loss": 8.9577, "step": 2120 }, { "epoch": 0.005990196326575379, "grad_norm": 67.5, "learning_rate": 1.9999803437780918e-05, "loss": 8.204, "step": 2130 }, { "epoch": 0.00601831931402409, "grad_norm": 29.125, "learning_rate": 1.9999801586928893e-05, "loss": 7.8166, "step": 2140 }, { "epoch": 0.006046442301472801, "grad_norm": 47.5, "learning_rate": 1.9999799727403826e-05, "loss": 8.6401, "step": 2150 }, { "epoch": 0.006074565288921511, "grad_norm": 24.25, "learning_rate": 1.9999797859205715e-05, "loss": 8.569, "step": 2160 }, { "epoch": 0.006102688276370222, "grad_norm": 20.375, "learning_rate": 1.999979598233457e-05, "loss": 8.4517, "step": 2170 }, { "epoch": 0.006130811263818933, "grad_norm": 36.5, "learning_rate": 1.999979409679038e-05, "loss": 8.8763, "step": 2180 }, { "epoch": 0.0061589342512676434, "grad_norm": 32.75, "learning_rate": 1.9999792202573153e-05, "loss": 8.3364, "step": 2190 }, { "epoch": 0.006187057238716354, "grad_norm": 33.5, "learning_rate": 1.9999790299682897e-05, "loss": 7.9852, "step": 2200 }, { "epoch": 0.006215180226165065, "grad_norm": 34.75, "learning_rate": 1.9999788388119602e-05, "loss": 7.6193, "step": 2210 }, { "epoch": 0.0062433032136137755, "grad_norm": 32.0, "learning_rate": 1.9999786467883273e-05, "loss": 8.3854, "step": 2220 }, { "epoch": 0.006271426201062486, "grad_norm": 28.625, "learning_rate": 1.9999784538973916e-05, "loss": 8.6284, "step": 2230 }, { "epoch": 0.006299549188511197, "grad_norm": 23.875, "learning_rate": 1.999978260139153e-05, "loss": 7.3057, "step": 2240 }, { "epoch": 0.0063276721759599076, "grad_norm": 55.75, "learning_rate": 1.999978065513612e-05, "loss": 8.1596, "step": 2250 }, { "epoch": 0.006355795163408618, "grad_norm": 27.0, "learning_rate": 1.9999778700207678e-05, "loss": 8.3685, "step": 2260 }, { "epoch": 0.006383918150857329, "grad_norm": 36.25, "learning_rate": 1.999977673660622e-05, "loss": 7.9203, "step": 2270 }, { "epoch": 0.00641204113830604, "grad_norm": 35.0, "learning_rate": 1.9999774764331737e-05, "loss": 7.9711, "step": 2280 }, { "epoch": 0.00644016412575475, "grad_norm": 32.25, "learning_rate": 1.9999772783384233e-05, "loss": 8.959, "step": 2290 }, { "epoch": 0.006468287113203461, "grad_norm": 23.375, "learning_rate": 1.999977079376371e-05, "loss": 8.0193, "step": 2300 }, { "epoch": 0.006496410100652172, "grad_norm": 27.75, "learning_rate": 1.9999768795470173e-05, "loss": 8.1791, "step": 2310 }, { "epoch": 0.006524533088100882, "grad_norm": 22.875, "learning_rate": 1.9999766788503616e-05, "loss": 8.3491, "step": 2320 }, { "epoch": 0.006552656075549593, "grad_norm": 43.25, "learning_rate": 1.9999764772864048e-05, "loss": 8.3152, "step": 2330 }, { "epoch": 0.006580779062998304, "grad_norm": 39.5, "learning_rate": 1.999976274855147e-05, "loss": 8.4961, "step": 2340 }, { "epoch": 0.0066089020504470145, "grad_norm": 42.0, "learning_rate": 1.999976071556588e-05, "loss": 7.7311, "step": 2350 }, { "epoch": 0.006637025037895726, "grad_norm": 25.75, "learning_rate": 1.9999758673907286e-05, "loss": 8.254, "step": 2360 }, { "epoch": 0.006665148025344437, "grad_norm": 27.75, "learning_rate": 1.9999756623575682e-05, "loss": 8.4002, "step": 2370 }, { "epoch": 0.006693271012793147, "grad_norm": 29.125, "learning_rate": 1.9999754564571073e-05, "loss": 8.8437, "step": 2380 }, { "epoch": 0.006721394000241858, "grad_norm": 21.125, "learning_rate": 1.9999752496893464e-05, "loss": 7.9154, "step": 2390 }, { "epoch": 0.006749516987690569, "grad_norm": 35.75, "learning_rate": 1.999975042054285e-05, "loss": 8.1271, "step": 2400 }, { "epoch": 0.0067776399751392795, "grad_norm": 23.0, "learning_rate": 1.9999748335519243e-05, "loss": 8.7894, "step": 2410 }, { "epoch": 0.00680576296258799, "grad_norm": 30.0, "learning_rate": 1.9999746241822635e-05, "loss": 7.7237, "step": 2420 }, { "epoch": 0.006833885950036701, "grad_norm": 27.375, "learning_rate": 1.9999744139453032e-05, "loss": 7.8372, "step": 2430 }, { "epoch": 0.0068620089374854115, "grad_norm": 25.875, "learning_rate": 1.9999742028410436e-05, "loss": 8.0883, "step": 2440 }, { "epoch": 0.006890131924934122, "grad_norm": 33.5, "learning_rate": 1.9999739908694846e-05, "loss": 8.5254, "step": 2450 }, { "epoch": 0.006918254912382833, "grad_norm": 54.0, "learning_rate": 1.999973778030627e-05, "loss": 8.7582, "step": 2460 }, { "epoch": 0.006946377899831544, "grad_norm": 20.875, "learning_rate": 1.99997356432447e-05, "loss": 8.2299, "step": 2470 }, { "epoch": 0.006974500887280254, "grad_norm": 35.25, "learning_rate": 1.9999733497510146e-05, "loss": 8.6918, "step": 2480 }, { "epoch": 0.007002623874728965, "grad_norm": 27.125, "learning_rate": 1.999973134310261e-05, "loss": 9.4593, "step": 2490 }, { "epoch": 0.007030746862177676, "grad_norm": 23.0, "learning_rate": 1.9999729180022088e-05, "loss": 8.6678, "step": 2500 }, { "epoch": 0.007058869849626386, "grad_norm": 37.5, "learning_rate": 1.999972700826859e-05, "loss": 8.4213, "step": 2510 }, { "epoch": 0.007086992837075097, "grad_norm": 31.75, "learning_rate": 1.9999724827842108e-05, "loss": 7.9444, "step": 2520 }, { "epoch": 0.007115115824523808, "grad_norm": 27.125, "learning_rate": 1.9999722638742654e-05, "loss": 8.1803, "step": 2530 }, { "epoch": 0.007143238811972518, "grad_norm": 31.5, "learning_rate": 1.999972044097022e-05, "loss": 8.2442, "step": 2540 }, { "epoch": 0.007171361799421229, "grad_norm": 25.125, "learning_rate": 1.9999718234524816e-05, "loss": 8.5528, "step": 2550 }, { "epoch": 0.00719948478686994, "grad_norm": 48.25, "learning_rate": 1.9999716019406438e-05, "loss": 8.6968, "step": 2560 }, { "epoch": 0.0072276077743186505, "grad_norm": 38.5, "learning_rate": 1.9999713795615093e-05, "loss": 7.8031, "step": 2570 }, { "epoch": 0.007255730761767361, "grad_norm": 27.875, "learning_rate": 1.999971156315078e-05, "loss": 7.8565, "step": 2580 }, { "epoch": 0.007283853749216072, "grad_norm": 26.75, "learning_rate": 1.9999709322013502e-05, "loss": 7.6584, "step": 2590 }, { "epoch": 0.0073119767366647825, "grad_norm": 37.5, "learning_rate": 1.9999707072203264e-05, "loss": 8.1999, "step": 2600 }, { "epoch": 0.007340099724113493, "grad_norm": 35.75, "learning_rate": 1.9999704813720058e-05, "loss": 8.0539, "step": 2610 }, { "epoch": 0.007368222711562204, "grad_norm": 49.5, "learning_rate": 1.9999702546563896e-05, "loss": 7.6773, "step": 2620 }, { "epoch": 0.007396345699010915, "grad_norm": 36.75, "learning_rate": 1.9999700270734777e-05, "loss": 8.2563, "step": 2630 }, { "epoch": 0.007424468686459625, "grad_norm": 22.75, "learning_rate": 1.99996979862327e-05, "loss": 8.3656, "step": 2640 }, { "epoch": 0.007452591673908336, "grad_norm": 23.0, "learning_rate": 1.999969569305767e-05, "loss": 8.2729, "step": 2650 }, { "epoch": 0.007480714661357047, "grad_norm": 25.875, "learning_rate": 1.999969339120969e-05, "loss": 8.248, "step": 2660 }, { "epoch": 0.007508837648805757, "grad_norm": 38.75, "learning_rate": 1.9999691080688758e-05, "loss": 8.5252, "step": 2670 }, { "epoch": 0.007536960636254468, "grad_norm": 25.0, "learning_rate": 1.9999688761494876e-05, "loss": 8.3807, "step": 2680 }, { "epoch": 0.007565083623703179, "grad_norm": 55.75, "learning_rate": 1.9999686433628054e-05, "loss": 7.4805, "step": 2690 }, { "epoch": 0.007593206611151889, "grad_norm": 34.75, "learning_rate": 1.999968409708829e-05, "loss": 8.0859, "step": 2700 }, { "epoch": 0.0076213295986006, "grad_norm": 29.625, "learning_rate": 1.9999681751875578e-05, "loss": 7.2083, "step": 2710 }, { "epoch": 0.007649452586049311, "grad_norm": 55.0, "learning_rate": 1.999967939798993e-05, "loss": 8.823, "step": 2720 }, { "epoch": 0.0076775755734980215, "grad_norm": 20.75, "learning_rate": 1.999967703543134e-05, "loss": 8.5313, "step": 2730 }, { "epoch": 0.007705698560946732, "grad_norm": 40.0, "learning_rate": 1.9999674664199818e-05, "loss": 7.3648, "step": 2740 }, { "epoch": 0.007733821548395443, "grad_norm": 27.0, "learning_rate": 1.9999672284295365e-05, "loss": 7.8972, "step": 2750 }, { "epoch": 0.0077619445358441536, "grad_norm": 31.5, "learning_rate": 1.9999669895717978e-05, "loss": 7.856, "step": 2760 }, { "epoch": 0.007790067523292864, "grad_norm": 33.75, "learning_rate": 1.9999667498467662e-05, "loss": 8.2645, "step": 2770 }, { "epoch": 0.007818190510741575, "grad_norm": 31.625, "learning_rate": 1.9999665092544416e-05, "loss": 8.112, "step": 2780 }, { "epoch": 0.007846313498190286, "grad_norm": 24.625, "learning_rate": 1.9999662677948248e-05, "loss": 8.0376, "step": 2790 }, { "epoch": 0.007874436485638996, "grad_norm": 25.75, "learning_rate": 1.9999660254679156e-05, "loss": 7.6112, "step": 2800 }, { "epoch": 0.007902559473087708, "grad_norm": 45.25, "learning_rate": 1.9999657822737145e-05, "loss": 7.7822, "step": 2810 }, { "epoch": 0.007930682460536418, "grad_norm": 31.375, "learning_rate": 1.9999655382122212e-05, "loss": 7.888, "step": 2820 }, { "epoch": 0.00795880544798513, "grad_norm": 42.0, "learning_rate": 1.9999652932834366e-05, "loss": 8.043, "step": 2830 }, { "epoch": 0.007986928435433839, "grad_norm": 43.25, "learning_rate": 1.9999650474873603e-05, "loss": 8.4205, "step": 2840 }, { "epoch": 0.00801505142288255, "grad_norm": 29.75, "learning_rate": 1.999964800823993e-05, "loss": 8.0209, "step": 2850 }, { "epoch": 0.00804317441033126, "grad_norm": 27.125, "learning_rate": 1.9999645532933344e-05, "loss": 8.923, "step": 2860 }, { "epoch": 0.008071297397779972, "grad_norm": 24.125, "learning_rate": 1.999964304895385e-05, "loss": 7.5883, "step": 2870 }, { "epoch": 0.008099420385228682, "grad_norm": 41.0, "learning_rate": 1.9999640556301448e-05, "loss": 7.8274, "step": 2880 }, { "epoch": 0.008127543372677393, "grad_norm": 35.0, "learning_rate": 1.9999638054976146e-05, "loss": 8.3555, "step": 2890 }, { "epoch": 0.008155666360126103, "grad_norm": 32.75, "learning_rate": 1.9999635544977942e-05, "loss": 7.4039, "step": 2900 }, { "epoch": 0.008183789347574815, "grad_norm": 28.5, "learning_rate": 1.999963302630684e-05, "loss": 7.7538, "step": 2910 }, { "epoch": 0.008211912335023525, "grad_norm": 29.875, "learning_rate": 1.9999630498962833e-05, "loss": 9.2657, "step": 2920 }, { "epoch": 0.008240035322472236, "grad_norm": 38.0, "learning_rate": 1.9999627962945938e-05, "loss": 8.4342, "step": 2930 }, { "epoch": 0.008268158309920946, "grad_norm": 26.0, "learning_rate": 1.9999625418256147e-05, "loss": 8.1592, "step": 2940 }, { "epoch": 0.008296281297369658, "grad_norm": 28.875, "learning_rate": 1.9999622864893468e-05, "loss": 7.8994, "step": 2950 }, { "epoch": 0.008324404284818367, "grad_norm": 22.875, "learning_rate": 1.99996203028579e-05, "loss": 7.6056, "step": 2960 }, { "epoch": 0.008352527272267079, "grad_norm": 34.25, "learning_rate": 1.9999617732149442e-05, "loss": 8.0552, "step": 2970 }, { "epoch": 0.008380650259715789, "grad_norm": 31.125, "learning_rate": 1.9999615152768103e-05, "loss": 7.8963, "step": 2980 }, { "epoch": 0.0084087732471645, "grad_norm": 18.75, "learning_rate": 1.999961256471388e-05, "loss": 8.4423, "step": 2990 }, { "epoch": 0.00843689623461321, "grad_norm": 27.625, "learning_rate": 1.999960996798678e-05, "loss": 8.0885, "step": 3000 }, { "epoch": 0.008465019222061922, "grad_norm": 29.5, "learning_rate": 1.9999607362586805e-05, "loss": 7.9177, "step": 3010 }, { "epoch": 0.008493142209510631, "grad_norm": 48.0, "learning_rate": 1.999960474851395e-05, "loss": 8.5548, "step": 3020 }, { "epoch": 0.008521265196959343, "grad_norm": 30.125, "learning_rate": 1.9999602125768227e-05, "loss": 8.1429, "step": 3030 }, { "epoch": 0.008549388184408053, "grad_norm": 25.0, "learning_rate": 1.999959949434963e-05, "loss": 7.3102, "step": 3040 }, { "epoch": 0.008577511171856764, "grad_norm": 26.75, "learning_rate": 1.9999596854258162e-05, "loss": 7.824, "step": 3050 }, { "epoch": 0.008605634159305474, "grad_norm": 50.5, "learning_rate": 1.9999594205493836e-05, "loss": 7.7445, "step": 3060 }, { "epoch": 0.008633757146754186, "grad_norm": 32.75, "learning_rate": 1.9999591548056642e-05, "loss": 9.1142, "step": 3070 }, { "epoch": 0.008661880134202896, "grad_norm": 47.75, "learning_rate": 1.9999588881946583e-05, "loss": 7.5972, "step": 3080 }, { "epoch": 0.008690003121651607, "grad_norm": 20.875, "learning_rate": 1.999958620716367e-05, "loss": 7.804, "step": 3090 }, { "epoch": 0.008718126109100317, "grad_norm": 23.5, "learning_rate": 1.99995835237079e-05, "loss": 7.8338, "step": 3100 }, { "epoch": 0.008746249096549029, "grad_norm": 27.0, "learning_rate": 1.9999580831579274e-05, "loss": 7.747, "step": 3110 }, { "epoch": 0.008774372083997738, "grad_norm": 41.5, "learning_rate": 1.9999578130777798e-05, "loss": 7.3338, "step": 3120 }, { "epoch": 0.00880249507144645, "grad_norm": 25.125, "learning_rate": 1.9999575421303467e-05, "loss": 8.548, "step": 3130 }, { "epoch": 0.00883061805889516, "grad_norm": 24.5, "learning_rate": 1.9999572703156294e-05, "loss": 8.2418, "step": 3140 }, { "epoch": 0.008858741046343871, "grad_norm": 36.75, "learning_rate": 1.9999569976336277e-05, "loss": 8.0847, "step": 3150 }, { "epoch": 0.008886864033792581, "grad_norm": 20.25, "learning_rate": 1.999956724084341e-05, "loss": 8.2338, "step": 3160 }, { "epoch": 0.008914987021241293, "grad_norm": 25.375, "learning_rate": 1.9999564496677707e-05, "loss": 8.462, "step": 3170 }, { "epoch": 0.008943110008690002, "grad_norm": 31.5, "learning_rate": 1.9999561743839168e-05, "loss": 7.7826, "step": 3180 }, { "epoch": 0.008971232996138714, "grad_norm": 17.375, "learning_rate": 1.9999558982327795e-05, "loss": 8.2453, "step": 3190 }, { "epoch": 0.008999355983587424, "grad_norm": 23.0, "learning_rate": 1.9999556212143583e-05, "loss": 7.9897, "step": 3200 }, { "epoch": 0.009027478971036135, "grad_norm": 35.75, "learning_rate": 1.9999553433286543e-05, "loss": 7.1996, "step": 3210 }, { "epoch": 0.009055601958484845, "grad_norm": 24.875, "learning_rate": 1.9999550645756676e-05, "loss": 8.0288, "step": 3220 }, { "epoch": 0.009083724945933557, "grad_norm": 31.5, "learning_rate": 1.999954784955398e-05, "loss": 8.2478, "step": 3230 }, { "epoch": 0.009111847933382267, "grad_norm": 30.5, "learning_rate": 1.9999545044678464e-05, "loss": 7.9006, "step": 3240 }, { "epoch": 0.009139970920830978, "grad_norm": 39.75, "learning_rate": 1.9999542231130126e-05, "loss": 8.0676, "step": 3250 }, { "epoch": 0.009168093908279688, "grad_norm": 32.0, "learning_rate": 1.999953940890897e-05, "loss": 7.9471, "step": 3260 }, { "epoch": 0.0091962168957284, "grad_norm": 38.0, "learning_rate": 1.9999536578014998e-05, "loss": 7.2531, "step": 3270 }, { "epoch": 0.00922433988317711, "grad_norm": 30.375, "learning_rate": 1.999953373844821e-05, "loss": 7.6782, "step": 3280 }, { "epoch": 0.009252462870625821, "grad_norm": 30.375, "learning_rate": 1.999953089020861e-05, "loss": 7.6688, "step": 3290 }, { "epoch": 0.00928058585807453, "grad_norm": 35.75, "learning_rate": 1.999952803329621e-05, "loss": 7.3213, "step": 3300 }, { "epoch": 0.009308708845523242, "grad_norm": 24.0, "learning_rate": 1.9999525167710994e-05, "loss": 7.8643, "step": 3310 }, { "epoch": 0.009336831832971952, "grad_norm": 24.25, "learning_rate": 1.9999522293452976e-05, "loss": 8.3795, "step": 3320 }, { "epoch": 0.009364954820420664, "grad_norm": 27.125, "learning_rate": 1.999951941052216e-05, "loss": 8.7058, "step": 3330 }, { "epoch": 0.009393077807869374, "grad_norm": 24.375, "learning_rate": 1.999951651891854e-05, "loss": 8.6353, "step": 3340 }, { "epoch": 0.009421200795318085, "grad_norm": 26.75, "learning_rate": 1.9999513618642127e-05, "loss": 8.2442, "step": 3350 }, { "epoch": 0.009449323782766797, "grad_norm": 26.875, "learning_rate": 1.9999510709692922e-05, "loss": 8.1887, "step": 3360 }, { "epoch": 0.009477446770215506, "grad_norm": 29.125, "learning_rate": 1.9999507792070924e-05, "loss": 8.5246, "step": 3370 }, { "epoch": 0.009505569757664218, "grad_norm": 23.625, "learning_rate": 1.9999504865776138e-05, "loss": 8.0467, "step": 3380 }, { "epoch": 0.009533692745112928, "grad_norm": 44.25, "learning_rate": 1.9999501930808565e-05, "loss": 7.8232, "step": 3390 }, { "epoch": 0.00956181573256164, "grad_norm": 28.375, "learning_rate": 1.999949898716821e-05, "loss": 7.5936, "step": 3400 }, { "epoch": 0.00958993872001035, "grad_norm": 37.25, "learning_rate": 1.9999496034855073e-05, "loss": 7.6965, "step": 3410 }, { "epoch": 0.00961806170745906, "grad_norm": 22.0, "learning_rate": 1.9999493073869155e-05, "loss": 8.2151, "step": 3420 }, { "epoch": 0.00964618469490777, "grad_norm": 23.5, "learning_rate": 1.9999490104210463e-05, "loss": 8.0461, "step": 3430 }, { "epoch": 0.009674307682356482, "grad_norm": 42.25, "learning_rate": 1.9999487125879e-05, "loss": 9.0034, "step": 3440 }, { "epoch": 0.009702430669805192, "grad_norm": 24.75, "learning_rate": 1.9999484138874762e-05, "loss": 8.3002, "step": 3450 }, { "epoch": 0.009730553657253904, "grad_norm": 38.0, "learning_rate": 1.999948114319776e-05, "loss": 7.8161, "step": 3460 }, { "epoch": 0.009758676644702613, "grad_norm": 33.25, "learning_rate": 1.999947813884799e-05, "loss": 8.0935, "step": 3470 }, { "epoch": 0.009786799632151325, "grad_norm": 27.25, "learning_rate": 1.999947512582546e-05, "loss": 7.4854, "step": 3480 }, { "epoch": 0.009814922619600035, "grad_norm": 23.625, "learning_rate": 1.9999472104130165e-05, "loss": 7.6675, "step": 3490 }, { "epoch": 0.009843045607048746, "grad_norm": 35.75, "learning_rate": 1.9999469073762115e-05, "loss": 7.3555, "step": 3500 }, { "epoch": 0.009871168594497456, "grad_norm": 34.25, "learning_rate": 1.9999466034721308e-05, "loss": 7.9388, "step": 3510 }, { "epoch": 0.009899291581946168, "grad_norm": 30.375, "learning_rate": 1.999946298700775e-05, "loss": 8.803, "step": 3520 }, { "epoch": 0.009927414569394877, "grad_norm": 40.5, "learning_rate": 1.999945993062144e-05, "loss": 7.7273, "step": 3530 }, { "epoch": 0.009955537556843589, "grad_norm": 22.25, "learning_rate": 1.9999456865562386e-05, "loss": 7.9366, "step": 3540 }, { "epoch": 0.009983660544292299, "grad_norm": 24.625, "learning_rate": 1.999945379183059e-05, "loss": 7.5814, "step": 3550 }, { "epoch": 0.01001178353174101, "grad_norm": 33.0, "learning_rate": 1.999945070942605e-05, "loss": 8.4469, "step": 3560 }, { "epoch": 0.01003990651918972, "grad_norm": 26.625, "learning_rate": 1.9999447618348765e-05, "loss": 8.1064, "step": 3570 }, { "epoch": 0.010068029506638432, "grad_norm": 49.25, "learning_rate": 1.999944451859875e-05, "loss": 7.3223, "step": 3580 }, { "epoch": 0.010096152494087142, "grad_norm": 26.875, "learning_rate": 1.9999441410176e-05, "loss": 7.3847, "step": 3590 }, { "epoch": 0.010124275481535853, "grad_norm": 38.25, "learning_rate": 1.9999438293080512e-05, "loss": 7.5861, "step": 3600 }, { "epoch": 0.010152398468984563, "grad_norm": 32.25, "learning_rate": 1.9999435167312303e-05, "loss": 8.5341, "step": 3610 }, { "epoch": 0.010180521456433275, "grad_norm": 50.25, "learning_rate": 1.9999432032871368e-05, "loss": 7.4763, "step": 3620 }, { "epoch": 0.010208644443881984, "grad_norm": 33.25, "learning_rate": 1.999942888975771e-05, "loss": 7.9193, "step": 3630 }, { "epoch": 0.010236767431330696, "grad_norm": 54.25, "learning_rate": 1.9999425737971328e-05, "loss": 8.376, "step": 3640 }, { "epoch": 0.010264890418779406, "grad_norm": 63.25, "learning_rate": 1.999942257751223e-05, "loss": 7.7004, "step": 3650 }, { "epoch": 0.010293013406228117, "grad_norm": 28.375, "learning_rate": 1.999941940838042e-05, "loss": 8.0735, "step": 3660 }, { "epoch": 0.010321136393676827, "grad_norm": 25.25, "learning_rate": 1.9999416230575894e-05, "loss": 7.6901, "step": 3670 }, { "epoch": 0.010349259381125539, "grad_norm": 28.75, "learning_rate": 1.9999413044098662e-05, "loss": 8.1958, "step": 3680 }, { "epoch": 0.010377382368574248, "grad_norm": 22.0, "learning_rate": 1.999940984894872e-05, "loss": 8.2711, "step": 3690 }, { "epoch": 0.01040550535602296, "grad_norm": 26.375, "learning_rate": 1.999940664512608e-05, "loss": 8.2227, "step": 3700 }, { "epoch": 0.01043362834347167, "grad_norm": 36.5, "learning_rate": 1.9999403432630733e-05, "loss": 7.9065, "step": 3710 }, { "epoch": 0.010461751330920381, "grad_norm": 28.375, "learning_rate": 1.999940021146269e-05, "loss": 7.9618, "step": 3720 }, { "epoch": 0.010489874318369091, "grad_norm": 32.25, "learning_rate": 1.9999396981621952e-05, "loss": 8.2882, "step": 3730 }, { "epoch": 0.010517997305817803, "grad_norm": 28.25, "learning_rate": 1.999939374310852e-05, "loss": 7.9705, "step": 3740 }, { "epoch": 0.010546120293266513, "grad_norm": 25.5, "learning_rate": 1.99993904959224e-05, "loss": 7.4949, "step": 3750 }, { "epoch": 0.010574243280715224, "grad_norm": 33.75, "learning_rate": 1.9999387240063596e-05, "loss": 7.865, "step": 3760 }, { "epoch": 0.010602366268163934, "grad_norm": 26.625, "learning_rate": 1.99993839755321e-05, "loss": 7.4999, "step": 3770 }, { "epoch": 0.010630489255612646, "grad_norm": 24.125, "learning_rate": 1.999938070232793e-05, "loss": 8.2593, "step": 3780 }, { "epoch": 0.010658612243061355, "grad_norm": 42.0, "learning_rate": 1.999937742045108e-05, "loss": 7.7764, "step": 3790 }, { "epoch": 0.010686735230510067, "grad_norm": 47.25, "learning_rate": 1.999937412990155e-05, "loss": 7.5523, "step": 3800 }, { "epoch": 0.010714858217958777, "grad_norm": 26.125, "learning_rate": 1.9999370830679355e-05, "loss": 8.2505, "step": 3810 }, { "epoch": 0.010742981205407488, "grad_norm": 37.25, "learning_rate": 1.9999367522784486e-05, "loss": 7.9532, "step": 3820 }, { "epoch": 0.010771104192856198, "grad_norm": 37.25, "learning_rate": 1.9999364206216946e-05, "loss": 8.2539, "step": 3830 }, { "epoch": 0.01079922718030491, "grad_norm": 27.125, "learning_rate": 1.9999360880976747e-05, "loss": 7.9396, "step": 3840 }, { "epoch": 0.01082735016775362, "grad_norm": 24.625, "learning_rate": 1.9999357547063886e-05, "loss": 8.4424, "step": 3850 }, { "epoch": 0.010855473155202331, "grad_norm": 25.875, "learning_rate": 1.9999354204478366e-05, "loss": 8.6787, "step": 3860 }, { "epoch": 0.010883596142651041, "grad_norm": 34.0, "learning_rate": 1.9999350853220188e-05, "loss": 8.3079, "step": 3870 }, { "epoch": 0.010911719130099752, "grad_norm": 26.25, "learning_rate": 1.999934749328936e-05, "loss": 8.3721, "step": 3880 }, { "epoch": 0.010939842117548462, "grad_norm": 33.0, "learning_rate": 1.9999344124685883e-05, "loss": 8.3725, "step": 3890 }, { "epoch": 0.010967965104997174, "grad_norm": 25.375, "learning_rate": 1.9999340747409756e-05, "loss": 8.0926, "step": 3900 }, { "epoch": 0.010996088092445884, "grad_norm": 21.375, "learning_rate": 1.9999337361460988e-05, "loss": 7.9782, "step": 3910 }, { "epoch": 0.011024211079894595, "grad_norm": 42.5, "learning_rate": 1.9999333966839578e-05, "loss": 8.305, "step": 3920 }, { "epoch": 0.011052334067343305, "grad_norm": 54.25, "learning_rate": 1.999933056354553e-05, "loss": 7.9586, "step": 3930 }, { "epoch": 0.011080457054792017, "grad_norm": 31.25, "learning_rate": 1.999932715157885e-05, "loss": 8.1783, "step": 3940 }, { "epoch": 0.011108580042240726, "grad_norm": 26.875, "learning_rate": 1.9999323730939536e-05, "loss": 8.2139, "step": 3950 }, { "epoch": 0.011136703029689438, "grad_norm": 31.375, "learning_rate": 1.999932030162759e-05, "loss": 8.6769, "step": 3960 }, { "epoch": 0.011164826017138148, "grad_norm": 31.25, "learning_rate": 1.999931686364302e-05, "loss": 8.1601, "step": 3970 }, { "epoch": 0.01119294900458686, "grad_norm": 63.0, "learning_rate": 1.999931341698583e-05, "loss": 8.4853, "step": 3980 }, { "epoch": 0.01122107199203557, "grad_norm": 40.5, "learning_rate": 1.999930996165602e-05, "loss": 8.6766, "step": 3990 }, { "epoch": 0.01124919497948428, "grad_norm": 30.75, "learning_rate": 1.9999306497653584e-05, "loss": 8.3272, "step": 4000 }, { "epoch": 0.01127731796693299, "grad_norm": 30.75, "learning_rate": 1.9999303024978542e-05, "loss": 8.9331, "step": 4010 }, { "epoch": 0.011305440954381702, "grad_norm": 42.5, "learning_rate": 1.9999299543630886e-05, "loss": 7.7461, "step": 4020 }, { "epoch": 0.011333563941830412, "grad_norm": 23.75, "learning_rate": 1.9999296053610625e-05, "loss": 8.6209, "step": 4030 }, { "epoch": 0.011361686929279123, "grad_norm": 41.0, "learning_rate": 1.9999292554917757e-05, "loss": 7.3432, "step": 4040 }, { "epoch": 0.011389809916727835, "grad_norm": 47.5, "learning_rate": 1.9999289047552287e-05, "loss": 8.6455, "step": 4050 }, { "epoch": 0.011417932904176545, "grad_norm": 30.0, "learning_rate": 1.9999285531514217e-05, "loss": 8.5096, "step": 4060 }, { "epoch": 0.011446055891625256, "grad_norm": 23.125, "learning_rate": 1.9999282006803552e-05, "loss": 8.3673, "step": 4070 }, { "epoch": 0.011474178879073966, "grad_norm": 23.875, "learning_rate": 1.9999278473420294e-05, "loss": 7.9102, "step": 4080 }, { "epoch": 0.011502301866522678, "grad_norm": 45.0, "learning_rate": 1.9999274931364448e-05, "loss": 8.6135, "step": 4090 }, { "epoch": 0.011530424853971388, "grad_norm": 28.0, "learning_rate": 1.999927138063601e-05, "loss": 7.6448, "step": 4100 }, { "epoch": 0.0115585478414201, "grad_norm": 21.5, "learning_rate": 1.9999267821234993e-05, "loss": 7.6886, "step": 4110 }, { "epoch": 0.011586670828868809, "grad_norm": 27.0, "learning_rate": 1.9999264253161395e-05, "loss": 8.2981, "step": 4120 }, { "epoch": 0.01161479381631752, "grad_norm": 46.5, "learning_rate": 1.9999260676415217e-05, "loss": 8.2034, "step": 4130 }, { "epoch": 0.01164291680376623, "grad_norm": 27.875, "learning_rate": 1.9999257090996467e-05, "loss": 8.5675, "step": 4140 }, { "epoch": 0.011671039791214942, "grad_norm": 38.0, "learning_rate": 1.9999253496905148e-05, "loss": 7.6995, "step": 4150 }, { "epoch": 0.011699162778663652, "grad_norm": 26.0, "learning_rate": 1.9999249894141254e-05, "loss": 7.38, "step": 4160 }, { "epoch": 0.011727285766112363, "grad_norm": 49.5, "learning_rate": 1.9999246282704804e-05, "loss": 8.2433, "step": 4170 }, { "epoch": 0.011755408753561073, "grad_norm": 22.5, "learning_rate": 1.9999242662595787e-05, "loss": 8.2355, "step": 4180 }, { "epoch": 0.011783531741009785, "grad_norm": 32.75, "learning_rate": 1.999923903381421e-05, "loss": 7.953, "step": 4190 }, { "epoch": 0.011811654728458494, "grad_norm": 29.75, "learning_rate": 1.9999235396360078e-05, "loss": 8.5966, "step": 4200 }, { "epoch": 0.011839777715907206, "grad_norm": 38.25, "learning_rate": 1.9999231750233393e-05, "loss": 8.3424, "step": 4210 }, { "epoch": 0.011867900703355916, "grad_norm": 28.125, "learning_rate": 1.9999228095434162e-05, "loss": 8.7131, "step": 4220 }, { "epoch": 0.011896023690804627, "grad_norm": 30.25, "learning_rate": 1.9999224431962384e-05, "loss": 7.8723, "step": 4230 }, { "epoch": 0.011924146678253337, "grad_norm": 25.875, "learning_rate": 1.9999220759818062e-05, "loss": 7.92, "step": 4240 }, { "epoch": 0.011952269665702049, "grad_norm": 40.5, "learning_rate": 1.99992170790012e-05, "loss": 8.0392, "step": 4250 }, { "epoch": 0.011980392653150759, "grad_norm": 26.125, "learning_rate": 1.9999213389511803e-05, "loss": 7.944, "step": 4260 }, { "epoch": 0.01200851564059947, "grad_norm": 31.75, "learning_rate": 1.999920969134987e-05, "loss": 8.143, "step": 4270 }, { "epoch": 0.01203663862804818, "grad_norm": 36.25, "learning_rate": 1.9999205984515407e-05, "loss": 7.5141, "step": 4280 }, { "epoch": 0.012064761615496892, "grad_norm": 29.0, "learning_rate": 1.999920226900842e-05, "loss": 7.7645, "step": 4290 }, { "epoch": 0.012092884602945601, "grad_norm": 29.75, "learning_rate": 1.9999198544828906e-05, "loss": 8.5484, "step": 4300 }, { "epoch": 0.012121007590394313, "grad_norm": 25.0, "learning_rate": 1.9999194811976873e-05, "loss": 8.0031, "step": 4310 }, { "epoch": 0.012149130577843023, "grad_norm": 35.25, "learning_rate": 1.9999191070452323e-05, "loss": 7.8425, "step": 4320 }, { "epoch": 0.012177253565291734, "grad_norm": 30.5, "learning_rate": 1.999918732025526e-05, "loss": 8.5762, "step": 4330 }, { "epoch": 0.012205376552740444, "grad_norm": 23.375, "learning_rate": 1.9999183561385685e-05, "loss": 7.4557, "step": 4340 }, { "epoch": 0.012233499540189156, "grad_norm": 43.75, "learning_rate": 1.9999179793843604e-05, "loss": 8.876, "step": 4350 }, { "epoch": 0.012261622527637865, "grad_norm": 26.0, "learning_rate": 1.999917601762902e-05, "loss": 7.8134, "step": 4360 }, { "epoch": 0.012289745515086577, "grad_norm": 55.5, "learning_rate": 1.999917223274193e-05, "loss": 8.4313, "step": 4370 }, { "epoch": 0.012317868502535287, "grad_norm": 22.25, "learning_rate": 1.9999168439182344e-05, "loss": 7.781, "step": 4380 }, { "epoch": 0.012345991489983998, "grad_norm": 25.875, "learning_rate": 1.9999164636950265e-05, "loss": 8.3306, "step": 4390 }, { "epoch": 0.012374114477432708, "grad_norm": 37.5, "learning_rate": 1.9999160826045694e-05, "loss": 8.1507, "step": 4400 }, { "epoch": 0.01240223746488142, "grad_norm": 23.875, "learning_rate": 1.9999157006468637e-05, "loss": 8.6167, "step": 4410 }, { "epoch": 0.01243036045233013, "grad_norm": 20.875, "learning_rate": 1.9999153178219094e-05, "loss": 8.5502, "step": 4420 }, { "epoch": 0.012458483439778841, "grad_norm": 58.0, "learning_rate": 1.999914934129707e-05, "loss": 8.4065, "step": 4430 }, { "epoch": 0.012486606427227551, "grad_norm": 21.25, "learning_rate": 1.999914549570257e-05, "loss": 7.2662, "step": 4440 }, { "epoch": 0.012514729414676263, "grad_norm": 44.75, "learning_rate": 1.9999141641435593e-05, "loss": 7.3791, "step": 4450 }, { "epoch": 0.012542852402124972, "grad_norm": 33.25, "learning_rate": 1.9999137778496145e-05, "loss": 7.6404, "step": 4460 }, { "epoch": 0.012570975389573684, "grad_norm": 32.5, "learning_rate": 1.999913390688423e-05, "loss": 7.3878, "step": 4470 }, { "epoch": 0.012599098377022394, "grad_norm": 27.5, "learning_rate": 1.9999130026599852e-05, "loss": 8.4735, "step": 4480 }, { "epoch": 0.012627221364471105, "grad_norm": 76.5, "learning_rate": 1.999912613764301e-05, "loss": 8.2022, "step": 4490 }, { "epoch": 0.012655344351919815, "grad_norm": 24.0, "learning_rate": 1.9999122240013714e-05, "loss": 8.0111, "step": 4500 }, { "epoch": 0.012683467339368527, "grad_norm": 30.375, "learning_rate": 1.9999118333711963e-05, "loss": 7.6184, "step": 4510 }, { "epoch": 0.012711590326817237, "grad_norm": 28.125, "learning_rate": 1.9999114418737756e-05, "loss": 8.358, "step": 4520 }, { "epoch": 0.012739713314265948, "grad_norm": 57.5, "learning_rate": 1.9999110495091107e-05, "loss": 7.808, "step": 4530 }, { "epoch": 0.012767836301714658, "grad_norm": 24.5, "learning_rate": 1.999910656277201e-05, "loss": 8.7598, "step": 4540 }, { "epoch": 0.01279595928916337, "grad_norm": 39.5, "learning_rate": 1.9999102621780478e-05, "loss": 8.854, "step": 4550 }, { "epoch": 0.01282408227661208, "grad_norm": 47.75, "learning_rate": 1.9999098672116504e-05, "loss": 7.4798, "step": 4560 }, { "epoch": 0.01285220526406079, "grad_norm": 26.375, "learning_rate": 1.99990947137801e-05, "loss": 7.779, "step": 4570 }, { "epoch": 0.0128803282515095, "grad_norm": 89.5, "learning_rate": 1.999909074677126e-05, "loss": 8.3723, "step": 4580 }, { "epoch": 0.012908451238958212, "grad_norm": 41.5, "learning_rate": 1.9999086771089997e-05, "loss": 8.441, "step": 4590 }, { "epoch": 0.012936574226406922, "grad_norm": 30.125, "learning_rate": 1.9999082786736308e-05, "loss": 8.4291, "step": 4600 }, { "epoch": 0.012964697213855634, "grad_norm": 37.75, "learning_rate": 1.99990787937102e-05, "loss": 7.896, "step": 4610 }, { "epoch": 0.012992820201304343, "grad_norm": 37.75, "learning_rate": 1.9999074792011678e-05, "loss": 7.9664, "step": 4620 }, { "epoch": 0.013020943188753055, "grad_norm": 37.25, "learning_rate": 1.999907078164074e-05, "loss": 8.7379, "step": 4630 }, { "epoch": 0.013049066176201765, "grad_norm": 31.5, "learning_rate": 1.9999066762597393e-05, "loss": 8.4004, "step": 4640 }, { "epoch": 0.013077189163650476, "grad_norm": 21.375, "learning_rate": 1.9999062734881637e-05, "loss": 7.1223, "step": 4650 }, { "epoch": 0.013105312151099186, "grad_norm": 24.125, "learning_rate": 1.9999058698493483e-05, "loss": 7.5563, "step": 4660 }, { "epoch": 0.013133435138547898, "grad_norm": 31.125, "learning_rate": 1.9999054653432926e-05, "loss": 8.6648, "step": 4670 }, { "epoch": 0.013161558125996608, "grad_norm": 32.0, "learning_rate": 1.9999050599699977e-05, "loss": 8.3401, "step": 4680 }, { "epoch": 0.013189681113445319, "grad_norm": 23.125, "learning_rate": 1.9999046537294634e-05, "loss": 7.662, "step": 4690 }, { "epoch": 0.013217804100894029, "grad_norm": 31.625, "learning_rate": 1.9999042466216903e-05, "loss": 7.9025, "step": 4700 }, { "epoch": 0.01324592708834274, "grad_norm": 29.375, "learning_rate": 1.9999038386466785e-05, "loss": 7.3699, "step": 4710 }, { "epoch": 0.013274050075791452, "grad_norm": 45.0, "learning_rate": 1.9999034298044286e-05, "loss": 8.7693, "step": 4720 }, { "epoch": 0.013302173063240162, "grad_norm": 25.25, "learning_rate": 1.999903020094941e-05, "loss": 7.4744, "step": 4730 }, { "epoch": 0.013330296050688873, "grad_norm": 27.5, "learning_rate": 1.999902609518216e-05, "loss": 8.1181, "step": 4740 }, { "epoch": 0.013358419038137583, "grad_norm": 29.5, "learning_rate": 1.9999021980742536e-05, "loss": 7.7701, "step": 4750 }, { "epoch": 0.013386542025586295, "grad_norm": 34.0, "learning_rate": 1.999901785763055e-05, "loss": 8.7794, "step": 4760 }, { "epoch": 0.013414665013035005, "grad_norm": 24.0, "learning_rate": 1.9999013725846197e-05, "loss": 7.8328, "step": 4770 }, { "epoch": 0.013442788000483716, "grad_norm": 37.75, "learning_rate": 1.999900958538948e-05, "loss": 8.4463, "step": 4780 }, { "epoch": 0.013470910987932426, "grad_norm": 24.25, "learning_rate": 1.9999005436260416e-05, "loss": 7.8103, "step": 4790 }, { "epoch": 0.013499033975381138, "grad_norm": 38.75, "learning_rate": 1.999900127845899e-05, "loss": 8.541, "step": 4800 }, { "epoch": 0.013527156962829847, "grad_norm": 34.25, "learning_rate": 1.9998997111985218e-05, "loss": 8.0218, "step": 4810 }, { "epoch": 0.013555279950278559, "grad_norm": 25.25, "learning_rate": 1.99989929368391e-05, "loss": 8.3862, "step": 4820 }, { "epoch": 0.013583402937727269, "grad_norm": 22.5, "learning_rate": 1.9998988753020642e-05, "loss": 8.297, "step": 4830 }, { "epoch": 0.01361152592517598, "grad_norm": 29.375, "learning_rate": 1.9998984560529842e-05, "loss": 7.6039, "step": 4840 }, { "epoch": 0.01363964891262469, "grad_norm": 25.5, "learning_rate": 1.999898035936671e-05, "loss": 7.9024, "step": 4850 }, { "epoch": 0.013667771900073402, "grad_norm": 36.25, "learning_rate": 1.9998976149531243e-05, "loss": 8.1757, "step": 4860 }, { "epoch": 0.013695894887522111, "grad_norm": 26.0, "learning_rate": 1.9998971931023454e-05, "loss": 8.3246, "step": 4870 }, { "epoch": 0.013724017874970823, "grad_norm": 21.75, "learning_rate": 1.9998967703843338e-05, "loss": 8.3151, "step": 4880 }, { "epoch": 0.013752140862419533, "grad_norm": 23.25, "learning_rate": 1.9998963467990902e-05, "loss": 8.3641, "step": 4890 }, { "epoch": 0.013780263849868244, "grad_norm": 26.5, "learning_rate": 1.9998959223466147e-05, "loss": 7.4782, "step": 4900 }, { "epoch": 0.013808386837316954, "grad_norm": 26.375, "learning_rate": 1.9998954970269082e-05, "loss": 8.3574, "step": 4910 }, { "epoch": 0.013836509824765666, "grad_norm": 37.5, "learning_rate": 1.9998950708399708e-05, "loss": 8.1733, "step": 4920 }, { "epoch": 0.013864632812214376, "grad_norm": 40.25, "learning_rate": 1.9998946437858027e-05, "loss": 8.5876, "step": 4930 }, { "epoch": 0.013892755799663087, "grad_norm": 20.875, "learning_rate": 1.9998942158644047e-05, "loss": 8.8059, "step": 4940 }, { "epoch": 0.013920878787111797, "grad_norm": 48.0, "learning_rate": 1.9998937870757765e-05, "loss": 8.2536, "step": 4950 }, { "epoch": 0.013949001774560509, "grad_norm": 22.5, "learning_rate": 1.999893357419919e-05, "loss": 7.8894, "step": 4960 }, { "epoch": 0.013977124762009218, "grad_norm": 35.25, "learning_rate": 1.9998929268968325e-05, "loss": 8.1565, "step": 4970 }, { "epoch": 0.01400524774945793, "grad_norm": 27.875, "learning_rate": 1.9998924955065174e-05, "loss": 7.1035, "step": 4980 }, { "epoch": 0.01403337073690664, "grad_norm": 32.25, "learning_rate": 1.9998920632489734e-05, "loss": 8.3748, "step": 4990 }, { "epoch": 0.014061493724355351, "grad_norm": 25.625, "learning_rate": 1.9998916301242023e-05, "loss": 7.5022, "step": 5000 }, { "epoch": 0.014089616711804061, "grad_norm": 22.5, "learning_rate": 1.9998911961322028e-05, "loss": 8.234, "step": 5010 }, { "epoch": 0.014117739699252773, "grad_norm": 46.25, "learning_rate": 1.9998907612729768e-05, "loss": 7.8939, "step": 5020 }, { "epoch": 0.014145862686701483, "grad_norm": 39.5, "learning_rate": 1.999890325546524e-05, "loss": 8.5412, "step": 5030 }, { "epoch": 0.014173985674150194, "grad_norm": 44.25, "learning_rate": 1.999889888952844e-05, "loss": 7.7492, "step": 5040 }, { "epoch": 0.014202108661598904, "grad_norm": 47.5, "learning_rate": 1.999889451491939e-05, "loss": 8.2012, "step": 5050 }, { "epoch": 0.014230231649047615, "grad_norm": 26.0, "learning_rate": 1.9998890131638073e-05, "loss": 7.9138, "step": 5060 }, { "epoch": 0.014258354636496325, "grad_norm": 27.75, "learning_rate": 1.999888573968451e-05, "loss": 7.5371, "step": 5070 }, { "epoch": 0.014286477623945037, "grad_norm": 24.875, "learning_rate": 1.9998881339058694e-05, "loss": 8.8557, "step": 5080 }, { "epoch": 0.014314600611393747, "grad_norm": 45.75, "learning_rate": 1.9998876929760633e-05, "loss": 8.1463, "step": 5090 }, { "epoch": 0.014342723598842458, "grad_norm": 30.0, "learning_rate": 1.9998872511790333e-05, "loss": 8.3442, "step": 5100 }, { "epoch": 0.014370846586291168, "grad_norm": 33.25, "learning_rate": 1.9998868085147792e-05, "loss": 7.7405, "step": 5110 }, { "epoch": 0.01439896957373988, "grad_norm": 24.625, "learning_rate": 1.999886364983302e-05, "loss": 8.1353, "step": 5120 }, { "epoch": 0.01442709256118859, "grad_norm": 30.875, "learning_rate": 1.999885920584602e-05, "loss": 8.4558, "step": 5130 }, { "epoch": 0.014455215548637301, "grad_norm": 26.0, "learning_rate": 1.9998854753186786e-05, "loss": 8.3118, "step": 5140 }, { "epoch": 0.01448333853608601, "grad_norm": 49.25, "learning_rate": 1.9998850291855335e-05, "loss": 7.6215, "step": 5150 }, { "epoch": 0.014511461523534722, "grad_norm": 24.625, "learning_rate": 1.9998845821851666e-05, "loss": 7.5545, "step": 5160 }, { "epoch": 0.014539584510983432, "grad_norm": 27.25, "learning_rate": 1.9998841343175783e-05, "loss": 7.6474, "step": 5170 }, { "epoch": 0.014567707498432144, "grad_norm": 32.75, "learning_rate": 1.9998836855827684e-05, "loss": 8.2626, "step": 5180 }, { "epoch": 0.014595830485880854, "grad_norm": 28.625, "learning_rate": 1.999883235980738e-05, "loss": 8.6014, "step": 5190 }, { "epoch": 0.014623953473329565, "grad_norm": 53.25, "learning_rate": 1.999882785511488e-05, "loss": 8.1841, "step": 5200 }, { "epoch": 0.014652076460778275, "grad_norm": 23.25, "learning_rate": 1.9998823341750176e-05, "loss": 7.6087, "step": 5210 }, { "epoch": 0.014680199448226986, "grad_norm": 24.0, "learning_rate": 1.9998818819713277e-05, "loss": 8.3995, "step": 5220 }, { "epoch": 0.014708322435675696, "grad_norm": 45.75, "learning_rate": 1.9998814289004186e-05, "loss": 7.9991, "step": 5230 }, { "epoch": 0.014736445423124408, "grad_norm": 22.375, "learning_rate": 1.9998809749622912e-05, "loss": 8.1138, "step": 5240 }, { "epoch": 0.014764568410573118, "grad_norm": 31.0, "learning_rate": 1.999880520156945e-05, "loss": 8.8926, "step": 5250 }, { "epoch": 0.01479269139802183, "grad_norm": 31.0, "learning_rate": 1.999880064484381e-05, "loss": 8.3993, "step": 5260 }, { "epoch": 0.014820814385470539, "grad_norm": 24.125, "learning_rate": 1.9998796079445993e-05, "loss": 7.889, "step": 5270 }, { "epoch": 0.01484893737291925, "grad_norm": 39.75, "learning_rate": 1.999879150537601e-05, "loss": 7.8774, "step": 5280 }, { "epoch": 0.01487706036036796, "grad_norm": 26.125, "learning_rate": 1.9998786922633854e-05, "loss": 8.0017, "step": 5290 }, { "epoch": 0.014905183347816672, "grad_norm": 24.125, "learning_rate": 1.999878233121954e-05, "loss": 7.0155, "step": 5300 }, { "epoch": 0.014933306335265382, "grad_norm": 29.625, "learning_rate": 1.9998777731133063e-05, "loss": 7.2782, "step": 5310 }, { "epoch": 0.014961429322714093, "grad_norm": 59.0, "learning_rate": 1.999877312237443e-05, "loss": 8.1134, "step": 5320 }, { "epoch": 0.014989552310162803, "grad_norm": 46.5, "learning_rate": 1.9998768504943643e-05, "loss": 8.6425, "step": 5330 }, { "epoch": 0.015017675297611515, "grad_norm": 34.0, "learning_rate": 1.9998763878840713e-05, "loss": 8.0684, "step": 5340 }, { "epoch": 0.015045798285060225, "grad_norm": 28.875, "learning_rate": 1.999875924406564e-05, "loss": 7.882, "step": 5350 }, { "epoch": 0.015073921272508936, "grad_norm": 40.25, "learning_rate": 1.9998754600618425e-05, "loss": 8.1564, "step": 5360 }, { "epoch": 0.015102044259957646, "grad_norm": 24.875, "learning_rate": 1.999874994849907e-05, "loss": 8.5689, "step": 5370 }, { "epoch": 0.015130167247406357, "grad_norm": 30.25, "learning_rate": 1.999874528770759e-05, "loss": 8.4312, "step": 5380 }, { "epoch": 0.015158290234855069, "grad_norm": 25.625, "learning_rate": 1.9998740618243983e-05, "loss": 8.3391, "step": 5390 }, { "epoch": 0.015186413222303779, "grad_norm": 24.875, "learning_rate": 1.9998735940108252e-05, "loss": 8.267, "step": 5400 }, { "epoch": 0.01521453620975249, "grad_norm": 28.125, "learning_rate": 1.9998731253300402e-05, "loss": 8.5453, "step": 5410 }, { "epoch": 0.0152426591972012, "grad_norm": 49.25, "learning_rate": 1.9998726557820432e-05, "loss": 7.5432, "step": 5420 }, { "epoch": 0.015270782184649912, "grad_norm": 34.0, "learning_rate": 1.9998721853668356e-05, "loss": 8.2142, "step": 5430 }, { "epoch": 0.015298905172098622, "grad_norm": 40.5, "learning_rate": 1.9998717140844173e-05, "loss": 8.1221, "step": 5440 }, { "epoch": 0.015327028159547333, "grad_norm": 23.375, "learning_rate": 1.9998712419347884e-05, "loss": 7.7446, "step": 5450 }, { "epoch": 0.015355151146996043, "grad_norm": 30.125, "learning_rate": 1.99987076891795e-05, "loss": 7.8589, "step": 5460 }, { "epoch": 0.015383274134444755, "grad_norm": 22.0, "learning_rate": 1.999870295033902e-05, "loss": 7.7178, "step": 5470 }, { "epoch": 0.015411397121893464, "grad_norm": 23.875, "learning_rate": 1.9998698202826444e-05, "loss": 7.9956, "step": 5480 }, { "epoch": 0.015439520109342176, "grad_norm": 39.0, "learning_rate": 1.9998693446641787e-05, "loss": 8.2401, "step": 5490 }, { "epoch": 0.015467643096790886, "grad_norm": 27.625, "learning_rate": 1.999868868178505e-05, "loss": 7.4811, "step": 5500 }, { "epoch": 0.015495766084239597, "grad_norm": 26.5, "learning_rate": 1.999868390825623e-05, "loss": 8.83, "step": 5510 }, { "epoch": 0.015523889071688307, "grad_norm": 25.625, "learning_rate": 1.9998679126055336e-05, "loss": 7.7178, "step": 5520 }, { "epoch": 0.015552012059137019, "grad_norm": 31.125, "learning_rate": 1.9998674335182373e-05, "loss": 8.4372, "step": 5530 }, { "epoch": 0.015580135046585728, "grad_norm": 21.5, "learning_rate": 1.9998669535637345e-05, "loss": 8.2238, "step": 5540 }, { "epoch": 0.01560825803403444, "grad_norm": 25.125, "learning_rate": 1.9998664727420257e-05, "loss": 7.3885, "step": 5550 }, { "epoch": 0.01563638102148315, "grad_norm": 31.125, "learning_rate": 1.9998659910531108e-05, "loss": 8.2133, "step": 5560 }, { "epoch": 0.01566450400893186, "grad_norm": 36.0, "learning_rate": 1.9998655084969906e-05, "loss": 8.048, "step": 5570 }, { "epoch": 0.015692626996380573, "grad_norm": 25.875, "learning_rate": 1.9998650250736655e-05, "loss": 8.9737, "step": 5580 }, { "epoch": 0.01572074998382928, "grad_norm": 31.0, "learning_rate": 1.999864540783136e-05, "loss": 8.1481, "step": 5590 }, { "epoch": 0.015748872971277993, "grad_norm": 35.75, "learning_rate": 1.9998640556254022e-05, "loss": 7.7248, "step": 5600 }, { "epoch": 0.015776995958726704, "grad_norm": 27.125, "learning_rate": 1.9998635696004653e-05, "loss": 8.5561, "step": 5610 }, { "epoch": 0.015805118946175416, "grad_norm": 43.0, "learning_rate": 1.999863082708325e-05, "loss": 8.6236, "step": 5620 }, { "epoch": 0.015833241933624124, "grad_norm": 28.75, "learning_rate": 1.9998625949489816e-05, "loss": 7.8709, "step": 5630 }, { "epoch": 0.015861364921072835, "grad_norm": 38.0, "learning_rate": 1.9998621063224358e-05, "loss": 7.9756, "step": 5640 }, { "epoch": 0.015889487908521547, "grad_norm": 22.25, "learning_rate": 1.9998616168286882e-05, "loss": 8.3834, "step": 5650 }, { "epoch": 0.01591761089597026, "grad_norm": 27.25, "learning_rate": 1.999861126467739e-05, "loss": 8.5246, "step": 5660 }, { "epoch": 0.015945733883418967, "grad_norm": 72.5, "learning_rate": 1.9998606352395886e-05, "loss": 7.8261, "step": 5670 }, { "epoch": 0.015973856870867678, "grad_norm": 21.125, "learning_rate": 1.999860143144238e-05, "loss": 7.8429, "step": 5680 }, { "epoch": 0.01600197985831639, "grad_norm": 33.25, "learning_rate": 1.9998596501816868e-05, "loss": 7.8281, "step": 5690 }, { "epoch": 0.0160301028457651, "grad_norm": 21.875, "learning_rate": 1.9998591563519355e-05, "loss": 8.1761, "step": 5700 }, { "epoch": 0.01605822583321381, "grad_norm": 21.75, "learning_rate": 1.999858661654985e-05, "loss": 7.5968, "step": 5710 }, { "epoch": 0.01608634882066252, "grad_norm": 38.75, "learning_rate": 1.9998581660908357e-05, "loss": 8.1945, "step": 5720 }, { "epoch": 0.016114471808111232, "grad_norm": 21.5, "learning_rate": 1.9998576696594874e-05, "loss": 8.4048, "step": 5730 }, { "epoch": 0.016142594795559944, "grad_norm": 25.75, "learning_rate": 1.9998571723609418e-05, "loss": 8.0875, "step": 5740 }, { "epoch": 0.016170717783008652, "grad_norm": 26.875, "learning_rate": 1.999856674195198e-05, "loss": 8.2936, "step": 5750 }, { "epoch": 0.016198840770457364, "grad_norm": 31.625, "learning_rate": 1.999856175162257e-05, "loss": 7.3004, "step": 5760 }, { "epoch": 0.016226963757906075, "grad_norm": 37.0, "learning_rate": 1.999855675262119e-05, "loss": 7.6768, "step": 5770 }, { "epoch": 0.016255086745354787, "grad_norm": 28.75, "learning_rate": 1.9998551744947847e-05, "loss": 7.3202, "step": 5780 }, { "epoch": 0.016283209732803495, "grad_norm": 24.75, "learning_rate": 1.9998546728602546e-05, "loss": 7.2178, "step": 5790 }, { "epoch": 0.016311332720252206, "grad_norm": 26.25, "learning_rate": 1.999854170358529e-05, "loss": 8.1874, "step": 5800 }, { "epoch": 0.016339455707700918, "grad_norm": 28.25, "learning_rate": 1.999853666989608e-05, "loss": 8.938, "step": 5810 }, { "epoch": 0.01636757869514963, "grad_norm": 34.5, "learning_rate": 1.999853162753493e-05, "loss": 8.9892, "step": 5820 }, { "epoch": 0.016395701682598338, "grad_norm": 30.375, "learning_rate": 1.999852657650183e-05, "loss": 8.0376, "step": 5830 }, { "epoch": 0.01642382467004705, "grad_norm": 23.125, "learning_rate": 1.99985215167968e-05, "loss": 8.7777, "step": 5840 }, { "epoch": 0.01645194765749576, "grad_norm": 22.5, "learning_rate": 1.999851644841983e-05, "loss": 8.1604, "step": 5850 }, { "epoch": 0.016480070644944472, "grad_norm": 27.0, "learning_rate": 1.9998511371370935e-05, "loss": 7.5964, "step": 5860 }, { "epoch": 0.01650819363239318, "grad_norm": 31.75, "learning_rate": 1.9998506285650116e-05, "loss": 8.5186, "step": 5870 }, { "epoch": 0.016536316619841892, "grad_norm": 45.0, "learning_rate": 1.9998501191257374e-05, "loss": 8.4827, "step": 5880 }, { "epoch": 0.016564439607290603, "grad_norm": 51.75, "learning_rate": 1.999849608819272e-05, "loss": 8.1855, "step": 5890 }, { "epoch": 0.016592562594739315, "grad_norm": 34.5, "learning_rate": 1.999849097645615e-05, "loss": 7.9797, "step": 5900 }, { "epoch": 0.016620685582188023, "grad_norm": 35.75, "learning_rate": 1.999848585604768e-05, "loss": 8.892, "step": 5910 }, { "epoch": 0.016648808569636735, "grad_norm": 25.75, "learning_rate": 1.99984807269673e-05, "loss": 8.8391, "step": 5920 }, { "epoch": 0.016676931557085446, "grad_norm": 21.625, "learning_rate": 1.9998475589215027e-05, "loss": 8.8296, "step": 5930 }, { "epoch": 0.016705054544534158, "grad_norm": 33.75, "learning_rate": 1.9998470442790857e-05, "loss": 8.1961, "step": 5940 }, { "epoch": 0.016733177531982866, "grad_norm": 25.875, "learning_rate": 1.99984652876948e-05, "loss": 8.8623, "step": 5950 }, { "epoch": 0.016761300519431577, "grad_norm": 41.5, "learning_rate": 1.9998460123926862e-05, "loss": 8.1579, "step": 5960 }, { "epoch": 0.01678942350688029, "grad_norm": 30.875, "learning_rate": 1.999845495148704e-05, "loss": 7.9915, "step": 5970 }, { "epoch": 0.016817546494329, "grad_norm": 36.75, "learning_rate": 1.9998449770375343e-05, "loss": 8.344, "step": 5980 }, { "epoch": 0.01684566948177771, "grad_norm": 33.25, "learning_rate": 1.9998444580591776e-05, "loss": 8.2671, "step": 5990 }, { "epoch": 0.01687379246922642, "grad_norm": 24.125, "learning_rate": 1.9998439382136344e-05, "loss": 8.0181, "step": 6000 }, { "epoch": 0.01690191545667513, "grad_norm": 39.5, "learning_rate": 1.9998434175009046e-05, "loss": 8.2912, "step": 6010 }, { "epoch": 0.016930038444123843, "grad_norm": 30.125, "learning_rate": 1.9998428959209893e-05, "loss": 8.5638, "step": 6020 }, { "epoch": 0.01695816143157255, "grad_norm": 44.75, "learning_rate": 1.9998423734738887e-05, "loss": 7.8268, "step": 6030 }, { "epoch": 0.016986284419021263, "grad_norm": 29.0, "learning_rate": 1.999841850159603e-05, "loss": 7.3282, "step": 6040 }, { "epoch": 0.017014407406469974, "grad_norm": 21.625, "learning_rate": 1.999841325978133e-05, "loss": 7.9607, "step": 6050 }, { "epoch": 0.017042530393918686, "grad_norm": 35.75, "learning_rate": 1.999840800929479e-05, "loss": 8.2691, "step": 6060 }, { "epoch": 0.017070653381367398, "grad_norm": 45.25, "learning_rate": 1.9998402750136417e-05, "loss": 7.5812, "step": 6070 }, { "epoch": 0.017098776368816106, "grad_norm": 27.125, "learning_rate": 1.9998397482306215e-05, "loss": 7.8885, "step": 6080 }, { "epoch": 0.017126899356264817, "grad_norm": 25.75, "learning_rate": 1.9998392205804184e-05, "loss": 8.3677, "step": 6090 }, { "epoch": 0.01715502234371353, "grad_norm": 40.5, "learning_rate": 1.9998386920630332e-05, "loss": 8.3508, "step": 6100 }, { "epoch": 0.01718314533116224, "grad_norm": 32.25, "learning_rate": 1.999838162678467e-05, "loss": 7.9316, "step": 6110 }, { "epoch": 0.01721126831861095, "grad_norm": 26.0, "learning_rate": 1.999837632426719e-05, "loss": 9.0578, "step": 6120 }, { "epoch": 0.01723939130605966, "grad_norm": 30.0, "learning_rate": 1.9998371013077903e-05, "loss": 8.3462, "step": 6130 }, { "epoch": 0.01726751429350837, "grad_norm": 23.25, "learning_rate": 1.999836569321681e-05, "loss": 8.1155, "step": 6140 }, { "epoch": 0.017295637280957083, "grad_norm": 37.5, "learning_rate": 1.9998360364683925e-05, "loss": 7.1628, "step": 6150 }, { "epoch": 0.01732376026840579, "grad_norm": 31.375, "learning_rate": 1.9998355027479245e-05, "loss": 8.3626, "step": 6160 }, { "epoch": 0.017351883255854503, "grad_norm": 28.625, "learning_rate": 1.9998349681602773e-05, "loss": 8.069, "step": 6170 }, { "epoch": 0.017380006243303214, "grad_norm": 41.75, "learning_rate": 1.999834432705452e-05, "loss": 8.4142, "step": 6180 }, { "epoch": 0.017408129230751926, "grad_norm": 33.5, "learning_rate": 1.9998338963834486e-05, "loss": 8.0817, "step": 6190 }, { "epoch": 0.017436252218200634, "grad_norm": 22.125, "learning_rate": 1.999833359194268e-05, "loss": 7.8722, "step": 6200 }, { "epoch": 0.017464375205649346, "grad_norm": 27.5, "learning_rate": 1.9998328211379103e-05, "loss": 7.586, "step": 6210 }, { "epoch": 0.017492498193098057, "grad_norm": 35.0, "learning_rate": 1.9998322822143756e-05, "loss": 8.5163, "step": 6220 }, { "epoch": 0.01752062118054677, "grad_norm": 27.375, "learning_rate": 1.999831742423665e-05, "loss": 8.1256, "step": 6230 }, { "epoch": 0.017548744167995477, "grad_norm": 40.25, "learning_rate": 1.999831201765779e-05, "loss": 7.4541, "step": 6240 }, { "epoch": 0.017576867155444188, "grad_norm": 30.5, "learning_rate": 1.9998306602407177e-05, "loss": 8.0544, "step": 6250 }, { "epoch": 0.0176049901428929, "grad_norm": 28.375, "learning_rate": 1.9998301178484815e-05, "loss": 8.1091, "step": 6260 }, { "epoch": 0.01763311313034161, "grad_norm": 20.875, "learning_rate": 1.9998295745890716e-05, "loss": 8.4898, "step": 6270 }, { "epoch": 0.01766123611779032, "grad_norm": 34.0, "learning_rate": 1.9998290304624873e-05, "loss": 8.0686, "step": 6280 }, { "epoch": 0.01768935910523903, "grad_norm": 30.125, "learning_rate": 1.9998284854687304e-05, "loss": 9.1744, "step": 6290 }, { "epoch": 0.017717482092687743, "grad_norm": 31.25, "learning_rate": 1.9998279396078e-05, "loss": 7.9447, "step": 6300 }, { "epoch": 0.017745605080136454, "grad_norm": 28.375, "learning_rate": 1.9998273928796977e-05, "loss": 7.4735, "step": 6310 }, { "epoch": 0.017773728067585162, "grad_norm": 29.125, "learning_rate": 1.9998268452844238e-05, "loss": 8.118, "step": 6320 }, { "epoch": 0.017801851055033874, "grad_norm": 27.25, "learning_rate": 1.9998262968219778e-05, "loss": 7.8616, "step": 6330 }, { "epoch": 0.017829974042482585, "grad_norm": 48.25, "learning_rate": 1.9998257474923615e-05, "loss": 8.1934, "step": 6340 }, { "epoch": 0.017858097029931297, "grad_norm": 27.875, "learning_rate": 1.9998251972955746e-05, "loss": 7.6326, "step": 6350 }, { "epoch": 0.017886220017380005, "grad_norm": 27.875, "learning_rate": 1.9998246462316177e-05, "loss": 7.7852, "step": 6360 }, { "epoch": 0.017914343004828717, "grad_norm": 33.0, "learning_rate": 1.9998240943004915e-05, "loss": 7.738, "step": 6370 }, { "epoch": 0.017942465992277428, "grad_norm": 51.75, "learning_rate": 1.9998235415021963e-05, "loss": 7.6015, "step": 6380 }, { "epoch": 0.01797058897972614, "grad_norm": 27.875, "learning_rate": 1.9998229878367322e-05, "loss": 8.0608, "step": 6390 }, { "epoch": 0.017998711967174848, "grad_norm": 42.0, "learning_rate": 1.9998224333041002e-05, "loss": 7.8947, "step": 6400 }, { "epoch": 0.01802683495462356, "grad_norm": 45.0, "learning_rate": 1.999821877904301e-05, "loss": 7.9311, "step": 6410 }, { "epoch": 0.01805495794207227, "grad_norm": 24.5, "learning_rate": 1.9998213216373347e-05, "loss": 8.3355, "step": 6420 }, { "epoch": 0.018083080929520982, "grad_norm": 35.75, "learning_rate": 1.9998207645032015e-05, "loss": 8.7402, "step": 6430 }, { "epoch": 0.01811120391696969, "grad_norm": 24.25, "learning_rate": 1.999820206501902e-05, "loss": 8.3129, "step": 6440 }, { "epoch": 0.018139326904418402, "grad_norm": 32.25, "learning_rate": 1.9998196476334372e-05, "loss": 7.8271, "step": 6450 }, { "epoch": 0.018167449891867114, "grad_norm": 45.75, "learning_rate": 1.9998190878978074e-05, "loss": 8.3484, "step": 6460 }, { "epoch": 0.018195572879315825, "grad_norm": 24.25, "learning_rate": 1.9998185272950126e-05, "loss": 8.5476, "step": 6470 }, { "epoch": 0.018223695866764533, "grad_norm": 24.0, "learning_rate": 1.999817965825054e-05, "loss": 8.7645, "step": 6480 }, { "epoch": 0.018251818854213245, "grad_norm": 30.625, "learning_rate": 1.9998174034879316e-05, "loss": 7.4244, "step": 6490 }, { "epoch": 0.018279941841661956, "grad_norm": 23.75, "learning_rate": 1.999816840283646e-05, "loss": 8.3478, "step": 6500 }, { "epoch": 0.018308064829110668, "grad_norm": 23.75, "learning_rate": 1.9998162762121977e-05, "loss": 7.7965, "step": 6510 }, { "epoch": 0.018336187816559376, "grad_norm": 21.0, "learning_rate": 1.999815711273587e-05, "loss": 8.0261, "step": 6520 }, { "epoch": 0.018364310804008088, "grad_norm": 40.75, "learning_rate": 1.999815145467815e-05, "loss": 7.5809, "step": 6530 }, { "epoch": 0.0183924337914568, "grad_norm": 22.5, "learning_rate": 1.9998145787948812e-05, "loss": 8.1214, "step": 6540 }, { "epoch": 0.01842055677890551, "grad_norm": 24.5, "learning_rate": 1.9998140112547873e-05, "loss": 7.7434, "step": 6550 }, { "epoch": 0.01844867976635422, "grad_norm": 33.0, "learning_rate": 1.9998134428475327e-05, "loss": 8.7913, "step": 6560 }, { "epoch": 0.01847680275380293, "grad_norm": 52.0, "learning_rate": 1.9998128735731182e-05, "loss": 8.5184, "step": 6570 }, { "epoch": 0.018504925741251642, "grad_norm": 33.75, "learning_rate": 1.999812303431545e-05, "loss": 8.0932, "step": 6580 }, { "epoch": 0.018533048728700353, "grad_norm": 40.75, "learning_rate": 1.9998117324228126e-05, "loss": 8.5118, "step": 6590 }, { "epoch": 0.01856117171614906, "grad_norm": 26.5, "learning_rate": 1.9998111605469218e-05, "loss": 8.0939, "step": 6600 }, { "epoch": 0.018589294703597773, "grad_norm": 41.0, "learning_rate": 1.999810587803874e-05, "loss": 8.1472, "step": 6610 }, { "epoch": 0.018617417691046485, "grad_norm": 20.875, "learning_rate": 1.999810014193668e-05, "loss": 8.2657, "step": 6620 }, { "epoch": 0.018645540678495196, "grad_norm": 39.5, "learning_rate": 1.9998094397163057e-05, "loss": 7.6033, "step": 6630 }, { "epoch": 0.018673663665943904, "grad_norm": 29.875, "learning_rate": 1.999808864371787e-05, "loss": 8.4298, "step": 6640 }, { "epoch": 0.018701786653392616, "grad_norm": 38.75, "learning_rate": 1.9998082881601126e-05, "loss": 8.4281, "step": 6650 }, { "epoch": 0.018729909640841327, "grad_norm": 23.875, "learning_rate": 1.999807711081283e-05, "loss": 8.0242, "step": 6660 }, { "epoch": 0.01875803262829004, "grad_norm": 22.75, "learning_rate": 1.9998071331352985e-05, "loss": 7.201, "step": 6670 }, { "epoch": 0.018786155615738747, "grad_norm": 19.75, "learning_rate": 1.9998065543221597e-05, "loss": 7.4634, "step": 6680 }, { "epoch": 0.01881427860318746, "grad_norm": 26.125, "learning_rate": 1.9998059746418673e-05, "loss": 7.6623, "step": 6690 }, { "epoch": 0.01884240159063617, "grad_norm": 45.0, "learning_rate": 1.9998053940944214e-05, "loss": 7.7956, "step": 6700 }, { "epoch": 0.01887052457808488, "grad_norm": 29.25, "learning_rate": 1.9998048126798225e-05, "loss": 7.3149, "step": 6710 }, { "epoch": 0.018898647565533593, "grad_norm": 39.0, "learning_rate": 1.999804230398072e-05, "loss": 7.3412, "step": 6720 }, { "epoch": 0.0189267705529823, "grad_norm": 35.5, "learning_rate": 1.999803647249169e-05, "loss": 8.4152, "step": 6730 }, { "epoch": 0.018954893540431013, "grad_norm": 39.25, "learning_rate": 1.9998030632331155e-05, "loss": 8.6922, "step": 6740 }, { "epoch": 0.018983016527879724, "grad_norm": 44.5, "learning_rate": 1.9998024783499106e-05, "loss": 7.5313, "step": 6750 }, { "epoch": 0.019011139515328436, "grad_norm": 44.0, "learning_rate": 1.9998018925995557e-05, "loss": 7.4298, "step": 6760 }, { "epoch": 0.019039262502777144, "grad_norm": 28.125, "learning_rate": 1.9998013059820512e-05, "loss": 7.4348, "step": 6770 }, { "epoch": 0.019067385490225856, "grad_norm": 32.0, "learning_rate": 1.9998007184973974e-05, "loss": 7.9927, "step": 6780 }, { "epoch": 0.019095508477674567, "grad_norm": 34.25, "learning_rate": 1.9998001301455946e-05, "loss": 7.9237, "step": 6790 }, { "epoch": 0.01912363146512328, "grad_norm": 33.25, "learning_rate": 1.999799540926644e-05, "loss": 7.6304, "step": 6800 }, { "epoch": 0.019151754452571987, "grad_norm": 31.5, "learning_rate": 1.9997989508405452e-05, "loss": 7.4487, "step": 6810 }, { "epoch": 0.0191798774400207, "grad_norm": 26.25, "learning_rate": 1.9997983598872996e-05, "loss": 8.1076, "step": 6820 }, { "epoch": 0.01920800042746941, "grad_norm": 34.25, "learning_rate": 1.999797768066907e-05, "loss": 8.3876, "step": 6830 }, { "epoch": 0.01923612341491812, "grad_norm": 25.125, "learning_rate": 1.999797175379369e-05, "loss": 7.6084, "step": 6840 }, { "epoch": 0.01926424640236683, "grad_norm": 35.5, "learning_rate": 1.9997965818246846e-05, "loss": 7.9332, "step": 6850 }, { "epoch": 0.01929236938981554, "grad_norm": 25.5, "learning_rate": 1.9997959874028553e-05, "loss": 8.2781, "step": 6860 }, { "epoch": 0.019320492377264253, "grad_norm": 37.5, "learning_rate": 1.999795392113881e-05, "loss": 7.4716, "step": 6870 }, { "epoch": 0.019348615364712964, "grad_norm": 30.625, "learning_rate": 1.9997947959577635e-05, "loss": 8.3292, "step": 6880 }, { "epoch": 0.019376738352161672, "grad_norm": 29.125, "learning_rate": 1.999794198934502e-05, "loss": 8.7477, "step": 6890 }, { "epoch": 0.019404861339610384, "grad_norm": 28.625, "learning_rate": 1.9997936010440974e-05, "loss": 8.3882, "step": 6900 }, { "epoch": 0.019432984327059095, "grad_norm": 24.875, "learning_rate": 1.99979300228655e-05, "loss": 7.798, "step": 6910 }, { "epoch": 0.019461107314507807, "grad_norm": 33.0, "learning_rate": 1.9997924026618606e-05, "loss": 8.0021, "step": 6920 }, { "epoch": 0.019489230301956515, "grad_norm": 25.25, "learning_rate": 1.9997918021700303e-05, "loss": 8.1944, "step": 6930 }, { "epoch": 0.019517353289405227, "grad_norm": 23.75, "learning_rate": 1.9997912008110584e-05, "loss": 7.5742, "step": 6940 }, { "epoch": 0.019545476276853938, "grad_norm": 28.125, "learning_rate": 1.999790598584946e-05, "loss": 8.1905, "step": 6950 }, { "epoch": 0.01957359926430265, "grad_norm": 26.375, "learning_rate": 1.9997899954916943e-05, "loss": 7.5328, "step": 6960 }, { "epoch": 0.019601722251751358, "grad_norm": 26.375, "learning_rate": 1.9997893915313023e-05, "loss": 7.9268, "step": 6970 }, { "epoch": 0.01962984523920007, "grad_norm": 28.625, "learning_rate": 1.9997887867037722e-05, "loss": 8.7345, "step": 6980 }, { "epoch": 0.01965796822664878, "grad_norm": 32.0, "learning_rate": 1.9997881810091033e-05, "loss": 7.8559, "step": 6990 }, { "epoch": 0.019686091214097493, "grad_norm": 32.75, "learning_rate": 1.9997875744472967e-05, "loss": 7.7247, "step": 7000 }, { "epoch": 0.0197142142015462, "grad_norm": 32.25, "learning_rate": 1.999786967018353e-05, "loss": 8.5207, "step": 7010 }, { "epoch": 0.019742337188994912, "grad_norm": 21.125, "learning_rate": 1.999786358722272e-05, "loss": 7.7105, "step": 7020 }, { "epoch": 0.019770460176443624, "grad_norm": 26.5, "learning_rate": 1.999785749559055e-05, "loss": 8.5356, "step": 7030 }, { "epoch": 0.019798583163892335, "grad_norm": 24.875, "learning_rate": 1.9997851395287023e-05, "loss": 7.246, "step": 7040 }, { "epoch": 0.019826706151341043, "grad_norm": 21.125, "learning_rate": 1.9997845286312142e-05, "loss": 7.8412, "step": 7050 }, { "epoch": 0.019854829138789755, "grad_norm": 23.0, "learning_rate": 1.9997839168665917e-05, "loss": 8.609, "step": 7060 }, { "epoch": 0.019882952126238466, "grad_norm": 26.375, "learning_rate": 1.9997833042348348e-05, "loss": 9.1348, "step": 7070 }, { "epoch": 0.019911075113687178, "grad_norm": 28.625, "learning_rate": 1.9997826907359445e-05, "loss": 8.5175, "step": 7080 }, { "epoch": 0.019939198101135886, "grad_norm": 28.5, "learning_rate": 1.9997820763699208e-05, "loss": 7.8184, "step": 7090 }, { "epoch": 0.019967321088584598, "grad_norm": 27.75, "learning_rate": 1.999781461136765e-05, "loss": 8.0497, "step": 7100 }, { "epoch": 0.01999544407603331, "grad_norm": 25.625, "learning_rate": 1.999780845036477e-05, "loss": 8.7238, "step": 7110 }, { "epoch": 0.02002356706348202, "grad_norm": 21.75, "learning_rate": 1.9997802280690573e-05, "loss": 8.3382, "step": 7120 }, { "epoch": 0.02005169005093073, "grad_norm": 31.125, "learning_rate": 1.999779610234507e-05, "loss": 7.534, "step": 7130 }, { "epoch": 0.02007981303837944, "grad_norm": 26.625, "learning_rate": 1.9997789915328262e-05, "loss": 8.1322, "step": 7140 }, { "epoch": 0.020107936025828152, "grad_norm": 27.625, "learning_rate": 1.9997783719640155e-05, "loss": 7.6091, "step": 7150 }, { "epoch": 0.020136059013276864, "grad_norm": 26.25, "learning_rate": 1.9997777515280754e-05, "loss": 8.5611, "step": 7160 }, { "epoch": 0.02016418200072557, "grad_norm": 24.5, "learning_rate": 1.9997771302250064e-05, "loss": 8.6052, "step": 7170 }, { "epoch": 0.020192304988174283, "grad_norm": 29.875, "learning_rate": 1.9997765080548094e-05, "loss": 8.4787, "step": 7180 }, { "epoch": 0.020220427975622995, "grad_norm": 24.75, "learning_rate": 1.9997758850174845e-05, "loss": 7.5623, "step": 7190 }, { "epoch": 0.020248550963071706, "grad_norm": 46.0, "learning_rate": 1.9997752611130323e-05, "loss": 8.9582, "step": 7200 }, { "epoch": 0.020276673950520414, "grad_norm": 58.25, "learning_rate": 1.999774636341454e-05, "loss": 8.7094, "step": 7210 }, { "epoch": 0.020304796937969126, "grad_norm": 20.375, "learning_rate": 1.999774010702749e-05, "loss": 7.7926, "step": 7220 }, { "epoch": 0.020332919925417837, "grad_norm": 28.0, "learning_rate": 1.9997733841969188e-05, "loss": 7.9416, "step": 7230 }, { "epoch": 0.02036104291286655, "grad_norm": 25.875, "learning_rate": 1.9997727568239632e-05, "loss": 7.8061, "step": 7240 }, { "epoch": 0.020389165900315257, "grad_norm": 19.375, "learning_rate": 1.9997721285838835e-05, "loss": 8.3291, "step": 7250 }, { "epoch": 0.02041728888776397, "grad_norm": 36.75, "learning_rate": 1.9997714994766798e-05, "loss": 7.8051, "step": 7260 }, { "epoch": 0.02044541187521268, "grad_norm": 40.5, "learning_rate": 1.999770869502353e-05, "loss": 7.5696, "step": 7270 }, { "epoch": 0.020473534862661392, "grad_norm": 22.25, "learning_rate": 1.9997702386609024e-05, "loss": 8.1361, "step": 7280 }, { "epoch": 0.0205016578501101, "grad_norm": 26.5, "learning_rate": 1.9997696069523305e-05, "loss": 8.2315, "step": 7290 }, { "epoch": 0.02052978083755881, "grad_norm": 35.75, "learning_rate": 1.9997689743766363e-05, "loss": 8.7212, "step": 7300 }, { "epoch": 0.020557903825007523, "grad_norm": 49.25, "learning_rate": 1.9997683409338213e-05, "loss": 8.2517, "step": 7310 }, { "epoch": 0.020586026812456235, "grad_norm": 33.0, "learning_rate": 1.9997677066238858e-05, "loss": 8.6049, "step": 7320 }, { "epoch": 0.020614149799904943, "grad_norm": 25.75, "learning_rate": 1.9997670714468298e-05, "loss": 7.8715, "step": 7330 }, { "epoch": 0.020642272787353654, "grad_norm": 21.0, "learning_rate": 1.9997664354026543e-05, "loss": 8.0771, "step": 7340 }, { "epoch": 0.020670395774802366, "grad_norm": 28.875, "learning_rate": 1.99976579849136e-05, "loss": 7.4014, "step": 7350 }, { "epoch": 0.020698518762251077, "grad_norm": 18.375, "learning_rate": 1.9997651607129472e-05, "loss": 7.5094, "step": 7360 }, { "epoch": 0.020726641749699785, "grad_norm": 25.25, "learning_rate": 1.9997645220674166e-05, "loss": 8.8834, "step": 7370 }, { "epoch": 0.020754764737148497, "grad_norm": 24.0, "learning_rate": 1.9997638825547686e-05, "loss": 8.7092, "step": 7380 }, { "epoch": 0.02078288772459721, "grad_norm": 35.75, "learning_rate": 1.9997632421750038e-05, "loss": 7.8499, "step": 7390 }, { "epoch": 0.02081101071204592, "grad_norm": 24.625, "learning_rate": 1.9997626009281226e-05, "loss": 7.8206, "step": 7400 }, { "epoch": 0.02083913369949463, "grad_norm": 23.875, "learning_rate": 1.999761958814126e-05, "loss": 7.5156, "step": 7410 }, { "epoch": 0.02086725668694334, "grad_norm": 37.0, "learning_rate": 1.9997613158330145e-05, "loss": 8.3361, "step": 7420 }, { "epoch": 0.02089537967439205, "grad_norm": 24.625, "learning_rate": 1.999760671984788e-05, "loss": 7.9261, "step": 7430 }, { "epoch": 0.020923502661840763, "grad_norm": 46.5, "learning_rate": 1.9997600272694478e-05, "loss": 7.7139, "step": 7440 }, { "epoch": 0.020951625649289474, "grad_norm": 21.875, "learning_rate": 1.999759381686994e-05, "loss": 8.4161, "step": 7450 }, { "epoch": 0.020979748636738182, "grad_norm": 24.0, "learning_rate": 1.9997587352374276e-05, "loss": 7.7741, "step": 7460 }, { "epoch": 0.021007871624186894, "grad_norm": 31.75, "learning_rate": 1.9997580879207485e-05, "loss": 8.3733, "step": 7470 }, { "epoch": 0.021035994611635606, "grad_norm": 27.375, "learning_rate": 1.9997574397369578e-05, "loss": 7.8929, "step": 7480 }, { "epoch": 0.021064117599084317, "grad_norm": 52.5, "learning_rate": 1.999756790686056e-05, "loss": 8.6431, "step": 7490 }, { "epoch": 0.021092240586533025, "grad_norm": 29.75, "learning_rate": 1.9997561407680437e-05, "loss": 7.8175, "step": 7500 }, { "epoch": 0.021120363573981737, "grad_norm": 28.875, "learning_rate": 1.9997554899829213e-05, "loss": 8.2436, "step": 7510 }, { "epoch": 0.02114848656143045, "grad_norm": 22.375, "learning_rate": 1.9997548383306892e-05, "loss": 7.2461, "step": 7520 }, { "epoch": 0.02117660954887916, "grad_norm": 30.125, "learning_rate": 1.9997541858113485e-05, "loss": 7.7224, "step": 7530 }, { "epoch": 0.021204732536327868, "grad_norm": 83.5, "learning_rate": 1.999753532424899e-05, "loss": 8.6864, "step": 7540 }, { "epoch": 0.02123285552377658, "grad_norm": 32.5, "learning_rate": 1.999752878171342e-05, "loss": 7.7229, "step": 7550 }, { "epoch": 0.02126097851122529, "grad_norm": 37.25, "learning_rate": 1.9997522230506776e-05, "loss": 7.869, "step": 7560 }, { "epoch": 0.021289101498674003, "grad_norm": 71.5, "learning_rate": 1.999751567062907e-05, "loss": 8.0629, "step": 7570 }, { "epoch": 0.02131722448612271, "grad_norm": 34.0, "learning_rate": 1.99975091020803e-05, "loss": 8.4305, "step": 7580 }, { "epoch": 0.021345347473571422, "grad_norm": 28.5, "learning_rate": 1.999750252486047e-05, "loss": 8.7654, "step": 7590 }, { "epoch": 0.021373470461020134, "grad_norm": 24.0, "learning_rate": 1.9997495938969598e-05, "loss": 8.2996, "step": 7600 }, { "epoch": 0.021401593448468845, "grad_norm": 25.0, "learning_rate": 1.999748934440768e-05, "loss": 8.3266, "step": 7610 }, { "epoch": 0.021429716435917553, "grad_norm": 37.0, "learning_rate": 1.9997482741174724e-05, "loss": 8.342, "step": 7620 }, { "epoch": 0.021457839423366265, "grad_norm": 34.75, "learning_rate": 1.9997476129270733e-05, "loss": 8.2805, "step": 7630 }, { "epoch": 0.021485962410814977, "grad_norm": 21.875, "learning_rate": 1.9997469508695718e-05, "loss": 7.6063, "step": 7640 }, { "epoch": 0.021514085398263688, "grad_norm": 21.5, "learning_rate": 1.999746287944968e-05, "loss": 7.3674, "step": 7650 }, { "epoch": 0.021542208385712396, "grad_norm": 35.75, "learning_rate": 1.999745624153263e-05, "loss": 8.2359, "step": 7660 }, { "epoch": 0.021570331373161108, "grad_norm": 35.75, "learning_rate": 1.9997449594944568e-05, "loss": 8.8457, "step": 7670 }, { "epoch": 0.02159845436060982, "grad_norm": 24.5, "learning_rate": 1.99974429396855e-05, "loss": 8.3223, "step": 7680 }, { "epoch": 0.02162657734805853, "grad_norm": 36.5, "learning_rate": 1.9997436275755443e-05, "loss": 7.7577, "step": 7690 }, { "epoch": 0.02165470033550724, "grad_norm": 30.25, "learning_rate": 1.999742960315439e-05, "loss": 7.8929, "step": 7700 }, { "epoch": 0.02168282332295595, "grad_norm": 25.375, "learning_rate": 1.9997422921882348e-05, "loss": 7.8984, "step": 7710 }, { "epoch": 0.021710946310404662, "grad_norm": 32.0, "learning_rate": 1.9997416231939325e-05, "loss": 7.9924, "step": 7720 }, { "epoch": 0.021739069297853374, "grad_norm": 41.25, "learning_rate": 1.999740953332533e-05, "loss": 8.5479, "step": 7730 }, { "epoch": 0.021767192285302082, "grad_norm": 27.625, "learning_rate": 1.9997402826040365e-05, "loss": 8.4177, "step": 7740 }, { "epoch": 0.021795315272750793, "grad_norm": 74.0, "learning_rate": 1.999739611008444e-05, "loss": 7.9194, "step": 7750 }, { "epoch": 0.021823438260199505, "grad_norm": 27.75, "learning_rate": 1.9997389385457555e-05, "loss": 8.6962, "step": 7760 }, { "epoch": 0.021851561247648216, "grad_norm": 31.875, "learning_rate": 1.999738265215972e-05, "loss": 8.0286, "step": 7770 }, { "epoch": 0.021879684235096925, "grad_norm": 23.625, "learning_rate": 1.9997375910190937e-05, "loss": 8.4991, "step": 7780 }, { "epoch": 0.021907807222545636, "grad_norm": 26.25, "learning_rate": 1.9997369159551217e-05, "loss": 7.4188, "step": 7790 }, { "epoch": 0.021935930209994348, "grad_norm": 25.875, "learning_rate": 1.999736240024056e-05, "loss": 7.9129, "step": 7800 }, { "epoch": 0.02196405319744306, "grad_norm": 26.25, "learning_rate": 1.9997355632258978e-05, "loss": 8.6951, "step": 7810 }, { "epoch": 0.021992176184891767, "grad_norm": 38.0, "learning_rate": 1.9997348855606473e-05, "loss": 8.5205, "step": 7820 }, { "epoch": 0.02202029917234048, "grad_norm": 33.25, "learning_rate": 1.999734207028305e-05, "loss": 8.2526, "step": 7830 }, { "epoch": 0.02204842215978919, "grad_norm": 44.75, "learning_rate": 1.9997335276288722e-05, "loss": 7.9077, "step": 7840 }, { "epoch": 0.022076545147237902, "grad_norm": 24.5, "learning_rate": 1.9997328473623486e-05, "loss": 7.8463, "step": 7850 }, { "epoch": 0.02210466813468661, "grad_norm": 20.75, "learning_rate": 1.999732166228735e-05, "loss": 7.5722, "step": 7860 }, { "epoch": 0.02213279112213532, "grad_norm": 30.0, "learning_rate": 1.999731484228032e-05, "loss": 7.8458, "step": 7870 }, { "epoch": 0.022160914109584033, "grad_norm": 23.875, "learning_rate": 1.999730801360241e-05, "loss": 8.0548, "step": 7880 }, { "epoch": 0.022189037097032745, "grad_norm": 31.75, "learning_rate": 1.9997301176253616e-05, "loss": 8.1572, "step": 7890 }, { "epoch": 0.022217160084481453, "grad_norm": 33.5, "learning_rate": 1.9997294330233944e-05, "loss": 7.3252, "step": 7900 }, { "epoch": 0.022245283071930164, "grad_norm": 25.125, "learning_rate": 1.999728747554341e-05, "loss": 8.5656, "step": 7910 }, { "epoch": 0.022273406059378876, "grad_norm": 35.25, "learning_rate": 1.9997280612182005e-05, "loss": 8.3027, "step": 7920 }, { "epoch": 0.022301529046827587, "grad_norm": 21.5, "learning_rate": 1.9997273740149748e-05, "loss": 8.2364, "step": 7930 }, { "epoch": 0.022329652034276296, "grad_norm": 24.625, "learning_rate": 1.9997266859446638e-05, "loss": 7.4403, "step": 7940 }, { "epoch": 0.022357775021725007, "grad_norm": 27.375, "learning_rate": 1.9997259970072686e-05, "loss": 7.8787, "step": 7950 }, { "epoch": 0.02238589800917372, "grad_norm": 22.5, "learning_rate": 1.999725307202789e-05, "loss": 7.6426, "step": 7960 }, { "epoch": 0.02241402099662243, "grad_norm": 56.0, "learning_rate": 1.9997246165312264e-05, "loss": 7.9893, "step": 7970 }, { "epoch": 0.02244214398407114, "grad_norm": 29.25, "learning_rate": 1.999723924992581e-05, "loss": 8.1582, "step": 7980 }, { "epoch": 0.02247026697151985, "grad_norm": 35.0, "learning_rate": 1.9997232325868537e-05, "loss": 8.1756, "step": 7990 }, { "epoch": 0.02249838995896856, "grad_norm": 44.0, "learning_rate": 1.9997225393140446e-05, "loss": 8.1742, "step": 8000 }, { "epoch": 0.022526512946417273, "grad_norm": 24.875, "learning_rate": 1.9997218451741548e-05, "loss": 8.038, "step": 8010 }, { "epoch": 0.02255463593386598, "grad_norm": 26.375, "learning_rate": 1.9997211501671844e-05, "loss": 8.2978, "step": 8020 }, { "epoch": 0.022582758921314693, "grad_norm": 35.75, "learning_rate": 1.9997204542931344e-05, "loss": 8.0201, "step": 8030 }, { "epoch": 0.022610881908763404, "grad_norm": 23.125, "learning_rate": 1.9997197575520057e-05, "loss": 8.416, "step": 8040 }, { "epoch": 0.022639004896212116, "grad_norm": 36.25, "learning_rate": 1.9997190599437978e-05, "loss": 8.2767, "step": 8050 }, { "epoch": 0.022667127883660824, "grad_norm": 45.0, "learning_rate": 1.9997183614685124e-05, "loss": 8.5677, "step": 8060 }, { "epoch": 0.022695250871109535, "grad_norm": 47.25, "learning_rate": 1.99971766212615e-05, "loss": 8.3419, "step": 8070 }, { "epoch": 0.022723373858558247, "grad_norm": 26.0, "learning_rate": 1.9997169619167102e-05, "loss": 8.1733, "step": 8080 }, { "epoch": 0.02275149684600696, "grad_norm": 31.125, "learning_rate": 1.999716260840195e-05, "loss": 8.9879, "step": 8090 }, { "epoch": 0.02277961983345567, "grad_norm": 46.5, "learning_rate": 1.999715558896604e-05, "loss": 7.4397, "step": 8100 }, { "epoch": 0.022807742820904378, "grad_norm": 32.5, "learning_rate": 1.9997148560859383e-05, "loss": 8.1972, "step": 8110 }, { "epoch": 0.02283586580835309, "grad_norm": 42.0, "learning_rate": 1.9997141524081982e-05, "loss": 7.7304, "step": 8120 }, { "epoch": 0.0228639887958018, "grad_norm": 25.125, "learning_rate": 1.9997134478633848e-05, "loss": 8.4608, "step": 8130 }, { "epoch": 0.022892111783250513, "grad_norm": 30.5, "learning_rate": 1.999712742451498e-05, "loss": 7.5254, "step": 8140 }, { "epoch": 0.02292023477069922, "grad_norm": 24.75, "learning_rate": 1.9997120361725386e-05, "loss": 7.3108, "step": 8150 }, { "epoch": 0.022948357758147932, "grad_norm": 39.0, "learning_rate": 1.9997113290265076e-05, "loss": 8.335, "step": 8160 }, { "epoch": 0.022976480745596644, "grad_norm": 33.0, "learning_rate": 1.9997106210134056e-05, "loss": 8.6027, "step": 8170 }, { "epoch": 0.023004603733045356, "grad_norm": 42.5, "learning_rate": 1.999709912133233e-05, "loss": 7.6828, "step": 8180 }, { "epoch": 0.023032726720494064, "grad_norm": 22.125, "learning_rate": 1.9997092023859902e-05, "loss": 8.1195, "step": 8190 }, { "epoch": 0.023060849707942775, "grad_norm": 24.25, "learning_rate": 1.999708491771678e-05, "loss": 7.8631, "step": 8200 }, { "epoch": 0.023088972695391487, "grad_norm": 24.875, "learning_rate": 1.9997077802902977e-05, "loss": 8.5451, "step": 8210 }, { "epoch": 0.0231170956828402, "grad_norm": 29.625, "learning_rate": 1.9997070679418486e-05, "loss": 7.6865, "step": 8220 }, { "epoch": 0.023145218670288906, "grad_norm": 26.875, "learning_rate": 1.9997063547263326e-05, "loss": 8.3994, "step": 8230 }, { "epoch": 0.023173341657737618, "grad_norm": 41.75, "learning_rate": 1.9997056406437492e-05, "loss": 7.2586, "step": 8240 }, { "epoch": 0.02320146464518633, "grad_norm": 30.875, "learning_rate": 1.9997049256940996e-05, "loss": 7.5855, "step": 8250 }, { "epoch": 0.02322958763263504, "grad_norm": 35.25, "learning_rate": 1.999704209877385e-05, "loss": 8.7402, "step": 8260 }, { "epoch": 0.02325771062008375, "grad_norm": 26.625, "learning_rate": 1.9997034931936045e-05, "loss": 7.9202, "step": 8270 }, { "epoch": 0.02328583360753246, "grad_norm": 40.25, "learning_rate": 1.99970277564276e-05, "loss": 7.8678, "step": 8280 }, { "epoch": 0.023313956594981172, "grad_norm": 24.75, "learning_rate": 1.9997020572248517e-05, "loss": 7.8925, "step": 8290 }, { "epoch": 0.023342079582429884, "grad_norm": 30.5, "learning_rate": 1.9997013379398804e-05, "loss": 7.8366, "step": 8300 }, { "epoch": 0.023370202569878592, "grad_norm": 29.5, "learning_rate": 1.9997006177878465e-05, "loss": 7.7181, "step": 8310 }, { "epoch": 0.023398325557327303, "grad_norm": 35.0, "learning_rate": 1.9996998967687508e-05, "loss": 8.234, "step": 8320 }, { "epoch": 0.023426448544776015, "grad_norm": 35.75, "learning_rate": 1.9996991748825935e-05, "loss": 7.6394, "step": 8330 }, { "epoch": 0.023454571532224727, "grad_norm": 41.0, "learning_rate": 1.9996984521293757e-05, "loss": 7.6562, "step": 8340 }, { "epoch": 0.023482694519673435, "grad_norm": 28.5, "learning_rate": 1.999697728509098e-05, "loss": 7.9189, "step": 8350 }, { "epoch": 0.023510817507122146, "grad_norm": 41.5, "learning_rate": 1.999697004021761e-05, "loss": 7.5574, "step": 8360 }, { "epoch": 0.023538940494570858, "grad_norm": 23.5, "learning_rate": 1.999696278667365e-05, "loss": 7.9631, "step": 8370 }, { "epoch": 0.02356706348201957, "grad_norm": 25.5, "learning_rate": 1.999695552445911e-05, "loss": 7.7609, "step": 8380 }, { "epoch": 0.023595186469468277, "grad_norm": 40.25, "learning_rate": 1.9996948253573993e-05, "loss": 8.5702, "step": 8390 }, { "epoch": 0.02362330945691699, "grad_norm": 19.75, "learning_rate": 1.9996940974018308e-05, "loss": 8.0607, "step": 8400 }, { "epoch": 0.0236514324443657, "grad_norm": 30.0, "learning_rate": 1.9996933685792062e-05, "loss": 8.068, "step": 8410 }, { "epoch": 0.023679555431814412, "grad_norm": 28.375, "learning_rate": 1.9996926388895258e-05, "loss": 8.7895, "step": 8420 }, { "epoch": 0.02370767841926312, "grad_norm": 36.5, "learning_rate": 1.9996919083327907e-05, "loss": 7.1192, "step": 8430 }, { "epoch": 0.02373580140671183, "grad_norm": 21.125, "learning_rate": 1.9996911769090007e-05, "loss": 8.7498, "step": 8440 }, { "epoch": 0.023763924394160543, "grad_norm": 20.625, "learning_rate": 1.9996904446181577e-05, "loss": 8.61, "step": 8450 }, { "epoch": 0.023792047381609255, "grad_norm": 36.0, "learning_rate": 1.9996897114602613e-05, "loss": 8.8897, "step": 8460 }, { "epoch": 0.023820170369057963, "grad_norm": 32.25, "learning_rate": 1.9996889774353124e-05, "loss": 8.3375, "step": 8470 }, { "epoch": 0.023848293356506674, "grad_norm": 30.5, "learning_rate": 1.999688242543312e-05, "loss": 8.7113, "step": 8480 }, { "epoch": 0.023876416343955386, "grad_norm": 32.5, "learning_rate": 1.99968750678426e-05, "loss": 8.244, "step": 8490 }, { "epoch": 0.023904539331404098, "grad_norm": 25.875, "learning_rate": 1.9996867701581576e-05, "loss": 8.7278, "step": 8500 }, { "epoch": 0.023932662318852806, "grad_norm": 36.25, "learning_rate": 1.9996860326650052e-05, "loss": 8.672, "step": 8510 }, { "epoch": 0.023960785306301517, "grad_norm": 28.875, "learning_rate": 1.999685294304804e-05, "loss": 7.6934, "step": 8520 }, { "epoch": 0.02398890829375023, "grad_norm": 28.25, "learning_rate": 1.9996845550775538e-05, "loss": 7.9508, "step": 8530 }, { "epoch": 0.02401703128119894, "grad_norm": 32.25, "learning_rate": 1.9996838149832558e-05, "loss": 8.157, "step": 8540 }, { "epoch": 0.02404515426864765, "grad_norm": 46.25, "learning_rate": 1.99968307402191e-05, "loss": 8.2076, "step": 8550 }, { "epoch": 0.02407327725609636, "grad_norm": 28.375, "learning_rate": 1.999682332193518e-05, "loss": 7.9685, "step": 8560 }, { "epoch": 0.02410140024354507, "grad_norm": 54.25, "learning_rate": 1.99968158949808e-05, "loss": 7.7684, "step": 8570 }, { "epoch": 0.024129523230993783, "grad_norm": 29.375, "learning_rate": 1.9996808459355966e-05, "loss": 7.0636, "step": 8580 }, { "epoch": 0.02415764621844249, "grad_norm": 29.75, "learning_rate": 1.9996801015060684e-05, "loss": 7.6273, "step": 8590 }, { "epoch": 0.024185769205891203, "grad_norm": 22.5, "learning_rate": 1.999679356209496e-05, "loss": 7.8155, "step": 8600 }, { "epoch": 0.024213892193339914, "grad_norm": 34.25, "learning_rate": 1.9996786100458802e-05, "loss": 7.5556, "step": 8610 }, { "epoch": 0.024242015180788626, "grad_norm": 65.0, "learning_rate": 1.9996778630152216e-05, "loss": 8.121, "step": 8620 }, { "epoch": 0.024270138168237334, "grad_norm": 27.375, "learning_rate": 1.9996771151175207e-05, "loss": 8.1539, "step": 8630 }, { "epoch": 0.024298261155686045, "grad_norm": 30.375, "learning_rate": 1.9996763663527786e-05, "loss": 8.2234, "step": 8640 }, { "epoch": 0.024326384143134757, "grad_norm": 43.75, "learning_rate": 1.9996756167209953e-05, "loss": 7.5178, "step": 8650 }, { "epoch": 0.02435450713058347, "grad_norm": 23.875, "learning_rate": 1.9996748662221718e-05, "loss": 7.7804, "step": 8660 }, { "epoch": 0.024382630118032177, "grad_norm": 34.75, "learning_rate": 1.999674114856309e-05, "loss": 7.6625, "step": 8670 }, { "epoch": 0.024410753105480888, "grad_norm": 33.75, "learning_rate": 1.9996733626234072e-05, "loss": 7.7876, "step": 8680 }, { "epoch": 0.0244388760929296, "grad_norm": 25.125, "learning_rate": 1.9996726095234668e-05, "loss": 7.5176, "step": 8690 }, { "epoch": 0.02446699908037831, "grad_norm": 35.5, "learning_rate": 1.9996718555564892e-05, "loss": 7.7748, "step": 8700 }, { "epoch": 0.02449512206782702, "grad_norm": 36.75, "learning_rate": 1.999671100722475e-05, "loss": 7.4708, "step": 8710 }, { "epoch": 0.02452324505527573, "grad_norm": 36.25, "learning_rate": 1.9996703450214237e-05, "loss": 7.8013, "step": 8720 }, { "epoch": 0.024551368042724443, "grad_norm": 44.5, "learning_rate": 1.999669588453337e-05, "loss": 8.7622, "step": 8730 }, { "epoch": 0.024579491030173154, "grad_norm": 47.5, "learning_rate": 1.9996688310182156e-05, "loss": 8.0576, "step": 8740 }, { "epoch": 0.024607614017621866, "grad_norm": 24.125, "learning_rate": 1.9996680727160594e-05, "loss": 8.4018, "step": 8750 }, { "epoch": 0.024635737005070574, "grad_norm": 24.75, "learning_rate": 1.9996673135468697e-05, "loss": 7.7353, "step": 8760 }, { "epoch": 0.024663859992519285, "grad_norm": 23.0, "learning_rate": 1.9996665535106473e-05, "loss": 8.1296, "step": 8770 }, { "epoch": 0.024691982979967997, "grad_norm": 33.5, "learning_rate": 1.9996657926073922e-05, "loss": 8.8128, "step": 8780 }, { "epoch": 0.02472010596741671, "grad_norm": 42.0, "learning_rate": 1.9996650308371056e-05, "loss": 8.0906, "step": 8790 }, { "epoch": 0.024748228954865416, "grad_norm": 41.5, "learning_rate": 1.999664268199788e-05, "loss": 7.3259, "step": 8800 }, { "epoch": 0.024776351942314128, "grad_norm": 27.25, "learning_rate": 1.9996635046954397e-05, "loss": 8.8595, "step": 8810 }, { "epoch": 0.02480447492976284, "grad_norm": 20.25, "learning_rate": 1.999662740324062e-05, "loss": 7.5234, "step": 8820 }, { "epoch": 0.02483259791721155, "grad_norm": 21.375, "learning_rate": 1.999661975085655e-05, "loss": 8.2014, "step": 8830 }, { "epoch": 0.02486072090466026, "grad_norm": 57.25, "learning_rate": 1.99966120898022e-05, "loss": 8.3155, "step": 8840 }, { "epoch": 0.02488884389210897, "grad_norm": 37.75, "learning_rate": 1.9996604420077573e-05, "loss": 7.9681, "step": 8850 }, { "epoch": 0.024916966879557682, "grad_norm": 35.5, "learning_rate": 1.999659674168267e-05, "loss": 8.3407, "step": 8860 }, { "epoch": 0.024945089867006394, "grad_norm": 28.75, "learning_rate": 1.999658905461751e-05, "loss": 7.9579, "step": 8870 }, { "epoch": 0.024973212854455102, "grad_norm": 22.125, "learning_rate": 1.9996581358882085e-05, "loss": 7.7966, "step": 8880 }, { "epoch": 0.025001335841903814, "grad_norm": 56.0, "learning_rate": 1.9996573654476416e-05, "loss": 7.926, "step": 8890 }, { "epoch": 0.025029458829352525, "grad_norm": 25.25, "learning_rate": 1.99965659414005e-05, "loss": 8.0135, "step": 8900 }, { "epoch": 0.025057581816801237, "grad_norm": 23.75, "learning_rate": 1.999655821965435e-05, "loss": 7.7444, "step": 8910 }, { "epoch": 0.025085704804249945, "grad_norm": 24.125, "learning_rate": 1.9996550489237964e-05, "loss": 7.7179, "step": 8920 }, { "epoch": 0.025113827791698656, "grad_norm": 26.375, "learning_rate": 1.9996542750151357e-05, "loss": 7.9628, "step": 8930 }, { "epoch": 0.025141950779147368, "grad_norm": 40.0, "learning_rate": 1.9996535002394535e-05, "loss": 8.035, "step": 8940 }, { "epoch": 0.02517007376659608, "grad_norm": 18.25, "learning_rate": 1.99965272459675e-05, "loss": 8.2915, "step": 8950 }, { "epoch": 0.025198196754044788, "grad_norm": 49.5, "learning_rate": 1.9996519480870263e-05, "loss": 8.2397, "step": 8960 }, { "epoch": 0.0252263197414935, "grad_norm": 37.75, "learning_rate": 1.9996511707102827e-05, "loss": 8.1704, "step": 8970 }, { "epoch": 0.02525444272894221, "grad_norm": 34.75, "learning_rate": 1.9996503924665203e-05, "loss": 7.9778, "step": 8980 }, { "epoch": 0.025282565716390922, "grad_norm": 46.75, "learning_rate": 1.9996496133557394e-05, "loss": 7.8852, "step": 8990 }, { "epoch": 0.02531068870383963, "grad_norm": 41.75, "learning_rate": 1.9996488333779412e-05, "loss": 8.014, "step": 9000 }, { "epoch": 0.025338811691288342, "grad_norm": 45.0, "learning_rate": 1.9996480525331256e-05, "loss": 8.242, "step": 9010 }, { "epoch": 0.025366934678737053, "grad_norm": 58.5, "learning_rate": 1.999647270821294e-05, "loss": 8.2374, "step": 9020 }, { "epoch": 0.025395057666185765, "grad_norm": 37.0, "learning_rate": 1.9996464882424463e-05, "loss": 7.7369, "step": 9030 }, { "epoch": 0.025423180653634473, "grad_norm": 17.375, "learning_rate": 1.999645704796584e-05, "loss": 8.3109, "step": 9040 }, { "epoch": 0.025451303641083185, "grad_norm": 20.75, "learning_rate": 1.9996449204837078e-05, "loss": 7.214, "step": 9050 }, { "epoch": 0.025479426628531896, "grad_norm": 36.25, "learning_rate": 1.999644135303817e-05, "loss": 8.0174, "step": 9060 }, { "epoch": 0.025507549615980608, "grad_norm": 41.5, "learning_rate": 1.999643349256914e-05, "loss": 8.0048, "step": 9070 }, { "epoch": 0.025535672603429316, "grad_norm": 58.75, "learning_rate": 1.9996425623429987e-05, "loss": 8.5075, "step": 9080 }, { "epoch": 0.025563795590878027, "grad_norm": 27.75, "learning_rate": 1.999641774562072e-05, "loss": 8.4168, "step": 9090 }, { "epoch": 0.02559191857832674, "grad_norm": 28.125, "learning_rate": 1.9996409859141343e-05, "loss": 7.6584, "step": 9100 }, { "epoch": 0.02562004156577545, "grad_norm": 23.25, "learning_rate": 1.9996401963991863e-05, "loss": 7.0018, "step": 9110 }, { "epoch": 0.02564816455322416, "grad_norm": 37.25, "learning_rate": 1.999639406017229e-05, "loss": 7.9149, "step": 9120 }, { "epoch": 0.02567628754067287, "grad_norm": 30.75, "learning_rate": 1.9996386147682626e-05, "loss": 8.3805, "step": 9130 }, { "epoch": 0.02570441052812158, "grad_norm": 30.875, "learning_rate": 1.9996378226522882e-05, "loss": 8.4078, "step": 9140 }, { "epoch": 0.025732533515570293, "grad_norm": 39.25, "learning_rate": 1.9996370296693065e-05, "loss": 8.5351, "step": 9150 }, { "epoch": 0.025760656503019, "grad_norm": 27.75, "learning_rate": 1.9996362358193177e-05, "loss": 7.8481, "step": 9160 }, { "epoch": 0.025788779490467713, "grad_norm": 32.75, "learning_rate": 1.9996354411023234e-05, "loss": 8.544, "step": 9170 }, { "epoch": 0.025816902477916424, "grad_norm": 28.875, "learning_rate": 1.9996346455183233e-05, "loss": 8.299, "step": 9180 }, { "epoch": 0.025845025465365136, "grad_norm": 21.625, "learning_rate": 1.9996338490673187e-05, "loss": 7.5148, "step": 9190 }, { "epoch": 0.025873148452813844, "grad_norm": 20.0, "learning_rate": 1.99963305174931e-05, "loss": 8.2518, "step": 9200 }, { "epoch": 0.025901271440262556, "grad_norm": 36.0, "learning_rate": 1.9996322535642982e-05, "loss": 8.2257, "step": 9210 }, { "epoch": 0.025929394427711267, "grad_norm": 53.0, "learning_rate": 1.9996314545122834e-05, "loss": 7.8572, "step": 9220 }, { "epoch": 0.02595751741515998, "grad_norm": 25.25, "learning_rate": 1.999630654593267e-05, "loss": 7.4187, "step": 9230 }, { "epoch": 0.025985640402608687, "grad_norm": 32.5, "learning_rate": 1.9996298538072497e-05, "loss": 8.5596, "step": 9240 }, { "epoch": 0.0260137633900574, "grad_norm": 26.5, "learning_rate": 1.9996290521542315e-05, "loss": 7.6479, "step": 9250 }, { "epoch": 0.02604188637750611, "grad_norm": 31.625, "learning_rate": 1.9996282496342135e-05, "loss": 7.8732, "step": 9260 }, { "epoch": 0.02607000936495482, "grad_norm": 30.5, "learning_rate": 1.9996274462471962e-05, "loss": 8.1521, "step": 9270 }, { "epoch": 0.02609813235240353, "grad_norm": 28.25, "learning_rate": 1.9996266419931807e-05, "loss": 7.7561, "step": 9280 }, { "epoch": 0.02612625533985224, "grad_norm": 23.125, "learning_rate": 1.9996258368721674e-05, "loss": 7.6299, "step": 9290 }, { "epoch": 0.026154378327300953, "grad_norm": 35.75, "learning_rate": 1.999625030884157e-05, "loss": 7.7965, "step": 9300 }, { "epoch": 0.026182501314749664, "grad_norm": 35.75, "learning_rate": 1.9996242240291502e-05, "loss": 8.018, "step": 9310 }, { "epoch": 0.026210624302198372, "grad_norm": 32.5, "learning_rate": 1.9996234163071477e-05, "loss": 7.7824, "step": 9320 }, { "epoch": 0.026238747289647084, "grad_norm": 55.75, "learning_rate": 1.9996226077181508e-05, "loss": 8.4256, "step": 9330 }, { "epoch": 0.026266870277095795, "grad_norm": 27.625, "learning_rate": 1.999621798262159e-05, "loss": 7.7526, "step": 9340 }, { "epoch": 0.026294993264544507, "grad_norm": 37.75, "learning_rate": 1.999620987939174e-05, "loss": 8.1458, "step": 9350 }, { "epoch": 0.026323116251993215, "grad_norm": 37.5, "learning_rate": 1.9996201767491962e-05, "loss": 7.4293, "step": 9360 }, { "epoch": 0.026351239239441927, "grad_norm": 29.375, "learning_rate": 1.9996193646922258e-05, "loss": 7.9999, "step": 9370 }, { "epoch": 0.026379362226890638, "grad_norm": 23.125, "learning_rate": 1.9996185517682643e-05, "loss": 8.0223, "step": 9380 }, { "epoch": 0.02640748521433935, "grad_norm": 27.125, "learning_rate": 1.9996177379773125e-05, "loss": 8.8174, "step": 9390 }, { "epoch": 0.026435608201788058, "grad_norm": 31.625, "learning_rate": 1.9996169233193702e-05, "loss": 7.409, "step": 9400 }, { "epoch": 0.02646373118923677, "grad_norm": 55.25, "learning_rate": 1.999616107794439e-05, "loss": 8.1505, "step": 9410 }, { "epoch": 0.02649185417668548, "grad_norm": 62.5, "learning_rate": 1.9996152914025185e-05, "loss": 7.9507, "step": 9420 }, { "epoch": 0.026519977164134192, "grad_norm": 20.75, "learning_rate": 1.9996144741436104e-05, "loss": 7.954, "step": 9430 }, { "epoch": 0.026548100151582904, "grad_norm": 37.75, "learning_rate": 1.9996136560177154e-05, "loss": 7.9875, "step": 9440 }, { "epoch": 0.026576223139031612, "grad_norm": 48.5, "learning_rate": 1.999612837024834e-05, "loss": 7.7239, "step": 9450 }, { "epoch": 0.026604346126480324, "grad_norm": 42.5, "learning_rate": 1.9996120171649666e-05, "loss": 8.4069, "step": 9460 }, { "epoch": 0.026632469113929035, "grad_norm": 29.125, "learning_rate": 1.999611196438114e-05, "loss": 8.0084, "step": 9470 }, { "epoch": 0.026660592101377747, "grad_norm": 24.125, "learning_rate": 1.9996103748442772e-05, "loss": 8.2494, "step": 9480 }, { "epoch": 0.026688715088826455, "grad_norm": 37.75, "learning_rate": 1.999609552383457e-05, "loss": 8.019, "step": 9490 }, { "epoch": 0.026716838076275166, "grad_norm": 40.0, "learning_rate": 1.9996087290556534e-05, "loss": 8.1527, "step": 9500 }, { "epoch": 0.026744961063723878, "grad_norm": 29.5, "learning_rate": 1.999607904860868e-05, "loss": 8.5709, "step": 9510 }, { "epoch": 0.02677308405117259, "grad_norm": 30.125, "learning_rate": 1.9996070797991008e-05, "loss": 7.8723, "step": 9520 }, { "epoch": 0.026801207038621298, "grad_norm": 37.0, "learning_rate": 1.9996062538703528e-05, "loss": 8.2198, "step": 9530 }, { "epoch": 0.02682933002607001, "grad_norm": 49.75, "learning_rate": 1.999605427074625e-05, "loss": 7.6952, "step": 9540 }, { "epoch": 0.02685745301351872, "grad_norm": 38.5, "learning_rate": 1.9996045994119178e-05, "loss": 8.5775, "step": 9550 }, { "epoch": 0.026885576000967432, "grad_norm": 24.0, "learning_rate": 1.999603770882232e-05, "loss": 8.0717, "step": 9560 }, { "epoch": 0.02691369898841614, "grad_norm": 27.625, "learning_rate": 1.9996029414855683e-05, "loss": 7.923, "step": 9570 }, { "epoch": 0.026941821975864852, "grad_norm": 24.5, "learning_rate": 1.9996021112219272e-05, "loss": 8.0142, "step": 9580 }, { "epoch": 0.026969944963313564, "grad_norm": 32.25, "learning_rate": 1.99960128009131e-05, "loss": 8.5648, "step": 9590 }, { "epoch": 0.026998067950762275, "grad_norm": 44.75, "learning_rate": 1.999600448093717e-05, "loss": 7.87, "step": 9600 }, { "epoch": 0.027026190938210983, "grad_norm": 25.875, "learning_rate": 1.9995996152291488e-05, "loss": 8.4132, "step": 9610 }, { "epoch": 0.027054313925659695, "grad_norm": 38.25, "learning_rate": 1.9995987814976065e-05, "loss": 7.3158, "step": 9620 }, { "epoch": 0.027082436913108406, "grad_norm": 22.0, "learning_rate": 1.9995979468990903e-05, "loss": 7.7528, "step": 9630 }, { "epoch": 0.027110559900557118, "grad_norm": 38.5, "learning_rate": 1.9995971114336018e-05, "loss": 8.3966, "step": 9640 }, { "epoch": 0.027138682888005826, "grad_norm": 22.75, "learning_rate": 1.9995962751011408e-05, "loss": 8.4306, "step": 9650 }, { "epoch": 0.027166805875454537, "grad_norm": 22.5, "learning_rate": 1.9995954379017083e-05, "loss": 7.5641, "step": 9660 }, { "epoch": 0.02719492886290325, "grad_norm": 23.875, "learning_rate": 1.999594599835305e-05, "loss": 8.2257, "step": 9670 }, { "epoch": 0.02722305185035196, "grad_norm": 31.375, "learning_rate": 1.9995937609019322e-05, "loss": 7.6467, "step": 9680 }, { "epoch": 0.02725117483780067, "grad_norm": 20.0, "learning_rate": 1.99959292110159e-05, "loss": 8.2393, "step": 9690 }, { "epoch": 0.02727929782524938, "grad_norm": 24.0, "learning_rate": 1.9995920804342792e-05, "loss": 7.9213, "step": 9700 }, { "epoch": 0.027307420812698092, "grad_norm": 26.5, "learning_rate": 1.999591238900001e-05, "loss": 7.9248, "step": 9710 }, { "epoch": 0.027335543800146803, "grad_norm": 34.0, "learning_rate": 1.9995903964987555e-05, "loss": 8.5032, "step": 9720 }, { "epoch": 0.02736366678759551, "grad_norm": 40.0, "learning_rate": 1.9995895532305434e-05, "loss": 7.9645, "step": 9730 }, { "epoch": 0.027391789775044223, "grad_norm": 24.625, "learning_rate": 1.999588709095366e-05, "loss": 7.7626, "step": 9740 }, { "epoch": 0.027419912762492935, "grad_norm": 25.0, "learning_rate": 1.999587864093224e-05, "loss": 7.5728, "step": 9750 }, { "epoch": 0.027448035749941646, "grad_norm": 22.0, "learning_rate": 1.9995870182241176e-05, "loss": 7.8912, "step": 9760 }, { "epoch": 0.027476158737390354, "grad_norm": 37.75, "learning_rate": 1.999586171488048e-05, "loss": 8.215, "step": 9770 }, { "epoch": 0.027504281724839066, "grad_norm": 34.75, "learning_rate": 1.9995853238850156e-05, "loss": 8.0155, "step": 9780 }, { "epoch": 0.027532404712287777, "grad_norm": 30.875, "learning_rate": 1.9995844754150214e-05, "loss": 8.2868, "step": 9790 }, { "epoch": 0.02756052769973649, "grad_norm": 34.75, "learning_rate": 1.999583626078066e-05, "loss": 8.0389, "step": 9800 }, { "epoch": 0.027588650687185197, "grad_norm": 47.25, "learning_rate": 1.9995827758741502e-05, "loss": 7.7408, "step": 9810 }, { "epoch": 0.02761677367463391, "grad_norm": 47.0, "learning_rate": 1.9995819248032746e-05, "loss": 8.6171, "step": 9820 }, { "epoch": 0.02764489666208262, "grad_norm": 26.875, "learning_rate": 1.99958107286544e-05, "loss": 7.8723, "step": 9830 }, { "epoch": 0.02767301964953133, "grad_norm": 35.25, "learning_rate": 1.9995802200606473e-05, "loss": 7.1007, "step": 9840 }, { "epoch": 0.02770114263698004, "grad_norm": 25.5, "learning_rate": 1.999579366388897e-05, "loss": 7.7648, "step": 9850 }, { "epoch": 0.02772926562442875, "grad_norm": 30.0, "learning_rate": 1.9995785118501903e-05, "loss": 8.0552, "step": 9860 }, { "epoch": 0.027757388611877463, "grad_norm": 37.5, "learning_rate": 1.9995776564445274e-05, "loss": 7.8143, "step": 9870 }, { "epoch": 0.027785511599326174, "grad_norm": 31.375, "learning_rate": 1.9995768001719092e-05, "loss": 8.0577, "step": 9880 }, { "epoch": 0.027813634586774882, "grad_norm": 27.75, "learning_rate": 1.9995759430323366e-05, "loss": 7.821, "step": 9890 }, { "epoch": 0.027841757574223594, "grad_norm": 32.0, "learning_rate": 1.9995750850258102e-05, "loss": 8.5424, "step": 9900 }, { "epoch": 0.027869880561672306, "grad_norm": 21.25, "learning_rate": 1.9995742261523307e-05, "loss": 7.82, "step": 9910 }, { "epoch": 0.027898003549121017, "grad_norm": 35.75, "learning_rate": 1.999573366411899e-05, "loss": 8.0116, "step": 9920 }, { "epoch": 0.027926126536569725, "grad_norm": 35.75, "learning_rate": 1.999572505804516e-05, "loss": 7.9151, "step": 9930 }, { "epoch": 0.027954249524018437, "grad_norm": 31.625, "learning_rate": 1.9995716443301818e-05, "loss": 7.7329, "step": 9940 }, { "epoch": 0.02798237251146715, "grad_norm": 51.75, "learning_rate": 1.9995707819888975e-05, "loss": 7.9913, "step": 9950 }, { "epoch": 0.02801049549891586, "grad_norm": 29.5, "learning_rate": 1.9995699187806642e-05, "loss": 7.9975, "step": 9960 }, { "epoch": 0.028038618486364568, "grad_norm": 40.5, "learning_rate": 1.9995690547054825e-05, "loss": 8.0908, "step": 9970 }, { "epoch": 0.02806674147381328, "grad_norm": 32.5, "learning_rate": 1.9995681897633527e-05, "loss": 8.2361, "step": 9980 }, { "epoch": 0.02809486446126199, "grad_norm": 20.375, "learning_rate": 1.999567323954276e-05, "loss": 7.896, "step": 9990 }, { "epoch": 0.028122987448710703, "grad_norm": 51.25, "learning_rate": 1.999566457278253e-05, "loss": 8.4663, "step": 10000 }, { "epoch": 0.02815111043615941, "grad_norm": 30.875, "learning_rate": 1.9995655897352843e-05, "loss": 8.1154, "step": 10010 }, { "epoch": 0.028179233423608122, "grad_norm": 28.75, "learning_rate": 1.999564721325371e-05, "loss": 7.8825, "step": 10020 }, { "epoch": 0.028207356411056834, "grad_norm": 39.5, "learning_rate": 1.999563852048514e-05, "loss": 8.1331, "step": 10030 }, { "epoch": 0.028235479398505545, "grad_norm": 30.75, "learning_rate": 1.9995629819047133e-05, "loss": 8.1916, "step": 10040 }, { "epoch": 0.028263602385954253, "grad_norm": 52.0, "learning_rate": 1.99956211089397e-05, "loss": 7.3916, "step": 10050 }, { "epoch": 0.028291725373402965, "grad_norm": 30.125, "learning_rate": 1.9995612390162852e-05, "loss": 7.9241, "step": 10060 }, { "epoch": 0.028319848360851677, "grad_norm": 42.75, "learning_rate": 1.999560366271659e-05, "loss": 8.5944, "step": 10070 }, { "epoch": 0.028347971348300388, "grad_norm": 24.75, "learning_rate": 1.9995594926600928e-05, "loss": 7.6087, "step": 10080 }, { "epoch": 0.0283760943357491, "grad_norm": 33.25, "learning_rate": 1.999558618181587e-05, "loss": 7.81, "step": 10090 }, { "epoch": 0.028404217323197808, "grad_norm": 25.875, "learning_rate": 1.9995577428361427e-05, "loss": 8.3325, "step": 10100 }, { "epoch": 0.02843234031064652, "grad_norm": 73.5, "learning_rate": 1.99955686662376e-05, "loss": 8.3205, "step": 10110 }, { "epoch": 0.02846046329809523, "grad_norm": 22.125, "learning_rate": 1.9995559895444404e-05, "loss": 8.3681, "step": 10120 }, { "epoch": 0.028488586285543942, "grad_norm": 31.75, "learning_rate": 1.9995551115981844e-05, "loss": 8.1783, "step": 10130 }, { "epoch": 0.02851670927299265, "grad_norm": 34.5, "learning_rate": 1.9995542327849927e-05, "loss": 8.5446, "step": 10140 }, { "epoch": 0.028544832260441362, "grad_norm": 37.5, "learning_rate": 1.999553353104866e-05, "loss": 7.9473, "step": 10150 }, { "epoch": 0.028572955247890074, "grad_norm": 45.75, "learning_rate": 1.999552472557805e-05, "loss": 7.891, "step": 10160 }, { "epoch": 0.028601078235338785, "grad_norm": 38.0, "learning_rate": 1.9995515911438107e-05, "loss": 8.4031, "step": 10170 }, { "epoch": 0.028629201222787493, "grad_norm": 27.125, "learning_rate": 1.9995507088628836e-05, "loss": 7.2783, "step": 10180 }, { "epoch": 0.028657324210236205, "grad_norm": 32.5, "learning_rate": 1.9995498257150248e-05, "loss": 7.7798, "step": 10190 }, { "epoch": 0.028685447197684916, "grad_norm": 46.5, "learning_rate": 1.9995489417002348e-05, "loss": 7.7, "step": 10200 }, { "epoch": 0.028713570185133628, "grad_norm": 24.625, "learning_rate": 1.9995480568185145e-05, "loss": 7.6215, "step": 10210 }, { "epoch": 0.028741693172582336, "grad_norm": 25.0, "learning_rate": 1.9995471710698647e-05, "loss": 7.7735, "step": 10220 }, { "epoch": 0.028769816160031048, "grad_norm": 42.25, "learning_rate": 1.9995462844542858e-05, "loss": 7.7311, "step": 10230 }, { "epoch": 0.02879793914747976, "grad_norm": 49.5, "learning_rate": 1.9995453969717792e-05, "loss": 7.9439, "step": 10240 }, { "epoch": 0.02882606213492847, "grad_norm": 22.875, "learning_rate": 1.999544508622345e-05, "loss": 8.3863, "step": 10250 }, { "epoch": 0.02885418512237718, "grad_norm": 18.625, "learning_rate": 1.9995436194059846e-05, "loss": 7.4996, "step": 10260 }, { "epoch": 0.02888230810982589, "grad_norm": 29.5, "learning_rate": 1.9995427293226983e-05, "loss": 7.8813, "step": 10270 }, { "epoch": 0.028910431097274602, "grad_norm": 51.0, "learning_rate": 1.9995418383724872e-05, "loss": 8.077, "step": 10280 }, { "epoch": 0.028938554084723313, "grad_norm": 26.75, "learning_rate": 1.9995409465553516e-05, "loss": 8.3224, "step": 10290 }, { "epoch": 0.02896667707217202, "grad_norm": 46.0, "learning_rate": 1.999540053871293e-05, "loss": 6.9259, "step": 10300 }, { "epoch": 0.028994800059620733, "grad_norm": 47.25, "learning_rate": 1.9995391603203113e-05, "loss": 7.1346, "step": 10310 }, { "epoch": 0.029022923047069445, "grad_norm": 52.5, "learning_rate": 1.999538265902408e-05, "loss": 8.4722, "step": 10320 }, { "epoch": 0.029051046034518156, "grad_norm": 24.0, "learning_rate": 1.9995373706175838e-05, "loss": 7.4183, "step": 10330 }, { "epoch": 0.029079169021966864, "grad_norm": 33.0, "learning_rate": 1.9995364744658394e-05, "loss": 7.4469, "step": 10340 }, { "epoch": 0.029107292009415576, "grad_norm": 45.75, "learning_rate": 1.999535577447175e-05, "loss": 7.8218, "step": 10350 }, { "epoch": 0.029135414996864287, "grad_norm": 42.75, "learning_rate": 1.9995346795615917e-05, "loss": 7.8033, "step": 10360 }, { "epoch": 0.029163537984313, "grad_norm": 28.25, "learning_rate": 1.999533780809091e-05, "loss": 7.5636, "step": 10370 }, { "epoch": 0.029191660971761707, "grad_norm": 28.0, "learning_rate": 1.9995328811896728e-05, "loss": 7.9838, "step": 10380 }, { "epoch": 0.02921978395921042, "grad_norm": 31.875, "learning_rate": 1.999531980703338e-05, "loss": 7.9796, "step": 10390 }, { "epoch": 0.02924790694665913, "grad_norm": 47.25, "learning_rate": 1.9995310793500877e-05, "loss": 8.6816, "step": 10400 }, { "epoch": 0.02927602993410784, "grad_norm": 29.25, "learning_rate": 1.999530177129923e-05, "loss": 8.2196, "step": 10410 }, { "epoch": 0.02930415292155655, "grad_norm": 29.375, "learning_rate": 1.9995292740428436e-05, "loss": 8.2338, "step": 10420 }, { "epoch": 0.02933227590900526, "grad_norm": 36.0, "learning_rate": 1.999528370088851e-05, "loss": 7.6998, "step": 10430 }, { "epoch": 0.029360398896453973, "grad_norm": 39.25, "learning_rate": 1.9995274652679462e-05, "loss": 8.2312, "step": 10440 }, { "epoch": 0.029388521883902684, "grad_norm": 24.75, "learning_rate": 1.9995265595801294e-05, "loss": 8.754, "step": 10450 }, { "epoch": 0.029416644871351393, "grad_norm": 25.5, "learning_rate": 1.999525653025402e-05, "loss": 7.8424, "step": 10460 }, { "epoch": 0.029444767858800104, "grad_norm": 28.125, "learning_rate": 1.9995247456037643e-05, "loss": 8.4582, "step": 10470 }, { "epoch": 0.029472890846248816, "grad_norm": 22.375, "learning_rate": 1.9995238373152174e-05, "loss": 7.4337, "step": 10480 }, { "epoch": 0.029501013833697527, "grad_norm": 29.375, "learning_rate": 1.9995229281597615e-05, "loss": 6.7875, "step": 10490 }, { "epoch": 0.029529136821146235, "grad_norm": 20.875, "learning_rate": 1.999522018137398e-05, "loss": 7.8073, "step": 10500 }, { "epoch": 0.029557259808594947, "grad_norm": 35.25, "learning_rate": 1.9995211072481275e-05, "loss": 8.0483, "step": 10510 }, { "epoch": 0.02958538279604366, "grad_norm": 38.25, "learning_rate": 1.9995201954919507e-05, "loss": 8.0767, "step": 10520 }, { "epoch": 0.02961350578349237, "grad_norm": 37.75, "learning_rate": 1.9995192828688686e-05, "loss": 7.9907, "step": 10530 }, { "epoch": 0.029641628770941078, "grad_norm": 24.875, "learning_rate": 1.999518369378882e-05, "loss": 8.0382, "step": 10540 }, { "epoch": 0.02966975175838979, "grad_norm": 27.875, "learning_rate": 1.9995174550219914e-05, "loss": 8.3722, "step": 10550 }, { "epoch": 0.0296978747458385, "grad_norm": 43.75, "learning_rate": 1.999516539798198e-05, "loss": 8.1127, "step": 10560 }, { "epoch": 0.029725997733287213, "grad_norm": 28.25, "learning_rate": 1.9995156237075023e-05, "loss": 7.9002, "step": 10570 }, { "epoch": 0.02975412072073592, "grad_norm": 39.25, "learning_rate": 1.999514706749905e-05, "loss": 7.143, "step": 10580 }, { "epoch": 0.029782243708184632, "grad_norm": 23.625, "learning_rate": 1.999513788925407e-05, "loss": 7.8294, "step": 10590 }, { "epoch": 0.029810366695633344, "grad_norm": 33.5, "learning_rate": 1.9995128702340093e-05, "loss": 7.8767, "step": 10600 }, { "epoch": 0.029838489683082055, "grad_norm": 49.25, "learning_rate": 1.9995119506757125e-05, "loss": 8.1221, "step": 10610 }, { "epoch": 0.029866612670530764, "grad_norm": 28.0, "learning_rate": 1.999511030250517e-05, "loss": 8.5001, "step": 10620 }, { "epoch": 0.029894735657979475, "grad_norm": 33.25, "learning_rate": 1.9995101089584245e-05, "loss": 8.5017, "step": 10630 }, { "epoch": 0.029922858645428187, "grad_norm": 25.625, "learning_rate": 1.9995091867994353e-05, "loss": 7.3184, "step": 10640 }, { "epoch": 0.029950981632876898, "grad_norm": 25.0, "learning_rate": 1.9995082637735502e-05, "loss": 7.6897, "step": 10650 }, { "epoch": 0.029979104620325606, "grad_norm": 23.875, "learning_rate": 1.9995073398807703e-05, "loss": 8.3613, "step": 10660 }, { "epoch": 0.030007227607774318, "grad_norm": 42.0, "learning_rate": 1.9995064151210956e-05, "loss": 8.2334, "step": 10670 }, { "epoch": 0.03003535059522303, "grad_norm": 21.25, "learning_rate": 1.999505489494528e-05, "loss": 8.3809, "step": 10680 }, { "epoch": 0.03006347358267174, "grad_norm": 20.75, "learning_rate": 1.9995045630010672e-05, "loss": 7.5654, "step": 10690 }, { "epoch": 0.03009159657012045, "grad_norm": 26.875, "learning_rate": 1.9995036356407147e-05, "loss": 8.6393, "step": 10700 }, { "epoch": 0.03011971955756916, "grad_norm": 27.625, "learning_rate": 1.9995027074134714e-05, "loss": 7.8963, "step": 10710 }, { "epoch": 0.030147842545017872, "grad_norm": 44.0, "learning_rate": 1.9995017783193376e-05, "loss": 7.8087, "step": 10720 }, { "epoch": 0.030175965532466584, "grad_norm": 31.25, "learning_rate": 1.9995008483583144e-05, "loss": 7.5284, "step": 10730 }, { "epoch": 0.030204088519915292, "grad_norm": 23.125, "learning_rate": 1.9994999175304027e-05, "loss": 8.9282, "step": 10740 }, { "epoch": 0.030232211507364003, "grad_norm": 22.5, "learning_rate": 1.999498985835603e-05, "loss": 8.3088, "step": 10750 }, { "epoch": 0.030260334494812715, "grad_norm": 21.125, "learning_rate": 1.9994980532739166e-05, "loss": 8.0229, "step": 10760 }, { "epoch": 0.030288457482261427, "grad_norm": 20.5, "learning_rate": 1.9994971198453438e-05, "loss": 7.7891, "step": 10770 }, { "epoch": 0.030316580469710138, "grad_norm": 40.0, "learning_rate": 1.9994961855498856e-05, "loss": 8.3593, "step": 10780 }, { "epoch": 0.030344703457158846, "grad_norm": 32.0, "learning_rate": 1.9994952503875424e-05, "loss": 7.6064, "step": 10790 }, { "epoch": 0.030372826444607558, "grad_norm": 21.875, "learning_rate": 1.999494314358316e-05, "loss": 7.7996, "step": 10800 }, { "epoch": 0.03040094943205627, "grad_norm": 21.0, "learning_rate": 1.9994933774622063e-05, "loss": 8.5965, "step": 10810 }, { "epoch": 0.03042907241950498, "grad_norm": 19.375, "learning_rate": 1.9994924396992145e-05, "loss": 7.965, "step": 10820 }, { "epoch": 0.03045719540695369, "grad_norm": 29.0, "learning_rate": 1.9994915010693417e-05, "loss": 8.6208, "step": 10830 }, { "epoch": 0.0304853183944024, "grad_norm": 28.875, "learning_rate": 1.999490561572588e-05, "loss": 8.1938, "step": 10840 }, { "epoch": 0.030513441381851112, "grad_norm": 37.5, "learning_rate": 1.999489621208955e-05, "loss": 8.6939, "step": 10850 }, { "epoch": 0.030541564369299824, "grad_norm": 24.75, "learning_rate": 1.9994886799784425e-05, "loss": 7.5773, "step": 10860 }, { "epoch": 0.03056968735674853, "grad_norm": 46.75, "learning_rate": 1.9994877378810523e-05, "loss": 7.5652, "step": 10870 }, { "epoch": 0.030597810344197243, "grad_norm": 45.25, "learning_rate": 1.9994867949167845e-05, "loss": 8.1565, "step": 10880 }, { "epoch": 0.030625933331645955, "grad_norm": 27.125, "learning_rate": 1.9994858510856405e-05, "loss": 7.4442, "step": 10890 }, { "epoch": 0.030654056319094666, "grad_norm": 38.5, "learning_rate": 1.999484906387621e-05, "loss": 8.0493, "step": 10900 }, { "epoch": 0.030682179306543374, "grad_norm": 28.625, "learning_rate": 1.9994839608227265e-05, "loss": 8.7882, "step": 10910 }, { "epoch": 0.030710302293992086, "grad_norm": 31.625, "learning_rate": 1.999483014390958e-05, "loss": 7.1223, "step": 10920 }, { "epoch": 0.030738425281440798, "grad_norm": 36.75, "learning_rate": 1.9994820670923164e-05, "loss": 8.1631, "step": 10930 }, { "epoch": 0.03076654826888951, "grad_norm": 39.5, "learning_rate": 1.9994811189268025e-05, "loss": 7.4024, "step": 10940 }, { "epoch": 0.030794671256338217, "grad_norm": 35.25, "learning_rate": 1.999480169894417e-05, "loss": 8.0403, "step": 10950 }, { "epoch": 0.03082279424378693, "grad_norm": 22.875, "learning_rate": 1.999479219995161e-05, "loss": 7.9412, "step": 10960 }, { "epoch": 0.03085091723123564, "grad_norm": 27.0, "learning_rate": 1.999478269229035e-05, "loss": 7.3164, "step": 10970 }, { "epoch": 0.030879040218684352, "grad_norm": 36.75, "learning_rate": 1.99947731759604e-05, "loss": 7.8037, "step": 10980 }, { "epoch": 0.03090716320613306, "grad_norm": 23.375, "learning_rate": 1.9994763650961767e-05, "loss": 7.5078, "step": 10990 }, { "epoch": 0.03093528619358177, "grad_norm": 21.75, "learning_rate": 1.9994754117294457e-05, "loss": 8.8212, "step": 11000 }, { "epoch": 0.030963409181030483, "grad_norm": 33.0, "learning_rate": 1.9994744574958488e-05, "loss": 8.0928, "step": 11010 }, { "epoch": 0.030991532168479195, "grad_norm": 32.5, "learning_rate": 1.9994735023953854e-05, "loss": 8.3206, "step": 11020 }, { "epoch": 0.031019655155927903, "grad_norm": 23.5, "learning_rate": 1.9994725464280576e-05, "loss": 8.1828, "step": 11030 }, { "epoch": 0.031047778143376614, "grad_norm": 27.25, "learning_rate": 1.9994715895938656e-05, "loss": 7.9468, "step": 11040 }, { "epoch": 0.031075901130825326, "grad_norm": 29.875, "learning_rate": 1.9994706318928105e-05, "loss": 7.9164, "step": 11050 }, { "epoch": 0.031104024118274037, "grad_norm": 31.125, "learning_rate": 1.9994696733248924e-05, "loss": 7.7643, "step": 11060 }, { "epoch": 0.031132147105722745, "grad_norm": 29.25, "learning_rate": 1.9994687138901133e-05, "loss": 7.1076, "step": 11070 }, { "epoch": 0.031160270093171457, "grad_norm": 36.25, "learning_rate": 1.9994677535884732e-05, "loss": 8.8965, "step": 11080 }, { "epoch": 0.03118839308062017, "grad_norm": 24.5, "learning_rate": 1.9994667924199732e-05, "loss": 8.5676, "step": 11090 }, { "epoch": 0.03121651606806888, "grad_norm": 24.25, "learning_rate": 1.999465830384614e-05, "loss": 7.8746, "step": 11100 }, { "epoch": 0.031244639055517588, "grad_norm": 29.5, "learning_rate": 1.9994648674823968e-05, "loss": 7.8671, "step": 11110 }, { "epoch": 0.0312727620429663, "grad_norm": 72.5, "learning_rate": 1.999463903713322e-05, "loss": 8.3754, "step": 11120 }, { "epoch": 0.03130088503041501, "grad_norm": 25.25, "learning_rate": 1.9994629390773906e-05, "loss": 8.0884, "step": 11130 }, { "epoch": 0.03132900801786372, "grad_norm": 32.5, "learning_rate": 1.9994619735746034e-05, "loss": 8.2682, "step": 11140 }, { "epoch": 0.031357131005312434, "grad_norm": 31.0, "learning_rate": 1.9994610072049612e-05, "loss": 8.4546, "step": 11150 }, { "epoch": 0.031385253992761146, "grad_norm": 25.5, "learning_rate": 1.9994600399684653e-05, "loss": 7.207, "step": 11160 }, { "epoch": 0.03141337698020985, "grad_norm": 31.625, "learning_rate": 1.9994590718651158e-05, "loss": 8.0186, "step": 11170 }, { "epoch": 0.03144149996765856, "grad_norm": 27.25, "learning_rate": 1.9994581028949138e-05, "loss": 8.6081, "step": 11180 }, { "epoch": 0.031469622955107274, "grad_norm": 24.125, "learning_rate": 1.9994571330578606e-05, "loss": 8.436, "step": 11190 }, { "epoch": 0.031497745942555985, "grad_norm": 25.25, "learning_rate": 1.9994561623539567e-05, "loss": 8.1119, "step": 11200 }, { "epoch": 0.0315258689300047, "grad_norm": 40.0, "learning_rate": 1.9994551907832026e-05, "loss": 8.5074, "step": 11210 }, { "epoch": 0.03155399191745341, "grad_norm": 33.5, "learning_rate": 1.9994542183455998e-05, "loss": 8.0558, "step": 11220 }, { "epoch": 0.03158211490490212, "grad_norm": 28.125, "learning_rate": 1.9994532450411485e-05, "loss": 7.3696, "step": 11230 }, { "epoch": 0.03161023789235083, "grad_norm": 26.75, "learning_rate": 1.99945227086985e-05, "loss": 7.8066, "step": 11240 }, { "epoch": 0.031638360879799536, "grad_norm": 25.625, "learning_rate": 1.999451295831705e-05, "loss": 8.5742, "step": 11250 }, { "epoch": 0.03166648386724825, "grad_norm": 23.125, "learning_rate": 1.9994503199267145e-05, "loss": 8.7722, "step": 11260 }, { "epoch": 0.03169460685469696, "grad_norm": 24.875, "learning_rate": 1.9994493431548786e-05, "loss": 7.8013, "step": 11270 }, { "epoch": 0.03172272984214567, "grad_norm": 41.75, "learning_rate": 1.9994483655161994e-05, "loss": 8.2136, "step": 11280 }, { "epoch": 0.03175085282959438, "grad_norm": 21.125, "learning_rate": 1.999447387010677e-05, "loss": 7.6348, "step": 11290 }, { "epoch": 0.031778975817043094, "grad_norm": 24.625, "learning_rate": 1.999446407638312e-05, "loss": 8.0521, "step": 11300 }, { "epoch": 0.031807098804491805, "grad_norm": 28.75, "learning_rate": 1.9994454273991056e-05, "loss": 7.9637, "step": 11310 }, { "epoch": 0.03183522179194052, "grad_norm": 30.875, "learning_rate": 1.999444446293059e-05, "loss": 7.9769, "step": 11320 }, { "epoch": 0.03186334477938922, "grad_norm": 23.625, "learning_rate": 1.9994434643201725e-05, "loss": 7.6544, "step": 11330 }, { "epoch": 0.03189146776683793, "grad_norm": 26.625, "learning_rate": 1.999442481480447e-05, "loss": 8.424, "step": 11340 }, { "epoch": 0.031919590754286645, "grad_norm": 47.0, "learning_rate": 1.9994414977738836e-05, "loss": 8.4298, "step": 11350 }, { "epoch": 0.031947713741735356, "grad_norm": 22.125, "learning_rate": 1.999440513200483e-05, "loss": 7.9579, "step": 11360 }, { "epoch": 0.03197583672918407, "grad_norm": 42.25, "learning_rate": 1.9994395277602464e-05, "loss": 7.7313, "step": 11370 }, { "epoch": 0.03200395971663278, "grad_norm": 46.0, "learning_rate": 1.999438541453174e-05, "loss": 7.3802, "step": 11380 }, { "epoch": 0.03203208270408149, "grad_norm": 20.625, "learning_rate": 1.999437554279267e-05, "loss": 7.9468, "step": 11390 }, { "epoch": 0.0320602056915302, "grad_norm": 25.875, "learning_rate": 1.9994365662385264e-05, "loss": 7.1312, "step": 11400 }, { "epoch": 0.03208832867897891, "grad_norm": 29.125, "learning_rate": 1.9994355773309527e-05, "loss": 8.3429, "step": 11410 }, { "epoch": 0.03211645166642762, "grad_norm": 28.5, "learning_rate": 1.999434587556547e-05, "loss": 8.2113, "step": 11420 }, { "epoch": 0.03214457465387633, "grad_norm": 42.75, "learning_rate": 1.9994335969153104e-05, "loss": 7.454, "step": 11430 }, { "epoch": 0.03217269764132504, "grad_norm": 25.5, "learning_rate": 1.9994326054072436e-05, "loss": 8.7546, "step": 11440 }, { "epoch": 0.03220082062877375, "grad_norm": 23.25, "learning_rate": 1.9994316130323473e-05, "loss": 7.9789, "step": 11450 }, { "epoch": 0.032228943616222465, "grad_norm": 57.0, "learning_rate": 1.999430619790622e-05, "loss": 8.4451, "step": 11460 }, { "epoch": 0.032257066603671176, "grad_norm": 39.25, "learning_rate": 1.9994296256820693e-05, "loss": 7.3193, "step": 11470 }, { "epoch": 0.03228518959111989, "grad_norm": 30.75, "learning_rate": 1.9994286307066897e-05, "loss": 7.4242, "step": 11480 }, { "epoch": 0.0323133125785686, "grad_norm": 21.125, "learning_rate": 1.999427634864484e-05, "loss": 8.7769, "step": 11490 }, { "epoch": 0.032341435566017304, "grad_norm": 38.0, "learning_rate": 1.9994266381554532e-05, "loss": 8.1196, "step": 11500 }, { "epoch": 0.032369558553466016, "grad_norm": 24.875, "learning_rate": 1.9994256405795984e-05, "loss": 8.0855, "step": 11510 }, { "epoch": 0.03239768154091473, "grad_norm": 37.75, "learning_rate": 1.9994246421369196e-05, "loss": 8.1357, "step": 11520 }, { "epoch": 0.03242580452836344, "grad_norm": 29.5, "learning_rate": 1.9994236428274187e-05, "loss": 7.7256, "step": 11530 }, { "epoch": 0.03245392751581215, "grad_norm": 23.875, "learning_rate": 1.9994226426510962e-05, "loss": 7.7523, "step": 11540 }, { "epoch": 0.03248205050326086, "grad_norm": 32.0, "learning_rate": 1.9994216416079527e-05, "loss": 7.8625, "step": 11550 }, { "epoch": 0.032510173490709574, "grad_norm": 34.0, "learning_rate": 1.9994206396979892e-05, "loss": 7.8557, "step": 11560 }, { "epoch": 0.032538296478158285, "grad_norm": 43.75, "learning_rate": 1.9994196369212068e-05, "loss": 7.6083, "step": 11570 }, { "epoch": 0.03256641946560699, "grad_norm": 25.75, "learning_rate": 1.999418633277606e-05, "loss": 7.8477, "step": 11580 }, { "epoch": 0.0325945424530557, "grad_norm": 27.875, "learning_rate": 1.999417628767188e-05, "loss": 7.5025, "step": 11590 }, { "epoch": 0.03262266544050441, "grad_norm": 22.875, "learning_rate": 1.9994166233899535e-05, "loss": 7.4354, "step": 11600 }, { "epoch": 0.032650788427953124, "grad_norm": 42.0, "learning_rate": 1.9994156171459037e-05, "loss": 7.8523, "step": 11610 }, { "epoch": 0.032678911415401836, "grad_norm": 21.0, "learning_rate": 1.999414610035039e-05, "loss": 8.1717, "step": 11620 }, { "epoch": 0.03270703440285055, "grad_norm": 22.5, "learning_rate": 1.9994136020573604e-05, "loss": 8.0534, "step": 11630 }, { "epoch": 0.03273515739029926, "grad_norm": 40.75, "learning_rate": 1.9994125932128687e-05, "loss": 8.8001, "step": 11640 }, { "epoch": 0.03276328037774797, "grad_norm": 30.0, "learning_rate": 1.9994115835015654e-05, "loss": 7.7579, "step": 11650 }, { "epoch": 0.032791403365196675, "grad_norm": 33.25, "learning_rate": 1.9994105729234502e-05, "loss": 8.0191, "step": 11660 }, { "epoch": 0.03281952635264539, "grad_norm": 31.5, "learning_rate": 1.9994095614785253e-05, "loss": 7.9548, "step": 11670 }, { "epoch": 0.0328476493400941, "grad_norm": 42.75, "learning_rate": 1.999408549166791e-05, "loss": 8.3128, "step": 11680 }, { "epoch": 0.03287577232754281, "grad_norm": 32.0, "learning_rate": 1.9994075359882473e-05, "loss": 8.1475, "step": 11690 }, { "epoch": 0.03290389531499152, "grad_norm": 23.0, "learning_rate": 1.9994065219428964e-05, "loss": 7.2656, "step": 11700 }, { "epoch": 0.03293201830244023, "grad_norm": 36.25, "learning_rate": 1.9994055070307386e-05, "loss": 8.206, "step": 11710 }, { "epoch": 0.032960141289888945, "grad_norm": 36.25, "learning_rate": 1.999404491251775e-05, "loss": 7.915, "step": 11720 }, { "epoch": 0.032988264277337656, "grad_norm": 30.375, "learning_rate": 1.9994034746060062e-05, "loss": 7.8346, "step": 11730 }, { "epoch": 0.03301638726478636, "grad_norm": 43.75, "learning_rate": 1.9994024570934333e-05, "loss": 7.5286, "step": 11740 }, { "epoch": 0.03304451025223507, "grad_norm": 21.25, "learning_rate": 1.999401438714057e-05, "loss": 7.4084, "step": 11750 }, { "epoch": 0.033072633239683784, "grad_norm": 26.625, "learning_rate": 1.9994004194678786e-05, "loss": 7.5272, "step": 11760 }, { "epoch": 0.033100756227132495, "grad_norm": 29.625, "learning_rate": 1.9993993993548982e-05, "loss": 7.8422, "step": 11770 }, { "epoch": 0.03312887921458121, "grad_norm": 36.0, "learning_rate": 1.9993983783751176e-05, "loss": 7.7822, "step": 11780 }, { "epoch": 0.03315700220202992, "grad_norm": 45.75, "learning_rate": 1.999397356528537e-05, "loss": 7.2096, "step": 11790 }, { "epoch": 0.03318512518947863, "grad_norm": 24.375, "learning_rate": 1.9993963338151572e-05, "loss": 7.7872, "step": 11800 }, { "epoch": 0.03321324817692734, "grad_norm": 29.125, "learning_rate": 1.99939531023498e-05, "loss": 8.0382, "step": 11810 }, { "epoch": 0.033241371164376046, "grad_norm": 30.25, "learning_rate": 1.9993942857880055e-05, "loss": 8.1769, "step": 11820 }, { "epoch": 0.03326949415182476, "grad_norm": 21.75, "learning_rate": 1.9993932604742347e-05, "loss": 7.7626, "step": 11830 }, { "epoch": 0.03329761713927347, "grad_norm": 31.625, "learning_rate": 1.9993922342936686e-05, "loss": 8.1302, "step": 11840 }, { "epoch": 0.03332574012672218, "grad_norm": 32.5, "learning_rate": 1.999391207246308e-05, "loss": 7.6275, "step": 11850 }, { "epoch": 0.03335386311417089, "grad_norm": 35.0, "learning_rate": 1.9993901793321542e-05, "loss": 8.7084, "step": 11860 }, { "epoch": 0.033381986101619604, "grad_norm": 29.375, "learning_rate": 1.9993891505512076e-05, "loss": 8.89, "step": 11870 }, { "epoch": 0.033410109089068316, "grad_norm": 27.75, "learning_rate": 1.999388120903469e-05, "loss": 8.1222, "step": 11880 }, { "epoch": 0.03343823207651703, "grad_norm": 29.25, "learning_rate": 1.9993870903889397e-05, "loss": 8.0613, "step": 11890 }, { "epoch": 0.03346635506396573, "grad_norm": 28.375, "learning_rate": 1.9993860590076205e-05, "loss": 8.7048, "step": 11900 }, { "epoch": 0.03349447805141444, "grad_norm": 27.0, "learning_rate": 1.999385026759512e-05, "loss": 8.092, "step": 11910 }, { "epoch": 0.033522601038863155, "grad_norm": 28.875, "learning_rate": 1.9993839936446157e-05, "loss": 6.9908, "step": 11920 }, { "epoch": 0.033550724026311866, "grad_norm": 24.625, "learning_rate": 1.9993829596629318e-05, "loss": 8.0606, "step": 11930 }, { "epoch": 0.03357884701376058, "grad_norm": 41.0, "learning_rate": 1.9993819248144614e-05, "loss": 8.3727, "step": 11940 }, { "epoch": 0.03360697000120929, "grad_norm": 38.75, "learning_rate": 1.999380889099206e-05, "loss": 7.431, "step": 11950 }, { "epoch": 0.033635092988658, "grad_norm": 24.0, "learning_rate": 1.9993798525171655e-05, "loss": 8.2877, "step": 11960 }, { "epoch": 0.03366321597610671, "grad_norm": 36.25, "learning_rate": 1.9993788150683416e-05, "loss": 7.9029, "step": 11970 }, { "epoch": 0.03369133896355542, "grad_norm": 30.5, "learning_rate": 1.9993777767527347e-05, "loss": 8.789, "step": 11980 }, { "epoch": 0.03371946195100413, "grad_norm": 32.0, "learning_rate": 1.999376737570346e-05, "loss": 8.5905, "step": 11990 }, { "epoch": 0.03374758493845284, "grad_norm": 31.75, "learning_rate": 1.9993756975211764e-05, "loss": 7.7265, "step": 12000 }, { "epoch": 0.03377570792590155, "grad_norm": 33.25, "learning_rate": 1.9993746566052268e-05, "loss": 8.0885, "step": 12010 }, { "epoch": 0.03380383091335026, "grad_norm": 26.5, "learning_rate": 1.999373614822498e-05, "loss": 7.5074, "step": 12020 }, { "epoch": 0.033831953900798975, "grad_norm": 37.5, "learning_rate": 1.9993725721729906e-05, "loss": 7.9845, "step": 12030 }, { "epoch": 0.03386007688824769, "grad_norm": 37.25, "learning_rate": 1.9993715286567058e-05, "loss": 8.4465, "step": 12040 }, { "epoch": 0.0338881998756964, "grad_norm": 22.875, "learning_rate": 1.9993704842736446e-05, "loss": 6.9398, "step": 12050 }, { "epoch": 0.0339163228631451, "grad_norm": 27.625, "learning_rate": 1.9993694390238082e-05, "loss": 8.5352, "step": 12060 }, { "epoch": 0.033944445850593814, "grad_norm": 26.0, "learning_rate": 1.999368392907197e-05, "loss": 8.141, "step": 12070 }, { "epoch": 0.033972568838042526, "grad_norm": 42.75, "learning_rate": 1.9993673459238117e-05, "loss": 7.7464, "step": 12080 }, { "epoch": 0.03400069182549124, "grad_norm": 31.875, "learning_rate": 1.999366298073654e-05, "loss": 7.4315, "step": 12090 }, { "epoch": 0.03402881481293995, "grad_norm": 26.5, "learning_rate": 1.9993652493567244e-05, "loss": 8.5784, "step": 12100 }, { "epoch": 0.03405693780038866, "grad_norm": 36.75, "learning_rate": 1.9993641997730236e-05, "loss": 8.0436, "step": 12110 }, { "epoch": 0.03408506078783737, "grad_norm": 22.75, "learning_rate": 1.9993631493225526e-05, "loss": 8.2972, "step": 12120 }, { "epoch": 0.034113183775286084, "grad_norm": 26.375, "learning_rate": 1.999362098005312e-05, "loss": 8.21, "step": 12130 }, { "epoch": 0.034141306762734795, "grad_norm": 34.5, "learning_rate": 1.999361045821304e-05, "loss": 8.1532, "step": 12140 }, { "epoch": 0.0341694297501835, "grad_norm": 31.5, "learning_rate": 1.999359992770528e-05, "loss": 8.0812, "step": 12150 }, { "epoch": 0.03419755273763221, "grad_norm": 27.0, "learning_rate": 1.9993589388529857e-05, "loss": 8.1976, "step": 12160 }, { "epoch": 0.03422567572508092, "grad_norm": 30.125, "learning_rate": 1.9993578840686777e-05, "loss": 8.38, "step": 12170 }, { "epoch": 0.034253798712529634, "grad_norm": 23.875, "learning_rate": 1.9993568284176054e-05, "loss": 7.4026, "step": 12180 }, { "epoch": 0.034281921699978346, "grad_norm": 41.25, "learning_rate": 1.999355771899769e-05, "loss": 7.2106, "step": 12190 }, { "epoch": 0.03431004468742706, "grad_norm": 35.0, "learning_rate": 1.9993547145151702e-05, "loss": 8.3326, "step": 12200 }, { "epoch": 0.03433816767487577, "grad_norm": 35.75, "learning_rate": 1.999353656263809e-05, "loss": 8.4656, "step": 12210 }, { "epoch": 0.03436629066232448, "grad_norm": 36.5, "learning_rate": 1.9993525971456872e-05, "loss": 8.5133, "step": 12220 }, { "epoch": 0.034394413649773185, "grad_norm": 32.0, "learning_rate": 1.9993515371608054e-05, "loss": 8.1739, "step": 12230 }, { "epoch": 0.0344225366372219, "grad_norm": 34.75, "learning_rate": 1.9993504763091642e-05, "loss": 7.9026, "step": 12240 }, { "epoch": 0.03445065962467061, "grad_norm": 29.25, "learning_rate": 1.999349414590765e-05, "loss": 8.2083, "step": 12250 }, { "epoch": 0.03447878261211932, "grad_norm": 42.25, "learning_rate": 1.9993483520056082e-05, "loss": 8.1157, "step": 12260 }, { "epoch": 0.03450690559956803, "grad_norm": 33.5, "learning_rate": 1.9993472885536952e-05, "loss": 7.5783, "step": 12270 }, { "epoch": 0.03453502858701674, "grad_norm": 51.25, "learning_rate": 1.999346224235027e-05, "loss": 7.4398, "step": 12280 }, { "epoch": 0.034563151574465455, "grad_norm": 27.375, "learning_rate": 1.999345159049604e-05, "loss": 7.348, "step": 12290 }, { "epoch": 0.034591274561914166, "grad_norm": 22.875, "learning_rate": 1.9993440929974275e-05, "loss": 8.1754, "step": 12300 }, { "epoch": 0.03461939754936287, "grad_norm": 52.25, "learning_rate": 1.9993430260784984e-05, "loss": 9.4514, "step": 12310 }, { "epoch": 0.03464752053681158, "grad_norm": 21.25, "learning_rate": 1.9993419582928172e-05, "loss": 7.6919, "step": 12320 }, { "epoch": 0.034675643524260294, "grad_norm": 24.0, "learning_rate": 1.9993408896403858e-05, "loss": 7.3727, "step": 12330 }, { "epoch": 0.034703766511709006, "grad_norm": 35.5, "learning_rate": 1.9993398201212038e-05, "loss": 7.8566, "step": 12340 }, { "epoch": 0.03473188949915772, "grad_norm": 39.0, "learning_rate": 1.999338749735273e-05, "loss": 8.3421, "step": 12350 }, { "epoch": 0.03476001248660643, "grad_norm": 27.375, "learning_rate": 1.9993376784825945e-05, "loss": 7.8186, "step": 12360 }, { "epoch": 0.03478813547405514, "grad_norm": 44.25, "learning_rate": 1.9993366063631688e-05, "loss": 8.126, "step": 12370 }, { "epoch": 0.03481625846150385, "grad_norm": 26.875, "learning_rate": 1.999335533376997e-05, "loss": 7.9688, "step": 12380 }, { "epoch": 0.034844381448952556, "grad_norm": 30.0, "learning_rate": 1.9993344595240797e-05, "loss": 8.0047, "step": 12390 }, { "epoch": 0.03487250443640127, "grad_norm": 42.25, "learning_rate": 1.999333384804418e-05, "loss": 7.9628, "step": 12400 }, { "epoch": 0.03490062742384998, "grad_norm": 35.75, "learning_rate": 1.999332309218013e-05, "loss": 8.5207, "step": 12410 }, { "epoch": 0.03492875041129869, "grad_norm": 47.75, "learning_rate": 1.9993312327648654e-05, "loss": 8.3276, "step": 12420 }, { "epoch": 0.0349568733987474, "grad_norm": 26.125, "learning_rate": 1.9993301554449766e-05, "loss": 8.159, "step": 12430 }, { "epoch": 0.034984996386196114, "grad_norm": 29.0, "learning_rate": 1.999329077258347e-05, "loss": 8.2998, "step": 12440 }, { "epoch": 0.035013119373644826, "grad_norm": 35.0, "learning_rate": 1.999327998204978e-05, "loss": 7.2331, "step": 12450 }, { "epoch": 0.03504124236109354, "grad_norm": 31.375, "learning_rate": 1.99932691828487e-05, "loss": 7.7339, "step": 12460 }, { "epoch": 0.03506936534854224, "grad_norm": 34.0, "learning_rate": 1.999325837498024e-05, "loss": 7.8044, "step": 12470 }, { "epoch": 0.03509748833599095, "grad_norm": 25.75, "learning_rate": 1.9993247558444416e-05, "loss": 7.456, "step": 12480 }, { "epoch": 0.035125611323439665, "grad_norm": 43.75, "learning_rate": 1.9993236733241232e-05, "loss": 7.4126, "step": 12490 }, { "epoch": 0.035153734310888377, "grad_norm": 34.0, "learning_rate": 1.99932258993707e-05, "loss": 7.8842, "step": 12500 }, { "epoch": 0.03518185729833709, "grad_norm": 24.875, "learning_rate": 1.9993215056832826e-05, "loss": 8.0112, "step": 12510 }, { "epoch": 0.0352099802857858, "grad_norm": 48.0, "learning_rate": 1.999320420562762e-05, "loss": 8.3875, "step": 12520 }, { "epoch": 0.03523810327323451, "grad_norm": 40.5, "learning_rate": 1.9993193345755093e-05, "loss": 7.612, "step": 12530 }, { "epoch": 0.03526622626068322, "grad_norm": 36.5, "learning_rate": 1.9993182477215253e-05, "loss": 7.7024, "step": 12540 }, { "epoch": 0.03529434924813193, "grad_norm": 32.5, "learning_rate": 1.9993171600008115e-05, "loss": 7.7255, "step": 12550 }, { "epoch": 0.03532247223558064, "grad_norm": 23.625, "learning_rate": 1.999316071413368e-05, "loss": 8.9419, "step": 12560 }, { "epoch": 0.03535059522302935, "grad_norm": 41.25, "learning_rate": 1.999314981959196e-05, "loss": 8.1375, "step": 12570 }, { "epoch": 0.03537871821047806, "grad_norm": 27.75, "learning_rate": 1.9993138916382968e-05, "loss": 8.4635, "step": 12580 }, { "epoch": 0.035406841197926774, "grad_norm": 39.25, "learning_rate": 1.999312800450671e-05, "loss": 7.6638, "step": 12590 }, { "epoch": 0.035434964185375485, "grad_norm": 43.0, "learning_rate": 1.9993117083963198e-05, "loss": 7.3149, "step": 12600 }, { "epoch": 0.0354630871728242, "grad_norm": 41.0, "learning_rate": 1.9993106154752438e-05, "loss": 8.4439, "step": 12610 }, { "epoch": 0.03549121016027291, "grad_norm": 22.625, "learning_rate": 1.9993095216874444e-05, "loss": 7.4968, "step": 12620 }, { "epoch": 0.03551933314772161, "grad_norm": 38.25, "learning_rate": 1.9993084270329223e-05, "loss": 8.0752, "step": 12630 }, { "epoch": 0.035547456135170324, "grad_norm": 44.25, "learning_rate": 1.9993073315116785e-05, "loss": 7.7841, "step": 12640 }, { "epoch": 0.035575579122619036, "grad_norm": 38.0, "learning_rate": 1.999306235123714e-05, "loss": 7.6506, "step": 12650 }, { "epoch": 0.03560370211006775, "grad_norm": 28.375, "learning_rate": 1.999305137869029e-05, "loss": 8.454, "step": 12660 }, { "epoch": 0.03563182509751646, "grad_norm": 31.875, "learning_rate": 1.9993040397476258e-05, "loss": 7.8975, "step": 12670 }, { "epoch": 0.03565994808496517, "grad_norm": 25.875, "learning_rate": 1.9993029407595045e-05, "loss": 7.9324, "step": 12680 }, { "epoch": 0.03568807107241388, "grad_norm": 32.5, "learning_rate": 1.999301840904666e-05, "loss": 8.3967, "step": 12690 }, { "epoch": 0.035716194059862594, "grad_norm": 32.0, "learning_rate": 1.9993007401831115e-05, "loss": 8.6223, "step": 12700 }, { "epoch": 0.0357443170473113, "grad_norm": 31.625, "learning_rate": 1.999299638594842e-05, "loss": 8.4912, "step": 12710 }, { "epoch": 0.03577244003476001, "grad_norm": 26.75, "learning_rate": 1.9992985361398586e-05, "loss": 7.5636, "step": 12720 }, { "epoch": 0.03580056302220872, "grad_norm": 34.25, "learning_rate": 1.9992974328181616e-05, "loss": 7.9519, "step": 12730 }, { "epoch": 0.03582868600965743, "grad_norm": 21.125, "learning_rate": 1.9992963286297528e-05, "loss": 7.8472, "step": 12740 }, { "epoch": 0.035856808997106145, "grad_norm": 27.875, "learning_rate": 1.9992952235746324e-05, "loss": 7.9171, "step": 12750 }, { "epoch": 0.035884931984554856, "grad_norm": 34.5, "learning_rate": 1.9992941176528017e-05, "loss": 8.0738, "step": 12760 }, { "epoch": 0.03591305497200357, "grad_norm": 27.125, "learning_rate": 1.999293010864262e-05, "loss": 8.637, "step": 12770 }, { "epoch": 0.03594117795945228, "grad_norm": 27.625, "learning_rate": 1.999291903209014e-05, "loss": 8.0261, "step": 12780 }, { "epoch": 0.03596930094690099, "grad_norm": 29.0, "learning_rate": 1.9992907946870585e-05, "loss": 7.9682, "step": 12790 }, { "epoch": 0.035997423934349695, "grad_norm": 33.25, "learning_rate": 1.9992896852983962e-05, "loss": 7.8657, "step": 12800 }, { "epoch": 0.03602554692179841, "grad_norm": 20.75, "learning_rate": 1.9992885750430285e-05, "loss": 8.2606, "step": 12810 }, { "epoch": 0.03605366990924712, "grad_norm": 28.0, "learning_rate": 1.9992874639209563e-05, "loss": 8.5209, "step": 12820 }, { "epoch": 0.03608179289669583, "grad_norm": 30.5, "learning_rate": 1.9992863519321807e-05, "loss": 8.5619, "step": 12830 }, { "epoch": 0.03610991588414454, "grad_norm": 36.0, "learning_rate": 1.9992852390767027e-05, "loss": 8.8402, "step": 12840 }, { "epoch": 0.03613803887159325, "grad_norm": 24.5, "learning_rate": 1.9992841253545227e-05, "loss": 7.9641, "step": 12850 }, { "epoch": 0.036166161859041965, "grad_norm": 34.75, "learning_rate": 1.999283010765642e-05, "loss": 7.7746, "step": 12860 }, { "epoch": 0.036194284846490676, "grad_norm": 48.0, "learning_rate": 1.9992818953100617e-05, "loss": 8.4606, "step": 12870 }, { "epoch": 0.03622240783393938, "grad_norm": 24.875, "learning_rate": 1.999280778987783e-05, "loss": 8.8518, "step": 12880 }, { "epoch": 0.03625053082138809, "grad_norm": 43.25, "learning_rate": 1.999279661798806e-05, "loss": 8.6471, "step": 12890 }, { "epoch": 0.036278653808836804, "grad_norm": 23.125, "learning_rate": 1.9992785437431326e-05, "loss": 8.2689, "step": 12900 }, { "epoch": 0.036306776796285516, "grad_norm": 43.25, "learning_rate": 1.999277424820763e-05, "loss": 8.4438, "step": 12910 }, { "epoch": 0.03633489978373423, "grad_norm": 23.25, "learning_rate": 1.9992763050316988e-05, "loss": 8.7991, "step": 12920 }, { "epoch": 0.03636302277118294, "grad_norm": 22.875, "learning_rate": 1.9992751843759408e-05, "loss": 7.9707, "step": 12930 }, { "epoch": 0.03639114575863165, "grad_norm": 28.75, "learning_rate": 1.99927406285349e-05, "loss": 8.4509, "step": 12940 }, { "epoch": 0.03641926874608036, "grad_norm": 33.5, "learning_rate": 1.999272940464347e-05, "loss": 8.2446, "step": 12950 }, { "epoch": 0.036447391733529066, "grad_norm": 32.5, "learning_rate": 1.999271817208513e-05, "loss": 7.7981, "step": 12960 }, { "epoch": 0.03647551472097778, "grad_norm": 25.625, "learning_rate": 1.9992706930859887e-05, "loss": 8.3955, "step": 12970 }, { "epoch": 0.03650363770842649, "grad_norm": 30.625, "learning_rate": 1.999269568096776e-05, "loss": 8.6762, "step": 12980 }, { "epoch": 0.0365317606958752, "grad_norm": 24.875, "learning_rate": 1.999268442240875e-05, "loss": 8.1404, "step": 12990 }, { "epoch": 0.03655988368332391, "grad_norm": 38.5, "learning_rate": 1.9992673155182872e-05, "loss": 7.8047, "step": 13000 }, { "epoch": 0.036588006670772624, "grad_norm": 20.375, "learning_rate": 1.999266187929013e-05, "loss": 7.6434, "step": 13010 }, { "epoch": 0.036616129658221336, "grad_norm": 28.0, "learning_rate": 1.9992650594730536e-05, "loss": 8.3842, "step": 13020 }, { "epoch": 0.03664425264567005, "grad_norm": 34.25, "learning_rate": 1.9992639301504104e-05, "loss": 8.2192, "step": 13030 }, { "epoch": 0.03667237563311875, "grad_norm": 34.25, "learning_rate": 1.9992627999610838e-05, "loss": 8.4654, "step": 13040 }, { "epoch": 0.036700498620567464, "grad_norm": 28.625, "learning_rate": 1.999261668905075e-05, "loss": 7.954, "step": 13050 }, { "epoch": 0.036728621608016175, "grad_norm": 24.875, "learning_rate": 1.9992605369823854e-05, "loss": 7.7056, "step": 13060 }, { "epoch": 0.03675674459546489, "grad_norm": 29.75, "learning_rate": 1.9992594041930153e-05, "loss": 7.5381, "step": 13070 }, { "epoch": 0.0367848675829136, "grad_norm": 25.875, "learning_rate": 1.999258270536966e-05, "loss": 9.2101, "step": 13080 }, { "epoch": 0.03681299057036231, "grad_norm": 21.0, "learning_rate": 1.9992571360142383e-05, "loss": 8.0199, "step": 13090 }, { "epoch": 0.03684111355781102, "grad_norm": 21.5, "learning_rate": 1.9992560006248335e-05, "loss": 7.6696, "step": 13100 }, { "epoch": 0.03686923654525973, "grad_norm": 38.25, "learning_rate": 1.9992548643687526e-05, "loss": 7.5022, "step": 13110 }, { "epoch": 0.03689735953270844, "grad_norm": 41.0, "learning_rate": 1.9992537272459962e-05, "loss": 8.3703, "step": 13120 }, { "epoch": 0.03692548252015715, "grad_norm": 36.0, "learning_rate": 1.9992525892565658e-05, "loss": 7.651, "step": 13130 }, { "epoch": 0.03695360550760586, "grad_norm": 43.75, "learning_rate": 1.9992514504004618e-05, "loss": 7.1515, "step": 13140 }, { "epoch": 0.03698172849505457, "grad_norm": 43.5, "learning_rate": 1.999250310677685e-05, "loss": 7.8423, "step": 13150 }, { "epoch": 0.037009851482503284, "grad_norm": 24.0, "learning_rate": 1.9992491700882377e-05, "loss": 8.0301, "step": 13160 }, { "epoch": 0.037037974469951995, "grad_norm": 48.25, "learning_rate": 1.9992480286321196e-05, "loss": 8.5987, "step": 13170 }, { "epoch": 0.03706609745740071, "grad_norm": 49.25, "learning_rate": 1.9992468863093323e-05, "loss": 9.1781, "step": 13180 }, { "epoch": 0.03709422044484942, "grad_norm": 20.875, "learning_rate": 1.9992457431198765e-05, "loss": 7.9495, "step": 13190 }, { "epoch": 0.03712234343229812, "grad_norm": 42.0, "learning_rate": 1.9992445990637536e-05, "loss": 8.3674, "step": 13200 }, { "epoch": 0.037150466419746835, "grad_norm": 31.0, "learning_rate": 1.999243454140964e-05, "loss": 7.5379, "step": 13210 }, { "epoch": 0.037178589407195546, "grad_norm": 22.375, "learning_rate": 1.9992423083515092e-05, "loss": 7.5037, "step": 13220 }, { "epoch": 0.03720671239464426, "grad_norm": 39.75, "learning_rate": 1.9992411616953898e-05, "loss": 7.6243, "step": 13230 }, { "epoch": 0.03723483538209297, "grad_norm": 35.5, "learning_rate": 1.9992400141726073e-05, "loss": 7.7285, "step": 13240 }, { "epoch": 0.03726295836954168, "grad_norm": 29.875, "learning_rate": 1.9992388657831624e-05, "loss": 8.0422, "step": 13250 }, { "epoch": 0.03729108135699039, "grad_norm": 33.75, "learning_rate": 1.9992377165270557e-05, "loss": 7.9258, "step": 13260 }, { "epoch": 0.037319204344439104, "grad_norm": 25.5, "learning_rate": 1.9992365664042886e-05, "loss": 7.5074, "step": 13270 }, { "epoch": 0.03734732733188781, "grad_norm": 28.25, "learning_rate": 1.9992354154148625e-05, "loss": 7.5044, "step": 13280 }, { "epoch": 0.03737545031933652, "grad_norm": 33.25, "learning_rate": 1.9992342635587777e-05, "loss": 8.3605, "step": 13290 }, { "epoch": 0.03740357330678523, "grad_norm": 48.25, "learning_rate": 1.9992331108360356e-05, "loss": 8.1142, "step": 13300 }, { "epoch": 0.03743169629423394, "grad_norm": 31.125, "learning_rate": 1.999231957246637e-05, "loss": 7.85, "step": 13310 }, { "epoch": 0.037459819281682655, "grad_norm": 45.75, "learning_rate": 1.9992308027905833e-05, "loss": 8.0692, "step": 13320 }, { "epoch": 0.037487942269131366, "grad_norm": 42.0, "learning_rate": 1.999229647467875e-05, "loss": 7.7516, "step": 13330 }, { "epoch": 0.03751606525658008, "grad_norm": 23.875, "learning_rate": 1.9992284912785128e-05, "loss": 7.626, "step": 13340 }, { "epoch": 0.03754418824402879, "grad_norm": 23.375, "learning_rate": 1.9992273342224985e-05, "loss": 8.0699, "step": 13350 }, { "epoch": 0.037572311231477494, "grad_norm": 33.0, "learning_rate": 1.999226176299833e-05, "loss": 8.6411, "step": 13360 }, { "epoch": 0.037600434218926206, "grad_norm": 22.625, "learning_rate": 1.999225017510517e-05, "loss": 7.7081, "step": 13370 }, { "epoch": 0.03762855720637492, "grad_norm": 23.5, "learning_rate": 1.9992238578545514e-05, "loss": 7.9727, "step": 13380 }, { "epoch": 0.03765668019382363, "grad_norm": 32.75, "learning_rate": 1.999222697331938e-05, "loss": 8.0928, "step": 13390 }, { "epoch": 0.03768480318127234, "grad_norm": 43.25, "learning_rate": 1.9992215359426765e-05, "loss": 8.4706, "step": 13400 }, { "epoch": 0.03771292616872105, "grad_norm": 26.625, "learning_rate": 1.999220373686769e-05, "loss": 8.1479, "step": 13410 }, { "epoch": 0.03774104915616976, "grad_norm": 33.75, "learning_rate": 1.999219210564216e-05, "loss": 9.0185, "step": 13420 }, { "epoch": 0.037769172143618475, "grad_norm": 30.75, "learning_rate": 1.9992180465750187e-05, "loss": 8.1988, "step": 13430 }, { "epoch": 0.037797295131067186, "grad_norm": 36.0, "learning_rate": 1.999216881719178e-05, "loss": 7.8366, "step": 13440 }, { "epoch": 0.03782541811851589, "grad_norm": 31.25, "learning_rate": 1.9992157159966946e-05, "loss": 7.7922, "step": 13450 }, { "epoch": 0.0378535411059646, "grad_norm": 29.375, "learning_rate": 1.99921454940757e-05, "loss": 7.4517, "step": 13460 }, { "epoch": 0.037881664093413314, "grad_norm": 25.0, "learning_rate": 1.9992133819518056e-05, "loss": 8.6028, "step": 13470 }, { "epoch": 0.037909787080862026, "grad_norm": 27.125, "learning_rate": 1.9992122136294016e-05, "loss": 7.9583, "step": 13480 }, { "epoch": 0.03793791006831074, "grad_norm": 42.25, "learning_rate": 1.9992110444403593e-05, "loss": 7.9869, "step": 13490 }, { "epoch": 0.03796603305575945, "grad_norm": 44.25, "learning_rate": 1.9992098743846796e-05, "loss": 8.5041, "step": 13500 }, { "epoch": 0.03799415604320816, "grad_norm": 37.25, "learning_rate": 1.999208703462364e-05, "loss": 7.2959, "step": 13510 }, { "epoch": 0.03802227903065687, "grad_norm": 45.5, "learning_rate": 1.9992075316734126e-05, "loss": 7.6549, "step": 13520 }, { "epoch": 0.03805040201810558, "grad_norm": 20.0, "learning_rate": 1.9992063590178273e-05, "loss": 8.3891, "step": 13530 }, { "epoch": 0.03807852500555429, "grad_norm": 65.0, "learning_rate": 1.999205185495609e-05, "loss": 8.8544, "step": 13540 }, { "epoch": 0.038106647993003, "grad_norm": 40.5, "learning_rate": 1.9992040111067582e-05, "loss": 8.2655, "step": 13550 }, { "epoch": 0.03813477098045171, "grad_norm": 27.5, "learning_rate": 1.9992028358512764e-05, "loss": 8.0888, "step": 13560 }, { "epoch": 0.03816289396790042, "grad_norm": 63.25, "learning_rate": 1.999201659729164e-05, "loss": 8.1617, "step": 13570 }, { "epoch": 0.038191016955349134, "grad_norm": 47.25, "learning_rate": 1.999200482740423e-05, "loss": 7.8589, "step": 13580 }, { "epoch": 0.038219139942797846, "grad_norm": 23.875, "learning_rate": 1.9991993048850537e-05, "loss": 8.0865, "step": 13590 }, { "epoch": 0.03824726293024656, "grad_norm": 30.125, "learning_rate": 1.999198126163057e-05, "loss": 7.8325, "step": 13600 }, { "epoch": 0.03827538591769526, "grad_norm": 28.0, "learning_rate": 1.9991969465744345e-05, "loss": 7.9923, "step": 13610 }, { "epoch": 0.038303508905143974, "grad_norm": 23.25, "learning_rate": 1.999195766119187e-05, "loss": 7.0202, "step": 13620 }, { "epoch": 0.038331631892592685, "grad_norm": 31.125, "learning_rate": 1.9991945847973154e-05, "loss": 7.9617, "step": 13630 }, { "epoch": 0.0383597548800414, "grad_norm": 34.5, "learning_rate": 1.9991934026088205e-05, "loss": 7.7364, "step": 13640 }, { "epoch": 0.03838787786749011, "grad_norm": 31.125, "learning_rate": 1.9991922195537042e-05, "loss": 8.0584, "step": 13650 }, { "epoch": 0.03841600085493882, "grad_norm": 30.5, "learning_rate": 1.9991910356319668e-05, "loss": 7.6093, "step": 13660 }, { "epoch": 0.03844412384238753, "grad_norm": 22.75, "learning_rate": 1.999189850843609e-05, "loss": 8.0979, "step": 13670 }, { "epoch": 0.03847224682983624, "grad_norm": 26.25, "learning_rate": 1.9991886651886327e-05, "loss": 7.878, "step": 13680 }, { "epoch": 0.03850036981728495, "grad_norm": 102.0, "learning_rate": 1.999187478667038e-05, "loss": 7.6886, "step": 13690 }, { "epoch": 0.03852849280473366, "grad_norm": 43.25, "learning_rate": 1.9991862912788274e-05, "loss": 7.7273, "step": 13700 }, { "epoch": 0.03855661579218237, "grad_norm": 23.25, "learning_rate": 1.9991851030240005e-05, "loss": 7.7588, "step": 13710 }, { "epoch": 0.03858473877963108, "grad_norm": 31.625, "learning_rate": 1.9991839139025585e-05, "loss": 8.3302, "step": 13720 }, { "epoch": 0.038612861767079794, "grad_norm": 40.0, "learning_rate": 1.9991827239145032e-05, "loss": 7.8507, "step": 13730 }, { "epoch": 0.038640984754528505, "grad_norm": 22.75, "learning_rate": 1.999181533059835e-05, "loss": 7.9962, "step": 13740 }, { "epoch": 0.03866910774197722, "grad_norm": 36.5, "learning_rate": 1.9991803413385552e-05, "loss": 7.6173, "step": 13750 }, { "epoch": 0.03869723072942593, "grad_norm": 32.0, "learning_rate": 1.9991791487506648e-05, "loss": 7.7178, "step": 13760 }, { "epoch": 0.03872535371687463, "grad_norm": 31.875, "learning_rate": 1.9991779552961644e-05, "loss": 8.2425, "step": 13770 }, { "epoch": 0.038753476704323345, "grad_norm": 23.375, "learning_rate": 1.999176760975056e-05, "loss": 7.9868, "step": 13780 }, { "epoch": 0.038781599691772056, "grad_norm": 24.5, "learning_rate": 1.9991755657873394e-05, "loss": 8.6608, "step": 13790 }, { "epoch": 0.03880972267922077, "grad_norm": 39.5, "learning_rate": 1.999174369733017e-05, "loss": 8.0978, "step": 13800 }, { "epoch": 0.03883784566666948, "grad_norm": 34.25, "learning_rate": 1.9991731728120887e-05, "loss": 7.7132, "step": 13810 }, { "epoch": 0.03886596865411819, "grad_norm": 38.0, "learning_rate": 1.999171975024556e-05, "loss": 8.1656, "step": 13820 }, { "epoch": 0.0388940916415669, "grad_norm": 33.0, "learning_rate": 1.99917077637042e-05, "loss": 7.8016, "step": 13830 }, { "epoch": 0.038922214629015614, "grad_norm": 30.125, "learning_rate": 1.9991695768496815e-05, "loss": 8.6725, "step": 13840 }, { "epoch": 0.03895033761646432, "grad_norm": 24.25, "learning_rate": 1.9991683764623418e-05, "loss": 7.4398, "step": 13850 }, { "epoch": 0.03897846060391303, "grad_norm": 38.75, "learning_rate": 1.9991671752084018e-05, "loss": 7.9079, "step": 13860 }, { "epoch": 0.03900658359136174, "grad_norm": 23.25, "learning_rate": 1.9991659730878626e-05, "loss": 7.7781, "step": 13870 }, { "epoch": 0.03903470657881045, "grad_norm": 32.75, "learning_rate": 1.999164770100725e-05, "loss": 7.5926, "step": 13880 }, { "epoch": 0.039062829566259165, "grad_norm": 40.0, "learning_rate": 1.9991635662469905e-05, "loss": 7.6708, "step": 13890 }, { "epoch": 0.039090952553707876, "grad_norm": 33.25, "learning_rate": 1.99916236152666e-05, "loss": 8.1736, "step": 13900 }, { "epoch": 0.03911907554115659, "grad_norm": 31.5, "learning_rate": 1.999161155939734e-05, "loss": 8.3694, "step": 13910 }, { "epoch": 0.0391471985286053, "grad_norm": 45.25, "learning_rate": 1.9991599494862144e-05, "loss": 8.5924, "step": 13920 }, { "epoch": 0.039175321516054004, "grad_norm": 32.75, "learning_rate": 1.9991587421661015e-05, "loss": 7.9896, "step": 13930 }, { "epoch": 0.039203444503502716, "grad_norm": 24.875, "learning_rate": 1.999157533979397e-05, "loss": 8.1262, "step": 13940 }, { "epoch": 0.03923156749095143, "grad_norm": 27.875, "learning_rate": 1.9991563249261016e-05, "loss": 8.1849, "step": 13950 }, { "epoch": 0.03925969047840014, "grad_norm": 31.5, "learning_rate": 1.9991551150062162e-05, "loss": 8.659, "step": 13960 }, { "epoch": 0.03928781346584885, "grad_norm": 23.25, "learning_rate": 1.999153904219742e-05, "loss": 8.0891, "step": 13970 }, { "epoch": 0.03931593645329756, "grad_norm": 28.25, "learning_rate": 1.99915269256668e-05, "loss": 8.1969, "step": 13980 }, { "epoch": 0.039344059440746273, "grad_norm": 22.875, "learning_rate": 1.9991514800470318e-05, "loss": 8.0725, "step": 13990 }, { "epoch": 0.039372182428194985, "grad_norm": 26.875, "learning_rate": 1.9991502666607976e-05, "loss": 7.5807, "step": 14000 }, { "epoch": 0.03940030541564369, "grad_norm": 27.25, "learning_rate": 1.9991490524079792e-05, "loss": 8.0843, "step": 14010 }, { "epoch": 0.0394284284030924, "grad_norm": 32.5, "learning_rate": 1.9991478372885767e-05, "loss": 7.7271, "step": 14020 }, { "epoch": 0.03945655139054111, "grad_norm": 34.75, "learning_rate": 1.999146621302592e-05, "loss": 7.983, "step": 14030 }, { "epoch": 0.039484674377989824, "grad_norm": 31.125, "learning_rate": 1.999145404450026e-05, "loss": 7.5228, "step": 14040 }, { "epoch": 0.039512797365438536, "grad_norm": 31.375, "learning_rate": 1.9991441867308797e-05, "loss": 8.0152, "step": 14050 }, { "epoch": 0.03954092035288725, "grad_norm": 26.5, "learning_rate": 1.999142968145154e-05, "loss": 7.6515, "step": 14060 }, { "epoch": 0.03956904334033596, "grad_norm": 48.0, "learning_rate": 1.99914174869285e-05, "loss": 8.1369, "step": 14070 }, { "epoch": 0.03959716632778467, "grad_norm": 22.875, "learning_rate": 1.999140528373969e-05, "loss": 7.8141, "step": 14080 }, { "epoch": 0.039625289315233375, "grad_norm": 38.25, "learning_rate": 1.9991393071885115e-05, "loss": 8.06, "step": 14090 }, { "epoch": 0.03965341230268209, "grad_norm": 26.875, "learning_rate": 1.999138085136479e-05, "loss": 7.9396, "step": 14100 }, { "epoch": 0.0396815352901308, "grad_norm": 27.625, "learning_rate": 1.999136862217873e-05, "loss": 8.3304, "step": 14110 }, { "epoch": 0.03970965827757951, "grad_norm": 24.5, "learning_rate": 1.999135638432694e-05, "loss": 8.7337, "step": 14120 }, { "epoch": 0.03973778126502822, "grad_norm": 50.75, "learning_rate": 1.9991344137809428e-05, "loss": 7.23, "step": 14130 }, { "epoch": 0.03976590425247693, "grad_norm": 27.25, "learning_rate": 1.9991331882626208e-05, "loss": 7.1619, "step": 14140 }, { "epoch": 0.039794027239925645, "grad_norm": 30.0, "learning_rate": 1.999131961877729e-05, "loss": 7.7033, "step": 14150 }, { "epoch": 0.039822150227374356, "grad_norm": 32.5, "learning_rate": 1.9991307346262687e-05, "loss": 7.6366, "step": 14160 }, { "epoch": 0.03985027321482307, "grad_norm": 27.875, "learning_rate": 1.999129506508241e-05, "loss": 8.2651, "step": 14170 }, { "epoch": 0.03987839620227177, "grad_norm": 24.0, "learning_rate": 1.999128277523646e-05, "loss": 8.1208, "step": 14180 }, { "epoch": 0.039906519189720484, "grad_norm": 24.5, "learning_rate": 1.999127047672486e-05, "loss": 7.7212, "step": 14190 }, { "epoch": 0.039934642177169195, "grad_norm": 34.25, "learning_rate": 1.9991258169547615e-05, "loss": 8.4624, "step": 14200 }, { "epoch": 0.03996276516461791, "grad_norm": 28.75, "learning_rate": 1.9991245853704737e-05, "loss": 8.5372, "step": 14210 }, { "epoch": 0.03999088815206662, "grad_norm": 24.125, "learning_rate": 1.9991233529196234e-05, "loss": 7.9803, "step": 14220 }, { "epoch": 0.04001901113951533, "grad_norm": 29.125, "learning_rate": 1.999122119602212e-05, "loss": 7.6576, "step": 14230 }, { "epoch": 0.04004713412696404, "grad_norm": 27.375, "learning_rate": 1.9991208854182408e-05, "loss": 7.9341, "step": 14240 }, { "epoch": 0.04007525711441275, "grad_norm": 23.25, "learning_rate": 1.99911965036771e-05, "loss": 8.0133, "step": 14250 }, { "epoch": 0.04010338010186146, "grad_norm": 32.25, "learning_rate": 1.999118414450621e-05, "loss": 8.3118, "step": 14260 }, { "epoch": 0.04013150308931017, "grad_norm": 22.625, "learning_rate": 1.9991171776669758e-05, "loss": 7.5477, "step": 14270 }, { "epoch": 0.04015962607675888, "grad_norm": 31.625, "learning_rate": 1.9991159400167743e-05, "loss": 8.2519, "step": 14280 }, { "epoch": 0.04018774906420759, "grad_norm": 28.5, "learning_rate": 1.9991147015000182e-05, "loss": 8.3795, "step": 14290 }, { "epoch": 0.040215872051656304, "grad_norm": 28.375, "learning_rate": 1.999113462116708e-05, "loss": 7.9344, "step": 14300 }, { "epoch": 0.040243995039105016, "grad_norm": 24.0, "learning_rate": 1.9991122218668456e-05, "loss": 7.8801, "step": 14310 }, { "epoch": 0.04027211802655373, "grad_norm": 27.375, "learning_rate": 1.9991109807504312e-05, "loss": 7.9509, "step": 14320 }, { "epoch": 0.04030024101400244, "grad_norm": 30.875, "learning_rate": 1.999109738767467e-05, "loss": 8.0977, "step": 14330 }, { "epoch": 0.04032836400145114, "grad_norm": 37.25, "learning_rate": 1.9991084959179525e-05, "loss": 8.1091, "step": 14340 }, { "epoch": 0.040356486988899855, "grad_norm": 34.0, "learning_rate": 1.9991072522018903e-05, "loss": 7.8797, "step": 14350 }, { "epoch": 0.040384609976348566, "grad_norm": 31.5, "learning_rate": 1.9991060076192802e-05, "loss": 8.8112, "step": 14360 }, { "epoch": 0.04041273296379728, "grad_norm": 35.5, "learning_rate": 1.9991047621701245e-05, "loss": 7.7425, "step": 14370 }, { "epoch": 0.04044085595124599, "grad_norm": 30.0, "learning_rate": 1.9991035158544237e-05, "loss": 7.1846, "step": 14380 }, { "epoch": 0.0404689789386947, "grad_norm": 63.5, "learning_rate": 1.9991022686721786e-05, "loss": 9.0637, "step": 14390 }, { "epoch": 0.04049710192614341, "grad_norm": 33.25, "learning_rate": 1.9991010206233908e-05, "loss": 7.6429, "step": 14400 }, { "epoch": 0.040525224913592124, "grad_norm": 34.25, "learning_rate": 1.9990997717080608e-05, "loss": 7.5113, "step": 14410 }, { "epoch": 0.04055334790104083, "grad_norm": 27.125, "learning_rate": 1.9990985219261906e-05, "loss": 8.1203, "step": 14420 }, { "epoch": 0.04058147088848954, "grad_norm": 64.5, "learning_rate": 1.9990972712777804e-05, "loss": 8.5142, "step": 14430 }, { "epoch": 0.04060959387593825, "grad_norm": 41.25, "learning_rate": 1.9990960197628314e-05, "loss": 8.356, "step": 14440 }, { "epoch": 0.04063771686338696, "grad_norm": 26.75, "learning_rate": 1.9990947673813446e-05, "loss": 7.1781, "step": 14450 }, { "epoch": 0.040665839850835675, "grad_norm": 32.5, "learning_rate": 1.9990935141333222e-05, "loss": 7.6836, "step": 14460 }, { "epoch": 0.04069396283828439, "grad_norm": 29.625, "learning_rate": 1.999092260018764e-05, "loss": 7.677, "step": 14470 }, { "epoch": 0.0407220858257331, "grad_norm": 22.25, "learning_rate": 1.9990910050376718e-05, "loss": 7.6785, "step": 14480 }, { "epoch": 0.04075020881318181, "grad_norm": 24.625, "learning_rate": 1.999089749190046e-05, "loss": 7.7934, "step": 14490 }, { "epoch": 0.040778331800630514, "grad_norm": 23.75, "learning_rate": 1.9990884924758884e-05, "loss": 8.4624, "step": 14500 }, { "epoch": 0.040806454788079226, "grad_norm": 33.5, "learning_rate": 1.9990872348951996e-05, "loss": 8.6198, "step": 14510 }, { "epoch": 0.04083457777552794, "grad_norm": 35.25, "learning_rate": 1.999085976447981e-05, "loss": 8.1483, "step": 14520 }, { "epoch": 0.04086270076297665, "grad_norm": 35.25, "learning_rate": 1.9990847171342338e-05, "loss": 7.164, "step": 14530 }, { "epoch": 0.04089082375042536, "grad_norm": 28.125, "learning_rate": 1.999083456953959e-05, "loss": 7.7325, "step": 14540 }, { "epoch": 0.04091894673787407, "grad_norm": 19.875, "learning_rate": 1.9990821959071573e-05, "loss": 8.1406, "step": 14550 }, { "epoch": 0.040947069725322784, "grad_norm": 46.75, "learning_rate": 1.99908093399383e-05, "loss": 7.9699, "step": 14560 }, { "epoch": 0.040975192712771495, "grad_norm": 44.5, "learning_rate": 1.9990796712139783e-05, "loss": 7.626, "step": 14570 }, { "epoch": 0.0410033157002202, "grad_norm": 56.25, "learning_rate": 1.9990784075676032e-05, "loss": 8.7337, "step": 14580 }, { "epoch": 0.04103143868766891, "grad_norm": 44.75, "learning_rate": 1.9990771430547063e-05, "loss": 8.0525, "step": 14590 }, { "epoch": 0.04105956167511762, "grad_norm": 24.125, "learning_rate": 1.999075877675288e-05, "loss": 7.5899, "step": 14600 }, { "epoch": 0.041087684662566334, "grad_norm": 37.75, "learning_rate": 1.9990746114293494e-05, "loss": 7.6175, "step": 14610 }, { "epoch": 0.041115807650015046, "grad_norm": 25.125, "learning_rate": 1.999073344316892e-05, "loss": 8.2447, "step": 14620 }, { "epoch": 0.04114393063746376, "grad_norm": 33.25, "learning_rate": 1.999072076337917e-05, "loss": 7.923, "step": 14630 }, { "epoch": 0.04117205362491247, "grad_norm": 29.125, "learning_rate": 1.999070807492425e-05, "loss": 8.3361, "step": 14640 }, { "epoch": 0.04120017661236118, "grad_norm": 37.25, "learning_rate": 1.9990695377804173e-05, "loss": 7.8386, "step": 14650 }, { "epoch": 0.041228299599809885, "grad_norm": 38.75, "learning_rate": 1.999068267201895e-05, "loss": 8.1347, "step": 14660 }, { "epoch": 0.0412564225872586, "grad_norm": 25.0, "learning_rate": 1.9990669957568592e-05, "loss": 7.2211, "step": 14670 }, { "epoch": 0.04128454557470731, "grad_norm": 22.875, "learning_rate": 1.9990657234453116e-05, "loss": 7.1225, "step": 14680 }, { "epoch": 0.04131266856215602, "grad_norm": 29.125, "learning_rate": 1.9990644502672524e-05, "loss": 8.1533, "step": 14690 }, { "epoch": 0.04134079154960473, "grad_norm": 45.0, "learning_rate": 1.999063176222683e-05, "loss": 8.1926, "step": 14700 }, { "epoch": 0.04136891453705344, "grad_norm": 45.5, "learning_rate": 1.9990619013116048e-05, "loss": 7.6541, "step": 14710 }, { "epoch": 0.041397037524502155, "grad_norm": 45.75, "learning_rate": 1.9990606255340184e-05, "loss": 7.9876, "step": 14720 }, { "epoch": 0.041425160511950866, "grad_norm": 23.5, "learning_rate": 1.9990593488899254e-05, "loss": 8.3273, "step": 14730 }, { "epoch": 0.04145328349939957, "grad_norm": 33.75, "learning_rate": 1.9990580713793265e-05, "loss": 8.1444, "step": 14740 }, { "epoch": 0.04148140648684828, "grad_norm": 40.75, "learning_rate": 1.999056793002223e-05, "loss": 7.4559, "step": 14750 }, { "epoch": 0.041509529474296994, "grad_norm": 30.5, "learning_rate": 1.9990555137586162e-05, "loss": 8.2001, "step": 14760 }, { "epoch": 0.041537652461745705, "grad_norm": 26.5, "learning_rate": 1.9990542336485068e-05, "loss": 8.6624, "step": 14770 }, { "epoch": 0.04156577544919442, "grad_norm": 27.5, "learning_rate": 1.999052952671896e-05, "loss": 7.7957, "step": 14780 }, { "epoch": 0.04159389843664313, "grad_norm": 37.0, "learning_rate": 1.9990516708287854e-05, "loss": 8.1917, "step": 14790 }, { "epoch": 0.04162202142409184, "grad_norm": 44.25, "learning_rate": 1.9990503881191756e-05, "loss": 7.3718, "step": 14800 }, { "epoch": 0.04165014441154055, "grad_norm": 32.5, "learning_rate": 1.999049104543068e-05, "loss": 7.5645, "step": 14810 }, { "epoch": 0.04167826739898926, "grad_norm": 39.0, "learning_rate": 1.9990478201004633e-05, "loss": 8.3762, "step": 14820 }, { "epoch": 0.04170639038643797, "grad_norm": 34.75, "learning_rate": 1.9990465347913632e-05, "loss": 8.6465, "step": 14830 }, { "epoch": 0.04173451337388668, "grad_norm": 26.25, "learning_rate": 1.9990452486157684e-05, "loss": 7.7829, "step": 14840 }, { "epoch": 0.04176263636133539, "grad_norm": 34.5, "learning_rate": 1.99904396157368e-05, "loss": 7.9722, "step": 14850 }, { "epoch": 0.0417907593487841, "grad_norm": 42.25, "learning_rate": 1.999042673665099e-05, "loss": 7.7614, "step": 14860 }, { "epoch": 0.041818882336232814, "grad_norm": 47.0, "learning_rate": 1.9990413848900272e-05, "loss": 7.9466, "step": 14870 }, { "epoch": 0.041847005323681526, "grad_norm": 30.625, "learning_rate": 1.999040095248465e-05, "loss": 8.0575, "step": 14880 }, { "epoch": 0.04187512831113024, "grad_norm": 29.25, "learning_rate": 1.999038804740414e-05, "loss": 8.1235, "step": 14890 }, { "epoch": 0.04190325129857895, "grad_norm": 40.0, "learning_rate": 1.999037513365875e-05, "loss": 8.1925, "step": 14900 }, { "epoch": 0.04193137428602765, "grad_norm": 26.375, "learning_rate": 1.999036221124849e-05, "loss": 8.5718, "step": 14910 }, { "epoch": 0.041959497273476365, "grad_norm": 27.0, "learning_rate": 1.9990349280173377e-05, "loss": 8.6519, "step": 14920 }, { "epoch": 0.041987620260925076, "grad_norm": 23.25, "learning_rate": 1.9990336340433417e-05, "loss": 7.4102, "step": 14930 }, { "epoch": 0.04201574324837379, "grad_norm": 23.25, "learning_rate": 1.999032339202862e-05, "loss": 7.9522, "step": 14940 }, { "epoch": 0.0420438662358225, "grad_norm": 21.625, "learning_rate": 1.9990310434959006e-05, "loss": 8.0161, "step": 14950 }, { "epoch": 0.04207198922327121, "grad_norm": 28.5, "learning_rate": 1.9990297469224576e-05, "loss": 8.1599, "step": 14960 }, { "epoch": 0.04210011221071992, "grad_norm": 52.0, "learning_rate": 1.999028449482535e-05, "loss": 8.6145, "step": 14970 }, { "epoch": 0.042128235198168634, "grad_norm": 41.75, "learning_rate": 1.999027151176133e-05, "loss": 7.9823, "step": 14980 }, { "epoch": 0.04215635818561734, "grad_norm": 29.0, "learning_rate": 1.9990258520032536e-05, "loss": 7.5883, "step": 14990 }, { "epoch": 0.04218448117306605, "grad_norm": 32.25, "learning_rate": 1.9990245519638972e-05, "loss": 8.4586, "step": 15000 }, { "epoch": 0.04221260416051476, "grad_norm": 26.25, "learning_rate": 1.9990232510580654e-05, "loss": 7.1532, "step": 15010 }, { "epoch": 0.042240727147963474, "grad_norm": 28.75, "learning_rate": 1.9990219492857595e-05, "loss": 8.5273, "step": 15020 }, { "epoch": 0.042268850135412185, "grad_norm": 22.5, "learning_rate": 1.99902064664698e-05, "loss": 8.2674, "step": 15030 }, { "epoch": 0.0422969731228609, "grad_norm": 35.25, "learning_rate": 1.9990193431417283e-05, "loss": 8.1158, "step": 15040 }, { "epoch": 0.04232509611030961, "grad_norm": 25.25, "learning_rate": 1.9990180387700057e-05, "loss": 8.3668, "step": 15050 }, { "epoch": 0.04235321909775832, "grad_norm": 46.25, "learning_rate": 1.9990167335318135e-05, "loss": 7.5332, "step": 15060 }, { "epoch": 0.042381342085207024, "grad_norm": 74.0, "learning_rate": 1.999015427427152e-05, "loss": 8.0822, "step": 15070 }, { "epoch": 0.042409465072655736, "grad_norm": 38.0, "learning_rate": 1.999014120456023e-05, "loss": 7.8682, "step": 15080 }, { "epoch": 0.04243758806010445, "grad_norm": 27.875, "learning_rate": 1.9990128126184277e-05, "loss": 8.1402, "step": 15090 }, { "epoch": 0.04246571104755316, "grad_norm": 21.0, "learning_rate": 1.999011503914367e-05, "loss": 8.1576, "step": 15100 }, { "epoch": 0.04249383403500187, "grad_norm": 21.625, "learning_rate": 1.9990101943438422e-05, "loss": 8.322, "step": 15110 }, { "epoch": 0.04252195702245058, "grad_norm": 29.125, "learning_rate": 1.999008883906854e-05, "loss": 7.13, "step": 15120 }, { "epoch": 0.042550080009899294, "grad_norm": 52.0, "learning_rate": 1.9990075726034042e-05, "loss": 7.9855, "step": 15130 }, { "epoch": 0.042578202997348005, "grad_norm": 33.25, "learning_rate": 1.9990062604334938e-05, "loss": 7.8318, "step": 15140 }, { "epoch": 0.04260632598479671, "grad_norm": 51.0, "learning_rate": 1.9990049473971232e-05, "loss": 7.5613, "step": 15150 }, { "epoch": 0.04263444897224542, "grad_norm": 30.375, "learning_rate": 1.999003633494294e-05, "loss": 8.262, "step": 15160 }, { "epoch": 0.04266257195969413, "grad_norm": 39.5, "learning_rate": 1.9990023187250077e-05, "loss": 8.1489, "step": 15170 }, { "epoch": 0.042690694947142845, "grad_norm": 29.125, "learning_rate": 1.999001003089265e-05, "loss": 8.6367, "step": 15180 }, { "epoch": 0.042718817934591556, "grad_norm": 31.125, "learning_rate": 1.9989996865870674e-05, "loss": 8.2041, "step": 15190 }, { "epoch": 0.04274694092204027, "grad_norm": 50.25, "learning_rate": 1.9989983692184157e-05, "loss": 7.9912, "step": 15200 }, { "epoch": 0.04277506390948898, "grad_norm": 23.125, "learning_rate": 1.9989970509833117e-05, "loss": 8.1719, "step": 15210 }, { "epoch": 0.04280318689693769, "grad_norm": 26.625, "learning_rate": 1.9989957318817552e-05, "loss": 7.8112, "step": 15220 }, { "epoch": 0.042831309884386395, "grad_norm": 28.0, "learning_rate": 1.9989944119137488e-05, "loss": 8.0234, "step": 15230 }, { "epoch": 0.04285943287183511, "grad_norm": 20.0, "learning_rate": 1.9989930910792927e-05, "loss": 7.4964, "step": 15240 }, { "epoch": 0.04288755585928382, "grad_norm": 28.25, "learning_rate": 1.9989917693783884e-05, "loss": 7.9812, "step": 15250 }, { "epoch": 0.04291567884673253, "grad_norm": 36.25, "learning_rate": 1.9989904468110367e-05, "loss": 7.743, "step": 15260 }, { "epoch": 0.04294380183418124, "grad_norm": 46.25, "learning_rate": 1.9989891233772395e-05, "loss": 8.2282, "step": 15270 }, { "epoch": 0.04297192482162995, "grad_norm": 31.75, "learning_rate": 1.9989877990769973e-05, "loss": 7.965, "step": 15280 }, { "epoch": 0.043000047809078665, "grad_norm": 39.75, "learning_rate": 1.9989864739103116e-05, "loss": 7.4655, "step": 15290 }, { "epoch": 0.043028170796527376, "grad_norm": 36.5, "learning_rate": 1.998985147877183e-05, "loss": 8.2348, "step": 15300 }, { "epoch": 0.04305629378397608, "grad_norm": 41.25, "learning_rate": 1.9989838209776136e-05, "loss": 8.841, "step": 15310 }, { "epoch": 0.04308441677142479, "grad_norm": 28.75, "learning_rate": 1.9989824932116037e-05, "loss": 8.0753, "step": 15320 }, { "epoch": 0.043112539758873504, "grad_norm": 29.5, "learning_rate": 1.9989811645791546e-05, "loss": 8.1346, "step": 15330 }, { "epoch": 0.043140662746322216, "grad_norm": 52.25, "learning_rate": 1.9989798350802677e-05, "loss": 8.1031, "step": 15340 }, { "epoch": 0.04316878573377093, "grad_norm": 36.0, "learning_rate": 1.998978504714944e-05, "loss": 7.9402, "step": 15350 }, { "epoch": 0.04319690872121964, "grad_norm": 34.75, "learning_rate": 1.998977173483185e-05, "loss": 7.9225, "step": 15360 }, { "epoch": 0.04322503170866835, "grad_norm": 39.5, "learning_rate": 1.9989758413849915e-05, "loss": 7.7493, "step": 15370 }, { "epoch": 0.04325315469611706, "grad_norm": 34.75, "learning_rate": 1.9989745084203643e-05, "loss": 7.652, "step": 15380 }, { "epoch": 0.043281277683565766, "grad_norm": 33.25, "learning_rate": 1.9989731745893054e-05, "loss": 8.4337, "step": 15390 }, { "epoch": 0.04330940067101448, "grad_norm": 25.75, "learning_rate": 1.9989718398918154e-05, "loss": 7.3164, "step": 15400 }, { "epoch": 0.04333752365846319, "grad_norm": 24.0, "learning_rate": 1.9989705043278955e-05, "loss": 7.8767, "step": 15410 }, { "epoch": 0.0433656466459119, "grad_norm": 27.25, "learning_rate": 1.998969167897547e-05, "loss": 7.9032, "step": 15420 }, { "epoch": 0.04339376963336061, "grad_norm": 32.25, "learning_rate": 1.998967830600771e-05, "loss": 8.4123, "step": 15430 }, { "epoch": 0.043421892620809324, "grad_norm": 28.625, "learning_rate": 1.9989664924375686e-05, "loss": 7.3174, "step": 15440 }, { "epoch": 0.043450015608258036, "grad_norm": 23.875, "learning_rate": 1.9989651534079412e-05, "loss": 8.1008, "step": 15450 }, { "epoch": 0.04347813859570675, "grad_norm": 34.75, "learning_rate": 1.9989638135118895e-05, "loss": 7.6594, "step": 15460 }, { "epoch": 0.04350626158315546, "grad_norm": 41.75, "learning_rate": 1.9989624727494154e-05, "loss": 7.7052, "step": 15470 }, { "epoch": 0.043534384570604164, "grad_norm": 30.25, "learning_rate": 1.998961131120519e-05, "loss": 8.4516, "step": 15480 }, { "epoch": 0.043562507558052875, "grad_norm": 36.75, "learning_rate": 1.9989597886252027e-05, "loss": 8.3436, "step": 15490 }, { "epoch": 0.04359063054550159, "grad_norm": 24.125, "learning_rate": 1.9989584452634666e-05, "loss": 7.9859, "step": 15500 }, { "epoch": 0.0436187535329503, "grad_norm": 34.5, "learning_rate": 1.9989571010353124e-05, "loss": 7.7361, "step": 15510 }, { "epoch": 0.04364687652039901, "grad_norm": 24.25, "learning_rate": 1.9989557559407414e-05, "loss": 7.8217, "step": 15520 }, { "epoch": 0.04367499950784772, "grad_norm": 39.5, "learning_rate": 1.9989544099797543e-05, "loss": 8.1218, "step": 15530 }, { "epoch": 0.04370312249529643, "grad_norm": 20.375, "learning_rate": 1.9989530631523524e-05, "loss": 7.802, "step": 15540 }, { "epoch": 0.043731245482745144, "grad_norm": 27.375, "learning_rate": 1.9989517154585375e-05, "loss": 7.899, "step": 15550 }, { "epoch": 0.04375936847019385, "grad_norm": 23.125, "learning_rate": 1.9989503668983095e-05, "loss": 8.2947, "step": 15560 }, { "epoch": 0.04378749145764256, "grad_norm": 35.5, "learning_rate": 1.998949017471671e-05, "loss": 8.2794, "step": 15570 }, { "epoch": 0.04381561444509127, "grad_norm": 32.75, "learning_rate": 1.998947667178622e-05, "loss": 8.3958, "step": 15580 }, { "epoch": 0.043843737432539984, "grad_norm": 23.625, "learning_rate": 1.9989463160191645e-05, "loss": 7.7927, "step": 15590 }, { "epoch": 0.043871860419988695, "grad_norm": 37.75, "learning_rate": 1.9989449639932988e-05, "loss": 8.208, "step": 15600 }, { "epoch": 0.04389998340743741, "grad_norm": 48.75, "learning_rate": 1.998943611101027e-05, "loss": 8.1944, "step": 15610 }, { "epoch": 0.04392810639488612, "grad_norm": 35.75, "learning_rate": 1.99894225734235e-05, "loss": 8.2754, "step": 15620 }, { "epoch": 0.04395622938233483, "grad_norm": 22.875, "learning_rate": 1.9989409027172685e-05, "loss": 8.0983, "step": 15630 }, { "epoch": 0.043984352369783535, "grad_norm": 44.0, "learning_rate": 1.998939547225784e-05, "loss": 8.6797, "step": 15640 }, { "epoch": 0.044012475357232246, "grad_norm": 24.375, "learning_rate": 1.998938190867898e-05, "loss": 8.6991, "step": 15650 }, { "epoch": 0.04404059834468096, "grad_norm": 31.5, "learning_rate": 1.998936833643611e-05, "loss": 8.1683, "step": 15660 }, { "epoch": 0.04406872133212967, "grad_norm": 30.625, "learning_rate": 1.998935475552925e-05, "loss": 7.5811, "step": 15670 }, { "epoch": 0.04409684431957838, "grad_norm": 33.25, "learning_rate": 1.9989341165958404e-05, "loss": 7.0774, "step": 15680 }, { "epoch": 0.04412496730702709, "grad_norm": 29.625, "learning_rate": 1.998932756772359e-05, "loss": 7.6559, "step": 15690 }, { "epoch": 0.044153090294475804, "grad_norm": 51.5, "learning_rate": 1.9989313960824814e-05, "loss": 8.0974, "step": 15700 }, { "epoch": 0.044181213281924515, "grad_norm": 26.125, "learning_rate": 1.9989300345262093e-05, "loss": 7.4047, "step": 15710 }, { "epoch": 0.04420933626937322, "grad_norm": 45.75, "learning_rate": 1.9989286721035435e-05, "loss": 7.7699, "step": 15720 }, { "epoch": 0.04423745925682193, "grad_norm": 28.625, "learning_rate": 1.9989273088144855e-05, "loss": 8.1135, "step": 15730 }, { "epoch": 0.04426558224427064, "grad_norm": 32.25, "learning_rate": 1.998925944659036e-05, "loss": 6.9867, "step": 15740 }, { "epoch": 0.044293705231719355, "grad_norm": 27.5, "learning_rate": 1.9989245796371966e-05, "loss": 7.4603, "step": 15750 }, { "epoch": 0.044321828219168066, "grad_norm": 25.125, "learning_rate": 1.9989232137489686e-05, "loss": 7.7796, "step": 15760 }, { "epoch": 0.04434995120661678, "grad_norm": 33.25, "learning_rate": 1.9989218469943526e-05, "loss": 7.7801, "step": 15770 }, { "epoch": 0.04437807419406549, "grad_norm": 41.75, "learning_rate": 1.9989204793733503e-05, "loss": 8.3392, "step": 15780 }, { "epoch": 0.0444061971815142, "grad_norm": 27.625, "learning_rate": 1.9989191108859628e-05, "loss": 7.6179, "step": 15790 }, { "epoch": 0.044434320168962906, "grad_norm": 25.625, "learning_rate": 1.998917741532191e-05, "loss": 7.6459, "step": 15800 }, { "epoch": 0.04446244315641162, "grad_norm": 24.125, "learning_rate": 1.9989163713120366e-05, "loss": 8.2462, "step": 15810 }, { "epoch": 0.04449056614386033, "grad_norm": 25.125, "learning_rate": 1.9989150002255003e-05, "loss": 7.9194, "step": 15820 }, { "epoch": 0.04451868913130904, "grad_norm": 25.75, "learning_rate": 1.9989136282725835e-05, "loss": 8.3128, "step": 15830 }, { "epoch": 0.04454681211875775, "grad_norm": 25.0, "learning_rate": 1.9989122554532875e-05, "loss": 8.1845, "step": 15840 }, { "epoch": 0.04457493510620646, "grad_norm": 26.625, "learning_rate": 1.9989108817676134e-05, "loss": 7.6273, "step": 15850 }, { "epoch": 0.044603058093655175, "grad_norm": 73.5, "learning_rate": 1.998909507215562e-05, "loss": 8.0485, "step": 15860 }, { "epoch": 0.044631181081103886, "grad_norm": 35.0, "learning_rate": 1.9989081317971353e-05, "loss": 7.9216, "step": 15870 }, { "epoch": 0.04465930406855259, "grad_norm": 26.125, "learning_rate": 1.9989067555123338e-05, "loss": 7.7466, "step": 15880 }, { "epoch": 0.0446874270560013, "grad_norm": 23.0, "learning_rate": 1.998905378361159e-05, "loss": 7.5943, "step": 15890 }, { "epoch": 0.044715550043450014, "grad_norm": 36.25, "learning_rate": 1.998904000343612e-05, "loss": 7.4588, "step": 15900 }, { "epoch": 0.044743673030898726, "grad_norm": 45.0, "learning_rate": 1.9989026214596938e-05, "loss": 8.4078, "step": 15910 }, { "epoch": 0.04477179601834744, "grad_norm": 39.75, "learning_rate": 1.9989012417094064e-05, "loss": 8.678, "step": 15920 }, { "epoch": 0.04479991900579615, "grad_norm": 52.5, "learning_rate": 1.99889986109275e-05, "loss": 8.3793, "step": 15930 }, { "epoch": 0.04482804199324486, "grad_norm": 24.0, "learning_rate": 1.9988984796097262e-05, "loss": 7.7575, "step": 15940 }, { "epoch": 0.04485616498069357, "grad_norm": 36.75, "learning_rate": 1.998897097260336e-05, "loss": 8.2181, "step": 15950 }, { "epoch": 0.04488428796814228, "grad_norm": 27.125, "learning_rate": 1.9988957140445815e-05, "loss": 7.629, "step": 15960 }, { "epoch": 0.04491241095559099, "grad_norm": 43.25, "learning_rate": 1.998894329962463e-05, "loss": 7.9694, "step": 15970 }, { "epoch": 0.0449405339430397, "grad_norm": 37.5, "learning_rate": 1.9988929450139814e-05, "loss": 8.0665, "step": 15980 }, { "epoch": 0.04496865693048841, "grad_norm": 31.875, "learning_rate": 1.9988915591991386e-05, "loss": 8.6759, "step": 15990 }, { "epoch": 0.04499677991793712, "grad_norm": 21.625, "learning_rate": 1.998890172517936e-05, "loss": 7.3393, "step": 16000 }, { "epoch": 0.045024902905385834, "grad_norm": 33.5, "learning_rate": 1.9988887849703743e-05, "loss": 8.1811, "step": 16010 }, { "epoch": 0.045053025892834546, "grad_norm": 18.0, "learning_rate": 1.9988873965564543e-05, "loss": 8.4981, "step": 16020 }, { "epoch": 0.04508114888028326, "grad_norm": 26.125, "learning_rate": 1.9988860072761782e-05, "loss": 7.6231, "step": 16030 }, { "epoch": 0.04510927186773196, "grad_norm": 32.75, "learning_rate": 1.9988846171295466e-05, "loss": 8.4545, "step": 16040 }, { "epoch": 0.045137394855180674, "grad_norm": 45.25, "learning_rate": 1.998883226116561e-05, "loss": 7.7677, "step": 16050 }, { "epoch": 0.045165517842629385, "grad_norm": 24.875, "learning_rate": 1.9988818342372225e-05, "loss": 8.2254, "step": 16060 }, { "epoch": 0.0451936408300781, "grad_norm": 25.5, "learning_rate": 1.998880441491532e-05, "loss": 8.5046, "step": 16070 }, { "epoch": 0.04522176381752681, "grad_norm": 24.75, "learning_rate": 1.9988790478794908e-05, "loss": 7.6883, "step": 16080 }, { "epoch": 0.04524988680497552, "grad_norm": 34.25, "learning_rate": 1.998877653401101e-05, "loss": 7.7976, "step": 16090 }, { "epoch": 0.04527800979242423, "grad_norm": 23.5, "learning_rate": 1.9988762580563623e-05, "loss": 7.7764, "step": 16100 }, { "epoch": 0.04530613277987294, "grad_norm": 37.0, "learning_rate": 1.998874861845277e-05, "loss": 7.5296, "step": 16110 }, { "epoch": 0.04533425576732165, "grad_norm": 26.5, "learning_rate": 1.9988734647678458e-05, "loss": 8.0913, "step": 16120 }, { "epoch": 0.04536237875477036, "grad_norm": 23.5, "learning_rate": 1.9988720668240703e-05, "loss": 7.766, "step": 16130 }, { "epoch": 0.04539050174221907, "grad_norm": 25.875, "learning_rate": 1.9988706680139515e-05, "loss": 7.9757, "step": 16140 }, { "epoch": 0.04541862472966778, "grad_norm": 24.375, "learning_rate": 1.9988692683374905e-05, "loss": 7.9978, "step": 16150 }, { "epoch": 0.045446747717116494, "grad_norm": 34.0, "learning_rate": 1.998867867794689e-05, "loss": 7.8265, "step": 16160 }, { "epoch": 0.045474870704565205, "grad_norm": 24.5, "learning_rate": 1.9988664663855478e-05, "loss": 7.7763, "step": 16170 }, { "epoch": 0.04550299369201392, "grad_norm": 45.25, "learning_rate": 1.9988650641100678e-05, "loss": 7.5871, "step": 16180 }, { "epoch": 0.04553111667946263, "grad_norm": 38.25, "learning_rate": 1.9988636609682507e-05, "loss": 8.633, "step": 16190 }, { "epoch": 0.04555923966691134, "grad_norm": 34.5, "learning_rate": 1.9988622569600977e-05, "loss": 7.1273, "step": 16200 }, { "epoch": 0.045587362654360045, "grad_norm": 25.5, "learning_rate": 1.99886085208561e-05, "loss": 7.1782, "step": 16210 }, { "epoch": 0.045615485641808756, "grad_norm": 28.625, "learning_rate": 1.9988594463447888e-05, "loss": 8.4275, "step": 16220 }, { "epoch": 0.04564360862925747, "grad_norm": 25.0, "learning_rate": 1.998858039737635e-05, "loss": 7.6835, "step": 16230 }, { "epoch": 0.04567173161670618, "grad_norm": 18.5, "learning_rate": 1.9988566322641505e-05, "loss": 7.8056, "step": 16240 }, { "epoch": 0.04569985460415489, "grad_norm": 40.25, "learning_rate": 1.9988552239243357e-05, "loss": 8.3347, "step": 16250 }, { "epoch": 0.0457279775916036, "grad_norm": 30.75, "learning_rate": 1.9988538147181926e-05, "loss": 7.8491, "step": 16260 }, { "epoch": 0.045756100579052314, "grad_norm": 45.75, "learning_rate": 1.998852404645722e-05, "loss": 8.3845, "step": 16270 }, { "epoch": 0.045784223566501026, "grad_norm": 29.125, "learning_rate": 1.9988509937069248e-05, "loss": 8.0623, "step": 16280 }, { "epoch": 0.04581234655394973, "grad_norm": 31.375, "learning_rate": 1.9988495819018027e-05, "loss": 8.7344, "step": 16290 }, { "epoch": 0.04584046954139844, "grad_norm": 28.0, "learning_rate": 1.998848169230357e-05, "loss": 8.33, "step": 16300 }, { "epoch": 0.04586859252884715, "grad_norm": 42.75, "learning_rate": 1.9988467556925887e-05, "loss": 8.2033, "step": 16310 }, { "epoch": 0.045896715516295865, "grad_norm": 30.625, "learning_rate": 1.9988453412884992e-05, "loss": 7.4832, "step": 16320 }, { "epoch": 0.045924838503744576, "grad_norm": 26.375, "learning_rate": 1.9988439260180895e-05, "loss": 7.3331, "step": 16330 }, { "epoch": 0.04595296149119329, "grad_norm": 23.875, "learning_rate": 1.9988425098813605e-05, "loss": 7.341, "step": 16340 }, { "epoch": 0.045981084478642, "grad_norm": 33.0, "learning_rate": 1.9988410928783147e-05, "loss": 7.932, "step": 16350 }, { "epoch": 0.04600920746609071, "grad_norm": 41.25, "learning_rate": 1.9988396750089517e-05, "loss": 8.0818, "step": 16360 }, { "epoch": 0.046037330453539416, "grad_norm": 20.75, "learning_rate": 1.998838256273274e-05, "loss": 8.1024, "step": 16370 }, { "epoch": 0.04606545344098813, "grad_norm": 35.0, "learning_rate": 1.998836836671282e-05, "loss": 7.487, "step": 16380 }, { "epoch": 0.04609357642843684, "grad_norm": 29.0, "learning_rate": 1.9988354162029777e-05, "loss": 8.4373, "step": 16390 }, { "epoch": 0.04612169941588555, "grad_norm": 25.375, "learning_rate": 1.9988339948683616e-05, "loss": 7.7159, "step": 16400 }, { "epoch": 0.04614982240333426, "grad_norm": 43.5, "learning_rate": 1.9988325726674354e-05, "loss": 7.2631, "step": 16410 }, { "epoch": 0.04617794539078297, "grad_norm": 19.375, "learning_rate": 1.9988311496002e-05, "loss": 8.0049, "step": 16420 }, { "epoch": 0.046206068378231685, "grad_norm": 28.125, "learning_rate": 1.998829725666657e-05, "loss": 8.6466, "step": 16430 }, { "epoch": 0.0462341913656804, "grad_norm": 39.25, "learning_rate": 1.998828300866807e-05, "loss": 8.6418, "step": 16440 }, { "epoch": 0.0462623143531291, "grad_norm": 37.0, "learning_rate": 1.9988268752006522e-05, "loss": 7.5935, "step": 16450 }, { "epoch": 0.04629043734057781, "grad_norm": 32.25, "learning_rate": 1.9988254486681934e-05, "loss": 6.8411, "step": 16460 }, { "epoch": 0.046318560328026524, "grad_norm": 29.25, "learning_rate": 1.9988240212694316e-05, "loss": 7.8838, "step": 16470 }, { "epoch": 0.046346683315475236, "grad_norm": 29.75, "learning_rate": 1.998822593004368e-05, "loss": 7.3165, "step": 16480 }, { "epoch": 0.04637480630292395, "grad_norm": 27.25, "learning_rate": 1.9988211638730042e-05, "loss": 8.1123, "step": 16490 }, { "epoch": 0.04640292929037266, "grad_norm": 24.375, "learning_rate": 1.9988197338753414e-05, "loss": 8.2335, "step": 16500 }, { "epoch": 0.04643105227782137, "grad_norm": 39.0, "learning_rate": 1.99881830301138e-05, "loss": 8.4012, "step": 16510 }, { "epoch": 0.04645917526527008, "grad_norm": 30.0, "learning_rate": 1.9988168712811228e-05, "loss": 7.7348, "step": 16520 }, { "epoch": 0.04648729825271879, "grad_norm": 30.0, "learning_rate": 1.9988154386845697e-05, "loss": 7.0193, "step": 16530 }, { "epoch": 0.0465154212401675, "grad_norm": 30.125, "learning_rate": 1.9988140052217225e-05, "loss": 7.3901, "step": 16540 }, { "epoch": 0.04654354422761621, "grad_norm": 28.625, "learning_rate": 1.998812570892583e-05, "loss": 8.1108, "step": 16550 }, { "epoch": 0.04657166721506492, "grad_norm": 33.25, "learning_rate": 1.9988111356971508e-05, "loss": 8.0769, "step": 16560 }, { "epoch": 0.04659979020251363, "grad_norm": 46.0, "learning_rate": 1.9988096996354287e-05, "loss": 7.6357, "step": 16570 }, { "epoch": 0.046627913189962344, "grad_norm": 36.75, "learning_rate": 1.9988082627074175e-05, "loss": 8.1465, "step": 16580 }, { "epoch": 0.046656036177411056, "grad_norm": 36.5, "learning_rate": 1.9988068249131182e-05, "loss": 6.9119, "step": 16590 }, { "epoch": 0.04668415916485977, "grad_norm": 35.5, "learning_rate": 1.9988053862525323e-05, "loss": 7.6147, "step": 16600 }, { "epoch": 0.04671228215230847, "grad_norm": 28.25, "learning_rate": 1.9988039467256607e-05, "loss": 8.1272, "step": 16610 }, { "epoch": 0.046740405139757184, "grad_norm": 42.5, "learning_rate": 1.9988025063325055e-05, "loss": 8.4363, "step": 16620 }, { "epoch": 0.046768528127205895, "grad_norm": 25.125, "learning_rate": 1.9988010650730666e-05, "loss": 8.2167, "step": 16630 }, { "epoch": 0.04679665111465461, "grad_norm": 29.75, "learning_rate": 1.998799622947346e-05, "loss": 8.1811, "step": 16640 }, { "epoch": 0.04682477410210332, "grad_norm": 29.5, "learning_rate": 1.9987981799553458e-05, "loss": 7.9032, "step": 16650 }, { "epoch": 0.04685289708955203, "grad_norm": 44.75, "learning_rate": 1.998796736097066e-05, "loss": 8.8709, "step": 16660 }, { "epoch": 0.04688102007700074, "grad_norm": 33.5, "learning_rate": 1.998795291372508e-05, "loss": 8.5267, "step": 16670 }, { "epoch": 0.04690914306444945, "grad_norm": 21.5, "learning_rate": 1.9987938457816737e-05, "loss": 7.6331, "step": 16680 }, { "epoch": 0.04693726605189816, "grad_norm": 34.5, "learning_rate": 1.998792399324564e-05, "loss": 8.2361, "step": 16690 }, { "epoch": 0.04696538903934687, "grad_norm": 41.75, "learning_rate": 1.9987909520011795e-05, "loss": 8.6306, "step": 16700 }, { "epoch": 0.04699351202679558, "grad_norm": 43.0, "learning_rate": 1.9987895038115224e-05, "loss": 8.5579, "step": 16710 }, { "epoch": 0.04702163501424429, "grad_norm": 21.25, "learning_rate": 1.998788054755594e-05, "loss": 7.9455, "step": 16720 }, { "epoch": 0.047049758001693004, "grad_norm": 33.5, "learning_rate": 1.9987866048333945e-05, "loss": 8.5222, "step": 16730 }, { "epoch": 0.047077880989141715, "grad_norm": 24.625, "learning_rate": 1.998785154044926e-05, "loss": 7.2171, "step": 16740 }, { "epoch": 0.04710600397659043, "grad_norm": 23.5, "learning_rate": 1.99878370239019e-05, "loss": 8.4293, "step": 16750 }, { "epoch": 0.04713412696403914, "grad_norm": 38.75, "learning_rate": 1.998782249869187e-05, "loss": 8.419, "step": 16760 }, { "epoch": 0.04716224995148784, "grad_norm": 25.5, "learning_rate": 1.9987807964819192e-05, "loss": 7.6227, "step": 16770 }, { "epoch": 0.047190372938936555, "grad_norm": 31.0, "learning_rate": 1.9987793422283864e-05, "loss": 8.2135, "step": 16780 }, { "epoch": 0.047218495926385266, "grad_norm": 23.875, "learning_rate": 1.9987778871085917e-05, "loss": 8.5281, "step": 16790 }, { "epoch": 0.04724661891383398, "grad_norm": 37.0, "learning_rate": 1.9987764311225346e-05, "loss": 8.2783, "step": 16800 }, { "epoch": 0.04727474190128269, "grad_norm": 27.75, "learning_rate": 1.9987749742702177e-05, "loss": 8.7025, "step": 16810 }, { "epoch": 0.0473028648887314, "grad_norm": 39.0, "learning_rate": 1.9987735165516415e-05, "loss": 8.2682, "step": 16820 }, { "epoch": 0.04733098787618011, "grad_norm": 29.5, "learning_rate": 1.9987720579668075e-05, "loss": 7.9711, "step": 16830 }, { "epoch": 0.047359110863628824, "grad_norm": 28.625, "learning_rate": 1.9987705985157172e-05, "loss": 7.2494, "step": 16840 }, { "epoch": 0.047387233851077536, "grad_norm": 29.25, "learning_rate": 1.998769138198371e-05, "loss": 7.2451, "step": 16850 }, { "epoch": 0.04741535683852624, "grad_norm": 22.25, "learning_rate": 1.9987676770147715e-05, "loss": 8.2972, "step": 16860 }, { "epoch": 0.04744347982597495, "grad_norm": 25.25, "learning_rate": 1.9987662149649188e-05, "loss": 8.4163, "step": 16870 }, { "epoch": 0.04747160281342366, "grad_norm": 27.125, "learning_rate": 1.998764752048815e-05, "loss": 7.5057, "step": 16880 }, { "epoch": 0.047499725800872375, "grad_norm": 32.25, "learning_rate": 1.9987632882664604e-05, "loss": 7.6181, "step": 16890 }, { "epoch": 0.047527848788321087, "grad_norm": 44.5, "learning_rate": 1.9987618236178574e-05, "loss": 7.6507, "step": 16900 }, { "epoch": 0.0475559717757698, "grad_norm": 32.5, "learning_rate": 1.9987603581030066e-05, "loss": 8.1165, "step": 16910 }, { "epoch": 0.04758409476321851, "grad_norm": 35.0, "learning_rate": 1.9987588917219095e-05, "loss": 7.9291, "step": 16920 }, { "epoch": 0.04761221775066722, "grad_norm": 25.25, "learning_rate": 1.998757424474567e-05, "loss": 8.4136, "step": 16930 }, { "epoch": 0.047640340738115926, "grad_norm": 28.0, "learning_rate": 1.998755956360981e-05, "loss": 7.6407, "step": 16940 }, { "epoch": 0.04766846372556464, "grad_norm": 88.0, "learning_rate": 1.9987544873811523e-05, "loss": 8.2023, "step": 16950 }, { "epoch": 0.04769658671301335, "grad_norm": 40.0, "learning_rate": 1.9987530175350823e-05, "loss": 7.8967, "step": 16960 }, { "epoch": 0.04772470970046206, "grad_norm": 35.25, "learning_rate": 1.998751546822772e-05, "loss": 7.9824, "step": 16970 }, { "epoch": 0.04775283268791077, "grad_norm": 21.75, "learning_rate": 1.998750075244223e-05, "loss": 7.1745, "step": 16980 }, { "epoch": 0.047780955675359484, "grad_norm": 31.75, "learning_rate": 1.998748602799437e-05, "loss": 8.5975, "step": 16990 }, { "epoch": 0.047809078662808195, "grad_norm": 26.75, "learning_rate": 1.9987471294884148e-05, "loss": 8.3548, "step": 17000 }, { "epoch": 0.04783720165025691, "grad_norm": 19.5, "learning_rate": 1.998745655311157e-05, "loss": 7.9876, "step": 17010 }, { "epoch": 0.04786532463770561, "grad_norm": 25.25, "learning_rate": 1.998744180267666e-05, "loss": 7.9681, "step": 17020 }, { "epoch": 0.04789344762515432, "grad_norm": 35.0, "learning_rate": 1.9987427043579427e-05, "loss": 7.7028, "step": 17030 }, { "epoch": 0.047921570612603034, "grad_norm": 27.625, "learning_rate": 1.998741227581988e-05, "loss": 7.6756, "step": 17040 }, { "epoch": 0.047949693600051746, "grad_norm": 21.75, "learning_rate": 1.998739749939804e-05, "loss": 8.0954, "step": 17050 }, { "epoch": 0.04797781658750046, "grad_norm": 41.0, "learning_rate": 1.998738271431391e-05, "loss": 7.5269, "step": 17060 }, { "epoch": 0.04800593957494917, "grad_norm": 26.125, "learning_rate": 1.998736792056751e-05, "loss": 8.3666, "step": 17070 }, { "epoch": 0.04803406256239788, "grad_norm": 25.875, "learning_rate": 1.9987353118158852e-05, "loss": 7.4585, "step": 17080 }, { "epoch": 0.04806218554984659, "grad_norm": 33.0, "learning_rate": 1.9987338307087944e-05, "loss": 8.1088, "step": 17090 }, { "epoch": 0.0480903085372953, "grad_norm": 39.0, "learning_rate": 1.9987323487354806e-05, "loss": 8.3615, "step": 17100 }, { "epoch": 0.04811843152474401, "grad_norm": 50.75, "learning_rate": 1.9987308658959442e-05, "loss": 7.3314, "step": 17110 }, { "epoch": 0.04814655451219272, "grad_norm": 22.625, "learning_rate": 1.9987293821901874e-05, "loss": 8.1899, "step": 17120 }, { "epoch": 0.04817467749964143, "grad_norm": 25.5, "learning_rate": 1.9987278976182108e-05, "loss": 8.1695, "step": 17130 }, { "epoch": 0.04820280048709014, "grad_norm": 37.25, "learning_rate": 1.9987264121800162e-05, "loss": 7.9669, "step": 17140 }, { "epoch": 0.048230923474538855, "grad_norm": 25.125, "learning_rate": 1.9987249258756044e-05, "loss": 8.2335, "step": 17150 }, { "epoch": 0.048259046461987566, "grad_norm": 45.75, "learning_rate": 1.9987234387049774e-05, "loss": 8.9114, "step": 17160 }, { "epoch": 0.04828716944943628, "grad_norm": 25.25, "learning_rate": 1.9987219506681356e-05, "loss": 7.5578, "step": 17170 }, { "epoch": 0.04831529243688498, "grad_norm": 50.0, "learning_rate": 1.998720461765081e-05, "loss": 8.1219, "step": 17180 }, { "epoch": 0.048343415424333694, "grad_norm": 27.875, "learning_rate": 1.998718971995814e-05, "loss": 6.9166, "step": 17190 }, { "epoch": 0.048371538411782405, "grad_norm": 23.625, "learning_rate": 1.9987174813603372e-05, "loss": 7.5407, "step": 17200 }, { "epoch": 0.04839966139923112, "grad_norm": 35.75, "learning_rate": 1.9987159898586512e-05, "loss": 8.1419, "step": 17210 }, { "epoch": 0.04842778438667983, "grad_norm": 37.25, "learning_rate": 1.9987144974907567e-05, "loss": 7.3759, "step": 17220 }, { "epoch": 0.04845590737412854, "grad_norm": 29.125, "learning_rate": 1.998713004256656e-05, "loss": 7.9778, "step": 17230 }, { "epoch": 0.04848403036157725, "grad_norm": 32.75, "learning_rate": 1.99871151015635e-05, "loss": 7.4636, "step": 17240 }, { "epoch": 0.04851215334902596, "grad_norm": 23.625, "learning_rate": 1.99871001518984e-05, "loss": 7.6179, "step": 17250 }, { "epoch": 0.04854027633647467, "grad_norm": 52.5, "learning_rate": 1.998708519357127e-05, "loss": 7.882, "step": 17260 }, { "epoch": 0.04856839932392338, "grad_norm": 27.625, "learning_rate": 1.9987070226582128e-05, "loss": 7.7648, "step": 17270 }, { "epoch": 0.04859652231137209, "grad_norm": 39.75, "learning_rate": 1.9987055250930984e-05, "loss": 8.3472, "step": 17280 }, { "epoch": 0.0486246452988208, "grad_norm": 36.75, "learning_rate": 1.998704026661785e-05, "loss": 7.5586, "step": 17290 }, { "epoch": 0.048652768286269514, "grad_norm": 41.0, "learning_rate": 1.9987025273642744e-05, "loss": 7.918, "step": 17300 }, { "epoch": 0.048680891273718226, "grad_norm": 35.25, "learning_rate": 1.9987010272005672e-05, "loss": 8.3589, "step": 17310 }, { "epoch": 0.04870901426116694, "grad_norm": 28.625, "learning_rate": 1.998699526170665e-05, "loss": 8.0466, "step": 17320 }, { "epoch": 0.04873713724861565, "grad_norm": 25.75, "learning_rate": 1.9986980242745696e-05, "loss": 7.717, "step": 17330 }, { "epoch": 0.04876526023606435, "grad_norm": 23.25, "learning_rate": 1.998696521512282e-05, "loss": 8.0517, "step": 17340 }, { "epoch": 0.048793383223513065, "grad_norm": 38.0, "learning_rate": 1.998695017883803e-05, "loss": 7.4055, "step": 17350 }, { "epoch": 0.048821506210961776, "grad_norm": 45.5, "learning_rate": 1.9986935133891344e-05, "loss": 8.9037, "step": 17360 }, { "epoch": 0.04884962919841049, "grad_norm": 37.75, "learning_rate": 1.9986920080282773e-05, "loss": 8.8098, "step": 17370 }, { "epoch": 0.0488777521858592, "grad_norm": 22.25, "learning_rate": 1.998690501801233e-05, "loss": 7.6807, "step": 17380 }, { "epoch": 0.04890587517330791, "grad_norm": 30.0, "learning_rate": 1.998688994708003e-05, "loss": 8.4379, "step": 17390 }, { "epoch": 0.04893399816075662, "grad_norm": 29.625, "learning_rate": 1.9986874867485885e-05, "loss": 8.2624, "step": 17400 }, { "epoch": 0.048962121148205334, "grad_norm": 24.375, "learning_rate": 1.998685977922991e-05, "loss": 7.7149, "step": 17410 }, { "epoch": 0.04899024413565404, "grad_norm": 22.75, "learning_rate": 1.9986844682312114e-05, "loss": 8.695, "step": 17420 }, { "epoch": 0.04901836712310275, "grad_norm": 47.75, "learning_rate": 1.9986829576732512e-05, "loss": 8.0961, "step": 17430 }, { "epoch": 0.04904649011055146, "grad_norm": 25.625, "learning_rate": 1.998681446249112e-05, "loss": 8.2192, "step": 17440 }, { "epoch": 0.049074613098000174, "grad_norm": 39.0, "learning_rate": 1.9986799339587947e-05, "loss": 8.5481, "step": 17450 }, { "epoch": 0.049102736085448885, "grad_norm": 36.5, "learning_rate": 1.9986784208023007e-05, "loss": 8.0833, "step": 17460 }, { "epoch": 0.0491308590728976, "grad_norm": 23.5, "learning_rate": 1.9986769067796312e-05, "loss": 8.305, "step": 17470 }, { "epoch": 0.04915898206034631, "grad_norm": 25.625, "learning_rate": 1.998675391890788e-05, "loss": 7.3399, "step": 17480 }, { "epoch": 0.04918710504779502, "grad_norm": 30.25, "learning_rate": 1.998673876135772e-05, "loss": 7.5145, "step": 17490 }, { "epoch": 0.04921522803524373, "grad_norm": 32.25, "learning_rate": 1.9986723595145846e-05, "loss": 7.2597, "step": 17500 }, { "epoch": 0.049243351022692436, "grad_norm": 36.75, "learning_rate": 1.998670842027227e-05, "loss": 8.5303, "step": 17510 }, { "epoch": 0.04927147401014115, "grad_norm": 47.75, "learning_rate": 1.998669323673701e-05, "loss": 7.783, "step": 17520 }, { "epoch": 0.04929959699758986, "grad_norm": 27.0, "learning_rate": 1.998667804454007e-05, "loss": 7.5953, "step": 17530 }, { "epoch": 0.04932771998503857, "grad_norm": 35.5, "learning_rate": 1.9986662843681474e-05, "loss": 8.1571, "step": 17540 }, { "epoch": 0.04935584297248728, "grad_norm": 34.5, "learning_rate": 1.9986647634161233e-05, "loss": 7.6336, "step": 17550 }, { "epoch": 0.049383965959935994, "grad_norm": 25.875, "learning_rate": 1.998663241597935e-05, "loss": 7.7515, "step": 17560 }, { "epoch": 0.049412088947384705, "grad_norm": 29.875, "learning_rate": 1.998661718913585e-05, "loss": 7.2571, "step": 17570 }, { "epoch": 0.04944021193483342, "grad_norm": 37.5, "learning_rate": 1.998660195363074e-05, "loss": 7.9719, "step": 17580 }, { "epoch": 0.04946833492228212, "grad_norm": 27.125, "learning_rate": 1.9986586709464034e-05, "loss": 8.0792, "step": 17590 }, { "epoch": 0.04949645790973083, "grad_norm": 21.125, "learning_rate": 1.9986571456635743e-05, "loss": 7.165, "step": 17600 }, { "epoch": 0.049524580897179545, "grad_norm": 31.5, "learning_rate": 1.998655619514589e-05, "loss": 7.9473, "step": 17610 }, { "epoch": 0.049552703884628256, "grad_norm": 20.0, "learning_rate": 1.9986540924994478e-05, "loss": 8.5033, "step": 17620 }, { "epoch": 0.04958082687207697, "grad_norm": 30.625, "learning_rate": 1.9986525646181524e-05, "loss": 8.4058, "step": 17630 }, { "epoch": 0.04960894985952568, "grad_norm": 58.0, "learning_rate": 1.9986510358707042e-05, "loss": 8.6028, "step": 17640 }, { "epoch": 0.04963707284697439, "grad_norm": 31.25, "learning_rate": 1.9986495062571044e-05, "loss": 8.8688, "step": 17650 }, { "epoch": 0.0496651958344231, "grad_norm": 31.75, "learning_rate": 1.9986479757773542e-05, "loss": 6.848, "step": 17660 }, { "epoch": 0.04969331882187181, "grad_norm": 25.125, "learning_rate": 1.9986464444314552e-05, "loss": 8.0212, "step": 17670 }, { "epoch": 0.04972144180932052, "grad_norm": 31.0, "learning_rate": 1.9986449122194084e-05, "loss": 7.974, "step": 17680 }, { "epoch": 0.04974956479676923, "grad_norm": 35.25, "learning_rate": 1.9986433791412155e-05, "loss": 7.947, "step": 17690 }, { "epoch": 0.04977768778421794, "grad_norm": 29.875, "learning_rate": 1.9986418451968776e-05, "loss": 8.0968, "step": 17700 }, { "epoch": 0.04980581077166665, "grad_norm": 45.25, "learning_rate": 1.9986403103863963e-05, "loss": 8.0338, "step": 17710 }, { "epoch": 0.049833933759115365, "grad_norm": 40.5, "learning_rate": 1.9986387747097727e-05, "loss": 8.4278, "step": 17720 }, { "epoch": 0.049862056746564076, "grad_norm": 38.0, "learning_rate": 1.998637238167008e-05, "loss": 8.6942, "step": 17730 }, { "epoch": 0.04989017973401279, "grad_norm": 25.125, "learning_rate": 1.9986357007581038e-05, "loss": 7.4679, "step": 17740 }, { "epoch": 0.04991830272146149, "grad_norm": 32.75, "learning_rate": 1.9986341624830616e-05, "loss": 8.0009, "step": 17750 }, { "epoch": 0.049946425708910204, "grad_norm": 35.75, "learning_rate": 1.9986326233418817e-05, "loss": 7.3796, "step": 17760 }, { "epoch": 0.049974548696358916, "grad_norm": 38.25, "learning_rate": 1.9986310833345667e-05, "loss": 7.4392, "step": 17770 }, { "epoch": 0.05000267168380763, "grad_norm": 38.75, "learning_rate": 1.9986295424611174e-05, "loss": 8.1885, "step": 17780 }, { "epoch": 0.05003079467125634, "grad_norm": 26.75, "learning_rate": 1.998628000721535e-05, "loss": 7.3953, "step": 17790 }, { "epoch": 0.05005891765870505, "grad_norm": 27.125, "learning_rate": 1.9986264581158213e-05, "loss": 8.018, "step": 17800 }, { "epoch": 0.05008704064615376, "grad_norm": 25.375, "learning_rate": 1.998624914643977e-05, "loss": 8.089, "step": 17810 }, { "epoch": 0.05011516363360247, "grad_norm": 26.5, "learning_rate": 1.9986233703060042e-05, "loss": 8.2092, "step": 17820 }, { "epoch": 0.05014328662105118, "grad_norm": 35.0, "learning_rate": 1.9986218251019035e-05, "loss": 7.3687, "step": 17830 }, { "epoch": 0.05017140960849989, "grad_norm": 30.5, "learning_rate": 1.998620279031677e-05, "loss": 8.5225, "step": 17840 }, { "epoch": 0.0501995325959486, "grad_norm": 52.0, "learning_rate": 1.9986187320953248e-05, "loss": 8.477, "step": 17850 }, { "epoch": 0.05022765558339731, "grad_norm": 32.75, "learning_rate": 1.99861718429285e-05, "loss": 7.926, "step": 17860 }, { "epoch": 0.050255778570846024, "grad_norm": 41.0, "learning_rate": 1.998615635624252e-05, "loss": 7.7154, "step": 17870 }, { "epoch": 0.050283901558294736, "grad_norm": 52.75, "learning_rate": 1.9986140860895337e-05, "loss": 8.6033, "step": 17880 }, { "epoch": 0.05031202454574345, "grad_norm": 26.375, "learning_rate": 1.9986125356886958e-05, "loss": 8.026, "step": 17890 }, { "epoch": 0.05034014753319216, "grad_norm": 39.5, "learning_rate": 1.99861098442174e-05, "loss": 7.4468, "step": 17900 }, { "epoch": 0.05036827052064086, "grad_norm": 36.5, "learning_rate": 1.9986094322886668e-05, "loss": 7.8802, "step": 17910 }, { "epoch": 0.050396393508089575, "grad_norm": 24.375, "learning_rate": 1.9986078792894784e-05, "loss": 8.1702, "step": 17920 }, { "epoch": 0.05042451649553829, "grad_norm": 49.75, "learning_rate": 1.9986063254241757e-05, "loss": 8.2462, "step": 17930 }, { "epoch": 0.050452639482987, "grad_norm": 52.75, "learning_rate": 1.9986047706927606e-05, "loss": 7.8778, "step": 17940 }, { "epoch": 0.05048076247043571, "grad_norm": 46.5, "learning_rate": 1.998603215095234e-05, "loss": 7.7282, "step": 17950 }, { "epoch": 0.05050888545788442, "grad_norm": 21.125, "learning_rate": 1.9986016586315967e-05, "loss": 8.4388, "step": 17960 }, { "epoch": 0.05053700844533313, "grad_norm": 41.75, "learning_rate": 1.998600101301851e-05, "loss": 8.1653, "step": 17970 }, { "epoch": 0.050565131432781844, "grad_norm": 29.5, "learning_rate": 1.9985985431059983e-05, "loss": 8.4912, "step": 17980 }, { "epoch": 0.05059325442023055, "grad_norm": 32.5, "learning_rate": 1.998596984044039e-05, "loss": 7.657, "step": 17990 }, { "epoch": 0.05062137740767926, "grad_norm": 39.0, "learning_rate": 1.9985954241159752e-05, "loss": 7.8324, "step": 18000 }, { "epoch": 0.05064950039512797, "grad_norm": 25.625, "learning_rate": 1.9985938633218082e-05, "loss": 8.4936, "step": 18010 }, { "epoch": 0.050677623382576684, "grad_norm": 38.75, "learning_rate": 1.9985923016615392e-05, "loss": 8.2165, "step": 18020 }, { "epoch": 0.050705746370025395, "grad_norm": 22.375, "learning_rate": 1.9985907391351692e-05, "loss": 7.7616, "step": 18030 }, { "epoch": 0.05073386935747411, "grad_norm": 22.625, "learning_rate": 1.9985891757427005e-05, "loss": 7.5339, "step": 18040 }, { "epoch": 0.05076199234492282, "grad_norm": 21.0, "learning_rate": 1.9985876114841332e-05, "loss": 7.8015, "step": 18050 }, { "epoch": 0.05079011533237153, "grad_norm": 35.25, "learning_rate": 1.99858604635947e-05, "loss": 7.9375, "step": 18060 }, { "epoch": 0.050818238319820234, "grad_norm": 31.25, "learning_rate": 1.9985844803687115e-05, "loss": 7.9773, "step": 18070 }, { "epoch": 0.050846361307268946, "grad_norm": 38.0, "learning_rate": 1.9985829135118588e-05, "loss": 7.6728, "step": 18080 }, { "epoch": 0.05087448429471766, "grad_norm": 23.375, "learning_rate": 1.9985813457889135e-05, "loss": 7.7526, "step": 18090 }, { "epoch": 0.05090260728216637, "grad_norm": 26.0, "learning_rate": 1.9985797771998774e-05, "loss": 7.8189, "step": 18100 }, { "epoch": 0.05093073026961508, "grad_norm": 31.875, "learning_rate": 1.9985782077447514e-05, "loss": 7.5563, "step": 18110 }, { "epoch": 0.05095885325706379, "grad_norm": 23.75, "learning_rate": 1.9985766374235373e-05, "loss": 7.5335, "step": 18120 }, { "epoch": 0.050986976244512504, "grad_norm": 26.875, "learning_rate": 1.9985750662362357e-05, "loss": 8.925, "step": 18130 }, { "epoch": 0.051015099231961215, "grad_norm": 20.375, "learning_rate": 1.9985734941828484e-05, "loss": 8.1326, "step": 18140 }, { "epoch": 0.05104322221940993, "grad_norm": 31.75, "learning_rate": 1.9985719212633774e-05, "loss": 8.5344, "step": 18150 }, { "epoch": 0.05107134520685863, "grad_norm": 30.5, "learning_rate": 1.998570347477823e-05, "loss": 7.3642, "step": 18160 }, { "epoch": 0.05109946819430734, "grad_norm": 26.625, "learning_rate": 1.9985687728261867e-05, "loss": 8.3164, "step": 18170 }, { "epoch": 0.051127591181756055, "grad_norm": 19.75, "learning_rate": 1.998567197308471e-05, "loss": 8.3561, "step": 18180 }, { "epoch": 0.051155714169204766, "grad_norm": 46.0, "learning_rate": 1.9985656209246757e-05, "loss": 7.9917, "step": 18190 }, { "epoch": 0.05118383715665348, "grad_norm": 33.75, "learning_rate": 1.998564043674803e-05, "loss": 7.7524, "step": 18200 }, { "epoch": 0.05121196014410219, "grad_norm": 34.0, "learning_rate": 1.9985624655588545e-05, "loss": 8.1255, "step": 18210 }, { "epoch": 0.0512400831315509, "grad_norm": 28.75, "learning_rate": 1.9985608865768312e-05, "loss": 8.3894, "step": 18220 }, { "epoch": 0.05126820611899961, "grad_norm": 26.25, "learning_rate": 1.9985593067287343e-05, "loss": 7.3203, "step": 18230 }, { "epoch": 0.05129632910644832, "grad_norm": 25.5, "learning_rate": 1.9985577260145656e-05, "loss": 8.2529, "step": 18240 }, { "epoch": 0.05132445209389703, "grad_norm": 32.5, "learning_rate": 1.9985561444343263e-05, "loss": 8.1071, "step": 18250 }, { "epoch": 0.05135257508134574, "grad_norm": 28.375, "learning_rate": 1.9985545619880175e-05, "loss": 8.1236, "step": 18260 }, { "epoch": 0.05138069806879445, "grad_norm": 24.125, "learning_rate": 1.998552978675641e-05, "loss": 7.6847, "step": 18270 }, { "epoch": 0.05140882105624316, "grad_norm": 23.25, "learning_rate": 1.9985513944971978e-05, "loss": 8.397, "step": 18280 }, { "epoch": 0.051436944043691875, "grad_norm": 22.625, "learning_rate": 1.9985498094526893e-05, "loss": 8.4665, "step": 18290 }, { "epoch": 0.051465067031140586, "grad_norm": 34.75, "learning_rate": 1.998548223542117e-05, "loss": 7.8867, "step": 18300 }, { "epoch": 0.0514931900185893, "grad_norm": 36.25, "learning_rate": 1.9985466367654828e-05, "loss": 7.9034, "step": 18310 }, { "epoch": 0.051521313006038, "grad_norm": 23.5, "learning_rate": 1.9985450491227874e-05, "loss": 8.4502, "step": 18320 }, { "epoch": 0.051549435993486714, "grad_norm": 24.0, "learning_rate": 1.998543460614032e-05, "loss": 7.1839, "step": 18330 }, { "epoch": 0.051577558980935426, "grad_norm": 25.375, "learning_rate": 1.9985418712392187e-05, "loss": 8.578, "step": 18340 }, { "epoch": 0.05160568196838414, "grad_norm": 31.375, "learning_rate": 1.9985402809983484e-05, "loss": 7.6102, "step": 18350 }, { "epoch": 0.05163380495583285, "grad_norm": 22.75, "learning_rate": 1.9985386898914225e-05, "loss": 8.1065, "step": 18360 }, { "epoch": 0.05166192794328156, "grad_norm": 34.0, "learning_rate": 1.9985370979184427e-05, "loss": 7.7248, "step": 18370 }, { "epoch": 0.05169005093073027, "grad_norm": 26.125, "learning_rate": 1.99853550507941e-05, "loss": 7.8967, "step": 18380 }, { "epoch": 0.05171817391817898, "grad_norm": 41.25, "learning_rate": 1.9985339113743257e-05, "loss": 8.4286, "step": 18390 }, { "epoch": 0.05174629690562769, "grad_norm": 28.0, "learning_rate": 1.9985323168031915e-05, "loss": 8.4294, "step": 18400 }, { "epoch": 0.0517744198930764, "grad_norm": 35.75, "learning_rate": 1.998530721366009e-05, "loss": 8.3683, "step": 18410 }, { "epoch": 0.05180254288052511, "grad_norm": 28.875, "learning_rate": 1.998529125062779e-05, "loss": 8.508, "step": 18420 }, { "epoch": 0.05183066586797382, "grad_norm": 44.5, "learning_rate": 1.9985275278935032e-05, "loss": 8.2264, "step": 18430 }, { "epoch": 0.051858788855422534, "grad_norm": 33.5, "learning_rate": 1.998525929858183e-05, "loss": 8.018, "step": 18440 }, { "epoch": 0.051886911842871246, "grad_norm": 23.75, "learning_rate": 1.9985243309568198e-05, "loss": 7.7806, "step": 18450 }, { "epoch": 0.05191503483031996, "grad_norm": 40.5, "learning_rate": 1.998522731189415e-05, "loss": 7.8555, "step": 18460 }, { "epoch": 0.05194315781776867, "grad_norm": 29.875, "learning_rate": 1.9985211305559695e-05, "loss": 8.0372, "step": 18470 }, { "epoch": 0.051971280805217374, "grad_norm": 33.0, "learning_rate": 1.9985195290564853e-05, "loss": 7.2746, "step": 18480 }, { "epoch": 0.051999403792666085, "grad_norm": 35.5, "learning_rate": 1.998517926690964e-05, "loss": 7.6454, "step": 18490 }, { "epoch": 0.0520275267801148, "grad_norm": 33.5, "learning_rate": 1.998516323459406e-05, "loss": 8.0477, "step": 18500 }, { "epoch": 0.05205564976756351, "grad_norm": 22.0, "learning_rate": 1.9985147193618137e-05, "loss": 7.5663, "step": 18510 }, { "epoch": 0.05208377275501222, "grad_norm": 22.375, "learning_rate": 1.9985131143981877e-05, "loss": 7.5779, "step": 18520 }, { "epoch": 0.05211189574246093, "grad_norm": 22.25, "learning_rate": 1.9985115085685298e-05, "loss": 7.7189, "step": 18530 }, { "epoch": 0.05214001872990964, "grad_norm": 27.875, "learning_rate": 1.9985099018728414e-05, "loss": 7.9264, "step": 18540 }, { "epoch": 0.052168141717358354, "grad_norm": 24.25, "learning_rate": 1.9985082943111237e-05, "loss": 8.363, "step": 18550 }, { "epoch": 0.05219626470480706, "grad_norm": 29.625, "learning_rate": 1.9985066858833786e-05, "loss": 8.0014, "step": 18560 }, { "epoch": 0.05222438769225577, "grad_norm": 28.375, "learning_rate": 1.998505076589607e-05, "loss": 8.6115, "step": 18570 }, { "epoch": 0.05225251067970448, "grad_norm": 27.375, "learning_rate": 1.9985034664298104e-05, "loss": 7.4145, "step": 18580 }, { "epoch": 0.052280633667153194, "grad_norm": 34.75, "learning_rate": 1.9985018554039902e-05, "loss": 8.1568, "step": 18590 }, { "epoch": 0.052308756654601905, "grad_norm": 28.0, "learning_rate": 1.9985002435121474e-05, "loss": 7.9962, "step": 18600 }, { "epoch": 0.05233687964205062, "grad_norm": 27.0, "learning_rate": 1.9984986307542844e-05, "loss": 7.7467, "step": 18610 }, { "epoch": 0.05236500262949933, "grad_norm": 32.0, "learning_rate": 1.998497017130402e-05, "loss": 8.1312, "step": 18620 }, { "epoch": 0.05239312561694804, "grad_norm": 26.0, "learning_rate": 1.9984954026405012e-05, "loss": 9.0233, "step": 18630 }, { "epoch": 0.052421248604396745, "grad_norm": 37.5, "learning_rate": 1.998493787284584e-05, "loss": 7.7665, "step": 18640 }, { "epoch": 0.052449371591845456, "grad_norm": 25.25, "learning_rate": 1.9984921710626514e-05, "loss": 7.9155, "step": 18650 }, { "epoch": 0.05247749457929417, "grad_norm": 30.0, "learning_rate": 1.9984905539747052e-05, "loss": 8.1381, "step": 18660 }, { "epoch": 0.05250561756674288, "grad_norm": 30.0, "learning_rate": 1.9984889360207468e-05, "loss": 7.8947, "step": 18670 }, { "epoch": 0.05253374055419159, "grad_norm": 29.0, "learning_rate": 1.9984873172007772e-05, "loss": 7.7746, "step": 18680 }, { "epoch": 0.0525618635416403, "grad_norm": 24.875, "learning_rate": 1.998485697514798e-05, "loss": 8.0488, "step": 18690 }, { "epoch": 0.052589986529089014, "grad_norm": 29.375, "learning_rate": 1.998484076962811e-05, "loss": 7.9374, "step": 18700 }, { "epoch": 0.052618109516537726, "grad_norm": 46.0, "learning_rate": 1.998482455544817e-05, "loss": 8.4672, "step": 18710 }, { "epoch": 0.05264623250398643, "grad_norm": 27.875, "learning_rate": 1.9984808332608175e-05, "loss": 7.9785, "step": 18720 }, { "epoch": 0.05267435549143514, "grad_norm": 28.875, "learning_rate": 1.998479210110814e-05, "loss": 7.398, "step": 18730 }, { "epoch": 0.05270247847888385, "grad_norm": 53.25, "learning_rate": 1.9984775860948083e-05, "loss": 7.5566, "step": 18740 }, { "epoch": 0.052730601466332565, "grad_norm": 33.25, "learning_rate": 1.998475961212801e-05, "loss": 8.4809, "step": 18750 }, { "epoch": 0.052758724453781276, "grad_norm": 33.5, "learning_rate": 1.9984743354647944e-05, "loss": 7.761, "step": 18760 }, { "epoch": 0.05278684744122999, "grad_norm": 26.625, "learning_rate": 1.9984727088507895e-05, "loss": 8.0479, "step": 18770 }, { "epoch": 0.0528149704286787, "grad_norm": 20.125, "learning_rate": 1.9984710813707872e-05, "loss": 8.3646, "step": 18780 }, { "epoch": 0.05284309341612741, "grad_norm": 48.0, "learning_rate": 1.99846945302479e-05, "loss": 8.3399, "step": 18790 }, { "epoch": 0.052871216403576116, "grad_norm": 20.75, "learning_rate": 1.9984678238127984e-05, "loss": 7.2663, "step": 18800 }, { "epoch": 0.05289933939102483, "grad_norm": 21.5, "learning_rate": 1.998466193734814e-05, "loss": 8.3039, "step": 18810 }, { "epoch": 0.05292746237847354, "grad_norm": 38.75, "learning_rate": 1.9984645627908385e-05, "loss": 7.9138, "step": 18820 }, { "epoch": 0.05295558536592225, "grad_norm": 29.0, "learning_rate": 1.998462930980873e-05, "loss": 8.6257, "step": 18830 }, { "epoch": 0.05298370835337096, "grad_norm": 29.625, "learning_rate": 1.9984612983049194e-05, "loss": 7.2328, "step": 18840 }, { "epoch": 0.05301183134081967, "grad_norm": 28.5, "learning_rate": 1.9984596647629785e-05, "loss": 8.441, "step": 18850 }, { "epoch": 0.053039954328268385, "grad_norm": 30.5, "learning_rate": 1.9984580303550523e-05, "loss": 7.9144, "step": 18860 }, { "epoch": 0.053068077315717097, "grad_norm": 43.75, "learning_rate": 1.9984563950811418e-05, "loss": 7.9972, "step": 18870 }, { "epoch": 0.05309620030316581, "grad_norm": 35.0, "learning_rate": 1.9984547589412484e-05, "loss": 7.6836, "step": 18880 }, { "epoch": 0.05312432329061451, "grad_norm": 22.75, "learning_rate": 1.9984531219353737e-05, "loss": 7.9876, "step": 18890 }, { "epoch": 0.053152446278063224, "grad_norm": 36.25, "learning_rate": 1.998451484063519e-05, "loss": 8.9144, "step": 18900 }, { "epoch": 0.053180569265511936, "grad_norm": 32.75, "learning_rate": 1.998449845325686e-05, "loss": 8.1803, "step": 18910 }, { "epoch": 0.05320869225296065, "grad_norm": 35.0, "learning_rate": 1.998448205721876e-05, "loss": 7.7282, "step": 18920 }, { "epoch": 0.05323681524040936, "grad_norm": 26.875, "learning_rate": 1.99844656525209e-05, "loss": 7.6988, "step": 18930 }, { "epoch": 0.05326493822785807, "grad_norm": 27.0, "learning_rate": 1.99844492391633e-05, "loss": 8.6295, "step": 18940 }, { "epoch": 0.05329306121530678, "grad_norm": 25.75, "learning_rate": 1.998443281714597e-05, "loss": 7.3244, "step": 18950 }, { "epoch": 0.053321184202755494, "grad_norm": 22.0, "learning_rate": 1.998441638646893e-05, "loss": 7.9429, "step": 18960 }, { "epoch": 0.0533493071902042, "grad_norm": 26.625, "learning_rate": 1.998439994713219e-05, "loss": 8.728, "step": 18970 }, { "epoch": 0.05337743017765291, "grad_norm": 25.125, "learning_rate": 1.998438349913576e-05, "loss": 8.4232, "step": 18980 }, { "epoch": 0.05340555316510162, "grad_norm": 28.625, "learning_rate": 1.9984367042479662e-05, "loss": 8.0412, "step": 18990 }, { "epoch": 0.05343367615255033, "grad_norm": 39.5, "learning_rate": 1.9984350577163908e-05, "loss": 8.3863, "step": 19000 }, { "epoch": 0.053461799139999044, "grad_norm": 29.875, "learning_rate": 1.998433410318851e-05, "loss": 7.8856, "step": 19010 }, { "epoch": 0.053489922127447756, "grad_norm": 39.5, "learning_rate": 1.9984317620553485e-05, "loss": 7.4815, "step": 19020 }, { "epoch": 0.05351804511489647, "grad_norm": 23.25, "learning_rate": 1.9984301129258844e-05, "loss": 8.1617, "step": 19030 }, { "epoch": 0.05354616810234518, "grad_norm": 26.0, "learning_rate": 1.9984284629304606e-05, "loss": 8.1392, "step": 19040 }, { "epoch": 0.053574291089793884, "grad_norm": 31.375, "learning_rate": 1.998426812069078e-05, "loss": 8.3843, "step": 19050 }, { "epoch": 0.053602414077242595, "grad_norm": 30.375, "learning_rate": 1.9984251603417385e-05, "loss": 7.8697, "step": 19060 }, { "epoch": 0.05363053706469131, "grad_norm": 26.5, "learning_rate": 1.9984235077484433e-05, "loss": 8.6182, "step": 19070 }, { "epoch": 0.05365866005214002, "grad_norm": 25.5, "learning_rate": 1.9984218542891937e-05, "loss": 7.8591, "step": 19080 }, { "epoch": 0.05368678303958873, "grad_norm": 32.75, "learning_rate": 1.9984201999639916e-05, "loss": 8.221, "step": 19090 }, { "epoch": 0.05371490602703744, "grad_norm": 34.0, "learning_rate": 1.9984185447728382e-05, "loss": 7.8907, "step": 19100 }, { "epoch": 0.05374302901448615, "grad_norm": 23.625, "learning_rate": 1.9984168887157345e-05, "loss": 8.3338, "step": 19110 }, { "epoch": 0.053771152001934865, "grad_norm": 23.5, "learning_rate": 1.9984152317926827e-05, "loss": 8.3301, "step": 19120 }, { "epoch": 0.05379927498938357, "grad_norm": 36.0, "learning_rate": 1.9984135740036833e-05, "loss": 7.7672, "step": 19130 }, { "epoch": 0.05382739797683228, "grad_norm": 27.0, "learning_rate": 1.9984119153487387e-05, "loss": 8.6214, "step": 19140 }, { "epoch": 0.05385552096428099, "grad_norm": 31.75, "learning_rate": 1.9984102558278497e-05, "loss": 7.5795, "step": 19150 }, { "epoch": 0.053883643951729704, "grad_norm": 29.0, "learning_rate": 1.998408595441018e-05, "loss": 7.3124, "step": 19160 }, { "epoch": 0.053911766939178415, "grad_norm": 26.25, "learning_rate": 1.9984069341882453e-05, "loss": 8.3848, "step": 19170 }, { "epoch": 0.05393988992662713, "grad_norm": 28.5, "learning_rate": 1.9984052720695323e-05, "loss": 7.653, "step": 19180 }, { "epoch": 0.05396801291407584, "grad_norm": 46.5, "learning_rate": 1.998403609084881e-05, "loss": 7.4101, "step": 19190 }, { "epoch": 0.05399613590152455, "grad_norm": 38.0, "learning_rate": 1.9984019452342932e-05, "loss": 7.9043, "step": 19200 }, { "epoch": 0.054024258888973255, "grad_norm": 42.0, "learning_rate": 1.9984002805177694e-05, "loss": 8.5598, "step": 19210 }, { "epoch": 0.054052381876421966, "grad_norm": 34.75, "learning_rate": 1.9983986149353114e-05, "loss": 8.9097, "step": 19220 }, { "epoch": 0.05408050486387068, "grad_norm": 23.125, "learning_rate": 1.998396948486921e-05, "loss": 8.4042, "step": 19230 }, { "epoch": 0.05410862785131939, "grad_norm": 27.0, "learning_rate": 1.9983952811725994e-05, "loss": 7.7505, "step": 19240 }, { "epoch": 0.0541367508387681, "grad_norm": 39.25, "learning_rate": 1.998393612992348e-05, "loss": 7.9688, "step": 19250 }, { "epoch": 0.05416487382621681, "grad_norm": 31.25, "learning_rate": 1.9983919439461685e-05, "loss": 7.7305, "step": 19260 }, { "epoch": 0.054192996813665524, "grad_norm": 30.5, "learning_rate": 1.9983902740340615e-05, "loss": 7.654, "step": 19270 }, { "epoch": 0.054221119801114236, "grad_norm": 34.75, "learning_rate": 1.9983886032560294e-05, "loss": 7.4599, "step": 19280 }, { "epoch": 0.05424924278856294, "grad_norm": 21.875, "learning_rate": 1.9983869316120738e-05, "loss": 8.2612, "step": 19290 }, { "epoch": 0.05427736577601165, "grad_norm": 32.0, "learning_rate": 1.9983852591021954e-05, "loss": 8.106, "step": 19300 }, { "epoch": 0.05430548876346036, "grad_norm": 30.875, "learning_rate": 1.998383585726396e-05, "loss": 8.6365, "step": 19310 }, { "epoch": 0.054333611750909075, "grad_norm": 24.875, "learning_rate": 1.9983819114846767e-05, "loss": 8.0632, "step": 19320 }, { "epoch": 0.054361734738357786, "grad_norm": 24.25, "learning_rate": 1.9983802363770394e-05, "loss": 7.5131, "step": 19330 }, { "epoch": 0.0543898577258065, "grad_norm": 32.75, "learning_rate": 1.9983785604034854e-05, "loss": 7.3817, "step": 19340 }, { "epoch": 0.05441798071325521, "grad_norm": 20.5, "learning_rate": 1.9983768835640163e-05, "loss": 7.6893, "step": 19350 }, { "epoch": 0.05444610370070392, "grad_norm": 41.25, "learning_rate": 1.9983752058586334e-05, "loss": 8.0993, "step": 19360 }, { "epoch": 0.054474226688152626, "grad_norm": 48.0, "learning_rate": 1.998373527287338e-05, "loss": 7.7485, "step": 19370 }, { "epoch": 0.05450234967560134, "grad_norm": 33.25, "learning_rate": 1.9983718478501318e-05, "loss": 7.7104, "step": 19380 }, { "epoch": 0.05453047266305005, "grad_norm": 43.5, "learning_rate": 1.998370167547016e-05, "loss": 8.0659, "step": 19390 }, { "epoch": 0.05455859565049876, "grad_norm": 24.25, "learning_rate": 1.9983684863779923e-05, "loss": 8.0713, "step": 19400 }, { "epoch": 0.05458671863794747, "grad_norm": 55.75, "learning_rate": 1.998366804343062e-05, "loss": 7.9499, "step": 19410 }, { "epoch": 0.054614841625396184, "grad_norm": 41.75, "learning_rate": 1.998365121442227e-05, "loss": 8.6007, "step": 19420 }, { "epoch": 0.054642964612844895, "grad_norm": 37.5, "learning_rate": 1.998363437675488e-05, "loss": 8.2319, "step": 19430 }, { "epoch": 0.05467108760029361, "grad_norm": 28.25, "learning_rate": 1.998361753042847e-05, "loss": 7.6701, "step": 19440 }, { "epoch": 0.05469921058774231, "grad_norm": 35.5, "learning_rate": 1.9983600675443055e-05, "loss": 8.1674, "step": 19450 }, { "epoch": 0.05472733357519102, "grad_norm": 24.375, "learning_rate": 1.9983583811798646e-05, "loss": 7.8227, "step": 19460 }, { "epoch": 0.054755456562639734, "grad_norm": 25.375, "learning_rate": 1.9983566939495264e-05, "loss": 8.0151, "step": 19470 }, { "epoch": 0.054783579550088446, "grad_norm": 27.5, "learning_rate": 1.9983550058532914e-05, "loss": 8.8682, "step": 19480 }, { "epoch": 0.05481170253753716, "grad_norm": 25.0, "learning_rate": 1.9983533168911617e-05, "loss": 7.8411, "step": 19490 }, { "epoch": 0.05483982552498587, "grad_norm": 47.0, "learning_rate": 1.9983516270631384e-05, "loss": 8.2282, "step": 19500 }, { "epoch": 0.05486794851243458, "grad_norm": 23.5, "learning_rate": 1.9983499363692237e-05, "loss": 7.628, "step": 19510 }, { "epoch": 0.05489607149988329, "grad_norm": 30.25, "learning_rate": 1.998348244809418e-05, "loss": 8.7042, "step": 19520 }, { "epoch": 0.054924194487332004, "grad_norm": 19.125, "learning_rate": 1.998346552383724e-05, "loss": 8.2588, "step": 19530 }, { "epoch": 0.05495231747478071, "grad_norm": 28.625, "learning_rate": 1.9983448590921423e-05, "loss": 8.3071, "step": 19540 }, { "epoch": 0.05498044046222942, "grad_norm": 34.75, "learning_rate": 1.9983431649346744e-05, "loss": 8.3151, "step": 19550 }, { "epoch": 0.05500856344967813, "grad_norm": 32.75, "learning_rate": 1.998341469911322e-05, "loss": 8.528, "step": 19560 }, { "epoch": 0.05503668643712684, "grad_norm": 27.5, "learning_rate": 1.9983397740220867e-05, "loss": 7.2263, "step": 19570 }, { "epoch": 0.055064809424575555, "grad_norm": 26.875, "learning_rate": 1.9983380772669697e-05, "loss": 7.0595, "step": 19580 }, { "epoch": 0.055092932412024266, "grad_norm": 32.75, "learning_rate": 1.998336379645972e-05, "loss": 7.4332, "step": 19590 }, { "epoch": 0.05512105539947298, "grad_norm": 29.875, "learning_rate": 1.9983346811590966e-05, "loss": 7.8423, "step": 19600 }, { "epoch": 0.05514917838692169, "grad_norm": 22.625, "learning_rate": 1.9983329818063436e-05, "loss": 7.1624, "step": 19610 }, { "epoch": 0.055177301374370394, "grad_norm": 28.625, "learning_rate": 1.9983312815877146e-05, "loss": 7.8154, "step": 19620 }, { "epoch": 0.055205424361819105, "grad_norm": 21.25, "learning_rate": 1.998329580503212e-05, "loss": 7.1226, "step": 19630 }, { "epoch": 0.05523354734926782, "grad_norm": 24.875, "learning_rate": 1.998327878552836e-05, "loss": 8.4382, "step": 19640 }, { "epoch": 0.05526167033671653, "grad_norm": 29.625, "learning_rate": 1.998326175736589e-05, "loss": 7.3483, "step": 19650 }, { "epoch": 0.05528979332416524, "grad_norm": 49.75, "learning_rate": 1.998324472054472e-05, "loss": 7.8986, "step": 19660 }, { "epoch": 0.05531791631161395, "grad_norm": 29.125, "learning_rate": 1.998322767506487e-05, "loss": 8.07, "step": 19670 }, { "epoch": 0.05534603929906266, "grad_norm": 31.625, "learning_rate": 1.9983210620926344e-05, "loss": 7.5803, "step": 19680 }, { "epoch": 0.055374162286511375, "grad_norm": 21.5, "learning_rate": 1.9983193558129172e-05, "loss": 7.7817, "step": 19690 }, { "epoch": 0.05540228527396008, "grad_norm": 33.25, "learning_rate": 1.998317648667336e-05, "loss": 7.8277, "step": 19700 }, { "epoch": 0.05543040826140879, "grad_norm": 29.25, "learning_rate": 1.998315940655892e-05, "loss": 8.0376, "step": 19710 }, { "epoch": 0.0554585312488575, "grad_norm": 31.75, "learning_rate": 1.9983142317785874e-05, "loss": 7.7938, "step": 19720 }, { "epoch": 0.055486654236306214, "grad_norm": 26.0, "learning_rate": 1.9983125220354232e-05, "loss": 8.2105, "step": 19730 }, { "epoch": 0.055514777223754926, "grad_norm": 41.0, "learning_rate": 1.998310811426401e-05, "loss": 8.4287, "step": 19740 }, { "epoch": 0.05554290021120364, "grad_norm": 31.5, "learning_rate": 1.9983090999515223e-05, "loss": 8.45, "step": 19750 }, { "epoch": 0.05557102319865235, "grad_norm": 46.25, "learning_rate": 1.9983073876107886e-05, "loss": 7.348, "step": 19760 }, { "epoch": 0.05559914618610106, "grad_norm": 39.75, "learning_rate": 1.9983056744042016e-05, "loss": 8.0936, "step": 19770 }, { "epoch": 0.055627269173549765, "grad_norm": 30.75, "learning_rate": 1.9983039603317626e-05, "loss": 8.2962, "step": 19780 }, { "epoch": 0.055655392160998476, "grad_norm": 22.0, "learning_rate": 1.9983022453934726e-05, "loss": 7.9339, "step": 19790 }, { "epoch": 0.05568351514844719, "grad_norm": 19.75, "learning_rate": 1.9983005295893337e-05, "loss": 7.7592, "step": 19800 }, { "epoch": 0.0557116381358959, "grad_norm": 28.0, "learning_rate": 1.9982988129193475e-05, "loss": 8.0519, "step": 19810 }, { "epoch": 0.05573976112334461, "grad_norm": 32.5, "learning_rate": 1.998297095383515e-05, "loss": 7.3724, "step": 19820 }, { "epoch": 0.05576788411079332, "grad_norm": 39.0, "learning_rate": 1.9982953769818382e-05, "loss": 7.8578, "step": 19830 }, { "epoch": 0.055796007098242034, "grad_norm": 28.375, "learning_rate": 1.9982936577143178e-05, "loss": 7.5796, "step": 19840 }, { "epoch": 0.055824130085690746, "grad_norm": 37.0, "learning_rate": 1.998291937580956e-05, "loss": 8.6568, "step": 19850 }, { "epoch": 0.05585225307313945, "grad_norm": 35.0, "learning_rate": 1.998290216581754e-05, "loss": 7.3343, "step": 19860 }, { "epoch": 0.05588037606058816, "grad_norm": 39.75, "learning_rate": 1.9982884947167137e-05, "loss": 6.9955, "step": 19870 }, { "epoch": 0.055908499048036873, "grad_norm": 58.75, "learning_rate": 1.998286771985836e-05, "loss": 7.7882, "step": 19880 }, { "epoch": 0.055936622035485585, "grad_norm": 26.125, "learning_rate": 1.998285048389123e-05, "loss": 8.1404, "step": 19890 }, { "epoch": 0.0559647450229343, "grad_norm": 36.5, "learning_rate": 1.9982833239265752e-05, "loss": 7.1027, "step": 19900 }, { "epoch": 0.05599286801038301, "grad_norm": 28.625, "learning_rate": 1.9982815985981952e-05, "loss": 7.8337, "step": 19910 }, { "epoch": 0.05602099099783172, "grad_norm": 26.625, "learning_rate": 1.998279872403984e-05, "loss": 7.9801, "step": 19920 }, { "epoch": 0.05604911398528043, "grad_norm": 35.0, "learning_rate": 1.998278145343943e-05, "loss": 7.5082, "step": 19930 }, { "epoch": 0.056077236972729136, "grad_norm": 31.25, "learning_rate": 1.998276417418074e-05, "loss": 7.9007, "step": 19940 }, { "epoch": 0.05610535996017785, "grad_norm": 38.0, "learning_rate": 1.9982746886263783e-05, "loss": 7.2864, "step": 19950 }, { "epoch": 0.05613348294762656, "grad_norm": 33.0, "learning_rate": 1.9982729589688573e-05, "loss": 7.6332, "step": 19960 }, { "epoch": 0.05616160593507527, "grad_norm": 32.0, "learning_rate": 1.9982712284455128e-05, "loss": 8.0916, "step": 19970 }, { "epoch": 0.05618972892252398, "grad_norm": 37.0, "learning_rate": 1.998269497056346e-05, "loss": 7.149, "step": 19980 }, { "epoch": 0.056217851909972694, "grad_norm": 27.125, "learning_rate": 1.9982677648013586e-05, "loss": 7.4871, "step": 19990 }, { "epoch": 0.056245974897421405, "grad_norm": 27.125, "learning_rate": 1.9982660316805523e-05, "loss": 7.9153, "step": 20000 }, { "epoch": 0.05627409788487012, "grad_norm": 41.5, "learning_rate": 1.9982642976939282e-05, "loss": 7.8897, "step": 20010 }, { "epoch": 0.05630222087231882, "grad_norm": 31.25, "learning_rate": 1.9982625628414878e-05, "loss": 7.7555, "step": 20020 }, { "epoch": 0.05633034385976753, "grad_norm": 50.5, "learning_rate": 1.998260827123233e-05, "loss": 8.5057, "step": 20030 }, { "epoch": 0.056358466847216244, "grad_norm": 23.5, "learning_rate": 1.9982590905391647e-05, "loss": 7.9091, "step": 20040 }, { "epoch": 0.056386589834664956, "grad_norm": 31.5, "learning_rate": 1.9982573530892852e-05, "loss": 7.8127, "step": 20050 }, { "epoch": 0.05641471282211367, "grad_norm": 28.75, "learning_rate": 1.9982556147735953e-05, "loss": 8.5595, "step": 20060 }, { "epoch": 0.05644283580956238, "grad_norm": 33.75, "learning_rate": 1.998253875592097e-05, "loss": 7.6451, "step": 20070 }, { "epoch": 0.05647095879701109, "grad_norm": 22.375, "learning_rate": 1.998252135544791e-05, "loss": 7.6805, "step": 20080 }, { "epoch": 0.0564990817844598, "grad_norm": 28.375, "learning_rate": 1.99825039463168e-05, "loss": 7.5887, "step": 20090 }, { "epoch": 0.05652720477190851, "grad_norm": 27.125, "learning_rate": 1.9982486528527646e-05, "loss": 8.1485, "step": 20100 }, { "epoch": 0.05655532775935722, "grad_norm": 27.75, "learning_rate": 1.998246910208047e-05, "loss": 7.5987, "step": 20110 }, { "epoch": 0.05658345074680593, "grad_norm": 44.25, "learning_rate": 1.998245166697528e-05, "loss": 8.5167, "step": 20120 }, { "epoch": 0.05661157373425464, "grad_norm": 37.0, "learning_rate": 1.9982434223212098e-05, "loss": 7.832, "step": 20130 }, { "epoch": 0.05663969672170335, "grad_norm": 25.375, "learning_rate": 1.998241677079093e-05, "loss": 8.0407, "step": 20140 }, { "epoch": 0.056667819709152065, "grad_norm": 34.5, "learning_rate": 1.99823993097118e-05, "loss": 7.4102, "step": 20150 }, { "epoch": 0.056695942696600776, "grad_norm": 27.75, "learning_rate": 1.998238183997472e-05, "loss": 7.7821, "step": 20160 }, { "epoch": 0.05672406568404949, "grad_norm": 37.0, "learning_rate": 1.998236436157971e-05, "loss": 8.1257, "step": 20170 }, { "epoch": 0.0567521886714982, "grad_norm": 46.5, "learning_rate": 1.9982346874526772e-05, "loss": 8.6723, "step": 20180 }, { "epoch": 0.056780311658946904, "grad_norm": 30.375, "learning_rate": 1.9982329378815932e-05, "loss": 8.0866, "step": 20190 }, { "epoch": 0.056808434646395616, "grad_norm": 25.25, "learning_rate": 1.9982311874447203e-05, "loss": 7.0809, "step": 20200 }, { "epoch": 0.05683655763384433, "grad_norm": 31.125, "learning_rate": 1.9982294361420597e-05, "loss": 8.0938, "step": 20210 }, { "epoch": 0.05686468062129304, "grad_norm": 30.375, "learning_rate": 1.9982276839736136e-05, "loss": 7.746, "step": 20220 }, { "epoch": 0.05689280360874175, "grad_norm": 42.25, "learning_rate": 1.998225930939383e-05, "loss": 7.6016, "step": 20230 }, { "epoch": 0.05692092659619046, "grad_norm": 50.75, "learning_rate": 1.9982241770393696e-05, "loss": 8.4288, "step": 20240 }, { "epoch": 0.05694904958363917, "grad_norm": 35.5, "learning_rate": 1.9982224222735745e-05, "loss": 7.7718, "step": 20250 }, { "epoch": 0.056977172571087885, "grad_norm": 25.0, "learning_rate": 1.998220666642e-05, "loss": 8.005, "step": 20260 }, { "epoch": 0.05700529555853659, "grad_norm": 28.375, "learning_rate": 1.9982189101446466e-05, "loss": 7.8985, "step": 20270 }, { "epoch": 0.0570334185459853, "grad_norm": 23.25, "learning_rate": 1.9982171527815168e-05, "loss": 8.2806, "step": 20280 }, { "epoch": 0.05706154153343401, "grad_norm": 24.25, "learning_rate": 1.998215394552612e-05, "loss": 7.9291, "step": 20290 }, { "epoch": 0.057089664520882724, "grad_norm": 42.5, "learning_rate": 1.998213635457933e-05, "loss": 7.793, "step": 20300 }, { "epoch": 0.057117787508331436, "grad_norm": 25.375, "learning_rate": 1.998211875497482e-05, "loss": 7.5269, "step": 20310 }, { "epoch": 0.05714591049578015, "grad_norm": 26.375, "learning_rate": 1.9982101146712604e-05, "loss": 7.5538, "step": 20320 }, { "epoch": 0.05717403348322886, "grad_norm": 26.25, "learning_rate": 1.9982083529792694e-05, "loss": 7.853, "step": 20330 }, { "epoch": 0.05720215647067757, "grad_norm": 25.5, "learning_rate": 1.9982065904215112e-05, "loss": 7.0636, "step": 20340 }, { "epoch": 0.057230279458126275, "grad_norm": 28.375, "learning_rate": 1.9982048269979867e-05, "loss": 7.5472, "step": 20350 }, { "epoch": 0.05725840244557499, "grad_norm": 22.75, "learning_rate": 1.9982030627086976e-05, "loss": 7.5599, "step": 20360 }, { "epoch": 0.0572865254330237, "grad_norm": 20.75, "learning_rate": 1.9982012975536454e-05, "loss": 7.6208, "step": 20370 }, { "epoch": 0.05731464842047241, "grad_norm": 24.625, "learning_rate": 1.998199531532832e-05, "loss": 7.8392, "step": 20380 }, { "epoch": 0.05734277140792112, "grad_norm": 24.0, "learning_rate": 1.9981977646462582e-05, "loss": 8.524, "step": 20390 }, { "epoch": 0.05737089439536983, "grad_norm": 24.5, "learning_rate": 1.998195996893926e-05, "loss": 7.6165, "step": 20400 }, { "epoch": 0.057399017382818544, "grad_norm": 32.0, "learning_rate": 1.9981942282758372e-05, "loss": 7.5146, "step": 20410 }, { "epoch": 0.057427140370267256, "grad_norm": 27.875, "learning_rate": 1.998192458791993e-05, "loss": 7.4979, "step": 20420 }, { "epoch": 0.05745526335771596, "grad_norm": 35.5, "learning_rate": 1.9981906884423945e-05, "loss": 8.1473, "step": 20430 }, { "epoch": 0.05748338634516467, "grad_norm": 45.5, "learning_rate": 1.998188917227044e-05, "loss": 8.1192, "step": 20440 }, { "epoch": 0.057511509332613384, "grad_norm": 24.375, "learning_rate": 1.9981871451459426e-05, "loss": 8.0211, "step": 20450 }, { "epoch": 0.057539632320062095, "grad_norm": 37.75, "learning_rate": 1.9981853721990922e-05, "loss": 7.8262, "step": 20460 }, { "epoch": 0.05756775530751081, "grad_norm": 24.75, "learning_rate": 1.9981835983864938e-05, "loss": 7.8506, "step": 20470 }, { "epoch": 0.05759587829495952, "grad_norm": 22.875, "learning_rate": 1.9981818237081495e-05, "loss": 7.6987, "step": 20480 }, { "epoch": 0.05762400128240823, "grad_norm": 21.875, "learning_rate": 1.998180048164061e-05, "loss": 8.0511, "step": 20490 }, { "epoch": 0.05765212426985694, "grad_norm": 24.375, "learning_rate": 1.9981782717542287e-05, "loss": 7.8948, "step": 20500 }, { "epoch": 0.057680247257305646, "grad_norm": 38.75, "learning_rate": 1.998176494478655e-05, "loss": 7.9494, "step": 20510 }, { "epoch": 0.05770837024475436, "grad_norm": 45.5, "learning_rate": 1.9981747163373415e-05, "loss": 7.9632, "step": 20520 }, { "epoch": 0.05773649323220307, "grad_norm": 42.25, "learning_rate": 1.9981729373302895e-05, "loss": 7.441, "step": 20530 }, { "epoch": 0.05776461621965178, "grad_norm": 35.0, "learning_rate": 1.9981711574575007e-05, "loss": 8.0552, "step": 20540 }, { "epoch": 0.05779273920710049, "grad_norm": 22.125, "learning_rate": 1.998169376718976e-05, "loss": 7.7305, "step": 20550 }, { "epoch": 0.057820862194549204, "grad_norm": 23.75, "learning_rate": 1.998167595114718e-05, "loss": 7.6503, "step": 20560 }, { "epoch": 0.057848985181997915, "grad_norm": 29.875, "learning_rate": 1.9981658126447278e-05, "loss": 8.7395, "step": 20570 }, { "epoch": 0.05787710816944663, "grad_norm": 33.0, "learning_rate": 1.9981640293090064e-05, "loss": 7.163, "step": 20580 }, { "epoch": 0.05790523115689533, "grad_norm": 28.125, "learning_rate": 1.9981622451075563e-05, "loss": 8.8052, "step": 20590 }, { "epoch": 0.05793335414434404, "grad_norm": 30.5, "learning_rate": 1.9981604600403783e-05, "loss": 8.1743, "step": 20600 }, { "epoch": 0.057961477131792755, "grad_norm": 22.0, "learning_rate": 1.9981586741074747e-05, "loss": 7.8118, "step": 20610 }, { "epoch": 0.057989600119241466, "grad_norm": 27.25, "learning_rate": 1.998156887308846e-05, "loss": 8.459, "step": 20620 }, { "epoch": 0.05801772310669018, "grad_norm": 32.25, "learning_rate": 1.9981550996444943e-05, "loss": 7.7447, "step": 20630 }, { "epoch": 0.05804584609413889, "grad_norm": 29.125, "learning_rate": 1.9981533111144213e-05, "loss": 7.8542, "step": 20640 }, { "epoch": 0.0580739690815876, "grad_norm": 29.25, "learning_rate": 1.9981515217186284e-05, "loss": 7.7968, "step": 20650 }, { "epoch": 0.05810209206903631, "grad_norm": 29.875, "learning_rate": 1.998149731457117e-05, "loss": 8.4871, "step": 20660 }, { "epoch": 0.05813021505648502, "grad_norm": 22.25, "learning_rate": 1.9981479403298892e-05, "loss": 6.876, "step": 20670 }, { "epoch": 0.05815833804393373, "grad_norm": 29.5, "learning_rate": 1.998146148336946e-05, "loss": 8.569, "step": 20680 }, { "epoch": 0.05818646103138244, "grad_norm": 27.875, "learning_rate": 1.998144355478289e-05, "loss": 8.5098, "step": 20690 }, { "epoch": 0.05821458401883115, "grad_norm": 28.625, "learning_rate": 1.9981425617539202e-05, "loss": 8.0638, "step": 20700 }, { "epoch": 0.05824270700627986, "grad_norm": 53.5, "learning_rate": 1.9981407671638405e-05, "loss": 7.7695, "step": 20710 }, { "epoch": 0.058270829993728575, "grad_norm": 31.5, "learning_rate": 1.998138971708052e-05, "loss": 7.7284, "step": 20720 }, { "epoch": 0.058298952981177286, "grad_norm": 34.25, "learning_rate": 1.998137175386556e-05, "loss": 8.0025, "step": 20730 }, { "epoch": 0.058327075968626, "grad_norm": 25.0, "learning_rate": 1.998135378199354e-05, "loss": 8.1154, "step": 20740 }, { "epoch": 0.0583551989560747, "grad_norm": 33.25, "learning_rate": 1.9981335801464476e-05, "loss": 7.317, "step": 20750 }, { "epoch": 0.058383321943523414, "grad_norm": 41.5, "learning_rate": 1.9981317812278387e-05, "loss": 8.2104, "step": 20760 }, { "epoch": 0.058411444930972126, "grad_norm": 28.25, "learning_rate": 1.9981299814435286e-05, "loss": 8.4275, "step": 20770 }, { "epoch": 0.05843956791842084, "grad_norm": 34.75, "learning_rate": 1.9981281807935183e-05, "loss": 8.0986, "step": 20780 }, { "epoch": 0.05846769090586955, "grad_norm": 57.25, "learning_rate": 1.998126379277811e-05, "loss": 7.777, "step": 20790 }, { "epoch": 0.05849581389331826, "grad_norm": 27.0, "learning_rate": 1.9981245768964063e-05, "loss": 7.597, "step": 20800 }, { "epoch": 0.05852393688076697, "grad_norm": 30.125, "learning_rate": 1.9981227736493066e-05, "loss": 7.7218, "step": 20810 }, { "epoch": 0.05855205986821568, "grad_norm": 27.875, "learning_rate": 1.9981209695365134e-05, "loss": 7.4805, "step": 20820 }, { "epoch": 0.058580182855664395, "grad_norm": 24.75, "learning_rate": 1.9981191645580292e-05, "loss": 7.2145, "step": 20830 }, { "epoch": 0.0586083058431131, "grad_norm": 26.5, "learning_rate": 1.998117358713854e-05, "loss": 7.1732, "step": 20840 }, { "epoch": 0.05863642883056181, "grad_norm": 22.875, "learning_rate": 1.9981155520039904e-05, "loss": 8.2664, "step": 20850 }, { "epoch": 0.05866455181801052, "grad_norm": 32.75, "learning_rate": 1.9981137444284395e-05, "loss": 7.7409, "step": 20860 }, { "epoch": 0.058692674805459234, "grad_norm": 36.0, "learning_rate": 1.9981119359872034e-05, "loss": 8.3165, "step": 20870 }, { "epoch": 0.058720797792907946, "grad_norm": 27.5, "learning_rate": 1.9981101266802826e-05, "loss": 8.2613, "step": 20880 }, { "epoch": 0.05874892078035666, "grad_norm": 29.5, "learning_rate": 1.99810831650768e-05, "loss": 8.018, "step": 20890 }, { "epoch": 0.05877704376780537, "grad_norm": 43.5, "learning_rate": 1.9981065054693963e-05, "loss": 7.8357, "step": 20900 }, { "epoch": 0.05880516675525408, "grad_norm": 49.5, "learning_rate": 1.9981046935654335e-05, "loss": 7.8559, "step": 20910 }, { "epoch": 0.058833289742702785, "grad_norm": 30.0, "learning_rate": 1.9981028807957925e-05, "loss": 8.2599, "step": 20920 }, { "epoch": 0.0588614127301515, "grad_norm": 24.375, "learning_rate": 1.9981010671604758e-05, "loss": 7.7116, "step": 20930 }, { "epoch": 0.05888953571760021, "grad_norm": 25.125, "learning_rate": 1.9980992526594846e-05, "loss": 8.4724, "step": 20940 }, { "epoch": 0.05891765870504892, "grad_norm": 48.5, "learning_rate": 1.99809743729282e-05, "loss": 7.5649, "step": 20950 }, { "epoch": 0.05894578169249763, "grad_norm": 21.75, "learning_rate": 1.998095621060484e-05, "loss": 7.1761, "step": 20960 }, { "epoch": 0.05897390467994634, "grad_norm": 25.25, "learning_rate": 1.9980938039624785e-05, "loss": 8.121, "step": 20970 }, { "epoch": 0.059002027667395054, "grad_norm": 21.75, "learning_rate": 1.9980919859988044e-05, "loss": 7.9191, "step": 20980 }, { "epoch": 0.059030150654843766, "grad_norm": 32.75, "learning_rate": 1.9980901671694637e-05, "loss": 8.073, "step": 20990 }, { "epoch": 0.05905827364229247, "grad_norm": 24.875, "learning_rate": 1.998088347474458e-05, "loss": 8.4625, "step": 21000 }, { "epoch": 0.05908639662974118, "grad_norm": 37.25, "learning_rate": 1.9980865269137886e-05, "loss": 8.4095, "step": 21010 }, { "epoch": 0.059114519617189894, "grad_norm": 27.875, "learning_rate": 1.9980847054874574e-05, "loss": 8.7108, "step": 21020 }, { "epoch": 0.059142642604638605, "grad_norm": 27.0, "learning_rate": 1.9980828831954656e-05, "loss": 7.7098, "step": 21030 }, { "epoch": 0.05917076559208732, "grad_norm": 29.5, "learning_rate": 1.998081060037815e-05, "loss": 8.2652, "step": 21040 }, { "epoch": 0.05919888857953603, "grad_norm": 39.5, "learning_rate": 1.9980792360145075e-05, "loss": 8.1287, "step": 21050 }, { "epoch": 0.05922701156698474, "grad_norm": 34.75, "learning_rate": 1.9980774111255442e-05, "loss": 8.1299, "step": 21060 }, { "epoch": 0.05925513455443345, "grad_norm": 42.75, "learning_rate": 1.9980755853709265e-05, "loss": 7.7294, "step": 21070 }, { "epoch": 0.059283257541882156, "grad_norm": 24.375, "learning_rate": 1.998073758750657e-05, "loss": 7.7457, "step": 21080 }, { "epoch": 0.05931138052933087, "grad_norm": 53.75, "learning_rate": 1.998071931264736e-05, "loss": 7.8513, "step": 21090 }, { "epoch": 0.05933950351677958, "grad_norm": 25.375, "learning_rate": 1.9980701029131658e-05, "loss": 7.5523, "step": 21100 }, { "epoch": 0.05936762650422829, "grad_norm": 36.0, "learning_rate": 1.998068273695948e-05, "loss": 7.7128, "step": 21110 }, { "epoch": 0.059395749491677, "grad_norm": 26.75, "learning_rate": 1.9980664436130842e-05, "loss": 7.7281, "step": 21120 }, { "epoch": 0.059423872479125714, "grad_norm": 37.5, "learning_rate": 1.998064612664576e-05, "loss": 8.2988, "step": 21130 }, { "epoch": 0.059451995466574425, "grad_norm": 23.25, "learning_rate": 1.9980627808504244e-05, "loss": 8.0079, "step": 21140 }, { "epoch": 0.05948011845402314, "grad_norm": 32.25, "learning_rate": 1.9980609481706317e-05, "loss": 7.5455, "step": 21150 }, { "epoch": 0.05950824144147184, "grad_norm": 27.125, "learning_rate": 1.9980591146251994e-05, "loss": 8.1719, "step": 21160 }, { "epoch": 0.05953636442892055, "grad_norm": 42.25, "learning_rate": 1.998057280214128e-05, "loss": 8.4557, "step": 21170 }, { "epoch": 0.059564487416369265, "grad_norm": 26.625, "learning_rate": 1.998055444937421e-05, "loss": 8.1354, "step": 21180 }, { "epoch": 0.059592610403817976, "grad_norm": 26.75, "learning_rate": 1.9980536087950787e-05, "loss": 7.8391, "step": 21190 }, { "epoch": 0.05962073339126669, "grad_norm": 43.25, "learning_rate": 1.998051771787103e-05, "loss": 8.4365, "step": 21200 }, { "epoch": 0.0596488563787154, "grad_norm": 30.25, "learning_rate": 1.9980499339134953e-05, "loss": 7.9535, "step": 21210 }, { "epoch": 0.05967697936616411, "grad_norm": 32.75, "learning_rate": 1.9980480951742576e-05, "loss": 7.9947, "step": 21220 }, { "epoch": 0.05970510235361282, "grad_norm": 22.25, "learning_rate": 1.998046255569391e-05, "loss": 7.6822, "step": 21230 }, { "epoch": 0.05973322534106153, "grad_norm": 21.25, "learning_rate": 1.998044415098898e-05, "loss": 7.849, "step": 21240 }, { "epoch": 0.05976134832851024, "grad_norm": 28.375, "learning_rate": 1.9980425737627788e-05, "loss": 7.8298, "step": 21250 }, { "epoch": 0.05978947131595895, "grad_norm": 26.0, "learning_rate": 1.998040731561036e-05, "loss": 8.5821, "step": 21260 }, { "epoch": 0.05981759430340766, "grad_norm": 51.75, "learning_rate": 1.998038888493671e-05, "loss": 7.5051, "step": 21270 }, { "epoch": 0.05984571729085637, "grad_norm": 24.0, "learning_rate": 1.9980370445606853e-05, "loss": 7.2337, "step": 21280 }, { "epoch": 0.059873840278305085, "grad_norm": 28.125, "learning_rate": 1.9980351997620804e-05, "loss": 7.8206, "step": 21290 }, { "epoch": 0.059901963265753796, "grad_norm": 36.5, "learning_rate": 1.9980333540978587e-05, "loss": 8.2115, "step": 21300 }, { "epoch": 0.05993008625320251, "grad_norm": 39.0, "learning_rate": 1.998031507568021e-05, "loss": 8.2074, "step": 21310 }, { "epoch": 0.05995820924065121, "grad_norm": 26.0, "learning_rate": 1.9980296601725685e-05, "loss": 7.7596, "step": 21320 }, { "epoch": 0.059986332228099924, "grad_norm": 24.0, "learning_rate": 1.9980278119115034e-05, "loss": 7.5902, "step": 21330 }, { "epoch": 0.060014455215548636, "grad_norm": 23.125, "learning_rate": 1.9980259627848278e-05, "loss": 7.5393, "step": 21340 }, { "epoch": 0.06004257820299735, "grad_norm": 33.5, "learning_rate": 1.9980241127925425e-05, "loss": 8.0611, "step": 21350 }, { "epoch": 0.06007070119044606, "grad_norm": 32.0, "learning_rate": 1.9980222619346492e-05, "loss": 8.417, "step": 21360 }, { "epoch": 0.06009882417789477, "grad_norm": 24.5, "learning_rate": 1.99802041021115e-05, "loss": 7.9667, "step": 21370 }, { "epoch": 0.06012694716534348, "grad_norm": 53.25, "learning_rate": 1.9980185576220455e-05, "loss": 7.9464, "step": 21380 }, { "epoch": 0.060155070152792194, "grad_norm": 31.25, "learning_rate": 1.9980167041673386e-05, "loss": 7.2817, "step": 21390 }, { "epoch": 0.0601831931402409, "grad_norm": 30.875, "learning_rate": 1.99801484984703e-05, "loss": 7.3288, "step": 21400 }, { "epoch": 0.06021131612768961, "grad_norm": 30.625, "learning_rate": 1.9980129946611223e-05, "loss": 7.5807, "step": 21410 }, { "epoch": 0.06023943911513832, "grad_norm": 31.75, "learning_rate": 1.9980111386096156e-05, "loss": 7.8464, "step": 21420 }, { "epoch": 0.06026756210258703, "grad_norm": 45.5, "learning_rate": 1.9980092816925127e-05, "loss": 7.4515, "step": 21430 }, { "epoch": 0.060295685090035744, "grad_norm": 29.375, "learning_rate": 1.9980074239098147e-05, "loss": 8.4607, "step": 21440 }, { "epoch": 0.060323808077484456, "grad_norm": 17.875, "learning_rate": 1.9980055652615236e-05, "loss": 8.4047, "step": 21450 }, { "epoch": 0.06035193106493317, "grad_norm": 29.375, "learning_rate": 1.9980037057476406e-05, "loss": 7.3221, "step": 21460 }, { "epoch": 0.06038005405238188, "grad_norm": 30.875, "learning_rate": 1.9980018453681673e-05, "loss": 7.9673, "step": 21470 }, { "epoch": 0.060408177039830584, "grad_norm": 23.625, "learning_rate": 1.9979999841231055e-05, "loss": 8.3618, "step": 21480 }, { "epoch": 0.060436300027279295, "grad_norm": 32.25, "learning_rate": 1.997998122012457e-05, "loss": 7.7866, "step": 21490 }, { "epoch": 0.06046442301472801, "grad_norm": 33.75, "learning_rate": 1.9979962590362233e-05, "loss": 8.4761, "step": 21500 }, { "epoch": 0.06049254600217672, "grad_norm": 30.0, "learning_rate": 1.9979943951944055e-05, "loss": 8.1006, "step": 21510 }, { "epoch": 0.06052066898962543, "grad_norm": 31.125, "learning_rate": 1.997992530487006e-05, "loss": 8.0258, "step": 21520 }, { "epoch": 0.06054879197707414, "grad_norm": 27.5, "learning_rate": 1.9979906649140258e-05, "loss": 7.5496, "step": 21530 }, { "epoch": 0.06057691496452285, "grad_norm": 27.125, "learning_rate": 1.9979887984754673e-05, "loss": 8.1998, "step": 21540 }, { "epoch": 0.060605037951971565, "grad_norm": 29.375, "learning_rate": 1.997986931171331e-05, "loss": 8.098, "step": 21550 }, { "epoch": 0.060633160939420276, "grad_norm": 21.75, "learning_rate": 1.9979850630016192e-05, "loss": 8.0011, "step": 21560 }, { "epoch": 0.06066128392686898, "grad_norm": 27.25, "learning_rate": 1.9979831939663338e-05, "loss": 7.7268, "step": 21570 }, { "epoch": 0.06068940691431769, "grad_norm": 28.75, "learning_rate": 1.9979813240654756e-05, "loss": 7.9227, "step": 21580 }, { "epoch": 0.060717529901766404, "grad_norm": 32.25, "learning_rate": 1.9979794532990473e-05, "loss": 8.003, "step": 21590 }, { "epoch": 0.060745652889215115, "grad_norm": 32.25, "learning_rate": 1.9979775816670495e-05, "loss": 7.8461, "step": 21600 }, { "epoch": 0.06077377587666383, "grad_norm": 48.0, "learning_rate": 1.997975709169484e-05, "loss": 8.3639, "step": 21610 }, { "epoch": 0.06080189886411254, "grad_norm": 24.375, "learning_rate": 1.9979738358063532e-05, "loss": 6.7699, "step": 21620 }, { "epoch": 0.06083002185156125, "grad_norm": 33.25, "learning_rate": 1.9979719615776578e-05, "loss": 8.0705, "step": 21630 }, { "epoch": 0.06085814483900996, "grad_norm": 77.0, "learning_rate": 1.9979700864833996e-05, "loss": 7.6375, "step": 21640 }, { "epoch": 0.060886267826458666, "grad_norm": 23.875, "learning_rate": 1.997968210523581e-05, "loss": 8.3532, "step": 21650 }, { "epoch": 0.06091439081390738, "grad_norm": 41.75, "learning_rate": 1.997966333698203e-05, "loss": 8.5647, "step": 21660 }, { "epoch": 0.06094251380135609, "grad_norm": 51.0, "learning_rate": 1.9979644560072667e-05, "loss": 7.767, "step": 21670 }, { "epoch": 0.0609706367888048, "grad_norm": 45.5, "learning_rate": 1.9979625774507746e-05, "loss": 8.4055, "step": 21680 }, { "epoch": 0.06099875977625351, "grad_norm": 22.875, "learning_rate": 1.9979606980287284e-05, "loss": 8.0404, "step": 21690 }, { "epoch": 0.061026882763702224, "grad_norm": 61.25, "learning_rate": 1.997958817741129e-05, "loss": 8.2913, "step": 21700 }, { "epoch": 0.061055005751150936, "grad_norm": 40.25, "learning_rate": 1.9979569365879784e-05, "loss": 7.6914, "step": 21710 }, { "epoch": 0.06108312873859965, "grad_norm": 29.0, "learning_rate": 1.9979550545692785e-05, "loss": 8.1403, "step": 21720 }, { "epoch": 0.06111125172604835, "grad_norm": 31.25, "learning_rate": 1.9979531716850304e-05, "loss": 7.5098, "step": 21730 }, { "epoch": 0.06113937471349706, "grad_norm": 27.25, "learning_rate": 1.997951287935236e-05, "loss": 7.9102, "step": 21740 }, { "epoch": 0.061167497700945775, "grad_norm": 23.0, "learning_rate": 1.997949403319897e-05, "loss": 7.7421, "step": 21750 }, { "epoch": 0.061195620688394486, "grad_norm": 25.125, "learning_rate": 1.9979475178390152e-05, "loss": 7.9892, "step": 21760 }, { "epoch": 0.0612237436758432, "grad_norm": 28.75, "learning_rate": 1.997945631492592e-05, "loss": 8.8751, "step": 21770 }, { "epoch": 0.06125186666329191, "grad_norm": 28.875, "learning_rate": 1.9979437442806285e-05, "loss": 8.0082, "step": 21780 }, { "epoch": 0.06127998965074062, "grad_norm": 41.0, "learning_rate": 1.9979418562031273e-05, "loss": 7.0084, "step": 21790 }, { "epoch": 0.06130811263818933, "grad_norm": 23.125, "learning_rate": 1.9979399672600896e-05, "loss": 7.8341, "step": 21800 }, { "epoch": 0.06133623562563804, "grad_norm": 25.5, "learning_rate": 1.997938077451517e-05, "loss": 7.8657, "step": 21810 }, { "epoch": 0.06136435861308675, "grad_norm": 26.5, "learning_rate": 1.9979361867774108e-05, "loss": 6.9899, "step": 21820 }, { "epoch": 0.06139248160053546, "grad_norm": 22.125, "learning_rate": 1.9979342952377735e-05, "loss": 7.7231, "step": 21830 }, { "epoch": 0.06142060458798417, "grad_norm": 40.0, "learning_rate": 1.9979324028326062e-05, "loss": 8.3969, "step": 21840 }, { "epoch": 0.061448727575432883, "grad_norm": 27.0, "learning_rate": 1.9979305095619107e-05, "loss": 7.9266, "step": 21850 }, { "epoch": 0.061476850562881595, "grad_norm": 21.375, "learning_rate": 1.9979286154256882e-05, "loss": 8.3608, "step": 21860 }, { "epoch": 0.06150497355033031, "grad_norm": 82.5, "learning_rate": 1.997926720423941e-05, "loss": 7.8816, "step": 21870 }, { "epoch": 0.06153309653777902, "grad_norm": 25.625, "learning_rate": 1.9979248245566704e-05, "loss": 7.2015, "step": 21880 }, { "epoch": 0.06156121952522772, "grad_norm": 42.0, "learning_rate": 1.997922927823878e-05, "loss": 8.785, "step": 21890 }, { "epoch": 0.061589342512676434, "grad_norm": 17.0, "learning_rate": 1.9979210302255654e-05, "loss": 7.5792, "step": 21900 }, { "epoch": 0.061617465500125146, "grad_norm": 40.25, "learning_rate": 1.9979191317617347e-05, "loss": 8.1866, "step": 21910 }, { "epoch": 0.06164558848757386, "grad_norm": 37.0, "learning_rate": 1.997917232432387e-05, "loss": 7.8444, "step": 21920 }, { "epoch": 0.06167371147502257, "grad_norm": 33.25, "learning_rate": 1.9979153322375246e-05, "loss": 8.1389, "step": 21930 }, { "epoch": 0.06170183446247128, "grad_norm": 33.25, "learning_rate": 1.9979134311771482e-05, "loss": 7.9676, "step": 21940 }, { "epoch": 0.06172995744991999, "grad_norm": 28.0, "learning_rate": 1.99791152925126e-05, "loss": 7.6079, "step": 21950 }, { "epoch": 0.061758080437368704, "grad_norm": 51.75, "learning_rate": 1.9979096264598618e-05, "loss": 8.2742, "step": 21960 }, { "epoch": 0.06178620342481741, "grad_norm": 34.75, "learning_rate": 1.997907722802955e-05, "loss": 8.3118, "step": 21970 }, { "epoch": 0.06181432641226612, "grad_norm": 24.25, "learning_rate": 1.9979058182805416e-05, "loss": 7.8057, "step": 21980 }, { "epoch": 0.06184244939971483, "grad_norm": 25.25, "learning_rate": 1.9979039128926224e-05, "loss": 7.8729, "step": 21990 }, { "epoch": 0.06187057238716354, "grad_norm": 31.25, "learning_rate": 1.9979020066392e-05, "loss": 7.5631, "step": 22000 }, { "epoch": 0.061898695374612255, "grad_norm": 33.25, "learning_rate": 1.9979000995202757e-05, "loss": 8.5476, "step": 22010 }, { "epoch": 0.061926818362060966, "grad_norm": 29.25, "learning_rate": 1.9978981915358512e-05, "loss": 8.0555, "step": 22020 }, { "epoch": 0.06195494134950968, "grad_norm": 50.5, "learning_rate": 1.9978962826859278e-05, "loss": 7.8041, "step": 22030 }, { "epoch": 0.06198306433695839, "grad_norm": 31.625, "learning_rate": 1.9978943729705074e-05, "loss": 8.1923, "step": 22040 }, { "epoch": 0.062011187324407094, "grad_norm": 43.0, "learning_rate": 1.997892462389592e-05, "loss": 8.066, "step": 22050 }, { "epoch": 0.062039310311855805, "grad_norm": 42.0, "learning_rate": 1.9978905509431828e-05, "loss": 8.7097, "step": 22060 }, { "epoch": 0.06206743329930452, "grad_norm": 40.0, "learning_rate": 1.9978886386312816e-05, "loss": 8.1241, "step": 22070 }, { "epoch": 0.06209555628675323, "grad_norm": 28.25, "learning_rate": 1.99788672545389e-05, "loss": 6.6821, "step": 22080 }, { "epoch": 0.06212367927420194, "grad_norm": 38.25, "learning_rate": 1.9978848114110097e-05, "loss": 7.7368, "step": 22090 }, { "epoch": 0.06215180226165065, "grad_norm": 25.625, "learning_rate": 1.9978828965026428e-05, "loss": 8.2003, "step": 22100 }, { "epoch": 0.06217992524909936, "grad_norm": 21.125, "learning_rate": 1.99788098072879e-05, "loss": 7.9211, "step": 22110 }, { "epoch": 0.062208048236548075, "grad_norm": 27.75, "learning_rate": 1.997879064089454e-05, "loss": 8.2872, "step": 22120 }, { "epoch": 0.06223617122399678, "grad_norm": 25.375, "learning_rate": 1.9978771465846356e-05, "loss": 7.5588, "step": 22130 }, { "epoch": 0.06226429421144549, "grad_norm": 40.5, "learning_rate": 1.997875228214337e-05, "loss": 8.0747, "step": 22140 }, { "epoch": 0.0622924171988942, "grad_norm": 41.75, "learning_rate": 1.9978733089785597e-05, "loss": 7.539, "step": 22150 }, { "epoch": 0.062320540186342914, "grad_norm": 34.75, "learning_rate": 1.997871388877305e-05, "loss": 8.34, "step": 22160 }, { "epoch": 0.062348663173791626, "grad_norm": 31.625, "learning_rate": 1.9978694679105754e-05, "loss": 6.9892, "step": 22170 }, { "epoch": 0.06237678616124034, "grad_norm": 27.5, "learning_rate": 1.997867546078372e-05, "loss": 8.4835, "step": 22180 }, { "epoch": 0.06240490914868905, "grad_norm": 31.875, "learning_rate": 1.9978656233806967e-05, "loss": 7.5068, "step": 22190 }, { "epoch": 0.06243303213613776, "grad_norm": 21.875, "learning_rate": 1.9978636998175508e-05, "loss": 8.2025, "step": 22200 }, { "epoch": 0.06246115512358647, "grad_norm": 36.0, "learning_rate": 1.997861775388936e-05, "loss": 7.1691, "step": 22210 }, { "epoch": 0.062489278111035176, "grad_norm": 42.25, "learning_rate": 1.9978598500948546e-05, "loss": 7.2822, "step": 22220 }, { "epoch": 0.06251740109848389, "grad_norm": 25.125, "learning_rate": 1.9978579239353077e-05, "loss": 7.6716, "step": 22230 }, { "epoch": 0.0625455240859326, "grad_norm": 33.5, "learning_rate": 1.997855996910297e-05, "loss": 7.9005, "step": 22240 }, { "epoch": 0.06257364707338131, "grad_norm": 25.375, "learning_rate": 1.9978540690198246e-05, "loss": 7.7899, "step": 22250 }, { "epoch": 0.06260177006083002, "grad_norm": 20.625, "learning_rate": 1.9978521402638917e-05, "loss": 8.4835, "step": 22260 }, { "epoch": 0.06262989304827873, "grad_norm": 23.625, "learning_rate": 1.9978502106424998e-05, "loss": 8.2887, "step": 22270 }, { "epoch": 0.06265801603572745, "grad_norm": 28.25, "learning_rate": 1.997848280155651e-05, "loss": 6.7868, "step": 22280 }, { "epoch": 0.06268613902317616, "grad_norm": 24.875, "learning_rate": 1.9978463488033473e-05, "loss": 7.5978, "step": 22290 }, { "epoch": 0.06271426201062487, "grad_norm": 26.625, "learning_rate": 1.9978444165855894e-05, "loss": 8.4458, "step": 22300 }, { "epoch": 0.06274238499807358, "grad_norm": 23.5, "learning_rate": 1.99784248350238e-05, "loss": 8.8546, "step": 22310 }, { "epoch": 0.06277050798552229, "grad_norm": 27.875, "learning_rate": 1.99784054955372e-05, "loss": 8.3741, "step": 22320 }, { "epoch": 0.062798630972971, "grad_norm": 22.375, "learning_rate": 1.9978386147396116e-05, "loss": 8.1057, "step": 22330 }, { "epoch": 0.0628267539604197, "grad_norm": 26.125, "learning_rate": 1.997836679060056e-05, "loss": 8.0162, "step": 22340 }, { "epoch": 0.06285487694786841, "grad_norm": 34.0, "learning_rate": 1.9978347425150553e-05, "loss": 8.1395, "step": 22350 }, { "epoch": 0.06288299993531712, "grad_norm": 39.0, "learning_rate": 1.997832805104611e-05, "loss": 8.2695, "step": 22360 }, { "epoch": 0.06291112292276584, "grad_norm": 112.0, "learning_rate": 1.9978308668287247e-05, "loss": 7.8599, "step": 22370 }, { "epoch": 0.06293924591021455, "grad_norm": 28.625, "learning_rate": 1.9978289276873984e-05, "loss": 8.5067, "step": 22380 }, { "epoch": 0.06296736889766326, "grad_norm": 31.375, "learning_rate": 1.9978269876806335e-05, "loss": 8.0129, "step": 22390 }, { "epoch": 0.06299549188511197, "grad_norm": 23.25, "learning_rate": 1.9978250468084316e-05, "loss": 7.2293, "step": 22400 }, { "epoch": 0.06302361487256068, "grad_norm": 37.75, "learning_rate": 1.9978231050707945e-05, "loss": 7.6755, "step": 22410 }, { "epoch": 0.0630517378600094, "grad_norm": 25.5, "learning_rate": 1.997821162467724e-05, "loss": 7.6361, "step": 22420 }, { "epoch": 0.0630798608474581, "grad_norm": 27.375, "learning_rate": 1.9978192189992217e-05, "loss": 7.3591, "step": 22430 }, { "epoch": 0.06310798383490682, "grad_norm": 36.5, "learning_rate": 1.9978172746652893e-05, "loss": 7.3798, "step": 22440 }, { "epoch": 0.06313610682235553, "grad_norm": 22.75, "learning_rate": 1.9978153294659282e-05, "loss": 7.3457, "step": 22450 }, { "epoch": 0.06316422980980424, "grad_norm": 38.0, "learning_rate": 1.9978133834011406e-05, "loss": 7.782, "step": 22460 }, { "epoch": 0.06319235279725295, "grad_norm": 24.5, "learning_rate": 1.997811436470928e-05, "loss": 7.7166, "step": 22470 }, { "epoch": 0.06322047578470166, "grad_norm": 44.75, "learning_rate": 1.997809488675292e-05, "loss": 8.1827, "step": 22480 }, { "epoch": 0.06324859877215037, "grad_norm": 44.25, "learning_rate": 1.997807540014234e-05, "loss": 7.8697, "step": 22490 }, { "epoch": 0.06327672175959907, "grad_norm": 39.0, "learning_rate": 1.9978055904877564e-05, "loss": 7.6617, "step": 22500 }, { "epoch": 0.06330484474704778, "grad_norm": 29.5, "learning_rate": 1.9978036400958603e-05, "loss": 8.3092, "step": 22510 }, { "epoch": 0.0633329677344965, "grad_norm": 44.0, "learning_rate": 1.9978016888385476e-05, "loss": 7.6671, "step": 22520 }, { "epoch": 0.0633610907219452, "grad_norm": 32.25, "learning_rate": 1.99779973671582e-05, "loss": 7.9161, "step": 22530 }, { "epoch": 0.06338921370939392, "grad_norm": 32.0, "learning_rate": 1.9977977837276793e-05, "loss": 8.2919, "step": 22540 }, { "epoch": 0.06341733669684263, "grad_norm": 22.875, "learning_rate": 1.997795829874127e-05, "loss": 8.2044, "step": 22550 }, { "epoch": 0.06344545968429134, "grad_norm": 23.5, "learning_rate": 1.9977938751551647e-05, "loss": 9.1357, "step": 22560 }, { "epoch": 0.06347358267174005, "grad_norm": 24.375, "learning_rate": 1.9977919195707948e-05, "loss": 8.6587, "step": 22570 }, { "epoch": 0.06350170565918876, "grad_norm": 33.25, "learning_rate": 1.9977899631210177e-05, "loss": 7.6374, "step": 22580 }, { "epoch": 0.06352982864663748, "grad_norm": 25.875, "learning_rate": 1.997788005805836e-05, "loss": 7.7163, "step": 22590 }, { "epoch": 0.06355795163408619, "grad_norm": 35.25, "learning_rate": 1.997786047625252e-05, "loss": 8.3111, "step": 22600 }, { "epoch": 0.0635860746215349, "grad_norm": 35.75, "learning_rate": 1.997784088579266e-05, "loss": 7.9682, "step": 22610 }, { "epoch": 0.06361419760898361, "grad_norm": 31.375, "learning_rate": 1.9977821286678804e-05, "loss": 8.1675, "step": 22620 }, { "epoch": 0.06364232059643232, "grad_norm": 23.0, "learning_rate": 1.9977801678910968e-05, "loss": 8.3013, "step": 22630 }, { "epoch": 0.06367044358388103, "grad_norm": 43.5, "learning_rate": 1.9977782062489172e-05, "loss": 8.2498, "step": 22640 }, { "epoch": 0.06369856657132975, "grad_norm": 24.75, "learning_rate": 1.9977762437413428e-05, "loss": 7.3001, "step": 22650 }, { "epoch": 0.06372668955877844, "grad_norm": 25.125, "learning_rate": 1.997774280368376e-05, "loss": 7.5571, "step": 22660 }, { "epoch": 0.06375481254622715, "grad_norm": 56.25, "learning_rate": 1.9977723161300176e-05, "loss": 7.8752, "step": 22670 }, { "epoch": 0.06378293553367587, "grad_norm": 26.5, "learning_rate": 1.99777035102627e-05, "loss": 7.0414, "step": 22680 }, { "epoch": 0.06381105852112458, "grad_norm": 33.25, "learning_rate": 1.9977683850571343e-05, "loss": 7.8153, "step": 22690 }, { "epoch": 0.06383918150857329, "grad_norm": 41.0, "learning_rate": 1.997766418222613e-05, "loss": 7.9327, "step": 22700 }, { "epoch": 0.063867304496022, "grad_norm": 24.5, "learning_rate": 1.997764450522707e-05, "loss": 7.7052, "step": 22710 }, { "epoch": 0.06389542748347071, "grad_norm": 37.25, "learning_rate": 1.9977624819574188e-05, "loss": 7.5229, "step": 22720 }, { "epoch": 0.06392355047091942, "grad_norm": 34.25, "learning_rate": 1.9977605125267493e-05, "loss": 7.8829, "step": 22730 }, { "epoch": 0.06395167345836814, "grad_norm": 25.625, "learning_rate": 1.9977585422307008e-05, "loss": 8.1108, "step": 22740 }, { "epoch": 0.06397979644581685, "grad_norm": 24.75, "learning_rate": 1.9977565710692746e-05, "loss": 7.2474, "step": 22750 }, { "epoch": 0.06400791943326556, "grad_norm": 32.0, "learning_rate": 1.997754599042473e-05, "loss": 7.7898, "step": 22760 }, { "epoch": 0.06403604242071427, "grad_norm": 26.125, "learning_rate": 1.9977526261502968e-05, "loss": 7.7363, "step": 22770 }, { "epoch": 0.06406416540816298, "grad_norm": 29.5, "learning_rate": 1.997750652392749e-05, "loss": 7.9805, "step": 22780 }, { "epoch": 0.0640922883956117, "grad_norm": 37.0, "learning_rate": 1.9977486777698298e-05, "loss": 8.2608, "step": 22790 }, { "epoch": 0.0641204113830604, "grad_norm": 57.25, "learning_rate": 1.9977467022815418e-05, "loss": 8.3327, "step": 22800 }, { "epoch": 0.06414853437050912, "grad_norm": 26.125, "learning_rate": 1.9977447259278867e-05, "loss": 8.0268, "step": 22810 }, { "epoch": 0.06417665735795781, "grad_norm": 26.375, "learning_rate": 1.9977427487088665e-05, "loss": 7.559, "step": 22820 }, { "epoch": 0.06420478034540653, "grad_norm": 35.75, "learning_rate": 1.997740770624482e-05, "loss": 8.1212, "step": 22830 }, { "epoch": 0.06423290333285524, "grad_norm": 36.0, "learning_rate": 1.9977387916747354e-05, "loss": 7.5903, "step": 22840 }, { "epoch": 0.06426102632030395, "grad_norm": 25.75, "learning_rate": 1.9977368118596287e-05, "loss": 8.2833, "step": 22850 }, { "epoch": 0.06428914930775266, "grad_norm": 29.375, "learning_rate": 1.9977348311791632e-05, "loss": 8.1548, "step": 22860 }, { "epoch": 0.06431727229520137, "grad_norm": 27.25, "learning_rate": 1.997732849633341e-05, "loss": 8.2613, "step": 22870 }, { "epoch": 0.06434539528265008, "grad_norm": 25.25, "learning_rate": 1.9977308672221628e-05, "loss": 8.0939, "step": 22880 }, { "epoch": 0.0643735182700988, "grad_norm": 45.25, "learning_rate": 1.997728883945632e-05, "loss": 7.6367, "step": 22890 }, { "epoch": 0.0644016412575475, "grad_norm": 28.5, "learning_rate": 1.997726899803749e-05, "loss": 7.9615, "step": 22900 }, { "epoch": 0.06442976424499622, "grad_norm": 97.0, "learning_rate": 1.997724914796516e-05, "loss": 8.1091, "step": 22910 }, { "epoch": 0.06445788723244493, "grad_norm": 37.0, "learning_rate": 1.9977229289239348e-05, "loss": 7.8084, "step": 22920 }, { "epoch": 0.06448601021989364, "grad_norm": 24.875, "learning_rate": 1.9977209421860067e-05, "loss": 8.6558, "step": 22930 }, { "epoch": 0.06451413320734235, "grad_norm": 28.875, "learning_rate": 1.9977189545827342e-05, "loss": 8.0869, "step": 22940 }, { "epoch": 0.06454225619479106, "grad_norm": 28.75, "learning_rate": 1.9977169661141183e-05, "loss": 8.2552, "step": 22950 }, { "epoch": 0.06457037918223978, "grad_norm": 26.125, "learning_rate": 1.997714976780161e-05, "loss": 7.7331, "step": 22960 }, { "epoch": 0.06459850216968849, "grad_norm": 35.5, "learning_rate": 1.9977129865808637e-05, "loss": 7.1371, "step": 22970 }, { "epoch": 0.0646266251571372, "grad_norm": 30.0, "learning_rate": 1.9977109955162287e-05, "loss": 8.1656, "step": 22980 }, { "epoch": 0.0646547481445859, "grad_norm": 33.5, "learning_rate": 1.9977090035862577e-05, "loss": 7.9018, "step": 22990 }, { "epoch": 0.06468287113203461, "grad_norm": 21.0, "learning_rate": 1.9977070107909518e-05, "loss": 8.1548, "step": 23000 }, { "epoch": 0.06471099411948332, "grad_norm": 25.0, "learning_rate": 1.9977050171303132e-05, "loss": 7.8347, "step": 23010 }, { "epoch": 0.06473911710693203, "grad_norm": 58.5, "learning_rate": 1.9977030226043435e-05, "loss": 8.1935, "step": 23020 }, { "epoch": 0.06476724009438074, "grad_norm": 24.5, "learning_rate": 1.9977010272130445e-05, "loss": 8.4827, "step": 23030 }, { "epoch": 0.06479536308182945, "grad_norm": 26.875, "learning_rate": 1.9976990309564177e-05, "loss": 7.1383, "step": 23040 }, { "epoch": 0.06482348606927817, "grad_norm": 44.5, "learning_rate": 1.997697033834465e-05, "loss": 8.0067, "step": 23050 }, { "epoch": 0.06485160905672688, "grad_norm": 36.75, "learning_rate": 1.9976950358471884e-05, "loss": 7.9116, "step": 23060 }, { "epoch": 0.06487973204417559, "grad_norm": 23.375, "learning_rate": 1.9976930369945892e-05, "loss": 7.728, "step": 23070 }, { "epoch": 0.0649078550316243, "grad_norm": 31.375, "learning_rate": 1.9976910372766697e-05, "loss": 7.7307, "step": 23080 }, { "epoch": 0.06493597801907301, "grad_norm": 25.75, "learning_rate": 1.9976890366934308e-05, "loss": 7.9723, "step": 23090 }, { "epoch": 0.06496410100652172, "grad_norm": 31.0, "learning_rate": 1.997687035244875e-05, "loss": 8.0428, "step": 23100 }, { "epoch": 0.06499222399397044, "grad_norm": 30.75, "learning_rate": 1.9976850329310037e-05, "loss": 8.3067, "step": 23110 }, { "epoch": 0.06502034698141915, "grad_norm": 38.25, "learning_rate": 1.9976830297518185e-05, "loss": 7.6053, "step": 23120 }, { "epoch": 0.06504846996886786, "grad_norm": 33.75, "learning_rate": 1.9976810257073214e-05, "loss": 7.0956, "step": 23130 }, { "epoch": 0.06507659295631657, "grad_norm": 22.625, "learning_rate": 1.9976790207975138e-05, "loss": 8.2208, "step": 23140 }, { "epoch": 0.06510471594376527, "grad_norm": 27.25, "learning_rate": 1.997677015022398e-05, "loss": 7.7637, "step": 23150 }, { "epoch": 0.06513283893121398, "grad_norm": 32.75, "learning_rate": 1.9976750083819753e-05, "loss": 8.0825, "step": 23160 }, { "epoch": 0.06516096191866269, "grad_norm": 29.75, "learning_rate": 1.9976730008762475e-05, "loss": 7.8594, "step": 23170 }, { "epoch": 0.0651890849061114, "grad_norm": 29.5, "learning_rate": 1.9976709925052162e-05, "loss": 7.7548, "step": 23180 }, { "epoch": 0.06521720789356011, "grad_norm": 22.625, "learning_rate": 1.997668983268884e-05, "loss": 8.0494, "step": 23190 }, { "epoch": 0.06524533088100883, "grad_norm": 46.5, "learning_rate": 1.997666973167251e-05, "loss": 8.0951, "step": 23200 }, { "epoch": 0.06527345386845754, "grad_norm": 30.875, "learning_rate": 1.9976649622003207e-05, "loss": 7.6749, "step": 23210 }, { "epoch": 0.06530157685590625, "grad_norm": 28.25, "learning_rate": 1.9976629503680936e-05, "loss": 7.7256, "step": 23220 }, { "epoch": 0.06532969984335496, "grad_norm": 28.5, "learning_rate": 1.9976609376705725e-05, "loss": 8.0738, "step": 23230 }, { "epoch": 0.06535782283080367, "grad_norm": 41.75, "learning_rate": 1.9976589241077582e-05, "loss": 7.3333, "step": 23240 }, { "epoch": 0.06538594581825238, "grad_norm": 55.25, "learning_rate": 1.9976569096796526e-05, "loss": 7.4189, "step": 23250 }, { "epoch": 0.0654140688057011, "grad_norm": 33.0, "learning_rate": 1.997654894386258e-05, "loss": 8.2886, "step": 23260 }, { "epoch": 0.0654421917931498, "grad_norm": 26.75, "learning_rate": 1.9976528782275758e-05, "loss": 7.8644, "step": 23270 }, { "epoch": 0.06547031478059852, "grad_norm": 22.625, "learning_rate": 1.9976508612036076e-05, "loss": 7.9872, "step": 23280 }, { "epoch": 0.06549843776804723, "grad_norm": 32.5, "learning_rate": 1.9976488433143553e-05, "loss": 7.7647, "step": 23290 }, { "epoch": 0.06552656075549594, "grad_norm": 27.125, "learning_rate": 1.9976468245598206e-05, "loss": 7.6811, "step": 23300 }, { "epoch": 0.06555468374294464, "grad_norm": 38.75, "learning_rate": 1.9976448049400058e-05, "loss": 7.9701, "step": 23310 }, { "epoch": 0.06558280673039335, "grad_norm": 29.25, "learning_rate": 1.9976427844549112e-05, "loss": 8.1048, "step": 23320 }, { "epoch": 0.06561092971784206, "grad_norm": 25.75, "learning_rate": 1.9976407631045404e-05, "loss": 7.9037, "step": 23330 }, { "epoch": 0.06563905270529077, "grad_norm": 27.625, "learning_rate": 1.9976387408888938e-05, "loss": 7.1071, "step": 23340 }, { "epoch": 0.06566717569273949, "grad_norm": 22.0, "learning_rate": 1.9976367178079736e-05, "loss": 7.7824, "step": 23350 }, { "epoch": 0.0656952986801882, "grad_norm": 32.75, "learning_rate": 1.9976346938617815e-05, "loss": 8.4946, "step": 23360 }, { "epoch": 0.06572342166763691, "grad_norm": 22.875, "learning_rate": 1.9976326690503195e-05, "loss": 8.2183, "step": 23370 }, { "epoch": 0.06575154465508562, "grad_norm": 44.5, "learning_rate": 1.9976306433735896e-05, "loss": 8.5571, "step": 23380 }, { "epoch": 0.06577966764253433, "grad_norm": 25.875, "learning_rate": 1.9976286168315925e-05, "loss": 7.7022, "step": 23390 }, { "epoch": 0.06580779062998304, "grad_norm": 54.0, "learning_rate": 1.9976265894243306e-05, "loss": 7.8134, "step": 23400 }, { "epoch": 0.06583591361743175, "grad_norm": 34.5, "learning_rate": 1.9976245611518062e-05, "loss": 8.1709, "step": 23410 }, { "epoch": 0.06586403660488047, "grad_norm": 24.625, "learning_rate": 1.99762253201402e-05, "loss": 8.1647, "step": 23420 }, { "epoch": 0.06589215959232918, "grad_norm": 24.125, "learning_rate": 1.9976205020109745e-05, "loss": 8.4993, "step": 23430 }, { "epoch": 0.06592028257977789, "grad_norm": 20.75, "learning_rate": 1.9976184711426713e-05, "loss": 6.8959, "step": 23440 }, { "epoch": 0.0659484055672266, "grad_norm": 22.5, "learning_rate": 1.9976164394091117e-05, "loss": 8.0051, "step": 23450 }, { "epoch": 0.06597652855467531, "grad_norm": 25.5, "learning_rate": 1.997614406810298e-05, "loss": 7.8721, "step": 23460 }, { "epoch": 0.06600465154212401, "grad_norm": 38.25, "learning_rate": 1.997612373346232e-05, "loss": 8.0255, "step": 23470 }, { "epoch": 0.06603277452957272, "grad_norm": 52.25, "learning_rate": 1.9976103390169154e-05, "loss": 8.7649, "step": 23480 }, { "epoch": 0.06606089751702143, "grad_norm": 28.75, "learning_rate": 1.9976083038223497e-05, "loss": 7.916, "step": 23490 }, { "epoch": 0.06608902050447014, "grad_norm": 27.0, "learning_rate": 1.9976062677625365e-05, "loss": 8.2649, "step": 23500 }, { "epoch": 0.06611714349191886, "grad_norm": 29.125, "learning_rate": 1.9976042308374784e-05, "loss": 8.5381, "step": 23510 }, { "epoch": 0.06614526647936757, "grad_norm": 23.0, "learning_rate": 1.997602193047176e-05, "loss": 8.2512, "step": 23520 }, { "epoch": 0.06617338946681628, "grad_norm": 36.0, "learning_rate": 1.9976001543916325e-05, "loss": 8.253, "step": 23530 }, { "epoch": 0.06620151245426499, "grad_norm": 33.0, "learning_rate": 1.9975981148708483e-05, "loss": 9.0981, "step": 23540 }, { "epoch": 0.0662296354417137, "grad_norm": 27.125, "learning_rate": 1.997596074484826e-05, "loss": 7.0276, "step": 23550 }, { "epoch": 0.06625775842916241, "grad_norm": 29.0, "learning_rate": 1.997594033233567e-05, "loss": 7.4713, "step": 23560 }, { "epoch": 0.06628588141661113, "grad_norm": 27.25, "learning_rate": 1.9975919911170734e-05, "loss": 7.8457, "step": 23570 }, { "epoch": 0.06631400440405984, "grad_norm": 24.375, "learning_rate": 1.9975899481353466e-05, "loss": 7.6421, "step": 23580 }, { "epoch": 0.06634212739150855, "grad_norm": 27.125, "learning_rate": 1.9975879042883884e-05, "loss": 8.4034, "step": 23590 }, { "epoch": 0.06637025037895726, "grad_norm": 43.0, "learning_rate": 1.9975858595762004e-05, "loss": 8.0975, "step": 23600 }, { "epoch": 0.06639837336640597, "grad_norm": 29.75, "learning_rate": 1.9975838139987854e-05, "loss": 8.0296, "step": 23610 }, { "epoch": 0.06642649635385468, "grad_norm": 35.75, "learning_rate": 1.9975817675561443e-05, "loss": 8.2287, "step": 23620 }, { "epoch": 0.0664546193413034, "grad_norm": 26.0, "learning_rate": 1.9975797202482787e-05, "loss": 8.0322, "step": 23630 }, { "epoch": 0.06648274232875209, "grad_norm": 20.25, "learning_rate": 1.997577672075191e-05, "loss": 7.7976, "step": 23640 }, { "epoch": 0.0665108653162008, "grad_norm": 52.0, "learning_rate": 1.9975756230368827e-05, "loss": 8.0099, "step": 23650 }, { "epoch": 0.06653898830364952, "grad_norm": 38.25, "learning_rate": 1.997573573133355e-05, "loss": 8.3235, "step": 23660 }, { "epoch": 0.06656711129109823, "grad_norm": 39.25, "learning_rate": 1.9975715223646107e-05, "loss": 8.5622, "step": 23670 }, { "epoch": 0.06659523427854694, "grad_norm": 37.75, "learning_rate": 1.997569470730651e-05, "loss": 7.6335, "step": 23680 }, { "epoch": 0.06662335726599565, "grad_norm": 20.875, "learning_rate": 1.997567418231478e-05, "loss": 7.213, "step": 23690 }, { "epoch": 0.06665148025344436, "grad_norm": 27.625, "learning_rate": 1.997565364867093e-05, "loss": 7.1579, "step": 23700 }, { "epoch": 0.06667960324089307, "grad_norm": 34.75, "learning_rate": 1.997563310637498e-05, "loss": 8.0902, "step": 23710 }, { "epoch": 0.06670772622834178, "grad_norm": 29.5, "learning_rate": 1.9975612555426953e-05, "loss": 8.5122, "step": 23720 }, { "epoch": 0.0667358492157905, "grad_norm": 25.0, "learning_rate": 1.997559199582686e-05, "loss": 8.1568, "step": 23730 }, { "epoch": 0.06676397220323921, "grad_norm": 29.5, "learning_rate": 1.9975571427574717e-05, "loss": 7.5884, "step": 23740 }, { "epoch": 0.06679209519068792, "grad_norm": 26.875, "learning_rate": 1.997555085067055e-05, "loss": 8.1614, "step": 23750 }, { "epoch": 0.06682021817813663, "grad_norm": 25.25, "learning_rate": 1.997553026511437e-05, "loss": 7.8148, "step": 23760 }, { "epoch": 0.06684834116558534, "grad_norm": 37.75, "learning_rate": 1.9975509670906202e-05, "loss": 8.2944, "step": 23770 }, { "epoch": 0.06687646415303405, "grad_norm": 22.75, "learning_rate": 1.9975489068046055e-05, "loss": 7.892, "step": 23780 }, { "epoch": 0.06690458714048277, "grad_norm": 28.75, "learning_rate": 1.997546845653395e-05, "loss": 8.2363, "step": 23790 }, { "epoch": 0.06693271012793146, "grad_norm": 40.25, "learning_rate": 1.997544783636991e-05, "loss": 8.4571, "step": 23800 }, { "epoch": 0.06696083311538018, "grad_norm": 26.5, "learning_rate": 1.997542720755395e-05, "loss": 7.9693, "step": 23810 }, { "epoch": 0.06698895610282889, "grad_norm": 30.5, "learning_rate": 1.9975406570086084e-05, "loss": 8.017, "step": 23820 }, { "epoch": 0.0670170790902776, "grad_norm": 27.5, "learning_rate": 1.9975385923966335e-05, "loss": 7.9965, "step": 23830 }, { "epoch": 0.06704520207772631, "grad_norm": 30.625, "learning_rate": 1.9975365269194714e-05, "loss": 7.7165, "step": 23840 }, { "epoch": 0.06707332506517502, "grad_norm": 23.875, "learning_rate": 1.997534460577125e-05, "loss": 7.7689, "step": 23850 }, { "epoch": 0.06710144805262373, "grad_norm": 28.125, "learning_rate": 1.9975323933695952e-05, "loss": 7.9831, "step": 23860 }, { "epoch": 0.06712957104007244, "grad_norm": 24.125, "learning_rate": 1.997530325296884e-05, "loss": 7.7716, "step": 23870 }, { "epoch": 0.06715769402752116, "grad_norm": 26.25, "learning_rate": 1.9975282563589932e-05, "loss": 8.2295, "step": 23880 }, { "epoch": 0.06718581701496987, "grad_norm": 32.5, "learning_rate": 1.9975261865559247e-05, "loss": 7.994, "step": 23890 }, { "epoch": 0.06721394000241858, "grad_norm": 42.0, "learning_rate": 1.9975241158876805e-05, "loss": 8.3927, "step": 23900 }, { "epoch": 0.06724206298986729, "grad_norm": 35.0, "learning_rate": 1.9975220443542616e-05, "loss": 7.6007, "step": 23910 }, { "epoch": 0.067270185977316, "grad_norm": 27.0, "learning_rate": 1.9975199719556707e-05, "loss": 8.5861, "step": 23920 }, { "epoch": 0.06729830896476471, "grad_norm": 31.0, "learning_rate": 1.9975178986919088e-05, "loss": 8.6574, "step": 23930 }, { "epoch": 0.06732643195221343, "grad_norm": 36.0, "learning_rate": 1.9975158245629783e-05, "loss": 8.2572, "step": 23940 }, { "epoch": 0.06735455493966214, "grad_norm": 34.0, "learning_rate": 1.9975137495688813e-05, "loss": 8.3509, "step": 23950 }, { "epoch": 0.06738267792711083, "grad_norm": 22.875, "learning_rate": 1.9975116737096187e-05, "loss": 8.2402, "step": 23960 }, { "epoch": 0.06741080091455955, "grad_norm": 27.125, "learning_rate": 1.9975095969851926e-05, "loss": 8.2014, "step": 23970 }, { "epoch": 0.06743892390200826, "grad_norm": 26.75, "learning_rate": 1.9975075193956053e-05, "loss": 7.5507, "step": 23980 }, { "epoch": 0.06746704688945697, "grad_norm": 44.0, "learning_rate": 1.997505440940858e-05, "loss": 8.6654, "step": 23990 }, { "epoch": 0.06749516987690568, "grad_norm": 53.75, "learning_rate": 1.9975033616209528e-05, "loss": 7.841, "step": 24000 }, { "epoch": 0.06752329286435439, "grad_norm": 28.25, "learning_rate": 1.9975012814358913e-05, "loss": 7.6185, "step": 24010 }, { "epoch": 0.0675514158518031, "grad_norm": 29.125, "learning_rate": 1.9974992003856757e-05, "loss": 8.4279, "step": 24020 }, { "epoch": 0.06757953883925182, "grad_norm": 23.25, "learning_rate": 1.997497118470307e-05, "loss": 8.2732, "step": 24030 }, { "epoch": 0.06760766182670053, "grad_norm": 32.0, "learning_rate": 1.997495035689788e-05, "loss": 7.9184, "step": 24040 }, { "epoch": 0.06763578481414924, "grad_norm": 50.75, "learning_rate": 1.99749295204412e-05, "loss": 8.6721, "step": 24050 }, { "epoch": 0.06766390780159795, "grad_norm": 32.25, "learning_rate": 1.997490867533305e-05, "loss": 7.8413, "step": 24060 }, { "epoch": 0.06769203078904666, "grad_norm": 25.375, "learning_rate": 1.997488782157344e-05, "loss": 7.4932, "step": 24070 }, { "epoch": 0.06772015377649537, "grad_norm": 30.75, "learning_rate": 1.99748669591624e-05, "loss": 7.2798, "step": 24080 }, { "epoch": 0.06774827676394408, "grad_norm": 32.75, "learning_rate": 1.9974846088099944e-05, "loss": 7.3896, "step": 24090 }, { "epoch": 0.0677763997513928, "grad_norm": 34.25, "learning_rate": 1.9974825208386086e-05, "loss": 7.7981, "step": 24100 }, { "epoch": 0.06780452273884151, "grad_norm": 49.75, "learning_rate": 1.997480432002085e-05, "loss": 8.5522, "step": 24110 }, { "epoch": 0.0678326457262902, "grad_norm": 31.75, "learning_rate": 1.997478342300425e-05, "loss": 8.1082, "step": 24120 }, { "epoch": 0.06786076871373892, "grad_norm": 31.625, "learning_rate": 1.9974762517336303e-05, "loss": 7.6424, "step": 24130 }, { "epoch": 0.06788889170118763, "grad_norm": 20.25, "learning_rate": 1.9974741603017032e-05, "loss": 8.1578, "step": 24140 }, { "epoch": 0.06791701468863634, "grad_norm": 35.75, "learning_rate": 1.997472068004645e-05, "loss": 7.946, "step": 24150 }, { "epoch": 0.06794513767608505, "grad_norm": 33.75, "learning_rate": 1.997469974842458e-05, "loss": 7.9592, "step": 24160 }, { "epoch": 0.06797326066353376, "grad_norm": 29.0, "learning_rate": 1.9974678808151438e-05, "loss": 7.9576, "step": 24170 }, { "epoch": 0.06800138365098247, "grad_norm": 33.75, "learning_rate": 1.997465785922704e-05, "loss": 7.9868, "step": 24180 }, { "epoch": 0.06802950663843119, "grad_norm": 26.0, "learning_rate": 1.997463690165141e-05, "loss": 7.6569, "step": 24190 }, { "epoch": 0.0680576296258799, "grad_norm": 27.5, "learning_rate": 1.9974615935424557e-05, "loss": 8.1518, "step": 24200 }, { "epoch": 0.06808575261332861, "grad_norm": 37.0, "learning_rate": 1.997459496054651e-05, "loss": 8.8376, "step": 24210 }, { "epoch": 0.06811387560077732, "grad_norm": 30.375, "learning_rate": 1.9974573977017278e-05, "loss": 8.1753, "step": 24220 }, { "epoch": 0.06814199858822603, "grad_norm": 26.25, "learning_rate": 1.9974552984836882e-05, "loss": 7.8068, "step": 24230 }, { "epoch": 0.06817012157567474, "grad_norm": 51.75, "learning_rate": 1.9974531984005346e-05, "loss": 7.5078, "step": 24240 }, { "epoch": 0.06819824456312346, "grad_norm": 34.25, "learning_rate": 1.9974510974522682e-05, "loss": 7.6289, "step": 24250 }, { "epoch": 0.06822636755057217, "grad_norm": 20.5, "learning_rate": 1.9974489956388905e-05, "loss": 7.7869, "step": 24260 }, { "epoch": 0.06825449053802088, "grad_norm": 27.75, "learning_rate": 1.9974468929604042e-05, "loss": 7.4938, "step": 24270 }, { "epoch": 0.06828261352546959, "grad_norm": 23.125, "learning_rate": 1.9974447894168106e-05, "loss": 7.9507, "step": 24280 }, { "epoch": 0.06831073651291829, "grad_norm": 27.125, "learning_rate": 1.9974426850081118e-05, "loss": 7.9369, "step": 24290 }, { "epoch": 0.068338859500367, "grad_norm": 21.75, "learning_rate": 1.997440579734309e-05, "loss": 7.1179, "step": 24300 }, { "epoch": 0.06836698248781571, "grad_norm": 22.25, "learning_rate": 1.9974384735954045e-05, "loss": 7.6934, "step": 24310 }, { "epoch": 0.06839510547526442, "grad_norm": 52.75, "learning_rate": 1.9974363665914005e-05, "loss": 6.7528, "step": 24320 }, { "epoch": 0.06842322846271313, "grad_norm": 29.625, "learning_rate": 1.9974342587222986e-05, "loss": 7.7815, "step": 24330 }, { "epoch": 0.06845135145016185, "grad_norm": 25.875, "learning_rate": 1.9974321499881e-05, "loss": 8.4692, "step": 24340 }, { "epoch": 0.06847947443761056, "grad_norm": 32.5, "learning_rate": 1.997430040388807e-05, "loss": 8.1893, "step": 24350 }, { "epoch": 0.06850759742505927, "grad_norm": 34.5, "learning_rate": 1.9974279299244216e-05, "loss": 7.8831, "step": 24360 }, { "epoch": 0.06853572041250798, "grad_norm": 23.125, "learning_rate": 1.9974258185949456e-05, "loss": 7.4445, "step": 24370 }, { "epoch": 0.06856384339995669, "grad_norm": 25.0, "learning_rate": 1.9974237064003806e-05, "loss": 8.2006, "step": 24380 }, { "epoch": 0.0685919663874054, "grad_norm": 26.25, "learning_rate": 1.9974215933407282e-05, "loss": 8.0715, "step": 24390 }, { "epoch": 0.06862008937485412, "grad_norm": 26.0, "learning_rate": 1.997419479415991e-05, "loss": 8.259, "step": 24400 }, { "epoch": 0.06864821236230283, "grad_norm": 42.5, "learning_rate": 1.9974173646261702e-05, "loss": 8.6407, "step": 24410 }, { "epoch": 0.06867633534975154, "grad_norm": 22.625, "learning_rate": 1.9974152489712676e-05, "loss": 7.4938, "step": 24420 }, { "epoch": 0.06870445833720025, "grad_norm": 35.5, "learning_rate": 1.9974131324512854e-05, "loss": 8.3035, "step": 24430 }, { "epoch": 0.06873258132464896, "grad_norm": 24.75, "learning_rate": 1.9974110150662255e-05, "loss": 8.2383, "step": 24440 }, { "epoch": 0.06876070431209766, "grad_norm": 26.875, "learning_rate": 1.997408896816089e-05, "loss": 8.6597, "step": 24450 }, { "epoch": 0.06878882729954637, "grad_norm": 21.75, "learning_rate": 1.9974067777008786e-05, "loss": 7.4632, "step": 24460 }, { "epoch": 0.06881695028699508, "grad_norm": 24.25, "learning_rate": 1.997404657720596e-05, "loss": 8.5503, "step": 24470 }, { "epoch": 0.0688450732744438, "grad_norm": 25.5, "learning_rate": 1.9974025368752426e-05, "loss": 7.443, "step": 24480 }, { "epoch": 0.0688731962618925, "grad_norm": 31.375, "learning_rate": 1.9974004151648208e-05, "loss": 7.4162, "step": 24490 }, { "epoch": 0.06890131924934122, "grad_norm": 26.0, "learning_rate": 1.9973982925893317e-05, "loss": 7.9443, "step": 24500 }, { "epoch": 0.06892944223678993, "grad_norm": 31.5, "learning_rate": 1.9973961691487776e-05, "loss": 7.5871, "step": 24510 }, { "epoch": 0.06895756522423864, "grad_norm": 26.625, "learning_rate": 1.9973940448431604e-05, "loss": 8.2074, "step": 24520 }, { "epoch": 0.06898568821168735, "grad_norm": 25.625, "learning_rate": 1.997391919672482e-05, "loss": 8.0772, "step": 24530 }, { "epoch": 0.06901381119913606, "grad_norm": 29.25, "learning_rate": 1.9973897936367436e-05, "loss": 8.0236, "step": 24540 }, { "epoch": 0.06904193418658477, "grad_norm": 23.375, "learning_rate": 1.997387666735948e-05, "loss": 7.7054, "step": 24550 }, { "epoch": 0.06907005717403349, "grad_norm": 33.25, "learning_rate": 1.9973855389700966e-05, "loss": 7.3754, "step": 24560 }, { "epoch": 0.0690981801614822, "grad_norm": 69.5, "learning_rate": 1.997383410339191e-05, "loss": 7.9906, "step": 24570 }, { "epoch": 0.06912630314893091, "grad_norm": 35.75, "learning_rate": 1.9973812808432332e-05, "loss": 8.6023, "step": 24580 }, { "epoch": 0.06915442613637962, "grad_norm": 64.5, "learning_rate": 1.9973791504822252e-05, "loss": 8.0812, "step": 24590 }, { "epoch": 0.06918254912382833, "grad_norm": 29.75, "learning_rate": 1.9973770192561688e-05, "loss": 8.6807, "step": 24600 }, { "epoch": 0.06921067211127703, "grad_norm": 40.75, "learning_rate": 1.997374887165066e-05, "loss": 8.3598, "step": 24610 }, { "epoch": 0.06923879509872574, "grad_norm": 32.75, "learning_rate": 1.997372754208918e-05, "loss": 7.3308, "step": 24620 }, { "epoch": 0.06926691808617445, "grad_norm": 23.625, "learning_rate": 1.997370620387727e-05, "loss": 7.3374, "step": 24630 }, { "epoch": 0.06929504107362316, "grad_norm": 55.25, "learning_rate": 1.9973684857014957e-05, "loss": 8.8595, "step": 24640 }, { "epoch": 0.06932316406107188, "grad_norm": 35.75, "learning_rate": 1.997366350150225e-05, "loss": 7.8599, "step": 24650 }, { "epoch": 0.06935128704852059, "grad_norm": 42.25, "learning_rate": 1.9973642137339165e-05, "loss": 8.3071, "step": 24660 }, { "epoch": 0.0693794100359693, "grad_norm": 55.25, "learning_rate": 1.9973620764525727e-05, "loss": 7.1628, "step": 24670 }, { "epoch": 0.06940753302341801, "grad_norm": 24.125, "learning_rate": 1.9973599383061955e-05, "loss": 7.7399, "step": 24680 }, { "epoch": 0.06943565601086672, "grad_norm": 24.625, "learning_rate": 1.9973577992947862e-05, "loss": 8.583, "step": 24690 }, { "epoch": 0.06946377899831543, "grad_norm": 20.0, "learning_rate": 1.9973556594183473e-05, "loss": 7.7831, "step": 24700 }, { "epoch": 0.06949190198576415, "grad_norm": 58.0, "learning_rate": 1.9973535186768803e-05, "loss": 7.9116, "step": 24710 }, { "epoch": 0.06952002497321286, "grad_norm": 39.75, "learning_rate": 1.9973513770703867e-05, "loss": 8.5319, "step": 24720 }, { "epoch": 0.06954814796066157, "grad_norm": 28.25, "learning_rate": 1.9973492345988692e-05, "loss": 7.0798, "step": 24730 }, { "epoch": 0.06957627094811028, "grad_norm": 22.0, "learning_rate": 1.9973470912623288e-05, "loss": 7.8794, "step": 24740 }, { "epoch": 0.06960439393555899, "grad_norm": 30.25, "learning_rate": 1.9973449470607682e-05, "loss": 8.0186, "step": 24750 }, { "epoch": 0.0696325169230077, "grad_norm": 32.75, "learning_rate": 1.9973428019941887e-05, "loss": 7.9679, "step": 24760 }, { "epoch": 0.0696606399104564, "grad_norm": 30.125, "learning_rate": 1.9973406560625924e-05, "loss": 8.7424, "step": 24770 }, { "epoch": 0.06968876289790511, "grad_norm": 47.0, "learning_rate": 1.9973385092659807e-05, "loss": 8.1056, "step": 24780 }, { "epoch": 0.06971688588535382, "grad_norm": 25.0, "learning_rate": 1.997336361604356e-05, "loss": 8.0202, "step": 24790 }, { "epoch": 0.06974500887280254, "grad_norm": 27.125, "learning_rate": 1.99733421307772e-05, "loss": 7.5639, "step": 24800 }, { "epoch": 0.06977313186025125, "grad_norm": 45.0, "learning_rate": 1.9973320636860744e-05, "loss": 7.6013, "step": 24810 }, { "epoch": 0.06980125484769996, "grad_norm": 37.75, "learning_rate": 1.9973299134294212e-05, "loss": 8.8776, "step": 24820 }, { "epoch": 0.06982937783514867, "grad_norm": 33.75, "learning_rate": 1.9973277623077623e-05, "loss": 7.1823, "step": 24830 }, { "epoch": 0.06985750082259738, "grad_norm": 24.75, "learning_rate": 1.9973256103210995e-05, "loss": 7.9224, "step": 24840 }, { "epoch": 0.0698856238100461, "grad_norm": 23.0, "learning_rate": 1.997323457469435e-05, "loss": 8.2273, "step": 24850 }, { "epoch": 0.0699137467974948, "grad_norm": 36.0, "learning_rate": 1.9973213037527703e-05, "loss": 6.5648, "step": 24860 }, { "epoch": 0.06994186978494352, "grad_norm": 28.75, "learning_rate": 1.9973191491711068e-05, "loss": 8.4503, "step": 24870 }, { "epoch": 0.06996999277239223, "grad_norm": 42.5, "learning_rate": 1.9973169937244476e-05, "loss": 8.266, "step": 24880 }, { "epoch": 0.06999811575984094, "grad_norm": 40.75, "learning_rate": 1.9973148374127932e-05, "loss": 7.3302, "step": 24890 }, { "epoch": 0.07002623874728965, "grad_norm": 32.5, "learning_rate": 1.9973126802361468e-05, "loss": 7.5562, "step": 24900 }, { "epoch": 0.07005436173473836, "grad_norm": 33.75, "learning_rate": 1.997310522194509e-05, "loss": 7.698, "step": 24910 }, { "epoch": 0.07008248472218707, "grad_norm": 28.875, "learning_rate": 1.9973083632878827e-05, "loss": 7.8756, "step": 24920 }, { "epoch": 0.07011060770963579, "grad_norm": 35.25, "learning_rate": 1.9973062035162692e-05, "loss": 8.3029, "step": 24930 }, { "epoch": 0.07013873069708448, "grad_norm": 36.0, "learning_rate": 1.9973040428796704e-05, "loss": 8.2363, "step": 24940 }, { "epoch": 0.0701668536845332, "grad_norm": 33.75, "learning_rate": 1.9973018813780886e-05, "loss": 7.9192, "step": 24950 }, { "epoch": 0.0701949766719819, "grad_norm": 23.75, "learning_rate": 1.997299719011525e-05, "loss": 7.7031, "step": 24960 }, { "epoch": 0.07022309965943062, "grad_norm": 56.75, "learning_rate": 1.9972975557799824e-05, "loss": 8.0672, "step": 24970 }, { "epoch": 0.07025122264687933, "grad_norm": 24.375, "learning_rate": 1.9972953916834616e-05, "loss": 7.9685, "step": 24980 }, { "epoch": 0.07027934563432804, "grad_norm": 33.75, "learning_rate": 1.9972932267219654e-05, "loss": 7.5474, "step": 24990 }, { "epoch": 0.07030746862177675, "grad_norm": 42.25, "learning_rate": 1.997291060895495e-05, "loss": 7.2416, "step": 25000 }, { "epoch": 0.07033559160922546, "grad_norm": 28.375, "learning_rate": 1.9972888942040527e-05, "loss": 8.4164, "step": 25010 }, { "epoch": 0.07036371459667418, "grad_norm": 40.5, "learning_rate": 1.9972867266476402e-05, "loss": 7.7495, "step": 25020 }, { "epoch": 0.07039183758412289, "grad_norm": 32.75, "learning_rate": 1.9972845582262594e-05, "loss": 8.6208, "step": 25030 }, { "epoch": 0.0704199605715716, "grad_norm": 42.5, "learning_rate": 1.9972823889399122e-05, "loss": 7.58, "step": 25040 }, { "epoch": 0.07044808355902031, "grad_norm": 23.625, "learning_rate": 1.9972802187886007e-05, "loss": 7.3101, "step": 25050 }, { "epoch": 0.07047620654646902, "grad_norm": 37.75, "learning_rate": 1.9972780477723263e-05, "loss": 8.8442, "step": 25060 }, { "epoch": 0.07050432953391773, "grad_norm": 26.25, "learning_rate": 1.997275875891091e-05, "loss": 7.3642, "step": 25070 }, { "epoch": 0.07053245252136645, "grad_norm": 37.25, "learning_rate": 1.9972737031448972e-05, "loss": 8.5764, "step": 25080 }, { "epoch": 0.07056057550881516, "grad_norm": 26.375, "learning_rate": 1.9972715295337465e-05, "loss": 8.2793, "step": 25090 }, { "epoch": 0.07058869849626385, "grad_norm": 39.5, "learning_rate": 1.9972693550576403e-05, "loss": 8.0837, "step": 25100 }, { "epoch": 0.07061682148371257, "grad_norm": 28.625, "learning_rate": 1.997267179716581e-05, "loss": 8.0924, "step": 25110 }, { "epoch": 0.07064494447116128, "grad_norm": 36.25, "learning_rate": 1.9972650035105703e-05, "loss": 8.1165, "step": 25120 }, { "epoch": 0.07067306745860999, "grad_norm": 22.125, "learning_rate": 1.9972628264396102e-05, "loss": 8.1356, "step": 25130 }, { "epoch": 0.0707011904460587, "grad_norm": 29.625, "learning_rate": 1.9972606485037028e-05, "loss": 8.1969, "step": 25140 }, { "epoch": 0.07072931343350741, "grad_norm": 41.75, "learning_rate": 1.9972584697028497e-05, "loss": 8.1606, "step": 25150 }, { "epoch": 0.07075743642095612, "grad_norm": 21.0, "learning_rate": 1.9972562900370526e-05, "loss": 8.3056, "step": 25160 }, { "epoch": 0.07078555940840484, "grad_norm": 45.5, "learning_rate": 1.997254109506314e-05, "loss": 8.3785, "step": 25170 }, { "epoch": 0.07081368239585355, "grad_norm": 29.125, "learning_rate": 1.997251928110635e-05, "loss": 7.9498, "step": 25180 }, { "epoch": 0.07084180538330226, "grad_norm": 28.375, "learning_rate": 1.997249745850018e-05, "loss": 7.8461, "step": 25190 }, { "epoch": 0.07086992837075097, "grad_norm": 31.125, "learning_rate": 1.997247562724465e-05, "loss": 7.0328, "step": 25200 }, { "epoch": 0.07089805135819968, "grad_norm": 42.0, "learning_rate": 1.9972453787339775e-05, "loss": 7.8984, "step": 25210 }, { "epoch": 0.0709261743456484, "grad_norm": 28.125, "learning_rate": 1.9972431938785576e-05, "loss": 7.9376, "step": 25220 }, { "epoch": 0.0709542973330971, "grad_norm": 36.5, "learning_rate": 1.997241008158207e-05, "loss": 8.0867, "step": 25230 }, { "epoch": 0.07098242032054582, "grad_norm": 21.625, "learning_rate": 1.9972388215729282e-05, "loss": 8.2967, "step": 25240 }, { "epoch": 0.07101054330799453, "grad_norm": 24.875, "learning_rate": 1.9972366341227222e-05, "loss": 7.565, "step": 25250 }, { "epoch": 0.07103866629544323, "grad_norm": 29.25, "learning_rate": 1.9972344458075915e-05, "loss": 8.3284, "step": 25260 }, { "epoch": 0.07106678928289194, "grad_norm": 28.75, "learning_rate": 1.997232256627538e-05, "loss": 7.8378, "step": 25270 }, { "epoch": 0.07109491227034065, "grad_norm": 37.25, "learning_rate": 1.9972300665825636e-05, "loss": 8.1903, "step": 25280 }, { "epoch": 0.07112303525778936, "grad_norm": 32.5, "learning_rate": 1.99722787567267e-05, "loss": 7.521, "step": 25290 }, { "epoch": 0.07115115824523807, "grad_norm": 51.5, "learning_rate": 1.997225683897859e-05, "loss": 7.5105, "step": 25300 }, { "epoch": 0.07117928123268678, "grad_norm": 50.0, "learning_rate": 1.997223491258133e-05, "loss": 8.3483, "step": 25310 }, { "epoch": 0.0712074042201355, "grad_norm": 24.875, "learning_rate": 1.9972212977534932e-05, "loss": 7.9997, "step": 25320 }, { "epoch": 0.0712355272075842, "grad_norm": 32.5, "learning_rate": 1.9972191033839417e-05, "loss": 8.0054, "step": 25330 }, { "epoch": 0.07126365019503292, "grad_norm": 39.0, "learning_rate": 1.997216908149481e-05, "loss": 8.231, "step": 25340 }, { "epoch": 0.07129177318248163, "grad_norm": 41.25, "learning_rate": 1.9972147120501123e-05, "loss": 8.2103, "step": 25350 }, { "epoch": 0.07131989616993034, "grad_norm": 25.25, "learning_rate": 1.997212515085838e-05, "loss": 8.1515, "step": 25360 }, { "epoch": 0.07134801915737905, "grad_norm": 32.75, "learning_rate": 1.9972103172566598e-05, "loss": 8.2716, "step": 25370 }, { "epoch": 0.07137614214482776, "grad_norm": 26.25, "learning_rate": 1.9972081185625793e-05, "loss": 7.4486, "step": 25380 }, { "epoch": 0.07140426513227648, "grad_norm": 24.75, "learning_rate": 1.9972059190035988e-05, "loss": 8.3785, "step": 25390 }, { "epoch": 0.07143238811972519, "grad_norm": 31.375, "learning_rate": 1.99720371857972e-05, "loss": 7.3092, "step": 25400 }, { "epoch": 0.0714605111071739, "grad_norm": 33.5, "learning_rate": 1.9972015172909452e-05, "loss": 8.2852, "step": 25410 }, { "epoch": 0.0714886340946226, "grad_norm": 49.25, "learning_rate": 1.9971993151372762e-05, "loss": 7.9641, "step": 25420 }, { "epoch": 0.07151675708207131, "grad_norm": 28.75, "learning_rate": 1.9971971121187144e-05, "loss": 8.8815, "step": 25430 }, { "epoch": 0.07154488006952002, "grad_norm": 25.75, "learning_rate": 1.997194908235262e-05, "loss": 7.9302, "step": 25440 }, { "epoch": 0.07157300305696873, "grad_norm": 27.5, "learning_rate": 1.997192703486921e-05, "loss": 7.878, "step": 25450 }, { "epoch": 0.07160112604441744, "grad_norm": 30.75, "learning_rate": 1.9971904978736933e-05, "loss": 8.4253, "step": 25460 }, { "epoch": 0.07162924903186615, "grad_norm": 38.25, "learning_rate": 1.997188291395581e-05, "loss": 8.1017, "step": 25470 }, { "epoch": 0.07165737201931487, "grad_norm": 32.5, "learning_rate": 1.9971860840525855e-05, "loss": 7.901, "step": 25480 }, { "epoch": 0.07168549500676358, "grad_norm": 49.25, "learning_rate": 1.9971838758447092e-05, "loss": 8.2286, "step": 25490 }, { "epoch": 0.07171361799421229, "grad_norm": 28.375, "learning_rate": 1.9971816667719536e-05, "loss": 8.9143, "step": 25500 }, { "epoch": 0.071741740981661, "grad_norm": 30.375, "learning_rate": 1.997179456834321e-05, "loss": 7.7938, "step": 25510 }, { "epoch": 0.07176986396910971, "grad_norm": 23.25, "learning_rate": 1.997177246031813e-05, "loss": 7.9066, "step": 25520 }, { "epoch": 0.07179798695655842, "grad_norm": 25.75, "learning_rate": 1.9971750343644317e-05, "loss": 8.3859, "step": 25530 }, { "epoch": 0.07182610994400714, "grad_norm": 27.125, "learning_rate": 1.9971728218321794e-05, "loss": 8.1234, "step": 25540 }, { "epoch": 0.07185423293145585, "grad_norm": 30.75, "learning_rate": 1.9971706084350574e-05, "loss": 8.2773, "step": 25550 }, { "epoch": 0.07188235591890456, "grad_norm": 36.75, "learning_rate": 1.9971683941730676e-05, "loss": 7.9438, "step": 25560 }, { "epoch": 0.07191047890635327, "grad_norm": 31.0, "learning_rate": 1.997166179046212e-05, "loss": 7.5717, "step": 25570 }, { "epoch": 0.07193860189380198, "grad_norm": 48.5, "learning_rate": 1.9971639630544933e-05, "loss": 8.0687, "step": 25580 }, { "epoch": 0.07196672488125068, "grad_norm": 43.0, "learning_rate": 1.997161746197913e-05, "loss": 8.1877, "step": 25590 }, { "epoch": 0.07199484786869939, "grad_norm": 34.25, "learning_rate": 1.997159528476472e-05, "loss": 7.9821, "step": 25600 }, { "epoch": 0.0720229708561481, "grad_norm": 22.125, "learning_rate": 1.9971573098901732e-05, "loss": 7.8202, "step": 25610 }, { "epoch": 0.07205109384359681, "grad_norm": 36.5, "learning_rate": 1.9971550904390183e-05, "loss": 8.0715, "step": 25620 }, { "epoch": 0.07207921683104553, "grad_norm": 23.125, "learning_rate": 1.9971528701230096e-05, "loss": 7.7052, "step": 25630 }, { "epoch": 0.07210733981849424, "grad_norm": 26.75, "learning_rate": 1.9971506489421485e-05, "loss": 7.7468, "step": 25640 }, { "epoch": 0.07213546280594295, "grad_norm": 29.75, "learning_rate": 1.9971484268964375e-05, "loss": 8.5877, "step": 25650 }, { "epoch": 0.07216358579339166, "grad_norm": 21.625, "learning_rate": 1.997146203985878e-05, "loss": 7.6238, "step": 25660 }, { "epoch": 0.07219170878084037, "grad_norm": 38.25, "learning_rate": 1.997143980210472e-05, "loss": 7.8328, "step": 25670 }, { "epoch": 0.07221983176828908, "grad_norm": 29.625, "learning_rate": 1.9971417555702216e-05, "loss": 7.8446, "step": 25680 }, { "epoch": 0.0722479547557378, "grad_norm": 28.75, "learning_rate": 1.9971395300651285e-05, "loss": 7.0579, "step": 25690 }, { "epoch": 0.0722760777431865, "grad_norm": 27.875, "learning_rate": 1.9971373036951947e-05, "loss": 7.6562, "step": 25700 }, { "epoch": 0.07230420073063522, "grad_norm": 24.375, "learning_rate": 1.9971350764604227e-05, "loss": 8.8901, "step": 25710 }, { "epoch": 0.07233232371808393, "grad_norm": 40.75, "learning_rate": 1.9971328483608138e-05, "loss": 7.5222, "step": 25720 }, { "epoch": 0.07236044670553264, "grad_norm": 34.25, "learning_rate": 1.9971306193963697e-05, "loss": 8.1287, "step": 25730 }, { "epoch": 0.07238856969298135, "grad_norm": 30.375, "learning_rate": 1.997128389567093e-05, "loss": 8.1927, "step": 25740 }, { "epoch": 0.07241669268043005, "grad_norm": 40.75, "learning_rate": 1.9971261588729853e-05, "loss": 8.3004, "step": 25750 }, { "epoch": 0.07244481566787876, "grad_norm": 23.375, "learning_rate": 1.9971239273140485e-05, "loss": 7.6001, "step": 25760 }, { "epoch": 0.07247293865532747, "grad_norm": 38.5, "learning_rate": 1.997121694890285e-05, "loss": 7.216, "step": 25770 }, { "epoch": 0.07250106164277619, "grad_norm": 24.625, "learning_rate": 1.997119461601696e-05, "loss": 7.666, "step": 25780 }, { "epoch": 0.0725291846302249, "grad_norm": 28.0, "learning_rate": 1.997117227448284e-05, "loss": 8.5669, "step": 25790 }, { "epoch": 0.07255730761767361, "grad_norm": 39.0, "learning_rate": 1.9971149924300506e-05, "loss": 7.8265, "step": 25800 }, { "epoch": 0.07258543060512232, "grad_norm": 47.75, "learning_rate": 1.997112756546998e-05, "loss": 8.1448, "step": 25810 }, { "epoch": 0.07261355359257103, "grad_norm": 35.0, "learning_rate": 1.9971105197991278e-05, "loss": 8.3019, "step": 25820 }, { "epoch": 0.07264167658001974, "grad_norm": 78.0, "learning_rate": 1.9971082821864424e-05, "loss": 7.1803, "step": 25830 }, { "epoch": 0.07266979956746845, "grad_norm": 25.125, "learning_rate": 1.997106043708943e-05, "loss": 7.9368, "step": 25840 }, { "epoch": 0.07269792255491717, "grad_norm": 36.5, "learning_rate": 1.9971038043666323e-05, "loss": 8.7768, "step": 25850 }, { "epoch": 0.07272604554236588, "grad_norm": 31.375, "learning_rate": 1.997101564159512e-05, "loss": 8.032, "step": 25860 }, { "epoch": 0.07275416852981459, "grad_norm": 42.0, "learning_rate": 1.9970993230875845e-05, "loss": 7.9507, "step": 25870 }, { "epoch": 0.0727822915172633, "grad_norm": 29.5, "learning_rate": 1.9970970811508504e-05, "loss": 7.6736, "step": 25880 }, { "epoch": 0.07281041450471201, "grad_norm": 45.75, "learning_rate": 1.997094838349313e-05, "loss": 8.3579, "step": 25890 }, { "epoch": 0.07283853749216072, "grad_norm": 20.75, "learning_rate": 1.997092594682974e-05, "loss": 8.0961, "step": 25900 }, { "epoch": 0.07286666047960942, "grad_norm": 32.0, "learning_rate": 1.9970903501518342e-05, "loss": 7.8379, "step": 25910 }, { "epoch": 0.07289478346705813, "grad_norm": 30.625, "learning_rate": 1.9970881047558972e-05, "loss": 8.0047, "step": 25920 }, { "epoch": 0.07292290645450684, "grad_norm": 40.25, "learning_rate": 1.9970858584951643e-05, "loss": 7.9365, "step": 25930 }, { "epoch": 0.07295102944195556, "grad_norm": 23.5, "learning_rate": 1.997083611369637e-05, "loss": 8.2069, "step": 25940 }, { "epoch": 0.07297915242940427, "grad_norm": 23.625, "learning_rate": 1.9970813633793176e-05, "loss": 7.8206, "step": 25950 }, { "epoch": 0.07300727541685298, "grad_norm": 28.375, "learning_rate": 1.997079114524208e-05, "loss": 8.1541, "step": 25960 }, { "epoch": 0.07303539840430169, "grad_norm": 22.625, "learning_rate": 1.9970768648043103e-05, "loss": 8.0545, "step": 25970 }, { "epoch": 0.0730635213917504, "grad_norm": 41.5, "learning_rate": 1.9970746142196263e-05, "loss": 7.9122, "step": 25980 }, { "epoch": 0.07309164437919911, "grad_norm": 33.0, "learning_rate": 1.9970723627701577e-05, "loss": 7.4905, "step": 25990 }, { "epoch": 0.07311976736664783, "grad_norm": 34.25, "learning_rate": 1.9970701104559073e-05, "loss": 8.0898, "step": 26000 }, { "epoch": 0.07314789035409654, "grad_norm": 22.75, "learning_rate": 1.9970678572768763e-05, "loss": 7.8744, "step": 26010 }, { "epoch": 0.07317601334154525, "grad_norm": 30.125, "learning_rate": 1.9970656032330663e-05, "loss": 7.903, "step": 26020 }, { "epoch": 0.07320413632899396, "grad_norm": 26.375, "learning_rate": 1.9970633483244807e-05, "loss": 7.7607, "step": 26030 }, { "epoch": 0.07323225931644267, "grad_norm": 35.5, "learning_rate": 1.99706109255112e-05, "loss": 7.996, "step": 26040 }, { "epoch": 0.07326038230389138, "grad_norm": 30.25, "learning_rate": 1.997058835912987e-05, "loss": 7.836, "step": 26050 }, { "epoch": 0.0732885052913401, "grad_norm": 25.875, "learning_rate": 1.997056578410083e-05, "loss": 8.619, "step": 26060 }, { "epoch": 0.07331662827878879, "grad_norm": 33.75, "learning_rate": 1.9970543200424105e-05, "loss": 7.37, "step": 26070 }, { "epoch": 0.0733447512662375, "grad_norm": 24.625, "learning_rate": 1.9970520608099714e-05, "loss": 7.3459, "step": 26080 }, { "epoch": 0.07337287425368622, "grad_norm": 37.25, "learning_rate": 1.9970498007127673e-05, "loss": 7.7747, "step": 26090 }, { "epoch": 0.07340099724113493, "grad_norm": 44.0, "learning_rate": 1.9970475397508008e-05, "loss": 8.2631, "step": 26100 }, { "epoch": 0.07342912022858364, "grad_norm": 40.5, "learning_rate": 1.9970452779240733e-05, "loss": 7.8677, "step": 26110 }, { "epoch": 0.07345724321603235, "grad_norm": 34.75, "learning_rate": 1.9970430152325872e-05, "loss": 7.909, "step": 26120 }, { "epoch": 0.07348536620348106, "grad_norm": 32.75, "learning_rate": 1.9970407516763438e-05, "loss": 8.0213, "step": 26130 }, { "epoch": 0.07351348919092977, "grad_norm": 24.0, "learning_rate": 1.9970384872553454e-05, "loss": 8.4118, "step": 26140 }, { "epoch": 0.07354161217837848, "grad_norm": 30.625, "learning_rate": 1.9970362219695946e-05, "loss": 7.9723, "step": 26150 }, { "epoch": 0.0735697351658272, "grad_norm": 25.25, "learning_rate": 1.9970339558190922e-05, "loss": 8.612, "step": 26160 }, { "epoch": 0.07359785815327591, "grad_norm": 21.0, "learning_rate": 1.997031688803841e-05, "loss": 7.9131, "step": 26170 }, { "epoch": 0.07362598114072462, "grad_norm": 25.5, "learning_rate": 1.9970294209238427e-05, "loss": 8.1793, "step": 26180 }, { "epoch": 0.07365410412817333, "grad_norm": 48.0, "learning_rate": 1.9970271521790993e-05, "loss": 8.0626, "step": 26190 }, { "epoch": 0.07368222711562204, "grad_norm": 21.75, "learning_rate": 1.9970248825696125e-05, "loss": 7.7248, "step": 26200 }, { "epoch": 0.07371035010307075, "grad_norm": 23.5, "learning_rate": 1.9970226120953847e-05, "loss": 8.4138, "step": 26210 }, { "epoch": 0.07373847309051947, "grad_norm": 27.75, "learning_rate": 1.997020340756418e-05, "loss": 8.2009, "step": 26220 }, { "epoch": 0.07376659607796818, "grad_norm": 40.75, "learning_rate": 1.9970180685527135e-05, "loss": 8.5662, "step": 26230 }, { "epoch": 0.07379471906541687, "grad_norm": 62.25, "learning_rate": 1.997015795484274e-05, "loss": 9.0719, "step": 26240 }, { "epoch": 0.07382284205286559, "grad_norm": 26.625, "learning_rate": 1.9970135215511016e-05, "loss": 7.6147, "step": 26250 }, { "epoch": 0.0738509650403143, "grad_norm": 29.625, "learning_rate": 1.997011246753197e-05, "loss": 7.541, "step": 26260 }, { "epoch": 0.07387908802776301, "grad_norm": 35.0, "learning_rate": 1.997008971090564e-05, "loss": 7.3945, "step": 26270 }, { "epoch": 0.07390721101521172, "grad_norm": 26.375, "learning_rate": 1.997006694563203e-05, "loss": 7.8403, "step": 26280 }, { "epoch": 0.07393533400266043, "grad_norm": 45.0, "learning_rate": 1.997004417171117e-05, "loss": 8.0331, "step": 26290 }, { "epoch": 0.07396345699010914, "grad_norm": 25.5, "learning_rate": 1.997002138914307e-05, "loss": 7.7275, "step": 26300 }, { "epoch": 0.07399157997755786, "grad_norm": 29.0, "learning_rate": 1.9969998597927762e-05, "loss": 8.2184, "step": 26310 }, { "epoch": 0.07401970296500657, "grad_norm": 65.5, "learning_rate": 1.9969975798065256e-05, "loss": 8.6619, "step": 26320 }, { "epoch": 0.07404782595245528, "grad_norm": 28.625, "learning_rate": 1.9969952989555574e-05, "loss": 7.2831, "step": 26330 }, { "epoch": 0.07407594893990399, "grad_norm": 25.875, "learning_rate": 1.996993017239874e-05, "loss": 8.0372, "step": 26340 }, { "epoch": 0.0741040719273527, "grad_norm": 33.0, "learning_rate": 1.9969907346594768e-05, "loss": 8.4383, "step": 26350 }, { "epoch": 0.07413219491480141, "grad_norm": 29.625, "learning_rate": 1.9969884512143682e-05, "loss": 7.9763, "step": 26360 }, { "epoch": 0.07416031790225013, "grad_norm": 24.125, "learning_rate": 1.99698616690455e-05, "loss": 7.6821, "step": 26370 }, { "epoch": 0.07418844088969884, "grad_norm": 30.5, "learning_rate": 1.996983881730024e-05, "loss": 7.7474, "step": 26380 }, { "epoch": 0.07421656387714755, "grad_norm": 24.5, "learning_rate": 1.9969815956907927e-05, "loss": 7.6051, "step": 26390 }, { "epoch": 0.07424468686459625, "grad_norm": 46.25, "learning_rate": 1.9969793087868576e-05, "loss": 8.1298, "step": 26400 }, { "epoch": 0.07427280985204496, "grad_norm": 20.625, "learning_rate": 1.9969770210182208e-05, "loss": 7.1447, "step": 26410 }, { "epoch": 0.07430093283949367, "grad_norm": 22.375, "learning_rate": 1.9969747323848842e-05, "loss": 7.8355, "step": 26420 }, { "epoch": 0.07432905582694238, "grad_norm": 39.25, "learning_rate": 1.9969724428868504e-05, "loss": 8.0793, "step": 26430 }, { "epoch": 0.07435717881439109, "grad_norm": 22.875, "learning_rate": 1.9969701525241206e-05, "loss": 8.8741, "step": 26440 }, { "epoch": 0.0743853018018398, "grad_norm": 58.5, "learning_rate": 1.996967861296697e-05, "loss": 7.9702, "step": 26450 }, { "epoch": 0.07441342478928852, "grad_norm": 34.25, "learning_rate": 1.9969655692045816e-05, "loss": 7.8223, "step": 26460 }, { "epoch": 0.07444154777673723, "grad_norm": 38.25, "learning_rate": 1.996963276247777e-05, "loss": 8.3986, "step": 26470 }, { "epoch": 0.07446967076418594, "grad_norm": 26.375, "learning_rate": 1.9969609824262842e-05, "loss": 7.8544, "step": 26480 }, { "epoch": 0.07449779375163465, "grad_norm": 26.5, "learning_rate": 1.9969586877401058e-05, "loss": 7.868, "step": 26490 }, { "epoch": 0.07452591673908336, "grad_norm": 25.125, "learning_rate": 1.9969563921892437e-05, "loss": 7.4325, "step": 26500 }, { "epoch": 0.07455403972653207, "grad_norm": 23.25, "learning_rate": 1.9969540957736998e-05, "loss": 8.7173, "step": 26510 }, { "epoch": 0.07458216271398078, "grad_norm": 29.0, "learning_rate": 1.996951798493476e-05, "loss": 8.479, "step": 26520 }, { "epoch": 0.0746102857014295, "grad_norm": 29.625, "learning_rate": 1.9969495003485743e-05, "loss": 8.4633, "step": 26530 }, { "epoch": 0.07463840868887821, "grad_norm": 23.375, "learning_rate": 1.996947201338997e-05, "loss": 8.0694, "step": 26540 }, { "epoch": 0.07466653167632692, "grad_norm": 27.625, "learning_rate": 1.9969449014647456e-05, "loss": 8.166, "step": 26550 }, { "epoch": 0.07469465466377562, "grad_norm": 32.5, "learning_rate": 1.996942600725823e-05, "loss": 8.0356, "step": 26560 }, { "epoch": 0.07472277765122433, "grad_norm": 28.375, "learning_rate": 1.9969402991222303e-05, "loss": 8.3624, "step": 26570 }, { "epoch": 0.07475090063867304, "grad_norm": 25.625, "learning_rate": 1.9969379966539696e-05, "loss": 7.8257, "step": 26580 }, { "epoch": 0.07477902362612175, "grad_norm": 23.125, "learning_rate": 1.996935693321043e-05, "loss": 7.9279, "step": 26590 }, { "epoch": 0.07480714661357046, "grad_norm": 42.75, "learning_rate": 1.996933389123453e-05, "loss": 7.7669, "step": 26600 }, { "epoch": 0.07483526960101917, "grad_norm": 23.25, "learning_rate": 1.996931084061201e-05, "loss": 8.0348, "step": 26610 }, { "epoch": 0.07486339258846789, "grad_norm": 50.25, "learning_rate": 1.9969287781342892e-05, "loss": 8.3616, "step": 26620 }, { "epoch": 0.0748915155759166, "grad_norm": 24.0, "learning_rate": 1.9969264713427195e-05, "loss": 8.2894, "step": 26630 }, { "epoch": 0.07491963856336531, "grad_norm": 23.125, "learning_rate": 1.996924163686494e-05, "loss": 7.7881, "step": 26640 }, { "epoch": 0.07494776155081402, "grad_norm": 29.125, "learning_rate": 1.9969218551656148e-05, "loss": 8.1339, "step": 26650 }, { "epoch": 0.07497588453826273, "grad_norm": 25.625, "learning_rate": 1.9969195457800835e-05, "loss": 7.4664, "step": 26660 }, { "epoch": 0.07500400752571144, "grad_norm": 27.25, "learning_rate": 1.9969172355299027e-05, "loss": 8.2754, "step": 26670 }, { "epoch": 0.07503213051316016, "grad_norm": 44.25, "learning_rate": 1.9969149244150737e-05, "loss": 7.5049, "step": 26680 }, { "epoch": 0.07506025350060887, "grad_norm": 26.875, "learning_rate": 1.9969126124355993e-05, "loss": 9.2107, "step": 26690 }, { "epoch": 0.07508837648805758, "grad_norm": 30.625, "learning_rate": 1.9969102995914807e-05, "loss": 7.4617, "step": 26700 }, { "epoch": 0.07511649947550629, "grad_norm": 24.25, "learning_rate": 1.9969079858827208e-05, "loss": 7.648, "step": 26710 }, { "epoch": 0.07514462246295499, "grad_norm": 30.125, "learning_rate": 1.9969056713093207e-05, "loss": 8.0947, "step": 26720 }, { "epoch": 0.0751727454504037, "grad_norm": 22.0, "learning_rate": 1.996903355871283e-05, "loss": 7.5861, "step": 26730 }, { "epoch": 0.07520086843785241, "grad_norm": 21.875, "learning_rate": 1.9969010395686094e-05, "loss": 8.7944, "step": 26740 }, { "epoch": 0.07522899142530112, "grad_norm": 30.875, "learning_rate": 1.9968987224013024e-05, "loss": 8.1003, "step": 26750 }, { "epoch": 0.07525711441274983, "grad_norm": 41.5, "learning_rate": 1.9968964043693632e-05, "loss": 8.1872, "step": 26760 }, { "epoch": 0.07528523740019855, "grad_norm": 26.25, "learning_rate": 1.9968940854727944e-05, "loss": 8.4318, "step": 26770 }, { "epoch": 0.07531336038764726, "grad_norm": 29.0, "learning_rate": 1.996891765711598e-05, "loss": 7.7735, "step": 26780 }, { "epoch": 0.07534148337509597, "grad_norm": 28.0, "learning_rate": 1.9968894450857757e-05, "loss": 7.9705, "step": 26790 }, { "epoch": 0.07536960636254468, "grad_norm": 23.375, "learning_rate": 1.9968871235953298e-05, "loss": 7.4879, "step": 26800 }, { "epoch": 0.07539772934999339, "grad_norm": 33.0, "learning_rate": 1.996884801240262e-05, "loss": 8.0119, "step": 26810 }, { "epoch": 0.0754258523374421, "grad_norm": 28.0, "learning_rate": 1.9968824780205748e-05, "loss": 7.8967, "step": 26820 }, { "epoch": 0.07545397532489082, "grad_norm": 24.125, "learning_rate": 1.99688015393627e-05, "loss": 8.532, "step": 26830 }, { "epoch": 0.07548209831233953, "grad_norm": 34.25, "learning_rate": 1.9968778289873493e-05, "loss": 7.9858, "step": 26840 }, { "epoch": 0.07551022129978824, "grad_norm": 22.5, "learning_rate": 1.996875503173815e-05, "loss": 7.7781, "step": 26850 }, { "epoch": 0.07553834428723695, "grad_norm": 49.25, "learning_rate": 1.9968731764956694e-05, "loss": 8.0445, "step": 26860 }, { "epoch": 0.07556646727468566, "grad_norm": 42.5, "learning_rate": 1.9968708489529135e-05, "loss": 8.1704, "step": 26870 }, { "epoch": 0.07559459026213437, "grad_norm": 33.0, "learning_rate": 1.9968685205455505e-05, "loss": 7.879, "step": 26880 }, { "epoch": 0.07562271324958307, "grad_norm": 23.75, "learning_rate": 1.996866191273582e-05, "loss": 7.7216, "step": 26890 }, { "epoch": 0.07565083623703178, "grad_norm": 33.5, "learning_rate": 1.9968638611370097e-05, "loss": 7.7943, "step": 26900 }, { "epoch": 0.0756789592244805, "grad_norm": 43.25, "learning_rate": 1.9968615301358363e-05, "loss": 7.9838, "step": 26910 }, { "epoch": 0.0757070822119292, "grad_norm": 25.0, "learning_rate": 1.9968591982700628e-05, "loss": 8.035, "step": 26920 }, { "epoch": 0.07573520519937792, "grad_norm": 47.0, "learning_rate": 1.9968568655396923e-05, "loss": 8.625, "step": 26930 }, { "epoch": 0.07576332818682663, "grad_norm": 33.75, "learning_rate": 1.996854531944726e-05, "loss": 7.7114, "step": 26940 }, { "epoch": 0.07579145117427534, "grad_norm": 39.75, "learning_rate": 1.9968521974851667e-05, "loss": 7.8385, "step": 26950 }, { "epoch": 0.07581957416172405, "grad_norm": 30.25, "learning_rate": 1.9968498621610156e-05, "loss": 8.5148, "step": 26960 }, { "epoch": 0.07584769714917276, "grad_norm": 32.25, "learning_rate": 1.9968475259722753e-05, "loss": 8.4401, "step": 26970 }, { "epoch": 0.07587582013662147, "grad_norm": 21.5, "learning_rate": 1.996845188918948e-05, "loss": 8.1494, "step": 26980 }, { "epoch": 0.07590394312407019, "grad_norm": 22.625, "learning_rate": 1.996842851001035e-05, "loss": 8.5612, "step": 26990 }, { "epoch": 0.0759320661115189, "grad_norm": 34.5, "learning_rate": 1.9968405122185387e-05, "loss": 7.6477, "step": 27000 }, { "epoch": 0.07596018909896761, "grad_norm": 34.0, "learning_rate": 1.9968381725714612e-05, "loss": 8.4032, "step": 27010 }, { "epoch": 0.07598831208641632, "grad_norm": 26.125, "learning_rate": 1.9968358320598042e-05, "loss": 7.5209, "step": 27020 }, { "epoch": 0.07601643507386503, "grad_norm": 27.375, "learning_rate": 1.9968334906835706e-05, "loss": 7.9156, "step": 27030 }, { "epoch": 0.07604455806131374, "grad_norm": 37.75, "learning_rate": 1.9968311484427612e-05, "loss": 8.0194, "step": 27040 }, { "epoch": 0.07607268104876244, "grad_norm": 38.5, "learning_rate": 1.996828805337379e-05, "loss": 7.854, "step": 27050 }, { "epoch": 0.07610080403621115, "grad_norm": 24.375, "learning_rate": 1.9968264613674254e-05, "loss": 7.9612, "step": 27060 }, { "epoch": 0.07612892702365986, "grad_norm": 21.75, "learning_rate": 1.996824116532903e-05, "loss": 8.1747, "step": 27070 }, { "epoch": 0.07615705001110858, "grad_norm": 30.375, "learning_rate": 1.9968217708338136e-05, "loss": 8.6858, "step": 27080 }, { "epoch": 0.07618517299855729, "grad_norm": 43.5, "learning_rate": 1.9968194242701587e-05, "loss": 8.5937, "step": 27090 }, { "epoch": 0.076213295986006, "grad_norm": 27.0, "learning_rate": 1.9968170768419414e-05, "loss": 7.9049, "step": 27100 }, { "epoch": 0.07624141897345471, "grad_norm": 26.125, "learning_rate": 1.996814728549163e-05, "loss": 8.4836, "step": 27110 }, { "epoch": 0.07626954196090342, "grad_norm": 25.75, "learning_rate": 1.9968123793918255e-05, "loss": 8.8337, "step": 27120 }, { "epoch": 0.07629766494835213, "grad_norm": 28.25, "learning_rate": 1.996810029369931e-05, "loss": 8.6502, "step": 27130 }, { "epoch": 0.07632578793580085, "grad_norm": 34.5, "learning_rate": 1.996807678483482e-05, "loss": 7.7829, "step": 27140 }, { "epoch": 0.07635391092324956, "grad_norm": 30.625, "learning_rate": 1.9968053267324802e-05, "loss": 8.8064, "step": 27150 }, { "epoch": 0.07638203391069827, "grad_norm": 48.25, "learning_rate": 1.9968029741169277e-05, "loss": 7.8698, "step": 27160 }, { "epoch": 0.07641015689814698, "grad_norm": 23.625, "learning_rate": 1.9968006206368264e-05, "loss": 7.7861, "step": 27170 }, { "epoch": 0.07643827988559569, "grad_norm": 29.625, "learning_rate": 1.9967982662921785e-05, "loss": 7.2716, "step": 27180 }, { "epoch": 0.0764664028730444, "grad_norm": 24.375, "learning_rate": 1.9967959110829857e-05, "loss": 8.5829, "step": 27190 }, { "epoch": 0.07649452586049311, "grad_norm": 23.25, "learning_rate": 1.9967935550092505e-05, "loss": 8.052, "step": 27200 }, { "epoch": 0.07652264884794181, "grad_norm": 34.0, "learning_rate": 1.9967911980709748e-05, "loss": 7.4781, "step": 27210 }, { "epoch": 0.07655077183539052, "grad_norm": 25.0, "learning_rate": 1.9967888402681604e-05, "loss": 7.6999, "step": 27220 }, { "epoch": 0.07657889482283924, "grad_norm": 26.25, "learning_rate": 1.99678648160081e-05, "loss": 7.7534, "step": 27230 }, { "epoch": 0.07660701781028795, "grad_norm": 32.25, "learning_rate": 1.9967841220689248e-05, "loss": 8.7501, "step": 27240 }, { "epoch": 0.07663514079773666, "grad_norm": 23.375, "learning_rate": 1.9967817616725074e-05, "loss": 8.1692, "step": 27250 }, { "epoch": 0.07666326378518537, "grad_norm": 29.125, "learning_rate": 1.9967794004115593e-05, "loss": 7.9667, "step": 27260 }, { "epoch": 0.07669138677263408, "grad_norm": 23.625, "learning_rate": 1.9967770382860834e-05, "loss": 7.9357, "step": 27270 }, { "epoch": 0.0767195097600828, "grad_norm": 22.0, "learning_rate": 1.9967746752960812e-05, "loss": 7.7933, "step": 27280 }, { "epoch": 0.0767476327475315, "grad_norm": 42.25, "learning_rate": 1.996772311441555e-05, "loss": 7.8295, "step": 27290 }, { "epoch": 0.07677575573498022, "grad_norm": 24.125, "learning_rate": 1.9967699467225064e-05, "loss": 8.3429, "step": 27300 }, { "epoch": 0.07680387872242893, "grad_norm": 28.75, "learning_rate": 1.9967675811389378e-05, "loss": 7.742, "step": 27310 }, { "epoch": 0.07683200170987764, "grad_norm": 27.625, "learning_rate": 1.996765214690851e-05, "loss": 7.7158, "step": 27320 }, { "epoch": 0.07686012469732635, "grad_norm": 27.75, "learning_rate": 1.9967628473782487e-05, "loss": 8.1685, "step": 27330 }, { "epoch": 0.07688824768477506, "grad_norm": 19.25, "learning_rate": 1.996760479201132e-05, "loss": 7.8431, "step": 27340 }, { "epoch": 0.07691637067222377, "grad_norm": 41.5, "learning_rate": 1.9967581101595036e-05, "loss": 7.8701, "step": 27350 }, { "epoch": 0.07694449365967249, "grad_norm": 45.25, "learning_rate": 1.9967557402533656e-05, "loss": 7.083, "step": 27360 }, { "epoch": 0.07697261664712118, "grad_norm": 23.375, "learning_rate": 1.9967533694827197e-05, "loss": 7.8217, "step": 27370 }, { "epoch": 0.0770007396345699, "grad_norm": 32.0, "learning_rate": 1.996750997847568e-05, "loss": 8.0724, "step": 27380 }, { "epoch": 0.0770288626220186, "grad_norm": 38.0, "learning_rate": 1.996748625347913e-05, "loss": 8.8073, "step": 27390 }, { "epoch": 0.07705698560946732, "grad_norm": 28.25, "learning_rate": 1.996746251983756e-05, "loss": 8.43, "step": 27400 }, { "epoch": 0.07708510859691603, "grad_norm": 22.0, "learning_rate": 1.9967438777551e-05, "loss": 8.5713, "step": 27410 }, { "epoch": 0.07711323158436474, "grad_norm": 29.375, "learning_rate": 1.996741502661946e-05, "loss": 7.3314, "step": 27420 }, { "epoch": 0.07714135457181345, "grad_norm": 26.5, "learning_rate": 1.996739126704297e-05, "loss": 7.9049, "step": 27430 }, { "epoch": 0.07716947755926216, "grad_norm": 18.25, "learning_rate": 1.9967367498821546e-05, "loss": 7.3143, "step": 27440 }, { "epoch": 0.07719760054671088, "grad_norm": 22.75, "learning_rate": 1.996734372195521e-05, "loss": 7.9868, "step": 27450 }, { "epoch": 0.07722572353415959, "grad_norm": 25.0, "learning_rate": 1.996731993644398e-05, "loss": 8.8749, "step": 27460 }, { "epoch": 0.0772538465216083, "grad_norm": 43.5, "learning_rate": 1.9967296142287878e-05, "loss": 7.805, "step": 27470 }, { "epoch": 0.07728196950905701, "grad_norm": 32.0, "learning_rate": 1.9967272339486924e-05, "loss": 7.9415, "step": 27480 }, { "epoch": 0.07731009249650572, "grad_norm": 27.75, "learning_rate": 1.9967248528041144e-05, "loss": 8.5043, "step": 27490 }, { "epoch": 0.07733821548395443, "grad_norm": 47.25, "learning_rate": 1.996722470795055e-05, "loss": 7.3558, "step": 27500 }, { "epoch": 0.07736633847140315, "grad_norm": 26.75, "learning_rate": 1.9967200879215172e-05, "loss": 8.6099, "step": 27510 }, { "epoch": 0.07739446145885186, "grad_norm": 28.5, "learning_rate": 1.9967177041835023e-05, "loss": 7.7975, "step": 27520 }, { "epoch": 0.07742258444630055, "grad_norm": 52.25, "learning_rate": 1.9967153195810127e-05, "loss": 7.9012, "step": 27530 }, { "epoch": 0.07745070743374927, "grad_norm": 25.75, "learning_rate": 1.9967129341140504e-05, "loss": 8.0525, "step": 27540 }, { "epoch": 0.07747883042119798, "grad_norm": 26.625, "learning_rate": 1.9967105477826174e-05, "loss": 8.437, "step": 27550 }, { "epoch": 0.07750695340864669, "grad_norm": 46.5, "learning_rate": 1.9967081605867162e-05, "loss": 8.1046, "step": 27560 }, { "epoch": 0.0775350763960954, "grad_norm": 26.5, "learning_rate": 1.996705772526348e-05, "loss": 7.3295, "step": 27570 }, { "epoch": 0.07756319938354411, "grad_norm": 37.25, "learning_rate": 1.9967033836015157e-05, "loss": 7.8688, "step": 27580 }, { "epoch": 0.07759132237099282, "grad_norm": 18.625, "learning_rate": 1.996700993812221e-05, "loss": 7.8513, "step": 27590 }, { "epoch": 0.07761944535844154, "grad_norm": 35.0, "learning_rate": 1.9966986031584665e-05, "loss": 8.4104, "step": 27600 }, { "epoch": 0.07764756834589025, "grad_norm": 61.5, "learning_rate": 1.996696211640253e-05, "loss": 7.727, "step": 27610 }, { "epoch": 0.07767569133333896, "grad_norm": 23.625, "learning_rate": 1.9966938192575838e-05, "loss": 8.2852, "step": 27620 }, { "epoch": 0.07770381432078767, "grad_norm": 19.75, "learning_rate": 1.9966914260104606e-05, "loss": 7.4344, "step": 27630 }, { "epoch": 0.07773193730823638, "grad_norm": 27.75, "learning_rate": 1.9966890318988853e-05, "loss": 7.8142, "step": 27640 }, { "epoch": 0.0777600602956851, "grad_norm": 44.75, "learning_rate": 1.9966866369228605e-05, "loss": 8.4584, "step": 27650 }, { "epoch": 0.0777881832831338, "grad_norm": 26.5, "learning_rate": 1.9966842410823873e-05, "loss": 8.0484, "step": 27660 }, { "epoch": 0.07781630627058252, "grad_norm": 26.75, "learning_rate": 1.9966818443774685e-05, "loss": 7.6098, "step": 27670 }, { "epoch": 0.07784442925803123, "grad_norm": 28.125, "learning_rate": 1.9966794468081063e-05, "loss": 8.4103, "step": 27680 }, { "epoch": 0.07787255224547994, "grad_norm": 28.25, "learning_rate": 1.9966770483743025e-05, "loss": 7.9141, "step": 27690 }, { "epoch": 0.07790067523292864, "grad_norm": 23.625, "learning_rate": 1.9966746490760592e-05, "loss": 8.0464, "step": 27700 }, { "epoch": 0.07792879822037735, "grad_norm": 37.75, "learning_rate": 1.996672248913378e-05, "loss": 7.9216, "step": 27710 }, { "epoch": 0.07795692120782606, "grad_norm": 44.25, "learning_rate": 1.9966698478862624e-05, "loss": 7.2609, "step": 27720 }, { "epoch": 0.07798504419527477, "grad_norm": 32.25, "learning_rate": 1.996667445994713e-05, "loss": 8.0394, "step": 27730 }, { "epoch": 0.07801316718272348, "grad_norm": 29.625, "learning_rate": 1.9966650432387324e-05, "loss": 8.3917, "step": 27740 }, { "epoch": 0.0780412901701722, "grad_norm": 28.0, "learning_rate": 1.9966626396183226e-05, "loss": 8.2311, "step": 27750 }, { "epoch": 0.0780694131576209, "grad_norm": 24.625, "learning_rate": 1.9966602351334862e-05, "loss": 7.7782, "step": 27760 }, { "epoch": 0.07809753614506962, "grad_norm": 23.5, "learning_rate": 1.996657829784225e-05, "loss": 7.8627, "step": 27770 }, { "epoch": 0.07812565913251833, "grad_norm": 21.625, "learning_rate": 1.9966554235705404e-05, "loss": 7.7049, "step": 27780 }, { "epoch": 0.07815378211996704, "grad_norm": 45.0, "learning_rate": 1.9966530164924353e-05, "loss": 7.8896, "step": 27790 }, { "epoch": 0.07818190510741575, "grad_norm": 57.25, "learning_rate": 1.9966506085499115e-05, "loss": 7.9455, "step": 27800 }, { "epoch": 0.07821002809486446, "grad_norm": 24.375, "learning_rate": 1.9966481997429713e-05, "loss": 8.2275, "step": 27810 }, { "epoch": 0.07823815108231318, "grad_norm": 43.5, "learning_rate": 1.9966457900716166e-05, "loss": 7.3201, "step": 27820 }, { "epoch": 0.07826627406976189, "grad_norm": 24.375, "learning_rate": 1.9966433795358492e-05, "loss": 7.492, "step": 27830 }, { "epoch": 0.0782943970572106, "grad_norm": 31.125, "learning_rate": 1.996640968135672e-05, "loss": 7.8319, "step": 27840 }, { "epoch": 0.07832252004465931, "grad_norm": 24.25, "learning_rate": 1.9966385558710865e-05, "loss": 7.8667, "step": 27850 }, { "epoch": 0.07835064303210801, "grad_norm": 30.5, "learning_rate": 1.996636142742095e-05, "loss": 7.6707, "step": 27860 }, { "epoch": 0.07837876601955672, "grad_norm": 36.5, "learning_rate": 1.996633728748699e-05, "loss": 7.3885, "step": 27870 }, { "epoch": 0.07840688900700543, "grad_norm": 25.5, "learning_rate": 1.9966313138909013e-05, "loss": 6.9396, "step": 27880 }, { "epoch": 0.07843501199445414, "grad_norm": 36.25, "learning_rate": 1.9966288981687038e-05, "loss": 8.0507, "step": 27890 }, { "epoch": 0.07846313498190285, "grad_norm": 24.625, "learning_rate": 1.9966264815821085e-05, "loss": 8.5828, "step": 27900 }, { "epoch": 0.07849125796935157, "grad_norm": 35.25, "learning_rate": 1.996624064131118e-05, "loss": 7.747, "step": 27910 }, { "epoch": 0.07851938095680028, "grad_norm": 30.0, "learning_rate": 1.9966216458157335e-05, "loss": 7.4919, "step": 27920 }, { "epoch": 0.07854750394424899, "grad_norm": 31.0, "learning_rate": 1.9966192266359577e-05, "loss": 7.862, "step": 27930 }, { "epoch": 0.0785756269316977, "grad_norm": 30.875, "learning_rate": 1.9966168065917925e-05, "loss": 8.4959, "step": 27940 }, { "epoch": 0.07860374991914641, "grad_norm": 22.625, "learning_rate": 1.99661438568324e-05, "loss": 8.0447, "step": 27950 }, { "epoch": 0.07863187290659512, "grad_norm": 30.75, "learning_rate": 1.9966119639103025e-05, "loss": 7.9502, "step": 27960 }, { "epoch": 0.07865999589404384, "grad_norm": 28.125, "learning_rate": 1.996609541272982e-05, "loss": 7.9825, "step": 27970 }, { "epoch": 0.07868811888149255, "grad_norm": 35.0, "learning_rate": 1.9966071177712804e-05, "loss": 7.2857, "step": 27980 }, { "epoch": 0.07871624186894126, "grad_norm": 28.25, "learning_rate": 1.9966046934052e-05, "loss": 7.3708, "step": 27990 }, { "epoch": 0.07874436485638997, "grad_norm": 23.0, "learning_rate": 1.996602268174743e-05, "loss": 7.7806, "step": 28000 }, { "epoch": 0.07877248784383868, "grad_norm": 23.375, "learning_rate": 1.996599842079911e-05, "loss": 8.0325, "step": 28010 }, { "epoch": 0.07880061083128738, "grad_norm": 29.75, "learning_rate": 1.9965974151207067e-05, "loss": 8.085, "step": 28020 }, { "epoch": 0.07882873381873609, "grad_norm": 37.0, "learning_rate": 1.996594987297132e-05, "loss": 8.2522, "step": 28030 }, { "epoch": 0.0788568568061848, "grad_norm": 54.5, "learning_rate": 1.996592558609189e-05, "loss": 8.2149, "step": 28040 }, { "epoch": 0.07888497979363351, "grad_norm": 23.5, "learning_rate": 1.99659012905688e-05, "loss": 7.6538, "step": 28050 }, { "epoch": 0.07891310278108223, "grad_norm": 47.75, "learning_rate": 1.9965876986402067e-05, "loss": 7.736, "step": 28060 }, { "epoch": 0.07894122576853094, "grad_norm": 41.5, "learning_rate": 1.9965852673591713e-05, "loss": 8.3534, "step": 28070 }, { "epoch": 0.07896934875597965, "grad_norm": 25.875, "learning_rate": 1.996582835213776e-05, "loss": 7.958, "step": 28080 }, { "epoch": 0.07899747174342836, "grad_norm": 20.875, "learning_rate": 1.9965804022040233e-05, "loss": 8.0366, "step": 28090 }, { "epoch": 0.07902559473087707, "grad_norm": 37.0, "learning_rate": 1.9965779683299145e-05, "loss": 7.2943, "step": 28100 }, { "epoch": 0.07905371771832578, "grad_norm": 21.125, "learning_rate": 1.9965755335914522e-05, "loss": 7.8049, "step": 28110 }, { "epoch": 0.0790818407057745, "grad_norm": 24.625, "learning_rate": 1.9965730979886385e-05, "loss": 7.2822, "step": 28120 }, { "epoch": 0.0791099636932232, "grad_norm": 25.5, "learning_rate": 1.996570661521476e-05, "loss": 8.0912, "step": 28130 }, { "epoch": 0.07913808668067192, "grad_norm": 35.75, "learning_rate": 1.9965682241899652e-05, "loss": 7.5906, "step": 28140 }, { "epoch": 0.07916620966812063, "grad_norm": 29.625, "learning_rate": 1.9965657859941103e-05, "loss": 7.7365, "step": 28150 }, { "epoch": 0.07919433265556934, "grad_norm": 31.625, "learning_rate": 1.9965633469339117e-05, "loss": 8.2284, "step": 28160 }, { "epoch": 0.07922245564301805, "grad_norm": 27.875, "learning_rate": 1.9965609070093727e-05, "loss": 8.0788, "step": 28170 }, { "epoch": 0.07925057863046675, "grad_norm": 45.75, "learning_rate": 1.9965584662204948e-05, "loss": 8.6868, "step": 28180 }, { "epoch": 0.07927870161791546, "grad_norm": 39.25, "learning_rate": 1.99655602456728e-05, "loss": 8.6296, "step": 28190 }, { "epoch": 0.07930682460536417, "grad_norm": 38.5, "learning_rate": 1.996553582049731e-05, "loss": 7.9807, "step": 28200 }, { "epoch": 0.07933494759281288, "grad_norm": 20.875, "learning_rate": 1.9965511386678496e-05, "loss": 7.6621, "step": 28210 }, { "epoch": 0.0793630705802616, "grad_norm": 39.25, "learning_rate": 1.9965486944216377e-05, "loss": 8.2259, "step": 28220 }, { "epoch": 0.07939119356771031, "grad_norm": 33.5, "learning_rate": 1.9965462493110977e-05, "loss": 8.6496, "step": 28230 }, { "epoch": 0.07941931655515902, "grad_norm": 35.5, "learning_rate": 1.9965438033362318e-05, "loss": 7.7785, "step": 28240 }, { "epoch": 0.07944743954260773, "grad_norm": 75.5, "learning_rate": 1.9965413564970416e-05, "loss": 7.6092, "step": 28250 }, { "epoch": 0.07947556253005644, "grad_norm": 22.875, "learning_rate": 1.99653890879353e-05, "loss": 8.2531, "step": 28260 }, { "epoch": 0.07950368551750515, "grad_norm": 19.375, "learning_rate": 1.9965364602256985e-05, "loss": 8.5099, "step": 28270 }, { "epoch": 0.07953180850495387, "grad_norm": 33.75, "learning_rate": 1.9965340107935494e-05, "loss": 7.5987, "step": 28280 }, { "epoch": 0.07955993149240258, "grad_norm": 21.375, "learning_rate": 1.996531560497085e-05, "loss": 8.6437, "step": 28290 }, { "epoch": 0.07958805447985129, "grad_norm": 34.75, "learning_rate": 1.9965291093363074e-05, "loss": 7.5748, "step": 28300 }, { "epoch": 0.0796161774673, "grad_norm": 45.5, "learning_rate": 1.9965266573112183e-05, "loss": 7.4957, "step": 28310 }, { "epoch": 0.07964430045474871, "grad_norm": 30.625, "learning_rate": 1.9965242044218204e-05, "loss": 8.4343, "step": 28320 }, { "epoch": 0.07967242344219742, "grad_norm": 34.5, "learning_rate": 1.9965217506681156e-05, "loss": 8.1894, "step": 28330 }, { "epoch": 0.07970054642964614, "grad_norm": 29.375, "learning_rate": 1.996519296050106e-05, "loss": 6.7445, "step": 28340 }, { "epoch": 0.07972866941709483, "grad_norm": 44.5, "learning_rate": 1.9965168405677935e-05, "loss": 7.5108, "step": 28350 }, { "epoch": 0.07975679240454354, "grad_norm": 25.0, "learning_rate": 1.9965143842211806e-05, "loss": 8.5677, "step": 28360 }, { "epoch": 0.07978491539199226, "grad_norm": 30.5, "learning_rate": 1.9965119270102695e-05, "loss": 7.7441, "step": 28370 }, { "epoch": 0.07981303837944097, "grad_norm": 30.625, "learning_rate": 1.996509468935062e-05, "loss": 8.0103, "step": 28380 }, { "epoch": 0.07984116136688968, "grad_norm": 31.0, "learning_rate": 1.9965070099955598e-05, "loss": 8.1222, "step": 28390 }, { "epoch": 0.07986928435433839, "grad_norm": 24.375, "learning_rate": 1.9965045501917663e-05, "loss": 8.7416, "step": 28400 }, { "epoch": 0.0798974073417871, "grad_norm": 47.0, "learning_rate": 1.9965020895236824e-05, "loss": 7.5119, "step": 28410 }, { "epoch": 0.07992553032923581, "grad_norm": 25.375, "learning_rate": 1.9964996279913112e-05, "loss": 7.1913, "step": 28420 }, { "epoch": 0.07995365331668453, "grad_norm": 33.0, "learning_rate": 1.9964971655946543e-05, "loss": 8.3894, "step": 28430 }, { "epoch": 0.07998177630413324, "grad_norm": 22.25, "learning_rate": 1.9964947023337138e-05, "loss": 7.8122, "step": 28440 }, { "epoch": 0.08000989929158195, "grad_norm": 26.5, "learning_rate": 1.996492238208492e-05, "loss": 8.3363, "step": 28450 }, { "epoch": 0.08003802227903066, "grad_norm": 42.0, "learning_rate": 1.996489773218991e-05, "loss": 7.2653, "step": 28460 }, { "epoch": 0.08006614526647937, "grad_norm": 28.0, "learning_rate": 1.996487307365213e-05, "loss": 6.5333, "step": 28470 }, { "epoch": 0.08009426825392808, "grad_norm": 50.75, "learning_rate": 1.9964848406471596e-05, "loss": 7.2924, "step": 28480 }, { "epoch": 0.0801223912413768, "grad_norm": 27.0, "learning_rate": 1.9964823730648342e-05, "loss": 8.316, "step": 28490 }, { "epoch": 0.0801505142288255, "grad_norm": 28.375, "learning_rate": 1.9964799046182377e-05, "loss": 7.7852, "step": 28500 }, { "epoch": 0.0801786372162742, "grad_norm": 26.0, "learning_rate": 1.9964774353073725e-05, "loss": 7.6473, "step": 28510 }, { "epoch": 0.08020676020372292, "grad_norm": 41.25, "learning_rate": 1.9964749651322413e-05, "loss": 7.9712, "step": 28520 }, { "epoch": 0.08023488319117163, "grad_norm": 48.0, "learning_rate": 1.9964724940928458e-05, "loss": 8.1065, "step": 28530 }, { "epoch": 0.08026300617862034, "grad_norm": 36.5, "learning_rate": 1.9964700221891884e-05, "loss": 7.3466, "step": 28540 }, { "epoch": 0.08029112916606905, "grad_norm": 29.25, "learning_rate": 1.996467549421271e-05, "loss": 7.9753, "step": 28550 }, { "epoch": 0.08031925215351776, "grad_norm": 27.875, "learning_rate": 1.9964650757890955e-05, "loss": 7.5777, "step": 28560 }, { "epoch": 0.08034737514096647, "grad_norm": 35.25, "learning_rate": 1.9964626012926644e-05, "loss": 7.9163, "step": 28570 }, { "epoch": 0.08037549812841518, "grad_norm": 23.375, "learning_rate": 1.99646012593198e-05, "loss": 7.9393, "step": 28580 }, { "epoch": 0.0804036211158639, "grad_norm": 24.625, "learning_rate": 1.9964576497070443e-05, "loss": 8.0188, "step": 28590 }, { "epoch": 0.08043174410331261, "grad_norm": 36.5, "learning_rate": 1.9964551726178592e-05, "loss": 8.0241, "step": 28600 }, { "epoch": 0.08045986709076132, "grad_norm": 23.875, "learning_rate": 1.9964526946644272e-05, "loss": 7.9075, "step": 28610 }, { "epoch": 0.08048799007821003, "grad_norm": 37.25, "learning_rate": 1.99645021584675e-05, "loss": 8.0868, "step": 28620 }, { "epoch": 0.08051611306565874, "grad_norm": 39.5, "learning_rate": 1.9964477361648305e-05, "loss": 7.472, "step": 28630 }, { "epoch": 0.08054423605310745, "grad_norm": 22.25, "learning_rate": 1.9964452556186702e-05, "loss": 8.1021, "step": 28640 }, { "epoch": 0.08057235904055617, "grad_norm": 49.5, "learning_rate": 1.9964427742082714e-05, "loss": 7.7658, "step": 28650 }, { "epoch": 0.08060048202800488, "grad_norm": 24.625, "learning_rate": 1.9964402919336364e-05, "loss": 8.4861, "step": 28660 }, { "epoch": 0.08062860501545357, "grad_norm": 33.0, "learning_rate": 1.9964378087947673e-05, "loss": 8.0058, "step": 28670 }, { "epoch": 0.08065672800290229, "grad_norm": 25.125, "learning_rate": 1.9964353247916657e-05, "loss": 7.9268, "step": 28680 }, { "epoch": 0.080684850990351, "grad_norm": 35.75, "learning_rate": 1.9964328399243346e-05, "loss": 7.7485, "step": 28690 }, { "epoch": 0.08071297397779971, "grad_norm": 37.75, "learning_rate": 1.996430354192776e-05, "loss": 7.5114, "step": 28700 }, { "epoch": 0.08074109696524842, "grad_norm": 26.125, "learning_rate": 1.9964278675969918e-05, "loss": 7.8527, "step": 28710 }, { "epoch": 0.08076921995269713, "grad_norm": 27.625, "learning_rate": 1.996425380136984e-05, "loss": 7.7686, "step": 28720 }, { "epoch": 0.08079734294014584, "grad_norm": 26.125, "learning_rate": 1.9964228918127554e-05, "loss": 7.6673, "step": 28730 }, { "epoch": 0.08082546592759456, "grad_norm": 26.125, "learning_rate": 1.9964204026243075e-05, "loss": 7.7632, "step": 28740 }, { "epoch": 0.08085358891504327, "grad_norm": 27.125, "learning_rate": 1.9964179125716424e-05, "loss": 7.526, "step": 28750 }, { "epoch": 0.08088171190249198, "grad_norm": 34.0, "learning_rate": 1.996415421654763e-05, "loss": 9.0559, "step": 28760 }, { "epoch": 0.08090983488994069, "grad_norm": 27.5, "learning_rate": 1.996412929873671e-05, "loss": 7.6445, "step": 28770 }, { "epoch": 0.0809379578773894, "grad_norm": 32.5, "learning_rate": 1.9964104372283684e-05, "loss": 7.7676, "step": 28780 }, { "epoch": 0.08096608086483811, "grad_norm": 36.0, "learning_rate": 1.9964079437188575e-05, "loss": 7.4814, "step": 28790 }, { "epoch": 0.08099420385228683, "grad_norm": 32.0, "learning_rate": 1.9964054493451405e-05, "loss": 8.0271, "step": 28800 }, { "epoch": 0.08102232683973554, "grad_norm": 26.875, "learning_rate": 1.99640295410722e-05, "loss": 7.6748, "step": 28810 }, { "epoch": 0.08105044982718425, "grad_norm": 34.5, "learning_rate": 1.996400458005097e-05, "loss": 8.0978, "step": 28820 }, { "epoch": 0.08107857281463295, "grad_norm": 30.25, "learning_rate": 1.996397961038775e-05, "loss": 7.7824, "step": 28830 }, { "epoch": 0.08110669580208166, "grad_norm": 31.875, "learning_rate": 1.9963954632082553e-05, "loss": 8.7066, "step": 28840 }, { "epoch": 0.08113481878953037, "grad_norm": 30.125, "learning_rate": 1.9963929645135404e-05, "loss": 7.3685, "step": 28850 }, { "epoch": 0.08116294177697908, "grad_norm": 27.5, "learning_rate": 1.9963904649546326e-05, "loss": 7.9533, "step": 28860 }, { "epoch": 0.08119106476442779, "grad_norm": 47.75, "learning_rate": 1.9963879645315337e-05, "loss": 7.5284, "step": 28870 }, { "epoch": 0.0812191877518765, "grad_norm": 29.625, "learning_rate": 1.9963854632442457e-05, "loss": 7.7151, "step": 28880 }, { "epoch": 0.08124731073932522, "grad_norm": 21.75, "learning_rate": 1.9963829610927717e-05, "loss": 8.0307, "step": 28890 }, { "epoch": 0.08127543372677393, "grad_norm": 25.125, "learning_rate": 1.996380458077113e-05, "loss": 8.2713, "step": 28900 }, { "epoch": 0.08130355671422264, "grad_norm": 25.875, "learning_rate": 1.996377954197272e-05, "loss": 7.9945, "step": 28910 }, { "epoch": 0.08133167970167135, "grad_norm": 24.25, "learning_rate": 1.9963754494532506e-05, "loss": 7.4128, "step": 28920 }, { "epoch": 0.08135980268912006, "grad_norm": 31.375, "learning_rate": 1.996372943845052e-05, "loss": 7.9222, "step": 28930 }, { "epoch": 0.08138792567656877, "grad_norm": 29.625, "learning_rate": 1.9963704373726773e-05, "loss": 8.6171, "step": 28940 }, { "epoch": 0.08141604866401748, "grad_norm": 26.625, "learning_rate": 1.996367930036129e-05, "loss": 8.2733, "step": 28950 }, { "epoch": 0.0814441716514662, "grad_norm": 32.5, "learning_rate": 1.9963654218354093e-05, "loss": 7.605, "step": 28960 }, { "epoch": 0.08147229463891491, "grad_norm": 28.875, "learning_rate": 1.9963629127705205e-05, "loss": 8.4248, "step": 28970 }, { "epoch": 0.08150041762636362, "grad_norm": 36.5, "learning_rate": 1.9963604028414646e-05, "loss": 7.6258, "step": 28980 }, { "epoch": 0.08152854061381233, "grad_norm": 38.75, "learning_rate": 1.996357892048244e-05, "loss": 7.9703, "step": 28990 }, { "epoch": 0.08155666360126103, "grad_norm": 22.0, "learning_rate": 1.9963553803908607e-05, "loss": 7.5612, "step": 29000 }, { "epoch": 0.08158478658870974, "grad_norm": 24.625, "learning_rate": 1.9963528678693168e-05, "loss": 7.705, "step": 29010 }, { "epoch": 0.08161290957615845, "grad_norm": 47.5, "learning_rate": 1.9963503544836143e-05, "loss": 8.8081, "step": 29020 }, { "epoch": 0.08164103256360716, "grad_norm": 37.25, "learning_rate": 1.9963478402337563e-05, "loss": 8.5169, "step": 29030 }, { "epoch": 0.08166915555105587, "grad_norm": 31.0, "learning_rate": 1.9963453251197437e-05, "loss": 8.1981, "step": 29040 }, { "epoch": 0.08169727853850459, "grad_norm": 30.375, "learning_rate": 1.9963428091415796e-05, "loss": 7.5985, "step": 29050 }, { "epoch": 0.0817254015259533, "grad_norm": 23.875, "learning_rate": 1.9963402922992658e-05, "loss": 7.4507, "step": 29060 }, { "epoch": 0.08175352451340201, "grad_norm": 27.25, "learning_rate": 1.996337774592805e-05, "loss": 7.7355, "step": 29070 }, { "epoch": 0.08178164750085072, "grad_norm": 24.375, "learning_rate": 1.9963352560221985e-05, "loss": 8.3257, "step": 29080 }, { "epoch": 0.08180977048829943, "grad_norm": 35.5, "learning_rate": 1.996332736587449e-05, "loss": 8.1861, "step": 29090 }, { "epoch": 0.08183789347574814, "grad_norm": 26.125, "learning_rate": 1.9963302162885586e-05, "loss": 7.3656, "step": 29100 }, { "epoch": 0.08186601646319686, "grad_norm": 43.5, "learning_rate": 1.9963276951255296e-05, "loss": 8.152, "step": 29110 }, { "epoch": 0.08189413945064557, "grad_norm": 30.75, "learning_rate": 1.9963251730983645e-05, "loss": 7.5156, "step": 29120 }, { "epoch": 0.08192226243809428, "grad_norm": 22.625, "learning_rate": 1.9963226502070645e-05, "loss": 8.0142, "step": 29130 }, { "epoch": 0.08195038542554299, "grad_norm": 25.625, "learning_rate": 1.9963201264516326e-05, "loss": 7.6814, "step": 29140 }, { "epoch": 0.0819785084129917, "grad_norm": 24.875, "learning_rate": 1.996317601832071e-05, "loss": 7.5147, "step": 29150 }, { "epoch": 0.0820066314004404, "grad_norm": 48.0, "learning_rate": 1.9963150763483815e-05, "loss": 8.0413, "step": 29160 }, { "epoch": 0.08203475438788911, "grad_norm": 42.5, "learning_rate": 1.9963125500005663e-05, "loss": 8.6127, "step": 29170 }, { "epoch": 0.08206287737533782, "grad_norm": 21.25, "learning_rate": 1.9963100227886278e-05, "loss": 8.3984, "step": 29180 }, { "epoch": 0.08209100036278653, "grad_norm": 31.0, "learning_rate": 1.9963074947125683e-05, "loss": 7.271, "step": 29190 }, { "epoch": 0.08211912335023525, "grad_norm": 21.125, "learning_rate": 1.9963049657723895e-05, "loss": 7.7053, "step": 29200 }, { "epoch": 0.08214724633768396, "grad_norm": 33.75, "learning_rate": 1.996302435968094e-05, "loss": 8.1535, "step": 29210 }, { "epoch": 0.08217536932513267, "grad_norm": 51.0, "learning_rate": 1.9962999052996845e-05, "loss": 7.4601, "step": 29220 }, { "epoch": 0.08220349231258138, "grad_norm": 26.25, "learning_rate": 1.996297373767162e-05, "loss": 8.2985, "step": 29230 }, { "epoch": 0.08223161530003009, "grad_norm": 42.25, "learning_rate": 1.9962948413705297e-05, "loss": 8.3043, "step": 29240 }, { "epoch": 0.0822597382874788, "grad_norm": 32.25, "learning_rate": 1.996292308109789e-05, "loss": 7.7354, "step": 29250 }, { "epoch": 0.08228786127492752, "grad_norm": 32.25, "learning_rate": 1.996289773984943e-05, "loss": 7.2548, "step": 29260 }, { "epoch": 0.08231598426237623, "grad_norm": 25.5, "learning_rate": 1.996287238995993e-05, "loss": 7.4271, "step": 29270 }, { "epoch": 0.08234410724982494, "grad_norm": 24.5, "learning_rate": 1.996284703142942e-05, "loss": 8.5957, "step": 29280 }, { "epoch": 0.08237223023727365, "grad_norm": 42.75, "learning_rate": 1.9962821664257913e-05, "loss": 7.7794, "step": 29290 }, { "epoch": 0.08240035322472236, "grad_norm": 24.625, "learning_rate": 1.9962796288445436e-05, "loss": 8.5384, "step": 29300 }, { "epoch": 0.08242847621217107, "grad_norm": 24.375, "learning_rate": 1.9962770903992013e-05, "loss": 7.4033, "step": 29310 }, { "epoch": 0.08245659919961977, "grad_norm": 25.75, "learning_rate": 1.9962745510897664e-05, "loss": 7.5881, "step": 29320 }, { "epoch": 0.08248472218706848, "grad_norm": 44.5, "learning_rate": 1.9962720109162414e-05, "loss": 8.0466, "step": 29330 }, { "epoch": 0.0825128451745172, "grad_norm": 27.5, "learning_rate": 1.9962694698786276e-05, "loss": 7.9642, "step": 29340 }, { "epoch": 0.0825409681619659, "grad_norm": 29.5, "learning_rate": 1.9962669279769283e-05, "loss": 7.4193, "step": 29350 }, { "epoch": 0.08256909114941462, "grad_norm": 58.0, "learning_rate": 1.996264385211145e-05, "loss": 7.3756, "step": 29360 }, { "epoch": 0.08259721413686333, "grad_norm": 33.0, "learning_rate": 1.99626184158128e-05, "loss": 7.6708, "step": 29370 }, { "epoch": 0.08262533712431204, "grad_norm": 39.5, "learning_rate": 1.996259297087336e-05, "loss": 7.467, "step": 29380 }, { "epoch": 0.08265346011176075, "grad_norm": 21.125, "learning_rate": 1.9962567517293145e-05, "loss": 7.5773, "step": 29390 }, { "epoch": 0.08268158309920946, "grad_norm": 31.125, "learning_rate": 1.996254205507218e-05, "loss": 7.7751, "step": 29400 }, { "epoch": 0.08270970608665817, "grad_norm": 48.0, "learning_rate": 1.996251658421049e-05, "loss": 8.136, "step": 29410 }, { "epoch": 0.08273782907410689, "grad_norm": 23.625, "learning_rate": 1.9962491104708095e-05, "loss": 7.918, "step": 29420 }, { "epoch": 0.0827659520615556, "grad_norm": 24.5, "learning_rate": 1.9962465616565015e-05, "loss": 7.8234, "step": 29430 }, { "epoch": 0.08279407504900431, "grad_norm": 26.0, "learning_rate": 1.9962440119781274e-05, "loss": 7.5626, "step": 29440 }, { "epoch": 0.08282219803645302, "grad_norm": 39.0, "learning_rate": 1.9962414614356892e-05, "loss": 7.8826, "step": 29450 }, { "epoch": 0.08285032102390173, "grad_norm": 42.75, "learning_rate": 1.9962389100291896e-05, "loss": 8.0542, "step": 29460 }, { "epoch": 0.08287844401135044, "grad_norm": 26.625, "learning_rate": 1.9962363577586304e-05, "loss": 7.6912, "step": 29470 }, { "epoch": 0.08290656699879914, "grad_norm": 30.875, "learning_rate": 1.996233804624014e-05, "loss": 7.6208, "step": 29480 }, { "epoch": 0.08293468998624785, "grad_norm": 26.875, "learning_rate": 1.9962312506253425e-05, "loss": 7.9748, "step": 29490 }, { "epoch": 0.08296281297369656, "grad_norm": 45.5, "learning_rate": 1.9962286957626182e-05, "loss": 7.7688, "step": 29500 }, { "epoch": 0.08299093596114528, "grad_norm": 45.0, "learning_rate": 1.996226140035843e-05, "loss": 7.5988, "step": 29510 }, { "epoch": 0.08301905894859399, "grad_norm": 29.0, "learning_rate": 1.9962235834450197e-05, "loss": 7.6292, "step": 29520 }, { "epoch": 0.0830471819360427, "grad_norm": 30.125, "learning_rate": 1.9962210259901503e-05, "loss": 8.3314, "step": 29530 }, { "epoch": 0.08307530492349141, "grad_norm": 27.25, "learning_rate": 1.9962184676712365e-05, "loss": 7.9886, "step": 29540 }, { "epoch": 0.08310342791094012, "grad_norm": 24.5, "learning_rate": 1.9962159084882816e-05, "loss": 8.5218, "step": 29550 }, { "epoch": 0.08313155089838883, "grad_norm": 24.5, "learning_rate": 1.9962133484412864e-05, "loss": 8.0313, "step": 29560 }, { "epoch": 0.08315967388583755, "grad_norm": 23.375, "learning_rate": 1.9962107875302542e-05, "loss": 7.5289, "step": 29570 }, { "epoch": 0.08318779687328626, "grad_norm": 52.75, "learning_rate": 1.9962082257551868e-05, "loss": 8.0753, "step": 29580 }, { "epoch": 0.08321591986073497, "grad_norm": 27.125, "learning_rate": 1.9962056631160867e-05, "loss": 7.7786, "step": 29590 }, { "epoch": 0.08324404284818368, "grad_norm": 28.625, "learning_rate": 1.9962030996129562e-05, "loss": 8.6019, "step": 29600 }, { "epoch": 0.08327216583563239, "grad_norm": 22.25, "learning_rate": 1.9962005352457968e-05, "loss": 8.1079, "step": 29610 }, { "epoch": 0.0833002888230811, "grad_norm": 31.75, "learning_rate": 1.9961979700146113e-05, "loss": 7.9685, "step": 29620 }, { "epoch": 0.08332841181052981, "grad_norm": 30.5, "learning_rate": 1.9961954039194024e-05, "loss": 7.8832, "step": 29630 }, { "epoch": 0.08335653479797853, "grad_norm": 73.5, "learning_rate": 1.996192836960171e-05, "loss": 8.3904, "step": 29640 }, { "epoch": 0.08338465778542722, "grad_norm": 35.0, "learning_rate": 1.9961902691369205e-05, "loss": 8.2105, "step": 29650 }, { "epoch": 0.08341278077287594, "grad_norm": 38.0, "learning_rate": 1.9961877004496525e-05, "loss": 7.7654, "step": 29660 }, { "epoch": 0.08344090376032465, "grad_norm": 33.75, "learning_rate": 1.9961851308983693e-05, "loss": 7.9462, "step": 29670 }, { "epoch": 0.08346902674777336, "grad_norm": 25.25, "learning_rate": 1.9961825604830735e-05, "loss": 7.859, "step": 29680 }, { "epoch": 0.08349714973522207, "grad_norm": 42.75, "learning_rate": 1.996179989203767e-05, "loss": 8.1228, "step": 29690 }, { "epoch": 0.08352527272267078, "grad_norm": 22.125, "learning_rate": 1.9961774170604522e-05, "loss": 7.3687, "step": 29700 }, { "epoch": 0.0835533957101195, "grad_norm": 31.25, "learning_rate": 1.9961748440531313e-05, "loss": 8.2964, "step": 29710 }, { "epoch": 0.0835815186975682, "grad_norm": 41.0, "learning_rate": 1.9961722701818062e-05, "loss": 7.9904, "step": 29720 }, { "epoch": 0.08360964168501692, "grad_norm": 69.0, "learning_rate": 1.9961696954464797e-05, "loss": 7.3973, "step": 29730 }, { "epoch": 0.08363776467246563, "grad_norm": 30.75, "learning_rate": 1.9961671198471535e-05, "loss": 7.7189, "step": 29740 }, { "epoch": 0.08366588765991434, "grad_norm": 35.0, "learning_rate": 1.9961645433838304e-05, "loss": 7.9342, "step": 29750 }, { "epoch": 0.08369401064736305, "grad_norm": 57.5, "learning_rate": 1.9961619660565123e-05, "loss": 8.1702, "step": 29760 }, { "epoch": 0.08372213363481176, "grad_norm": 28.75, "learning_rate": 1.996159387865201e-05, "loss": 8.2349, "step": 29770 }, { "epoch": 0.08375025662226047, "grad_norm": 33.0, "learning_rate": 1.9961568088098993e-05, "loss": 7.8651, "step": 29780 }, { "epoch": 0.08377837960970919, "grad_norm": 20.0, "learning_rate": 1.9961542288906096e-05, "loss": 7.7593, "step": 29790 }, { "epoch": 0.0838065025971579, "grad_norm": 30.125, "learning_rate": 1.9961516481073337e-05, "loss": 7.8211, "step": 29800 }, { "epoch": 0.0838346255846066, "grad_norm": 27.625, "learning_rate": 1.996149066460074e-05, "loss": 8.2812, "step": 29810 }, { "epoch": 0.0838627485720553, "grad_norm": 30.5, "learning_rate": 1.996146483948833e-05, "loss": 8.1712, "step": 29820 }, { "epoch": 0.08389087155950402, "grad_norm": 26.625, "learning_rate": 1.9961439005736122e-05, "loss": 8.0673, "step": 29830 }, { "epoch": 0.08391899454695273, "grad_norm": 33.5, "learning_rate": 1.9961413163344148e-05, "loss": 8.7466, "step": 29840 }, { "epoch": 0.08394711753440144, "grad_norm": 27.75, "learning_rate": 1.996138731231242e-05, "loss": 7.9358, "step": 29850 }, { "epoch": 0.08397524052185015, "grad_norm": 29.375, "learning_rate": 1.996136145264097e-05, "loss": 7.8069, "step": 29860 }, { "epoch": 0.08400336350929886, "grad_norm": 32.5, "learning_rate": 1.9961335584329816e-05, "loss": 7.3317, "step": 29870 }, { "epoch": 0.08403148649674758, "grad_norm": 26.125, "learning_rate": 1.9961309707378982e-05, "loss": 7.6092, "step": 29880 }, { "epoch": 0.08405960948419629, "grad_norm": 29.25, "learning_rate": 1.9961283821788487e-05, "loss": 7.823, "step": 29890 }, { "epoch": 0.084087732471645, "grad_norm": 32.5, "learning_rate": 1.996125792755836e-05, "loss": 8.023, "step": 29900 }, { "epoch": 0.08411585545909371, "grad_norm": 20.5, "learning_rate": 1.9961232024688614e-05, "loss": 8.2337, "step": 29910 }, { "epoch": 0.08414397844654242, "grad_norm": 27.0, "learning_rate": 1.996120611317928e-05, "loss": 8.0252, "step": 29920 }, { "epoch": 0.08417210143399113, "grad_norm": 23.5, "learning_rate": 1.9961180193030375e-05, "loss": 8.1497, "step": 29930 }, { "epoch": 0.08420022442143985, "grad_norm": 25.0, "learning_rate": 1.9961154264241925e-05, "loss": 8.1874, "step": 29940 }, { "epoch": 0.08422834740888856, "grad_norm": 24.125, "learning_rate": 1.9961128326813952e-05, "loss": 7.5061, "step": 29950 }, { "epoch": 0.08425647039633727, "grad_norm": 25.0, "learning_rate": 1.9961102380746477e-05, "loss": 8.493, "step": 29960 }, { "epoch": 0.08428459338378597, "grad_norm": 38.75, "learning_rate": 1.9961076426039523e-05, "loss": 7.8739, "step": 29970 }, { "epoch": 0.08431271637123468, "grad_norm": 28.5, "learning_rate": 1.9961050462693114e-05, "loss": 8.1151, "step": 29980 }, { "epoch": 0.08434083935868339, "grad_norm": 29.625, "learning_rate": 1.9961024490707267e-05, "loss": 7.6801, "step": 29990 }, { "epoch": 0.0843689623461321, "grad_norm": 32.0, "learning_rate": 1.996099851008201e-05, "loss": 7.9921, "step": 30000 }, { "epoch": 0.08439708533358081, "grad_norm": 34.25, "learning_rate": 1.9960972520817365e-05, "loss": 9.0981, "step": 30010 }, { "epoch": 0.08442520832102952, "grad_norm": 25.75, "learning_rate": 1.9960946522913357e-05, "loss": 8.2323, "step": 30020 }, { "epoch": 0.08445333130847824, "grad_norm": 28.375, "learning_rate": 1.9960920516370003e-05, "loss": 8.2221, "step": 30030 }, { "epoch": 0.08448145429592695, "grad_norm": 32.0, "learning_rate": 1.9960894501187326e-05, "loss": 8.0671, "step": 30040 }, { "epoch": 0.08450957728337566, "grad_norm": 21.875, "learning_rate": 1.996086847736535e-05, "loss": 7.8533, "step": 30050 }, { "epoch": 0.08453770027082437, "grad_norm": 36.75, "learning_rate": 1.99608424449041e-05, "loss": 7.1154, "step": 30060 }, { "epoch": 0.08456582325827308, "grad_norm": 50.0, "learning_rate": 1.99608164038036e-05, "loss": 7.6813, "step": 30070 }, { "epoch": 0.0845939462457218, "grad_norm": 28.125, "learning_rate": 1.9960790354063865e-05, "loss": 8.1946, "step": 30080 }, { "epoch": 0.0846220692331705, "grad_norm": 22.625, "learning_rate": 1.996076429568492e-05, "loss": 7.7372, "step": 30090 }, { "epoch": 0.08465019222061922, "grad_norm": 25.875, "learning_rate": 1.996073822866679e-05, "loss": 8.0794, "step": 30100 }, { "epoch": 0.08467831520806793, "grad_norm": 34.25, "learning_rate": 1.99607121530095e-05, "loss": 8.4461, "step": 30110 }, { "epoch": 0.08470643819551664, "grad_norm": 29.25, "learning_rate": 1.996068606871307e-05, "loss": 7.1612, "step": 30120 }, { "epoch": 0.08473456118296534, "grad_norm": 34.25, "learning_rate": 1.9960659975777518e-05, "loss": 8.0237, "step": 30130 }, { "epoch": 0.08476268417041405, "grad_norm": 39.5, "learning_rate": 1.9960633874202874e-05, "loss": 8.2848, "step": 30140 }, { "epoch": 0.08479080715786276, "grad_norm": 27.375, "learning_rate": 1.996060776398916e-05, "loss": 7.913, "step": 30150 }, { "epoch": 0.08481893014531147, "grad_norm": 26.75, "learning_rate": 1.996058164513639e-05, "loss": 8.4027, "step": 30160 }, { "epoch": 0.08484705313276018, "grad_norm": 84.0, "learning_rate": 1.99605555176446e-05, "loss": 7.9528, "step": 30170 }, { "epoch": 0.0848751761202089, "grad_norm": 33.0, "learning_rate": 1.99605293815138e-05, "loss": 7.6112, "step": 30180 }, { "epoch": 0.0849032991076576, "grad_norm": 26.125, "learning_rate": 1.996050323674402e-05, "loss": 6.8627, "step": 30190 }, { "epoch": 0.08493142209510632, "grad_norm": 37.0, "learning_rate": 1.996047708333528e-05, "loss": 7.7391, "step": 30200 }, { "epoch": 0.08495954508255503, "grad_norm": 27.875, "learning_rate": 1.9960450921287602e-05, "loss": 7.6925, "step": 30210 }, { "epoch": 0.08498766807000374, "grad_norm": 39.25, "learning_rate": 1.9960424750601013e-05, "loss": 8.7664, "step": 30220 }, { "epoch": 0.08501579105745245, "grad_norm": 37.25, "learning_rate": 1.996039857127553e-05, "loss": 8.1097, "step": 30230 }, { "epoch": 0.08504391404490116, "grad_norm": 30.125, "learning_rate": 1.996037238331118e-05, "loss": 8.1872, "step": 30240 }, { "epoch": 0.08507203703234988, "grad_norm": 22.75, "learning_rate": 1.9960346186707985e-05, "loss": 7.65, "step": 30250 }, { "epoch": 0.08510016001979859, "grad_norm": 26.25, "learning_rate": 1.9960319981465968e-05, "loss": 8.1697, "step": 30260 }, { "epoch": 0.0851282830072473, "grad_norm": 29.375, "learning_rate": 1.996029376758515e-05, "loss": 7.4226, "step": 30270 }, { "epoch": 0.08515640599469601, "grad_norm": 25.0, "learning_rate": 1.996026754506555e-05, "loss": 7.6463, "step": 30280 }, { "epoch": 0.08518452898214472, "grad_norm": 24.75, "learning_rate": 1.99602413139072e-05, "loss": 8.238, "step": 30290 }, { "epoch": 0.08521265196959342, "grad_norm": 37.0, "learning_rate": 1.9960215074110118e-05, "loss": 8.188, "step": 30300 }, { "epoch": 0.08524077495704213, "grad_norm": 35.75, "learning_rate": 1.9960188825674327e-05, "loss": 7.9658, "step": 30310 }, { "epoch": 0.08526889794449084, "grad_norm": 30.875, "learning_rate": 1.9960162568599846e-05, "loss": 8.2808, "step": 30320 }, { "epoch": 0.08529702093193955, "grad_norm": 27.25, "learning_rate": 1.9960136302886704e-05, "loss": 7.4087, "step": 30330 }, { "epoch": 0.08532514391938827, "grad_norm": 25.125, "learning_rate": 1.9960110028534923e-05, "loss": 7.9866, "step": 30340 }, { "epoch": 0.08535326690683698, "grad_norm": 21.75, "learning_rate": 1.996008374554452e-05, "loss": 8.1708, "step": 30350 }, { "epoch": 0.08538138989428569, "grad_norm": 41.5, "learning_rate": 1.9960057453915525e-05, "loss": 7.8881, "step": 30360 }, { "epoch": 0.0854095128817344, "grad_norm": 30.875, "learning_rate": 1.9960031153647955e-05, "loss": 7.8825, "step": 30370 }, { "epoch": 0.08543763586918311, "grad_norm": 30.875, "learning_rate": 1.9960004844741835e-05, "loss": 8.5317, "step": 30380 }, { "epoch": 0.08546575885663182, "grad_norm": 52.5, "learning_rate": 1.995997852719719e-05, "loss": 7.0384, "step": 30390 }, { "epoch": 0.08549388184408054, "grad_norm": 30.0, "learning_rate": 1.9959952201014038e-05, "loss": 7.9712, "step": 30400 }, { "epoch": 0.08552200483152925, "grad_norm": 22.125, "learning_rate": 1.9959925866192406e-05, "loss": 8.1405, "step": 30410 }, { "epoch": 0.08555012781897796, "grad_norm": 23.875, "learning_rate": 1.9959899522732318e-05, "loss": 7.697, "step": 30420 }, { "epoch": 0.08557825080642667, "grad_norm": 33.0, "learning_rate": 1.9959873170633792e-05, "loss": 7.9128, "step": 30430 }, { "epoch": 0.08560637379387538, "grad_norm": 25.25, "learning_rate": 1.9959846809896852e-05, "loss": 7.2761, "step": 30440 }, { "epoch": 0.0856344967813241, "grad_norm": 50.0, "learning_rate": 1.9959820440521524e-05, "loss": 8.0125, "step": 30450 }, { "epoch": 0.08566261976877279, "grad_norm": 22.875, "learning_rate": 1.995979406250783e-05, "loss": 7.8731, "step": 30460 }, { "epoch": 0.0856907427562215, "grad_norm": 31.25, "learning_rate": 1.995976767585579e-05, "loss": 7.641, "step": 30470 }, { "epoch": 0.08571886574367021, "grad_norm": 24.75, "learning_rate": 1.995974128056543e-05, "loss": 8.1909, "step": 30480 }, { "epoch": 0.08574698873111893, "grad_norm": 43.0, "learning_rate": 1.9959714876636772e-05, "loss": 7.9417, "step": 30490 }, { "epoch": 0.08577511171856764, "grad_norm": 28.75, "learning_rate": 1.9959688464069838e-05, "loss": 7.8041, "step": 30500 }, { "epoch": 0.08580323470601635, "grad_norm": 86.5, "learning_rate": 1.995966204286465e-05, "loss": 7.9301, "step": 30510 }, { "epoch": 0.08583135769346506, "grad_norm": 42.75, "learning_rate": 1.9959635613021234e-05, "loss": 7.9757, "step": 30520 }, { "epoch": 0.08585948068091377, "grad_norm": 37.75, "learning_rate": 1.995960917453961e-05, "loss": 7.7735, "step": 30530 }, { "epoch": 0.08588760366836248, "grad_norm": 26.375, "learning_rate": 1.9959582727419805e-05, "loss": 7.8387, "step": 30540 }, { "epoch": 0.0859157266558112, "grad_norm": 20.5, "learning_rate": 1.9959556271661834e-05, "loss": 7.6276, "step": 30550 }, { "epoch": 0.0859438496432599, "grad_norm": 27.5, "learning_rate": 1.995952980726573e-05, "loss": 8.2386, "step": 30560 }, { "epoch": 0.08597197263070862, "grad_norm": 25.5, "learning_rate": 1.9959503334231512e-05, "loss": 7.6792, "step": 30570 }, { "epoch": 0.08600009561815733, "grad_norm": 35.75, "learning_rate": 1.99594768525592e-05, "loss": 8.2926, "step": 30580 }, { "epoch": 0.08602821860560604, "grad_norm": 31.375, "learning_rate": 1.9959450362248816e-05, "loss": 7.9275, "step": 30590 }, { "epoch": 0.08605634159305475, "grad_norm": 34.75, "learning_rate": 1.9959423863300388e-05, "loss": 7.8441, "step": 30600 }, { "epoch": 0.08608446458050346, "grad_norm": 28.125, "learning_rate": 1.995939735571394e-05, "loss": 7.2813, "step": 30610 }, { "epoch": 0.08611258756795216, "grad_norm": 31.125, "learning_rate": 1.995937083948949e-05, "loss": 7.5703, "step": 30620 }, { "epoch": 0.08614071055540087, "grad_norm": 21.125, "learning_rate": 1.9959344314627062e-05, "loss": 8.4307, "step": 30630 }, { "epoch": 0.08616883354284958, "grad_norm": 28.625, "learning_rate": 1.995931778112668e-05, "loss": 7.6608, "step": 30640 }, { "epoch": 0.0861969565302983, "grad_norm": 27.75, "learning_rate": 1.9959291238988367e-05, "loss": 8.3147, "step": 30650 }, { "epoch": 0.08622507951774701, "grad_norm": 26.75, "learning_rate": 1.9959264688212144e-05, "loss": 7.1333, "step": 30660 }, { "epoch": 0.08625320250519572, "grad_norm": 37.75, "learning_rate": 1.995923812879804e-05, "loss": 7.8794, "step": 30670 }, { "epoch": 0.08628132549264443, "grad_norm": 34.25, "learning_rate": 1.995921156074607e-05, "loss": 8.2676, "step": 30680 }, { "epoch": 0.08630944848009314, "grad_norm": 34.25, "learning_rate": 1.9959184984056264e-05, "loss": 7.2158, "step": 30690 }, { "epoch": 0.08633757146754185, "grad_norm": 29.25, "learning_rate": 1.9959158398728643e-05, "loss": 7.5831, "step": 30700 }, { "epoch": 0.08636569445499057, "grad_norm": 29.75, "learning_rate": 1.9959131804763226e-05, "loss": 8.2412, "step": 30710 }, { "epoch": 0.08639381744243928, "grad_norm": 37.75, "learning_rate": 1.9959105202160046e-05, "loss": 8.1694, "step": 30720 }, { "epoch": 0.08642194042988799, "grad_norm": 33.25, "learning_rate": 1.9959078590919112e-05, "loss": 8.2763, "step": 30730 }, { "epoch": 0.0864500634173367, "grad_norm": 45.75, "learning_rate": 1.9959051971040457e-05, "loss": 7.9991, "step": 30740 }, { "epoch": 0.08647818640478541, "grad_norm": 29.125, "learning_rate": 1.99590253425241e-05, "loss": 7.7628, "step": 30750 }, { "epoch": 0.08650630939223412, "grad_norm": 36.0, "learning_rate": 1.9958998705370066e-05, "loss": 7.9506, "step": 30760 }, { "epoch": 0.08653443237968284, "grad_norm": 29.5, "learning_rate": 1.995897205957838e-05, "loss": 7.8155, "step": 30770 }, { "epoch": 0.08656255536713153, "grad_norm": 33.5, "learning_rate": 1.995894540514906e-05, "loss": 8.0788, "step": 30780 }, { "epoch": 0.08659067835458024, "grad_norm": 30.0, "learning_rate": 1.9958918742082135e-05, "loss": 8.0447, "step": 30790 }, { "epoch": 0.08661880134202896, "grad_norm": 25.125, "learning_rate": 1.9958892070377624e-05, "loss": 7.5548, "step": 30800 }, { "epoch": 0.08664692432947767, "grad_norm": 40.25, "learning_rate": 1.9958865390035552e-05, "loss": 8.4124, "step": 30810 }, { "epoch": 0.08667504731692638, "grad_norm": 53.75, "learning_rate": 1.995883870105594e-05, "loss": 8.1266, "step": 30820 }, { "epoch": 0.08670317030437509, "grad_norm": 36.25, "learning_rate": 1.9958812003438814e-05, "loss": 8.8181, "step": 30830 }, { "epoch": 0.0867312932918238, "grad_norm": 24.25, "learning_rate": 1.9958785297184195e-05, "loss": 7.6482, "step": 30840 }, { "epoch": 0.08675941627927251, "grad_norm": 35.0, "learning_rate": 1.9958758582292103e-05, "loss": 8.2511, "step": 30850 }, { "epoch": 0.08678753926672123, "grad_norm": 27.5, "learning_rate": 1.995873185876257e-05, "loss": 7.4525, "step": 30860 }, { "epoch": 0.08681566225416994, "grad_norm": 34.5, "learning_rate": 1.9958705126595613e-05, "loss": 7.4543, "step": 30870 }, { "epoch": 0.08684378524161865, "grad_norm": 49.25, "learning_rate": 1.9958678385791255e-05, "loss": 8.2882, "step": 30880 }, { "epoch": 0.08687190822906736, "grad_norm": 49.0, "learning_rate": 1.995865163634952e-05, "loss": 7.9163, "step": 30890 }, { "epoch": 0.08690003121651607, "grad_norm": 23.0, "learning_rate": 1.9958624878270433e-05, "loss": 7.9205, "step": 30900 }, { "epoch": 0.08692815420396478, "grad_norm": 34.0, "learning_rate": 1.9958598111554016e-05, "loss": 7.1666, "step": 30910 }, { "epoch": 0.0869562771914135, "grad_norm": 35.0, "learning_rate": 1.995857133620029e-05, "loss": 8.0085, "step": 30920 }, { "epoch": 0.0869844001788622, "grad_norm": 37.5, "learning_rate": 1.9958544552209282e-05, "loss": 7.6574, "step": 30930 }, { "epoch": 0.08701252316631092, "grad_norm": 43.0, "learning_rate": 1.9958517759581016e-05, "loss": 8.1662, "step": 30940 }, { "epoch": 0.08704064615375962, "grad_norm": 34.25, "learning_rate": 1.995849095831551e-05, "loss": 7.8803, "step": 30950 }, { "epoch": 0.08706876914120833, "grad_norm": 27.25, "learning_rate": 1.995846414841279e-05, "loss": 7.7706, "step": 30960 }, { "epoch": 0.08709689212865704, "grad_norm": 30.0, "learning_rate": 1.995843732987288e-05, "loss": 8.1569, "step": 30970 }, { "epoch": 0.08712501511610575, "grad_norm": 26.875, "learning_rate": 1.9958410502695805e-05, "loss": 7.9677, "step": 30980 }, { "epoch": 0.08715313810355446, "grad_norm": 30.125, "learning_rate": 1.995838366688158e-05, "loss": 7.6945, "step": 30990 }, { "epoch": 0.08718126109100317, "grad_norm": 33.25, "learning_rate": 1.995835682243024e-05, "loss": 7.3983, "step": 31000 }, { "epoch": 0.08720938407845188, "grad_norm": 27.125, "learning_rate": 1.9958329969341802e-05, "loss": 7.7148, "step": 31010 }, { "epoch": 0.0872375070659006, "grad_norm": 62.25, "learning_rate": 1.9958303107616285e-05, "loss": 8.2018, "step": 31020 }, { "epoch": 0.08726563005334931, "grad_norm": 27.625, "learning_rate": 1.995827623725372e-05, "loss": 6.7307, "step": 31030 }, { "epoch": 0.08729375304079802, "grad_norm": 21.5, "learning_rate": 1.995824935825413e-05, "loss": 8.2217, "step": 31040 }, { "epoch": 0.08732187602824673, "grad_norm": 23.625, "learning_rate": 1.995822247061753e-05, "loss": 7.5232, "step": 31050 }, { "epoch": 0.08734999901569544, "grad_norm": 29.375, "learning_rate": 1.9958195574343952e-05, "loss": 8.2332, "step": 31060 }, { "epoch": 0.08737812200314415, "grad_norm": 33.0, "learning_rate": 1.995816866943342e-05, "loss": 7.8385, "step": 31070 }, { "epoch": 0.08740624499059287, "grad_norm": 27.125, "learning_rate": 1.9958141755885948e-05, "loss": 8.4511, "step": 31080 }, { "epoch": 0.08743436797804158, "grad_norm": 22.75, "learning_rate": 1.9958114833701566e-05, "loss": 7.6975, "step": 31090 }, { "epoch": 0.08746249096549029, "grad_norm": 28.0, "learning_rate": 1.9958087902880295e-05, "loss": 8.4121, "step": 31100 }, { "epoch": 0.08749061395293899, "grad_norm": 32.25, "learning_rate": 1.9958060963422164e-05, "loss": 7.855, "step": 31110 }, { "epoch": 0.0875187369403877, "grad_norm": 23.625, "learning_rate": 1.995803401532719e-05, "loss": 7.2676, "step": 31120 }, { "epoch": 0.08754685992783641, "grad_norm": 48.0, "learning_rate": 1.9958007058595398e-05, "loss": 7.7548, "step": 31130 }, { "epoch": 0.08757498291528512, "grad_norm": 22.875, "learning_rate": 1.995798009322681e-05, "loss": 7.9743, "step": 31140 }, { "epoch": 0.08760310590273383, "grad_norm": 35.75, "learning_rate": 1.9957953119221455e-05, "loss": 8.5904, "step": 31150 }, { "epoch": 0.08763122889018254, "grad_norm": 26.625, "learning_rate": 1.9957926136579352e-05, "loss": 7.7661, "step": 31160 }, { "epoch": 0.08765935187763126, "grad_norm": 36.75, "learning_rate": 1.995789914530052e-05, "loss": 7.4335, "step": 31170 }, { "epoch": 0.08768747486507997, "grad_norm": 34.0, "learning_rate": 1.9957872145384993e-05, "loss": 7.355, "step": 31180 }, { "epoch": 0.08771559785252868, "grad_norm": 33.5, "learning_rate": 1.9957845136832786e-05, "loss": 8.0948, "step": 31190 }, { "epoch": 0.08774372083997739, "grad_norm": 42.0, "learning_rate": 1.995781811964393e-05, "loss": 8.0791, "step": 31200 }, { "epoch": 0.0877718438274261, "grad_norm": 37.0, "learning_rate": 1.9957791093818436e-05, "loss": 8.0311, "step": 31210 }, { "epoch": 0.08779996681487481, "grad_norm": 44.0, "learning_rate": 1.995776405935634e-05, "loss": 7.581, "step": 31220 }, { "epoch": 0.08782808980232353, "grad_norm": 35.75, "learning_rate": 1.995773701625766e-05, "loss": 7.9411, "step": 31230 }, { "epoch": 0.08785621278977224, "grad_norm": 42.25, "learning_rate": 1.995770996452242e-05, "loss": 8.5506, "step": 31240 }, { "epoch": 0.08788433577722095, "grad_norm": 28.5, "learning_rate": 1.995768290415064e-05, "loss": 8.3206, "step": 31250 }, { "epoch": 0.08791245876466966, "grad_norm": 29.0, "learning_rate": 1.9957655835142354e-05, "loss": 7.8897, "step": 31260 }, { "epoch": 0.08794058175211836, "grad_norm": 32.5, "learning_rate": 1.9957628757497574e-05, "loss": 7.663, "step": 31270 }, { "epoch": 0.08796870473956707, "grad_norm": 28.375, "learning_rate": 1.9957601671216327e-05, "loss": 7.0347, "step": 31280 }, { "epoch": 0.08799682772701578, "grad_norm": 29.0, "learning_rate": 1.9957574576298638e-05, "loss": 7.8589, "step": 31290 }, { "epoch": 0.08802495071446449, "grad_norm": 28.75, "learning_rate": 1.995754747274453e-05, "loss": 7.4941, "step": 31300 }, { "epoch": 0.0880530737019132, "grad_norm": 28.0, "learning_rate": 1.995752036055403e-05, "loss": 7.9891, "step": 31310 }, { "epoch": 0.08808119668936192, "grad_norm": 111.5, "learning_rate": 1.995749323972715e-05, "loss": 8.1565, "step": 31320 }, { "epoch": 0.08810931967681063, "grad_norm": 39.0, "learning_rate": 1.9957466110263927e-05, "loss": 8.0063, "step": 31330 }, { "epoch": 0.08813744266425934, "grad_norm": 46.25, "learning_rate": 1.995743897216438e-05, "loss": 8.3311, "step": 31340 }, { "epoch": 0.08816556565170805, "grad_norm": 28.875, "learning_rate": 1.9957411825428528e-05, "loss": 8.1747, "step": 31350 }, { "epoch": 0.08819368863915676, "grad_norm": 23.5, "learning_rate": 1.99573846700564e-05, "loss": 7.8104, "step": 31360 }, { "epoch": 0.08822181162660547, "grad_norm": 33.25, "learning_rate": 1.995735750604802e-05, "loss": 8.1157, "step": 31370 }, { "epoch": 0.08824993461405418, "grad_norm": 29.5, "learning_rate": 1.9957330333403405e-05, "loss": 8.3593, "step": 31380 }, { "epoch": 0.0882780576015029, "grad_norm": 28.875, "learning_rate": 1.995730315212258e-05, "loss": 7.8612, "step": 31390 }, { "epoch": 0.08830618058895161, "grad_norm": 29.5, "learning_rate": 1.995727596220558e-05, "loss": 7.9469, "step": 31400 }, { "epoch": 0.08833430357640032, "grad_norm": 49.25, "learning_rate": 1.9957248763652414e-05, "loss": 8.0123, "step": 31410 }, { "epoch": 0.08836242656384903, "grad_norm": 30.875, "learning_rate": 1.9957221556463113e-05, "loss": 7.4203, "step": 31420 }, { "epoch": 0.08839054955129773, "grad_norm": 27.25, "learning_rate": 1.9957194340637698e-05, "loss": 7.8473, "step": 31430 }, { "epoch": 0.08841867253874644, "grad_norm": 34.5, "learning_rate": 1.9957167116176195e-05, "loss": 8.434, "step": 31440 }, { "epoch": 0.08844679552619515, "grad_norm": 25.375, "learning_rate": 1.9957139883078627e-05, "loss": 8.1961, "step": 31450 }, { "epoch": 0.08847491851364386, "grad_norm": 53.0, "learning_rate": 1.9957112641345015e-05, "loss": 7.9697, "step": 31460 }, { "epoch": 0.08850304150109257, "grad_norm": 29.5, "learning_rate": 1.9957085390975387e-05, "loss": 7.2683, "step": 31470 }, { "epoch": 0.08853116448854129, "grad_norm": 22.0, "learning_rate": 1.9957058131969764e-05, "loss": 7.7129, "step": 31480 }, { "epoch": 0.08855928747599, "grad_norm": 70.0, "learning_rate": 1.9957030864328165e-05, "loss": 8.0553, "step": 31490 }, { "epoch": 0.08858741046343871, "grad_norm": 43.0, "learning_rate": 1.995700358805062e-05, "loss": 8.5803, "step": 31500 }, { "epoch": 0.08861553345088742, "grad_norm": 31.5, "learning_rate": 1.9956976303137154e-05, "loss": 7.7549, "step": 31510 }, { "epoch": 0.08864365643833613, "grad_norm": 29.375, "learning_rate": 1.9956949009587785e-05, "loss": 8.4476, "step": 31520 }, { "epoch": 0.08867177942578484, "grad_norm": 43.75, "learning_rate": 1.9956921707402543e-05, "loss": 7.8869, "step": 31530 }, { "epoch": 0.08869990241323356, "grad_norm": 32.0, "learning_rate": 1.9956894396581448e-05, "loss": 7.6557, "step": 31540 }, { "epoch": 0.08872802540068227, "grad_norm": 28.375, "learning_rate": 1.995686707712452e-05, "loss": 8.0399, "step": 31550 }, { "epoch": 0.08875614838813098, "grad_norm": 36.25, "learning_rate": 1.995683974903179e-05, "loss": 7.2351, "step": 31560 }, { "epoch": 0.08878427137557969, "grad_norm": 33.5, "learning_rate": 1.9956812412303277e-05, "loss": 7.7686, "step": 31570 }, { "epoch": 0.0888123943630284, "grad_norm": 48.25, "learning_rate": 1.9956785066939005e-05, "loss": 7.9967, "step": 31580 }, { "epoch": 0.08884051735047711, "grad_norm": 28.625, "learning_rate": 1.9956757712939e-05, "loss": 7.9237, "step": 31590 }, { "epoch": 0.08886864033792581, "grad_norm": 26.875, "learning_rate": 1.9956730350303283e-05, "loss": 7.753, "step": 31600 }, { "epoch": 0.08889676332537452, "grad_norm": 24.75, "learning_rate": 1.9956702979031882e-05, "loss": 7.8594, "step": 31610 }, { "epoch": 0.08892488631282323, "grad_norm": 23.875, "learning_rate": 1.9956675599124815e-05, "loss": 7.3744, "step": 31620 }, { "epoch": 0.08895300930027195, "grad_norm": 36.0, "learning_rate": 1.995664821058211e-05, "loss": 8.1098, "step": 31630 }, { "epoch": 0.08898113228772066, "grad_norm": 46.0, "learning_rate": 1.9956620813403787e-05, "loss": 7.8637, "step": 31640 }, { "epoch": 0.08900925527516937, "grad_norm": 36.75, "learning_rate": 1.995659340758987e-05, "loss": 7.9329, "step": 31650 }, { "epoch": 0.08903737826261808, "grad_norm": 25.125, "learning_rate": 1.9956565993140393e-05, "loss": 8.1023, "step": 31660 }, { "epoch": 0.08906550125006679, "grad_norm": 24.75, "learning_rate": 1.9956538570055367e-05, "loss": 7.3029, "step": 31670 }, { "epoch": 0.0890936242375155, "grad_norm": 31.25, "learning_rate": 1.995651113833482e-05, "loss": 7.6285, "step": 31680 }, { "epoch": 0.08912174722496422, "grad_norm": 50.25, "learning_rate": 1.995648369797878e-05, "loss": 6.8398, "step": 31690 }, { "epoch": 0.08914987021241293, "grad_norm": 43.75, "learning_rate": 1.995645624898726e-05, "loss": 7.7186, "step": 31700 }, { "epoch": 0.08917799319986164, "grad_norm": 33.75, "learning_rate": 1.9956428791360295e-05, "loss": 8.1996, "step": 31710 }, { "epoch": 0.08920611618731035, "grad_norm": 45.25, "learning_rate": 1.9956401325097907e-05, "loss": 7.9158, "step": 31720 }, { "epoch": 0.08923423917475906, "grad_norm": 34.0, "learning_rate": 1.9956373850200116e-05, "loss": 8.6522, "step": 31730 }, { "epoch": 0.08926236216220777, "grad_norm": 40.0, "learning_rate": 1.9956346366666946e-05, "loss": 7.9574, "step": 31740 }, { "epoch": 0.08929048514965648, "grad_norm": 42.5, "learning_rate": 1.995631887449842e-05, "loss": 8.4916, "step": 31750 }, { "epoch": 0.08931860813710518, "grad_norm": 31.75, "learning_rate": 1.9956291373694568e-05, "loss": 8.3419, "step": 31760 }, { "epoch": 0.0893467311245539, "grad_norm": 27.75, "learning_rate": 1.9956263864255407e-05, "loss": 8.3432, "step": 31770 }, { "epoch": 0.0893748541120026, "grad_norm": 26.375, "learning_rate": 1.9956236346180965e-05, "loss": 7.7255, "step": 31780 }, { "epoch": 0.08940297709945132, "grad_norm": 38.75, "learning_rate": 1.9956208819471267e-05, "loss": 8.3694, "step": 31790 }, { "epoch": 0.08943110008690003, "grad_norm": 29.5, "learning_rate": 1.995618128412633e-05, "loss": 7.3745, "step": 31800 }, { "epoch": 0.08945922307434874, "grad_norm": 36.0, "learning_rate": 1.9956153740146183e-05, "loss": 8.7561, "step": 31810 }, { "epoch": 0.08948734606179745, "grad_norm": 43.25, "learning_rate": 1.9956126187530853e-05, "loss": 7.9559, "step": 31820 }, { "epoch": 0.08951546904924616, "grad_norm": 35.0, "learning_rate": 1.9956098626280357e-05, "loss": 7.3742, "step": 31830 }, { "epoch": 0.08954359203669487, "grad_norm": 34.0, "learning_rate": 1.9956071056394723e-05, "loss": 7.3911, "step": 31840 }, { "epoch": 0.08957171502414359, "grad_norm": 36.75, "learning_rate": 1.9956043477873974e-05, "loss": 7.6747, "step": 31850 }, { "epoch": 0.0895998380115923, "grad_norm": 35.5, "learning_rate": 1.9956015890718134e-05, "loss": 8.5908, "step": 31860 }, { "epoch": 0.08962796099904101, "grad_norm": 31.25, "learning_rate": 1.9955988294927223e-05, "loss": 7.9108, "step": 31870 }, { "epoch": 0.08965608398648972, "grad_norm": 23.625, "learning_rate": 1.9955960690501276e-05, "loss": 7.5974, "step": 31880 }, { "epoch": 0.08968420697393843, "grad_norm": 35.25, "learning_rate": 1.9955933077440302e-05, "loss": 7.6679, "step": 31890 }, { "epoch": 0.08971232996138714, "grad_norm": 29.625, "learning_rate": 1.995590545574434e-05, "loss": 8.1325, "step": 31900 }, { "epoch": 0.08974045294883586, "grad_norm": 23.875, "learning_rate": 1.99558778254134e-05, "loss": 7.6805, "step": 31910 }, { "epoch": 0.08976857593628455, "grad_norm": 28.875, "learning_rate": 1.9955850186447513e-05, "loss": 8.2192, "step": 31920 }, { "epoch": 0.08979669892373326, "grad_norm": 32.75, "learning_rate": 1.9955822538846708e-05, "loss": 7.4486, "step": 31930 }, { "epoch": 0.08982482191118198, "grad_norm": 41.0, "learning_rate": 1.9955794882611e-05, "loss": 7.97, "step": 31940 }, { "epoch": 0.08985294489863069, "grad_norm": 30.375, "learning_rate": 1.9955767217740416e-05, "loss": 8.2425, "step": 31950 }, { "epoch": 0.0898810678860794, "grad_norm": 29.625, "learning_rate": 1.995573954423498e-05, "loss": 8.5579, "step": 31960 }, { "epoch": 0.08990919087352811, "grad_norm": 27.5, "learning_rate": 1.995571186209472e-05, "loss": 8.605, "step": 31970 }, { "epoch": 0.08993731386097682, "grad_norm": 26.875, "learning_rate": 1.9955684171319653e-05, "loss": 7.4941, "step": 31980 }, { "epoch": 0.08996543684842553, "grad_norm": 29.0, "learning_rate": 1.9955656471909806e-05, "loss": 7.0686, "step": 31990 }, { "epoch": 0.08999355983587425, "grad_norm": 27.25, "learning_rate": 1.9955628763865204e-05, "loss": 8.023, "step": 32000 }, { "epoch": 0.09002168282332296, "grad_norm": 30.375, "learning_rate": 1.9955601047185873e-05, "loss": 7.8517, "step": 32010 }, { "epoch": 0.09004980581077167, "grad_norm": 32.5, "learning_rate": 1.9955573321871834e-05, "loss": 7.2573, "step": 32020 }, { "epoch": 0.09007792879822038, "grad_norm": 20.25, "learning_rate": 1.995554558792311e-05, "loss": 7.7659, "step": 32030 }, { "epoch": 0.09010605178566909, "grad_norm": 26.5, "learning_rate": 1.995551784533973e-05, "loss": 8.111, "step": 32040 }, { "epoch": 0.0901341747731178, "grad_norm": 39.75, "learning_rate": 1.995549009412171e-05, "loss": 7.9312, "step": 32050 }, { "epoch": 0.09016229776056651, "grad_norm": 24.5, "learning_rate": 1.9955462334269083e-05, "loss": 8.0246, "step": 32060 }, { "epoch": 0.09019042074801523, "grad_norm": 28.5, "learning_rate": 1.9955434565781866e-05, "loss": 7.9773, "step": 32070 }, { "epoch": 0.09021854373546392, "grad_norm": 26.875, "learning_rate": 1.9955406788660087e-05, "loss": 8.0636, "step": 32080 }, { "epoch": 0.09024666672291264, "grad_norm": 22.875, "learning_rate": 1.995537900290377e-05, "loss": 8.456, "step": 32090 }, { "epoch": 0.09027478971036135, "grad_norm": 25.125, "learning_rate": 1.9955351208512933e-05, "loss": 7.2841, "step": 32100 }, { "epoch": 0.09030291269781006, "grad_norm": 67.0, "learning_rate": 1.9955323405487613e-05, "loss": 8.062, "step": 32110 }, { "epoch": 0.09033103568525877, "grad_norm": 40.75, "learning_rate": 1.9955295593827818e-05, "loss": 7.8764, "step": 32120 }, { "epoch": 0.09035915867270748, "grad_norm": 31.25, "learning_rate": 1.9955267773533586e-05, "loss": 8.1385, "step": 32130 }, { "epoch": 0.0903872816601562, "grad_norm": 26.5, "learning_rate": 1.9955239944604937e-05, "loss": 8.8775, "step": 32140 }, { "epoch": 0.0904154046476049, "grad_norm": 20.0, "learning_rate": 1.995521210704189e-05, "loss": 7.4645, "step": 32150 }, { "epoch": 0.09044352763505362, "grad_norm": 33.5, "learning_rate": 1.9955184260844475e-05, "loss": 7.7764, "step": 32160 }, { "epoch": 0.09047165062250233, "grad_norm": 67.0, "learning_rate": 1.9955156406012715e-05, "loss": 8.5735, "step": 32170 }, { "epoch": 0.09049977360995104, "grad_norm": 33.75, "learning_rate": 1.995512854254663e-05, "loss": 8.1308, "step": 32180 }, { "epoch": 0.09052789659739975, "grad_norm": 31.125, "learning_rate": 1.995510067044625e-05, "loss": 8.0062, "step": 32190 }, { "epoch": 0.09055601958484846, "grad_norm": 34.25, "learning_rate": 1.9955072789711597e-05, "loss": 7.9201, "step": 32200 }, { "epoch": 0.09058414257229717, "grad_norm": 36.0, "learning_rate": 1.9955044900342692e-05, "loss": 8.0047, "step": 32210 }, { "epoch": 0.09061226555974589, "grad_norm": 25.875, "learning_rate": 1.9955017002339564e-05, "loss": 7.7446, "step": 32220 }, { "epoch": 0.0906403885471946, "grad_norm": 31.5, "learning_rate": 1.9954989095702232e-05, "loss": 7.9312, "step": 32230 }, { "epoch": 0.0906685115346433, "grad_norm": 31.0, "learning_rate": 1.9954961180430728e-05, "loss": 7.9033, "step": 32240 }, { "epoch": 0.090696634522092, "grad_norm": 50.5, "learning_rate": 1.9954933256525068e-05, "loss": 7.6294, "step": 32250 }, { "epoch": 0.09072475750954072, "grad_norm": 27.875, "learning_rate": 1.9954905323985282e-05, "loss": 7.3763, "step": 32260 }, { "epoch": 0.09075288049698943, "grad_norm": 21.875, "learning_rate": 1.9954877382811392e-05, "loss": 7.7934, "step": 32270 }, { "epoch": 0.09078100348443814, "grad_norm": 23.5, "learning_rate": 1.995484943300342e-05, "loss": 8.4928, "step": 32280 }, { "epoch": 0.09080912647188685, "grad_norm": 24.5, "learning_rate": 1.9954821474561394e-05, "loss": 8.4223, "step": 32290 }, { "epoch": 0.09083724945933556, "grad_norm": 21.875, "learning_rate": 1.9954793507485338e-05, "loss": 8.5671, "step": 32300 }, { "epoch": 0.09086537244678428, "grad_norm": 27.5, "learning_rate": 1.9954765531775272e-05, "loss": 7.5427, "step": 32310 }, { "epoch": 0.09089349543423299, "grad_norm": 29.5, "learning_rate": 1.9954737547431226e-05, "loss": 7.5891, "step": 32320 }, { "epoch": 0.0909216184216817, "grad_norm": 24.25, "learning_rate": 1.995470955445322e-05, "loss": 8.3969, "step": 32330 }, { "epoch": 0.09094974140913041, "grad_norm": 25.875, "learning_rate": 1.995468155284128e-05, "loss": 7.6234, "step": 32340 }, { "epoch": 0.09097786439657912, "grad_norm": 24.25, "learning_rate": 1.995465354259543e-05, "loss": 7.8102, "step": 32350 }, { "epoch": 0.09100598738402783, "grad_norm": 25.125, "learning_rate": 1.9954625523715693e-05, "loss": 7.9179, "step": 32360 }, { "epoch": 0.09103411037147655, "grad_norm": 29.25, "learning_rate": 1.9954597496202098e-05, "loss": 8.1455, "step": 32370 }, { "epoch": 0.09106223335892526, "grad_norm": 23.875, "learning_rate": 1.9954569460054665e-05, "loss": 7.324, "step": 32380 }, { "epoch": 0.09109035634637397, "grad_norm": 26.25, "learning_rate": 1.9954541415273416e-05, "loss": 7.6981, "step": 32390 }, { "epoch": 0.09111847933382268, "grad_norm": 25.125, "learning_rate": 1.995451336185838e-05, "loss": 8.2768, "step": 32400 }, { "epoch": 0.09114660232127138, "grad_norm": 25.25, "learning_rate": 1.9954485299809584e-05, "loss": 7.4089, "step": 32410 }, { "epoch": 0.09117472530872009, "grad_norm": 34.25, "learning_rate": 1.9954457229127043e-05, "loss": 7.9298, "step": 32420 }, { "epoch": 0.0912028482961688, "grad_norm": 28.875, "learning_rate": 1.995442914981079e-05, "loss": 7.4125, "step": 32430 }, { "epoch": 0.09123097128361751, "grad_norm": 41.75, "learning_rate": 1.9954401061860845e-05, "loss": 8.3184, "step": 32440 }, { "epoch": 0.09125909427106622, "grad_norm": 29.625, "learning_rate": 1.9954372965277232e-05, "loss": 8.0944, "step": 32450 }, { "epoch": 0.09128721725851494, "grad_norm": 28.0, "learning_rate": 1.995434486005998e-05, "loss": 7.9146, "step": 32460 }, { "epoch": 0.09131534024596365, "grad_norm": 30.875, "learning_rate": 1.9954316746209108e-05, "loss": 7.5536, "step": 32470 }, { "epoch": 0.09134346323341236, "grad_norm": 24.375, "learning_rate": 1.9954288623724644e-05, "loss": 7.7397, "step": 32480 }, { "epoch": 0.09137158622086107, "grad_norm": 61.5, "learning_rate": 1.9954260492606607e-05, "loss": 8.1758, "step": 32490 }, { "epoch": 0.09139970920830978, "grad_norm": 21.125, "learning_rate": 1.9954232352855027e-05, "loss": 7.496, "step": 32500 }, { "epoch": 0.0914278321957585, "grad_norm": 37.0, "learning_rate": 1.9954204204469925e-05, "loss": 8.2438, "step": 32510 }, { "epoch": 0.0914559551832072, "grad_norm": 46.25, "learning_rate": 1.995417604745133e-05, "loss": 8.4312, "step": 32520 }, { "epoch": 0.09148407817065592, "grad_norm": 28.0, "learning_rate": 1.9954147881799267e-05, "loss": 7.7676, "step": 32530 }, { "epoch": 0.09151220115810463, "grad_norm": 37.5, "learning_rate": 1.9954119707513754e-05, "loss": 7.6462, "step": 32540 }, { "epoch": 0.09154032414555334, "grad_norm": 30.25, "learning_rate": 1.9954091524594814e-05, "loss": 8.7002, "step": 32550 }, { "epoch": 0.09156844713300205, "grad_norm": 40.75, "learning_rate": 1.995406333304248e-05, "loss": 7.591, "step": 32560 }, { "epoch": 0.09159657012045075, "grad_norm": 36.5, "learning_rate": 1.995403513285677e-05, "loss": 7.241, "step": 32570 }, { "epoch": 0.09162469310789946, "grad_norm": 28.25, "learning_rate": 1.9954006924037714e-05, "loss": 8.1166, "step": 32580 }, { "epoch": 0.09165281609534817, "grad_norm": 37.0, "learning_rate": 1.9953978706585332e-05, "loss": 7.65, "step": 32590 }, { "epoch": 0.09168093908279688, "grad_norm": 58.0, "learning_rate": 1.995395048049965e-05, "loss": 8.404, "step": 32600 }, { "epoch": 0.0917090620702456, "grad_norm": 21.625, "learning_rate": 1.995392224578069e-05, "loss": 7.1024, "step": 32610 }, { "epoch": 0.0917371850576943, "grad_norm": 36.5, "learning_rate": 1.9953894002428476e-05, "loss": 7.1803, "step": 32620 }, { "epoch": 0.09176530804514302, "grad_norm": 28.875, "learning_rate": 1.9953865750443042e-05, "loss": 8.5062, "step": 32630 }, { "epoch": 0.09179343103259173, "grad_norm": 30.375, "learning_rate": 1.99538374898244e-05, "loss": 8.2749, "step": 32640 }, { "epoch": 0.09182155402004044, "grad_norm": 17.875, "learning_rate": 1.9953809220572584e-05, "loss": 8.0172, "step": 32650 }, { "epoch": 0.09184967700748915, "grad_norm": 33.75, "learning_rate": 1.9953780942687613e-05, "loss": 8.1709, "step": 32660 }, { "epoch": 0.09187779999493786, "grad_norm": 28.25, "learning_rate": 1.9953752656169513e-05, "loss": 7.8763, "step": 32670 }, { "epoch": 0.09190592298238658, "grad_norm": 37.5, "learning_rate": 1.995372436101831e-05, "loss": 7.816, "step": 32680 }, { "epoch": 0.09193404596983529, "grad_norm": 31.125, "learning_rate": 1.9953696057234027e-05, "loss": 8.281, "step": 32690 }, { "epoch": 0.091962168957284, "grad_norm": 24.875, "learning_rate": 1.995366774481669e-05, "loss": 7.5432, "step": 32700 }, { "epoch": 0.09199029194473271, "grad_norm": 25.125, "learning_rate": 1.995363942376632e-05, "loss": 8.108, "step": 32710 }, { "epoch": 0.09201841493218142, "grad_norm": 23.125, "learning_rate": 1.9953611094082943e-05, "loss": 9.4395, "step": 32720 }, { "epoch": 0.09204653791963012, "grad_norm": 28.125, "learning_rate": 1.9953582755766586e-05, "loss": 7.9753, "step": 32730 }, { "epoch": 0.09207466090707883, "grad_norm": 24.5, "learning_rate": 1.9953554408817273e-05, "loss": 7.9057, "step": 32740 }, { "epoch": 0.09210278389452754, "grad_norm": 24.125, "learning_rate": 1.9953526053235027e-05, "loss": 6.9793, "step": 32750 }, { "epoch": 0.09213090688197625, "grad_norm": 25.875, "learning_rate": 1.9953497689019872e-05, "loss": 7.5584, "step": 32760 }, { "epoch": 0.09215902986942497, "grad_norm": 66.0, "learning_rate": 1.9953469316171836e-05, "loss": 8.645, "step": 32770 }, { "epoch": 0.09218715285687368, "grad_norm": 41.5, "learning_rate": 1.9953440934690937e-05, "loss": 8.3548, "step": 32780 }, { "epoch": 0.09221527584432239, "grad_norm": 52.25, "learning_rate": 1.995341254457721e-05, "loss": 7.9617, "step": 32790 }, { "epoch": 0.0922433988317711, "grad_norm": 26.5, "learning_rate": 1.995338414583067e-05, "loss": 7.9166, "step": 32800 }, { "epoch": 0.09227152181921981, "grad_norm": 36.0, "learning_rate": 1.9953355738451348e-05, "loss": 8.1291, "step": 32810 }, { "epoch": 0.09229964480666852, "grad_norm": 32.25, "learning_rate": 1.9953327322439264e-05, "loss": 8.5053, "step": 32820 }, { "epoch": 0.09232776779411724, "grad_norm": 21.5, "learning_rate": 1.9953298897794445e-05, "loss": 8.0581, "step": 32830 }, { "epoch": 0.09235589078156595, "grad_norm": 37.25, "learning_rate": 1.9953270464516916e-05, "loss": 8.0874, "step": 32840 }, { "epoch": 0.09238401376901466, "grad_norm": 26.125, "learning_rate": 1.9953242022606702e-05, "loss": 8.662, "step": 32850 }, { "epoch": 0.09241213675646337, "grad_norm": 41.0, "learning_rate": 1.9953213572063823e-05, "loss": 8.3718, "step": 32860 }, { "epoch": 0.09244025974391208, "grad_norm": 27.125, "learning_rate": 1.995318511288831e-05, "loss": 8.7256, "step": 32870 }, { "epoch": 0.0924683827313608, "grad_norm": 23.875, "learning_rate": 1.9953156645080184e-05, "loss": 7.5468, "step": 32880 }, { "epoch": 0.09249650571880949, "grad_norm": 26.5, "learning_rate": 1.995312816863947e-05, "loss": 7.3359, "step": 32890 }, { "epoch": 0.0925246287062582, "grad_norm": 37.25, "learning_rate": 1.9953099683566194e-05, "loss": 7.5172, "step": 32900 }, { "epoch": 0.09255275169370691, "grad_norm": 22.625, "learning_rate": 1.995307118986038e-05, "loss": 8.1141, "step": 32910 }, { "epoch": 0.09258087468115563, "grad_norm": 27.375, "learning_rate": 1.9953042687522056e-05, "loss": 7.7569, "step": 32920 }, { "epoch": 0.09260899766860434, "grad_norm": 33.25, "learning_rate": 1.995301417655124e-05, "loss": 7.5513, "step": 32930 }, { "epoch": 0.09263712065605305, "grad_norm": 27.75, "learning_rate": 1.9952985656947962e-05, "loss": 8.6064, "step": 32940 }, { "epoch": 0.09266524364350176, "grad_norm": 30.125, "learning_rate": 1.995295712871224e-05, "loss": 7.7238, "step": 32950 }, { "epoch": 0.09269336663095047, "grad_norm": 32.5, "learning_rate": 1.995292859184411e-05, "loss": 8.0163, "step": 32960 }, { "epoch": 0.09272148961839918, "grad_norm": 30.5, "learning_rate": 1.9952900046343585e-05, "loss": 7.7521, "step": 32970 }, { "epoch": 0.0927496126058479, "grad_norm": 21.875, "learning_rate": 1.99528714922107e-05, "loss": 7.9217, "step": 32980 }, { "epoch": 0.0927777355932966, "grad_norm": 35.25, "learning_rate": 1.9952842929445473e-05, "loss": 8.711, "step": 32990 }, { "epoch": 0.09280585858074532, "grad_norm": 40.25, "learning_rate": 1.9952814358047933e-05, "loss": 7.3418, "step": 33000 }, { "epoch": 0.09283398156819403, "grad_norm": 22.25, "learning_rate": 1.9952785778018097e-05, "loss": 8.1675, "step": 33010 }, { "epoch": 0.09286210455564274, "grad_norm": 41.0, "learning_rate": 1.9952757189356e-05, "loss": 7.9406, "step": 33020 }, { "epoch": 0.09289022754309145, "grad_norm": 26.25, "learning_rate": 1.995272859206166e-05, "loss": 7.7902, "step": 33030 }, { "epoch": 0.09291835053054016, "grad_norm": 27.875, "learning_rate": 1.9952699986135107e-05, "loss": 7.6033, "step": 33040 }, { "epoch": 0.09294647351798888, "grad_norm": 25.25, "learning_rate": 1.9952671371576356e-05, "loss": 7.7843, "step": 33050 }, { "epoch": 0.09297459650543757, "grad_norm": 26.25, "learning_rate": 1.9952642748385448e-05, "loss": 8.2589, "step": 33060 }, { "epoch": 0.09300271949288628, "grad_norm": 26.625, "learning_rate": 1.9952614116562394e-05, "loss": 7.9632, "step": 33070 }, { "epoch": 0.093030842480335, "grad_norm": 26.5, "learning_rate": 1.995258547610722e-05, "loss": 8.2487, "step": 33080 }, { "epoch": 0.09305896546778371, "grad_norm": 34.25, "learning_rate": 1.9952556827019956e-05, "loss": 8.5531, "step": 33090 }, { "epoch": 0.09308708845523242, "grad_norm": 34.75, "learning_rate": 1.9952528169300628e-05, "loss": 8.3147, "step": 33100 }, { "epoch": 0.09311521144268113, "grad_norm": 26.0, "learning_rate": 1.9952499502949255e-05, "loss": 7.9879, "step": 33110 }, { "epoch": 0.09314333443012984, "grad_norm": 45.25, "learning_rate": 1.9952470827965863e-05, "loss": 7.3487, "step": 33120 }, { "epoch": 0.09317145741757855, "grad_norm": 44.0, "learning_rate": 1.995244214435048e-05, "loss": 8.0107, "step": 33130 }, { "epoch": 0.09319958040502727, "grad_norm": 31.875, "learning_rate": 1.995241345210313e-05, "loss": 7.4307, "step": 33140 }, { "epoch": 0.09322770339247598, "grad_norm": 22.75, "learning_rate": 1.9952384751223836e-05, "loss": 7.8279, "step": 33150 }, { "epoch": 0.09325582637992469, "grad_norm": 27.625, "learning_rate": 1.9952356041712627e-05, "loss": 7.953, "step": 33160 }, { "epoch": 0.0932839493673734, "grad_norm": 44.5, "learning_rate": 1.9952327323569523e-05, "loss": 7.869, "step": 33170 }, { "epoch": 0.09331207235482211, "grad_norm": 31.625, "learning_rate": 1.995229859679455e-05, "loss": 8.1221, "step": 33180 }, { "epoch": 0.09334019534227082, "grad_norm": 36.25, "learning_rate": 1.9952269861387737e-05, "loss": 8.3212, "step": 33190 }, { "epoch": 0.09336831832971954, "grad_norm": 22.375, "learning_rate": 1.9952241117349104e-05, "loss": 7.9912, "step": 33200 }, { "epoch": 0.09339644131716825, "grad_norm": 35.25, "learning_rate": 1.9952212364678676e-05, "loss": 8.3282, "step": 33210 }, { "epoch": 0.09342456430461694, "grad_norm": 41.75, "learning_rate": 1.995218360337648e-05, "loss": 8.3339, "step": 33220 }, { "epoch": 0.09345268729206566, "grad_norm": 31.75, "learning_rate": 1.995215483344254e-05, "loss": 7.5689, "step": 33230 }, { "epoch": 0.09348081027951437, "grad_norm": 37.25, "learning_rate": 1.9952126054876886e-05, "loss": 7.9586, "step": 33240 }, { "epoch": 0.09350893326696308, "grad_norm": 37.0, "learning_rate": 1.9952097267679535e-05, "loss": 7.0005, "step": 33250 }, { "epoch": 0.09353705625441179, "grad_norm": 39.75, "learning_rate": 1.9952068471850516e-05, "loss": 7.9167, "step": 33260 }, { "epoch": 0.0935651792418605, "grad_norm": 22.75, "learning_rate": 1.9952039667389854e-05, "loss": 7.1487, "step": 33270 }, { "epoch": 0.09359330222930921, "grad_norm": 54.75, "learning_rate": 1.9952010854297575e-05, "loss": 8.1651, "step": 33280 }, { "epoch": 0.09362142521675793, "grad_norm": 24.75, "learning_rate": 1.99519820325737e-05, "loss": 7.9803, "step": 33290 }, { "epoch": 0.09364954820420664, "grad_norm": 30.125, "learning_rate": 1.9951953202218253e-05, "loss": 8.1252, "step": 33300 }, { "epoch": 0.09367767119165535, "grad_norm": 24.625, "learning_rate": 1.995192436323127e-05, "loss": 7.6334, "step": 33310 }, { "epoch": 0.09370579417910406, "grad_norm": 44.75, "learning_rate": 1.995189551561276e-05, "loss": 7.8594, "step": 33320 }, { "epoch": 0.09373391716655277, "grad_norm": 26.25, "learning_rate": 1.9951866659362764e-05, "loss": 7.9857, "step": 33330 }, { "epoch": 0.09376204015400148, "grad_norm": 31.5, "learning_rate": 1.9951837794481297e-05, "loss": 7.5463, "step": 33340 }, { "epoch": 0.0937901631414502, "grad_norm": 33.5, "learning_rate": 1.9951808920968386e-05, "loss": 8.3125, "step": 33350 }, { "epoch": 0.0938182861288989, "grad_norm": 36.5, "learning_rate": 1.9951780038824056e-05, "loss": 7.7852, "step": 33360 }, { "epoch": 0.09384640911634762, "grad_norm": 36.0, "learning_rate": 1.9951751148048335e-05, "loss": 8.0585, "step": 33370 }, { "epoch": 0.09387453210379632, "grad_norm": 22.5, "learning_rate": 1.9951722248641244e-05, "loss": 7.4017, "step": 33380 }, { "epoch": 0.09390265509124503, "grad_norm": 26.375, "learning_rate": 1.9951693340602812e-05, "loss": 7.4639, "step": 33390 }, { "epoch": 0.09393077807869374, "grad_norm": 29.375, "learning_rate": 1.995166442393306e-05, "loss": 7.6276, "step": 33400 }, { "epoch": 0.09395890106614245, "grad_norm": 25.625, "learning_rate": 1.9951635498632012e-05, "loss": 7.9188, "step": 33410 }, { "epoch": 0.09398702405359116, "grad_norm": 27.875, "learning_rate": 1.99516065646997e-05, "loss": 7.1325, "step": 33420 }, { "epoch": 0.09401514704103987, "grad_norm": 31.25, "learning_rate": 1.9951577622136146e-05, "loss": 7.869, "step": 33430 }, { "epoch": 0.09404327002848858, "grad_norm": 28.25, "learning_rate": 1.995154867094137e-05, "loss": 8.1573, "step": 33440 }, { "epoch": 0.0940713930159373, "grad_norm": 23.5, "learning_rate": 1.99515197111154e-05, "loss": 7.9537, "step": 33450 }, { "epoch": 0.09409951600338601, "grad_norm": 81.5, "learning_rate": 1.9951490742658267e-05, "loss": 8.169, "step": 33460 }, { "epoch": 0.09412763899083472, "grad_norm": 39.75, "learning_rate": 1.995146176556999e-05, "loss": 8.2499, "step": 33470 }, { "epoch": 0.09415576197828343, "grad_norm": 24.375, "learning_rate": 1.9951432779850598e-05, "loss": 8.3257, "step": 33480 }, { "epoch": 0.09418388496573214, "grad_norm": 29.5, "learning_rate": 1.9951403785500112e-05, "loss": 7.2528, "step": 33490 }, { "epoch": 0.09421200795318085, "grad_norm": 35.25, "learning_rate": 1.9951374782518556e-05, "loss": 8.024, "step": 33500 }, { "epoch": 0.09424013094062957, "grad_norm": 37.0, "learning_rate": 1.9951345770905962e-05, "loss": 8.3641, "step": 33510 }, { "epoch": 0.09426825392807828, "grad_norm": 44.75, "learning_rate": 1.995131675066235e-05, "loss": 8.0259, "step": 33520 }, { "epoch": 0.09429637691552699, "grad_norm": 35.0, "learning_rate": 1.9951287721787746e-05, "loss": 8.4489, "step": 33530 }, { "epoch": 0.09432449990297569, "grad_norm": 23.5, "learning_rate": 1.995125868428218e-05, "loss": 7.9927, "step": 33540 }, { "epoch": 0.0943526228904244, "grad_norm": 21.375, "learning_rate": 1.9951229638145663e-05, "loss": 7.3291, "step": 33550 }, { "epoch": 0.09438074587787311, "grad_norm": 21.25, "learning_rate": 1.9951200583378235e-05, "loss": 7.704, "step": 33560 }, { "epoch": 0.09440886886532182, "grad_norm": 64.0, "learning_rate": 1.995117151997992e-05, "loss": 7.7443, "step": 33570 }, { "epoch": 0.09443699185277053, "grad_norm": 25.875, "learning_rate": 1.9951142447950736e-05, "loss": 8.3682, "step": 33580 }, { "epoch": 0.09446511484021924, "grad_norm": 35.0, "learning_rate": 1.995111336729071e-05, "loss": 8.7455, "step": 33590 }, { "epoch": 0.09449323782766796, "grad_norm": 33.5, "learning_rate": 1.995108427799987e-05, "loss": 8.12, "step": 33600 }, { "epoch": 0.09452136081511667, "grad_norm": 25.0, "learning_rate": 1.9951055180078245e-05, "loss": 8.3369, "step": 33610 }, { "epoch": 0.09454948380256538, "grad_norm": 32.0, "learning_rate": 1.995102607352585e-05, "loss": 7.5879, "step": 33620 }, { "epoch": 0.09457760679001409, "grad_norm": 43.75, "learning_rate": 1.9950996958342717e-05, "loss": 7.9791, "step": 33630 }, { "epoch": 0.0946057297774628, "grad_norm": 30.875, "learning_rate": 1.995096783452887e-05, "loss": 8.7794, "step": 33640 }, { "epoch": 0.09463385276491151, "grad_norm": 28.0, "learning_rate": 1.9950938702084332e-05, "loss": 7.8254, "step": 33650 }, { "epoch": 0.09466197575236023, "grad_norm": 23.0, "learning_rate": 1.9950909561009132e-05, "loss": 8.8056, "step": 33660 }, { "epoch": 0.09469009873980894, "grad_norm": 40.5, "learning_rate": 1.9950880411303296e-05, "loss": 7.9687, "step": 33670 }, { "epoch": 0.09471822172725765, "grad_norm": 33.25, "learning_rate": 1.9950851252966842e-05, "loss": 8.2234, "step": 33680 }, { "epoch": 0.09474634471470636, "grad_norm": 34.0, "learning_rate": 1.9950822085999803e-05, "loss": 8.9285, "step": 33690 }, { "epoch": 0.09477446770215507, "grad_norm": 46.75, "learning_rate": 1.9950792910402203e-05, "loss": 7.8893, "step": 33700 }, { "epoch": 0.09480259068960377, "grad_norm": 50.5, "learning_rate": 1.995076372617406e-05, "loss": 7.6993, "step": 33710 }, { "epoch": 0.09483071367705248, "grad_norm": 30.75, "learning_rate": 1.995073453331541e-05, "loss": 8.126, "step": 33720 }, { "epoch": 0.09485883666450119, "grad_norm": 31.5, "learning_rate": 1.995070533182627e-05, "loss": 8.0762, "step": 33730 }, { "epoch": 0.0948869596519499, "grad_norm": 18.75, "learning_rate": 1.9950676121706674e-05, "loss": 7.4353, "step": 33740 }, { "epoch": 0.09491508263939862, "grad_norm": 50.25, "learning_rate": 1.9950646902956638e-05, "loss": 7.7661, "step": 33750 }, { "epoch": 0.09494320562684733, "grad_norm": 46.0, "learning_rate": 1.995061767557619e-05, "loss": 7.8698, "step": 33760 }, { "epoch": 0.09497132861429604, "grad_norm": 48.75, "learning_rate": 1.995058843956536e-05, "loss": 7.9607, "step": 33770 }, { "epoch": 0.09499945160174475, "grad_norm": 46.5, "learning_rate": 1.995055919492417e-05, "loss": 7.6457, "step": 33780 }, { "epoch": 0.09502757458919346, "grad_norm": 21.125, "learning_rate": 1.9950529941652647e-05, "loss": 7.9269, "step": 33790 }, { "epoch": 0.09505569757664217, "grad_norm": 34.25, "learning_rate": 1.9950500679750813e-05, "loss": 7.7117, "step": 33800 }, { "epoch": 0.09508382056409088, "grad_norm": 32.0, "learning_rate": 1.9950471409218695e-05, "loss": 8.1605, "step": 33810 }, { "epoch": 0.0951119435515396, "grad_norm": 31.5, "learning_rate": 1.9950442130056318e-05, "loss": 8.404, "step": 33820 }, { "epoch": 0.09514006653898831, "grad_norm": 34.25, "learning_rate": 1.9950412842263705e-05, "loss": 8.2777, "step": 33830 }, { "epoch": 0.09516818952643702, "grad_norm": 33.0, "learning_rate": 1.995038354584089e-05, "loss": 7.8773, "step": 33840 }, { "epoch": 0.09519631251388573, "grad_norm": 107.5, "learning_rate": 1.995035424078789e-05, "loss": 8.733, "step": 33850 }, { "epoch": 0.09522443550133444, "grad_norm": 45.5, "learning_rate": 1.9950324927104737e-05, "loss": 8.177, "step": 33860 }, { "epoch": 0.09525255848878314, "grad_norm": 48.5, "learning_rate": 1.995029560479145e-05, "loss": 8.4047, "step": 33870 }, { "epoch": 0.09528068147623185, "grad_norm": 31.875, "learning_rate": 1.9950266273848057e-05, "loss": 7.4535, "step": 33880 }, { "epoch": 0.09530880446368056, "grad_norm": 36.25, "learning_rate": 1.9950236934274582e-05, "loss": 8.4049, "step": 33890 }, { "epoch": 0.09533692745112927, "grad_norm": 41.25, "learning_rate": 1.9950207586071054e-05, "loss": 7.8892, "step": 33900 }, { "epoch": 0.09536505043857799, "grad_norm": 22.875, "learning_rate": 1.9950178229237494e-05, "loss": 7.4575, "step": 33910 }, { "epoch": 0.0953931734260267, "grad_norm": 42.75, "learning_rate": 1.9950148863773932e-05, "loss": 7.8154, "step": 33920 }, { "epoch": 0.09542129641347541, "grad_norm": 34.0, "learning_rate": 1.995011948968039e-05, "loss": 7.3913, "step": 33930 }, { "epoch": 0.09544941940092412, "grad_norm": 35.5, "learning_rate": 1.99500901069569e-05, "loss": 8.1025, "step": 33940 }, { "epoch": 0.09547754238837283, "grad_norm": 35.0, "learning_rate": 1.9950060715603478e-05, "loss": 7.7723, "step": 33950 }, { "epoch": 0.09550566537582154, "grad_norm": 25.875, "learning_rate": 1.995003131562015e-05, "loss": 8.2555, "step": 33960 }, { "epoch": 0.09553378836327026, "grad_norm": 23.875, "learning_rate": 1.9950001907006952e-05, "loss": 7.81, "step": 33970 }, { "epoch": 0.09556191135071897, "grad_norm": 40.0, "learning_rate": 1.99499724897639e-05, "loss": 8.7186, "step": 33980 }, { "epoch": 0.09559003433816768, "grad_norm": 39.0, "learning_rate": 1.9949943063891022e-05, "loss": 7.6267, "step": 33990 }, { "epoch": 0.09561815732561639, "grad_norm": 26.875, "learning_rate": 1.9949913629388346e-05, "loss": 7.2896, "step": 34000 }, { "epoch": 0.0956462803130651, "grad_norm": 28.625, "learning_rate": 1.9949884186255894e-05, "loss": 7.5311, "step": 34010 }, { "epoch": 0.09567440330051381, "grad_norm": 29.25, "learning_rate": 1.994985473449369e-05, "loss": 7.9879, "step": 34020 }, { "epoch": 0.09570252628796251, "grad_norm": 31.5, "learning_rate": 1.9949825274101768e-05, "loss": 8.336, "step": 34030 }, { "epoch": 0.09573064927541122, "grad_norm": 31.25, "learning_rate": 1.9949795805080144e-05, "loss": 8.4426, "step": 34040 }, { "epoch": 0.09575877226285993, "grad_norm": 24.75, "learning_rate": 1.9949766327428846e-05, "loss": 7.9934, "step": 34050 }, { "epoch": 0.09578689525030865, "grad_norm": 28.625, "learning_rate": 1.9949736841147906e-05, "loss": 7.1479, "step": 34060 }, { "epoch": 0.09581501823775736, "grad_norm": 23.5, "learning_rate": 1.9949707346237344e-05, "loss": 8.1959, "step": 34070 }, { "epoch": 0.09584314122520607, "grad_norm": 41.0, "learning_rate": 1.9949677842697186e-05, "loss": 8.5101, "step": 34080 }, { "epoch": 0.09587126421265478, "grad_norm": 32.75, "learning_rate": 1.9949648330527456e-05, "loss": 7.6801, "step": 34090 }, { "epoch": 0.09589938720010349, "grad_norm": 21.875, "learning_rate": 1.9949618809728182e-05, "loss": 8.6535, "step": 34100 }, { "epoch": 0.0959275101875522, "grad_norm": 27.0, "learning_rate": 1.9949589280299387e-05, "loss": 8.2399, "step": 34110 }, { "epoch": 0.09595563317500092, "grad_norm": 24.125, "learning_rate": 1.9949559742241105e-05, "loss": 7.6858, "step": 34120 }, { "epoch": 0.09598375616244963, "grad_norm": 29.0, "learning_rate": 1.994953019555335e-05, "loss": 8.0112, "step": 34130 }, { "epoch": 0.09601187914989834, "grad_norm": 31.5, "learning_rate": 1.9949500640236154e-05, "loss": 8.6509, "step": 34140 }, { "epoch": 0.09604000213734705, "grad_norm": 28.625, "learning_rate": 1.994947107628954e-05, "loss": 8.1553, "step": 34150 }, { "epoch": 0.09606812512479576, "grad_norm": 24.375, "learning_rate": 1.994944150371354e-05, "loss": 7.2941, "step": 34160 }, { "epoch": 0.09609624811224447, "grad_norm": 30.875, "learning_rate": 1.9949411922508173e-05, "loss": 7.7683, "step": 34170 }, { "epoch": 0.09612437109969318, "grad_norm": 30.625, "learning_rate": 1.9949382332673464e-05, "loss": 8.3982, "step": 34180 }, { "epoch": 0.09615249408714188, "grad_norm": 46.5, "learning_rate": 1.9949352734209444e-05, "loss": 7.7355, "step": 34190 }, { "epoch": 0.0961806170745906, "grad_norm": 23.125, "learning_rate": 1.9949323127116137e-05, "loss": 8.6584, "step": 34200 }, { "epoch": 0.0962087400620393, "grad_norm": 29.25, "learning_rate": 1.9949293511393563e-05, "loss": 7.6814, "step": 34210 }, { "epoch": 0.09623686304948802, "grad_norm": 23.125, "learning_rate": 1.9949263887041753e-05, "loss": 7.7171, "step": 34220 }, { "epoch": 0.09626498603693673, "grad_norm": 20.5, "learning_rate": 1.9949234254060738e-05, "loss": 7.921, "step": 34230 }, { "epoch": 0.09629310902438544, "grad_norm": 22.5, "learning_rate": 1.994920461245053e-05, "loss": 8.018, "step": 34240 }, { "epoch": 0.09632123201183415, "grad_norm": 47.25, "learning_rate": 1.9949174962211167e-05, "loss": 8.8086, "step": 34250 }, { "epoch": 0.09634935499928286, "grad_norm": 25.0, "learning_rate": 1.9949145303342666e-05, "loss": 8.3242, "step": 34260 }, { "epoch": 0.09637747798673157, "grad_norm": 26.0, "learning_rate": 1.994911563584506e-05, "loss": 8.0383, "step": 34270 }, { "epoch": 0.09640560097418029, "grad_norm": 29.75, "learning_rate": 1.994908595971837e-05, "loss": 7.8968, "step": 34280 }, { "epoch": 0.096433723961629, "grad_norm": 39.0, "learning_rate": 1.9949056274962627e-05, "loss": 7.9171, "step": 34290 }, { "epoch": 0.09646184694907771, "grad_norm": 34.25, "learning_rate": 1.994902658157785e-05, "loss": 8.4294, "step": 34300 }, { "epoch": 0.09648996993652642, "grad_norm": 17.0, "learning_rate": 1.9948996879564068e-05, "loss": 9.1109, "step": 34310 }, { "epoch": 0.09651809292397513, "grad_norm": 28.375, "learning_rate": 1.9948967168921305e-05, "loss": 7.9873, "step": 34320 }, { "epoch": 0.09654621591142384, "grad_norm": 23.75, "learning_rate": 1.994893744964959e-05, "loss": 7.8537, "step": 34330 }, { "epoch": 0.09657433889887256, "grad_norm": 24.5, "learning_rate": 1.9948907721748948e-05, "loss": 8.1767, "step": 34340 }, { "epoch": 0.09660246188632127, "grad_norm": 35.25, "learning_rate": 1.99488779852194e-05, "loss": 7.831, "step": 34350 }, { "epoch": 0.09663058487376996, "grad_norm": 37.0, "learning_rate": 1.994884824006098e-05, "loss": 7.8766, "step": 34360 }, { "epoch": 0.09665870786121868, "grad_norm": 30.0, "learning_rate": 1.9948818486273708e-05, "loss": 8.0223, "step": 34370 }, { "epoch": 0.09668683084866739, "grad_norm": 30.5, "learning_rate": 1.994878872385761e-05, "loss": 7.6153, "step": 34380 }, { "epoch": 0.0967149538361161, "grad_norm": 30.25, "learning_rate": 1.9948758952812715e-05, "loss": 8.1448, "step": 34390 }, { "epoch": 0.09674307682356481, "grad_norm": 30.625, "learning_rate": 1.9948729173139044e-05, "loss": 7.7981, "step": 34400 }, { "epoch": 0.09677119981101352, "grad_norm": 23.0, "learning_rate": 1.994869938483663e-05, "loss": 7.8277, "step": 34410 }, { "epoch": 0.09679932279846223, "grad_norm": 36.5, "learning_rate": 1.994866958790549e-05, "loss": 8.1864, "step": 34420 }, { "epoch": 0.09682744578591095, "grad_norm": 30.625, "learning_rate": 1.9948639782345655e-05, "loss": 7.3568, "step": 34430 }, { "epoch": 0.09685556877335966, "grad_norm": 35.0, "learning_rate": 1.9948609968157153e-05, "loss": 8.401, "step": 34440 }, { "epoch": 0.09688369176080837, "grad_norm": 25.75, "learning_rate": 1.9948580145340008e-05, "loss": 7.9404, "step": 34450 }, { "epoch": 0.09691181474825708, "grad_norm": 27.625, "learning_rate": 1.994855031389424e-05, "loss": 7.6216, "step": 34460 }, { "epoch": 0.09693993773570579, "grad_norm": 28.875, "learning_rate": 1.9948520473819882e-05, "loss": 8.0419, "step": 34470 }, { "epoch": 0.0969680607231545, "grad_norm": 26.5, "learning_rate": 1.994849062511696e-05, "loss": 7.6162, "step": 34480 }, { "epoch": 0.09699618371060321, "grad_norm": 36.25, "learning_rate": 1.9948460767785494e-05, "loss": 7.4739, "step": 34490 }, { "epoch": 0.09702430669805193, "grad_norm": 28.0, "learning_rate": 1.994843090182552e-05, "loss": 8.4236, "step": 34500 }, { "epoch": 0.09705242968550064, "grad_norm": 39.75, "learning_rate": 1.994840102723705e-05, "loss": 8.691, "step": 34510 }, { "epoch": 0.09708055267294934, "grad_norm": 29.75, "learning_rate": 1.994837114402012e-05, "loss": 7.315, "step": 34520 }, { "epoch": 0.09710867566039805, "grad_norm": 38.25, "learning_rate": 1.994834125217475e-05, "loss": 7.9799, "step": 34530 }, { "epoch": 0.09713679864784676, "grad_norm": 35.5, "learning_rate": 1.9948311351700975e-05, "loss": 7.4159, "step": 34540 }, { "epoch": 0.09716492163529547, "grad_norm": 39.0, "learning_rate": 1.9948281442598813e-05, "loss": 8.1024, "step": 34550 }, { "epoch": 0.09719304462274418, "grad_norm": 39.0, "learning_rate": 1.9948251524868292e-05, "loss": 7.6426, "step": 34560 }, { "epoch": 0.0972211676101929, "grad_norm": 24.5, "learning_rate": 1.9948221598509436e-05, "loss": 7.7878, "step": 34570 }, { "epoch": 0.0972492905976416, "grad_norm": 34.25, "learning_rate": 1.9948191663522275e-05, "loss": 7.7768, "step": 34580 }, { "epoch": 0.09727741358509032, "grad_norm": 28.75, "learning_rate": 1.994816171990683e-05, "loss": 7.858, "step": 34590 }, { "epoch": 0.09730553657253903, "grad_norm": 27.375, "learning_rate": 1.9948131767663136e-05, "loss": 8.1617, "step": 34600 }, { "epoch": 0.09733365955998774, "grad_norm": 27.75, "learning_rate": 1.9948101806791207e-05, "loss": 7.07, "step": 34610 }, { "epoch": 0.09736178254743645, "grad_norm": 45.25, "learning_rate": 1.9948071837291076e-05, "loss": 8.3011, "step": 34620 }, { "epoch": 0.09738990553488516, "grad_norm": 28.5, "learning_rate": 1.994804185916277e-05, "loss": 7.3544, "step": 34630 }, { "epoch": 0.09741802852233387, "grad_norm": 26.25, "learning_rate": 1.994801187240631e-05, "loss": 8.1194, "step": 34640 }, { "epoch": 0.09744615150978259, "grad_norm": 27.25, "learning_rate": 1.9947981877021726e-05, "loss": 8.4025, "step": 34650 }, { "epoch": 0.0974742744972313, "grad_norm": 27.625, "learning_rate": 1.9947951873009044e-05, "loss": 8.092, "step": 34660 }, { "epoch": 0.09750239748468001, "grad_norm": 23.125, "learning_rate": 1.994792186036829e-05, "loss": 8.1229, "step": 34670 }, { "epoch": 0.0975305204721287, "grad_norm": 36.25, "learning_rate": 1.9947891839099485e-05, "loss": 8.4437, "step": 34680 }, { "epoch": 0.09755864345957742, "grad_norm": 76.0, "learning_rate": 1.994786180920266e-05, "loss": 8.7042, "step": 34690 }, { "epoch": 0.09758676644702613, "grad_norm": 28.625, "learning_rate": 1.994783177067784e-05, "loss": 8.337, "step": 34700 }, { "epoch": 0.09761488943447484, "grad_norm": 27.0, "learning_rate": 1.9947801723525052e-05, "loss": 7.2786, "step": 34710 }, { "epoch": 0.09764301242192355, "grad_norm": 26.25, "learning_rate": 1.9947771667744322e-05, "loss": 8.52, "step": 34720 }, { "epoch": 0.09767113540937226, "grad_norm": 22.875, "learning_rate": 1.9947741603335674e-05, "loss": 7.8198, "step": 34730 }, { "epoch": 0.09769925839682098, "grad_norm": 32.75, "learning_rate": 1.9947711530299134e-05, "loss": 7.4021, "step": 34740 }, { "epoch": 0.09772738138426969, "grad_norm": 55.25, "learning_rate": 1.994768144863473e-05, "loss": 7.9327, "step": 34750 }, { "epoch": 0.0977555043717184, "grad_norm": 56.5, "learning_rate": 1.994765135834249e-05, "loss": 8.3904, "step": 34760 }, { "epoch": 0.09778362735916711, "grad_norm": 32.25, "learning_rate": 1.9947621259422437e-05, "loss": 8.3421, "step": 34770 }, { "epoch": 0.09781175034661582, "grad_norm": 33.75, "learning_rate": 1.9947591151874594e-05, "loss": 8.0124, "step": 34780 }, { "epoch": 0.09783987333406453, "grad_norm": 35.0, "learning_rate": 1.9947561035698995e-05, "loss": 8.6144, "step": 34790 }, { "epoch": 0.09786799632151325, "grad_norm": 30.0, "learning_rate": 1.994753091089566e-05, "loss": 7.5272, "step": 34800 }, { "epoch": 0.09789611930896196, "grad_norm": 26.75, "learning_rate": 1.9947500777464615e-05, "loss": 7.8444, "step": 34810 }, { "epoch": 0.09792424229641067, "grad_norm": 29.5, "learning_rate": 1.9947470635405892e-05, "loss": 7.2193, "step": 34820 }, { "epoch": 0.09795236528385938, "grad_norm": 28.875, "learning_rate": 1.9947440484719508e-05, "loss": 8.3753, "step": 34830 }, { "epoch": 0.09798048827130808, "grad_norm": 43.75, "learning_rate": 1.99474103254055e-05, "loss": 7.7153, "step": 34840 }, { "epoch": 0.09800861125875679, "grad_norm": 38.0, "learning_rate": 1.9947380157463886e-05, "loss": 8.1989, "step": 34850 }, { "epoch": 0.0980367342462055, "grad_norm": 28.25, "learning_rate": 1.9947349980894696e-05, "loss": 8.2828, "step": 34860 }, { "epoch": 0.09806485723365421, "grad_norm": 27.5, "learning_rate": 1.9947319795697953e-05, "loss": 7.9424, "step": 34870 }, { "epoch": 0.09809298022110292, "grad_norm": 27.0, "learning_rate": 1.9947289601873685e-05, "loss": 7.9443, "step": 34880 }, { "epoch": 0.09812110320855164, "grad_norm": 34.25, "learning_rate": 1.994725939942192e-05, "loss": 7.2726, "step": 34890 }, { "epoch": 0.09814922619600035, "grad_norm": 28.25, "learning_rate": 1.994722918834268e-05, "loss": 8.2475, "step": 34900 }, { "epoch": 0.09817734918344906, "grad_norm": 26.875, "learning_rate": 1.9947198968635995e-05, "loss": 8.0215, "step": 34910 }, { "epoch": 0.09820547217089777, "grad_norm": 22.5, "learning_rate": 1.9947168740301893e-05, "loss": 8.3231, "step": 34920 }, { "epoch": 0.09823359515834648, "grad_norm": 36.25, "learning_rate": 1.9947138503340392e-05, "loss": 7.8721, "step": 34930 }, { "epoch": 0.0982617181457952, "grad_norm": 40.0, "learning_rate": 1.9947108257751526e-05, "loss": 7.9962, "step": 34940 }, { "epoch": 0.0982898411332439, "grad_norm": 52.5, "learning_rate": 1.9947078003535316e-05, "loss": 7.9704, "step": 34950 }, { "epoch": 0.09831796412069262, "grad_norm": 21.0, "learning_rate": 1.9947047740691795e-05, "loss": 7.8284, "step": 34960 }, { "epoch": 0.09834608710814133, "grad_norm": 35.25, "learning_rate": 1.9947017469220983e-05, "loss": 7.5414, "step": 34970 }, { "epoch": 0.09837421009559004, "grad_norm": 23.875, "learning_rate": 1.994698718912291e-05, "loss": 7.8494, "step": 34980 }, { "epoch": 0.09840233308303875, "grad_norm": 33.0, "learning_rate": 1.99469569003976e-05, "loss": 8.441, "step": 34990 }, { "epoch": 0.09843045607048746, "grad_norm": 21.0, "learning_rate": 1.9946926603045074e-05, "loss": 6.9415, "step": 35000 }, { "epoch": 0.09845857905793616, "grad_norm": 23.0, "learning_rate": 1.994689629706537e-05, "loss": 7.4531, "step": 35010 }, { "epoch": 0.09848670204538487, "grad_norm": 27.375, "learning_rate": 1.9946865982458507e-05, "loss": 7.7361, "step": 35020 }, { "epoch": 0.09851482503283358, "grad_norm": 25.75, "learning_rate": 1.9946835659224512e-05, "loss": 8.5667, "step": 35030 }, { "epoch": 0.0985429480202823, "grad_norm": 38.0, "learning_rate": 1.994680532736341e-05, "loss": 7.8589, "step": 35040 }, { "epoch": 0.098571071007731, "grad_norm": 32.25, "learning_rate": 1.9946774986875236e-05, "loss": 7.6877, "step": 35050 }, { "epoch": 0.09859919399517972, "grad_norm": 24.625, "learning_rate": 1.9946744637760004e-05, "loss": 7.9455, "step": 35060 }, { "epoch": 0.09862731698262843, "grad_norm": 29.75, "learning_rate": 1.994671428001775e-05, "loss": 8.1593, "step": 35070 }, { "epoch": 0.09865543997007714, "grad_norm": 29.5, "learning_rate": 1.9946683913648494e-05, "loss": 7.8354, "step": 35080 }, { "epoch": 0.09868356295752585, "grad_norm": 28.75, "learning_rate": 1.9946653538652264e-05, "loss": 7.6561, "step": 35090 }, { "epoch": 0.09871168594497456, "grad_norm": 36.0, "learning_rate": 1.994662315502909e-05, "loss": 7.7363, "step": 35100 }, { "epoch": 0.09873980893242328, "grad_norm": 25.5, "learning_rate": 1.9946592762778988e-05, "loss": 7.9574, "step": 35110 }, { "epoch": 0.09876793191987199, "grad_norm": 23.625, "learning_rate": 1.9946562361902e-05, "loss": 8.0755, "step": 35120 }, { "epoch": 0.0987960549073207, "grad_norm": 24.5, "learning_rate": 1.9946531952398138e-05, "loss": 7.5878, "step": 35130 }, { "epoch": 0.09882417789476941, "grad_norm": 27.875, "learning_rate": 1.9946501534267436e-05, "loss": 7.3152, "step": 35140 }, { "epoch": 0.09885230088221812, "grad_norm": 30.0, "learning_rate": 1.994647110750992e-05, "loss": 7.9381, "step": 35150 }, { "epoch": 0.09888042386966683, "grad_norm": 30.125, "learning_rate": 1.9946440672125616e-05, "loss": 7.6638, "step": 35160 }, { "epoch": 0.09890854685711553, "grad_norm": 22.5, "learning_rate": 1.9946410228114548e-05, "loss": 7.8539, "step": 35170 }, { "epoch": 0.09893666984456424, "grad_norm": 31.125, "learning_rate": 1.9946379775476744e-05, "loss": 7.1999, "step": 35180 }, { "epoch": 0.09896479283201295, "grad_norm": 20.75, "learning_rate": 1.994634931421223e-05, "loss": 7.997, "step": 35190 }, { "epoch": 0.09899291581946167, "grad_norm": 43.5, "learning_rate": 1.9946318844321034e-05, "loss": 8.3306, "step": 35200 }, { "epoch": 0.09902103880691038, "grad_norm": 31.875, "learning_rate": 1.9946288365803182e-05, "loss": 7.5004, "step": 35210 }, { "epoch": 0.09904916179435909, "grad_norm": 22.25, "learning_rate": 1.9946257878658696e-05, "loss": 8.2654, "step": 35220 }, { "epoch": 0.0990772847818078, "grad_norm": 38.75, "learning_rate": 1.994622738288761e-05, "loss": 8.0553, "step": 35230 }, { "epoch": 0.09910540776925651, "grad_norm": 31.625, "learning_rate": 1.9946196878489946e-05, "loss": 7.9849, "step": 35240 }, { "epoch": 0.09913353075670522, "grad_norm": 23.25, "learning_rate": 1.9946166365465727e-05, "loss": 8.2555, "step": 35250 }, { "epoch": 0.09916165374415394, "grad_norm": 30.75, "learning_rate": 1.9946135843814985e-05, "loss": 8.1368, "step": 35260 }, { "epoch": 0.09918977673160265, "grad_norm": 26.0, "learning_rate": 1.994610531353775e-05, "loss": 8.1076, "step": 35270 }, { "epoch": 0.09921789971905136, "grad_norm": 28.875, "learning_rate": 1.994607477463404e-05, "loss": 7.6869, "step": 35280 }, { "epoch": 0.09924602270650007, "grad_norm": 31.75, "learning_rate": 1.9946044227103884e-05, "loss": 7.9467, "step": 35290 }, { "epoch": 0.09927414569394878, "grad_norm": 44.75, "learning_rate": 1.994601367094731e-05, "loss": 8.0237, "step": 35300 }, { "epoch": 0.0993022686813975, "grad_norm": 41.0, "learning_rate": 1.9945983106164343e-05, "loss": 8.5538, "step": 35310 }, { "epoch": 0.0993303916688462, "grad_norm": 33.0, "learning_rate": 1.9945952532755013e-05, "loss": 7.9339, "step": 35320 }, { "epoch": 0.0993585146562949, "grad_norm": 45.0, "learning_rate": 1.9945921950719344e-05, "loss": 7.7516, "step": 35330 }, { "epoch": 0.09938663764374361, "grad_norm": 24.125, "learning_rate": 1.994589136005736e-05, "loss": 7.5385, "step": 35340 }, { "epoch": 0.09941476063119233, "grad_norm": 21.0, "learning_rate": 1.9945860760769088e-05, "loss": 7.6339, "step": 35350 }, { "epoch": 0.09944288361864104, "grad_norm": 26.875, "learning_rate": 1.9945830152854563e-05, "loss": 8.0207, "step": 35360 }, { "epoch": 0.09947100660608975, "grad_norm": 28.875, "learning_rate": 1.99457995363138e-05, "loss": 7.8396, "step": 35370 }, { "epoch": 0.09949912959353846, "grad_norm": 34.25, "learning_rate": 1.9945768911146833e-05, "loss": 8.5064, "step": 35380 }, { "epoch": 0.09952725258098717, "grad_norm": 23.5, "learning_rate": 1.9945738277353683e-05, "loss": 8.0252, "step": 35390 }, { "epoch": 0.09955537556843588, "grad_norm": 25.5, "learning_rate": 1.9945707634934383e-05, "loss": 8.0851, "step": 35400 }, { "epoch": 0.0995834985558846, "grad_norm": 21.125, "learning_rate": 1.9945676983888957e-05, "loss": 8.1571, "step": 35410 }, { "epoch": 0.0996116215433333, "grad_norm": 23.5, "learning_rate": 1.994564632421743e-05, "loss": 7.6056, "step": 35420 }, { "epoch": 0.09963974453078202, "grad_norm": 29.5, "learning_rate": 1.994561565591983e-05, "loss": 8.536, "step": 35430 }, { "epoch": 0.09966786751823073, "grad_norm": 37.0, "learning_rate": 1.9945584978996178e-05, "loss": 8.1186, "step": 35440 }, { "epoch": 0.09969599050567944, "grad_norm": 25.0, "learning_rate": 1.9945554293446513e-05, "loss": 8.1453, "step": 35450 }, { "epoch": 0.09972411349312815, "grad_norm": 22.125, "learning_rate": 1.9945523599270847e-05, "loss": 7.6347, "step": 35460 }, { "epoch": 0.09975223648057686, "grad_norm": 28.5, "learning_rate": 1.994549289646922e-05, "loss": 7.6964, "step": 35470 }, { "epoch": 0.09978035946802558, "grad_norm": 32.0, "learning_rate": 1.9945462185041652e-05, "loss": 8.6327, "step": 35480 }, { "epoch": 0.09980848245547427, "grad_norm": 23.0, "learning_rate": 1.994543146498817e-05, "loss": 8.5013, "step": 35490 }, { "epoch": 0.09983660544292298, "grad_norm": 23.125, "learning_rate": 1.9945400736308797e-05, "loss": 7.8948, "step": 35500 }, { "epoch": 0.0998647284303717, "grad_norm": 22.875, "learning_rate": 1.994536999900357e-05, "loss": 8.0567, "step": 35510 }, { "epoch": 0.09989285141782041, "grad_norm": 30.875, "learning_rate": 1.9945339253072503e-05, "loss": 7.4077, "step": 35520 }, { "epoch": 0.09992097440526912, "grad_norm": 62.25, "learning_rate": 1.9945308498515633e-05, "loss": 7.7931, "step": 35530 }, { "epoch": 0.09994909739271783, "grad_norm": 38.5, "learning_rate": 1.994527773533298e-05, "loss": 7.4245, "step": 35540 }, { "epoch": 0.09997722038016654, "grad_norm": 75.0, "learning_rate": 1.9945246963524575e-05, "loss": 8.6513, "step": 35550 }, { "epoch": 0.10000534336761525, "grad_norm": 20.25, "learning_rate": 1.9945216183090438e-05, "loss": 8.1036, "step": 35560 }, { "epoch": 0.10003346635506397, "grad_norm": 42.5, "learning_rate": 1.994518539403061e-05, "loss": 8.4752, "step": 35570 }, { "epoch": 0.10006158934251268, "grad_norm": 21.75, "learning_rate": 1.99451545963451e-05, "loss": 8.1236, "step": 35580 }, { "epoch": 0.10008971232996139, "grad_norm": 29.125, "learning_rate": 1.9945123790033945e-05, "loss": 8.6495, "step": 35590 }, { "epoch": 0.1001178353174101, "grad_norm": 21.5, "learning_rate": 1.994509297509717e-05, "loss": 7.7275, "step": 35600 }, { "epoch": 0.10014595830485881, "grad_norm": 25.875, "learning_rate": 1.9945062151534803e-05, "loss": 7.8538, "step": 35610 }, { "epoch": 0.10017408129230752, "grad_norm": 21.375, "learning_rate": 1.994503131934687e-05, "loss": 8.1354, "step": 35620 }, { "epoch": 0.10020220427975624, "grad_norm": 39.25, "learning_rate": 1.9945000478533394e-05, "loss": 7.7368, "step": 35630 }, { "epoch": 0.10023032726720495, "grad_norm": 43.25, "learning_rate": 1.9944969629094407e-05, "loss": 8.6796, "step": 35640 }, { "epoch": 0.10025845025465366, "grad_norm": 60.0, "learning_rate": 1.9944938771029928e-05, "loss": 7.421, "step": 35650 }, { "epoch": 0.10028657324210236, "grad_norm": 39.5, "learning_rate": 1.9944907904339997e-05, "loss": 7.9019, "step": 35660 }, { "epoch": 0.10031469622955107, "grad_norm": 23.75, "learning_rate": 1.994487702902463e-05, "loss": 7.9089, "step": 35670 }, { "epoch": 0.10034281921699978, "grad_norm": 27.125, "learning_rate": 1.9944846145083857e-05, "loss": 7.4266, "step": 35680 }, { "epoch": 0.10037094220444849, "grad_norm": 25.875, "learning_rate": 1.9944815252517703e-05, "loss": 8.6505, "step": 35690 }, { "epoch": 0.1003990651918972, "grad_norm": 34.25, "learning_rate": 1.9944784351326195e-05, "loss": 7.5449, "step": 35700 }, { "epoch": 0.10042718817934591, "grad_norm": 31.875, "learning_rate": 1.9944753441509365e-05, "loss": 7.5257, "step": 35710 }, { "epoch": 0.10045531116679463, "grad_norm": 39.25, "learning_rate": 1.9944722523067232e-05, "loss": 8.2313, "step": 35720 }, { "epoch": 0.10048343415424334, "grad_norm": 30.875, "learning_rate": 1.994469159599983e-05, "loss": 7.1398, "step": 35730 }, { "epoch": 0.10051155714169205, "grad_norm": 33.75, "learning_rate": 1.994466066030718e-05, "loss": 8.1076, "step": 35740 }, { "epoch": 0.10053968012914076, "grad_norm": 29.75, "learning_rate": 1.9944629715989315e-05, "loss": 7.6846, "step": 35750 }, { "epoch": 0.10056780311658947, "grad_norm": 30.5, "learning_rate": 1.9944598763046256e-05, "loss": 7.6026, "step": 35760 }, { "epoch": 0.10059592610403818, "grad_norm": 36.0, "learning_rate": 1.994456780147803e-05, "loss": 7.1352, "step": 35770 }, { "epoch": 0.1006240490914869, "grad_norm": 33.75, "learning_rate": 1.9944536831284667e-05, "loss": 7.9691, "step": 35780 }, { "epoch": 0.1006521720789356, "grad_norm": 28.875, "learning_rate": 1.9944505852466195e-05, "loss": 8.5828, "step": 35790 }, { "epoch": 0.10068029506638432, "grad_norm": 45.5, "learning_rate": 1.9944474865022637e-05, "loss": 7.9256, "step": 35800 }, { "epoch": 0.10070841805383303, "grad_norm": 28.25, "learning_rate": 1.9944443868954024e-05, "loss": 7.6761, "step": 35810 }, { "epoch": 0.10073654104128173, "grad_norm": 43.5, "learning_rate": 1.994441286426038e-05, "loss": 8.0824, "step": 35820 }, { "epoch": 0.10076466402873044, "grad_norm": 30.5, "learning_rate": 1.994438185094173e-05, "loss": 8.0222, "step": 35830 }, { "epoch": 0.10079278701617915, "grad_norm": 25.625, "learning_rate": 1.9944350828998106e-05, "loss": 8.3097, "step": 35840 }, { "epoch": 0.10082091000362786, "grad_norm": 47.75, "learning_rate": 1.994431979842953e-05, "loss": 7.1629, "step": 35850 }, { "epoch": 0.10084903299107657, "grad_norm": 47.0, "learning_rate": 1.994428875923603e-05, "loss": 8.1664, "step": 35860 }, { "epoch": 0.10087715597852528, "grad_norm": 46.0, "learning_rate": 1.9944257711417637e-05, "loss": 8.706, "step": 35870 }, { "epoch": 0.100905278965974, "grad_norm": 29.25, "learning_rate": 1.994422665497437e-05, "loss": 7.677, "step": 35880 }, { "epoch": 0.10093340195342271, "grad_norm": 46.25, "learning_rate": 1.9944195589906265e-05, "loss": 8.384, "step": 35890 }, { "epoch": 0.10096152494087142, "grad_norm": 31.125, "learning_rate": 1.9944164516213343e-05, "loss": 8.1885, "step": 35900 }, { "epoch": 0.10098964792832013, "grad_norm": 28.25, "learning_rate": 1.9944133433895634e-05, "loss": 8.0183, "step": 35910 }, { "epoch": 0.10101777091576884, "grad_norm": 28.5, "learning_rate": 1.9944102342953164e-05, "loss": 8.1459, "step": 35920 }, { "epoch": 0.10104589390321755, "grad_norm": 54.5, "learning_rate": 1.9944071243385958e-05, "loss": 8.0077, "step": 35930 }, { "epoch": 0.10107401689066627, "grad_norm": 30.5, "learning_rate": 1.9944040135194048e-05, "loss": 8.3934, "step": 35940 }, { "epoch": 0.10110213987811498, "grad_norm": 29.625, "learning_rate": 1.9944009018377453e-05, "loss": 8.2555, "step": 35950 }, { "epoch": 0.10113026286556369, "grad_norm": 25.5, "learning_rate": 1.9943977892936205e-05, "loss": 7.8803, "step": 35960 }, { "epoch": 0.1011583858530124, "grad_norm": 40.25, "learning_rate": 1.9943946758870332e-05, "loss": 8.1515, "step": 35970 }, { "epoch": 0.1011865088404611, "grad_norm": 28.375, "learning_rate": 1.9943915616179862e-05, "loss": 7.8195, "step": 35980 }, { "epoch": 0.10121463182790981, "grad_norm": 22.25, "learning_rate": 1.9943884464864814e-05, "loss": 8.0787, "step": 35990 }, { "epoch": 0.10124275481535852, "grad_norm": 30.75, "learning_rate": 1.9943853304925226e-05, "loss": 7.0991, "step": 36000 }, { "epoch": 0.10127087780280723, "grad_norm": 30.75, "learning_rate": 1.994382213636112e-05, "loss": 7.5796, "step": 36010 }, { "epoch": 0.10129900079025594, "grad_norm": 27.625, "learning_rate": 1.994379095917252e-05, "loss": 6.7867, "step": 36020 }, { "epoch": 0.10132712377770466, "grad_norm": 53.75, "learning_rate": 1.9943759773359456e-05, "loss": 7.9833, "step": 36030 }, { "epoch": 0.10135524676515337, "grad_norm": 27.0, "learning_rate": 1.9943728578921954e-05, "loss": 8.0137, "step": 36040 }, { "epoch": 0.10138336975260208, "grad_norm": 55.5, "learning_rate": 1.9943697375860044e-05, "loss": 8.0634, "step": 36050 }, { "epoch": 0.10141149274005079, "grad_norm": 32.25, "learning_rate": 1.994366616417375e-05, "loss": 8.5468, "step": 36060 }, { "epoch": 0.1014396157274995, "grad_norm": 33.0, "learning_rate": 1.9943634943863097e-05, "loss": 8.3418, "step": 36070 }, { "epoch": 0.10146773871494821, "grad_norm": 33.75, "learning_rate": 1.9943603714928118e-05, "loss": 8.2579, "step": 36080 }, { "epoch": 0.10149586170239693, "grad_norm": 25.375, "learning_rate": 1.9943572477368836e-05, "loss": 8.0526, "step": 36090 }, { "epoch": 0.10152398468984564, "grad_norm": 25.375, "learning_rate": 1.994354123118528e-05, "loss": 8.4065, "step": 36100 }, { "epoch": 0.10155210767729435, "grad_norm": 38.75, "learning_rate": 1.9943509976377478e-05, "loss": 8.1438, "step": 36110 }, { "epoch": 0.10158023066474306, "grad_norm": 30.125, "learning_rate": 1.9943478712945453e-05, "loss": 7.3432, "step": 36120 }, { "epoch": 0.10160835365219177, "grad_norm": 49.25, "learning_rate": 1.9943447440889233e-05, "loss": 8.7826, "step": 36130 }, { "epoch": 0.10163647663964047, "grad_norm": 25.125, "learning_rate": 1.9943416160208852e-05, "loss": 7.9611, "step": 36140 }, { "epoch": 0.10166459962708918, "grad_norm": 47.75, "learning_rate": 1.9943384870904327e-05, "loss": 7.9598, "step": 36150 }, { "epoch": 0.10169272261453789, "grad_norm": 35.25, "learning_rate": 1.9943353572975692e-05, "loss": 8.4308, "step": 36160 }, { "epoch": 0.1017208456019866, "grad_norm": 29.375, "learning_rate": 1.994332226642297e-05, "loss": 8.091, "step": 36170 }, { "epoch": 0.10174896858943532, "grad_norm": 33.5, "learning_rate": 1.9943290951246194e-05, "loss": 8.5679, "step": 36180 }, { "epoch": 0.10177709157688403, "grad_norm": 35.0, "learning_rate": 1.9943259627445385e-05, "loss": 7.8919, "step": 36190 }, { "epoch": 0.10180521456433274, "grad_norm": 26.875, "learning_rate": 1.9943228295020575e-05, "loss": 7.7832, "step": 36200 }, { "epoch": 0.10183333755178145, "grad_norm": 43.75, "learning_rate": 1.9943196953971783e-05, "loss": 7.6631, "step": 36210 }, { "epoch": 0.10186146053923016, "grad_norm": 31.625, "learning_rate": 1.9943165604299048e-05, "loss": 9.0123, "step": 36220 }, { "epoch": 0.10188958352667887, "grad_norm": 36.25, "learning_rate": 1.9943134246002388e-05, "loss": 7.4637, "step": 36230 }, { "epoch": 0.10191770651412758, "grad_norm": 29.625, "learning_rate": 1.9943102879081832e-05, "loss": 7.6421, "step": 36240 }, { "epoch": 0.1019458295015763, "grad_norm": 31.0, "learning_rate": 1.9943071503537414e-05, "loss": 7.6428, "step": 36250 }, { "epoch": 0.10197395248902501, "grad_norm": 29.375, "learning_rate": 1.994304011936915e-05, "loss": 7.9238, "step": 36260 }, { "epoch": 0.10200207547647372, "grad_norm": 31.125, "learning_rate": 1.9943008726577077e-05, "loss": 7.7321, "step": 36270 }, { "epoch": 0.10203019846392243, "grad_norm": 27.875, "learning_rate": 1.9942977325161213e-05, "loss": 8.0739, "step": 36280 }, { "epoch": 0.10205832145137114, "grad_norm": 34.0, "learning_rate": 1.9942945915121594e-05, "loss": 7.6602, "step": 36290 }, { "epoch": 0.10208644443881985, "grad_norm": 38.0, "learning_rate": 1.994291449645824e-05, "loss": 7.6828, "step": 36300 }, { "epoch": 0.10211456742626855, "grad_norm": 42.0, "learning_rate": 1.994288306917119e-05, "loss": 7.5799, "step": 36310 }, { "epoch": 0.10214269041371726, "grad_norm": 36.0, "learning_rate": 1.9942851633260457e-05, "loss": 7.3619, "step": 36320 }, { "epoch": 0.10217081340116597, "grad_norm": 28.125, "learning_rate": 1.9942820188726076e-05, "loss": 7.4803, "step": 36330 }, { "epoch": 0.10219893638861469, "grad_norm": 38.0, "learning_rate": 1.9942788735568072e-05, "loss": 8.5912, "step": 36340 }, { "epoch": 0.1022270593760634, "grad_norm": 26.125, "learning_rate": 1.9942757273786474e-05, "loss": 7.8641, "step": 36350 }, { "epoch": 0.10225518236351211, "grad_norm": 33.5, "learning_rate": 1.9942725803381307e-05, "loss": 7.8217, "step": 36360 }, { "epoch": 0.10228330535096082, "grad_norm": 30.375, "learning_rate": 1.9942694324352597e-05, "loss": 7.5193, "step": 36370 }, { "epoch": 0.10231142833840953, "grad_norm": 27.5, "learning_rate": 1.994266283670038e-05, "loss": 7.9239, "step": 36380 }, { "epoch": 0.10233955132585824, "grad_norm": 30.125, "learning_rate": 1.9942631340424675e-05, "loss": 7.5537, "step": 36390 }, { "epoch": 0.10236767431330696, "grad_norm": 36.5, "learning_rate": 1.9942599835525512e-05, "loss": 7.45, "step": 36400 }, { "epoch": 0.10239579730075567, "grad_norm": 35.5, "learning_rate": 1.9942568322002916e-05, "loss": 7.8622, "step": 36410 }, { "epoch": 0.10242392028820438, "grad_norm": 25.0, "learning_rate": 1.9942536799856915e-05, "loss": 8.0497, "step": 36420 }, { "epoch": 0.10245204327565309, "grad_norm": 29.375, "learning_rate": 1.994250526908754e-05, "loss": 8.5126, "step": 36430 }, { "epoch": 0.1024801662631018, "grad_norm": 38.0, "learning_rate": 1.9942473729694812e-05, "loss": 8.2699, "step": 36440 }, { "epoch": 0.10250828925055051, "grad_norm": 25.5, "learning_rate": 1.9942442181678768e-05, "loss": 7.8617, "step": 36450 }, { "epoch": 0.10253641223799922, "grad_norm": 29.25, "learning_rate": 1.9942410625039427e-05, "loss": 8.0625, "step": 36460 }, { "epoch": 0.10256453522544792, "grad_norm": 40.75, "learning_rate": 1.9942379059776816e-05, "loss": 7.8067, "step": 36470 }, { "epoch": 0.10259265821289663, "grad_norm": 34.75, "learning_rate": 1.9942347485890966e-05, "loss": 7.7717, "step": 36480 }, { "epoch": 0.10262078120034535, "grad_norm": 29.125, "learning_rate": 1.9942315903381907e-05, "loss": 7.9271, "step": 36490 }, { "epoch": 0.10264890418779406, "grad_norm": 36.25, "learning_rate": 1.9942284312249663e-05, "loss": 8.7874, "step": 36500 }, { "epoch": 0.10267702717524277, "grad_norm": 28.75, "learning_rate": 1.9942252712494258e-05, "loss": 7.8891, "step": 36510 }, { "epoch": 0.10270515016269148, "grad_norm": 27.0, "learning_rate": 1.9942221104115726e-05, "loss": 8.2818, "step": 36520 }, { "epoch": 0.10273327315014019, "grad_norm": 37.0, "learning_rate": 1.9942189487114093e-05, "loss": 7.7552, "step": 36530 }, { "epoch": 0.1027613961375889, "grad_norm": 19.875, "learning_rate": 1.994215786148938e-05, "loss": 9.1373, "step": 36540 }, { "epoch": 0.10278951912503762, "grad_norm": 24.375, "learning_rate": 1.994212622724162e-05, "loss": 7.2317, "step": 36550 }, { "epoch": 0.10281764211248633, "grad_norm": 37.0, "learning_rate": 1.9942094584370843e-05, "loss": 8.384, "step": 36560 }, { "epoch": 0.10284576509993504, "grad_norm": 33.75, "learning_rate": 1.994206293287707e-05, "loss": 7.95, "step": 36570 }, { "epoch": 0.10287388808738375, "grad_norm": 48.75, "learning_rate": 1.994203127276033e-05, "loss": 7.9459, "step": 36580 }, { "epoch": 0.10290201107483246, "grad_norm": 52.0, "learning_rate": 1.9941999604020655e-05, "loss": 7.3141, "step": 36590 }, { "epoch": 0.10293013406228117, "grad_norm": 29.25, "learning_rate": 1.994196792665807e-05, "loss": 8.5512, "step": 36600 }, { "epoch": 0.10295825704972988, "grad_norm": 25.25, "learning_rate": 1.99419362406726e-05, "loss": 8.3649, "step": 36610 }, { "epoch": 0.1029863800371786, "grad_norm": 26.625, "learning_rate": 1.9941904546064276e-05, "loss": 8.5234, "step": 36620 }, { "epoch": 0.1030145030246273, "grad_norm": 26.0, "learning_rate": 1.994187284283312e-05, "loss": 7.7746, "step": 36630 }, { "epoch": 0.103042626012076, "grad_norm": 46.75, "learning_rate": 1.9941841130979166e-05, "loss": 8.1299, "step": 36640 }, { "epoch": 0.10307074899952472, "grad_norm": 22.875, "learning_rate": 1.9941809410502436e-05, "loss": 8.0411, "step": 36650 }, { "epoch": 0.10309887198697343, "grad_norm": 27.625, "learning_rate": 1.9941777681402966e-05, "loss": 7.9705, "step": 36660 }, { "epoch": 0.10312699497442214, "grad_norm": 31.125, "learning_rate": 1.9941745943680774e-05, "loss": 7.3433, "step": 36670 }, { "epoch": 0.10315511796187085, "grad_norm": 27.75, "learning_rate": 1.9941714197335892e-05, "loss": 8.0139, "step": 36680 }, { "epoch": 0.10318324094931956, "grad_norm": 33.75, "learning_rate": 1.9941682442368347e-05, "loss": 7.449, "step": 36690 }, { "epoch": 0.10321136393676827, "grad_norm": 30.75, "learning_rate": 1.994165067877817e-05, "loss": 7.9531, "step": 36700 }, { "epoch": 0.10323948692421699, "grad_norm": 34.5, "learning_rate": 1.9941618906565382e-05, "loss": 7.9519, "step": 36710 }, { "epoch": 0.1032676099116657, "grad_norm": 30.375, "learning_rate": 1.994158712573001e-05, "loss": 7.8351, "step": 36720 }, { "epoch": 0.10329573289911441, "grad_norm": 33.5, "learning_rate": 1.994155533627209e-05, "loss": 8.7328, "step": 36730 }, { "epoch": 0.10332385588656312, "grad_norm": 31.25, "learning_rate": 1.994152353819164e-05, "loss": 7.5858, "step": 36740 }, { "epoch": 0.10335197887401183, "grad_norm": 29.125, "learning_rate": 1.99414917314887e-05, "loss": 7.6278, "step": 36750 }, { "epoch": 0.10338010186146054, "grad_norm": 21.0, "learning_rate": 1.9941459916163284e-05, "loss": 7.8423, "step": 36760 }, { "epoch": 0.10340822484890926, "grad_norm": 31.5, "learning_rate": 1.994142809221543e-05, "loss": 7.5101, "step": 36770 }, { "epoch": 0.10343634783635797, "grad_norm": 24.75, "learning_rate": 1.9941396259645157e-05, "loss": 7.8242, "step": 36780 }, { "epoch": 0.10346447082380666, "grad_norm": 22.5, "learning_rate": 1.9941364418452497e-05, "loss": 8.0069, "step": 36790 }, { "epoch": 0.10349259381125538, "grad_norm": 31.0, "learning_rate": 1.9941332568637476e-05, "loss": 7.8419, "step": 36800 }, { "epoch": 0.10352071679870409, "grad_norm": 32.75, "learning_rate": 1.9941300710200123e-05, "loss": 7.4385, "step": 36810 }, { "epoch": 0.1035488397861528, "grad_norm": 22.0, "learning_rate": 1.9941268843140473e-05, "loss": 7.753, "step": 36820 }, { "epoch": 0.10357696277360151, "grad_norm": 25.875, "learning_rate": 1.994123696745854e-05, "loss": 7.7354, "step": 36830 }, { "epoch": 0.10360508576105022, "grad_norm": 43.75, "learning_rate": 1.9941205083154358e-05, "loss": 7.4854, "step": 36840 }, { "epoch": 0.10363320874849893, "grad_norm": 26.375, "learning_rate": 1.9941173190227954e-05, "loss": 7.4266, "step": 36850 }, { "epoch": 0.10366133173594765, "grad_norm": 24.375, "learning_rate": 1.994114128867936e-05, "loss": 7.648, "step": 36860 }, { "epoch": 0.10368945472339636, "grad_norm": 24.875, "learning_rate": 1.9941109378508594e-05, "loss": 8.2916, "step": 36870 }, { "epoch": 0.10371757771084507, "grad_norm": 23.125, "learning_rate": 1.9941077459715692e-05, "loss": 7.9781, "step": 36880 }, { "epoch": 0.10374570069829378, "grad_norm": 27.625, "learning_rate": 1.994104553230068e-05, "loss": 8.0527, "step": 36890 }, { "epoch": 0.10377382368574249, "grad_norm": 43.25, "learning_rate": 1.9941013596263585e-05, "loss": 8.6009, "step": 36900 }, { "epoch": 0.1038019466731912, "grad_norm": 24.375, "learning_rate": 1.9940981651604434e-05, "loss": 7.3807, "step": 36910 }, { "epoch": 0.10383006966063991, "grad_norm": 26.0, "learning_rate": 1.9940949698323257e-05, "loss": 8.1778, "step": 36920 }, { "epoch": 0.10385819264808863, "grad_norm": 54.5, "learning_rate": 1.9940917736420074e-05, "loss": 8.0029, "step": 36930 }, { "epoch": 0.10388631563553734, "grad_norm": 32.25, "learning_rate": 1.9940885765894926e-05, "loss": 7.7953, "step": 36940 }, { "epoch": 0.10391443862298605, "grad_norm": 39.0, "learning_rate": 1.9940853786747832e-05, "loss": 7.3966, "step": 36950 }, { "epoch": 0.10394256161043475, "grad_norm": 63.0, "learning_rate": 1.9940821798978818e-05, "loss": 7.5035, "step": 36960 }, { "epoch": 0.10397068459788346, "grad_norm": 31.25, "learning_rate": 1.9940789802587916e-05, "loss": 7.3781, "step": 36970 }, { "epoch": 0.10399880758533217, "grad_norm": 34.75, "learning_rate": 1.9940757797575157e-05, "loss": 8.181, "step": 36980 }, { "epoch": 0.10402693057278088, "grad_norm": 30.875, "learning_rate": 1.994072578394056e-05, "loss": 7.4985, "step": 36990 }, { "epoch": 0.1040550535602296, "grad_norm": 38.5, "learning_rate": 1.9940693761684157e-05, "loss": 6.9427, "step": 37000 }, { "epoch": 0.1040831765476783, "grad_norm": 34.0, "learning_rate": 1.9940661730805977e-05, "loss": 7.6136, "step": 37010 }, { "epoch": 0.10411129953512702, "grad_norm": 29.375, "learning_rate": 1.9940629691306048e-05, "loss": 7.9447, "step": 37020 }, { "epoch": 0.10413942252257573, "grad_norm": 26.875, "learning_rate": 1.9940597643184394e-05, "loss": 8.3829, "step": 37030 }, { "epoch": 0.10416754551002444, "grad_norm": 32.75, "learning_rate": 1.994056558644105e-05, "loss": 8.5726, "step": 37040 }, { "epoch": 0.10419566849747315, "grad_norm": 32.25, "learning_rate": 1.9940533521076035e-05, "loss": 8.2909, "step": 37050 }, { "epoch": 0.10422379148492186, "grad_norm": 31.625, "learning_rate": 1.994050144708938e-05, "loss": 7.454, "step": 37060 }, { "epoch": 0.10425191447237057, "grad_norm": 30.75, "learning_rate": 1.9940469364481115e-05, "loss": 8.4485, "step": 37070 }, { "epoch": 0.10428003745981929, "grad_norm": 23.875, "learning_rate": 1.994043727325127e-05, "loss": 7.6704, "step": 37080 }, { "epoch": 0.104308160447268, "grad_norm": 30.25, "learning_rate": 1.9940405173399865e-05, "loss": 7.3739, "step": 37090 }, { "epoch": 0.10433628343471671, "grad_norm": 38.5, "learning_rate": 1.9940373064926938e-05, "loss": 8.6649, "step": 37100 }, { "epoch": 0.10436440642216542, "grad_norm": 41.25, "learning_rate": 1.9940340947832503e-05, "loss": 7.579, "step": 37110 }, { "epoch": 0.10439252940961412, "grad_norm": 35.5, "learning_rate": 1.9940308822116604e-05, "loss": 7.8713, "step": 37120 }, { "epoch": 0.10442065239706283, "grad_norm": 28.125, "learning_rate": 1.9940276687779258e-05, "loss": 7.9107, "step": 37130 }, { "epoch": 0.10444877538451154, "grad_norm": 33.5, "learning_rate": 1.9940244544820492e-05, "loss": 8.5009, "step": 37140 }, { "epoch": 0.10447689837196025, "grad_norm": 20.125, "learning_rate": 1.994021239324034e-05, "loss": 8.3403, "step": 37150 }, { "epoch": 0.10450502135940896, "grad_norm": 50.75, "learning_rate": 1.994018023303883e-05, "loss": 8.3618, "step": 37160 }, { "epoch": 0.10453314434685768, "grad_norm": 24.0, "learning_rate": 1.9940148064215984e-05, "loss": 7.5348, "step": 37170 }, { "epoch": 0.10456126733430639, "grad_norm": 56.25, "learning_rate": 1.9940115886771833e-05, "loss": 8.145, "step": 37180 }, { "epoch": 0.1045893903217551, "grad_norm": 29.875, "learning_rate": 1.994008370070641e-05, "loss": 8.2811, "step": 37190 }, { "epoch": 0.10461751330920381, "grad_norm": 33.25, "learning_rate": 1.9940051506019734e-05, "loss": 7.3916, "step": 37200 }, { "epoch": 0.10464563629665252, "grad_norm": 27.625, "learning_rate": 1.9940019302711836e-05, "loss": 7.7962, "step": 37210 }, { "epoch": 0.10467375928410123, "grad_norm": 23.375, "learning_rate": 1.9939987090782748e-05, "loss": 8.1145, "step": 37220 }, { "epoch": 0.10470188227154995, "grad_norm": 41.0, "learning_rate": 1.9939954870232495e-05, "loss": 8.0341, "step": 37230 }, { "epoch": 0.10473000525899866, "grad_norm": 40.25, "learning_rate": 1.99399226410611e-05, "loss": 8.7699, "step": 37240 }, { "epoch": 0.10475812824644737, "grad_norm": 28.0, "learning_rate": 1.99398904032686e-05, "loss": 7.3443, "step": 37250 }, { "epoch": 0.10478625123389608, "grad_norm": 24.125, "learning_rate": 1.993985815685502e-05, "loss": 7.2997, "step": 37260 }, { "epoch": 0.10481437422134479, "grad_norm": 24.625, "learning_rate": 1.9939825901820384e-05, "loss": 8.0188, "step": 37270 }, { "epoch": 0.10484249720879349, "grad_norm": 30.125, "learning_rate": 1.9939793638164722e-05, "loss": 7.9894, "step": 37280 }, { "epoch": 0.1048706201962422, "grad_norm": 38.75, "learning_rate": 1.9939761365888067e-05, "loss": 7.5391, "step": 37290 }, { "epoch": 0.10489874318369091, "grad_norm": 42.25, "learning_rate": 1.9939729084990436e-05, "loss": 8.465, "step": 37300 }, { "epoch": 0.10492686617113962, "grad_norm": 29.125, "learning_rate": 1.993969679547187e-05, "loss": 7.17, "step": 37310 }, { "epoch": 0.10495498915858834, "grad_norm": 28.75, "learning_rate": 1.9939664497332386e-05, "loss": 8.027, "step": 37320 }, { "epoch": 0.10498311214603705, "grad_norm": 25.875, "learning_rate": 1.9939632190572024e-05, "loss": 7.7895, "step": 37330 }, { "epoch": 0.10501123513348576, "grad_norm": 31.875, "learning_rate": 1.9939599875190798e-05, "loss": 7.9384, "step": 37340 }, { "epoch": 0.10503935812093447, "grad_norm": 22.375, "learning_rate": 1.9939567551188745e-05, "loss": 7.5696, "step": 37350 }, { "epoch": 0.10506748110838318, "grad_norm": 25.5, "learning_rate": 1.9939535218565888e-05, "loss": 8.0689, "step": 37360 }, { "epoch": 0.1050956040958319, "grad_norm": 27.125, "learning_rate": 1.9939502877322263e-05, "loss": 7.6621, "step": 37370 }, { "epoch": 0.1051237270832806, "grad_norm": 32.25, "learning_rate": 1.9939470527457888e-05, "loss": 7.7695, "step": 37380 }, { "epoch": 0.10515185007072932, "grad_norm": 43.25, "learning_rate": 1.9939438168972795e-05, "loss": 7.6483, "step": 37390 }, { "epoch": 0.10517997305817803, "grad_norm": 24.0, "learning_rate": 1.9939405801867017e-05, "loss": 8.0962, "step": 37400 }, { "epoch": 0.10520809604562674, "grad_norm": 23.0, "learning_rate": 1.9939373426140575e-05, "loss": 8.2683, "step": 37410 }, { "epoch": 0.10523621903307545, "grad_norm": 32.5, "learning_rate": 1.9939341041793506e-05, "loss": 8.4331, "step": 37420 }, { "epoch": 0.10526434202052416, "grad_norm": 31.5, "learning_rate": 1.9939308648825827e-05, "loss": 7.7573, "step": 37430 }, { "epoch": 0.10529246500797286, "grad_norm": 20.25, "learning_rate": 1.9939276247237572e-05, "loss": 8.1228, "step": 37440 }, { "epoch": 0.10532058799542157, "grad_norm": 39.25, "learning_rate": 1.993924383702877e-05, "loss": 8.1064, "step": 37450 }, { "epoch": 0.10534871098287028, "grad_norm": 36.5, "learning_rate": 1.9939211418199445e-05, "loss": 8.1962, "step": 37460 }, { "epoch": 0.105376833970319, "grad_norm": 32.5, "learning_rate": 1.9939178990749627e-05, "loss": 8.1945, "step": 37470 }, { "epoch": 0.1054049569577677, "grad_norm": 25.0, "learning_rate": 1.993914655467935e-05, "loss": 7.8759, "step": 37480 }, { "epoch": 0.10543307994521642, "grad_norm": 33.25, "learning_rate": 1.9939114109988633e-05, "loss": 8.132, "step": 37490 }, { "epoch": 0.10546120293266513, "grad_norm": 22.5, "learning_rate": 1.9939081656677507e-05, "loss": 8.2575, "step": 37500 }, { "epoch": 0.10548932592011384, "grad_norm": 23.5, "learning_rate": 1.9939049194746004e-05, "loss": 8.2233, "step": 37510 }, { "epoch": 0.10551744890756255, "grad_norm": 24.0, "learning_rate": 1.993901672419415e-05, "loss": 7.0798, "step": 37520 }, { "epoch": 0.10554557189501126, "grad_norm": 22.875, "learning_rate": 1.993898424502197e-05, "loss": 7.7662, "step": 37530 }, { "epoch": 0.10557369488245998, "grad_norm": 28.375, "learning_rate": 1.9938951757229494e-05, "loss": 8.581, "step": 37540 }, { "epoch": 0.10560181786990869, "grad_norm": 38.25, "learning_rate": 1.9938919260816754e-05, "loss": 8.3698, "step": 37550 }, { "epoch": 0.1056299408573574, "grad_norm": 25.125, "learning_rate": 1.9938886755783773e-05, "loss": 7.6149, "step": 37560 }, { "epoch": 0.10565806384480611, "grad_norm": 26.625, "learning_rate": 1.9938854242130582e-05, "loss": 7.6656, "step": 37570 }, { "epoch": 0.10568618683225482, "grad_norm": 60.75, "learning_rate": 1.993882171985721e-05, "loss": 7.4342, "step": 37580 }, { "epoch": 0.10571430981970353, "grad_norm": 38.0, "learning_rate": 1.9938789188963682e-05, "loss": 7.4924, "step": 37590 }, { "epoch": 0.10574243280715223, "grad_norm": 30.0, "learning_rate": 1.9938756649450026e-05, "loss": 8.1272, "step": 37600 }, { "epoch": 0.10577055579460094, "grad_norm": 21.375, "learning_rate": 1.9938724101316272e-05, "loss": 7.9996, "step": 37610 }, { "epoch": 0.10579867878204965, "grad_norm": 22.25, "learning_rate": 1.993869154456245e-05, "loss": 7.8753, "step": 37620 }, { "epoch": 0.10582680176949837, "grad_norm": 24.875, "learning_rate": 1.993865897918859e-05, "loss": 8.1137, "step": 37630 }, { "epoch": 0.10585492475694708, "grad_norm": 36.75, "learning_rate": 1.993862640519471e-05, "loss": 7.5714, "step": 37640 }, { "epoch": 0.10588304774439579, "grad_norm": 40.75, "learning_rate": 1.993859382258085e-05, "loss": 8.6856, "step": 37650 }, { "epoch": 0.1059111707318445, "grad_norm": 43.5, "learning_rate": 1.993856123134703e-05, "loss": 8.0123, "step": 37660 }, { "epoch": 0.10593929371929321, "grad_norm": 25.875, "learning_rate": 1.9938528631493287e-05, "loss": 7.2639, "step": 37670 }, { "epoch": 0.10596741670674192, "grad_norm": 32.75, "learning_rate": 1.993849602301964e-05, "loss": 8.0832, "step": 37680 }, { "epoch": 0.10599553969419064, "grad_norm": 30.75, "learning_rate": 1.993846340592612e-05, "loss": 7.6851, "step": 37690 }, { "epoch": 0.10602366268163935, "grad_norm": 29.875, "learning_rate": 1.993843078021276e-05, "loss": 7.7075, "step": 37700 }, { "epoch": 0.10605178566908806, "grad_norm": 50.0, "learning_rate": 1.9938398145879582e-05, "loss": 7.9974, "step": 37710 }, { "epoch": 0.10607990865653677, "grad_norm": 28.625, "learning_rate": 1.9938365502926615e-05, "loss": 7.8758, "step": 37720 }, { "epoch": 0.10610803164398548, "grad_norm": 27.625, "learning_rate": 1.9938332851353892e-05, "loss": 8.1574, "step": 37730 }, { "epoch": 0.10613615463143419, "grad_norm": 30.0, "learning_rate": 1.993830019116144e-05, "loss": 8.2086, "step": 37740 }, { "epoch": 0.1061642776188829, "grad_norm": 32.75, "learning_rate": 1.9938267522349282e-05, "loss": 8.6949, "step": 37750 }, { "epoch": 0.10619240060633162, "grad_norm": 27.75, "learning_rate": 1.9938234844917453e-05, "loss": 7.2705, "step": 37760 }, { "epoch": 0.10622052359378031, "grad_norm": 24.75, "learning_rate": 1.993820215886598e-05, "loss": 8.1567, "step": 37770 }, { "epoch": 0.10624864658122903, "grad_norm": 44.75, "learning_rate": 1.993816946419489e-05, "loss": 8.4836, "step": 37780 }, { "epoch": 0.10627676956867774, "grad_norm": 31.25, "learning_rate": 1.9938136760904208e-05, "loss": 7.6923, "step": 37790 }, { "epoch": 0.10630489255612645, "grad_norm": 40.75, "learning_rate": 1.9938104048993967e-05, "loss": 8.018, "step": 37800 }, { "epoch": 0.10633301554357516, "grad_norm": 30.75, "learning_rate": 1.9938071328464193e-05, "loss": 7.7576, "step": 37810 }, { "epoch": 0.10636113853102387, "grad_norm": 60.0, "learning_rate": 1.9938038599314918e-05, "loss": 7.9061, "step": 37820 }, { "epoch": 0.10638926151847258, "grad_norm": 26.125, "learning_rate": 1.9938005861546167e-05, "loss": 8.0266, "step": 37830 }, { "epoch": 0.1064173845059213, "grad_norm": 31.125, "learning_rate": 1.9937973115157972e-05, "loss": 7.648, "step": 37840 }, { "epoch": 0.10644550749337, "grad_norm": 29.5, "learning_rate": 1.9937940360150353e-05, "loss": 8.1198, "step": 37850 }, { "epoch": 0.10647363048081872, "grad_norm": 41.75, "learning_rate": 1.9937907596523346e-05, "loss": 7.8347, "step": 37860 }, { "epoch": 0.10650175346826743, "grad_norm": 24.5, "learning_rate": 1.993787482427698e-05, "loss": 8.3854, "step": 37870 }, { "epoch": 0.10652987645571614, "grad_norm": 51.75, "learning_rate": 1.993784204341128e-05, "loss": 7.7123, "step": 37880 }, { "epoch": 0.10655799944316485, "grad_norm": 26.625, "learning_rate": 1.9937809253926273e-05, "loss": 7.8755, "step": 37890 }, { "epoch": 0.10658612243061356, "grad_norm": 27.875, "learning_rate": 1.9937776455821994e-05, "loss": 7.5196, "step": 37900 }, { "epoch": 0.10661424541806228, "grad_norm": 34.0, "learning_rate": 1.9937743649098462e-05, "loss": 7.767, "step": 37910 }, { "epoch": 0.10664236840551099, "grad_norm": 41.25, "learning_rate": 1.9937710833755717e-05, "loss": 7.3884, "step": 37920 }, { "epoch": 0.10667049139295968, "grad_norm": 29.625, "learning_rate": 1.9937678009793778e-05, "loss": 7.2371, "step": 37930 }, { "epoch": 0.1066986143804084, "grad_norm": 43.75, "learning_rate": 1.9937645177212676e-05, "loss": 7.7799, "step": 37940 }, { "epoch": 0.10672673736785711, "grad_norm": 31.375, "learning_rate": 1.993761233601244e-05, "loss": 7.306, "step": 37950 }, { "epoch": 0.10675486035530582, "grad_norm": 26.625, "learning_rate": 1.9937579486193096e-05, "loss": 7.4619, "step": 37960 }, { "epoch": 0.10678298334275453, "grad_norm": 24.5, "learning_rate": 1.993754662775468e-05, "loss": 7.2675, "step": 37970 }, { "epoch": 0.10681110633020324, "grad_norm": 36.75, "learning_rate": 1.9937513760697212e-05, "loss": 7.9504, "step": 37980 }, { "epoch": 0.10683922931765195, "grad_norm": 59.25, "learning_rate": 1.9937480885020727e-05, "loss": 7.9405, "step": 37990 }, { "epoch": 0.10686735230510067, "grad_norm": 26.5, "learning_rate": 1.9937448000725248e-05, "loss": 7.982, "step": 38000 }, { "epoch": 0.10689547529254938, "grad_norm": 26.875, "learning_rate": 1.993741510781081e-05, "loss": 8.093, "step": 38010 }, { "epoch": 0.10692359827999809, "grad_norm": 26.25, "learning_rate": 1.9937382206277433e-05, "loss": 7.3795, "step": 38020 }, { "epoch": 0.1069517212674468, "grad_norm": 22.75, "learning_rate": 1.993734929612515e-05, "loss": 7.485, "step": 38030 }, { "epoch": 0.10697984425489551, "grad_norm": 22.5, "learning_rate": 1.993731637735399e-05, "loss": 7.463, "step": 38040 }, { "epoch": 0.10700796724234422, "grad_norm": 46.25, "learning_rate": 1.9937283449963986e-05, "loss": 7.8458, "step": 38050 }, { "epoch": 0.10703609022979294, "grad_norm": 30.875, "learning_rate": 1.993725051395516e-05, "loss": 8.2385, "step": 38060 }, { "epoch": 0.10706421321724165, "grad_norm": 36.25, "learning_rate": 1.9937217569327537e-05, "loss": 8.235, "step": 38070 }, { "epoch": 0.10709233620469036, "grad_norm": 25.25, "learning_rate": 1.9937184616081156e-05, "loss": 7.8194, "step": 38080 }, { "epoch": 0.10712045919213906, "grad_norm": 39.75, "learning_rate": 1.993715165421604e-05, "loss": 7.691, "step": 38090 }, { "epoch": 0.10714858217958777, "grad_norm": 49.5, "learning_rate": 1.9937118683732217e-05, "loss": 7.9876, "step": 38100 }, { "epoch": 0.10717670516703648, "grad_norm": 39.0, "learning_rate": 1.993708570462972e-05, "loss": 8.2332, "step": 38110 }, { "epoch": 0.10720482815448519, "grad_norm": 44.0, "learning_rate": 1.9937052716908573e-05, "loss": 7.9076, "step": 38120 }, { "epoch": 0.1072329511419339, "grad_norm": 22.25, "learning_rate": 1.99370197205688e-05, "loss": 8.1853, "step": 38130 }, { "epoch": 0.10726107412938261, "grad_norm": 23.5, "learning_rate": 1.9936986715610443e-05, "loss": 8.5357, "step": 38140 }, { "epoch": 0.10728919711683133, "grad_norm": 40.75, "learning_rate": 1.993695370203352e-05, "loss": 7.591, "step": 38150 }, { "epoch": 0.10731732010428004, "grad_norm": 20.125, "learning_rate": 1.9936920679838066e-05, "loss": 7.7734, "step": 38160 }, { "epoch": 0.10734544309172875, "grad_norm": 28.0, "learning_rate": 1.99368876490241e-05, "loss": 7.7951, "step": 38170 }, { "epoch": 0.10737356607917746, "grad_norm": 21.125, "learning_rate": 1.993685460959166e-05, "loss": 8.2128, "step": 38180 }, { "epoch": 0.10740168906662617, "grad_norm": 46.0, "learning_rate": 1.9936821561540776e-05, "loss": 7.8836, "step": 38190 }, { "epoch": 0.10742981205407488, "grad_norm": 24.5, "learning_rate": 1.993678850487147e-05, "loss": 7.9884, "step": 38200 }, { "epoch": 0.1074579350415236, "grad_norm": 36.5, "learning_rate": 1.9936755439583773e-05, "loss": 7.1352, "step": 38210 }, { "epoch": 0.1074860580289723, "grad_norm": 62.75, "learning_rate": 1.9936722365677708e-05, "loss": 8.0675, "step": 38220 }, { "epoch": 0.10751418101642102, "grad_norm": 24.375, "learning_rate": 1.9936689283153317e-05, "loss": 7.6122, "step": 38230 }, { "epoch": 0.10754230400386973, "grad_norm": 27.25, "learning_rate": 1.993665619201062e-05, "loss": 7.8434, "step": 38240 }, { "epoch": 0.10757042699131843, "grad_norm": 28.375, "learning_rate": 1.9936623092249644e-05, "loss": 7.8096, "step": 38250 }, { "epoch": 0.10759854997876714, "grad_norm": 24.75, "learning_rate": 1.9936589983870423e-05, "loss": 8.1659, "step": 38260 }, { "epoch": 0.10762667296621585, "grad_norm": 33.5, "learning_rate": 1.9936556866872984e-05, "loss": 8.0282, "step": 38270 }, { "epoch": 0.10765479595366456, "grad_norm": 24.5, "learning_rate": 1.993652374125735e-05, "loss": 7.9018, "step": 38280 }, { "epoch": 0.10768291894111327, "grad_norm": 24.0, "learning_rate": 1.993649060702356e-05, "loss": 7.9096, "step": 38290 }, { "epoch": 0.10771104192856198, "grad_norm": 35.75, "learning_rate": 1.9936457464171636e-05, "loss": 7.3442, "step": 38300 }, { "epoch": 0.1077391649160107, "grad_norm": 31.0, "learning_rate": 1.9936424312701606e-05, "loss": 8.0277, "step": 38310 }, { "epoch": 0.10776728790345941, "grad_norm": 30.375, "learning_rate": 1.9936391152613504e-05, "loss": 7.9162, "step": 38320 }, { "epoch": 0.10779541089090812, "grad_norm": 40.0, "learning_rate": 1.9936357983907353e-05, "loss": 8.5291, "step": 38330 }, { "epoch": 0.10782353387835683, "grad_norm": 34.75, "learning_rate": 1.9936324806583187e-05, "loss": 8.0942, "step": 38340 }, { "epoch": 0.10785165686580554, "grad_norm": 22.125, "learning_rate": 1.993629162064103e-05, "loss": 7.735, "step": 38350 }, { "epoch": 0.10787977985325425, "grad_norm": 24.875, "learning_rate": 1.9936258426080916e-05, "loss": 7.7416, "step": 38360 }, { "epoch": 0.10790790284070297, "grad_norm": 33.75, "learning_rate": 1.993622522290287e-05, "loss": 7.2172, "step": 38370 }, { "epoch": 0.10793602582815168, "grad_norm": 23.5, "learning_rate": 1.993619201110692e-05, "loss": 7.8767, "step": 38380 }, { "epoch": 0.10796414881560039, "grad_norm": 43.0, "learning_rate": 1.9936158790693095e-05, "loss": 7.7351, "step": 38390 }, { "epoch": 0.1079922718030491, "grad_norm": 46.0, "learning_rate": 1.9936125561661427e-05, "loss": 8.4808, "step": 38400 }, { "epoch": 0.10802039479049781, "grad_norm": 28.75, "learning_rate": 1.9936092324011948e-05, "loss": 7.7932, "step": 38410 }, { "epoch": 0.10804851777794651, "grad_norm": 27.25, "learning_rate": 1.9936059077744673e-05, "loss": 7.4869, "step": 38420 }, { "epoch": 0.10807664076539522, "grad_norm": 29.5, "learning_rate": 1.9936025822859645e-05, "loss": 8.0268, "step": 38430 }, { "epoch": 0.10810476375284393, "grad_norm": 25.875, "learning_rate": 1.993599255935689e-05, "loss": 8.8957, "step": 38440 }, { "epoch": 0.10813288674029264, "grad_norm": 26.375, "learning_rate": 1.993595928723643e-05, "loss": 7.9603, "step": 38450 }, { "epoch": 0.10816100972774136, "grad_norm": 32.0, "learning_rate": 1.99359260064983e-05, "loss": 7.707, "step": 38460 }, { "epoch": 0.10818913271519007, "grad_norm": 34.0, "learning_rate": 1.9935892717142527e-05, "loss": 8.0403, "step": 38470 }, { "epoch": 0.10821725570263878, "grad_norm": 31.375, "learning_rate": 1.993585941916914e-05, "loss": 8.6465, "step": 38480 }, { "epoch": 0.10824537869008749, "grad_norm": 23.625, "learning_rate": 1.9935826112578164e-05, "loss": 7.9494, "step": 38490 }, { "epoch": 0.1082735016775362, "grad_norm": 29.5, "learning_rate": 1.9935792797369638e-05, "loss": 6.708, "step": 38500 }, { "epoch": 0.10830162466498491, "grad_norm": 28.25, "learning_rate": 1.9935759473543582e-05, "loss": 7.4881, "step": 38510 }, { "epoch": 0.10832974765243363, "grad_norm": 27.125, "learning_rate": 1.9935726141100026e-05, "loss": 7.9426, "step": 38520 }, { "epoch": 0.10835787063988234, "grad_norm": 29.375, "learning_rate": 1.9935692800039005e-05, "loss": 8.0752, "step": 38530 }, { "epoch": 0.10838599362733105, "grad_norm": 36.75, "learning_rate": 1.9935659450360538e-05, "loss": 7.7482, "step": 38540 }, { "epoch": 0.10841411661477976, "grad_norm": 48.25, "learning_rate": 1.9935626092064665e-05, "loss": 7.7859, "step": 38550 }, { "epoch": 0.10844223960222847, "grad_norm": 31.25, "learning_rate": 1.9935592725151405e-05, "loss": 7.3673, "step": 38560 }, { "epoch": 0.10847036258967718, "grad_norm": 33.25, "learning_rate": 1.993555934962079e-05, "loss": 8.2916, "step": 38570 }, { "epoch": 0.10849848557712588, "grad_norm": 29.625, "learning_rate": 1.9935525965472854e-05, "loss": 8.0459, "step": 38580 }, { "epoch": 0.10852660856457459, "grad_norm": 25.5, "learning_rate": 1.9935492572707623e-05, "loss": 8.422, "step": 38590 }, { "epoch": 0.1085547315520233, "grad_norm": 26.625, "learning_rate": 1.9935459171325123e-05, "loss": 7.9267, "step": 38600 }, { "epoch": 0.10858285453947202, "grad_norm": 24.375, "learning_rate": 1.9935425761325384e-05, "loss": 8.7817, "step": 38610 }, { "epoch": 0.10861097752692073, "grad_norm": 23.125, "learning_rate": 1.9935392342708436e-05, "loss": 7.4169, "step": 38620 }, { "epoch": 0.10863910051436944, "grad_norm": 23.125, "learning_rate": 1.993535891547431e-05, "loss": 7.3181, "step": 38630 }, { "epoch": 0.10866722350181815, "grad_norm": 32.25, "learning_rate": 1.9935325479623032e-05, "loss": 7.9157, "step": 38640 }, { "epoch": 0.10869534648926686, "grad_norm": 37.75, "learning_rate": 1.9935292035154632e-05, "loss": 8.0658, "step": 38650 }, { "epoch": 0.10872346947671557, "grad_norm": 43.5, "learning_rate": 1.993525858206914e-05, "loss": 7.5357, "step": 38660 }, { "epoch": 0.10875159246416428, "grad_norm": 28.875, "learning_rate": 1.993522512036658e-05, "loss": 7.9626, "step": 38670 }, { "epoch": 0.108779715451613, "grad_norm": 28.125, "learning_rate": 1.993519165004699e-05, "loss": 8.3615, "step": 38680 }, { "epoch": 0.10880783843906171, "grad_norm": 36.75, "learning_rate": 1.9935158171110393e-05, "loss": 7.46, "step": 38690 }, { "epoch": 0.10883596142651042, "grad_norm": 41.25, "learning_rate": 1.993512468355682e-05, "loss": 7.8048, "step": 38700 }, { "epoch": 0.10886408441395913, "grad_norm": 50.0, "learning_rate": 1.9935091187386294e-05, "loss": 7.4619, "step": 38710 }, { "epoch": 0.10889220740140784, "grad_norm": 23.125, "learning_rate": 1.9935057682598855e-05, "loss": 7.8125, "step": 38720 }, { "epoch": 0.10892033038885655, "grad_norm": 33.5, "learning_rate": 1.9935024169194523e-05, "loss": 7.2023, "step": 38730 }, { "epoch": 0.10894845337630525, "grad_norm": 25.625, "learning_rate": 1.9934990647173332e-05, "loss": 7.8011, "step": 38740 }, { "epoch": 0.10897657636375396, "grad_norm": 31.5, "learning_rate": 1.993495711653531e-05, "loss": 7.6673, "step": 38750 }, { "epoch": 0.10900469935120267, "grad_norm": 28.5, "learning_rate": 1.9934923577280483e-05, "loss": 8.1333, "step": 38760 }, { "epoch": 0.10903282233865139, "grad_norm": 29.5, "learning_rate": 1.9934890029408883e-05, "loss": 7.2259, "step": 38770 }, { "epoch": 0.1090609453261001, "grad_norm": 24.125, "learning_rate": 1.9934856472920542e-05, "loss": 8.1442, "step": 38780 }, { "epoch": 0.10908906831354881, "grad_norm": 34.75, "learning_rate": 1.993482290781548e-05, "loss": 7.69, "step": 38790 }, { "epoch": 0.10911719130099752, "grad_norm": 37.25, "learning_rate": 1.9934789334093736e-05, "loss": 8.0078, "step": 38800 }, { "epoch": 0.10914531428844623, "grad_norm": 29.875, "learning_rate": 1.993475575175534e-05, "loss": 7.8476, "step": 38810 }, { "epoch": 0.10917343727589494, "grad_norm": 28.375, "learning_rate": 1.9934722160800305e-05, "loss": 8.8081, "step": 38820 }, { "epoch": 0.10920156026334366, "grad_norm": 25.375, "learning_rate": 1.993468856122868e-05, "loss": 7.4624, "step": 38830 }, { "epoch": 0.10922968325079237, "grad_norm": 26.375, "learning_rate": 1.993465495304048e-05, "loss": 8.3472, "step": 38840 }, { "epoch": 0.10925780623824108, "grad_norm": 20.125, "learning_rate": 1.9934621336235744e-05, "loss": 7.6499, "step": 38850 }, { "epoch": 0.10928592922568979, "grad_norm": 39.75, "learning_rate": 1.9934587710814493e-05, "loss": 8.4416, "step": 38860 }, { "epoch": 0.1093140522131385, "grad_norm": 22.125, "learning_rate": 1.9934554076776762e-05, "loss": 7.7076, "step": 38870 }, { "epoch": 0.10934217520058721, "grad_norm": 54.75, "learning_rate": 1.9934520434122575e-05, "loss": 7.9891, "step": 38880 }, { "epoch": 0.10937029818803592, "grad_norm": 32.0, "learning_rate": 1.993448678285197e-05, "loss": 7.6102, "step": 38890 }, { "epoch": 0.10939842117548462, "grad_norm": 20.5, "learning_rate": 1.9934453122964964e-05, "loss": 7.247, "step": 38900 }, { "epoch": 0.10942654416293333, "grad_norm": 22.0, "learning_rate": 1.9934419454461595e-05, "loss": 9.1693, "step": 38910 }, { "epoch": 0.10945466715038205, "grad_norm": 51.75, "learning_rate": 1.993438577734189e-05, "loss": 7.5433, "step": 38920 }, { "epoch": 0.10948279013783076, "grad_norm": 44.5, "learning_rate": 1.993435209160588e-05, "loss": 8.2855, "step": 38930 }, { "epoch": 0.10951091312527947, "grad_norm": 52.5, "learning_rate": 1.9934318397253593e-05, "loss": 7.7567, "step": 38940 }, { "epoch": 0.10953903611272818, "grad_norm": 31.75, "learning_rate": 1.9934284694285052e-05, "loss": 8.002, "step": 38950 }, { "epoch": 0.10956715910017689, "grad_norm": 28.0, "learning_rate": 1.99342509827003e-05, "loss": 7.9833, "step": 38960 }, { "epoch": 0.1095952820876256, "grad_norm": 27.75, "learning_rate": 1.9934217262499352e-05, "loss": 8.8831, "step": 38970 }, { "epoch": 0.10962340507507431, "grad_norm": 38.25, "learning_rate": 1.9934183533682243e-05, "loss": 8.0133, "step": 38980 }, { "epoch": 0.10965152806252303, "grad_norm": 41.25, "learning_rate": 1.9934149796249002e-05, "loss": 8.0578, "step": 38990 }, { "epoch": 0.10967965104997174, "grad_norm": 36.75, "learning_rate": 1.993411605019966e-05, "loss": 7.9605, "step": 39000 }, { "epoch": 0.10970777403742045, "grad_norm": 22.25, "learning_rate": 1.9934082295534244e-05, "loss": 7.3487, "step": 39010 }, { "epoch": 0.10973589702486916, "grad_norm": 26.875, "learning_rate": 1.9934048532252787e-05, "loss": 8.9964, "step": 39020 }, { "epoch": 0.10976402001231787, "grad_norm": 23.375, "learning_rate": 1.9934014760355314e-05, "loss": 8.1482, "step": 39030 }, { "epoch": 0.10979214299976658, "grad_norm": 30.75, "learning_rate": 1.9933980979841857e-05, "loss": 7.5367, "step": 39040 }, { "epoch": 0.1098202659872153, "grad_norm": 29.25, "learning_rate": 1.9933947190712444e-05, "loss": 7.764, "step": 39050 }, { "epoch": 0.10984838897466401, "grad_norm": 21.5, "learning_rate": 1.9933913392967103e-05, "loss": 8.0977, "step": 39060 }, { "epoch": 0.1098765119621127, "grad_norm": 22.875, "learning_rate": 1.9933879586605866e-05, "loss": 8.1165, "step": 39070 }, { "epoch": 0.10990463494956142, "grad_norm": 23.125, "learning_rate": 1.9933845771628758e-05, "loss": 7.4995, "step": 39080 }, { "epoch": 0.10993275793701013, "grad_norm": 26.5, "learning_rate": 1.9933811948035814e-05, "loss": 8.1445, "step": 39090 }, { "epoch": 0.10996088092445884, "grad_norm": 23.375, "learning_rate": 1.993377811582706e-05, "loss": 8.1046, "step": 39100 }, { "epoch": 0.10998900391190755, "grad_norm": 28.25, "learning_rate": 1.9933744275002527e-05, "loss": 8.1115, "step": 39110 }, { "epoch": 0.11001712689935626, "grad_norm": 21.5, "learning_rate": 1.9933710425562244e-05, "loss": 8.5553, "step": 39120 }, { "epoch": 0.11004524988680497, "grad_norm": 28.75, "learning_rate": 1.993367656750624e-05, "loss": 7.7598, "step": 39130 }, { "epoch": 0.11007337287425369, "grad_norm": 29.875, "learning_rate": 1.9933642700834542e-05, "loss": 7.7913, "step": 39140 }, { "epoch": 0.1101014958617024, "grad_norm": 23.375, "learning_rate": 1.993360882554718e-05, "loss": 8.078, "step": 39150 }, { "epoch": 0.11012961884915111, "grad_norm": 45.5, "learning_rate": 1.993357494164419e-05, "loss": 7.9046, "step": 39160 }, { "epoch": 0.11015774183659982, "grad_norm": 37.25, "learning_rate": 1.9933541049125594e-05, "loss": 7.7218, "step": 39170 }, { "epoch": 0.11018586482404853, "grad_norm": 33.5, "learning_rate": 1.9933507147991422e-05, "loss": 8.1146, "step": 39180 }, { "epoch": 0.11021398781149724, "grad_norm": 34.75, "learning_rate": 1.9933473238241705e-05, "loss": 8.322, "step": 39190 }, { "epoch": 0.11024211079894596, "grad_norm": 26.375, "learning_rate": 1.9933439319876475e-05, "loss": 7.8498, "step": 39200 }, { "epoch": 0.11027023378639467, "grad_norm": 44.25, "learning_rate": 1.9933405392895756e-05, "loss": 7.5955, "step": 39210 }, { "epoch": 0.11029835677384338, "grad_norm": 27.0, "learning_rate": 1.9933371457299585e-05, "loss": 7.3886, "step": 39220 }, { "epoch": 0.11032647976129208, "grad_norm": 23.875, "learning_rate": 1.9933337513087984e-05, "loss": 7.68, "step": 39230 }, { "epoch": 0.11035460274874079, "grad_norm": 46.0, "learning_rate": 1.9933303560260984e-05, "loss": 8.0385, "step": 39240 }, { "epoch": 0.1103827257361895, "grad_norm": 31.375, "learning_rate": 1.9933269598818615e-05, "loss": 7.9032, "step": 39250 }, { "epoch": 0.11041084872363821, "grad_norm": 31.125, "learning_rate": 1.993323562876091e-05, "loss": 8.1931, "step": 39260 }, { "epoch": 0.11043897171108692, "grad_norm": 33.0, "learning_rate": 1.9933201650087894e-05, "loss": 7.5755, "step": 39270 }, { "epoch": 0.11046709469853563, "grad_norm": 22.625, "learning_rate": 1.99331676627996e-05, "loss": 7.5847, "step": 39280 }, { "epoch": 0.11049521768598435, "grad_norm": 27.25, "learning_rate": 1.993313366689605e-05, "loss": 8.3142, "step": 39290 }, { "epoch": 0.11052334067343306, "grad_norm": 31.375, "learning_rate": 1.9933099662377286e-05, "loss": 7.5893, "step": 39300 }, { "epoch": 0.11055146366088177, "grad_norm": 26.5, "learning_rate": 1.993306564924333e-05, "loss": 7.9664, "step": 39310 }, { "epoch": 0.11057958664833048, "grad_norm": 61.25, "learning_rate": 1.9933031627494207e-05, "loss": 7.7768, "step": 39320 }, { "epoch": 0.11060770963577919, "grad_norm": 28.625, "learning_rate": 1.9932997597129953e-05, "loss": 7.6146, "step": 39330 }, { "epoch": 0.1106358326232279, "grad_norm": 40.5, "learning_rate": 1.9932963558150598e-05, "loss": 8.3587, "step": 39340 }, { "epoch": 0.11066395561067661, "grad_norm": 33.75, "learning_rate": 1.993292951055617e-05, "loss": 8.1968, "step": 39350 }, { "epoch": 0.11069207859812533, "grad_norm": 35.0, "learning_rate": 1.9932895454346694e-05, "loss": 8.0016, "step": 39360 }, { "epoch": 0.11072020158557404, "grad_norm": 37.75, "learning_rate": 1.993286138952221e-05, "loss": 7.9206, "step": 39370 }, { "epoch": 0.11074832457302275, "grad_norm": 30.25, "learning_rate": 1.9932827316082735e-05, "loss": 7.5879, "step": 39380 }, { "epoch": 0.11077644756047145, "grad_norm": 33.5, "learning_rate": 1.9932793234028306e-05, "loss": 8.7761, "step": 39390 }, { "epoch": 0.11080457054792016, "grad_norm": 19.5, "learning_rate": 1.9932759143358955e-05, "loss": 8.1826, "step": 39400 }, { "epoch": 0.11083269353536887, "grad_norm": 22.875, "learning_rate": 1.9932725044074707e-05, "loss": 7.3338, "step": 39410 }, { "epoch": 0.11086081652281758, "grad_norm": 22.75, "learning_rate": 1.9932690936175588e-05, "loss": 7.8626, "step": 39420 }, { "epoch": 0.1108889395102663, "grad_norm": 31.125, "learning_rate": 1.9932656819661636e-05, "loss": 8.351, "step": 39430 }, { "epoch": 0.110917062497715, "grad_norm": 41.5, "learning_rate": 1.9932622694532874e-05, "loss": 7.4187, "step": 39440 }, { "epoch": 0.11094518548516372, "grad_norm": 26.0, "learning_rate": 1.9932588560789337e-05, "loss": 8.1838, "step": 39450 }, { "epoch": 0.11097330847261243, "grad_norm": 31.625, "learning_rate": 1.993255441843105e-05, "loss": 7.7531, "step": 39460 }, { "epoch": 0.11100143146006114, "grad_norm": 30.5, "learning_rate": 1.9932520267458043e-05, "loss": 7.8584, "step": 39470 }, { "epoch": 0.11102955444750985, "grad_norm": 55.75, "learning_rate": 1.993248610787035e-05, "loss": 8.2651, "step": 39480 }, { "epoch": 0.11105767743495856, "grad_norm": 25.375, "learning_rate": 1.9932451939668e-05, "loss": 8.1219, "step": 39490 }, { "epoch": 0.11108580042240727, "grad_norm": 37.0, "learning_rate": 1.9932417762851014e-05, "loss": 8.2001, "step": 39500 }, { "epoch": 0.11111392340985599, "grad_norm": 35.75, "learning_rate": 1.9932383577419432e-05, "loss": 7.2401, "step": 39510 }, { "epoch": 0.1111420463973047, "grad_norm": 32.5, "learning_rate": 1.9932349383373278e-05, "loss": 7.6313, "step": 39520 }, { "epoch": 0.11117016938475341, "grad_norm": 65.5, "learning_rate": 1.9932315180712582e-05, "loss": 8.0156, "step": 39530 }, { "epoch": 0.11119829237220212, "grad_norm": 40.25, "learning_rate": 1.9932280969437378e-05, "loss": 8.2392, "step": 39540 }, { "epoch": 0.11122641535965082, "grad_norm": 26.375, "learning_rate": 1.9932246749547693e-05, "loss": 8.1649, "step": 39550 }, { "epoch": 0.11125453834709953, "grad_norm": 36.75, "learning_rate": 1.9932212521043554e-05, "loss": 8.0057, "step": 39560 }, { "epoch": 0.11128266133454824, "grad_norm": 31.375, "learning_rate": 1.9932178283924992e-05, "loss": 8.1593, "step": 39570 }, { "epoch": 0.11131078432199695, "grad_norm": 27.125, "learning_rate": 1.993214403819204e-05, "loss": 8.1344, "step": 39580 }, { "epoch": 0.11133890730944566, "grad_norm": 20.875, "learning_rate": 1.9932109783844723e-05, "loss": 7.6902, "step": 39590 }, { "epoch": 0.11136703029689438, "grad_norm": 35.25, "learning_rate": 1.9932075520883077e-05, "loss": 6.4047, "step": 39600 }, { "epoch": 0.11139515328434309, "grad_norm": 37.25, "learning_rate": 1.993204124930712e-05, "loss": 7.9469, "step": 39610 }, { "epoch": 0.1114232762717918, "grad_norm": 62.5, "learning_rate": 1.9932006969116898e-05, "loss": 6.9958, "step": 39620 }, { "epoch": 0.11145139925924051, "grad_norm": 24.0, "learning_rate": 1.993197268031243e-05, "loss": 7.4426, "step": 39630 }, { "epoch": 0.11147952224668922, "grad_norm": 27.625, "learning_rate": 1.9931938382893744e-05, "loss": 7.5213, "step": 39640 }, { "epoch": 0.11150764523413793, "grad_norm": 22.625, "learning_rate": 1.9931904076860877e-05, "loss": 8.2028, "step": 39650 }, { "epoch": 0.11153576822158665, "grad_norm": 34.25, "learning_rate": 1.9931869762213856e-05, "loss": 7.0324, "step": 39660 }, { "epoch": 0.11156389120903536, "grad_norm": 25.375, "learning_rate": 1.9931835438952707e-05, "loss": 8.0254, "step": 39670 }, { "epoch": 0.11159201419648407, "grad_norm": 40.75, "learning_rate": 1.9931801107077467e-05, "loss": 8.0834, "step": 39680 }, { "epoch": 0.11162013718393278, "grad_norm": 27.125, "learning_rate": 1.993176676658816e-05, "loss": 8.0478, "step": 39690 }, { "epoch": 0.11164826017138149, "grad_norm": 23.0, "learning_rate": 1.9931732417484818e-05, "loss": 7.8568, "step": 39700 }, { "epoch": 0.1116763831588302, "grad_norm": 35.5, "learning_rate": 1.993169805976747e-05, "loss": 8.0158, "step": 39710 }, { "epoch": 0.1117045061462789, "grad_norm": 28.75, "learning_rate": 1.9931663693436146e-05, "loss": 7.6441, "step": 39720 }, { "epoch": 0.11173262913372761, "grad_norm": 32.75, "learning_rate": 1.9931629318490876e-05, "loss": 8.2805, "step": 39730 }, { "epoch": 0.11176075212117632, "grad_norm": 27.375, "learning_rate": 1.9931594934931692e-05, "loss": 7.3246, "step": 39740 }, { "epoch": 0.11178887510862504, "grad_norm": 43.0, "learning_rate": 1.9931560542758616e-05, "loss": 7.8646, "step": 39750 }, { "epoch": 0.11181699809607375, "grad_norm": 26.125, "learning_rate": 1.9931526141971685e-05, "loss": 8.6715, "step": 39760 }, { "epoch": 0.11184512108352246, "grad_norm": 42.0, "learning_rate": 1.993149173257093e-05, "loss": 7.5787, "step": 39770 }, { "epoch": 0.11187324407097117, "grad_norm": 25.875, "learning_rate": 1.993145731455638e-05, "loss": 7.7623, "step": 39780 }, { "epoch": 0.11190136705841988, "grad_norm": 31.0, "learning_rate": 1.993142288792806e-05, "loss": 7.8898, "step": 39790 }, { "epoch": 0.1119294900458686, "grad_norm": 29.75, "learning_rate": 1.9931388452686002e-05, "loss": 7.6983, "step": 39800 }, { "epoch": 0.1119576130333173, "grad_norm": 33.25, "learning_rate": 1.9931354008830238e-05, "loss": 8.3527, "step": 39810 }, { "epoch": 0.11198573602076602, "grad_norm": 22.125, "learning_rate": 1.9931319556360797e-05, "loss": 8.0936, "step": 39820 }, { "epoch": 0.11201385900821473, "grad_norm": 22.625, "learning_rate": 1.9931285095277707e-05, "loss": 7.8523, "step": 39830 }, { "epoch": 0.11204198199566344, "grad_norm": 24.25, "learning_rate": 1.9931250625581e-05, "loss": 8.0918, "step": 39840 }, { "epoch": 0.11207010498311215, "grad_norm": 34.5, "learning_rate": 1.9931216147270705e-05, "loss": 8.219, "step": 39850 }, { "epoch": 0.11209822797056086, "grad_norm": 29.25, "learning_rate": 1.9931181660346853e-05, "loss": 7.4523, "step": 39860 }, { "epoch": 0.11212635095800957, "grad_norm": 22.625, "learning_rate": 1.993114716480947e-05, "loss": 7.7385, "step": 39870 }, { "epoch": 0.11215447394545827, "grad_norm": 25.375, "learning_rate": 1.9931112660658593e-05, "loss": 8.9317, "step": 39880 }, { "epoch": 0.11218259693290698, "grad_norm": 47.25, "learning_rate": 1.9931078147894246e-05, "loss": 7.8708, "step": 39890 }, { "epoch": 0.1122107199203557, "grad_norm": 22.875, "learning_rate": 1.993104362651646e-05, "loss": 7.8496, "step": 39900 }, { "epoch": 0.1122388429078044, "grad_norm": 35.5, "learning_rate": 1.993100909652527e-05, "loss": 7.8997, "step": 39910 }, { "epoch": 0.11226696589525312, "grad_norm": 21.125, "learning_rate": 1.9930974557920695e-05, "loss": 8.1541, "step": 39920 }, { "epoch": 0.11229508888270183, "grad_norm": 34.5, "learning_rate": 1.9930940010702775e-05, "loss": 8.8548, "step": 39930 }, { "epoch": 0.11232321187015054, "grad_norm": 25.125, "learning_rate": 1.993090545487154e-05, "loss": 8.0927, "step": 39940 }, { "epoch": 0.11235133485759925, "grad_norm": 28.75, "learning_rate": 1.993087089042701e-05, "loss": 7.729, "step": 39950 }, { "epoch": 0.11237945784504796, "grad_norm": 32.5, "learning_rate": 1.9930836317369226e-05, "loss": 7.6121, "step": 39960 }, { "epoch": 0.11240758083249668, "grad_norm": 31.5, "learning_rate": 1.9930801735698213e-05, "loss": 7.4255, "step": 39970 }, { "epoch": 0.11243570381994539, "grad_norm": 54.25, "learning_rate": 1.9930767145414e-05, "loss": 8.117, "step": 39980 }, { "epoch": 0.1124638268073941, "grad_norm": 26.25, "learning_rate": 1.993073254651662e-05, "loss": 7.7727, "step": 39990 }, { "epoch": 0.11249194979484281, "grad_norm": 25.75, "learning_rate": 1.99306979390061e-05, "loss": 7.6773, "step": 40000 }, { "epoch": 0.11252007278229152, "grad_norm": 36.0, "learning_rate": 1.9930663322882476e-05, "loss": 7.8418, "step": 40010 }, { "epoch": 0.11254819576974023, "grad_norm": 25.0, "learning_rate": 1.993062869814577e-05, "loss": 8.5814, "step": 40020 }, { "epoch": 0.11257631875718895, "grad_norm": 24.0, "learning_rate": 1.9930594064796013e-05, "loss": 7.6275, "step": 40030 }, { "epoch": 0.11260444174463764, "grad_norm": 39.75, "learning_rate": 1.9930559422833243e-05, "loss": 7.8084, "step": 40040 }, { "epoch": 0.11263256473208635, "grad_norm": 29.375, "learning_rate": 1.993052477225748e-05, "loss": 8.169, "step": 40050 }, { "epoch": 0.11266068771953507, "grad_norm": 32.0, "learning_rate": 1.9930490113068762e-05, "loss": 7.5766, "step": 40060 }, { "epoch": 0.11268881070698378, "grad_norm": 33.0, "learning_rate": 1.9930455445267113e-05, "loss": 7.9427, "step": 40070 }, { "epoch": 0.11271693369443249, "grad_norm": 31.5, "learning_rate": 1.9930420768852568e-05, "loss": 7.2257, "step": 40080 }, { "epoch": 0.1127450566818812, "grad_norm": 23.125, "learning_rate": 1.993038608382516e-05, "loss": 6.6947, "step": 40090 }, { "epoch": 0.11277317966932991, "grad_norm": 26.125, "learning_rate": 1.9930351390184905e-05, "loss": 8.1295, "step": 40100 }, { "epoch": 0.11280130265677862, "grad_norm": 31.0, "learning_rate": 1.9930316687931847e-05, "loss": 7.7295, "step": 40110 }, { "epoch": 0.11282942564422734, "grad_norm": 44.75, "learning_rate": 1.993028197706601e-05, "loss": 8.4976, "step": 40120 }, { "epoch": 0.11285754863167605, "grad_norm": 28.375, "learning_rate": 1.993024725758743e-05, "loss": 8.7573, "step": 40130 }, { "epoch": 0.11288567161912476, "grad_norm": 28.25, "learning_rate": 1.9930212529496122e-05, "loss": 8.0468, "step": 40140 }, { "epoch": 0.11291379460657347, "grad_norm": 44.5, "learning_rate": 1.9930177792792133e-05, "loss": 8.1924, "step": 40150 }, { "epoch": 0.11294191759402218, "grad_norm": 29.5, "learning_rate": 1.9930143047475487e-05, "loss": 7.9185, "step": 40160 }, { "epoch": 0.11297004058147089, "grad_norm": 30.875, "learning_rate": 1.9930108293546216e-05, "loss": 8.0364, "step": 40170 }, { "epoch": 0.1129981635689196, "grad_norm": 27.0, "learning_rate": 1.9930073531004345e-05, "loss": 7.8107, "step": 40180 }, { "epoch": 0.11302628655636832, "grad_norm": 34.5, "learning_rate": 1.9930038759849907e-05, "loss": 7.8012, "step": 40190 }, { "epoch": 0.11305440954381701, "grad_norm": 24.0, "learning_rate": 1.993000398008293e-05, "loss": 7.6009, "step": 40200 }, { "epoch": 0.11308253253126573, "grad_norm": 26.375, "learning_rate": 1.992996919170345e-05, "loss": 7.8997, "step": 40210 }, { "epoch": 0.11311065551871444, "grad_norm": 24.125, "learning_rate": 1.9929934394711492e-05, "loss": 8.0802, "step": 40220 }, { "epoch": 0.11313877850616315, "grad_norm": 45.5, "learning_rate": 1.992989958910709e-05, "loss": 7.4456, "step": 40230 }, { "epoch": 0.11316690149361186, "grad_norm": 28.625, "learning_rate": 1.992986477489027e-05, "loss": 8.2285, "step": 40240 }, { "epoch": 0.11319502448106057, "grad_norm": 24.75, "learning_rate": 1.9929829952061065e-05, "loss": 8.2793, "step": 40250 }, { "epoch": 0.11322314746850928, "grad_norm": 80.5, "learning_rate": 1.9929795120619504e-05, "loss": 7.9103, "step": 40260 }, { "epoch": 0.113251270455958, "grad_norm": 23.5, "learning_rate": 1.9929760280565617e-05, "loss": 8.1574, "step": 40270 }, { "epoch": 0.1132793934434067, "grad_norm": 32.25, "learning_rate": 1.9929725431899435e-05, "loss": 8.1167, "step": 40280 }, { "epoch": 0.11330751643085542, "grad_norm": 29.375, "learning_rate": 1.992969057462099e-05, "loss": 7.6598, "step": 40290 }, { "epoch": 0.11333563941830413, "grad_norm": 23.625, "learning_rate": 1.992965570873031e-05, "loss": 8.3943, "step": 40300 }, { "epoch": 0.11336376240575284, "grad_norm": 20.375, "learning_rate": 1.9929620834227423e-05, "loss": 8.2627, "step": 40310 }, { "epoch": 0.11339188539320155, "grad_norm": 31.25, "learning_rate": 1.9929585951112363e-05, "loss": 8.5344, "step": 40320 }, { "epoch": 0.11342000838065026, "grad_norm": 29.625, "learning_rate": 1.992955105938516e-05, "loss": 7.991, "step": 40330 }, { "epoch": 0.11344813136809898, "grad_norm": 24.375, "learning_rate": 1.992951615904584e-05, "loss": 7.4237, "step": 40340 }, { "epoch": 0.11347625435554769, "grad_norm": 32.25, "learning_rate": 1.992948125009444e-05, "loss": 7.5065, "step": 40350 }, { "epoch": 0.1135043773429964, "grad_norm": 29.0, "learning_rate": 1.9929446332530985e-05, "loss": 8.0293, "step": 40360 }, { "epoch": 0.1135325003304451, "grad_norm": 22.75, "learning_rate": 1.992941140635551e-05, "loss": 7.3145, "step": 40370 }, { "epoch": 0.11356062331789381, "grad_norm": 26.625, "learning_rate": 1.992937647156804e-05, "loss": 7.7182, "step": 40380 }, { "epoch": 0.11358874630534252, "grad_norm": 33.25, "learning_rate": 1.992934152816861e-05, "loss": 8.6878, "step": 40390 }, { "epoch": 0.11361686929279123, "grad_norm": 37.5, "learning_rate": 1.9929306576157244e-05, "loss": 8.2749, "step": 40400 }, { "epoch": 0.11364499228023994, "grad_norm": 25.375, "learning_rate": 1.9929271615533977e-05, "loss": 8.1686, "step": 40410 }, { "epoch": 0.11367311526768865, "grad_norm": 26.0, "learning_rate": 1.9929236646298845e-05, "loss": 8.0307, "step": 40420 }, { "epoch": 0.11370123825513737, "grad_norm": 32.75, "learning_rate": 1.9929201668451864e-05, "loss": 7.8786, "step": 40430 }, { "epoch": 0.11372936124258608, "grad_norm": 52.25, "learning_rate": 1.9929166681993076e-05, "loss": 7.8957, "step": 40440 }, { "epoch": 0.11375748423003479, "grad_norm": 30.75, "learning_rate": 1.9929131686922507e-05, "loss": 8.1903, "step": 40450 }, { "epoch": 0.1137856072174835, "grad_norm": 40.25, "learning_rate": 1.992909668324019e-05, "loss": 8.153, "step": 40460 }, { "epoch": 0.11381373020493221, "grad_norm": 22.5, "learning_rate": 1.992906167094615e-05, "loss": 8.0373, "step": 40470 }, { "epoch": 0.11384185319238092, "grad_norm": 35.5, "learning_rate": 1.9929026650040422e-05, "loss": 6.9035, "step": 40480 }, { "epoch": 0.11386997617982964, "grad_norm": 25.75, "learning_rate": 1.9928991620523036e-05, "loss": 7.6534, "step": 40490 }, { "epoch": 0.11389809916727835, "grad_norm": 29.375, "learning_rate": 1.992895658239402e-05, "loss": 8.3205, "step": 40500 }, { "epoch": 0.11392622215472706, "grad_norm": 23.25, "learning_rate": 1.992892153565341e-05, "loss": 7.5767, "step": 40510 }, { "epoch": 0.11395434514217577, "grad_norm": 28.25, "learning_rate": 1.9928886480301227e-05, "loss": 8.2087, "step": 40520 }, { "epoch": 0.11398246812962447, "grad_norm": 19.75, "learning_rate": 1.9928851416337508e-05, "loss": 8.4736, "step": 40530 }, { "epoch": 0.11401059111707318, "grad_norm": 35.5, "learning_rate": 1.9928816343762282e-05, "loss": 8.0914, "step": 40540 }, { "epoch": 0.11403871410452189, "grad_norm": 27.25, "learning_rate": 1.992878126257558e-05, "loss": 8.4895, "step": 40550 }, { "epoch": 0.1140668370919706, "grad_norm": 55.25, "learning_rate": 1.9928746172777432e-05, "loss": 7.4542, "step": 40560 }, { "epoch": 0.11409496007941931, "grad_norm": 30.75, "learning_rate": 1.992871107436787e-05, "loss": 7.4491, "step": 40570 }, { "epoch": 0.11412308306686803, "grad_norm": 67.0, "learning_rate": 1.9928675967346923e-05, "loss": 8.521, "step": 40580 }, { "epoch": 0.11415120605431674, "grad_norm": 26.625, "learning_rate": 1.992864085171462e-05, "loss": 8.2914, "step": 40590 }, { "epoch": 0.11417932904176545, "grad_norm": 21.375, "learning_rate": 1.9928605727470992e-05, "loss": 7.3769, "step": 40600 }, { "epoch": 0.11420745202921416, "grad_norm": 28.0, "learning_rate": 1.992857059461607e-05, "loss": 7.8811, "step": 40610 }, { "epoch": 0.11423557501666287, "grad_norm": 30.375, "learning_rate": 1.9928535453149887e-05, "loss": 7.9895, "step": 40620 }, { "epoch": 0.11426369800411158, "grad_norm": 20.625, "learning_rate": 1.992850030307247e-05, "loss": 8.1156, "step": 40630 }, { "epoch": 0.1142918209915603, "grad_norm": 31.625, "learning_rate": 1.9928465144383852e-05, "loss": 7.5317, "step": 40640 }, { "epoch": 0.114319943979009, "grad_norm": 39.25, "learning_rate": 1.9928429977084058e-05, "loss": 8.3036, "step": 40650 }, { "epoch": 0.11434806696645772, "grad_norm": 32.75, "learning_rate": 1.9928394801173124e-05, "loss": 7.7765, "step": 40660 }, { "epoch": 0.11437618995390643, "grad_norm": 25.625, "learning_rate": 1.992835961665108e-05, "loss": 8.5802, "step": 40670 }, { "epoch": 0.11440431294135514, "grad_norm": 38.0, "learning_rate": 1.9928324423517957e-05, "loss": 7.9659, "step": 40680 }, { "epoch": 0.11443243592880384, "grad_norm": 25.5, "learning_rate": 1.9928289221773785e-05, "loss": 7.1605, "step": 40690 }, { "epoch": 0.11446055891625255, "grad_norm": 38.25, "learning_rate": 1.992825401141859e-05, "loss": 7.8393, "step": 40700 }, { "epoch": 0.11448868190370126, "grad_norm": 36.5, "learning_rate": 1.992821879245241e-05, "loss": 7.6257, "step": 40710 }, { "epoch": 0.11451680489114997, "grad_norm": 27.875, "learning_rate": 1.9928183564875268e-05, "loss": 8.8567, "step": 40720 }, { "epoch": 0.11454492787859868, "grad_norm": 37.5, "learning_rate": 1.99281483286872e-05, "loss": 8.3439, "step": 40730 }, { "epoch": 0.1145730508660474, "grad_norm": 44.75, "learning_rate": 1.9928113083888238e-05, "loss": 8.1339, "step": 40740 }, { "epoch": 0.11460117385349611, "grad_norm": 27.125, "learning_rate": 1.9928077830478403e-05, "loss": 7.9466, "step": 40750 }, { "epoch": 0.11462929684094482, "grad_norm": 28.375, "learning_rate": 1.9928042568457737e-05, "loss": 7.523, "step": 40760 }, { "epoch": 0.11465741982839353, "grad_norm": 46.5, "learning_rate": 1.9928007297826263e-05, "loss": 7.9849, "step": 40770 }, { "epoch": 0.11468554281584224, "grad_norm": 28.0, "learning_rate": 1.9927972018584018e-05, "loss": 8.7341, "step": 40780 }, { "epoch": 0.11471366580329095, "grad_norm": 29.375, "learning_rate": 1.9927936730731025e-05, "loss": 7.8543, "step": 40790 }, { "epoch": 0.11474178879073967, "grad_norm": 26.75, "learning_rate": 1.992790143426732e-05, "loss": 8.2812, "step": 40800 }, { "epoch": 0.11476991177818838, "grad_norm": 26.625, "learning_rate": 1.992786612919293e-05, "loss": 7.7779, "step": 40810 }, { "epoch": 0.11479803476563709, "grad_norm": 26.0, "learning_rate": 1.992783081550789e-05, "loss": 7.4658, "step": 40820 }, { "epoch": 0.1148261577530858, "grad_norm": 29.5, "learning_rate": 1.992779549321223e-05, "loss": 8.4586, "step": 40830 }, { "epoch": 0.11485428074053451, "grad_norm": 35.25, "learning_rate": 1.9927760162305976e-05, "loss": 7.5136, "step": 40840 }, { "epoch": 0.11488240372798321, "grad_norm": 21.0, "learning_rate": 1.992772482278916e-05, "loss": 7.7615, "step": 40850 }, { "epoch": 0.11491052671543192, "grad_norm": 32.5, "learning_rate": 1.9927689474661816e-05, "loss": 7.246, "step": 40860 }, { "epoch": 0.11493864970288063, "grad_norm": 26.125, "learning_rate": 1.9927654117923974e-05, "loss": 8.2247, "step": 40870 }, { "epoch": 0.11496677269032934, "grad_norm": 31.375, "learning_rate": 1.9927618752575663e-05, "loss": 8.0046, "step": 40880 }, { "epoch": 0.11499489567777806, "grad_norm": 42.0, "learning_rate": 1.9927583378616916e-05, "loss": 7.6489, "step": 40890 }, { "epoch": 0.11502301866522677, "grad_norm": 41.75, "learning_rate": 1.992754799604776e-05, "loss": 7.8937, "step": 40900 }, { "epoch": 0.11505114165267548, "grad_norm": 39.5, "learning_rate": 1.9927512604868228e-05, "loss": 8.6107, "step": 40910 }, { "epoch": 0.11507926464012419, "grad_norm": 25.0, "learning_rate": 1.992747720507835e-05, "loss": 8.4515, "step": 40920 }, { "epoch": 0.1151073876275729, "grad_norm": 46.75, "learning_rate": 1.9927441796678154e-05, "loss": 8.0222, "step": 40930 }, { "epoch": 0.11513551061502161, "grad_norm": 27.25, "learning_rate": 1.9927406379667677e-05, "loss": 8.3719, "step": 40940 }, { "epoch": 0.11516363360247032, "grad_norm": 39.0, "learning_rate": 1.9927370954046947e-05, "loss": 7.949, "step": 40950 }, { "epoch": 0.11519175658991904, "grad_norm": 35.0, "learning_rate": 1.9927335519815992e-05, "loss": 7.863, "step": 40960 }, { "epoch": 0.11521987957736775, "grad_norm": 24.75, "learning_rate": 1.9927300076974846e-05, "loss": 8.3, "step": 40970 }, { "epoch": 0.11524800256481646, "grad_norm": 33.0, "learning_rate": 1.9927264625523538e-05, "loss": 8.0492, "step": 40980 }, { "epoch": 0.11527612555226517, "grad_norm": 26.5, "learning_rate": 1.9927229165462103e-05, "loss": 7.7945, "step": 40990 }, { "epoch": 0.11530424853971388, "grad_norm": 20.375, "learning_rate": 1.992719369679056e-05, "loss": 7.9621, "step": 41000 }, { "epoch": 0.1153323715271626, "grad_norm": 28.125, "learning_rate": 1.9927158219508954e-05, "loss": 7.8373, "step": 41010 }, { "epoch": 0.11536049451461129, "grad_norm": 43.25, "learning_rate": 1.9927122733617314e-05, "loss": 8.271, "step": 41020 }, { "epoch": 0.11538861750206, "grad_norm": 33.75, "learning_rate": 1.9927087239115658e-05, "loss": 7.6339, "step": 41030 }, { "epoch": 0.11541674048950872, "grad_norm": 28.5, "learning_rate": 1.9927051736004028e-05, "loss": 7.6038, "step": 41040 }, { "epoch": 0.11544486347695743, "grad_norm": 30.0, "learning_rate": 1.9927016224282454e-05, "loss": 7.4886, "step": 41050 }, { "epoch": 0.11547298646440614, "grad_norm": 24.375, "learning_rate": 1.9926980703950962e-05, "loss": 7.3445, "step": 41060 }, { "epoch": 0.11550110945185485, "grad_norm": 27.625, "learning_rate": 1.9926945175009587e-05, "loss": 7.8267, "step": 41070 }, { "epoch": 0.11552923243930356, "grad_norm": 27.875, "learning_rate": 1.9926909637458357e-05, "loss": 7.1621, "step": 41080 }, { "epoch": 0.11555735542675227, "grad_norm": 32.5, "learning_rate": 1.9926874091297307e-05, "loss": 8.0609, "step": 41090 }, { "epoch": 0.11558547841420098, "grad_norm": 21.75, "learning_rate": 1.9926838536526462e-05, "loss": 7.9371, "step": 41100 }, { "epoch": 0.1156136014016497, "grad_norm": 27.125, "learning_rate": 1.992680297314586e-05, "loss": 7.468, "step": 41110 }, { "epoch": 0.11564172438909841, "grad_norm": 48.75, "learning_rate": 1.9926767401155526e-05, "loss": 7.7543, "step": 41120 }, { "epoch": 0.11566984737654712, "grad_norm": 26.875, "learning_rate": 1.992673182055549e-05, "loss": 7.8023, "step": 41130 }, { "epoch": 0.11569797036399583, "grad_norm": 32.25, "learning_rate": 1.9926696231345785e-05, "loss": 7.9129, "step": 41140 }, { "epoch": 0.11572609335144454, "grad_norm": 36.0, "learning_rate": 1.9926660633526445e-05, "loss": 7.9453, "step": 41150 }, { "epoch": 0.11575421633889325, "grad_norm": 21.125, "learning_rate": 1.99266250270975e-05, "loss": 8.0921, "step": 41160 }, { "epoch": 0.11578233932634197, "grad_norm": 38.25, "learning_rate": 1.9926589412058974e-05, "loss": 7.6832, "step": 41170 }, { "epoch": 0.11581046231379066, "grad_norm": 47.25, "learning_rate": 1.9926553788410906e-05, "loss": 8.3778, "step": 41180 }, { "epoch": 0.11583858530123937, "grad_norm": 23.625, "learning_rate": 1.9926518156153328e-05, "loss": 7.8141, "step": 41190 }, { "epoch": 0.11586670828868809, "grad_norm": 26.75, "learning_rate": 1.992648251528626e-05, "loss": 7.8312, "step": 41200 }, { "epoch": 0.1158948312761368, "grad_norm": 22.875, "learning_rate": 1.9926446865809742e-05, "loss": 8.0516, "step": 41210 }, { "epoch": 0.11592295426358551, "grad_norm": 23.5, "learning_rate": 1.9926411207723805e-05, "loss": 8.4521, "step": 41220 }, { "epoch": 0.11595107725103422, "grad_norm": 25.5, "learning_rate": 1.9926375541028476e-05, "loss": 7.3135, "step": 41230 }, { "epoch": 0.11597920023848293, "grad_norm": 43.0, "learning_rate": 1.9926339865723786e-05, "loss": 7.5481, "step": 41240 }, { "epoch": 0.11600732322593164, "grad_norm": 38.75, "learning_rate": 1.9926304181809768e-05, "loss": 7.3959, "step": 41250 }, { "epoch": 0.11603544621338036, "grad_norm": 24.75, "learning_rate": 1.9926268489286453e-05, "loss": 7.3439, "step": 41260 }, { "epoch": 0.11606356920082907, "grad_norm": 36.0, "learning_rate": 1.992623278815387e-05, "loss": 8.1905, "step": 41270 }, { "epoch": 0.11609169218827778, "grad_norm": 36.0, "learning_rate": 1.9926197078412053e-05, "loss": 8.0033, "step": 41280 }, { "epoch": 0.11611981517572649, "grad_norm": 25.875, "learning_rate": 1.992616136006103e-05, "loss": 8.1932, "step": 41290 }, { "epoch": 0.1161479381631752, "grad_norm": 22.125, "learning_rate": 1.9926125633100835e-05, "loss": 7.7511, "step": 41300 }, { "epoch": 0.11617606115062391, "grad_norm": 30.75, "learning_rate": 1.9926089897531495e-05, "loss": 7.9502, "step": 41310 }, { "epoch": 0.11620418413807262, "grad_norm": 49.75, "learning_rate": 1.9926054153353045e-05, "loss": 8.1812, "step": 41320 }, { "epoch": 0.11623230712552134, "grad_norm": 27.875, "learning_rate": 1.992601840056551e-05, "loss": 7.9953, "step": 41330 }, { "epoch": 0.11626043011297003, "grad_norm": 21.75, "learning_rate": 1.9925982639168934e-05, "loss": 7.399, "step": 41340 }, { "epoch": 0.11628855310041875, "grad_norm": 30.0, "learning_rate": 1.9925946869163334e-05, "loss": 8.3527, "step": 41350 }, { "epoch": 0.11631667608786746, "grad_norm": 57.25, "learning_rate": 1.9925911090548746e-05, "loss": 8.0927, "step": 41360 }, { "epoch": 0.11634479907531617, "grad_norm": 29.125, "learning_rate": 1.9925875303325202e-05, "loss": 7.8971, "step": 41370 }, { "epoch": 0.11637292206276488, "grad_norm": 24.875, "learning_rate": 1.992583950749273e-05, "loss": 8.6742, "step": 41380 }, { "epoch": 0.11640104505021359, "grad_norm": 26.375, "learning_rate": 1.9925803703051366e-05, "loss": 7.686, "step": 41390 }, { "epoch": 0.1164291680376623, "grad_norm": 26.375, "learning_rate": 1.9925767890001138e-05, "loss": 7.2153, "step": 41400 }, { "epoch": 0.11645729102511101, "grad_norm": 32.75, "learning_rate": 1.992573206834208e-05, "loss": 8.8144, "step": 41410 }, { "epoch": 0.11648541401255973, "grad_norm": 23.375, "learning_rate": 1.9925696238074216e-05, "loss": 7.6394, "step": 41420 }, { "epoch": 0.11651353700000844, "grad_norm": 30.875, "learning_rate": 1.9925660399197584e-05, "loss": 7.4493, "step": 41430 }, { "epoch": 0.11654165998745715, "grad_norm": 31.0, "learning_rate": 1.9925624551712213e-05, "loss": 8.1826, "step": 41440 }, { "epoch": 0.11656978297490586, "grad_norm": 33.75, "learning_rate": 1.9925588695618132e-05, "loss": 7.5433, "step": 41450 }, { "epoch": 0.11659790596235457, "grad_norm": 45.0, "learning_rate": 1.9925552830915377e-05, "loss": 8.6094, "step": 41460 }, { "epoch": 0.11662602894980328, "grad_norm": 33.5, "learning_rate": 1.9925516957603972e-05, "loss": 7.1974, "step": 41470 }, { "epoch": 0.116654151937252, "grad_norm": 30.375, "learning_rate": 1.9925481075683955e-05, "loss": 7.5409, "step": 41480 }, { "epoch": 0.11668227492470071, "grad_norm": 39.5, "learning_rate": 1.992544518515535e-05, "loss": 8.2786, "step": 41490 }, { "epoch": 0.1167103979121494, "grad_norm": 31.0, "learning_rate": 1.99254092860182e-05, "loss": 7.5049, "step": 41500 }, { "epoch": 0.11673852089959812, "grad_norm": 26.625, "learning_rate": 1.9925373378272524e-05, "loss": 7.8198, "step": 41510 }, { "epoch": 0.11676664388704683, "grad_norm": 52.5, "learning_rate": 1.992533746191836e-05, "loss": 7.7909, "step": 41520 }, { "epoch": 0.11679476687449554, "grad_norm": 28.125, "learning_rate": 1.9925301536955734e-05, "loss": 7.5859, "step": 41530 }, { "epoch": 0.11682288986194425, "grad_norm": 37.0, "learning_rate": 1.992526560338468e-05, "loss": 7.2414, "step": 41540 }, { "epoch": 0.11685101284939296, "grad_norm": 25.75, "learning_rate": 1.992522966120523e-05, "loss": 7.469, "step": 41550 }, { "epoch": 0.11687913583684167, "grad_norm": 32.5, "learning_rate": 1.9925193710417417e-05, "loss": 7.614, "step": 41560 }, { "epoch": 0.11690725882429039, "grad_norm": 34.0, "learning_rate": 1.9925157751021267e-05, "loss": 7.964, "step": 41570 }, { "epoch": 0.1169353818117391, "grad_norm": 20.875, "learning_rate": 1.9925121783016814e-05, "loss": 7.3999, "step": 41580 }, { "epoch": 0.11696350479918781, "grad_norm": 22.5, "learning_rate": 1.9925085806404086e-05, "loss": 7.9171, "step": 41590 }, { "epoch": 0.11699162778663652, "grad_norm": 48.5, "learning_rate": 1.992504982118312e-05, "loss": 8.326, "step": 41600 }, { "epoch": 0.11701975077408523, "grad_norm": 32.75, "learning_rate": 1.9925013827353945e-05, "loss": 8.3404, "step": 41610 }, { "epoch": 0.11704787376153394, "grad_norm": 30.375, "learning_rate": 1.9924977824916592e-05, "loss": 8.084, "step": 41620 }, { "epoch": 0.11707599674898266, "grad_norm": 33.0, "learning_rate": 1.9924941813871087e-05, "loss": 7.9195, "step": 41630 }, { "epoch": 0.11710411973643137, "grad_norm": 51.25, "learning_rate": 1.992490579421747e-05, "loss": 7.9102, "step": 41640 }, { "epoch": 0.11713224272388008, "grad_norm": 35.25, "learning_rate": 1.992486976595577e-05, "loss": 8.1494, "step": 41650 }, { "epoch": 0.11716036571132879, "grad_norm": 26.875, "learning_rate": 1.9924833729086015e-05, "loss": 7.658, "step": 41660 }, { "epoch": 0.11718848869877749, "grad_norm": 25.75, "learning_rate": 1.9924797683608234e-05, "loss": 8.2779, "step": 41670 }, { "epoch": 0.1172166116862262, "grad_norm": 37.0, "learning_rate": 1.9924761629522465e-05, "loss": 8.0279, "step": 41680 }, { "epoch": 0.11724473467367491, "grad_norm": 30.625, "learning_rate": 1.9924725566828737e-05, "loss": 8.3131, "step": 41690 }, { "epoch": 0.11727285766112362, "grad_norm": 27.125, "learning_rate": 1.992468949552708e-05, "loss": 7.818, "step": 41700 }, { "epoch": 0.11730098064857233, "grad_norm": 23.75, "learning_rate": 1.9924653415617526e-05, "loss": 7.8327, "step": 41710 }, { "epoch": 0.11732910363602105, "grad_norm": 29.5, "learning_rate": 1.9924617327100108e-05, "loss": 7.4355, "step": 41720 }, { "epoch": 0.11735722662346976, "grad_norm": 31.625, "learning_rate": 1.992458122997485e-05, "loss": 8.3332, "step": 41730 }, { "epoch": 0.11738534961091847, "grad_norm": 32.75, "learning_rate": 1.992454512424179e-05, "loss": 8.936, "step": 41740 }, { "epoch": 0.11741347259836718, "grad_norm": 37.25, "learning_rate": 1.992450900990096e-05, "loss": 8.6034, "step": 41750 }, { "epoch": 0.11744159558581589, "grad_norm": 33.5, "learning_rate": 1.9924472886952393e-05, "loss": 9.0471, "step": 41760 }, { "epoch": 0.1174697185732646, "grad_norm": 25.875, "learning_rate": 1.9924436755396112e-05, "loss": 8.03, "step": 41770 }, { "epoch": 0.11749784156071331, "grad_norm": 30.125, "learning_rate": 1.9924400615232154e-05, "loss": 7.6247, "step": 41780 }, { "epoch": 0.11752596454816203, "grad_norm": 29.375, "learning_rate": 1.992436446646055e-05, "loss": 8.211, "step": 41790 }, { "epoch": 0.11755408753561074, "grad_norm": 54.25, "learning_rate": 1.9924328309081328e-05, "loss": 7.8395, "step": 41800 }, { "epoch": 0.11758221052305945, "grad_norm": 24.5, "learning_rate": 1.9924292143094527e-05, "loss": 8.0087, "step": 41810 }, { "epoch": 0.11761033351050816, "grad_norm": 33.0, "learning_rate": 1.992425596850017e-05, "loss": 7.6672, "step": 41820 }, { "epoch": 0.11763845649795686, "grad_norm": 29.375, "learning_rate": 1.9924219785298293e-05, "loss": 7.7278, "step": 41830 }, { "epoch": 0.11766657948540557, "grad_norm": 32.75, "learning_rate": 1.9924183593488923e-05, "loss": 7.6387, "step": 41840 }, { "epoch": 0.11769470247285428, "grad_norm": 32.5, "learning_rate": 1.99241473930721e-05, "loss": 8.3007, "step": 41850 }, { "epoch": 0.117722825460303, "grad_norm": 29.0, "learning_rate": 1.9924111184047847e-05, "loss": 8.5404, "step": 41860 }, { "epoch": 0.1177509484477517, "grad_norm": 32.5, "learning_rate": 1.9924074966416197e-05, "loss": 8.1067, "step": 41870 }, { "epoch": 0.11777907143520042, "grad_norm": 28.0, "learning_rate": 1.9924038740177185e-05, "loss": 7.7048, "step": 41880 }, { "epoch": 0.11780719442264913, "grad_norm": 28.125, "learning_rate": 1.992400250533084e-05, "loss": 8.0787, "step": 41890 }, { "epoch": 0.11783531741009784, "grad_norm": 21.375, "learning_rate": 1.992396626187719e-05, "loss": 8.2071, "step": 41900 }, { "epoch": 0.11786344039754655, "grad_norm": 24.75, "learning_rate": 1.9923930009816278e-05, "loss": 8.5741, "step": 41910 }, { "epoch": 0.11789156338499526, "grad_norm": 25.375, "learning_rate": 1.992389374914812e-05, "loss": 7.8593, "step": 41920 }, { "epoch": 0.11791968637244397, "grad_norm": 26.875, "learning_rate": 1.992385747987276e-05, "loss": 7.4798, "step": 41930 }, { "epoch": 0.11794780935989269, "grad_norm": 27.0, "learning_rate": 1.992382120199022e-05, "loss": 6.99, "step": 41940 }, { "epoch": 0.1179759323473414, "grad_norm": 37.25, "learning_rate": 1.992378491550054e-05, "loss": 7.0314, "step": 41950 }, { "epoch": 0.11800405533479011, "grad_norm": 28.5, "learning_rate": 1.9923748620403748e-05, "loss": 8.5149, "step": 41960 }, { "epoch": 0.11803217832223882, "grad_norm": 34.5, "learning_rate": 1.9923712316699873e-05, "loss": 8.0895, "step": 41970 }, { "epoch": 0.11806030130968753, "grad_norm": 28.75, "learning_rate": 1.9923676004388947e-05, "loss": 7.6601, "step": 41980 }, { "epoch": 0.11808842429713623, "grad_norm": 52.25, "learning_rate": 1.9923639683471004e-05, "loss": 7.2399, "step": 41990 }, { "epoch": 0.11811654728458494, "grad_norm": 28.25, "learning_rate": 1.9923603353946075e-05, "loss": 7.6709, "step": 42000 }, { "epoch": 0.11814467027203365, "grad_norm": 37.0, "learning_rate": 1.992356701581419e-05, "loss": 7.6057, "step": 42010 }, { "epoch": 0.11817279325948236, "grad_norm": 24.875, "learning_rate": 1.992353066907538e-05, "loss": 7.3972, "step": 42020 }, { "epoch": 0.11820091624693108, "grad_norm": 42.25, "learning_rate": 1.992349431372968e-05, "loss": 7.9869, "step": 42030 }, { "epoch": 0.11822903923437979, "grad_norm": 27.5, "learning_rate": 1.9923457949777118e-05, "loss": 7.9521, "step": 42040 }, { "epoch": 0.1182571622218285, "grad_norm": 48.75, "learning_rate": 1.992342157721773e-05, "loss": 7.7016, "step": 42050 }, { "epoch": 0.11828528520927721, "grad_norm": 22.125, "learning_rate": 1.992338519605154e-05, "loss": 7.7236, "step": 42060 }, { "epoch": 0.11831340819672592, "grad_norm": 25.75, "learning_rate": 1.9923348806278588e-05, "loss": 7.9081, "step": 42070 }, { "epoch": 0.11834153118417463, "grad_norm": 37.5, "learning_rate": 1.99233124078989e-05, "loss": 7.9078, "step": 42080 }, { "epoch": 0.11836965417162335, "grad_norm": 26.25, "learning_rate": 1.992327600091251e-05, "loss": 7.6559, "step": 42090 }, { "epoch": 0.11839777715907206, "grad_norm": 32.75, "learning_rate": 1.9923239585319447e-05, "loss": 8.2386, "step": 42100 }, { "epoch": 0.11842590014652077, "grad_norm": 31.625, "learning_rate": 1.9923203161119745e-05, "loss": 8.2743, "step": 42110 }, { "epoch": 0.11845402313396948, "grad_norm": 27.125, "learning_rate": 1.9923166728313436e-05, "loss": 8.4952, "step": 42120 }, { "epoch": 0.11848214612141819, "grad_norm": 26.5, "learning_rate": 1.992313028690055e-05, "loss": 7.0648, "step": 42130 }, { "epoch": 0.1185102691088669, "grad_norm": 26.125, "learning_rate": 1.992309383688112e-05, "loss": 7.5642, "step": 42140 }, { "epoch": 0.1185383920963156, "grad_norm": 33.25, "learning_rate": 1.9923057378255175e-05, "loss": 7.7963, "step": 42150 }, { "epoch": 0.11856651508376431, "grad_norm": 32.75, "learning_rate": 1.992302091102275e-05, "loss": 7.4376, "step": 42160 }, { "epoch": 0.11859463807121302, "grad_norm": 24.625, "learning_rate": 1.9922984435183873e-05, "loss": 8.1368, "step": 42170 }, { "epoch": 0.11862276105866174, "grad_norm": 23.875, "learning_rate": 1.992294795073858e-05, "loss": 7.7827, "step": 42180 }, { "epoch": 0.11865088404611045, "grad_norm": 26.5, "learning_rate": 1.99229114576869e-05, "loss": 7.9585, "step": 42190 }, { "epoch": 0.11867900703355916, "grad_norm": 43.75, "learning_rate": 1.9922874956028863e-05, "loss": 7.7376, "step": 42200 }, { "epoch": 0.11870713002100787, "grad_norm": 25.375, "learning_rate": 1.9922838445764504e-05, "loss": 8.3933, "step": 42210 }, { "epoch": 0.11873525300845658, "grad_norm": 40.75, "learning_rate": 1.9922801926893854e-05, "loss": 7.8417, "step": 42220 }, { "epoch": 0.1187633759959053, "grad_norm": 28.125, "learning_rate": 1.9922765399416942e-05, "loss": 7.7912, "step": 42230 }, { "epoch": 0.118791498983354, "grad_norm": 31.0, "learning_rate": 1.9922728863333803e-05, "loss": 8.3337, "step": 42240 }, { "epoch": 0.11881962197080272, "grad_norm": 26.5, "learning_rate": 1.9922692318644467e-05, "loss": 8.4044, "step": 42250 }, { "epoch": 0.11884774495825143, "grad_norm": 37.0, "learning_rate": 1.9922655765348968e-05, "loss": 7.9306, "step": 42260 }, { "epoch": 0.11887586794570014, "grad_norm": 28.75, "learning_rate": 1.9922619203447332e-05, "loss": 7.5269, "step": 42270 }, { "epoch": 0.11890399093314885, "grad_norm": 32.5, "learning_rate": 1.99225826329396e-05, "loss": 7.6651, "step": 42280 }, { "epoch": 0.11893211392059756, "grad_norm": 22.875, "learning_rate": 1.992254605382579e-05, "loss": 7.6543, "step": 42290 }, { "epoch": 0.11896023690804627, "grad_norm": 38.75, "learning_rate": 1.9922509466105947e-05, "loss": 8.4023, "step": 42300 }, { "epoch": 0.11898835989549497, "grad_norm": 30.25, "learning_rate": 1.99224728697801e-05, "loss": 8.0359, "step": 42310 }, { "epoch": 0.11901648288294368, "grad_norm": 50.0, "learning_rate": 1.9922436264848272e-05, "loss": 8.1061, "step": 42320 }, { "epoch": 0.1190446058703924, "grad_norm": 28.375, "learning_rate": 1.9922399651310506e-05, "loss": 7.6448, "step": 42330 }, { "epoch": 0.1190727288578411, "grad_norm": 22.125, "learning_rate": 1.9922363029166827e-05, "loss": 7.9373, "step": 42340 }, { "epoch": 0.11910085184528982, "grad_norm": 47.25, "learning_rate": 1.992232639841727e-05, "loss": 8.4119, "step": 42350 }, { "epoch": 0.11912897483273853, "grad_norm": 27.625, "learning_rate": 1.992228975906186e-05, "loss": 6.976, "step": 42360 }, { "epoch": 0.11915709782018724, "grad_norm": 31.625, "learning_rate": 1.992225311110064e-05, "loss": 7.878, "step": 42370 }, { "epoch": 0.11918522080763595, "grad_norm": 62.75, "learning_rate": 1.9922216454533635e-05, "loss": 8.2264, "step": 42380 }, { "epoch": 0.11921334379508466, "grad_norm": 40.0, "learning_rate": 1.992217978936088e-05, "loss": 7.9325, "step": 42390 }, { "epoch": 0.11924146678253338, "grad_norm": 34.0, "learning_rate": 1.99221431155824e-05, "loss": 8.1091, "step": 42400 }, { "epoch": 0.11926958976998209, "grad_norm": 35.0, "learning_rate": 1.9922106433198233e-05, "loss": 8.1991, "step": 42410 }, { "epoch": 0.1192977127574308, "grad_norm": 43.25, "learning_rate": 1.992206974220841e-05, "loss": 7.6998, "step": 42420 }, { "epoch": 0.11932583574487951, "grad_norm": 29.375, "learning_rate": 1.992203304261296e-05, "loss": 8.1427, "step": 42430 }, { "epoch": 0.11935395873232822, "grad_norm": 22.125, "learning_rate": 1.992199633441192e-05, "loss": 7.4231, "step": 42440 }, { "epoch": 0.11938208171977693, "grad_norm": 40.5, "learning_rate": 1.9921959617605316e-05, "loss": 8.3654, "step": 42450 }, { "epoch": 0.11941020470722565, "grad_norm": 22.875, "learning_rate": 1.9921922892193183e-05, "loss": 8.5591, "step": 42460 }, { "epoch": 0.11943832769467436, "grad_norm": 30.625, "learning_rate": 1.992188615817555e-05, "loss": 7.637, "step": 42470 }, { "epoch": 0.11946645068212305, "grad_norm": 48.75, "learning_rate": 1.9921849415552453e-05, "loss": 8.2896, "step": 42480 }, { "epoch": 0.11949457366957177, "grad_norm": 28.0, "learning_rate": 1.9921812664323928e-05, "loss": 7.3377, "step": 42490 }, { "epoch": 0.11952269665702048, "grad_norm": 42.25, "learning_rate": 1.9921775904489995e-05, "loss": 7.9522, "step": 42500 }, { "epoch": 0.11955081964446919, "grad_norm": 24.0, "learning_rate": 1.9921739136050692e-05, "loss": 7.8872, "step": 42510 }, { "epoch": 0.1195789426319179, "grad_norm": 76.0, "learning_rate": 1.9921702359006053e-05, "loss": 8.4018, "step": 42520 }, { "epoch": 0.11960706561936661, "grad_norm": 22.5, "learning_rate": 1.9921665573356104e-05, "loss": 8.0526, "step": 42530 }, { "epoch": 0.11963518860681532, "grad_norm": 41.75, "learning_rate": 1.9921628779100884e-05, "loss": 7.7303, "step": 42540 }, { "epoch": 0.11966331159426404, "grad_norm": 20.375, "learning_rate": 1.9921591976240418e-05, "loss": 7.2234, "step": 42550 }, { "epoch": 0.11969143458171275, "grad_norm": 37.0, "learning_rate": 1.9921555164774745e-05, "loss": 8.2108, "step": 42560 }, { "epoch": 0.11971955756916146, "grad_norm": 28.625, "learning_rate": 1.992151834470389e-05, "loss": 8.2093, "step": 42570 }, { "epoch": 0.11974768055661017, "grad_norm": 31.125, "learning_rate": 1.992148151602789e-05, "loss": 8.0135, "step": 42580 }, { "epoch": 0.11977580354405888, "grad_norm": 32.5, "learning_rate": 1.9921444678746778e-05, "loss": 8.3989, "step": 42590 }, { "epoch": 0.11980392653150759, "grad_norm": 31.125, "learning_rate": 1.992140783286058e-05, "loss": 8.0066, "step": 42600 }, { "epoch": 0.1198320495189563, "grad_norm": 35.75, "learning_rate": 1.992137097836933e-05, "loss": 8.2794, "step": 42610 }, { "epoch": 0.11986017250640502, "grad_norm": 48.25, "learning_rate": 1.9921334115273064e-05, "loss": 7.5298, "step": 42620 }, { "epoch": 0.11988829549385373, "grad_norm": 28.5, "learning_rate": 1.992129724357181e-05, "loss": 7.2895, "step": 42630 }, { "epoch": 0.11991641848130243, "grad_norm": 27.625, "learning_rate": 1.99212603632656e-05, "loss": 8.1777, "step": 42640 }, { "epoch": 0.11994454146875114, "grad_norm": 29.375, "learning_rate": 1.9921223474354467e-05, "loss": 7.1873, "step": 42650 }, { "epoch": 0.11997266445619985, "grad_norm": 38.75, "learning_rate": 1.9921186576838446e-05, "loss": 8.0276, "step": 42660 }, { "epoch": 0.12000078744364856, "grad_norm": 32.5, "learning_rate": 1.992114967071756e-05, "loss": 8.1141, "step": 42670 }, { "epoch": 0.12002891043109727, "grad_norm": 32.0, "learning_rate": 1.992111275599185e-05, "loss": 8.3714, "step": 42680 }, { "epoch": 0.12005703341854598, "grad_norm": 22.125, "learning_rate": 1.9921075832661346e-05, "loss": 8.3235, "step": 42690 }, { "epoch": 0.1200851564059947, "grad_norm": 27.0, "learning_rate": 1.992103890072608e-05, "loss": 8.3359, "step": 42700 }, { "epoch": 0.1201132793934434, "grad_norm": 23.125, "learning_rate": 1.992100196018608e-05, "loss": 6.9898, "step": 42710 }, { "epoch": 0.12014140238089212, "grad_norm": 24.125, "learning_rate": 1.9920965011041385e-05, "loss": 7.6583, "step": 42720 }, { "epoch": 0.12016952536834083, "grad_norm": 38.5, "learning_rate": 1.992092805329202e-05, "loss": 7.0192, "step": 42730 }, { "epoch": 0.12019764835578954, "grad_norm": 25.875, "learning_rate": 1.992089108693802e-05, "loss": 7.9117, "step": 42740 }, { "epoch": 0.12022577134323825, "grad_norm": 35.75, "learning_rate": 1.992085411197942e-05, "loss": 7.9449, "step": 42750 }, { "epoch": 0.12025389433068696, "grad_norm": 33.75, "learning_rate": 1.9920817128416247e-05, "loss": 8.2311, "step": 42760 }, { "epoch": 0.12028201731813568, "grad_norm": 39.0, "learning_rate": 1.9920780136248535e-05, "loss": 7.3514, "step": 42770 }, { "epoch": 0.12031014030558439, "grad_norm": 33.5, "learning_rate": 1.9920743135476318e-05, "loss": 8.6543, "step": 42780 }, { "epoch": 0.1203382632930331, "grad_norm": 38.5, "learning_rate": 1.9920706126099628e-05, "loss": 7.7001, "step": 42790 }, { "epoch": 0.1203663862804818, "grad_norm": 34.5, "learning_rate": 1.9920669108118493e-05, "loss": 7.618, "step": 42800 }, { "epoch": 0.12039450926793051, "grad_norm": 30.625, "learning_rate": 1.992063208153295e-05, "loss": 7.7473, "step": 42810 }, { "epoch": 0.12042263225537922, "grad_norm": 23.375, "learning_rate": 1.992059504634303e-05, "loss": 7.9674, "step": 42820 }, { "epoch": 0.12045075524282793, "grad_norm": 29.75, "learning_rate": 1.992055800254876e-05, "loss": 7.6763, "step": 42830 }, { "epoch": 0.12047887823027664, "grad_norm": 41.0, "learning_rate": 1.9920520950150174e-05, "loss": 7.8634, "step": 42840 }, { "epoch": 0.12050700121772535, "grad_norm": 19.125, "learning_rate": 1.9920483889147314e-05, "loss": 7.7972, "step": 42850 }, { "epoch": 0.12053512420517407, "grad_norm": 28.125, "learning_rate": 1.99204468195402e-05, "loss": 7.7158, "step": 42860 }, { "epoch": 0.12056324719262278, "grad_norm": 27.375, "learning_rate": 1.9920409741328868e-05, "loss": 8.3534, "step": 42870 }, { "epoch": 0.12059137018007149, "grad_norm": 50.0, "learning_rate": 1.9920372654513353e-05, "loss": 8.1732, "step": 42880 }, { "epoch": 0.1206194931675202, "grad_norm": 36.5, "learning_rate": 1.992033555909368e-05, "loss": 7.8201, "step": 42890 }, { "epoch": 0.12064761615496891, "grad_norm": 20.375, "learning_rate": 1.9920298455069892e-05, "loss": 8.4392, "step": 42900 }, { "epoch": 0.12067573914241762, "grad_norm": 22.25, "learning_rate": 1.9920261342442013e-05, "loss": 7.5271, "step": 42910 }, { "epoch": 0.12070386212986634, "grad_norm": 34.5, "learning_rate": 1.992022422121008e-05, "loss": 7.8112, "step": 42920 }, { "epoch": 0.12073198511731505, "grad_norm": 41.5, "learning_rate": 1.992018709137412e-05, "loss": 7.2164, "step": 42930 }, { "epoch": 0.12076010810476376, "grad_norm": 37.0, "learning_rate": 1.992014995293417e-05, "loss": 7.6638, "step": 42940 }, { "epoch": 0.12078823109221247, "grad_norm": 36.5, "learning_rate": 1.992011280589026e-05, "loss": 7.2362, "step": 42950 }, { "epoch": 0.12081635407966117, "grad_norm": 41.75, "learning_rate": 1.9920075650242417e-05, "loss": 8.114, "step": 42960 }, { "epoch": 0.12084447706710988, "grad_norm": 36.75, "learning_rate": 1.992003848599068e-05, "loss": 8.6004, "step": 42970 }, { "epoch": 0.12087260005455859, "grad_norm": 26.5, "learning_rate": 1.9920001313135082e-05, "loss": 8.5947, "step": 42980 }, { "epoch": 0.1209007230420073, "grad_norm": 18.25, "learning_rate": 1.9919964131675656e-05, "loss": 8.2641, "step": 42990 }, { "epoch": 0.12092884602945601, "grad_norm": 21.0, "learning_rate": 1.9919926941612426e-05, "loss": 7.8477, "step": 43000 }, { "epoch": 0.12095696901690473, "grad_norm": 21.0, "learning_rate": 1.991988974294543e-05, "loss": 7.9433, "step": 43010 }, { "epoch": 0.12098509200435344, "grad_norm": 51.5, "learning_rate": 1.9919852535674706e-05, "loss": 7.5675, "step": 43020 }, { "epoch": 0.12101321499180215, "grad_norm": 61.5, "learning_rate": 1.9919815319800274e-05, "loss": 7.9803, "step": 43030 }, { "epoch": 0.12104133797925086, "grad_norm": 34.0, "learning_rate": 1.9919778095322172e-05, "loss": 8.0937, "step": 43040 }, { "epoch": 0.12106946096669957, "grad_norm": 29.75, "learning_rate": 1.9919740862240436e-05, "loss": 7.818, "step": 43050 }, { "epoch": 0.12109758395414828, "grad_norm": 29.5, "learning_rate": 1.991970362055509e-05, "loss": 8.4118, "step": 43060 }, { "epoch": 0.121125706941597, "grad_norm": 43.0, "learning_rate": 1.9919666370266174e-05, "loss": 7.9424, "step": 43070 }, { "epoch": 0.1211538299290457, "grad_norm": 21.25, "learning_rate": 1.991962911137372e-05, "loss": 7.6413, "step": 43080 }, { "epoch": 0.12118195291649442, "grad_norm": 43.25, "learning_rate": 1.9919591843877753e-05, "loss": 7.8705, "step": 43090 }, { "epoch": 0.12121007590394313, "grad_norm": 39.5, "learning_rate": 1.9919554567778316e-05, "loss": 8.3751, "step": 43100 }, { "epoch": 0.12123819889139184, "grad_norm": 23.75, "learning_rate": 1.991951728307543e-05, "loss": 8.0245, "step": 43110 }, { "epoch": 0.12126632187884055, "grad_norm": 21.375, "learning_rate": 1.9919479989769136e-05, "loss": 8.1295, "step": 43120 }, { "epoch": 0.12129444486628925, "grad_norm": 37.75, "learning_rate": 1.9919442687859464e-05, "loss": 7.4788, "step": 43130 }, { "epoch": 0.12132256785373796, "grad_norm": 31.125, "learning_rate": 1.991940537734644e-05, "loss": 8.4124, "step": 43140 }, { "epoch": 0.12135069084118667, "grad_norm": 37.75, "learning_rate": 1.9919368058230107e-05, "loss": 8.1673, "step": 43150 }, { "epoch": 0.12137881382863538, "grad_norm": 36.25, "learning_rate": 1.991933073051049e-05, "loss": 8.0086, "step": 43160 }, { "epoch": 0.1214069368160841, "grad_norm": 32.0, "learning_rate": 1.9919293394187624e-05, "loss": 8.0508, "step": 43170 }, { "epoch": 0.12143505980353281, "grad_norm": 28.75, "learning_rate": 1.9919256049261543e-05, "loss": 7.8168, "step": 43180 }, { "epoch": 0.12146318279098152, "grad_norm": 33.75, "learning_rate": 1.9919218695732276e-05, "loss": 7.3593, "step": 43190 }, { "epoch": 0.12149130577843023, "grad_norm": 34.0, "learning_rate": 1.9919181333599856e-05, "loss": 8.2071, "step": 43200 }, { "epoch": 0.12151942876587894, "grad_norm": 27.25, "learning_rate": 1.9919143962864318e-05, "loss": 8.2301, "step": 43210 }, { "epoch": 0.12154755175332765, "grad_norm": 20.875, "learning_rate": 1.991910658352569e-05, "loss": 7.572, "step": 43220 }, { "epoch": 0.12157567474077637, "grad_norm": 45.75, "learning_rate": 1.991906919558401e-05, "loss": 7.3833, "step": 43230 }, { "epoch": 0.12160379772822508, "grad_norm": 30.625, "learning_rate": 1.9919031799039303e-05, "loss": 8.8688, "step": 43240 }, { "epoch": 0.12163192071567379, "grad_norm": 26.0, "learning_rate": 1.991899439389161e-05, "loss": 8.1349, "step": 43250 }, { "epoch": 0.1216600437031225, "grad_norm": 32.0, "learning_rate": 1.9918956980140956e-05, "loss": 8.0715, "step": 43260 }, { "epoch": 0.12168816669057121, "grad_norm": 37.25, "learning_rate": 1.991891955778738e-05, "loss": 7.9527, "step": 43270 }, { "epoch": 0.12171628967801992, "grad_norm": 30.375, "learning_rate": 1.9918882126830913e-05, "loss": 7.6584, "step": 43280 }, { "epoch": 0.12174441266546862, "grad_norm": 34.75, "learning_rate": 1.991884468727158e-05, "loss": 8.1153, "step": 43290 }, { "epoch": 0.12177253565291733, "grad_norm": 49.25, "learning_rate": 1.991880723910942e-05, "loss": 8.3004, "step": 43300 }, { "epoch": 0.12180065864036604, "grad_norm": 29.875, "learning_rate": 1.991876978234447e-05, "loss": 8.0323, "step": 43310 }, { "epoch": 0.12182878162781476, "grad_norm": 30.0, "learning_rate": 1.991873231697675e-05, "loss": 7.5858, "step": 43320 }, { "epoch": 0.12185690461526347, "grad_norm": 39.5, "learning_rate": 1.99186948430063e-05, "loss": 7.4457, "step": 43330 }, { "epoch": 0.12188502760271218, "grad_norm": 33.0, "learning_rate": 1.991865736043316e-05, "loss": 7.8812, "step": 43340 }, { "epoch": 0.12191315059016089, "grad_norm": 32.75, "learning_rate": 1.9918619869257348e-05, "loss": 7.5621, "step": 43350 }, { "epoch": 0.1219412735776096, "grad_norm": 27.375, "learning_rate": 1.9918582369478906e-05, "loss": 7.3778, "step": 43360 }, { "epoch": 0.12196939656505831, "grad_norm": 22.75, "learning_rate": 1.9918544861097864e-05, "loss": 8.5422, "step": 43370 }, { "epoch": 0.12199751955250702, "grad_norm": 27.375, "learning_rate": 1.991850734411425e-05, "loss": 7.2368, "step": 43380 }, { "epoch": 0.12202564253995574, "grad_norm": 40.25, "learning_rate": 1.9918469818528104e-05, "loss": 8.5729, "step": 43390 }, { "epoch": 0.12205376552740445, "grad_norm": 24.875, "learning_rate": 1.9918432284339456e-05, "loss": 7.8115, "step": 43400 }, { "epoch": 0.12208188851485316, "grad_norm": 53.0, "learning_rate": 1.991839474154834e-05, "loss": 8.5151, "step": 43410 }, { "epoch": 0.12211001150230187, "grad_norm": 27.125, "learning_rate": 1.991835719015478e-05, "loss": 7.097, "step": 43420 }, { "epoch": 0.12213813448975058, "grad_norm": 30.25, "learning_rate": 1.991831963015882e-05, "loss": 7.3283, "step": 43430 }, { "epoch": 0.1221662574771993, "grad_norm": 23.125, "learning_rate": 1.9918282061560484e-05, "loss": 7.653, "step": 43440 }, { "epoch": 0.12219438046464799, "grad_norm": 45.5, "learning_rate": 1.991824448435981e-05, "loss": 7.8538, "step": 43450 }, { "epoch": 0.1222225034520967, "grad_norm": 23.625, "learning_rate": 1.9918206898556828e-05, "loss": 8.9023, "step": 43460 }, { "epoch": 0.12225062643954542, "grad_norm": 29.875, "learning_rate": 1.9918169304151575e-05, "loss": 8.0306, "step": 43470 }, { "epoch": 0.12227874942699413, "grad_norm": 28.25, "learning_rate": 1.9918131701144075e-05, "loss": 7.9355, "step": 43480 }, { "epoch": 0.12230687241444284, "grad_norm": 28.625, "learning_rate": 1.991809408953437e-05, "loss": 7.4743, "step": 43490 }, { "epoch": 0.12233499540189155, "grad_norm": 46.75, "learning_rate": 1.9918056469322482e-05, "loss": 7.602, "step": 43500 }, { "epoch": 0.12236311838934026, "grad_norm": 25.125, "learning_rate": 1.9918018840508453e-05, "loss": 9.1103, "step": 43510 }, { "epoch": 0.12239124137678897, "grad_norm": 30.0, "learning_rate": 1.9917981203092313e-05, "loss": 7.5116, "step": 43520 }, { "epoch": 0.12241936436423768, "grad_norm": 24.125, "learning_rate": 1.9917943557074097e-05, "loss": 7.6431, "step": 43530 }, { "epoch": 0.1224474873516864, "grad_norm": 30.875, "learning_rate": 1.991790590245383e-05, "loss": 8.1252, "step": 43540 }, { "epoch": 0.12247561033913511, "grad_norm": 26.125, "learning_rate": 1.991786823923155e-05, "loss": 8.6345, "step": 43550 }, { "epoch": 0.12250373332658382, "grad_norm": 42.5, "learning_rate": 1.991783056740729e-05, "loss": 8.699, "step": 43560 }, { "epoch": 0.12253185631403253, "grad_norm": 34.75, "learning_rate": 1.9917792886981083e-05, "loss": 7.2963, "step": 43570 }, { "epoch": 0.12255997930148124, "grad_norm": 25.625, "learning_rate": 1.991775519795296e-05, "loss": 7.4507, "step": 43580 }, { "epoch": 0.12258810228892995, "grad_norm": 29.125, "learning_rate": 1.9917717500322952e-05, "loss": 7.2891, "step": 43590 }, { "epoch": 0.12261622527637867, "grad_norm": 24.625, "learning_rate": 1.9917679794091094e-05, "loss": 7.9408, "step": 43600 }, { "epoch": 0.12264434826382736, "grad_norm": 151.0, "learning_rate": 1.991764207925742e-05, "loss": 7.6819, "step": 43610 }, { "epoch": 0.12267247125127607, "grad_norm": 23.375, "learning_rate": 1.991760435582196e-05, "loss": 6.8868, "step": 43620 }, { "epoch": 0.12270059423872479, "grad_norm": 26.25, "learning_rate": 1.991756662378475e-05, "loss": 7.0039, "step": 43630 }, { "epoch": 0.1227287172261735, "grad_norm": 27.875, "learning_rate": 1.991752888314582e-05, "loss": 8.2015, "step": 43640 }, { "epoch": 0.12275684021362221, "grad_norm": 44.25, "learning_rate": 1.99174911339052e-05, "loss": 7.7878, "step": 43650 }, { "epoch": 0.12278496320107092, "grad_norm": 26.25, "learning_rate": 1.991745337606293e-05, "loss": 7.4298, "step": 43660 }, { "epoch": 0.12281308618851963, "grad_norm": 27.75, "learning_rate": 1.991741560961904e-05, "loss": 8.3192, "step": 43670 }, { "epoch": 0.12284120917596834, "grad_norm": 26.25, "learning_rate": 1.9917377834573557e-05, "loss": 8.4248, "step": 43680 }, { "epoch": 0.12286933216341706, "grad_norm": 25.625, "learning_rate": 1.991734005092652e-05, "loss": 8.1707, "step": 43690 }, { "epoch": 0.12289745515086577, "grad_norm": 22.0, "learning_rate": 1.9917302258677962e-05, "loss": 7.7223, "step": 43700 }, { "epoch": 0.12292557813831448, "grad_norm": 28.625, "learning_rate": 1.9917264457827913e-05, "loss": 8.0313, "step": 43710 }, { "epoch": 0.12295370112576319, "grad_norm": 21.875, "learning_rate": 1.9917226648376407e-05, "loss": 7.4001, "step": 43720 }, { "epoch": 0.1229818241132119, "grad_norm": 29.25, "learning_rate": 1.9917188830323474e-05, "loss": 8.0921, "step": 43730 }, { "epoch": 0.12300994710066061, "grad_norm": 22.875, "learning_rate": 1.9917151003669153e-05, "loss": 7.9055, "step": 43740 }, { "epoch": 0.12303807008810932, "grad_norm": 33.5, "learning_rate": 1.9917113168413472e-05, "loss": 7.8247, "step": 43750 }, { "epoch": 0.12306619307555804, "grad_norm": 38.5, "learning_rate": 1.9917075324556464e-05, "loss": 7.8692, "step": 43760 }, { "epoch": 0.12309431606300675, "grad_norm": 29.625, "learning_rate": 1.991703747209816e-05, "loss": 7.8179, "step": 43770 }, { "epoch": 0.12312243905045545, "grad_norm": 33.5, "learning_rate": 1.99169996110386e-05, "loss": 8.3988, "step": 43780 }, { "epoch": 0.12315056203790416, "grad_norm": 35.5, "learning_rate": 1.9916961741377814e-05, "loss": 7.8793, "step": 43790 }, { "epoch": 0.12317868502535287, "grad_norm": 29.75, "learning_rate": 1.9916923863115828e-05, "loss": 8.2983, "step": 43800 }, { "epoch": 0.12320680801280158, "grad_norm": 29.875, "learning_rate": 1.9916885976252683e-05, "loss": 8.2648, "step": 43810 }, { "epoch": 0.12323493100025029, "grad_norm": 28.625, "learning_rate": 1.991684808078841e-05, "loss": 8.0012, "step": 43820 }, { "epoch": 0.123263053987699, "grad_norm": 23.625, "learning_rate": 1.991681017672304e-05, "loss": 8.2324, "step": 43830 }, { "epoch": 0.12329117697514771, "grad_norm": 32.5, "learning_rate": 1.9916772264056604e-05, "loss": 8.0498, "step": 43840 }, { "epoch": 0.12331929996259643, "grad_norm": 43.0, "learning_rate": 1.991673434278914e-05, "loss": 8.2379, "step": 43850 }, { "epoch": 0.12334742295004514, "grad_norm": 27.25, "learning_rate": 1.991669641292068e-05, "loss": 8.0651, "step": 43860 }, { "epoch": 0.12337554593749385, "grad_norm": 27.125, "learning_rate": 1.9916658474451252e-05, "loss": 7.9731, "step": 43870 }, { "epoch": 0.12340366892494256, "grad_norm": 30.5, "learning_rate": 1.9916620527380893e-05, "loss": 7.6134, "step": 43880 }, { "epoch": 0.12343179191239127, "grad_norm": 22.5, "learning_rate": 1.991658257170964e-05, "loss": 8.1534, "step": 43890 }, { "epoch": 0.12345991489983998, "grad_norm": 21.875, "learning_rate": 1.9916544607437517e-05, "loss": 7.5378, "step": 43900 }, { "epoch": 0.1234880378872887, "grad_norm": 31.75, "learning_rate": 1.991650663456456e-05, "loss": 7.6263, "step": 43910 }, { "epoch": 0.12351616087473741, "grad_norm": 36.75, "learning_rate": 1.9916468653090802e-05, "loss": 7.3201, "step": 43920 }, { "epoch": 0.12354428386218612, "grad_norm": 47.0, "learning_rate": 1.9916430663016283e-05, "loss": 7.5229, "step": 43930 }, { "epoch": 0.12357240684963482, "grad_norm": 33.75, "learning_rate": 1.9916392664341028e-05, "loss": 7.271, "step": 43940 }, { "epoch": 0.12360052983708353, "grad_norm": 31.0, "learning_rate": 1.991635465706507e-05, "loss": 7.6172, "step": 43950 }, { "epoch": 0.12362865282453224, "grad_norm": 37.0, "learning_rate": 1.991631664118844e-05, "loss": 8.4038, "step": 43960 }, { "epoch": 0.12365677581198095, "grad_norm": 34.0, "learning_rate": 1.991627861671118e-05, "loss": 8.38, "step": 43970 }, { "epoch": 0.12368489879942966, "grad_norm": 25.75, "learning_rate": 1.9916240583633318e-05, "loss": 8.4333, "step": 43980 }, { "epoch": 0.12371302178687837, "grad_norm": 31.625, "learning_rate": 1.9916202541954885e-05, "loss": 7.6844, "step": 43990 }, { "epoch": 0.12374114477432709, "grad_norm": 26.25, "learning_rate": 1.9916164491675916e-05, "loss": 8.0323, "step": 44000 }, { "epoch": 0.1237692677617758, "grad_norm": 35.25, "learning_rate": 1.9916126432796443e-05, "loss": 8.7711, "step": 44010 }, { "epoch": 0.12379739074922451, "grad_norm": 22.0, "learning_rate": 1.9916088365316502e-05, "loss": 7.7043, "step": 44020 }, { "epoch": 0.12382551373667322, "grad_norm": 28.875, "learning_rate": 1.991605028923612e-05, "loss": 7.6165, "step": 44030 }, { "epoch": 0.12385363672412193, "grad_norm": 28.375, "learning_rate": 1.9916012204555336e-05, "loss": 8.4116, "step": 44040 }, { "epoch": 0.12388175971157064, "grad_norm": 35.0, "learning_rate": 1.9915974111274182e-05, "loss": 8.0699, "step": 44050 }, { "epoch": 0.12390988269901936, "grad_norm": 23.25, "learning_rate": 1.991593600939269e-05, "loss": 7.9621, "step": 44060 }, { "epoch": 0.12393800568646807, "grad_norm": 41.25, "learning_rate": 1.9915897898910888e-05, "loss": 7.5328, "step": 44070 }, { "epoch": 0.12396612867391678, "grad_norm": 35.0, "learning_rate": 1.9915859779828823e-05, "loss": 7.6328, "step": 44080 }, { "epoch": 0.12399425166136549, "grad_norm": 22.25, "learning_rate": 1.9915821652146513e-05, "loss": 7.4859, "step": 44090 }, { "epoch": 0.12402237464881419, "grad_norm": 35.75, "learning_rate": 1.9915783515863997e-05, "loss": 7.3922, "step": 44100 }, { "epoch": 0.1240504976362629, "grad_norm": 30.875, "learning_rate": 1.9915745370981308e-05, "loss": 7.8724, "step": 44110 }, { "epoch": 0.12407862062371161, "grad_norm": 24.25, "learning_rate": 1.991570721749848e-05, "loss": 7.9552, "step": 44120 }, { "epoch": 0.12410674361116032, "grad_norm": 33.75, "learning_rate": 1.9915669055415547e-05, "loss": 9.177, "step": 44130 }, { "epoch": 0.12413486659860903, "grad_norm": 67.0, "learning_rate": 1.991563088473254e-05, "loss": 8.2073, "step": 44140 }, { "epoch": 0.12416298958605775, "grad_norm": 22.625, "learning_rate": 1.991559270544949e-05, "loss": 7.975, "step": 44150 }, { "epoch": 0.12419111257350646, "grad_norm": 26.0, "learning_rate": 1.9915554517566432e-05, "loss": 8.4547, "step": 44160 }, { "epoch": 0.12421923556095517, "grad_norm": 32.0, "learning_rate": 1.9915516321083403e-05, "loss": 9.1469, "step": 44170 }, { "epoch": 0.12424735854840388, "grad_norm": 30.125, "learning_rate": 1.9915478116000433e-05, "loss": 7.6905, "step": 44180 }, { "epoch": 0.12427548153585259, "grad_norm": 31.0, "learning_rate": 1.9915439902317554e-05, "loss": 7.8882, "step": 44190 }, { "epoch": 0.1243036045233013, "grad_norm": 26.75, "learning_rate": 1.99154016800348e-05, "loss": 7.2601, "step": 44200 }, { "epoch": 0.12433172751075001, "grad_norm": 29.75, "learning_rate": 1.9915363449152205e-05, "loss": 7.9805, "step": 44210 }, { "epoch": 0.12435985049819873, "grad_norm": 21.75, "learning_rate": 1.9915325209669803e-05, "loss": 8.4179, "step": 44220 }, { "epoch": 0.12438797348564744, "grad_norm": 43.0, "learning_rate": 1.991528696158762e-05, "loss": 7.4565, "step": 44230 }, { "epoch": 0.12441609647309615, "grad_norm": 25.875, "learning_rate": 1.99152487049057e-05, "loss": 7.4887, "step": 44240 }, { "epoch": 0.12444421946054486, "grad_norm": 19.375, "learning_rate": 1.991521043962407e-05, "loss": 8.0544, "step": 44250 }, { "epoch": 0.12447234244799356, "grad_norm": 40.5, "learning_rate": 1.9915172165742764e-05, "loss": 7.8951, "step": 44260 }, { "epoch": 0.12450046543544227, "grad_norm": 46.5, "learning_rate": 1.9915133883261816e-05, "loss": 7.781, "step": 44270 }, { "epoch": 0.12452858842289098, "grad_norm": 31.75, "learning_rate": 1.991509559218126e-05, "loss": 8.1637, "step": 44280 }, { "epoch": 0.1245567114103397, "grad_norm": 25.0, "learning_rate": 1.9915057292501125e-05, "loss": 7.8485, "step": 44290 }, { "epoch": 0.1245848343977884, "grad_norm": 39.0, "learning_rate": 1.9915018984221447e-05, "loss": 8.0817, "step": 44300 }, { "epoch": 0.12461295738523712, "grad_norm": 36.25, "learning_rate": 1.991498066734226e-05, "loss": 8.2061, "step": 44310 }, { "epoch": 0.12464108037268583, "grad_norm": 37.25, "learning_rate": 1.9914942341863598e-05, "loss": 7.939, "step": 44320 }, { "epoch": 0.12466920336013454, "grad_norm": 31.25, "learning_rate": 1.991490400778549e-05, "loss": 8.3724, "step": 44330 }, { "epoch": 0.12469732634758325, "grad_norm": 28.875, "learning_rate": 1.9914865665107972e-05, "loss": 8.0691, "step": 44340 }, { "epoch": 0.12472544933503196, "grad_norm": 31.375, "learning_rate": 1.991482731383108e-05, "loss": 7.817, "step": 44350 }, { "epoch": 0.12475357232248067, "grad_norm": 31.25, "learning_rate": 1.9914788953954843e-05, "loss": 8.3676, "step": 44360 }, { "epoch": 0.12478169530992939, "grad_norm": 32.75, "learning_rate": 1.9914750585479298e-05, "loss": 8.0087, "step": 44370 }, { "epoch": 0.1248098182973781, "grad_norm": 26.75, "learning_rate": 1.991471220840447e-05, "loss": 7.7846, "step": 44380 }, { "epoch": 0.12483794128482681, "grad_norm": 26.625, "learning_rate": 1.9914673822730404e-05, "loss": 8.355, "step": 44390 }, { "epoch": 0.12486606427227552, "grad_norm": 23.25, "learning_rate": 1.9914635428457125e-05, "loss": 8.1914, "step": 44400 }, { "epoch": 0.12489418725972423, "grad_norm": 26.125, "learning_rate": 1.991459702558467e-05, "loss": 7.5201, "step": 44410 }, { "epoch": 0.12492231024717294, "grad_norm": 22.5, "learning_rate": 1.9914558614113072e-05, "loss": 7.3034, "step": 44420 }, { "epoch": 0.12495043323462164, "grad_norm": 23.625, "learning_rate": 1.9914520194042365e-05, "loss": 7.4072, "step": 44430 }, { "epoch": 0.12497855622207035, "grad_norm": 31.875, "learning_rate": 1.9914481765372576e-05, "loss": 7.3925, "step": 44440 }, { "epoch": 0.12500667920951908, "grad_norm": 36.0, "learning_rate": 1.9914443328103746e-05, "loss": 8.5892, "step": 44450 }, { "epoch": 0.12503480219696778, "grad_norm": 43.25, "learning_rate": 1.9914404882235905e-05, "loss": 8.1262, "step": 44460 }, { "epoch": 0.1250629251844165, "grad_norm": 27.875, "learning_rate": 1.991436642776909e-05, "loss": 8.3774, "step": 44470 }, { "epoch": 0.1250910481718652, "grad_norm": 22.625, "learning_rate": 1.9914327964703327e-05, "loss": 8.1674, "step": 44480 }, { "epoch": 0.12511917115931392, "grad_norm": 29.0, "learning_rate": 1.9914289493038654e-05, "loss": 8.1085, "step": 44490 }, { "epoch": 0.12514729414676262, "grad_norm": 67.0, "learning_rate": 1.9914251012775106e-05, "loss": 8.2124, "step": 44500 }, { "epoch": 0.12517541713421132, "grad_norm": 29.375, "learning_rate": 1.9914212523912713e-05, "loss": 7.7332, "step": 44510 }, { "epoch": 0.12520354012166005, "grad_norm": 44.75, "learning_rate": 1.991417402645151e-05, "loss": 7.2597, "step": 44520 }, { "epoch": 0.12523166310910874, "grad_norm": 30.125, "learning_rate": 1.9914135520391532e-05, "loss": 8.8132, "step": 44530 }, { "epoch": 0.12525978609655747, "grad_norm": 32.5, "learning_rate": 1.991409700573281e-05, "loss": 8.173, "step": 44540 }, { "epoch": 0.12528790908400617, "grad_norm": 34.75, "learning_rate": 1.9914058482475376e-05, "loss": 7.7879, "step": 44550 }, { "epoch": 0.1253160320714549, "grad_norm": 30.25, "learning_rate": 1.9914019950619264e-05, "loss": 8.2806, "step": 44560 }, { "epoch": 0.1253441550589036, "grad_norm": 37.0, "learning_rate": 1.991398141016451e-05, "loss": 8.5648, "step": 44570 }, { "epoch": 0.12537227804635231, "grad_norm": 31.875, "learning_rate": 1.9913942861111147e-05, "loss": 8.1016, "step": 44580 }, { "epoch": 0.125400401033801, "grad_norm": 36.75, "learning_rate": 1.991390430345921e-05, "loss": 7.4242, "step": 44590 }, { "epoch": 0.12542852402124974, "grad_norm": 24.625, "learning_rate": 1.9913865737208726e-05, "loss": 7.926, "step": 44600 }, { "epoch": 0.12545664700869844, "grad_norm": 24.75, "learning_rate": 1.9913827162359736e-05, "loss": 8.4242, "step": 44610 }, { "epoch": 0.12548476999614716, "grad_norm": 35.0, "learning_rate": 1.9913788578912267e-05, "loss": 7.8015, "step": 44620 }, { "epoch": 0.12551289298359586, "grad_norm": 23.875, "learning_rate": 1.9913749986866357e-05, "loss": 8.2369, "step": 44630 }, { "epoch": 0.12554101597104458, "grad_norm": 34.5, "learning_rate": 1.9913711386222035e-05, "loss": 8.1834, "step": 44640 }, { "epoch": 0.12556913895849328, "grad_norm": 21.875, "learning_rate": 1.9913672776979344e-05, "loss": 8.241, "step": 44650 }, { "epoch": 0.125597261945942, "grad_norm": 22.375, "learning_rate": 1.9913634159138305e-05, "loss": 7.342, "step": 44660 }, { "epoch": 0.1256253849333907, "grad_norm": 28.25, "learning_rate": 1.9913595532698958e-05, "loss": 7.6523, "step": 44670 }, { "epoch": 0.1256535079208394, "grad_norm": 32.25, "learning_rate": 1.9913556897661338e-05, "loss": 8.2159, "step": 44680 }, { "epoch": 0.12568163090828813, "grad_norm": 33.25, "learning_rate": 1.9913518254025477e-05, "loss": 7.8621, "step": 44690 }, { "epoch": 0.12570975389573683, "grad_norm": 26.75, "learning_rate": 1.9913479601791404e-05, "loss": 7.1099, "step": 44700 }, { "epoch": 0.12573787688318555, "grad_norm": 28.25, "learning_rate": 1.9913440940959158e-05, "loss": 7.7682, "step": 44710 }, { "epoch": 0.12576599987063425, "grad_norm": 30.125, "learning_rate": 1.991340227152877e-05, "loss": 8.1713, "step": 44720 }, { "epoch": 0.12579412285808297, "grad_norm": 37.25, "learning_rate": 1.991336359350028e-05, "loss": 7.5024, "step": 44730 }, { "epoch": 0.12582224584553167, "grad_norm": 70.5, "learning_rate": 1.991332490687371e-05, "loss": 7.9639, "step": 44740 }, { "epoch": 0.1258503688329804, "grad_norm": 31.625, "learning_rate": 1.9913286211649105e-05, "loss": 7.2957, "step": 44750 }, { "epoch": 0.1258784918204291, "grad_norm": 24.125, "learning_rate": 1.9913247507826487e-05, "loss": 7.6057, "step": 44760 }, { "epoch": 0.12590661480787782, "grad_norm": 23.25, "learning_rate": 1.99132087954059e-05, "loss": 7.565, "step": 44770 }, { "epoch": 0.12593473779532652, "grad_norm": 36.75, "learning_rate": 1.991317007438737e-05, "loss": 7.8619, "step": 44780 }, { "epoch": 0.12596286078277524, "grad_norm": 32.25, "learning_rate": 1.9913131344770937e-05, "loss": 7.9224, "step": 44790 }, { "epoch": 0.12599098377022394, "grad_norm": 33.25, "learning_rate": 1.991309260655663e-05, "loss": 7.7658, "step": 44800 }, { "epoch": 0.12601910675767267, "grad_norm": 29.5, "learning_rate": 1.9913053859744485e-05, "loss": 7.4781, "step": 44810 }, { "epoch": 0.12604722974512136, "grad_norm": 25.125, "learning_rate": 1.9913015104334535e-05, "loss": 7.9938, "step": 44820 }, { "epoch": 0.12607535273257006, "grad_norm": 26.25, "learning_rate": 1.991297634032681e-05, "loss": 7.9185, "step": 44830 }, { "epoch": 0.1261034757200188, "grad_norm": 23.75, "learning_rate": 1.9912937567721348e-05, "loss": 7.5742, "step": 44840 }, { "epoch": 0.12613159870746748, "grad_norm": 27.375, "learning_rate": 1.9912898786518184e-05, "loss": 8.0946, "step": 44850 }, { "epoch": 0.1261597216949162, "grad_norm": 29.25, "learning_rate": 1.991285999671735e-05, "loss": 8.0335, "step": 44860 }, { "epoch": 0.1261878446823649, "grad_norm": 60.0, "learning_rate": 1.9912821198318874e-05, "loss": 8.1619, "step": 44870 }, { "epoch": 0.12621596766981363, "grad_norm": 23.875, "learning_rate": 1.99127823913228e-05, "loss": 7.6644, "step": 44880 }, { "epoch": 0.12624409065726233, "grad_norm": 32.5, "learning_rate": 1.9912743575729152e-05, "loss": 8.5606, "step": 44890 }, { "epoch": 0.12627221364471106, "grad_norm": 27.875, "learning_rate": 1.991270475153797e-05, "loss": 7.9702, "step": 44900 }, { "epoch": 0.12630033663215975, "grad_norm": 29.5, "learning_rate": 1.9912665918749283e-05, "loss": 7.9345, "step": 44910 }, { "epoch": 0.12632845961960848, "grad_norm": 42.0, "learning_rate": 1.9912627077363128e-05, "loss": 7.3068, "step": 44920 }, { "epoch": 0.12635658260705718, "grad_norm": 39.25, "learning_rate": 1.9912588227379537e-05, "loss": 7.3179, "step": 44930 }, { "epoch": 0.1263847055945059, "grad_norm": 41.25, "learning_rate": 1.991254936879855e-05, "loss": 8.3695, "step": 44940 }, { "epoch": 0.1264128285819546, "grad_norm": 24.375, "learning_rate": 1.991251050162019e-05, "loss": 7.7571, "step": 44950 }, { "epoch": 0.12644095156940333, "grad_norm": 28.75, "learning_rate": 1.9912471625844496e-05, "loss": 7.386, "step": 44960 }, { "epoch": 0.12646907455685202, "grad_norm": 27.5, "learning_rate": 1.99124327414715e-05, "loss": 8.2161, "step": 44970 }, { "epoch": 0.12649719754430075, "grad_norm": 25.75, "learning_rate": 1.9912393848501242e-05, "loss": 8.0966, "step": 44980 }, { "epoch": 0.12652532053174945, "grad_norm": 33.0, "learning_rate": 1.9912354946933748e-05, "loss": 7.6225, "step": 44990 }, { "epoch": 0.12655344351919814, "grad_norm": 32.5, "learning_rate": 1.9912316036769056e-05, "loss": 8.0792, "step": 45000 } ], "logging_steps": 10, "max_steps": 1066743, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.300677993447987e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }