{ "best_metric": null, "best_model_checkpoint": null, "epoch": 18.717996723613386, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00468055230517201, "grad_norm": 113.99021911621094, "learning_rate": 2.747252747252747e-07, "loss": 10.0694, "step": 10 }, { "epoch": 0.00936110461034402, "grad_norm": 103.04177856445312, "learning_rate": 5.244755244755246e-07, "loss": 9.9547, "step": 20 }, { "epoch": 0.01404165691551603, "grad_norm": 58.11725997924805, "learning_rate": 7.742257742257742e-07, "loss": 8.5087, "step": 30 }, { "epoch": 0.01872220922068804, "grad_norm": 18.604448318481445, "learning_rate": 1.023976023976024e-06, "loss": 6.8387, "step": 40 }, { "epoch": 0.02340276152586005, "grad_norm": 9.087578773498535, "learning_rate": 1.2737262737262739e-06, "loss": 5.7037, "step": 50 }, { "epoch": 0.02808331383103206, "grad_norm": 4.204442977905273, "learning_rate": 1.5234765234765236e-06, "loss": 4.7015, "step": 60 }, { "epoch": 0.03276386613620407, "grad_norm": 3.4276981353759766, "learning_rate": 1.7732267732267732e-06, "loss": 4.4253, "step": 70 }, { "epoch": 0.03744441844137608, "grad_norm": 3.409438133239746, "learning_rate": 2.022977022977023e-06, "loss": 4.0873, "step": 80 }, { "epoch": 0.04212497074654809, "grad_norm": 2.13608455657959, "learning_rate": 2.2727272727272728e-06, "loss": 3.8436, "step": 90 }, { "epoch": 0.0468055230517201, "grad_norm": 2.4906747341156006, "learning_rate": 2.5224775224775224e-06, "loss": 3.5869, "step": 100 }, { "epoch": 0.05148607535689211, "grad_norm": 2.6044459342956543, "learning_rate": 2.772227772227772e-06, "loss": 3.3837, "step": 110 }, { "epoch": 0.05616662766206412, "grad_norm": 2.443059206008911, "learning_rate": 3.0219780219780218e-06, "loss": 3.2371, "step": 120 }, { "epoch": 0.06084717996723613, "grad_norm": 2.446427822113037, "learning_rate": 3.2717282717282723e-06, "loss": 3.1378, "step": 130 }, { "epoch": 0.06552773227240814, "grad_norm": 2.4115986824035645, "learning_rate": 3.521478521478522e-06, "loss": 3.0157, "step": 140 }, { "epoch": 0.07020828457758016, "grad_norm": 2.655240774154663, "learning_rate": 3.771228771228771e-06, "loss": 2.9746, "step": 150 }, { "epoch": 0.07488883688275216, "grad_norm": 2.826531171798706, "learning_rate": 4.020979020979021e-06, "loss": 2.8406, "step": 160 }, { "epoch": 0.07956938918792418, "grad_norm": 2.269925117492676, "learning_rate": 4.270729270729271e-06, "loss": 2.8085, "step": 170 }, { "epoch": 0.08424994149309618, "grad_norm": 3.9025580883026123, "learning_rate": 4.52047952047952e-06, "loss": 2.7488, "step": 180 }, { "epoch": 0.0889304937982682, "grad_norm": 3.199418306350708, "learning_rate": 4.77022977022977e-06, "loss": 2.6638, "step": 190 }, { "epoch": 0.0936110461034402, "grad_norm": 5.408358573913574, "learning_rate": 5.01998001998002e-06, "loss": 2.6184, "step": 200 }, { "epoch": 0.09829159840861222, "grad_norm": 3.0930752754211426, "learning_rate": 5.26973026973027e-06, "loss": 2.5562, "step": 210 }, { "epoch": 0.10297215071378422, "grad_norm": 4.07468318939209, "learning_rate": 5.51948051948052e-06, "loss": 2.5073, "step": 220 }, { "epoch": 0.10765270301895624, "grad_norm": 2.52290415763855, "learning_rate": 5.76923076923077e-06, "loss": 2.4085, "step": 230 }, { "epoch": 0.11233325532412824, "grad_norm": 3.1595826148986816, "learning_rate": 6.018981018981019e-06, "loss": 2.3178, "step": 240 }, { "epoch": 0.11701380762930026, "grad_norm": 2.94028902053833, "learning_rate": 6.268731268731269e-06, "loss": 2.2731, "step": 250 }, { "epoch": 0.12169435993447227, "grad_norm": 5.33275842666626, "learning_rate": 6.51848151848152e-06, "loss": 2.1856, "step": 260 }, { "epoch": 0.12637491223964428, "grad_norm": 4.337407112121582, "learning_rate": 6.76823176823177e-06, "loss": 2.1669, "step": 270 }, { "epoch": 0.13105546454481629, "grad_norm": 3.2848076820373535, "learning_rate": 7.017982017982019e-06, "loss": 2.0925, "step": 280 }, { "epoch": 0.1357360168499883, "grad_norm": 4.74451208114624, "learning_rate": 7.267732267732269e-06, "loss": 2.0596, "step": 290 }, { "epoch": 0.14041656915516032, "grad_norm": 4.024461269378662, "learning_rate": 7.517482517482519e-06, "loss": 2.0135, "step": 300 }, { "epoch": 0.14509712146033232, "grad_norm": 3.2569680213928223, "learning_rate": 7.767232767232769e-06, "loss": 1.9554, "step": 310 }, { "epoch": 0.14977767376550433, "grad_norm": 3.66388201713562, "learning_rate": 8.016983016983017e-06, "loss": 1.9246, "step": 320 }, { "epoch": 0.15445822607067633, "grad_norm": 4.457973003387451, "learning_rate": 8.266733266733269e-06, "loss": 1.8666, "step": 330 }, { "epoch": 0.15913877837584836, "grad_norm": 4.45363187789917, "learning_rate": 8.516483516483519e-06, "loss": 1.8309, "step": 340 }, { "epoch": 0.16381933068102036, "grad_norm": 5.552116394042969, "learning_rate": 8.766233766233767e-06, "loss": 1.7863, "step": 350 }, { "epoch": 0.16849988298619237, "grad_norm": 5.3646697998046875, "learning_rate": 9.015984015984016e-06, "loss": 1.7577, "step": 360 }, { "epoch": 0.17318043529136437, "grad_norm": 3.436530113220215, "learning_rate": 9.265734265734268e-06, "loss": 1.7173, "step": 370 }, { "epoch": 0.1778609875965364, "grad_norm": 7.08469295501709, "learning_rate": 9.515484515484518e-06, "loss": 1.6778, "step": 380 }, { "epoch": 0.1825415399017084, "grad_norm": 4.826805114746094, "learning_rate": 9.765234765234766e-06, "loss": 1.6565, "step": 390 }, { "epoch": 0.1872220922068804, "grad_norm": 4.957118988037109, "learning_rate": 1.0014985014985016e-05, "loss": 1.5986, "step": 400 }, { "epoch": 0.1919026445120524, "grad_norm": 5.167900085449219, "learning_rate": 1.0264735264735266e-05, "loss": 1.6084, "step": 410 }, { "epoch": 0.19658319681722444, "grad_norm": 5.682621955871582, "learning_rate": 1.0514485514485516e-05, "loss": 1.5829, "step": 420 }, { "epoch": 0.20126374912239645, "grad_norm": 4.279897689819336, "learning_rate": 1.0764235764235765e-05, "loss": 1.5396, "step": 430 }, { "epoch": 0.20594430142756845, "grad_norm": 4.7789692878723145, "learning_rate": 1.1013986013986015e-05, "loss": 1.4936, "step": 440 }, { "epoch": 0.21062485373274045, "grad_norm": 6.312705039978027, "learning_rate": 1.1263736263736267e-05, "loss": 1.5265, "step": 450 }, { "epoch": 0.21530540603791248, "grad_norm": 5.130682945251465, "learning_rate": 1.1513486513486515e-05, "loss": 1.4738, "step": 460 }, { "epoch": 0.2199859583430845, "grad_norm": 4.651071071624756, "learning_rate": 1.1763236763236763e-05, "loss": 1.448, "step": 470 }, { "epoch": 0.2246665106482565, "grad_norm": 5.198265552520752, "learning_rate": 1.2012987012987014e-05, "loss": 1.4097, "step": 480 }, { "epoch": 0.2293470629534285, "grad_norm": 5.103764057159424, "learning_rate": 1.2262737262737264e-05, "loss": 1.4097, "step": 490 }, { "epoch": 0.23402761525860052, "grad_norm": 6.41683292388916, "learning_rate": 1.2512487512487514e-05, "loss": 1.3652, "step": 500 }, { "epoch": 0.23870816756377253, "grad_norm": 6.282182216644287, "learning_rate": 1.2762237762237764e-05, "loss": 1.4102, "step": 510 }, { "epoch": 0.24338871986894453, "grad_norm": 5.09309196472168, "learning_rate": 1.3011988011988014e-05, "loss": 1.3643, "step": 520 }, { "epoch": 0.24806927217411653, "grad_norm": 4.40934944152832, "learning_rate": 1.3261738261738262e-05, "loss": 1.3616, "step": 530 }, { "epoch": 0.25274982447928857, "grad_norm": 6.133150577545166, "learning_rate": 1.3511488511488512e-05, "loss": 1.3452, "step": 540 }, { "epoch": 0.25743037678446057, "grad_norm": 5.8139729499816895, "learning_rate": 1.3761238761238762e-05, "loss": 1.2833, "step": 550 }, { "epoch": 0.26211092908963257, "grad_norm": 5.788191795349121, "learning_rate": 1.4010989010989013e-05, "loss": 1.2715, "step": 560 }, { "epoch": 0.2667914813948046, "grad_norm": 6.7671074867248535, "learning_rate": 1.426073926073926e-05, "loss": 1.2782, "step": 570 }, { "epoch": 0.2714720336999766, "grad_norm": 6.770794868469238, "learning_rate": 1.4510489510489511e-05, "loss": 1.2615, "step": 580 }, { "epoch": 0.2761525860051486, "grad_norm": 4.702939987182617, "learning_rate": 1.4760239760239761e-05, "loss": 1.2534, "step": 590 }, { "epoch": 0.28083313831032064, "grad_norm": 4.541275978088379, "learning_rate": 1.5009990009990011e-05, "loss": 1.2191, "step": 600 }, { "epoch": 0.28551369061549264, "grad_norm": 4.851327419281006, "learning_rate": 1.525974025974026e-05, "loss": 1.2302, "step": 610 }, { "epoch": 0.29019424292066465, "grad_norm": 6.825650691986084, "learning_rate": 1.5509490509490508e-05, "loss": 1.2111, "step": 620 }, { "epoch": 0.29487479522583665, "grad_norm": 5.741777420043945, "learning_rate": 1.575924075924076e-05, "loss": 1.1916, "step": 630 }, { "epoch": 0.29955534753100865, "grad_norm": 5.5274882316589355, "learning_rate": 1.6008991008991008e-05, "loss": 1.1508, "step": 640 }, { "epoch": 0.30423589983618066, "grad_norm": 6.007941722869873, "learning_rate": 1.625874125874126e-05, "loss": 1.1722, "step": 650 }, { "epoch": 0.30891645214135266, "grad_norm": 4.887717247009277, "learning_rate": 1.6508491508491512e-05, "loss": 1.1369, "step": 660 }, { "epoch": 0.31359700444652466, "grad_norm": 4.4725775718688965, "learning_rate": 1.675824175824176e-05, "loss": 1.143, "step": 670 }, { "epoch": 0.3182775567516967, "grad_norm": 4.86273193359375, "learning_rate": 1.700799200799201e-05, "loss": 1.144, "step": 680 }, { "epoch": 0.3229581090568687, "grad_norm": 6.0766987800598145, "learning_rate": 1.7257742257742257e-05, "loss": 1.1184, "step": 690 }, { "epoch": 0.32763866136204073, "grad_norm": 4.458099365234375, "learning_rate": 1.7507492507492505e-05, "loss": 1.1004, "step": 700 }, { "epoch": 0.33231921366721273, "grad_norm": 5.469143867492676, "learning_rate": 1.7757242757242757e-05, "loss": 1.1625, "step": 710 }, { "epoch": 0.33699976597238473, "grad_norm": 6.150203227996826, "learning_rate": 1.8006993006993005e-05, "loss": 1.1249, "step": 720 }, { "epoch": 0.34168031827755674, "grad_norm": 3.628385066986084, "learning_rate": 1.8256743256743257e-05, "loss": 1.1262, "step": 730 }, { "epoch": 0.34636087058272874, "grad_norm": 4.50604248046875, "learning_rate": 1.850649350649351e-05, "loss": 1.0728, "step": 740 }, { "epoch": 0.35104142288790074, "grad_norm": 5.255850791931152, "learning_rate": 1.8756243756243757e-05, "loss": 1.0797, "step": 750 }, { "epoch": 0.3557219751930728, "grad_norm": 4.420411109924316, "learning_rate": 1.9005994005994006e-05, "loss": 1.0874, "step": 760 }, { "epoch": 0.3604025274982448, "grad_norm": 3.781198501586914, "learning_rate": 1.9255744255744258e-05, "loss": 1.0761, "step": 770 }, { "epoch": 0.3650830798034168, "grad_norm": 6.394122123718262, "learning_rate": 1.9505494505494506e-05, "loss": 1.0877, "step": 780 }, { "epoch": 0.3697636321085888, "grad_norm": 5.270087242126465, "learning_rate": 1.9755244755244758e-05, "loss": 1.0313, "step": 790 }, { "epoch": 0.3744441844137608, "grad_norm": 5.54249906539917, "learning_rate": 2.0004995004995006e-05, "loss": 1.0497, "step": 800 }, { "epoch": 0.3791247367189328, "grad_norm": 5.3278632164001465, "learning_rate": 2.0254745254745258e-05, "loss": 1.0526, "step": 810 }, { "epoch": 0.3838052890241048, "grad_norm": 3.612354278564453, "learning_rate": 2.0504495504495506e-05, "loss": 1.0779, "step": 820 }, { "epoch": 0.3884858413292769, "grad_norm": 4.097999095916748, "learning_rate": 2.0754245754245755e-05, "loss": 1.031, "step": 830 }, { "epoch": 0.3931663936344489, "grad_norm": 4.478975296020508, "learning_rate": 2.1003996003996003e-05, "loss": 0.9877, "step": 840 }, { "epoch": 0.3978469459396209, "grad_norm": 3.829751491546631, "learning_rate": 2.1253746253746255e-05, "loss": 1.0187, "step": 850 }, { "epoch": 0.4025274982447929, "grad_norm": 2.950035572052002, "learning_rate": 2.1503496503496503e-05, "loss": 0.9841, "step": 860 }, { "epoch": 0.4072080505499649, "grad_norm": 5.0696563720703125, "learning_rate": 2.1753246753246752e-05, "loss": 0.9973, "step": 870 }, { "epoch": 0.4118886028551369, "grad_norm": 5.203789234161377, "learning_rate": 2.2002997002997004e-05, "loss": 0.9966, "step": 880 }, { "epoch": 0.4165691551603089, "grad_norm": 5.499011516571045, "learning_rate": 2.2252747252747255e-05, "loss": 0.9711, "step": 890 }, { "epoch": 0.4212497074654809, "grad_norm": 6.685848236083984, "learning_rate": 2.2502497502497504e-05, "loss": 1.0213, "step": 900 }, { "epoch": 0.42593025977065296, "grad_norm": 5.142881393432617, "learning_rate": 2.2752247752247755e-05, "loss": 1.0193, "step": 910 }, { "epoch": 0.43061081207582497, "grad_norm": 2.7867729663848877, "learning_rate": 2.3001998001998004e-05, "loss": 1.0137, "step": 920 }, { "epoch": 0.43529136438099697, "grad_norm": 5.450408935546875, "learning_rate": 2.3251748251748252e-05, "loss": 0.9988, "step": 930 }, { "epoch": 0.439971916686169, "grad_norm": 4.581586837768555, "learning_rate": 2.35014985014985e-05, "loss": 0.9721, "step": 940 }, { "epoch": 0.444652468991341, "grad_norm": 3.9815585613250732, "learning_rate": 2.375124875124875e-05, "loss": 0.9507, "step": 950 }, { "epoch": 0.449333021296513, "grad_norm": 3.5166015625, "learning_rate": 2.4000999000999e-05, "loss": 0.9794, "step": 960 }, { "epoch": 0.454013573601685, "grad_norm": 4.782320976257324, "learning_rate": 2.4250749250749253e-05, "loss": 0.9807, "step": 970 }, { "epoch": 0.458694125906857, "grad_norm": 4.533699989318848, "learning_rate": 2.45004995004995e-05, "loss": 0.9357, "step": 980 }, { "epoch": 0.46337467821202905, "grad_norm": 4.103953838348389, "learning_rate": 2.4750249750249753e-05, "loss": 0.9399, "step": 990 }, { "epoch": 0.46805523051720105, "grad_norm": 3.016542434692383, "learning_rate": 2.5e-05, "loss": 0.9361, "step": 1000 }, { "epoch": 0.47273578282237305, "grad_norm": 3.609334707260132, "learning_rate": 2.499999634999857e-05, "loss": 0.9285, "step": 1010 }, { "epoch": 0.47741633512754506, "grad_norm": 3.2129101753234863, "learning_rate": 2.499998539999665e-05, "loss": 0.9231, "step": 1020 }, { "epoch": 0.48209688743271706, "grad_norm": 3.8144664764404297, "learning_rate": 2.499996715000134e-05, "loss": 0.949, "step": 1030 }, { "epoch": 0.48677743973788906, "grad_norm": 5.084255218505859, "learning_rate": 2.4999941600024485e-05, "loss": 0.9097, "step": 1040 }, { "epoch": 0.49145799204306106, "grad_norm": 3.652376890182495, "learning_rate": 2.4999908750082667e-05, "loss": 0.8953, "step": 1050 }, { "epoch": 0.49613854434823307, "grad_norm": 3.3343253135681152, "learning_rate": 2.49998686001972e-05, "loss": 0.9002, "step": 1060 }, { "epoch": 0.5008190966534051, "grad_norm": 4.726000785827637, "learning_rate": 2.4999821150394134e-05, "loss": 0.8662, "step": 1070 }, { "epoch": 0.5054996489585771, "grad_norm": 4.927451133728027, "learning_rate": 2.499976640070426e-05, "loss": 0.8987, "step": 1080 }, { "epoch": 0.5101802012637491, "grad_norm": 4.912125587463379, "learning_rate": 2.4999704351163107e-05, "loss": 0.8761, "step": 1090 }, { "epoch": 0.5148607535689211, "grad_norm": 5.660706996917725, "learning_rate": 2.499963500181094e-05, "loss": 0.8711, "step": 1100 }, { "epoch": 0.5195413058740932, "grad_norm": 3.181816816329956, "learning_rate": 2.4999558352692754e-05, "loss": 0.9184, "step": 1110 }, { "epoch": 0.5242218581792651, "grad_norm": 2.727332592010498, "learning_rate": 2.4999474403858285e-05, "loss": 0.8754, "step": 1120 }, { "epoch": 0.5289024104844372, "grad_norm": 5.869384288787842, "learning_rate": 2.4999383155362014e-05, "loss": 0.8839, "step": 1130 }, { "epoch": 0.5335829627896091, "grad_norm": 3.9496467113494873, "learning_rate": 2.499928460726314e-05, "loss": 0.9166, "step": 1140 }, { "epoch": 0.5382635150947812, "grad_norm": 4.840493679046631, "learning_rate": 2.499917875962562e-05, "loss": 0.8483, "step": 1150 }, { "epoch": 0.5429440673999532, "grad_norm": 4.201657772064209, "learning_rate": 2.4999065612518134e-05, "loss": 0.8587, "step": 1160 }, { "epoch": 0.5476246197051252, "grad_norm": 4.193175792694092, "learning_rate": 2.4998945166014102e-05, "loss": 0.8575, "step": 1170 }, { "epoch": 0.5523051720102972, "grad_norm": 4.076774597167969, "learning_rate": 2.4998817420191678e-05, "loss": 0.8592, "step": 1180 }, { "epoch": 0.5569857243154692, "grad_norm": 3.0956521034240723, "learning_rate": 2.4998682375133754e-05, "loss": 0.8286, "step": 1190 }, { "epoch": 0.5616662766206413, "grad_norm": 3.379718542098999, "learning_rate": 2.4998540030927965e-05, "loss": 0.8671, "step": 1200 }, { "epoch": 0.5663468289258132, "grad_norm": 4.5094380378723145, "learning_rate": 2.4998390387666675e-05, "loss": 0.8534, "step": 1210 }, { "epoch": 0.5710273812309853, "grad_norm": 4.560956954956055, "learning_rate": 2.499823344544698e-05, "loss": 0.8452, "step": 1220 }, { "epoch": 0.5757079335361572, "grad_norm": 2.7052083015441895, "learning_rate": 2.4998069204370726e-05, "loss": 0.8297, "step": 1230 }, { "epoch": 0.5803884858413293, "grad_norm": 2.980043411254883, "learning_rate": 2.4997897664544488e-05, "loss": 0.8352, "step": 1240 }, { "epoch": 0.5850690381465012, "grad_norm": 3.765872001647949, "learning_rate": 2.4997718826079567e-05, "loss": 0.8455, "step": 1250 }, { "epoch": 0.5897495904516733, "grad_norm": 3.9807660579681396, "learning_rate": 2.4997532689092013e-05, "loss": 0.8156, "step": 1260 }, { "epoch": 0.5944301427568454, "grad_norm": 4.443795680999756, "learning_rate": 2.4997339253702612e-05, "loss": 0.8286, "step": 1270 }, { "epoch": 0.5991106950620173, "grad_norm": 4.818281650543213, "learning_rate": 2.4997138520036876e-05, "loss": 0.8293, "step": 1280 }, { "epoch": 0.6037912473671894, "grad_norm": 4.464036464691162, "learning_rate": 2.499693048822507e-05, "loss": 0.8064, "step": 1290 }, { "epoch": 0.6084717996723613, "grad_norm": 3.463008165359497, "learning_rate": 2.4996715158402173e-05, "loss": 0.8061, "step": 1300 }, { "epoch": 0.6131523519775334, "grad_norm": 3.8619461059570312, "learning_rate": 2.4996492530707912e-05, "loss": 0.7996, "step": 1310 }, { "epoch": 0.6178329042827053, "grad_norm": 4.485871315002441, "learning_rate": 2.499626260528675e-05, "loss": 0.8073, "step": 1320 }, { "epoch": 0.6225134565878774, "grad_norm": 3.734315872192383, "learning_rate": 2.499602538228788e-05, "loss": 0.7987, "step": 1330 }, { "epoch": 0.6271940088930493, "grad_norm": 5.0168776512146, "learning_rate": 2.4995780861865244e-05, "loss": 0.8076, "step": 1340 }, { "epoch": 0.6318745611982214, "grad_norm": 4.573477745056152, "learning_rate": 2.4995529044177495e-05, "loss": 0.7969, "step": 1350 }, { "epoch": 0.6365551135033934, "grad_norm": 4.539201736450195, "learning_rate": 2.499526992938804e-05, "loss": 0.8068, "step": 1360 }, { "epoch": 0.6412356658085654, "grad_norm": 3.2654716968536377, "learning_rate": 2.4995003517665014e-05, "loss": 0.7942, "step": 1370 }, { "epoch": 0.6459162181137375, "grad_norm": 3.2625584602355957, "learning_rate": 2.4994729809181294e-05, "loss": 0.7515, "step": 1380 }, { "epoch": 0.6505967704189094, "grad_norm": 4.092380523681641, "learning_rate": 2.4994448804114483e-05, "loss": 0.7674, "step": 1390 }, { "epoch": 0.6552773227240815, "grad_norm": 3.36704158782959, "learning_rate": 2.499416050264692e-05, "loss": 0.7433, "step": 1400 }, { "epoch": 0.6599578750292534, "grad_norm": 2.8636553287506104, "learning_rate": 2.4993864904965684e-05, "loss": 0.7624, "step": 1410 }, { "epoch": 0.6646384273344255, "grad_norm": 3.573730230331421, "learning_rate": 2.4993562011262584e-05, "loss": 0.7523, "step": 1420 }, { "epoch": 0.6693189796395975, "grad_norm": 4.456461429595947, "learning_rate": 2.4993251821734166e-05, "loss": 0.7505, "step": 1430 }, { "epoch": 0.6739995319447695, "grad_norm": 2.322962760925293, "learning_rate": 2.4992934336581703e-05, "loss": 0.7572, "step": 1440 }, { "epoch": 0.6786800842499415, "grad_norm": 4.144594192504883, "learning_rate": 2.4992609556011214e-05, "loss": 0.7631, "step": 1450 }, { "epoch": 0.6833606365551135, "grad_norm": 4.623259544372559, "learning_rate": 2.4992277480233446e-05, "loss": 0.7703, "step": 1460 }, { "epoch": 0.6880411888602855, "grad_norm": 3.7853190898895264, "learning_rate": 2.499193810946387e-05, "loss": 0.7463, "step": 1470 }, { "epoch": 0.6927217411654575, "grad_norm": 3.079454183578491, "learning_rate": 2.4991591443922715e-05, "loss": 0.7537, "step": 1480 }, { "epoch": 0.6974022934706295, "grad_norm": 4.131396293640137, "learning_rate": 2.4991237483834916e-05, "loss": 0.7531, "step": 1490 }, { "epoch": 0.7020828457758015, "grad_norm": 3.473195791244507, "learning_rate": 2.499087622943016e-05, "loss": 0.751, "step": 1500 }, { "epoch": 0.7067633980809735, "grad_norm": 3.535104990005493, "learning_rate": 2.4990507680942858e-05, "loss": 0.7224, "step": 1510 }, { "epoch": 0.7114439503861456, "grad_norm": 3.896449089050293, "learning_rate": 2.4990131838612157e-05, "loss": 0.7747, "step": 1520 }, { "epoch": 0.7161245026913176, "grad_norm": 4.232148170471191, "learning_rate": 2.498974870268194e-05, "loss": 0.7461, "step": 1530 }, { "epoch": 0.7208050549964896, "grad_norm": 3.3962056636810303, "learning_rate": 2.4989358273400816e-05, "loss": 0.7244, "step": 1540 }, { "epoch": 0.7254856073016616, "grad_norm": 2.7601301670074463, "learning_rate": 2.4988960551022133e-05, "loss": 0.7254, "step": 1550 }, { "epoch": 0.7301661596068336, "grad_norm": 3.430008888244629, "learning_rate": 2.498855553580397e-05, "loss": 0.7425, "step": 1560 }, { "epoch": 0.7348467119120056, "grad_norm": 3.0408380031585693, "learning_rate": 2.4988143228009136e-05, "loss": 0.7505, "step": 1570 }, { "epoch": 0.7395272642171776, "grad_norm": 4.622409343719482, "learning_rate": 2.498772362790517e-05, "loss": 0.7437, "step": 1580 }, { "epoch": 0.7442078165223497, "grad_norm": 3.4763619899749756, "learning_rate": 2.4987296735764344e-05, "loss": 0.7428, "step": 1590 }, { "epoch": 0.7488883688275216, "grad_norm": 5.350825309753418, "learning_rate": 2.4986862551863675e-05, "loss": 0.7346, "step": 1600 }, { "epoch": 0.7535689211326937, "grad_norm": 3.7304649353027344, "learning_rate": 2.498642107648489e-05, "loss": 0.7525, "step": 1610 }, { "epoch": 0.7582494734378656, "grad_norm": 2.7406253814697266, "learning_rate": 2.498597230991446e-05, "loss": 0.733, "step": 1620 }, { "epoch": 0.7629300257430377, "grad_norm": 2.8434998989105225, "learning_rate": 2.4985516252443584e-05, "loss": 0.729, "step": 1630 }, { "epoch": 0.7676105780482096, "grad_norm": 2.424365997314453, "learning_rate": 2.49850529043682e-05, "loss": 0.744, "step": 1640 }, { "epoch": 0.7722911303533817, "grad_norm": 4.634812831878662, "learning_rate": 2.4984582265988958e-05, "loss": 0.7433, "step": 1650 }, { "epoch": 0.7769716826585538, "grad_norm": 2.6760475635528564, "learning_rate": 2.4984104337611257e-05, "loss": 0.736, "step": 1660 }, { "epoch": 0.7816522349637257, "grad_norm": 3.8371646404266357, "learning_rate": 2.4983619119545213e-05, "loss": 0.6897, "step": 1670 }, { "epoch": 0.7863327872688978, "grad_norm": 3.878369092941284, "learning_rate": 2.4983126612105695e-05, "loss": 0.7195, "step": 1680 }, { "epoch": 0.7910133395740697, "grad_norm": 3.508744955062866, "learning_rate": 2.498262681561227e-05, "loss": 0.6784, "step": 1690 }, { "epoch": 0.7956938918792418, "grad_norm": 3.628477096557617, "learning_rate": 2.498211973038925e-05, "loss": 0.6934, "step": 1700 }, { "epoch": 0.8003744441844137, "grad_norm": 2.0725696086883545, "learning_rate": 2.4981605356765685e-05, "loss": 0.6956, "step": 1710 }, { "epoch": 0.8050549964895858, "grad_norm": 2.2320377826690674, "learning_rate": 2.4981083695075346e-05, "loss": 0.6829, "step": 1720 }, { "epoch": 0.8097355487947577, "grad_norm": 4.491719722747803, "learning_rate": 2.4980554745656734e-05, "loss": 0.7283, "step": 1730 }, { "epoch": 0.8144161010999298, "grad_norm": 3.8385534286499023, "learning_rate": 2.4980018508853072e-05, "loss": 0.7125, "step": 1740 }, { "epoch": 0.8190966534051018, "grad_norm": 3.4204437732696533, "learning_rate": 2.4979474985012323e-05, "loss": 0.6908, "step": 1750 }, { "epoch": 0.8237772057102738, "grad_norm": 3.208190441131592, "learning_rate": 2.4978924174487172e-05, "loss": 0.6911, "step": 1760 }, { "epoch": 0.8284577580154459, "grad_norm": 5.083669185638428, "learning_rate": 2.4978366077635037e-05, "loss": 0.7425, "step": 1770 }, { "epoch": 0.8331383103206178, "grad_norm": 2.729381799697876, "learning_rate": 2.4977800694818057e-05, "loss": 0.7179, "step": 1780 }, { "epoch": 0.8378188626257899, "grad_norm": 3.4688165187835693, "learning_rate": 2.4977228026403103e-05, "loss": 0.6955, "step": 1790 }, { "epoch": 0.8424994149309618, "grad_norm": 2.9568862915039062, "learning_rate": 2.497664807276178e-05, "loss": 0.6927, "step": 1800 }, { "epoch": 0.8471799672361339, "grad_norm": 3.379720687866211, "learning_rate": 2.4976060834270404e-05, "loss": 0.684, "step": 1810 }, { "epoch": 0.8518605195413059, "grad_norm": 3.620739698410034, "learning_rate": 2.4975466311310035e-05, "loss": 0.6997, "step": 1820 }, { "epoch": 0.8565410718464779, "grad_norm": 2.392825126647949, "learning_rate": 2.4974864504266447e-05, "loss": 0.6783, "step": 1830 }, { "epoch": 0.8612216241516499, "grad_norm": 3.593935012817383, "learning_rate": 2.497425541353015e-05, "loss": 0.6527, "step": 1840 }, { "epoch": 0.8659021764568219, "grad_norm": 2.6064984798431396, "learning_rate": 2.4973639039496375e-05, "loss": 0.6785, "step": 1850 }, { "epoch": 0.8705827287619939, "grad_norm": 3.6274938583374023, "learning_rate": 2.497301538256508e-05, "loss": 0.6965, "step": 1860 }, { "epoch": 0.8752632810671659, "grad_norm": 3.1540467739105225, "learning_rate": 2.497238444314095e-05, "loss": 0.6793, "step": 1870 }, { "epoch": 0.879943833372338, "grad_norm": 3.056166410446167, "learning_rate": 2.4971746221633397e-05, "loss": 0.6785, "step": 1880 }, { "epoch": 0.8846243856775099, "grad_norm": 3.839226484298706, "learning_rate": 2.497110071845655e-05, "loss": 0.6815, "step": 1890 }, { "epoch": 0.889304937982682, "grad_norm": 4.675917148590088, "learning_rate": 2.4970447934029278e-05, "loss": 0.675, "step": 1900 }, { "epoch": 0.893985490287854, "grad_norm": 3.1041252613067627, "learning_rate": 2.4969787868775155e-05, "loss": 0.6877, "step": 1910 }, { "epoch": 0.898666042593026, "grad_norm": 2.9247403144836426, "learning_rate": 2.4969120523122492e-05, "loss": 0.6673, "step": 1920 }, { "epoch": 0.903346594898198, "grad_norm": 3.168060302734375, "learning_rate": 2.4968445897504337e-05, "loss": 0.6624, "step": 1930 }, { "epoch": 0.90802714720337, "grad_norm": 4.023138046264648, "learning_rate": 2.4967763992358424e-05, "loss": 0.6733, "step": 1940 }, { "epoch": 0.912707699508542, "grad_norm": 3.471324920654297, "learning_rate": 2.496707480812726e-05, "loss": 0.6718, "step": 1950 }, { "epoch": 0.917388251813714, "grad_norm": 2.6294305324554443, "learning_rate": 2.496637834525802e-05, "loss": 0.6429, "step": 1960 }, { "epoch": 0.922068804118886, "grad_norm": 3.1548171043395996, "learning_rate": 2.4965674604202658e-05, "loss": 0.6412, "step": 1970 }, { "epoch": 0.9267493564240581, "grad_norm": 4.196736812591553, "learning_rate": 2.4964963585417807e-05, "loss": 0.6755, "step": 1980 }, { "epoch": 0.93142990872923, "grad_norm": 3.3811662197113037, "learning_rate": 2.4964245289364843e-05, "loss": 0.6643, "step": 1990 }, { "epoch": 0.9361104610344021, "grad_norm": 4.443936347961426, "learning_rate": 2.4963519716509864e-05, "loss": 0.6527, "step": 2000 }, { "epoch": 0.940791013339574, "grad_norm": 2.876857042312622, "learning_rate": 2.4962786867323685e-05, "loss": 0.6587, "step": 2010 }, { "epoch": 0.9454715656447461, "grad_norm": 3.7666287422180176, "learning_rate": 2.496204674228184e-05, "loss": 0.6754, "step": 2020 }, { "epoch": 0.950152117949918, "grad_norm": 2.9911322593688965, "learning_rate": 2.4961299341864596e-05, "loss": 0.6513, "step": 2030 }, { "epoch": 0.9548326702550901, "grad_norm": 3.6061625480651855, "learning_rate": 2.4960544666556924e-05, "loss": 0.6459, "step": 2040 }, { "epoch": 0.9595132225602621, "grad_norm": 4.005845546722412, "learning_rate": 2.4959782716848532e-05, "loss": 0.6763, "step": 2050 }, { "epoch": 0.9641937748654341, "grad_norm": 2.564246892929077, "learning_rate": 2.4959013493233836e-05, "loss": 0.6442, "step": 2060 }, { "epoch": 0.9688743271706062, "grad_norm": 3.0459370613098145, "learning_rate": 2.495823699621198e-05, "loss": 0.6297, "step": 2070 }, { "epoch": 0.9735548794757781, "grad_norm": 3.6598117351531982, "learning_rate": 2.4957453226286825e-05, "loss": 0.6477, "step": 2080 }, { "epoch": 0.9782354317809502, "grad_norm": 5.387185573577881, "learning_rate": 2.4956662183966947e-05, "loss": 0.66, "step": 2090 }, { "epoch": 0.9829159840861221, "grad_norm": 3.6650660037994385, "learning_rate": 2.4955863869765646e-05, "loss": 0.6657, "step": 2100 }, { "epoch": 0.9875965363912942, "grad_norm": 2.482680320739746, "learning_rate": 2.4955058284200942e-05, "loss": 0.6494, "step": 2110 }, { "epoch": 0.9922770886964661, "grad_norm": 2.9131698608398438, "learning_rate": 2.4954245427795568e-05, "loss": 0.6534, "step": 2120 }, { "epoch": 0.9969576410016382, "grad_norm": 2.8355190753936768, "learning_rate": 2.495342530107698e-05, "loss": 0.6507, "step": 2130 }, { "epoch": 1.0014041656915516, "grad_norm": 3.655856132507324, "learning_rate": 2.4952597904577348e-05, "loss": 0.5969, "step": 2140 }, { "epoch": 1.0060847179967236, "grad_norm": 2.4286835193634033, "learning_rate": 2.4951763238833556e-05, "loss": 0.6397, "step": 2150 }, { "epoch": 1.0107652703018957, "grad_norm": 2.7437515258789062, "learning_rate": 2.4950921304387218e-05, "loss": 0.6105, "step": 2160 }, { "epoch": 1.0154458226070677, "grad_norm": 5.373265266418457, "learning_rate": 2.4950072101784653e-05, "loss": 0.6396, "step": 2170 }, { "epoch": 1.0201263749122396, "grad_norm": 3.8893978595733643, "learning_rate": 2.4949215631576896e-05, "loss": 0.6358, "step": 2180 }, { "epoch": 1.0248069272174116, "grad_norm": 4.682613372802734, "learning_rate": 2.4948351894319706e-05, "loss": 0.6194, "step": 2190 }, { "epoch": 1.0294874795225837, "grad_norm": 4.537768363952637, "learning_rate": 2.4947480890573548e-05, "loss": 0.6283, "step": 2200 }, { "epoch": 1.0341680318277557, "grad_norm": 3.698514938354492, "learning_rate": 2.4946602620903612e-05, "loss": 0.6544, "step": 2210 }, { "epoch": 1.0388485841329276, "grad_norm": 2.399303674697876, "learning_rate": 2.4945717085879794e-05, "loss": 0.6069, "step": 2220 }, { "epoch": 1.0435291364380996, "grad_norm": 3.4032204151153564, "learning_rate": 2.494482428607671e-05, "loss": 0.6112, "step": 2230 }, { "epoch": 1.0482096887432717, "grad_norm": 2.7787024974823, "learning_rate": 2.4943924222073688e-05, "loss": 0.6302, "step": 2240 }, { "epoch": 1.0528902410484438, "grad_norm": 2.6428260803222656, "learning_rate": 2.4943016894454763e-05, "loss": 0.6353, "step": 2250 }, { "epoch": 1.0575707933536158, "grad_norm": 2.592304229736328, "learning_rate": 2.49421023038087e-05, "loss": 0.6263, "step": 2260 }, { "epoch": 1.0622513456587876, "grad_norm": 3.580124616622925, "learning_rate": 2.4941180450728962e-05, "loss": 0.633, "step": 2270 }, { "epoch": 1.0669318979639597, "grad_norm": 2.7745516300201416, "learning_rate": 2.4940251335813732e-05, "loss": 0.6365, "step": 2280 }, { "epoch": 1.0716124502691318, "grad_norm": 3.339881658554077, "learning_rate": 2.4939314959665905e-05, "loss": 0.6065, "step": 2290 }, { "epoch": 1.0762930025743038, "grad_norm": 3.6740922927856445, "learning_rate": 2.4938371322893075e-05, "loss": 0.6333, "step": 2300 }, { "epoch": 1.0809735548794759, "grad_norm": 2.320887565612793, "learning_rate": 2.4937420426107565e-05, "loss": 0.6161, "step": 2310 }, { "epoch": 1.0856541071846477, "grad_norm": 2.087496757507324, "learning_rate": 2.4936462269926405e-05, "loss": 0.6211, "step": 2320 }, { "epoch": 1.0903346594898198, "grad_norm": 2.5441393852233887, "learning_rate": 2.4935496854971324e-05, "loss": 0.6202, "step": 2330 }, { "epoch": 1.0950152117949918, "grad_norm": 3.5602433681488037, "learning_rate": 2.4934524181868774e-05, "loss": 0.6429, "step": 2340 }, { "epoch": 1.099695764100164, "grad_norm": 2.8797030448913574, "learning_rate": 2.493354425124991e-05, "loss": 0.636, "step": 2350 }, { "epoch": 1.1043763164053357, "grad_norm": 3.2398569583892822, "learning_rate": 2.4932557063750598e-05, "loss": 0.6337, "step": 2360 }, { "epoch": 1.1090568687105078, "grad_norm": 3.344186305999756, "learning_rate": 2.4931562620011417e-05, "loss": 0.5838, "step": 2370 }, { "epoch": 1.1137374210156799, "grad_norm": 3.6723439693450928, "learning_rate": 2.493056092067765e-05, "loss": 0.6228, "step": 2380 }, { "epoch": 1.118417973320852, "grad_norm": 3.603300094604492, "learning_rate": 2.4929551966399285e-05, "loss": 0.6022, "step": 2390 }, { "epoch": 1.123098525626024, "grad_norm": 3.143249273300171, "learning_rate": 2.4928535757831023e-05, "loss": 0.593, "step": 2400 }, { "epoch": 1.1277790779311958, "grad_norm": 4.391036510467529, "learning_rate": 2.4927512295632274e-05, "loss": 0.6184, "step": 2410 }, { "epoch": 1.1324596302363679, "grad_norm": 2.5532634258270264, "learning_rate": 2.4926481580467146e-05, "loss": 0.6083, "step": 2420 }, { "epoch": 1.13714018254154, "grad_norm": 2.984487533569336, "learning_rate": 2.492544361300446e-05, "loss": 0.5891, "step": 2430 }, { "epoch": 1.141820734846712, "grad_norm": 2.228177547454834, "learning_rate": 2.4924398393917747e-05, "loss": 0.5947, "step": 2440 }, { "epoch": 1.1465012871518838, "grad_norm": 3.789468288421631, "learning_rate": 2.492334592388523e-05, "loss": 0.6051, "step": 2450 }, { "epoch": 1.1511818394570559, "grad_norm": 3.0307705402374268, "learning_rate": 2.4922286203589856e-05, "loss": 0.5989, "step": 2460 }, { "epoch": 1.155862391762228, "grad_norm": 1.6991273164749146, "learning_rate": 2.4921219233719263e-05, "loss": 0.6014, "step": 2470 }, { "epoch": 1.1605429440674, "grad_norm": 3.6469240188598633, "learning_rate": 2.4920145014965787e-05, "loss": 0.6076, "step": 2480 }, { "epoch": 1.165223496372572, "grad_norm": 2.386862277984619, "learning_rate": 2.4919063548026487e-05, "loss": 0.6075, "step": 2490 }, { "epoch": 1.169904048677744, "grad_norm": 2.614692449569702, "learning_rate": 2.4917974833603108e-05, "loss": 0.5996, "step": 2500 }, { "epoch": 1.174584600982916, "grad_norm": 3.538140058517456, "learning_rate": 2.491687887240211e-05, "loss": 0.6008, "step": 2510 }, { "epoch": 1.179265153288088, "grad_norm": 3.8050315380096436, "learning_rate": 2.4915775665134652e-05, "loss": 0.5907, "step": 2520 }, { "epoch": 1.18394570559326, "grad_norm": 3.183634042739868, "learning_rate": 2.4914665212516587e-05, "loss": 0.6021, "step": 2530 }, { "epoch": 1.1886262578984321, "grad_norm": 3.7335095405578613, "learning_rate": 2.4913547515268475e-05, "loss": 0.589, "step": 2540 }, { "epoch": 1.193306810203604, "grad_norm": 2.7750136852264404, "learning_rate": 2.4912422574115583e-05, "loss": 0.604, "step": 2550 }, { "epoch": 1.197987362508776, "grad_norm": 1.9918687343597412, "learning_rate": 2.4911290389787878e-05, "loss": 0.5933, "step": 2560 }, { "epoch": 1.202667914813948, "grad_norm": 1.913959264755249, "learning_rate": 2.491015096302001e-05, "loss": 0.5807, "step": 2570 }, { "epoch": 1.2073484671191201, "grad_norm": 2.0997657775878906, "learning_rate": 2.4909004294551346e-05, "loss": 0.5938, "step": 2580 }, { "epoch": 1.212029019424292, "grad_norm": 1.9585241079330444, "learning_rate": 2.4907850385125948e-05, "loss": 0.583, "step": 2590 }, { "epoch": 1.216709571729464, "grad_norm": 2.999281167984009, "learning_rate": 2.4906689235492574e-05, "loss": 0.5809, "step": 2600 }, { "epoch": 1.221390124034636, "grad_norm": 2.7258493900299072, "learning_rate": 2.490552084640468e-05, "loss": 0.578, "step": 2610 }, { "epoch": 1.2260706763398082, "grad_norm": 2.847775936126709, "learning_rate": 2.4904345218620425e-05, "loss": 0.5971, "step": 2620 }, { "epoch": 1.23075122864498, "grad_norm": 2.481193780899048, "learning_rate": 2.4903162352902656e-05, "loss": 0.5942, "step": 2630 }, { "epoch": 1.235431780950152, "grad_norm": 3.2105813026428223, "learning_rate": 2.4901972250018925e-05, "loss": 0.5782, "step": 2640 }, { "epoch": 1.240112333255324, "grad_norm": 2.5655674934387207, "learning_rate": 2.4900774910741483e-05, "loss": 0.57, "step": 2650 }, { "epoch": 1.2447928855604962, "grad_norm": 2.266183614730835, "learning_rate": 2.489957033584725e-05, "loss": 0.584, "step": 2660 }, { "epoch": 1.2494734378656682, "grad_norm": 2.9888863563537598, "learning_rate": 2.4898358526117887e-05, "loss": 0.6156, "step": 2670 }, { "epoch": 1.25415399017084, "grad_norm": 2.2222864627838135, "learning_rate": 2.489713948233971e-05, "loss": 0.5819, "step": 2680 }, { "epoch": 1.2588345424760121, "grad_norm": 2.689727783203125, "learning_rate": 2.4895913205303743e-05, "loss": 0.5811, "step": 2690 }, { "epoch": 1.2635150947811842, "grad_norm": 3.0483717918395996, "learning_rate": 2.4894679695805707e-05, "loss": 0.6113, "step": 2700 }, { "epoch": 1.2681956470863562, "grad_norm": 1.7325674295425415, "learning_rate": 2.4893438954646012e-05, "loss": 0.5901, "step": 2710 }, { "epoch": 1.2728761993915283, "grad_norm": 2.143134355545044, "learning_rate": 2.4892190982629764e-05, "loss": 0.5731, "step": 2720 }, { "epoch": 1.2775567516967001, "grad_norm": 2.454489231109619, "learning_rate": 2.4890935780566755e-05, "loss": 0.586, "step": 2730 }, { "epoch": 1.2822373040018722, "grad_norm": 2.1955394744873047, "learning_rate": 2.4889673349271474e-05, "loss": 0.5752, "step": 2740 }, { "epoch": 1.2869178563070443, "grad_norm": 2.094567060470581, "learning_rate": 2.4888403689563097e-05, "loss": 0.5829, "step": 2750 }, { "epoch": 1.2915984086122163, "grad_norm": 2.008989095687866, "learning_rate": 2.4887126802265493e-05, "loss": 0.5585, "step": 2760 }, { "epoch": 1.2962789609173884, "grad_norm": 2.3874173164367676, "learning_rate": 2.4885842688207222e-05, "loss": 0.5658, "step": 2770 }, { "epoch": 1.3009595132225602, "grad_norm": 1.9886997938156128, "learning_rate": 2.488455134822153e-05, "loss": 0.5629, "step": 2780 }, { "epoch": 1.3056400655277323, "grad_norm": 2.669487953186035, "learning_rate": 2.488325278314636e-05, "loss": 0.5507, "step": 2790 }, { "epoch": 1.3103206178329043, "grad_norm": 2.663259744644165, "learning_rate": 2.4881946993824325e-05, "loss": 0.5737, "step": 2800 }, { "epoch": 1.3150011701380762, "grad_norm": 2.625232696533203, "learning_rate": 2.4880633981102742e-05, "loss": 0.5888, "step": 2810 }, { "epoch": 1.3196817224432484, "grad_norm": 2.136828660964966, "learning_rate": 2.487931374583362e-05, "loss": 0.5646, "step": 2820 }, { "epoch": 1.3243622747484203, "grad_norm": 3.2671587467193604, "learning_rate": 2.487798628887363e-05, "loss": 0.5601, "step": 2830 }, { "epoch": 1.3290428270535923, "grad_norm": 2.9742186069488525, "learning_rate": 2.487665161108416e-05, "loss": 0.5768, "step": 2840 }, { "epoch": 1.3337233793587644, "grad_norm": 3.1379737854003906, "learning_rate": 2.4875309713331253e-05, "loss": 0.5874, "step": 2850 }, { "epoch": 1.3384039316639362, "grad_norm": 2.5274195671081543, "learning_rate": 2.487396059648566e-05, "loss": 0.5671, "step": 2860 }, { "epoch": 1.3430844839691083, "grad_norm": 2.5484821796417236, "learning_rate": 2.4872604261422808e-05, "loss": 0.5632, "step": 2870 }, { "epoch": 1.3477650362742803, "grad_norm": 3.8027968406677246, "learning_rate": 2.4871240709022806e-05, "loss": 0.5755, "step": 2880 }, { "epoch": 1.3524455885794524, "grad_norm": 1.9872463941574097, "learning_rate": 2.4869869940170453e-05, "loss": 0.5535, "step": 2890 }, { "epoch": 1.3571261408846245, "grad_norm": 2.102604627609253, "learning_rate": 2.486849195575522e-05, "loss": 0.5623, "step": 2900 }, { "epoch": 1.3618066931897963, "grad_norm": 2.481153726577759, "learning_rate": 2.486710675667127e-05, "loss": 0.5867, "step": 2910 }, { "epoch": 1.3664872454949684, "grad_norm": 3.581080198287964, "learning_rate": 2.4865714343817436e-05, "loss": 0.5781, "step": 2920 }, { "epoch": 1.3711677978001404, "grad_norm": 2.0311365127563477, "learning_rate": 2.4864314718097253e-05, "loss": 0.561, "step": 2930 }, { "epoch": 1.3758483501053125, "grad_norm": 2.327099323272705, "learning_rate": 2.486290788041891e-05, "loss": 0.5581, "step": 2940 }, { "epoch": 1.3805289024104845, "grad_norm": 2.6514971256256104, "learning_rate": 2.48614938316953e-05, "loss": 0.5538, "step": 2950 }, { "epoch": 1.3852094547156564, "grad_norm": 3.318211317062378, "learning_rate": 2.4860072572843977e-05, "loss": 0.555, "step": 2960 }, { "epoch": 1.3898900070208284, "grad_norm": 3.1590030193328857, "learning_rate": 2.485864410478718e-05, "loss": 0.5715, "step": 2970 }, { "epoch": 1.3945705593260005, "grad_norm": 1.6830084323883057, "learning_rate": 2.485720842845183e-05, "loss": 0.5713, "step": 2980 }, { "epoch": 1.3992511116311726, "grad_norm": 2.0587496757507324, "learning_rate": 2.485576554476952e-05, "loss": 0.5352, "step": 2990 }, { "epoch": 1.4039316639363446, "grad_norm": 2.0481648445129395, "learning_rate": 2.485431545467652e-05, "loss": 0.5548, "step": 3000 }, { "epoch": 1.4086122162415164, "grad_norm": 2.001458168029785, "learning_rate": 2.4852858159113783e-05, "loss": 0.5591, "step": 3010 }, { "epoch": 1.4132927685466885, "grad_norm": 2.3117613792419434, "learning_rate": 2.4851393659026925e-05, "loss": 0.5262, "step": 3020 }, { "epoch": 1.4179733208518606, "grad_norm": 2.682530641555786, "learning_rate": 2.4849921955366247e-05, "loss": 0.5532, "step": 3030 }, { "epoch": 1.4226538731570324, "grad_norm": 1.9757784605026245, "learning_rate": 2.4848443049086723e-05, "loss": 0.5701, "step": 3040 }, { "epoch": 1.4273344254622045, "grad_norm": 2.6655733585357666, "learning_rate": 2.4846956941148e-05, "loss": 0.551, "step": 3050 }, { "epoch": 1.4320149777673765, "grad_norm": 2.853151559829712, "learning_rate": 2.4845463632514396e-05, "loss": 0.5684, "step": 3060 }, { "epoch": 1.4366955300725486, "grad_norm": 4.09670877456665, "learning_rate": 2.48439631241549e-05, "loss": 0.5699, "step": 3070 }, { "epoch": 1.4413760823777206, "grad_norm": 2.297355890274048, "learning_rate": 2.4842455417043182e-05, "loss": 0.5334, "step": 3080 }, { "epoch": 1.4460566346828925, "grad_norm": 1.5394313335418701, "learning_rate": 2.484094051215757e-05, "loss": 0.5429, "step": 3090 }, { "epoch": 1.4507371869880645, "grad_norm": 1.726246953010559, "learning_rate": 2.483941841048107e-05, "loss": 0.5316, "step": 3100 }, { "epoch": 1.4554177392932366, "grad_norm": 1.818416953086853, "learning_rate": 2.4837889113001366e-05, "loss": 0.5444, "step": 3110 }, { "epoch": 1.4600982915984086, "grad_norm": 3.13503360748291, "learning_rate": 2.4836352620710792e-05, "loss": 0.5645, "step": 3120 }, { "epoch": 1.4647788439035807, "grad_norm": 2.1946887969970703, "learning_rate": 2.483480893460637e-05, "loss": 0.5536, "step": 3130 }, { "epoch": 1.4694593962087525, "grad_norm": 2.6889395713806152, "learning_rate": 2.4833258055689767e-05, "loss": 0.5362, "step": 3140 }, { "epoch": 1.4741399485139246, "grad_norm": 2.2109031677246094, "learning_rate": 2.4831699984967345e-05, "loss": 0.5439, "step": 3150 }, { "epoch": 1.4788205008190967, "grad_norm": 2.1929731369018555, "learning_rate": 2.483013472345012e-05, "loss": 0.5521, "step": 3160 }, { "epoch": 1.4835010531242687, "grad_norm": 2.2968199253082275, "learning_rate": 2.4828562272153767e-05, "loss": 0.5745, "step": 3170 }, { "epoch": 1.4881816054294408, "grad_norm": 2.168401002883911, "learning_rate": 2.482698263209863e-05, "loss": 0.5609, "step": 3180 }, { "epoch": 1.4928621577346126, "grad_norm": 3.081282377243042, "learning_rate": 2.4825395804309724e-05, "loss": 0.5416, "step": 3190 }, { "epoch": 1.4975427100397847, "grad_norm": 2.745562791824341, "learning_rate": 2.4823801789816726e-05, "loss": 0.5436, "step": 3200 }, { "epoch": 1.5022232623449567, "grad_norm": 2.4738898277282715, "learning_rate": 2.482220058965397e-05, "loss": 0.524, "step": 3210 }, { "epoch": 1.5069038146501286, "grad_norm": 2.796806812286377, "learning_rate": 2.4820592204860467e-05, "loss": 0.5229, "step": 3220 }, { "epoch": 1.5115843669553009, "grad_norm": 2.539262294769287, "learning_rate": 2.481897663647987e-05, "loss": 0.5477, "step": 3230 }, { "epoch": 1.5162649192604727, "grad_norm": 2.840386152267456, "learning_rate": 2.4817353885560507e-05, "loss": 0.5416, "step": 3240 }, { "epoch": 1.5209454715656447, "grad_norm": 2.6302034854888916, "learning_rate": 2.4815723953155367e-05, "loss": 0.5309, "step": 3250 }, { "epoch": 1.5256260238708168, "grad_norm": 1.7659411430358887, "learning_rate": 2.481408684032209e-05, "loss": 0.5421, "step": 3260 }, { "epoch": 1.5303065761759886, "grad_norm": 1.3924206495285034, "learning_rate": 2.4812442548122987e-05, "loss": 0.5149, "step": 3270 }, { "epoch": 1.534987128481161, "grad_norm": 2.520704984664917, "learning_rate": 2.481079107762501e-05, "loss": 0.5436, "step": 3280 }, { "epoch": 1.5396676807863328, "grad_norm": 2.5662615299224854, "learning_rate": 2.4809132429899795e-05, "loss": 0.5505, "step": 3290 }, { "epoch": 1.5443482330915048, "grad_norm": 2.228850841522217, "learning_rate": 2.4807466606023616e-05, "loss": 0.5225, "step": 3300 }, { "epoch": 1.5490287853966769, "grad_norm": 2.1749751567840576, "learning_rate": 2.4805793607077397e-05, "loss": 0.5355, "step": 3310 }, { "epoch": 1.5537093377018487, "grad_norm": 1.9089665412902832, "learning_rate": 2.480411343414674e-05, "loss": 0.5436, "step": 3320 }, { "epoch": 1.558389890007021, "grad_norm": 2.486740827560425, "learning_rate": 2.4802426088321893e-05, "loss": 0.5248, "step": 3330 }, { "epoch": 1.5630704423121928, "grad_norm": 1.9546035528182983, "learning_rate": 2.4800731570697748e-05, "loss": 0.5218, "step": 3340 }, { "epoch": 1.567750994617365, "grad_norm": 2.44998836517334, "learning_rate": 2.4799029882373866e-05, "loss": 0.5266, "step": 3350 }, { "epoch": 1.572431546922537, "grad_norm": 2.823894500732422, "learning_rate": 2.479732102445445e-05, "loss": 0.54, "step": 3360 }, { "epoch": 1.5771120992277088, "grad_norm": 2.298309087753296, "learning_rate": 2.4795604998048355e-05, "loss": 0.5223, "step": 3370 }, { "epoch": 1.5817926515328808, "grad_norm": 1.746207594871521, "learning_rate": 2.4793881804269104e-05, "loss": 0.51, "step": 3380 }, { "epoch": 1.586473203838053, "grad_norm": 1.7362998723983765, "learning_rate": 2.4792151444234844e-05, "loss": 0.5272, "step": 3390 }, { "epoch": 1.5911537561432247, "grad_norm": 2.3355674743652344, "learning_rate": 2.4790413919068405e-05, "loss": 0.5299, "step": 3400 }, { "epoch": 1.595834308448397, "grad_norm": 1.837893009185791, "learning_rate": 2.478866922989723e-05, "loss": 0.521, "step": 3410 }, { "epoch": 1.6005148607535689, "grad_norm": 2.9375462532043457, "learning_rate": 2.478691737785344e-05, "loss": 0.5139, "step": 3420 }, { "epoch": 1.605195413058741, "grad_norm": 2.260241746902466, "learning_rate": 2.4785158364073788e-05, "loss": 0.5167, "step": 3430 }, { "epoch": 1.609875965363913, "grad_norm": 2.730088472366333, "learning_rate": 2.478339218969968e-05, "loss": 0.529, "step": 3440 }, { "epoch": 1.6145565176690848, "grad_norm": 2.1657540798187256, "learning_rate": 2.478161885587717e-05, "loss": 0.4977, "step": 3450 }, { "epoch": 1.619237069974257, "grad_norm": 2.6852376461029053, "learning_rate": 2.4779838363756954e-05, "loss": 0.5306, "step": 3460 }, { "epoch": 1.623917622279429, "grad_norm": 1.5314548015594482, "learning_rate": 2.477805071449437e-05, "loss": 0.5236, "step": 3470 }, { "epoch": 1.628598174584601, "grad_norm": 3.1246533393859863, "learning_rate": 2.477625590924941e-05, "loss": 0.523, "step": 3480 }, { "epoch": 1.633278726889773, "grad_norm": 2.191880702972412, "learning_rate": 2.47744539491867e-05, "loss": 0.5367, "step": 3490 }, { "epoch": 1.6379592791949449, "grad_norm": 2.4295766353607178, "learning_rate": 2.4772644835475512e-05, "loss": 0.5087, "step": 3500 }, { "epoch": 1.6426398315001172, "grad_norm": 2.2890944480895996, "learning_rate": 2.477082856928976e-05, "loss": 0.5071, "step": 3510 }, { "epoch": 1.647320383805289, "grad_norm": 1.8764160871505737, "learning_rate": 2.4769005151808006e-05, "loss": 0.5158, "step": 3520 }, { "epoch": 1.652000936110461, "grad_norm": 1.9357949495315552, "learning_rate": 2.4767174584213438e-05, "loss": 0.515, "step": 3530 }, { "epoch": 1.6566814884156331, "grad_norm": 2.3908157348632812, "learning_rate": 2.4765336867693894e-05, "loss": 0.5026, "step": 3540 }, { "epoch": 1.661362040720805, "grad_norm": 1.7691785097122192, "learning_rate": 2.4763492003441846e-05, "loss": 0.5202, "step": 3550 }, { "epoch": 1.666042593025977, "grad_norm": 1.9369893074035645, "learning_rate": 2.476163999265441e-05, "loss": 0.5043, "step": 3560 }, { "epoch": 1.670723145331149, "grad_norm": 2.1190826892852783, "learning_rate": 2.475978083653334e-05, "loss": 0.5257, "step": 3570 }, { "epoch": 1.6754036976363211, "grad_norm": 2.07397198677063, "learning_rate": 2.475791453628501e-05, "loss": 0.524, "step": 3580 }, { "epoch": 1.6800842499414932, "grad_norm": 2.8113210201263428, "learning_rate": 2.4756041093120448e-05, "loss": 0.5105, "step": 3590 }, { "epoch": 1.684764802246665, "grad_norm": 2.388242483139038, "learning_rate": 2.4754160508255315e-05, "loss": 0.5434, "step": 3600 }, { "epoch": 1.689445354551837, "grad_norm": 2.6325297355651855, "learning_rate": 2.4752272782909897e-05, "loss": 0.5113, "step": 3610 }, { "epoch": 1.6941259068570091, "grad_norm": 2.6054131984710693, "learning_rate": 2.475037791830912e-05, "loss": 0.5209, "step": 3620 }, { "epoch": 1.698806459162181, "grad_norm": 1.501359224319458, "learning_rate": 2.4748475915682542e-05, "loss": 0.4958, "step": 3630 }, { "epoch": 1.7034870114673533, "grad_norm": 1.9417635202407837, "learning_rate": 2.4746566776264352e-05, "loss": 0.4834, "step": 3640 }, { "epoch": 1.708167563772525, "grad_norm": 2.8802366256713867, "learning_rate": 2.4744650501293367e-05, "loss": 0.5224, "step": 3650 }, { "epoch": 1.7128481160776972, "grad_norm": 2.8734092712402344, "learning_rate": 2.474272709201304e-05, "loss": 0.5242, "step": 3660 }, { "epoch": 1.7175286683828692, "grad_norm": 1.701008677482605, "learning_rate": 2.474079654967145e-05, "loss": 0.5114, "step": 3670 }, { "epoch": 1.722209220688041, "grad_norm": 2.2424612045288086, "learning_rate": 2.47388588755213e-05, "loss": 0.4899, "step": 3680 }, { "epoch": 1.7268897729932133, "grad_norm": 1.4738070964813232, "learning_rate": 2.4736914070819935e-05, "loss": 0.4969, "step": 3690 }, { "epoch": 1.7315703252983852, "grad_norm": 1.5454394817352295, "learning_rate": 2.473496213682931e-05, "loss": 0.5132, "step": 3700 }, { "epoch": 1.7362508776035572, "grad_norm": 1.7049986124038696, "learning_rate": 2.4733003074816018e-05, "loss": 0.5117, "step": 3710 }, { "epoch": 1.7409314299087293, "grad_norm": 2.090161085128784, "learning_rate": 2.4731036886051272e-05, "loss": 0.5152, "step": 3720 }, { "epoch": 1.7456119822139011, "grad_norm": 2.3464460372924805, "learning_rate": 2.4729063571810908e-05, "loss": 0.5326, "step": 3730 }, { "epoch": 1.7502925345190734, "grad_norm": 1.596229910850525, "learning_rate": 2.4727083133375393e-05, "loss": 0.495, "step": 3740 }, { "epoch": 1.7549730868242452, "grad_norm": 2.162134885787964, "learning_rate": 2.4725095572029807e-05, "loss": 0.5076, "step": 3750 }, { "epoch": 1.7596536391294173, "grad_norm": 1.5083073377609253, "learning_rate": 2.4723100889063858e-05, "loss": 0.4899, "step": 3760 }, { "epoch": 1.7643341914345894, "grad_norm": 2.2652368545532227, "learning_rate": 2.4721099085771878e-05, "loss": 0.4898, "step": 3770 }, { "epoch": 1.7690147437397612, "grad_norm": 2.1119472980499268, "learning_rate": 2.4719090163452813e-05, "loss": 0.4988, "step": 3780 }, { "epoch": 1.7736952960449333, "grad_norm": 2.2119765281677246, "learning_rate": 2.471707412341023e-05, "loss": 0.5069, "step": 3790 }, { "epoch": 1.7783758483501053, "grad_norm": 2.036655902862549, "learning_rate": 2.4715050966952318e-05, "loss": 0.5015, "step": 3800 }, { "epoch": 1.7830564006552774, "grad_norm": 2.3007988929748535, "learning_rate": 2.4713020695391874e-05, "loss": 0.4868, "step": 3810 }, { "epoch": 1.7877369529604494, "grad_norm": 1.308218240737915, "learning_rate": 2.4710983310046325e-05, "loss": 0.5031, "step": 3820 }, { "epoch": 1.7924175052656213, "grad_norm": 3.2836198806762695, "learning_rate": 2.4708938812237712e-05, "loss": 0.5217, "step": 3830 }, { "epoch": 1.7970980575707933, "grad_norm": 1.5418808460235596, "learning_rate": 2.470688720329268e-05, "loss": 0.4882, "step": 3840 }, { "epoch": 1.8017786098759654, "grad_norm": 2.5364322662353516, "learning_rate": 2.47048284845425e-05, "loss": 0.4897, "step": 3850 }, { "epoch": 1.8064591621811372, "grad_norm": 1.8033487796783447, "learning_rate": 2.4702762657323052e-05, "loss": 0.4797, "step": 3860 }, { "epoch": 1.8111397144863095, "grad_norm": 1.8141584396362305, "learning_rate": 2.470068972297483e-05, "loss": 0.5063, "step": 3870 }, { "epoch": 1.8158202667914813, "grad_norm": 3.2668750286102295, "learning_rate": 2.4698609682842934e-05, "loss": 0.4798, "step": 3880 }, { "epoch": 1.8205008190966534, "grad_norm": 2.192279100418091, "learning_rate": 2.4696522538277085e-05, "loss": 0.4883, "step": 3890 }, { "epoch": 1.8251813714018255, "grad_norm": 2.1419787406921387, "learning_rate": 2.46944282906316e-05, "loss": 0.5002, "step": 3900 }, { "epoch": 1.8298619237069973, "grad_norm": 1.6178820133209229, "learning_rate": 2.4692326941265426e-05, "loss": 0.4884, "step": 3910 }, { "epoch": 1.8345424760121696, "grad_norm": 1.4948581457138062, "learning_rate": 2.469021849154209e-05, "loss": 0.4959, "step": 3920 }, { "epoch": 1.8392230283173414, "grad_norm": 2.0906271934509277, "learning_rate": 2.4688102942829762e-05, "loss": 0.5129, "step": 3930 }, { "epoch": 1.8439035806225135, "grad_norm": 1.7972549200057983, "learning_rate": 2.468598029650118e-05, "loss": 0.485, "step": 3940 }, { "epoch": 1.8485841329276855, "grad_norm": 1.7919487953186035, "learning_rate": 2.468385055393372e-05, "loss": 0.4996, "step": 3950 }, { "epoch": 1.8532646852328574, "grad_norm": 2.5396249294281006, "learning_rate": 2.4681713716509338e-05, "loss": 0.4777, "step": 3960 }, { "epoch": 1.8579452375380296, "grad_norm": 2.033449172973633, "learning_rate": 2.4679569785614607e-05, "loss": 0.4825, "step": 3970 }, { "epoch": 1.8626257898432015, "grad_norm": 1.6664479970932007, "learning_rate": 2.4677418762640708e-05, "loss": 0.4967, "step": 3980 }, { "epoch": 1.8673063421483735, "grad_norm": 1.6007065773010254, "learning_rate": 2.4675260648983405e-05, "loss": 0.4916, "step": 3990 }, { "epoch": 1.8719868944535456, "grad_norm": 1.9727587699890137, "learning_rate": 2.4673095446043087e-05, "loss": 0.4961, "step": 4000 }, { "epoch": 1.8766674467587174, "grad_norm": 1.7293976545333862, "learning_rate": 2.467092315522472e-05, "loss": 0.4828, "step": 4010 }, { "epoch": 1.8813479990638895, "grad_norm": 2.182835578918457, "learning_rate": 2.466874377793789e-05, "loss": 0.4822, "step": 4020 }, { "epoch": 1.8860285513690616, "grad_norm": 1.7677901983261108, "learning_rate": 2.466655731559676e-05, "loss": 0.4883, "step": 4030 }, { "epoch": 1.8907091036742334, "grad_norm": 2.258283853530884, "learning_rate": 2.4664363769620112e-05, "loss": 0.4994, "step": 4040 }, { "epoch": 1.8953896559794057, "grad_norm": 1.0971386432647705, "learning_rate": 2.4662163141431305e-05, "loss": 0.4785, "step": 4050 }, { "epoch": 1.9000702082845775, "grad_norm": 1.9168668985366821, "learning_rate": 2.4659955432458307e-05, "loss": 0.4934, "step": 4060 }, { "epoch": 1.9047507605897496, "grad_norm": 1.4714734554290771, "learning_rate": 2.465774064413368e-05, "loss": 0.4749, "step": 4070 }, { "epoch": 1.9094313128949216, "grad_norm": 2.088679075241089, "learning_rate": 2.465551877789457e-05, "loss": 0.4687, "step": 4080 }, { "epoch": 1.9141118652000935, "grad_norm": 1.7301603555679321, "learning_rate": 2.4653289835182726e-05, "loss": 0.4634, "step": 4090 }, { "epoch": 1.9187924175052657, "grad_norm": 1.5578752756118774, "learning_rate": 2.4651053817444484e-05, "loss": 0.4792, "step": 4100 }, { "epoch": 1.9234729698104376, "grad_norm": 1.8688186407089233, "learning_rate": 2.4648810726130765e-05, "loss": 0.4902, "step": 4110 }, { "epoch": 1.9281535221156096, "grad_norm": 1.837570071220398, "learning_rate": 2.4646560562697102e-05, "loss": 0.4709, "step": 4120 }, { "epoch": 1.9328340744207817, "grad_norm": 1.680984377861023, "learning_rate": 2.4644303328603588e-05, "loss": 0.4672, "step": 4130 }, { "epoch": 1.9375146267259535, "grad_norm": 1.4290308952331543, "learning_rate": 2.464203902531492e-05, "loss": 0.4807, "step": 4140 }, { "epoch": 1.9421951790311258, "grad_norm": 1.3431026935577393, "learning_rate": 2.4639767654300382e-05, "loss": 0.4651, "step": 4150 }, { "epoch": 1.9468757313362977, "grad_norm": 2.4977755546569824, "learning_rate": 2.4637489217033845e-05, "loss": 0.4879, "step": 4160 }, { "epoch": 1.9515562836414697, "grad_norm": 2.183385133743286, "learning_rate": 2.4635203714993754e-05, "loss": 0.4577, "step": 4170 }, { "epoch": 1.9562368359466418, "grad_norm": 1.9955743551254272, "learning_rate": 2.4632911149663155e-05, "loss": 0.4813, "step": 4180 }, { "epoch": 1.9609173882518136, "grad_norm": 1.4352885484695435, "learning_rate": 2.4630611522529666e-05, "loss": 0.4578, "step": 4190 }, { "epoch": 1.965597940556986, "grad_norm": 2.3339343070983887, "learning_rate": 2.4628304835085487e-05, "loss": 0.4753, "step": 4200 }, { "epoch": 1.9702784928621577, "grad_norm": 2.0189261436462402, "learning_rate": 2.4625991088827403e-05, "loss": 0.4778, "step": 4210 }, { "epoch": 1.9749590451673298, "grad_norm": 1.9559507369995117, "learning_rate": 2.462367028525678e-05, "loss": 0.4644, "step": 4220 }, { "epoch": 1.9796395974725018, "grad_norm": 1.6456373929977417, "learning_rate": 2.4621342425879566e-05, "loss": 0.478, "step": 4230 }, { "epoch": 1.9843201497776737, "grad_norm": 1.3628051280975342, "learning_rate": 2.4619007512206276e-05, "loss": 0.4687, "step": 4240 }, { "epoch": 1.9890007020828457, "grad_norm": 1.488852620124817, "learning_rate": 2.4616665545752017e-05, "loss": 0.4649, "step": 4250 }, { "epoch": 1.9936812543880178, "grad_norm": 1.4470138549804688, "learning_rate": 2.4614316528036466e-05, "loss": 0.4634, "step": 4260 }, { "epoch": 1.9983618066931896, "grad_norm": 1.3075069189071655, "learning_rate": 2.461196046058387e-05, "loss": 0.4515, "step": 4270 }, { "epoch": 2.002808331383103, "grad_norm": 1.7434836626052856, "learning_rate": 2.460959734492306e-05, "loss": 0.4256, "step": 4280 }, { "epoch": 2.0074888836882754, "grad_norm": 2.2262320518493652, "learning_rate": 2.460722718258743e-05, "loss": 0.4487, "step": 4290 }, { "epoch": 2.012169435993447, "grad_norm": 1.1432287693023682, "learning_rate": 2.4604849975114957e-05, "loss": 0.4554, "step": 4300 }, { "epoch": 2.016849988298619, "grad_norm": 1.5842173099517822, "learning_rate": 2.4602465724048183e-05, "loss": 0.4716, "step": 4310 }, { "epoch": 2.0215305406037913, "grad_norm": 1.4356974363327026, "learning_rate": 2.4600074430934226e-05, "loss": 0.4503, "step": 4320 }, { "epoch": 2.026211092908963, "grad_norm": 2.049867630004883, "learning_rate": 2.459767609732477e-05, "loss": 0.4566, "step": 4330 }, { "epoch": 2.0308916452141355, "grad_norm": 1.3758702278137207, "learning_rate": 2.4595270724776063e-05, "loss": 0.4547, "step": 4340 }, { "epoch": 2.0355721975193073, "grad_norm": 1.5012731552124023, "learning_rate": 2.459285831484893e-05, "loss": 0.4491, "step": 4350 }, { "epoch": 2.040252749824479, "grad_norm": 1.3813717365264893, "learning_rate": 2.4590438869108755e-05, "loss": 0.4483, "step": 4360 }, { "epoch": 2.0449333021296514, "grad_norm": 2.2309539318084717, "learning_rate": 2.4588012389125488e-05, "loss": 0.4473, "step": 4370 }, { "epoch": 2.0496138544348232, "grad_norm": 2.0346450805664062, "learning_rate": 2.4585578876473655e-05, "loss": 0.435, "step": 4380 }, { "epoch": 2.0542944067399955, "grad_norm": 1.8221855163574219, "learning_rate": 2.4583138332732325e-05, "loss": 0.4715, "step": 4390 }, { "epoch": 2.0589749590451674, "grad_norm": 1.5658855438232422, "learning_rate": 2.4580690759485146e-05, "loss": 0.4655, "step": 4400 }, { "epoch": 2.063655511350339, "grad_norm": 2.510479688644409, "learning_rate": 2.457823615832032e-05, "loss": 0.4436, "step": 4410 }, { "epoch": 2.0683360636555115, "grad_norm": 1.9367445707321167, "learning_rate": 2.457577453083061e-05, "loss": 0.4601, "step": 4420 }, { "epoch": 2.0730166159606833, "grad_norm": 1.458039402961731, "learning_rate": 2.4573305878613345e-05, "loss": 0.4794, "step": 4430 }, { "epoch": 2.077697168265855, "grad_norm": 2.4821622371673584, "learning_rate": 2.4570830203270398e-05, "loss": 0.4446, "step": 4440 }, { "epoch": 2.0823777205710274, "grad_norm": 2.2142693996429443, "learning_rate": 2.4568347506408214e-05, "loss": 0.4583, "step": 4450 }, { "epoch": 2.0870582728761993, "grad_norm": 1.8356170654296875, "learning_rate": 2.4565857789637782e-05, "loss": 0.4413, "step": 4460 }, { "epoch": 2.0917388251813716, "grad_norm": 1.4371867179870605, "learning_rate": 2.456336105457466e-05, "loss": 0.423, "step": 4470 }, { "epoch": 2.0964193774865434, "grad_norm": 1.4702847003936768, "learning_rate": 2.456085730283895e-05, "loss": 0.4344, "step": 4480 }, { "epoch": 2.1010999297917152, "grad_norm": 2.1477692127227783, "learning_rate": 2.4558346536055305e-05, "loss": 0.4646, "step": 4490 }, { "epoch": 2.1057804820968875, "grad_norm": 1.4044678211212158, "learning_rate": 2.455582875585294e-05, "loss": 0.4345, "step": 4500 }, { "epoch": 2.1104610344020593, "grad_norm": 1.1526572704315186, "learning_rate": 2.455330396386561e-05, "loss": 0.4314, "step": 4510 }, { "epoch": 2.1151415867072316, "grad_norm": 1.1477956771850586, "learning_rate": 2.455077216173163e-05, "loss": 0.4169, "step": 4520 }, { "epoch": 2.1198221390124035, "grad_norm": 1.4985871315002441, "learning_rate": 2.4548233351093856e-05, "loss": 0.4423, "step": 4530 }, { "epoch": 2.1245026913175753, "grad_norm": 1.1660696268081665, "learning_rate": 2.4545687533599693e-05, "loss": 0.4497, "step": 4540 }, { "epoch": 2.1291832436227476, "grad_norm": 1.150203824043274, "learning_rate": 2.45431347109011e-05, "loss": 0.4334, "step": 4550 }, { "epoch": 2.1338637959279194, "grad_norm": 1.4059340953826904, "learning_rate": 2.4540574884654572e-05, "loss": 0.4405, "step": 4560 }, { "epoch": 2.1385443482330917, "grad_norm": 1.32100248336792, "learning_rate": 2.4538008056521146e-05, "loss": 0.4224, "step": 4570 }, { "epoch": 2.1432249005382635, "grad_norm": 1.3468341827392578, "learning_rate": 2.4535434228166422e-05, "loss": 0.4493, "step": 4580 }, { "epoch": 2.1479054528434354, "grad_norm": 1.4417171478271484, "learning_rate": 2.453285340126053e-05, "loss": 0.43, "step": 4590 }, { "epoch": 2.1525860051486077, "grad_norm": 1.4482311010360718, "learning_rate": 2.453026557747812e-05, "loss": 0.4385, "step": 4600 }, { "epoch": 2.1572665574537795, "grad_norm": 1.2260043621063232, "learning_rate": 2.4527670758498425e-05, "loss": 0.4151, "step": 4610 }, { "epoch": 2.1619471097589518, "grad_norm": 1.0835292339324951, "learning_rate": 2.4525068946005182e-05, "loss": 0.4535, "step": 4620 }, { "epoch": 2.1666276620641236, "grad_norm": 1.3461333513259888, "learning_rate": 2.4522460141686686e-05, "loss": 0.4265, "step": 4630 }, { "epoch": 2.1713082143692954, "grad_norm": 1.8656353950500488, "learning_rate": 2.451984434723576e-05, "loss": 0.4271, "step": 4640 }, { "epoch": 2.1759887666744677, "grad_norm": 1.7686610221862793, "learning_rate": 2.4517221564349762e-05, "loss": 0.4437, "step": 4650 }, { "epoch": 2.1806693189796396, "grad_norm": 0.9750983119010925, "learning_rate": 2.4514591794730583e-05, "loss": 0.4292, "step": 4660 }, { "epoch": 2.185349871284812, "grad_norm": 1.4384491443634033, "learning_rate": 2.4511955040084667e-05, "loss": 0.4167, "step": 4670 }, { "epoch": 2.1900304235899837, "grad_norm": 1.372576355934143, "learning_rate": 2.450931130212296e-05, "loss": 0.439, "step": 4680 }, { "epoch": 2.1947109758951555, "grad_norm": 1.4232302904129028, "learning_rate": 2.4506660582560968e-05, "loss": 0.4163, "step": 4690 }, { "epoch": 2.199391528200328, "grad_norm": 1.1078970432281494, "learning_rate": 2.4504002883118706e-05, "loss": 0.4227, "step": 4700 }, { "epoch": 2.2040720805054996, "grad_norm": 1.7319152355194092, "learning_rate": 2.4501338205520723e-05, "loss": 0.3975, "step": 4710 }, { "epoch": 2.2087526328106715, "grad_norm": 1.4937297105789185, "learning_rate": 2.4498666551496105e-05, "loss": 0.4313, "step": 4720 }, { "epoch": 2.2134331851158437, "grad_norm": 1.3618268966674805, "learning_rate": 2.449598792277846e-05, "loss": 0.4282, "step": 4730 }, { "epoch": 2.2181137374210156, "grad_norm": 1.6983134746551514, "learning_rate": 2.4493302321105915e-05, "loss": 0.4301, "step": 4740 }, { "epoch": 2.222794289726188, "grad_norm": 1.231747031211853, "learning_rate": 2.449060974822114e-05, "loss": 0.4162, "step": 4750 }, { "epoch": 2.2274748420313597, "grad_norm": 1.4712797403335571, "learning_rate": 2.4487910205871304e-05, "loss": 0.4363, "step": 4760 }, { "epoch": 2.2321553943365315, "grad_norm": 1.3790812492370605, "learning_rate": 2.4485203695808115e-05, "loss": 0.4358, "step": 4770 }, { "epoch": 2.236835946641704, "grad_norm": 1.5056368112564087, "learning_rate": 2.44824902197878e-05, "loss": 0.4077, "step": 4780 }, { "epoch": 2.2415164989468757, "grad_norm": 1.1107404232025146, "learning_rate": 2.44797697795711e-05, "loss": 0.414, "step": 4790 }, { "epoch": 2.246197051252048, "grad_norm": 1.0583478212356567, "learning_rate": 2.4477042376923283e-05, "loss": 0.4336, "step": 4800 }, { "epoch": 2.2508776035572198, "grad_norm": 1.077876091003418, "learning_rate": 2.447430801361413e-05, "loss": 0.4243, "step": 4810 }, { "epoch": 2.2555581558623916, "grad_norm": 1.7167621850967407, "learning_rate": 2.447156669141794e-05, "loss": 0.4231, "step": 4820 }, { "epoch": 2.260238708167564, "grad_norm": 1.3201441764831543, "learning_rate": 2.4468818412113524e-05, "loss": 0.4055, "step": 4830 }, { "epoch": 2.2649192604727357, "grad_norm": 2.580179214477539, "learning_rate": 2.4466063177484212e-05, "loss": 0.4194, "step": 4840 }, { "epoch": 2.2695998127779076, "grad_norm": 1.6211419105529785, "learning_rate": 2.446330098931785e-05, "loss": 0.4396, "step": 4850 }, { "epoch": 2.27428036508308, "grad_norm": 1.204243540763855, "learning_rate": 2.4460531849406786e-05, "loss": 0.4277, "step": 4860 }, { "epoch": 2.2789609173882517, "grad_norm": 1.2233787775039673, "learning_rate": 2.445775575954789e-05, "loss": 0.406, "step": 4870 }, { "epoch": 2.283641469693424, "grad_norm": 1.4034836292266846, "learning_rate": 2.4454972721542535e-05, "loss": 0.4165, "step": 4880 }, { "epoch": 2.288322021998596, "grad_norm": 1.0401949882507324, "learning_rate": 2.4452182737196596e-05, "loss": 0.4244, "step": 4890 }, { "epoch": 2.2930025743037676, "grad_norm": 1.2266819477081299, "learning_rate": 2.444938580832048e-05, "loss": 0.4008, "step": 4900 }, { "epoch": 2.29768312660894, "grad_norm": 1.2613273859024048, "learning_rate": 2.444658193672907e-05, "loss": 0.4099, "step": 4910 }, { "epoch": 2.3023636789141118, "grad_norm": 1.2485719919204712, "learning_rate": 2.444377112424177e-05, "loss": 0.4205, "step": 4920 }, { "epoch": 2.307044231219284, "grad_norm": 1.3757052421569824, "learning_rate": 2.4440953372682493e-05, "loss": 0.4191, "step": 4930 }, { "epoch": 2.311724783524456, "grad_norm": 1.3709882497787476, "learning_rate": 2.443812868387964e-05, "loss": 0.4192, "step": 4940 }, { "epoch": 2.3164053358296277, "grad_norm": 1.3024760484695435, "learning_rate": 2.4435297059666124e-05, "loss": 0.4154, "step": 4950 }, { "epoch": 2.3210858881348, "grad_norm": 0.9720996022224426, "learning_rate": 2.4432458501879353e-05, "loss": 0.3981, "step": 4960 }, { "epoch": 2.325766440439972, "grad_norm": 1.7499051094055176, "learning_rate": 2.4429613012361243e-05, "loss": 0.4334, "step": 4970 }, { "epoch": 2.330446992745144, "grad_norm": 1.2706553936004639, "learning_rate": 2.442676059295819e-05, "loss": 0.4106, "step": 4980 }, { "epoch": 2.335127545050316, "grad_norm": 2.1515471935272217, "learning_rate": 2.4423901245521107e-05, "loss": 0.4387, "step": 4990 }, { "epoch": 2.339808097355488, "grad_norm": 1.4249286651611328, "learning_rate": 2.442103497190539e-05, "loss": 0.4333, "step": 5000 }, { "epoch": 2.34448864966066, "grad_norm": 1.182088017463684, "learning_rate": 2.4418161773970934e-05, "loss": 0.4074, "step": 5010 }, { "epoch": 2.349169201965832, "grad_norm": 1.553331971168518, "learning_rate": 2.4415281653582123e-05, "loss": 0.411, "step": 5020 }, { "epoch": 2.353849754271004, "grad_norm": 1.0859484672546387, "learning_rate": 2.441239461260784e-05, "loss": 0.3978, "step": 5030 }, { "epoch": 2.358530306576176, "grad_norm": 1.0739890336990356, "learning_rate": 2.4409500652921453e-05, "loss": 0.4313, "step": 5040 }, { "epoch": 2.363210858881348, "grad_norm": 1.2760951519012451, "learning_rate": 2.440659977640082e-05, "loss": 0.3985, "step": 5050 }, { "epoch": 2.36789141118652, "grad_norm": 0.7984686493873596, "learning_rate": 2.440369198492829e-05, "loss": 0.4148, "step": 5060 }, { "epoch": 2.372571963491692, "grad_norm": 1.0215308666229248, "learning_rate": 2.44007772803907e-05, "loss": 0.4115, "step": 5070 }, { "epoch": 2.3772525157968643, "grad_norm": 1.1597445011138916, "learning_rate": 2.439785566467936e-05, "loss": 0.3981, "step": 5080 }, { "epoch": 2.381933068102036, "grad_norm": 1.211276888847351, "learning_rate": 2.4394927139690085e-05, "loss": 0.4132, "step": 5090 }, { "epoch": 2.386613620407208, "grad_norm": 1.2150553464889526, "learning_rate": 2.439199170732316e-05, "loss": 0.4275, "step": 5100 }, { "epoch": 2.39129417271238, "grad_norm": 1.048223853111267, "learning_rate": 2.438904936948335e-05, "loss": 0.4288, "step": 5110 }, { "epoch": 2.395974725017552, "grad_norm": 1.5827559232711792, "learning_rate": 2.4386100128079917e-05, "loss": 0.4235, "step": 5120 }, { "epoch": 2.4006552773227243, "grad_norm": 1.151752233505249, "learning_rate": 2.4383143985026583e-05, "loss": 0.4115, "step": 5130 }, { "epoch": 2.405335829627896, "grad_norm": 1.1136796474456787, "learning_rate": 2.438018094224156e-05, "loss": 0.3914, "step": 5140 }, { "epoch": 2.410016381933068, "grad_norm": 1.0078845024108887, "learning_rate": 2.437721100164753e-05, "loss": 0.3975, "step": 5150 }, { "epoch": 2.4146969342382403, "grad_norm": 1.8876729011535645, "learning_rate": 2.437423416517166e-05, "loss": 0.4126, "step": 5160 }, { "epoch": 2.419377486543412, "grad_norm": 0.9853624105453491, "learning_rate": 2.437125043474559e-05, "loss": 0.4056, "step": 5170 }, { "epoch": 2.424058038848584, "grad_norm": 1.0727262496948242, "learning_rate": 2.4368259812305426e-05, "loss": 0.4131, "step": 5180 }, { "epoch": 2.4287385911537562, "grad_norm": 0.8258790373802185, "learning_rate": 2.4365262299791748e-05, "loss": 0.3999, "step": 5190 }, { "epoch": 2.433419143458928, "grad_norm": 1.0611512660980225, "learning_rate": 2.436225789914961e-05, "loss": 0.4241, "step": 5200 }, { "epoch": 2.4380996957641003, "grad_norm": 0.8885713219642639, "learning_rate": 2.4359246612328542e-05, "loss": 0.384, "step": 5210 }, { "epoch": 2.442780248069272, "grad_norm": 1.0685890913009644, "learning_rate": 2.4356228441282524e-05, "loss": 0.3987, "step": 5220 }, { "epoch": 2.447460800374444, "grad_norm": 1.2337216138839722, "learning_rate": 2.4353203387970025e-05, "loss": 0.4098, "step": 5230 }, { "epoch": 2.4521413526796163, "grad_norm": 0.9504034519195557, "learning_rate": 2.4350171454353965e-05, "loss": 0.4006, "step": 5240 }, { "epoch": 2.456821904984788, "grad_norm": 1.904151439666748, "learning_rate": 2.434713264240173e-05, "loss": 0.3807, "step": 5250 }, { "epoch": 2.46150245728996, "grad_norm": 1.1646426916122437, "learning_rate": 2.4344086954085177e-05, "loss": 0.3948, "step": 5260 }, { "epoch": 2.4661830095951323, "grad_norm": 1.5423636436462402, "learning_rate": 2.434103439138062e-05, "loss": 0.412, "step": 5270 }, { "epoch": 2.470863561900304, "grad_norm": 1.074639081954956, "learning_rate": 2.433797495626883e-05, "loss": 0.4095, "step": 5280 }, { "epoch": 2.4755441142054764, "grad_norm": 1.0263150930404663, "learning_rate": 2.433490865073504e-05, "loss": 0.4094, "step": 5290 }, { "epoch": 2.480224666510648, "grad_norm": 0.8800832629203796, "learning_rate": 2.4331835476768943e-05, "loss": 0.3951, "step": 5300 }, { "epoch": 2.48490521881582, "grad_norm": 1.0649349689483643, "learning_rate": 2.4328755436364693e-05, "loss": 0.4116, "step": 5310 }, { "epoch": 2.4895857711209923, "grad_norm": 1.6284252405166626, "learning_rate": 2.43256685315209e-05, "loss": 0.4166, "step": 5320 }, { "epoch": 2.494266323426164, "grad_norm": 1.3523905277252197, "learning_rate": 2.4322574764240605e-05, "loss": 0.4063, "step": 5330 }, { "epoch": 2.4989468757313364, "grad_norm": 1.3464818000793457, "learning_rate": 2.4319474136531335e-05, "loss": 0.4057, "step": 5340 }, { "epoch": 2.5036274280365083, "grad_norm": 1.049660563468933, "learning_rate": 2.4316366650405047e-05, "loss": 0.3884, "step": 5350 }, { "epoch": 2.50830798034168, "grad_norm": 1.1225115060806274, "learning_rate": 2.4313252307878155e-05, "loss": 0.4051, "step": 5360 }, { "epoch": 2.5129885326468524, "grad_norm": 1.1433706283569336, "learning_rate": 2.4310131110971532e-05, "loss": 0.4043, "step": 5370 }, { "epoch": 2.5176690849520242, "grad_norm": 1.4642736911773682, "learning_rate": 2.4307003061710475e-05, "loss": 0.3925, "step": 5380 }, { "epoch": 2.5223496372571965, "grad_norm": 1.2610245943069458, "learning_rate": 2.4303868162124747e-05, "loss": 0.3745, "step": 5390 }, { "epoch": 2.5270301895623684, "grad_norm": 0.9679880738258362, "learning_rate": 2.430072641424855e-05, "loss": 0.3955, "step": 5400 }, { "epoch": 2.53171074186754, "grad_norm": 0.821582555770874, "learning_rate": 2.429757782012053e-05, "loss": 0.4038, "step": 5410 }, { "epoch": 2.5363912941727125, "grad_norm": 0.9552710056304932, "learning_rate": 2.4294422381783783e-05, "loss": 0.3905, "step": 5420 }, { "epoch": 2.5410718464778843, "grad_norm": 0.9821903109550476, "learning_rate": 2.4291260101285824e-05, "loss": 0.3961, "step": 5430 }, { "epoch": 2.5457523987830566, "grad_norm": 1.0351616144180298, "learning_rate": 2.4288090980678634e-05, "loss": 0.3953, "step": 5440 }, { "epoch": 2.5504329510882284, "grad_norm": 1.3785297870635986, "learning_rate": 2.428491502201861e-05, "loss": 0.4163, "step": 5450 }, { "epoch": 2.5551135033934003, "grad_norm": 0.8629039525985718, "learning_rate": 2.4281732227366614e-05, "loss": 0.3799, "step": 5460 }, { "epoch": 2.5597940556985725, "grad_norm": 1.0753625631332397, "learning_rate": 2.427854259878791e-05, "loss": 0.4064, "step": 5470 }, { "epoch": 2.5644746080037444, "grad_norm": 1.1808897256851196, "learning_rate": 2.4275346138352224e-05, "loss": 0.3963, "step": 5480 }, { "epoch": 2.5691551603089167, "grad_norm": 0.9280069470405579, "learning_rate": 2.42721428481337e-05, "loss": 0.3983, "step": 5490 }, { "epoch": 2.5738357126140885, "grad_norm": 1.109930396080017, "learning_rate": 2.426893273021092e-05, "loss": 0.3944, "step": 5500 }, { "epoch": 2.5785162649192603, "grad_norm": 1.4714018106460571, "learning_rate": 2.426571578666689e-05, "loss": 0.4033, "step": 5510 }, { "epoch": 2.5831968172244326, "grad_norm": 1.5634015798568726, "learning_rate": 2.4262492019589055e-05, "loss": 0.4033, "step": 5520 }, { "epoch": 2.5878773695296045, "grad_norm": 1.0896543264389038, "learning_rate": 2.4259261431069283e-05, "loss": 0.3912, "step": 5530 }, { "epoch": 2.5925579218347767, "grad_norm": 0.7900036573410034, "learning_rate": 2.4256024023203863e-05, "loss": 0.388, "step": 5540 }, { "epoch": 2.5972384741399486, "grad_norm": 1.2350980043411255, "learning_rate": 2.425277979809352e-05, "loss": 0.3777, "step": 5550 }, { "epoch": 2.6019190264451204, "grad_norm": 0.9451947212219238, "learning_rate": 2.4249528757843392e-05, "loss": 0.3998, "step": 5560 }, { "epoch": 2.6065995787502927, "grad_norm": 1.3558471202850342, "learning_rate": 2.4246270904563045e-05, "loss": 0.3897, "step": 5570 }, { "epoch": 2.6112801310554645, "grad_norm": 1.2140488624572754, "learning_rate": 2.4243006240366464e-05, "loss": 0.4039, "step": 5580 }, { "epoch": 2.615960683360637, "grad_norm": 0.9372486472129822, "learning_rate": 2.423973476737205e-05, "loss": 0.3917, "step": 5590 }, { "epoch": 2.6206412356658086, "grad_norm": 0.8702487945556641, "learning_rate": 2.4236456487702642e-05, "loss": 0.3818, "step": 5600 }, { "epoch": 2.6253217879709805, "grad_norm": 1.064036250114441, "learning_rate": 2.4233171403485457e-05, "loss": 0.394, "step": 5610 }, { "epoch": 2.6300023402761523, "grad_norm": 0.7792849540710449, "learning_rate": 2.4229879516852168e-05, "loss": 0.3904, "step": 5620 }, { "epoch": 2.6346828925813246, "grad_norm": 1.1560310125350952, "learning_rate": 2.4226580829938838e-05, "loss": 0.3871, "step": 5630 }, { "epoch": 2.639363444886497, "grad_norm": 0.8449503779411316, "learning_rate": 2.4223275344885955e-05, "loss": 0.4092, "step": 5640 }, { "epoch": 2.6440439971916687, "grad_norm": 0.8907197713851929, "learning_rate": 2.42199630638384e-05, "loss": 0.3967, "step": 5650 }, { "epoch": 2.6487245494968406, "grad_norm": 0.9207234382629395, "learning_rate": 2.4216643988945486e-05, "loss": 0.3889, "step": 5660 }, { "epoch": 2.6534051018020124, "grad_norm": 1.3757447004318237, "learning_rate": 2.4213318122360926e-05, "loss": 0.3871, "step": 5670 }, { "epoch": 2.6580856541071847, "grad_norm": 1.4068087339401245, "learning_rate": 2.420998546624283e-05, "loss": 0.4055, "step": 5680 }, { "epoch": 2.6627662064123565, "grad_norm": 0.9505828022956848, "learning_rate": 2.4206646022753726e-05, "loss": 0.3791, "step": 5690 }, { "epoch": 2.667446758717529, "grad_norm": 0.9574126601219177, "learning_rate": 2.420329979406055e-05, "loss": 0.383, "step": 5700 }, { "epoch": 2.6721273110227006, "grad_norm": 0.9345055222511292, "learning_rate": 2.4199946782334627e-05, "loss": 0.3893, "step": 5710 }, { "epoch": 2.6768078633278725, "grad_norm": 1.0515793561935425, "learning_rate": 2.4196586989751687e-05, "loss": 0.402, "step": 5720 }, { "epoch": 2.6814884156330447, "grad_norm": 1.1397265195846558, "learning_rate": 2.4193220418491866e-05, "loss": 0.3905, "step": 5730 }, { "epoch": 2.6861689679382166, "grad_norm": 0.7644771337509155, "learning_rate": 2.4189847070739698e-05, "loss": 0.4056, "step": 5740 }, { "epoch": 2.690849520243389, "grad_norm": 1.4255975484848022, "learning_rate": 2.4186466948684107e-05, "loss": 0.3927, "step": 5750 }, { "epoch": 2.6955300725485607, "grad_norm": 0.7417317032814026, "learning_rate": 2.4183080054518423e-05, "loss": 0.3692, "step": 5760 }, { "epoch": 2.7002106248537325, "grad_norm": 1.444225788116455, "learning_rate": 2.4179686390440362e-05, "loss": 0.4061, "step": 5770 }, { "epoch": 2.704891177158905, "grad_norm": 0.8588378429412842, "learning_rate": 2.4176285958652035e-05, "loss": 0.4054, "step": 5780 }, { "epoch": 2.7095717294640767, "grad_norm": 0.825032651424408, "learning_rate": 2.4172878761359946e-05, "loss": 0.398, "step": 5790 }, { "epoch": 2.714252281769249, "grad_norm": 1.226813793182373, "learning_rate": 2.4169464800774985e-05, "loss": 0.3944, "step": 5800 }, { "epoch": 2.7189328340744208, "grad_norm": 1.0852247476577759, "learning_rate": 2.4166044079112443e-05, "loss": 0.3719, "step": 5810 }, { "epoch": 2.7236133863795926, "grad_norm": 1.0339412689208984, "learning_rate": 2.4162616598591982e-05, "loss": 0.3976, "step": 5820 }, { "epoch": 2.728293938684765, "grad_norm": 0.9744462370872498, "learning_rate": 2.4159182361437654e-05, "loss": 0.3867, "step": 5830 }, { "epoch": 2.7329744909899367, "grad_norm": 0.8868589997291565, "learning_rate": 2.4155741369877908e-05, "loss": 0.3934, "step": 5840 }, { "epoch": 2.737655043295109, "grad_norm": 0.9003865122795105, "learning_rate": 2.4152293626145555e-05, "loss": 0.3864, "step": 5850 }, { "epoch": 2.742335595600281, "grad_norm": 1.1178038120269775, "learning_rate": 2.4148839132477808e-05, "loss": 0.3987, "step": 5860 }, { "epoch": 2.7470161479054527, "grad_norm": 0.9226582646369934, "learning_rate": 2.4145377891116246e-05, "loss": 0.375, "step": 5870 }, { "epoch": 2.751696700210625, "grad_norm": 1.3296798467636108, "learning_rate": 2.414190990430683e-05, "loss": 0.3949, "step": 5880 }, { "epoch": 2.756377252515797, "grad_norm": 1.142209768295288, "learning_rate": 2.4138435174299907e-05, "loss": 0.4057, "step": 5890 }, { "epoch": 2.761057804820969, "grad_norm": 0.7229512929916382, "learning_rate": 2.4134953703350176e-05, "loss": 0.365, "step": 5900 }, { "epoch": 2.765738357126141, "grad_norm": 1.3079508543014526, "learning_rate": 2.4131465493716737e-05, "loss": 0.3774, "step": 5910 }, { "epoch": 2.7704189094313127, "grad_norm": 1.4346837997436523, "learning_rate": 2.412797054766305e-05, "loss": 0.3853, "step": 5920 }, { "epoch": 2.775099461736485, "grad_norm": 1.312178373336792, "learning_rate": 2.412446886745695e-05, "loss": 0.3732, "step": 5930 }, { "epoch": 2.779780014041657, "grad_norm": 1.1261032819747925, "learning_rate": 2.412096045537063e-05, "loss": 0.381, "step": 5940 }, { "epoch": 2.784460566346829, "grad_norm": 1.3363999128341675, "learning_rate": 2.411744531368067e-05, "loss": 0.3903, "step": 5950 }, { "epoch": 2.789141118652001, "grad_norm": 1.1642149686813354, "learning_rate": 2.4113923444668004e-05, "loss": 0.3889, "step": 5960 }, { "epoch": 2.793821670957173, "grad_norm": 1.2174254655838013, "learning_rate": 2.4110394850617932e-05, "loss": 0.4008, "step": 5970 }, { "epoch": 2.798502223262345, "grad_norm": 0.7484317421913147, "learning_rate": 2.4106859533820125e-05, "loss": 0.3683, "step": 5980 }, { "epoch": 2.803182775567517, "grad_norm": 0.9064446091651917, "learning_rate": 2.410331749656861e-05, "loss": 0.3761, "step": 5990 }, { "epoch": 2.807863327872689, "grad_norm": 0.7995849251747131, "learning_rate": 2.4099768741161772e-05, "loss": 0.3906, "step": 6000 }, { "epoch": 2.812543880177861, "grad_norm": 0.9148076772689819, "learning_rate": 2.4096213269902367e-05, "loss": 0.3777, "step": 6010 }, { "epoch": 2.817224432483033, "grad_norm": 1.131311058998108, "learning_rate": 2.4092651085097497e-05, "loss": 0.4177, "step": 6020 }, { "epoch": 2.821904984788205, "grad_norm": 0.9738128185272217, "learning_rate": 2.4089082189058625e-05, "loss": 0.3857, "step": 6030 }, { "epoch": 2.826585537093377, "grad_norm": 0.9224892258644104, "learning_rate": 2.4085506584101576e-05, "loss": 0.3623, "step": 6040 }, { "epoch": 2.8312660893985493, "grad_norm": 0.9864346385002136, "learning_rate": 2.408192427254651e-05, "loss": 0.3933, "step": 6050 }, { "epoch": 2.835946641703721, "grad_norm": 0.8103283643722534, "learning_rate": 2.4078335256717958e-05, "loss": 0.3964, "step": 6060 }, { "epoch": 2.840627194008893, "grad_norm": 0.8176175951957703, "learning_rate": 2.407473953894479e-05, "loss": 0.3906, "step": 6070 }, { "epoch": 2.845307746314065, "grad_norm": 1.0078715085983276, "learning_rate": 2.4071137121560228e-05, "loss": 0.3804, "step": 6080 }, { "epoch": 2.849988298619237, "grad_norm": 1.0918940305709839, "learning_rate": 2.4067528006901848e-05, "loss": 0.3965, "step": 6090 }, { "epoch": 2.854668850924409, "grad_norm": 1.648101806640625, "learning_rate": 2.4063912197311556e-05, "loss": 0.3786, "step": 6100 }, { "epoch": 2.859349403229581, "grad_norm": 0.648232102394104, "learning_rate": 2.4060289695135614e-05, "loss": 0.3832, "step": 6110 }, { "epoch": 2.864029955534753, "grad_norm": 0.7912532687187195, "learning_rate": 2.4056660502724625e-05, "loss": 0.3746, "step": 6120 }, { "epoch": 2.868710507839925, "grad_norm": 0.6149617433547974, "learning_rate": 2.405302462243354e-05, "loss": 0.3846, "step": 6130 }, { "epoch": 2.873391060145097, "grad_norm": 6.665676116943359, "learning_rate": 2.404938205662163e-05, "loss": 0.3836, "step": 6140 }, { "epoch": 2.878071612450269, "grad_norm": 0.9104690551757812, "learning_rate": 2.4045732807652526e-05, "loss": 0.3827, "step": 6150 }, { "epoch": 2.8827521647554413, "grad_norm": 0.9904946684837341, "learning_rate": 2.4042076877894182e-05, "loss": 0.3898, "step": 6160 }, { "epoch": 2.887432717060613, "grad_norm": 1.2312121391296387, "learning_rate": 2.403841426971889e-05, "loss": 0.3748, "step": 6170 }, { "epoch": 2.892113269365785, "grad_norm": 1.0638936758041382, "learning_rate": 2.403474498550328e-05, "loss": 0.3907, "step": 6180 }, { "epoch": 2.8967938216709572, "grad_norm": 0.7468035817146301, "learning_rate": 2.4031069027628307e-05, "loss": 0.3889, "step": 6190 }, { "epoch": 2.901474373976129, "grad_norm": 0.836681067943573, "learning_rate": 2.402738639847926e-05, "loss": 0.3698, "step": 6200 }, { "epoch": 2.9061549262813013, "grad_norm": 0.8973950147628784, "learning_rate": 2.4023697100445765e-05, "loss": 0.3824, "step": 6210 }, { "epoch": 2.910835478586473, "grad_norm": 1.1576110124588013, "learning_rate": 2.4020001135921754e-05, "loss": 0.3758, "step": 6220 }, { "epoch": 2.915516030891645, "grad_norm": 0.9314565062522888, "learning_rate": 2.401629850730551e-05, "loss": 0.3678, "step": 6230 }, { "epoch": 2.9201965831968173, "grad_norm": 1.2183713912963867, "learning_rate": 2.4012589216999627e-05, "loss": 0.3571, "step": 6240 }, { "epoch": 2.924877135501989, "grad_norm": 1.10460364818573, "learning_rate": 2.4008873267411018e-05, "loss": 0.3829, "step": 6250 }, { "epoch": 2.9295576878071614, "grad_norm": 1.1579524278640747, "learning_rate": 2.400515066095092e-05, "loss": 0.3807, "step": 6260 }, { "epoch": 2.9342382401123333, "grad_norm": 0.9387984275817871, "learning_rate": 2.4001421400034905e-05, "loss": 0.3507, "step": 6270 }, { "epoch": 2.938918792417505, "grad_norm": 1.022390365600586, "learning_rate": 2.399768548708283e-05, "loss": 0.3825, "step": 6280 }, { "epoch": 2.9435993447226774, "grad_norm": 0.9797431826591492, "learning_rate": 2.3993942924518907e-05, "loss": 0.3816, "step": 6290 }, { "epoch": 2.948279897027849, "grad_norm": 1.3848159313201904, "learning_rate": 2.399019371477164e-05, "loss": 0.3835, "step": 6300 }, { "epoch": 2.9529604493330215, "grad_norm": 0.8364588618278503, "learning_rate": 2.3986437860273847e-05, "loss": 0.3877, "step": 6310 }, { "epoch": 2.9576410016381933, "grad_norm": 0.8672960996627808, "learning_rate": 2.398267536346266e-05, "loss": 0.3625, "step": 6320 }, { "epoch": 2.962321553943365, "grad_norm": 0.9633205533027649, "learning_rate": 2.3978906226779524e-05, "loss": 0.3654, "step": 6330 }, { "epoch": 2.9670021062485374, "grad_norm": 1.1044373512268066, "learning_rate": 2.3975130452670188e-05, "loss": 0.3553, "step": 6340 }, { "epoch": 2.9716826585537093, "grad_norm": 0.8462681174278259, "learning_rate": 2.3971348043584717e-05, "loss": 0.3504, "step": 6350 }, { "epoch": 2.9763632108588816, "grad_norm": 0.8201923370361328, "learning_rate": 2.3967559001977475e-05, "loss": 0.3841, "step": 6360 }, { "epoch": 2.9810437631640534, "grad_norm": 1.0502488613128662, "learning_rate": 2.3963763330307123e-05, "loss": 0.3751, "step": 6370 }, { "epoch": 2.9857243154692252, "grad_norm": 1.1331840753555298, "learning_rate": 2.3959961031036637e-05, "loss": 0.3903, "step": 6380 }, { "epoch": 2.9904048677743975, "grad_norm": 0.7075087428092957, "learning_rate": 2.3956152106633284e-05, "loss": 0.3583, "step": 6390 }, { "epoch": 2.9950854200795693, "grad_norm": 0.8603501915931702, "learning_rate": 2.3952336559568636e-05, "loss": 0.3818, "step": 6400 }, { "epoch": 2.9997659723847416, "grad_norm": 1.0096824169158936, "learning_rate": 2.3948514392318564e-05, "loss": 0.3728, "step": 6410 }, { "epoch": 3.0042124970746547, "grad_norm": 0.7965402603149414, "learning_rate": 2.394468560736322e-05, "loss": 0.3621, "step": 6420 }, { "epoch": 3.008893049379827, "grad_norm": 1.300311803817749, "learning_rate": 2.394085020718707e-05, "loss": 0.3672, "step": 6430 }, { "epoch": 3.0135736016849988, "grad_norm": 0.9037348031997681, "learning_rate": 2.393700819427886e-05, "loss": 0.3641, "step": 6440 }, { "epoch": 3.018254153990171, "grad_norm": 0.9734289050102234, "learning_rate": 2.3933159571131637e-05, "loss": 0.3689, "step": 6450 }, { "epoch": 3.022934706295343, "grad_norm": 0.7314546704292297, "learning_rate": 2.3929304340242722e-05, "loss": 0.3652, "step": 6460 }, { "epoch": 3.0276152586005147, "grad_norm": 0.9770709872245789, "learning_rate": 2.3925442504113733e-05, "loss": 0.3471, "step": 6470 }, { "epoch": 3.032295810905687, "grad_norm": 0.985780656337738, "learning_rate": 2.3921574065250577e-05, "loss": 0.3626, "step": 6480 }, { "epoch": 3.036976363210859, "grad_norm": 1.1551388502120972, "learning_rate": 2.3917699026163442e-05, "loss": 0.3561, "step": 6490 }, { "epoch": 3.0416569155160307, "grad_norm": 0.8383092880249023, "learning_rate": 2.3913817389366803e-05, "loss": 0.3482, "step": 6500 }, { "epoch": 3.046337467821203, "grad_norm": 0.780727207660675, "learning_rate": 2.3909929157379398e-05, "loss": 0.3593, "step": 6510 }, { "epoch": 3.051018020126375, "grad_norm": 0.9523969888687134, "learning_rate": 2.3906034332724277e-05, "loss": 0.3645, "step": 6520 }, { "epoch": 3.055698572431547, "grad_norm": 1.3010329008102417, "learning_rate": 2.3902132917928734e-05, "loss": 0.3501, "step": 6530 }, { "epoch": 3.060379124736719, "grad_norm": 0.8623541593551636, "learning_rate": 2.389822491552436e-05, "loss": 0.3649, "step": 6540 }, { "epoch": 3.0650596770418908, "grad_norm": 0.8203200101852417, "learning_rate": 2.389431032804702e-05, "loss": 0.3646, "step": 6550 }, { "epoch": 3.069740229347063, "grad_norm": 0.7927433252334595, "learning_rate": 2.389038915803684e-05, "loss": 0.3429, "step": 6560 }, { "epoch": 3.074420781652235, "grad_norm": 0.9052014946937561, "learning_rate": 2.388646140803823e-05, "loss": 0.3408, "step": 6570 }, { "epoch": 3.079101333957407, "grad_norm": 1.1179463863372803, "learning_rate": 2.388252708059986e-05, "loss": 0.3677, "step": 6580 }, { "epoch": 3.083781886262579, "grad_norm": 0.9343701004981995, "learning_rate": 2.387858617827468e-05, "loss": 0.3581, "step": 6590 }, { "epoch": 3.088462438567751, "grad_norm": 1.072296142578125, "learning_rate": 2.3874638703619885e-05, "loss": 0.3524, "step": 6600 }, { "epoch": 3.093142990872923, "grad_norm": 0.923150897026062, "learning_rate": 2.3870684659196965e-05, "loss": 0.3558, "step": 6610 }, { "epoch": 3.097823543178095, "grad_norm": 0.7665930390357971, "learning_rate": 2.3866724047571646e-05, "loss": 0.3513, "step": 6620 }, { "epoch": 3.102504095483267, "grad_norm": 1.2578281164169312, "learning_rate": 2.3862756871313924e-05, "loss": 0.3581, "step": 6630 }, { "epoch": 3.107184647788439, "grad_norm": 0.6566106081008911, "learning_rate": 2.3858783132998064e-05, "loss": 0.3378, "step": 6640 }, { "epoch": 3.111865200093611, "grad_norm": 0.7213088274002075, "learning_rate": 2.385480283520258e-05, "loss": 0.3511, "step": 6650 }, { "epoch": 3.116545752398783, "grad_norm": 0.7017402052879333, "learning_rate": 2.3850815980510242e-05, "loss": 0.3548, "step": 6660 }, { "epoch": 3.121226304703955, "grad_norm": 1.070061206817627, "learning_rate": 2.3846822571508075e-05, "loss": 0.3557, "step": 6670 }, { "epoch": 3.1259068570091273, "grad_norm": 0.5982992053031921, "learning_rate": 2.384282261078736e-05, "loss": 0.351, "step": 6680 }, { "epoch": 3.130587409314299, "grad_norm": 0.7595919370651245, "learning_rate": 2.383881610094363e-05, "loss": 0.3445, "step": 6690 }, { "epoch": 3.135267961619471, "grad_norm": 1.0000642538070679, "learning_rate": 2.3834803044576666e-05, "loss": 0.3678, "step": 6700 }, { "epoch": 3.1399485139246432, "grad_norm": 0.6610831022262573, "learning_rate": 2.3830783444290498e-05, "loss": 0.3488, "step": 6710 }, { "epoch": 3.144629066229815, "grad_norm": 0.7902114987373352, "learning_rate": 2.3826757302693396e-05, "loss": 0.3544, "step": 6720 }, { "epoch": 3.149309618534987, "grad_norm": 0.7744640707969666, "learning_rate": 2.3822724622397882e-05, "loss": 0.3417, "step": 6730 }, { "epoch": 3.153990170840159, "grad_norm": 0.6750772595405579, "learning_rate": 2.381868540602072e-05, "loss": 0.3491, "step": 6740 }, { "epoch": 3.158670723145331, "grad_norm": 0.7904402017593384, "learning_rate": 2.3814639656182906e-05, "loss": 0.3519, "step": 6750 }, { "epoch": 3.1633512754505033, "grad_norm": 0.9930589199066162, "learning_rate": 2.3810587375509695e-05, "loss": 0.3455, "step": 6760 }, { "epoch": 3.168031827755675, "grad_norm": 0.6187907457351685, "learning_rate": 2.380652856663056e-05, "loss": 0.3535, "step": 6770 }, { "epoch": 3.172712380060847, "grad_norm": 0.9628554582595825, "learning_rate": 2.3802463232179222e-05, "loss": 0.384, "step": 6780 }, { "epoch": 3.1773929323660193, "grad_norm": 0.7260007262229919, "learning_rate": 2.3798391374793632e-05, "loss": 0.3364, "step": 6790 }, { "epoch": 3.182073484671191, "grad_norm": 1.0409977436065674, "learning_rate": 2.3794312997115974e-05, "loss": 0.3456, "step": 6800 }, { "epoch": 3.1867540369763634, "grad_norm": 0.7438042759895325, "learning_rate": 2.3790228101792657e-05, "loss": 0.3437, "step": 6810 }, { "epoch": 3.1914345892815352, "grad_norm": 1.5852065086364746, "learning_rate": 2.378613669147433e-05, "loss": 0.3578, "step": 6820 }, { "epoch": 3.196115141586707, "grad_norm": 0.9437373280525208, "learning_rate": 2.3782038768815866e-05, "loss": 0.3509, "step": 6830 }, { "epoch": 3.2007956938918793, "grad_norm": 0.6904426217079163, "learning_rate": 2.3777934336476362e-05, "loss": 0.3652, "step": 6840 }, { "epoch": 3.205476246197051, "grad_norm": 0.9807417988777161, "learning_rate": 2.3773823397119138e-05, "loss": 0.3526, "step": 6850 }, { "epoch": 3.2101567985022235, "grad_norm": 1.4547901153564453, "learning_rate": 2.376970595341174e-05, "loss": 0.3723, "step": 6860 }, { "epoch": 3.2148373508073953, "grad_norm": 0.7976546883583069, "learning_rate": 2.3765582008025934e-05, "loss": 0.3776, "step": 6870 }, { "epoch": 3.219517903112567, "grad_norm": 1.0308752059936523, "learning_rate": 2.3761451563637697e-05, "loss": 0.3622, "step": 6880 }, { "epoch": 3.2241984554177394, "grad_norm": 0.8784774541854858, "learning_rate": 2.3757314622927225e-05, "loss": 0.3672, "step": 6890 }, { "epoch": 3.2288790077229113, "grad_norm": 0.6223140358924866, "learning_rate": 2.3753171188578955e-05, "loss": 0.3424, "step": 6900 }, { "epoch": 3.233559560028083, "grad_norm": 0.759529709815979, "learning_rate": 2.37490212632815e-05, "loss": 0.347, "step": 6910 }, { "epoch": 3.2382401123332554, "grad_norm": 0.8837883472442627, "learning_rate": 2.3744864849727703e-05, "loss": 0.3432, "step": 6920 }, { "epoch": 3.242920664638427, "grad_norm": 0.7717587947845459, "learning_rate": 2.3740701950614615e-05, "loss": 0.3491, "step": 6930 }, { "epoch": 3.2476012169435995, "grad_norm": 0.7546653747558594, "learning_rate": 2.3736532568643504e-05, "loss": 0.3668, "step": 6940 }, { "epoch": 3.2522817692487713, "grad_norm": 0.8277179002761841, "learning_rate": 2.373235670651983e-05, "loss": 0.3582, "step": 6950 }, { "epoch": 3.256962321553943, "grad_norm": 0.8066296577453613, "learning_rate": 2.3728174366953267e-05, "loss": 0.3401, "step": 6960 }, { "epoch": 3.2616428738591154, "grad_norm": 0.6292073130607605, "learning_rate": 2.3723985552657686e-05, "loss": 0.3636, "step": 6970 }, { "epoch": 3.2663234261642873, "grad_norm": 1.0031654834747314, "learning_rate": 2.3719790266351166e-05, "loss": 0.3408, "step": 6980 }, { "epoch": 3.2710039784694596, "grad_norm": 1.168251872062683, "learning_rate": 2.3715588510755986e-05, "loss": 0.3555, "step": 6990 }, { "epoch": 3.2756845307746314, "grad_norm": 0.6438710689544678, "learning_rate": 2.3711380288598613e-05, "loss": 0.3583, "step": 7000 }, { "epoch": 3.2803650830798032, "grad_norm": 1.0191352367401123, "learning_rate": 2.3707165602609718e-05, "loss": 0.3574, "step": 7010 }, { "epoch": 3.2850456353849755, "grad_norm": 0.9012807011604309, "learning_rate": 2.370294445552417e-05, "loss": 0.3643, "step": 7020 }, { "epoch": 3.2897261876901474, "grad_norm": 0.8039408326148987, "learning_rate": 2.3698716850081023e-05, "loss": 0.3453, "step": 7030 }, { "epoch": 3.2944067399953196, "grad_norm": 0.8539829850196838, "learning_rate": 2.3694482789023522e-05, "loss": 0.3442, "step": 7040 }, { "epoch": 3.2990872923004915, "grad_norm": 0.8931768536567688, "learning_rate": 2.3690242275099107e-05, "loss": 0.3349, "step": 7050 }, { "epoch": 3.3037678446056633, "grad_norm": 1.273759365081787, "learning_rate": 2.36859953110594e-05, "loss": 0.3524, "step": 7060 }, { "epoch": 3.3084483969108356, "grad_norm": 0.6800476908683777, "learning_rate": 2.3681741899660205e-05, "loss": 0.3721, "step": 7070 }, { "epoch": 3.3131289492160074, "grad_norm": 1.2932230234146118, "learning_rate": 2.3677482043661525e-05, "loss": 0.3499, "step": 7080 }, { "epoch": 3.3178095015211797, "grad_norm": 1.2586382627487183, "learning_rate": 2.367321574582752e-05, "loss": 0.3635, "step": 7090 }, { "epoch": 3.3224900538263515, "grad_norm": 0.6715126037597656, "learning_rate": 2.366894300892656e-05, "loss": 0.3376, "step": 7100 }, { "epoch": 3.3271706061315234, "grad_norm": 0.7664366364479065, "learning_rate": 2.3664663835731162e-05, "loss": 0.3489, "step": 7110 }, { "epoch": 3.3318511584366957, "grad_norm": 1.6183384656906128, "learning_rate": 2.366037822901805e-05, "loss": 0.3469, "step": 7120 }, { "epoch": 3.3365317107418675, "grad_norm": 0.7429299354553223, "learning_rate": 2.3656086191568096e-05, "loss": 0.3351, "step": 7130 }, { "epoch": 3.3412122630470398, "grad_norm": 0.7262730002403259, "learning_rate": 2.3651787726166364e-05, "loss": 0.3369, "step": 7140 }, { "epoch": 3.3458928153522116, "grad_norm": 0.6610113978385925, "learning_rate": 2.3647482835602076e-05, "loss": 0.3551, "step": 7150 }, { "epoch": 3.3505733676573835, "grad_norm": 0.8181681632995605, "learning_rate": 2.3643171522668634e-05, "loss": 0.3452, "step": 7160 }, { "epoch": 3.3552539199625557, "grad_norm": 0.7465345859527588, "learning_rate": 2.3638853790163602e-05, "loss": 0.3768, "step": 7170 }, { "epoch": 3.3599344722677276, "grad_norm": 0.8595597743988037, "learning_rate": 2.3634529640888704e-05, "loss": 0.3366, "step": 7180 }, { "epoch": 3.3646150245729, "grad_norm": 0.897952139377594, "learning_rate": 2.3630199077649835e-05, "loss": 0.3515, "step": 7190 }, { "epoch": 3.3692955768780717, "grad_norm": 0.7369933128356934, "learning_rate": 2.3625862103257053e-05, "loss": 0.3545, "step": 7200 }, { "epoch": 3.3739761291832435, "grad_norm": 0.6390635371208191, "learning_rate": 2.3621518720524575e-05, "loss": 0.3461, "step": 7210 }, { "epoch": 3.378656681488416, "grad_norm": 1.1387066841125488, "learning_rate": 2.361716893227077e-05, "loss": 0.3428, "step": 7220 }, { "epoch": 3.3833372337935876, "grad_norm": 1.1786819696426392, "learning_rate": 2.361281274131817e-05, "loss": 0.3443, "step": 7230 }, { "epoch": 3.3880177860987595, "grad_norm": 1.048596978187561, "learning_rate": 2.3608450150493468e-05, "loss": 0.3521, "step": 7240 }, { "epoch": 3.3926983384039318, "grad_norm": 0.8547099232673645, "learning_rate": 2.3604081162627488e-05, "loss": 0.3467, "step": 7250 }, { "epoch": 3.3973788907091036, "grad_norm": 0.7871001958847046, "learning_rate": 2.3599705780555227e-05, "loss": 0.3462, "step": 7260 }, { "epoch": 3.402059443014276, "grad_norm": 0.9244853258132935, "learning_rate": 2.3595324007115825e-05, "loss": 0.348, "step": 7270 }, { "epoch": 3.4067399953194477, "grad_norm": 0.90443354845047, "learning_rate": 2.3590935845152565e-05, "loss": 0.356, "step": 7280 }, { "epoch": 3.4114205476246195, "grad_norm": 0.585089385509491, "learning_rate": 2.3586541297512873e-05, "loss": 0.3465, "step": 7290 }, { "epoch": 3.416101099929792, "grad_norm": 0.9762527942657471, "learning_rate": 2.3582140367048326e-05, "loss": 0.3546, "step": 7300 }, { "epoch": 3.4207816522349637, "grad_norm": 0.5876173973083496, "learning_rate": 2.3577733056614645e-05, "loss": 0.357, "step": 7310 }, { "epoch": 3.4254622045401355, "grad_norm": 0.827947735786438, "learning_rate": 2.3573319369071678e-05, "loss": 0.3389, "step": 7320 }, { "epoch": 3.430142756845308, "grad_norm": 1.186875581741333, "learning_rate": 2.3568899307283422e-05, "loss": 0.3601, "step": 7330 }, { "epoch": 3.4348233091504796, "grad_norm": 0.9063706398010254, "learning_rate": 2.3564472874118013e-05, "loss": 0.3416, "step": 7340 }, { "epoch": 3.439503861455652, "grad_norm": 0.7569247484207153, "learning_rate": 2.3560040072447705e-05, "loss": 0.3561, "step": 7350 }, { "epoch": 3.4441844137608237, "grad_norm": 1.1505720615386963, "learning_rate": 2.3555600905148906e-05, "loss": 0.346, "step": 7360 }, { "epoch": 3.4488649660659956, "grad_norm": 2.01847767829895, "learning_rate": 2.3551155375102133e-05, "loss": 0.3559, "step": 7370 }, { "epoch": 3.453545518371168, "grad_norm": 0.948765754699707, "learning_rate": 2.3546703485192047e-05, "loss": 0.3489, "step": 7380 }, { "epoch": 3.4582260706763397, "grad_norm": 0.7339697480201721, "learning_rate": 2.354224523830743e-05, "loss": 0.3525, "step": 7390 }, { "epoch": 3.462906622981512, "grad_norm": 0.5677586793899536, "learning_rate": 2.3537780637341192e-05, "loss": 0.3375, "step": 7400 }, { "epoch": 3.467587175286684, "grad_norm": 0.9445962905883789, "learning_rate": 2.3533309685190364e-05, "loss": 0.3465, "step": 7410 }, { "epoch": 3.4722677275918556, "grad_norm": 0.9939059019088745, "learning_rate": 2.35288323847561e-05, "loss": 0.3424, "step": 7420 }, { "epoch": 3.476948279897028, "grad_norm": 0.9694239497184753, "learning_rate": 2.3524348738943658e-05, "loss": 0.3367, "step": 7430 }, { "epoch": 3.4816288322021998, "grad_norm": 0.8759608268737793, "learning_rate": 2.3519858750662448e-05, "loss": 0.3481, "step": 7440 }, { "epoch": 3.486309384507372, "grad_norm": 1.124565601348877, "learning_rate": 2.351536242282596e-05, "loss": 0.354, "step": 7450 }, { "epoch": 3.490989936812544, "grad_norm": 0.5828760862350464, "learning_rate": 2.3510859758351816e-05, "loss": 0.3554, "step": 7460 }, { "epoch": 3.4956704891177157, "grad_norm": 0.8962104916572571, "learning_rate": 2.3506350760161752e-05, "loss": 0.3412, "step": 7470 }, { "epoch": 3.500351041422888, "grad_norm": 0.9500762820243835, "learning_rate": 2.3501835431181595e-05, "loss": 0.3511, "step": 7480 }, { "epoch": 3.50503159372806, "grad_norm": 0.7874006628990173, "learning_rate": 2.3497313774341307e-05, "loss": 0.3395, "step": 7490 }, { "epoch": 3.509712146033232, "grad_norm": 0.6060056090354919, "learning_rate": 2.3492785792574938e-05, "loss": 0.3532, "step": 7500 }, { "epoch": 3.514392698338404, "grad_norm": 0.6492898464202881, "learning_rate": 2.348825148882064e-05, "loss": 0.3509, "step": 7510 }, { "epoch": 3.519073250643576, "grad_norm": 0.9737768173217773, "learning_rate": 2.348371086602068e-05, "loss": 0.3327, "step": 7520 }, { "epoch": 3.523753802948748, "grad_norm": 1.026826024055481, "learning_rate": 2.3479163927121413e-05, "loss": 0.3527, "step": 7530 }, { "epoch": 3.52843435525392, "grad_norm": 0.7904170751571655, "learning_rate": 2.34746106750733e-05, "loss": 0.323, "step": 7540 }, { "epoch": 3.533114907559092, "grad_norm": 1.4108541011810303, "learning_rate": 2.34700511128309e-05, "loss": 0.3496, "step": 7550 }, { "epoch": 3.537795459864264, "grad_norm": 0.6315344572067261, "learning_rate": 2.3465485243352867e-05, "loss": 0.3354, "step": 7560 }, { "epoch": 3.542476012169436, "grad_norm": 1.0450727939605713, "learning_rate": 2.3460913069601937e-05, "loss": 0.3433, "step": 7570 }, { "epoch": 3.547156564474608, "grad_norm": 0.8338617086410522, "learning_rate": 2.3456334594544942e-05, "loss": 0.3407, "step": 7580 }, { "epoch": 3.55183711677978, "grad_norm": 1.2579275369644165, "learning_rate": 2.3451749821152813e-05, "loss": 0.3578, "step": 7590 }, { "epoch": 3.5565176690849523, "grad_norm": 1.0316251516342163, "learning_rate": 2.3447158752400556e-05, "loss": 0.3506, "step": 7600 }, { "epoch": 3.561198221390124, "grad_norm": 1.193884015083313, "learning_rate": 2.3442561391267264e-05, "loss": 0.3388, "step": 7610 }, { "epoch": 3.565878773695296, "grad_norm": 2.0334949493408203, "learning_rate": 2.343795774073611e-05, "loss": 0.3533, "step": 7620 }, { "epoch": 3.570559326000468, "grad_norm": 0.9891320466995239, "learning_rate": 2.3433347803794368e-05, "loss": 0.3481, "step": 7630 }, { "epoch": 3.57523987830564, "grad_norm": 0.8513662219047546, "learning_rate": 2.3428731583433363e-05, "loss": 0.3427, "step": 7640 }, { "epoch": 3.5799204306108123, "grad_norm": 0.6587763428688049, "learning_rate": 2.3424109082648514e-05, "loss": 0.3633, "step": 7650 }, { "epoch": 3.584600982915984, "grad_norm": 0.9413192868232727, "learning_rate": 2.3419480304439308e-05, "loss": 0.3429, "step": 7660 }, { "epoch": 3.589281535221156, "grad_norm": 0.6457734107971191, "learning_rate": 2.3414845251809317e-05, "loss": 0.3395, "step": 7670 }, { "epoch": 3.593962087526328, "grad_norm": 0.9778665900230408, "learning_rate": 2.3410203927766168e-05, "loss": 0.3261, "step": 7680 }, { "epoch": 3.5986426398315, "grad_norm": 0.7454681396484375, "learning_rate": 2.3405556335321575e-05, "loss": 0.3471, "step": 7690 }, { "epoch": 3.6033231921366724, "grad_norm": 0.6248010993003845, "learning_rate": 2.3400902477491297e-05, "loss": 0.3442, "step": 7700 }, { "epoch": 3.6080037444418442, "grad_norm": 0.5518543720245361, "learning_rate": 2.3396242357295187e-05, "loss": 0.3434, "step": 7710 }, { "epoch": 3.612684296747016, "grad_norm": 0.6907958984375, "learning_rate": 2.3391575977757138e-05, "loss": 0.3304, "step": 7720 }, { "epoch": 3.617364849052188, "grad_norm": 0.8528406620025635, "learning_rate": 2.3386903341905113e-05, "loss": 0.3569, "step": 7730 }, { "epoch": 3.62204540135736, "grad_norm": 0.6039168238639832, "learning_rate": 2.3382224452771135e-05, "loss": 0.3337, "step": 7740 }, { "epoch": 3.626725953662532, "grad_norm": 0.839394211769104, "learning_rate": 2.3377539313391285e-05, "loss": 0.3605, "step": 7750 }, { "epoch": 3.6314065059677043, "grad_norm": 1.2250992059707642, "learning_rate": 2.3372847926805703e-05, "loss": 0.3441, "step": 7760 }, { "epoch": 3.636087058272876, "grad_norm": 0.851437509059906, "learning_rate": 2.336815029605857e-05, "loss": 0.3494, "step": 7770 }, { "epoch": 3.640767610578048, "grad_norm": 0.7641233801841736, "learning_rate": 2.3363446424198135e-05, "loss": 0.3506, "step": 7780 }, { "epoch": 3.6454481628832203, "grad_norm": 0.5906285047531128, "learning_rate": 2.3358736314276687e-05, "loss": 0.3534, "step": 7790 }, { "epoch": 3.650128715188392, "grad_norm": 0.9253901839256287, "learning_rate": 2.3354019969350565e-05, "loss": 0.3517, "step": 7800 }, { "epoch": 3.6548092674935644, "grad_norm": 1.521715521812439, "learning_rate": 2.3349297392480155e-05, "loss": 0.3389, "step": 7810 }, { "epoch": 3.659489819798736, "grad_norm": 1.0693355798721313, "learning_rate": 2.3344568586729883e-05, "loss": 0.3555, "step": 7820 }, { "epoch": 3.664170372103908, "grad_norm": 0.686738908290863, "learning_rate": 2.3339833555168223e-05, "loss": 0.333, "step": 7830 }, { "epoch": 3.6688509244090803, "grad_norm": 0.7157394886016846, "learning_rate": 2.3335092300867683e-05, "loss": 0.3449, "step": 7840 }, { "epoch": 3.673531476714252, "grad_norm": 1.0483309030532837, "learning_rate": 2.3330344826904818e-05, "loss": 0.3535, "step": 7850 }, { "epoch": 3.6782120290194245, "grad_norm": 0.9577176570892334, "learning_rate": 2.33255911363602e-05, "loss": 0.3507, "step": 7860 }, { "epoch": 3.6828925813245963, "grad_norm": 0.7466033101081848, "learning_rate": 2.3320831232318457e-05, "loss": 0.3447, "step": 7870 }, { "epoch": 3.687573133629768, "grad_norm": 0.9283835291862488, "learning_rate": 2.331606511786824e-05, "loss": 0.3468, "step": 7880 }, { "epoch": 3.6922536859349404, "grad_norm": 0.7625275254249573, "learning_rate": 2.331129279610222e-05, "loss": 0.3387, "step": 7890 }, { "epoch": 3.6969342382401122, "grad_norm": 1.094892978668213, "learning_rate": 2.3306514270117112e-05, "loss": 0.3381, "step": 7900 }, { "epoch": 3.7016147905452845, "grad_norm": 0.9514840841293335, "learning_rate": 2.330172954301365e-05, "loss": 0.3442, "step": 7910 }, { "epoch": 3.7062953428504564, "grad_norm": 0.6921585202217102, "learning_rate": 2.3296938617896584e-05, "loss": 0.3353, "step": 7920 }, { "epoch": 3.710975895155628, "grad_norm": 1.6093900203704834, "learning_rate": 2.3292141497874707e-05, "loss": 0.3283, "step": 7930 }, { "epoch": 3.7156564474608005, "grad_norm": 1.0154404640197754, "learning_rate": 2.32873381860608e-05, "loss": 0.3291, "step": 7940 }, { "epoch": 3.7203369997659723, "grad_norm": 1.2234174013137817, "learning_rate": 2.3282528685571696e-05, "loss": 0.3315, "step": 7950 }, { "epoch": 3.7250175520711446, "grad_norm": 0.71647709608078, "learning_rate": 2.3277712999528218e-05, "loss": 0.3247, "step": 7960 }, { "epoch": 3.7296981043763164, "grad_norm": 0.5740870237350464, "learning_rate": 2.3272891131055215e-05, "loss": 0.338, "step": 7970 }, { "epoch": 3.7343786566814883, "grad_norm": 0.6990614533424377, "learning_rate": 2.326806308328155e-05, "loss": 0.3379, "step": 7980 }, { "epoch": 3.7390592089866606, "grad_norm": 0.9485940933227539, "learning_rate": 2.326322885934008e-05, "loss": 0.3471, "step": 7990 }, { "epoch": 3.7437397612918324, "grad_norm": 0.7517099976539612, "learning_rate": 2.325838846236769e-05, "loss": 0.3361, "step": 8000 }, { "epoch": 3.7484203135970047, "grad_norm": 0.7130835652351379, "learning_rate": 2.325354189550526e-05, "loss": 0.3283, "step": 8010 }, { "epoch": 3.7531008659021765, "grad_norm": 0.8035351634025574, "learning_rate": 2.3248689161897673e-05, "loss": 0.3347, "step": 8020 }, { "epoch": 3.7577814182073483, "grad_norm": 0.6642847061157227, "learning_rate": 2.324383026469381e-05, "loss": 0.3477, "step": 8030 }, { "epoch": 3.7624619705125206, "grad_norm": 0.7582077980041504, "learning_rate": 2.323896520704656e-05, "loss": 0.3447, "step": 8040 }, { "epoch": 3.7671425228176925, "grad_norm": 0.5692678093910217, "learning_rate": 2.3234093992112814e-05, "loss": 0.3259, "step": 8050 }, { "epoch": 3.7718230751228647, "grad_norm": 1.104515552520752, "learning_rate": 2.322921662305344e-05, "loss": 0.3237, "step": 8060 }, { "epoch": 3.7765036274280366, "grad_norm": 0.624703586101532, "learning_rate": 2.3224333103033316e-05, "loss": 0.3363, "step": 8070 }, { "epoch": 3.7811841797332084, "grad_norm": 0.6520636677742004, "learning_rate": 2.32194434352213e-05, "loss": 0.3334, "step": 8080 }, { "epoch": 3.7858647320383803, "grad_norm": 0.9964982271194458, "learning_rate": 2.321454762279025e-05, "loss": 0.3329, "step": 8090 }, { "epoch": 3.7905452843435525, "grad_norm": 0.8680127859115601, "learning_rate": 2.3209645668917e-05, "loss": 0.3258, "step": 8100 }, { "epoch": 3.795225836648725, "grad_norm": 0.8002684116363525, "learning_rate": 2.320473757678238e-05, "loss": 0.3453, "step": 8110 }, { "epoch": 3.7999063889538967, "grad_norm": 0.7807113528251648, "learning_rate": 2.3199823349571193e-05, "loss": 0.3515, "step": 8120 }, { "epoch": 3.8045869412590685, "grad_norm": 0.6325606107711792, "learning_rate": 2.3194902990472226e-05, "loss": 0.3357, "step": 8130 }, { "epoch": 3.8092674935642403, "grad_norm": 0.5856669545173645, "learning_rate": 2.3189976502678256e-05, "loss": 0.3305, "step": 8140 }, { "epoch": 3.8139480458694126, "grad_norm": 0.6338105797767639, "learning_rate": 2.318504388938602e-05, "loss": 0.334, "step": 8150 }, { "epoch": 3.8186285981745844, "grad_norm": 0.6572860479354858, "learning_rate": 2.318010515379624e-05, "loss": 0.3415, "step": 8160 }, { "epoch": 3.8233091504797567, "grad_norm": 0.859535813331604, "learning_rate": 2.3175160299113604e-05, "loss": 0.3375, "step": 8170 }, { "epoch": 3.8279897027849286, "grad_norm": 0.875016450881958, "learning_rate": 2.3170209328546786e-05, "loss": 0.3354, "step": 8180 }, { "epoch": 3.8326702550901004, "grad_norm": 0.6580078601837158, "learning_rate": 2.3165252245308397e-05, "loss": 0.3448, "step": 8190 }, { "epoch": 3.8373508073952727, "grad_norm": 0.6825771927833557, "learning_rate": 2.316028905261505e-05, "loss": 0.3127, "step": 8200 }, { "epoch": 3.8420313597004445, "grad_norm": 0.7895457744598389, "learning_rate": 2.3155319753687308e-05, "loss": 0.3412, "step": 8210 }, { "epoch": 3.846711912005617, "grad_norm": 1.0796229839324951, "learning_rate": 2.3150344351749684e-05, "loss": 0.3446, "step": 8220 }, { "epoch": 3.8513924643107886, "grad_norm": 1.012742280960083, "learning_rate": 2.314536285003067e-05, "loss": 0.3368, "step": 8230 }, { "epoch": 3.8560730166159605, "grad_norm": 0.5472418069839478, "learning_rate": 2.3140375251762706e-05, "loss": 0.3155, "step": 8240 }, { "epoch": 3.8607535689211328, "grad_norm": 0.8044186234474182, "learning_rate": 2.313538156018219e-05, "loss": 0.3322, "step": 8250 }, { "epoch": 3.8654341212263046, "grad_norm": 0.6203462481498718, "learning_rate": 2.313038177852948e-05, "loss": 0.3251, "step": 8260 }, { "epoch": 3.870114673531477, "grad_norm": 1.0071461200714111, "learning_rate": 2.312537591004887e-05, "loss": 0.3275, "step": 8270 }, { "epoch": 3.8747952258366487, "grad_norm": 0.7887601852416992, "learning_rate": 2.312036395798862e-05, "loss": 0.3423, "step": 8280 }, { "epoch": 3.8794757781418205, "grad_norm": 0.81620854139328, "learning_rate": 2.311534592560093e-05, "loss": 0.3211, "step": 8290 }, { "epoch": 3.884156330446993, "grad_norm": 0.9154536724090576, "learning_rate": 2.311032181614195e-05, "loss": 0.3442, "step": 8300 }, { "epoch": 3.8888368827521647, "grad_norm": 0.6854469776153564, "learning_rate": 2.3105291632871765e-05, "loss": 0.318, "step": 8310 }, { "epoch": 3.893517435057337, "grad_norm": 0.8955985903739929, "learning_rate": 2.310025537905441e-05, "loss": 0.3451, "step": 8320 }, { "epoch": 3.8981979873625088, "grad_norm": 0.8162173628807068, "learning_rate": 2.309521305795785e-05, "loss": 0.3193, "step": 8330 }, { "epoch": 3.9028785396676806, "grad_norm": 0.6988288164138794, "learning_rate": 2.3090164672853997e-05, "loss": 0.32, "step": 8340 }, { "epoch": 3.907559091972853, "grad_norm": 0.6463280320167542, "learning_rate": 2.3085110227018695e-05, "loss": 0.3181, "step": 8350 }, { "epoch": 3.9122396442780247, "grad_norm": 1.2228721380233765, "learning_rate": 2.3080049723731713e-05, "loss": 0.3332, "step": 8360 }, { "epoch": 3.916920196583197, "grad_norm": 0.7444502711296082, "learning_rate": 2.3074983166276767e-05, "loss": 0.3342, "step": 8370 }, { "epoch": 3.921600748888369, "grad_norm": 1.1337993144989014, "learning_rate": 2.3069910557941478e-05, "loss": 0.3265, "step": 8380 }, { "epoch": 3.9262813011935407, "grad_norm": 0.8006963729858398, "learning_rate": 2.3064831902017418e-05, "loss": 0.3337, "step": 8390 }, { "epoch": 3.930961853498713, "grad_norm": 0.5106991529464722, "learning_rate": 2.3059747201800064e-05, "loss": 0.3265, "step": 8400 }, { "epoch": 3.935642405803885, "grad_norm": 0.5900511741638184, "learning_rate": 2.305465646058882e-05, "loss": 0.3237, "step": 8410 }, { "epoch": 3.940322958109057, "grad_norm": 0.6634641289710999, "learning_rate": 2.3049559681687025e-05, "loss": 0.3299, "step": 8420 }, { "epoch": 3.945003510414229, "grad_norm": 0.5794097781181335, "learning_rate": 2.304445686840191e-05, "loss": 0.3232, "step": 8430 }, { "epoch": 3.9496840627194008, "grad_norm": 0.8140740990638733, "learning_rate": 2.3039348024044646e-05, "loss": 0.3272, "step": 8440 }, { "epoch": 3.954364615024573, "grad_norm": 0.5921916961669922, "learning_rate": 2.3034233151930305e-05, "loss": 0.3203, "step": 8450 }, { "epoch": 3.959045167329745, "grad_norm": 0.49779680371284485, "learning_rate": 2.3029112255377864e-05, "loss": 0.3487, "step": 8460 }, { "epoch": 3.963725719634917, "grad_norm": 1.0255091190338135, "learning_rate": 2.3023985337710224e-05, "loss": 0.3259, "step": 8470 }, { "epoch": 3.968406271940089, "grad_norm": 0.6951221823692322, "learning_rate": 2.3018852402254184e-05, "loss": 0.3364, "step": 8480 }, { "epoch": 3.973086824245261, "grad_norm": 0.9265257120132446, "learning_rate": 2.301371345234045e-05, "loss": 0.3363, "step": 8490 }, { "epoch": 3.977767376550433, "grad_norm": 0.5224654674530029, "learning_rate": 2.3008568491303634e-05, "loss": 0.331, "step": 8500 }, { "epoch": 3.982447928855605, "grad_norm": 0.648169994354248, "learning_rate": 2.3003417522482242e-05, "loss": 0.3114, "step": 8510 }, { "epoch": 3.9871284811607772, "grad_norm": 0.7846493721008301, "learning_rate": 2.2998260549218684e-05, "loss": 0.337, "step": 8520 }, { "epoch": 3.991809033465949, "grad_norm": 0.7310739159584045, "learning_rate": 2.299309757485926e-05, "loss": 0.3298, "step": 8530 }, { "epoch": 3.996489585771121, "grad_norm": 0.8681071996688843, "learning_rate": 2.2987928602754172e-05, "loss": 0.341, "step": 8540 }, { "epoch": 4.000936110461034, "grad_norm": 0.566116988658905, "learning_rate": 2.2982753636257507e-05, "loss": 0.3105, "step": 8550 }, { "epoch": 4.005616662766206, "grad_norm": 0.5598528385162354, "learning_rate": 2.2977572678727245e-05, "loss": 0.3211, "step": 8560 }, { "epoch": 4.010297215071378, "grad_norm": 1.4071004390716553, "learning_rate": 2.297238573352526e-05, "loss": 0.3107, "step": 8570 }, { "epoch": 4.014977767376551, "grad_norm": 0.8272298574447632, "learning_rate": 2.2967192804017295e-05, "loss": 0.3062, "step": 8580 }, { "epoch": 4.019658319681723, "grad_norm": 0.626724898815155, "learning_rate": 2.2961993893572985e-05, "loss": 0.3147, "step": 8590 }, { "epoch": 4.024338871986894, "grad_norm": 0.5509130358695984, "learning_rate": 2.2956789005565854e-05, "loss": 0.3203, "step": 8600 }, { "epoch": 4.029019424292066, "grad_norm": 1.1165103912353516, "learning_rate": 2.2951578143373295e-05, "loss": 0.312, "step": 8610 }, { "epoch": 4.033699976597238, "grad_norm": 0.5772186517715454, "learning_rate": 2.2946361310376576e-05, "loss": 0.308, "step": 8620 }, { "epoch": 4.038380528902411, "grad_norm": 0.6857577562332153, "learning_rate": 2.2941138509960848e-05, "loss": 0.3237, "step": 8630 }, { "epoch": 4.043061081207583, "grad_norm": 1.2756868600845337, "learning_rate": 2.293590974551513e-05, "loss": 0.3161, "step": 8640 }, { "epoch": 4.0477416335127545, "grad_norm": 0.6877562403678894, "learning_rate": 2.2930675020432304e-05, "loss": 0.3103, "step": 8650 }, { "epoch": 4.052422185817926, "grad_norm": 0.7694326639175415, "learning_rate": 2.2925434338109137e-05, "loss": 0.3052, "step": 8660 }, { "epoch": 4.057102738123098, "grad_norm": 0.7773103713989258, "learning_rate": 2.2920187701946235e-05, "loss": 0.3297, "step": 8670 }, { "epoch": 4.061783290428271, "grad_norm": 1.0500596761703491, "learning_rate": 2.29149351153481e-05, "loss": 0.3173, "step": 8680 }, { "epoch": 4.066463842733443, "grad_norm": 0.6070728898048401, "learning_rate": 2.2909676581723068e-05, "loss": 0.3175, "step": 8690 }, { "epoch": 4.071144395038615, "grad_norm": 0.6457033157348633, "learning_rate": 2.2904412104483347e-05, "loss": 0.3119, "step": 8700 }, { "epoch": 4.075824947343786, "grad_norm": 0.6540968418121338, "learning_rate": 2.2899141687044996e-05, "loss": 0.3127, "step": 8710 }, { "epoch": 4.080505499648958, "grad_norm": 0.5889397859573364, "learning_rate": 2.289386533282794e-05, "loss": 0.3231, "step": 8720 }, { "epoch": 4.085186051954131, "grad_norm": 0.9046448469161987, "learning_rate": 2.2888583045255944e-05, "loss": 0.2999, "step": 8730 }, { "epoch": 4.089866604259303, "grad_norm": 0.5888004302978516, "learning_rate": 2.288329482775662e-05, "loss": 0.328, "step": 8740 }, { "epoch": 4.094547156564475, "grad_norm": 0.6740261912345886, "learning_rate": 2.2878000683761448e-05, "loss": 0.3148, "step": 8750 }, { "epoch": 4.0992277088696465, "grad_norm": 0.8595446348190308, "learning_rate": 2.287270061670573e-05, "loss": 0.3296, "step": 8760 }, { "epoch": 4.103908261174818, "grad_norm": 1.0162955522537231, "learning_rate": 2.2867394630028626e-05, "loss": 0.3008, "step": 8770 }, { "epoch": 4.108588813479991, "grad_norm": 0.7098690271377563, "learning_rate": 2.286208272717313e-05, "loss": 0.3382, "step": 8780 }, { "epoch": 4.113269365785163, "grad_norm": 0.7157993912696838, "learning_rate": 2.2856764911586083e-05, "loss": 0.323, "step": 8790 }, { "epoch": 4.117949918090335, "grad_norm": 0.6038935780525208, "learning_rate": 2.285144118671816e-05, "loss": 0.3171, "step": 8800 }, { "epoch": 4.122630470395507, "grad_norm": 1.1487013101577759, "learning_rate": 2.284611155602386e-05, "loss": 0.3035, "step": 8810 }, { "epoch": 4.127311022700678, "grad_norm": 0.5179697275161743, "learning_rate": 2.2840776022961523e-05, "loss": 0.2978, "step": 8820 }, { "epoch": 4.13199157500585, "grad_norm": 0.6126347780227661, "learning_rate": 2.283543459099333e-05, "loss": 0.3124, "step": 8830 }, { "epoch": 4.136672127311023, "grad_norm": 0.6823699474334717, "learning_rate": 2.2830087263585264e-05, "loss": 0.3152, "step": 8840 }, { "epoch": 4.141352679616195, "grad_norm": 0.6297472715377808, "learning_rate": 2.2824734044207156e-05, "loss": 0.3263, "step": 8850 }, { "epoch": 4.146033231921367, "grad_norm": 0.7417789101600647, "learning_rate": 2.2819374936332645e-05, "loss": 0.3015, "step": 8860 }, { "epoch": 4.1507137842265385, "grad_norm": 0.6071353554725647, "learning_rate": 2.2814009943439212e-05, "loss": 0.3186, "step": 8870 }, { "epoch": 4.15539433653171, "grad_norm": 1.0698319673538208, "learning_rate": 2.2808639069008124e-05, "loss": 0.3097, "step": 8880 }, { "epoch": 4.160074888836883, "grad_norm": 0.7516188621520996, "learning_rate": 2.2803262316524498e-05, "loss": 0.322, "step": 8890 }, { "epoch": 4.164755441142055, "grad_norm": 0.5266650319099426, "learning_rate": 2.279787968947724e-05, "loss": 0.2933, "step": 8900 }, { "epoch": 4.169435993447227, "grad_norm": 0.7159454822540283, "learning_rate": 2.2792491191359086e-05, "loss": 0.3087, "step": 8910 }, { "epoch": 4.1741165457523985, "grad_norm": 0.651553213596344, "learning_rate": 2.278709682566657e-05, "loss": 0.2954, "step": 8920 }, { "epoch": 4.17879709805757, "grad_norm": 0.5937948226928711, "learning_rate": 2.278169659590004e-05, "loss": 0.3175, "step": 8930 }, { "epoch": 4.183477650362743, "grad_norm": 0.9292193651199341, "learning_rate": 2.2776290505563643e-05, "loss": 0.3192, "step": 8940 }, { "epoch": 4.188158202667915, "grad_norm": 1.1865739822387695, "learning_rate": 2.2770878558165336e-05, "loss": 0.3025, "step": 8950 }, { "epoch": 4.192838754973087, "grad_norm": 0.5645105242729187, "learning_rate": 2.2765460757216868e-05, "loss": 0.3147, "step": 8960 }, { "epoch": 4.197519307278259, "grad_norm": 1.4186055660247803, "learning_rate": 2.2760037106233795e-05, "loss": 0.3194, "step": 8970 }, { "epoch": 4.2021998595834305, "grad_norm": 0.653283417224884, "learning_rate": 2.2754607608735464e-05, "loss": 0.3142, "step": 8980 }, { "epoch": 4.206880411888603, "grad_norm": 1.1518044471740723, "learning_rate": 2.2749172268245017e-05, "loss": 0.3081, "step": 8990 }, { "epoch": 4.211560964193775, "grad_norm": 0.6831100583076477, "learning_rate": 2.274373108828939e-05, "loss": 0.3021, "step": 9000 }, { "epoch": 4.216241516498947, "grad_norm": 1.9681510925292969, "learning_rate": 2.2738284072399303e-05, "loss": 0.3013, "step": 9010 }, { "epoch": 4.220922068804119, "grad_norm": 0.9693263173103333, "learning_rate": 2.2732831224109263e-05, "loss": 0.3211, "step": 9020 }, { "epoch": 4.2256026211092905, "grad_norm": 0.5804410576820374, "learning_rate": 2.272737254695757e-05, "loss": 0.3071, "step": 9030 }, { "epoch": 4.230283173414463, "grad_norm": 1.095511555671692, "learning_rate": 2.2721908044486294e-05, "loss": 0.3111, "step": 9040 }, { "epoch": 4.234963725719635, "grad_norm": 0.8043597936630249, "learning_rate": 2.2716437720241296e-05, "loss": 0.3094, "step": 9050 }, { "epoch": 4.239644278024807, "grad_norm": 0.6841867566108704, "learning_rate": 2.271096157777221e-05, "loss": 0.3127, "step": 9060 }, { "epoch": 4.244324830329979, "grad_norm": 0.6575802564620972, "learning_rate": 2.2705479620632446e-05, "loss": 0.3101, "step": 9070 }, { "epoch": 4.249005382635151, "grad_norm": 0.6748194098472595, "learning_rate": 2.269999185237918e-05, "loss": 0.3064, "step": 9080 }, { "epoch": 4.253685934940323, "grad_norm": 0.7197110652923584, "learning_rate": 2.269449827657337e-05, "loss": 0.303, "step": 9090 }, { "epoch": 4.258366487245495, "grad_norm": 0.7041828036308289, "learning_rate": 2.2688998896779738e-05, "loss": 0.3196, "step": 9100 }, { "epoch": 4.263047039550667, "grad_norm": 0.7190040946006775, "learning_rate": 2.268349371656677e-05, "loss": 0.3134, "step": 9110 }, { "epoch": 4.267727591855839, "grad_norm": 0.710882306098938, "learning_rate": 2.2677982739506722e-05, "loss": 0.2953, "step": 9120 }, { "epoch": 4.272408144161011, "grad_norm": 0.7359614372253418, "learning_rate": 2.26724659691756e-05, "loss": 0.3074, "step": 9130 }, { "epoch": 4.277088696466183, "grad_norm": 0.7268602848052979, "learning_rate": 2.2666943409153183e-05, "loss": 0.289, "step": 9140 }, { "epoch": 4.281769248771355, "grad_norm": 0.7893595099449158, "learning_rate": 2.2661415063023e-05, "loss": 0.3046, "step": 9150 }, { "epoch": 4.286449801076527, "grad_norm": 0.9552383422851562, "learning_rate": 2.2655880934372327e-05, "loss": 0.3012, "step": 9160 }, { "epoch": 4.291130353381699, "grad_norm": 1.3556251525878906, "learning_rate": 2.265034102679221e-05, "loss": 0.3042, "step": 9170 }, { "epoch": 4.295810905686871, "grad_norm": 0.8045382499694824, "learning_rate": 2.2644795343877426e-05, "loss": 0.3136, "step": 9180 }, { "epoch": 4.3004914579920435, "grad_norm": 0.7135992050170898, "learning_rate": 2.263924388922652e-05, "loss": 0.3017, "step": 9190 }, { "epoch": 4.305172010297215, "grad_norm": 0.5250877737998962, "learning_rate": 2.2633686666441757e-05, "loss": 0.3111, "step": 9200 }, { "epoch": 4.309852562602387, "grad_norm": 0.5669058561325073, "learning_rate": 2.2628123679129168e-05, "loss": 0.2992, "step": 9210 }, { "epoch": 4.314533114907559, "grad_norm": 2.575505495071411, "learning_rate": 2.2622554930898506e-05, "loss": 0.3104, "step": 9220 }, { "epoch": 4.319213667212731, "grad_norm": 1.4801559448242188, "learning_rate": 2.2616980425363286e-05, "loss": 0.2987, "step": 9230 }, { "epoch": 4.3238942195179035, "grad_norm": 0.7252615094184875, "learning_rate": 2.261140016614073e-05, "loss": 0.3252, "step": 9240 }, { "epoch": 4.328574771823075, "grad_norm": 0.6200851798057556, "learning_rate": 2.2605814156851814e-05, "loss": 0.2992, "step": 9250 }, { "epoch": 4.333255324128247, "grad_norm": 1.0603396892547607, "learning_rate": 2.2600222401121242e-05, "loss": 0.3171, "step": 9260 }, { "epoch": 4.337935876433419, "grad_norm": 0.8196407556533813, "learning_rate": 2.2594624902577435e-05, "loss": 0.3134, "step": 9270 }, { "epoch": 4.342616428738591, "grad_norm": 0.6397809386253357, "learning_rate": 2.2589021664852553e-05, "loss": 0.3056, "step": 9280 }, { "epoch": 4.347296981043764, "grad_norm": 1.1665788888931274, "learning_rate": 2.2583412691582477e-05, "loss": 0.3008, "step": 9290 }, { "epoch": 4.3519775333489354, "grad_norm": 0.73846834897995, "learning_rate": 2.257779798640681e-05, "loss": 0.3147, "step": 9300 }, { "epoch": 4.356658085654107, "grad_norm": 0.7690420150756836, "learning_rate": 2.2572177552968872e-05, "loss": 0.2932, "step": 9310 }, { "epoch": 4.361338637959279, "grad_norm": 1.5453609228134155, "learning_rate": 2.25665513949157e-05, "loss": 0.3268, "step": 9320 }, { "epoch": 4.366019190264451, "grad_norm": 0.6519848704338074, "learning_rate": 2.2560919515898046e-05, "loss": 0.3146, "step": 9330 }, { "epoch": 4.370699742569624, "grad_norm": 1.2217148542404175, "learning_rate": 2.2555281919570378e-05, "loss": 0.3103, "step": 9340 }, { "epoch": 4.3753802948747955, "grad_norm": 0.8212413191795349, "learning_rate": 2.254963860959087e-05, "loss": 0.3164, "step": 9350 }, { "epoch": 4.380060847179967, "grad_norm": 0.773189902305603, "learning_rate": 2.2543989589621406e-05, "loss": 0.3131, "step": 9360 }, { "epoch": 4.384741399485139, "grad_norm": 0.5418788194656372, "learning_rate": 2.253833486332757e-05, "loss": 0.3088, "step": 9370 }, { "epoch": 4.389421951790311, "grad_norm": 0.7187689542770386, "learning_rate": 2.2532674434378656e-05, "loss": 0.307, "step": 9380 }, { "epoch": 4.394102504095483, "grad_norm": 0.7630032896995544, "learning_rate": 2.2527008306447655e-05, "loss": 0.3218, "step": 9390 }, { "epoch": 4.398783056400656, "grad_norm": 0.6271687746047974, "learning_rate": 2.2521336483211252e-05, "loss": 0.2912, "step": 9400 }, { "epoch": 4.403463608705827, "grad_norm": 0.6300660371780396, "learning_rate": 2.2515658968349834e-05, "loss": 0.3095, "step": 9410 }, { "epoch": 4.408144161010999, "grad_norm": 0.668866753578186, "learning_rate": 2.250997576554748e-05, "loss": 0.3161, "step": 9420 }, { "epoch": 4.412824713316171, "grad_norm": 0.7870913743972778, "learning_rate": 2.250428687849196e-05, "loss": 0.3025, "step": 9430 }, { "epoch": 4.417505265621343, "grad_norm": 0.6260764598846436, "learning_rate": 2.249859231087473e-05, "loss": 0.2894, "step": 9440 }, { "epoch": 4.422185817926516, "grad_norm": 0.637990415096283, "learning_rate": 2.2492892066390924e-05, "loss": 0.3127, "step": 9450 }, { "epoch": 4.4268663702316875, "grad_norm": 0.6778397560119629, "learning_rate": 2.248718614873938e-05, "loss": 0.3055, "step": 9460 }, { "epoch": 4.431546922536859, "grad_norm": 0.6881290674209595, "learning_rate": 2.24814745616226e-05, "loss": 0.3104, "step": 9470 }, { "epoch": 4.436227474842031, "grad_norm": 0.8866451978683472, "learning_rate": 2.2475757308746777e-05, "loss": 0.3152, "step": 9480 }, { "epoch": 4.440908027147203, "grad_norm": 0.7085728049278259, "learning_rate": 2.2470034393821768e-05, "loss": 0.3027, "step": 9490 }, { "epoch": 4.445588579452376, "grad_norm": 0.6973881721496582, "learning_rate": 2.2464305820561112e-05, "loss": 0.2975, "step": 9500 }, { "epoch": 4.450269131757548, "grad_norm": 0.6433224678039551, "learning_rate": 2.2458571592682027e-05, "loss": 0.306, "step": 9510 }, { "epoch": 4.454949684062719, "grad_norm": 1.224456548690796, "learning_rate": 2.2452831713905373e-05, "loss": 0.3227, "step": 9520 }, { "epoch": 4.459630236367891, "grad_norm": 0.5910502672195435, "learning_rate": 2.244708618795571e-05, "loss": 0.2938, "step": 9530 }, { "epoch": 4.464310788673063, "grad_norm": 0.9767402410507202, "learning_rate": 2.244133501856124e-05, "loss": 0.3031, "step": 9540 }, { "epoch": 4.468991340978236, "grad_norm": 0.629233181476593, "learning_rate": 2.243557820945384e-05, "loss": 0.3062, "step": 9550 }, { "epoch": 4.473671893283408, "grad_norm": 0.5998203754425049, "learning_rate": 2.2429815764369034e-05, "loss": 0.3036, "step": 9560 }, { "epoch": 4.4783524455885795, "grad_norm": 0.7377626299858093, "learning_rate": 2.2424047687046017e-05, "loss": 0.2971, "step": 9570 }, { "epoch": 4.483032997893751, "grad_norm": 0.8055640459060669, "learning_rate": 2.241827398122763e-05, "loss": 0.305, "step": 9580 }, { "epoch": 4.487713550198923, "grad_norm": 0.6225429773330688, "learning_rate": 2.2412494650660366e-05, "loss": 0.3114, "step": 9590 }, { "epoch": 4.492394102504096, "grad_norm": 0.6319803595542908, "learning_rate": 2.240670969909437e-05, "loss": 0.3205, "step": 9600 }, { "epoch": 4.497074654809268, "grad_norm": 0.7073454856872559, "learning_rate": 2.240091913028343e-05, "loss": 0.3059, "step": 9610 }, { "epoch": 4.5017552071144395, "grad_norm": 0.8540297150611877, "learning_rate": 2.2395122947984996e-05, "loss": 0.3194, "step": 9620 }, { "epoch": 4.506435759419611, "grad_norm": 1.2078957557678223, "learning_rate": 2.2389321155960135e-05, "loss": 0.2993, "step": 9630 }, { "epoch": 4.511116311724783, "grad_norm": 0.7158048152923584, "learning_rate": 2.238351375797357e-05, "loss": 0.3071, "step": 9640 }, { "epoch": 4.515796864029955, "grad_norm": 0.6311137676239014, "learning_rate": 2.2377700757793667e-05, "loss": 0.3052, "step": 9650 }, { "epoch": 4.520477416335128, "grad_norm": 0.5888808965682983, "learning_rate": 2.2371882159192405e-05, "loss": 0.3185, "step": 9660 }, { "epoch": 4.5251579686403, "grad_norm": 0.4861615002155304, "learning_rate": 2.2366057965945417e-05, "loss": 0.3252, "step": 9670 }, { "epoch": 4.5298385209454715, "grad_norm": 0.57795250415802, "learning_rate": 2.236022818183196e-05, "loss": 0.304, "step": 9680 }, { "epoch": 4.534519073250643, "grad_norm": 0.6848006844520569, "learning_rate": 2.2354392810634917e-05, "loss": 0.3161, "step": 9690 }, { "epoch": 4.539199625555815, "grad_norm": 1.8568609952926636, "learning_rate": 2.2348551856140792e-05, "loss": 0.3085, "step": 9700 }, { "epoch": 4.543880177860988, "grad_norm": 1.1583820581436157, "learning_rate": 2.2342705322139727e-05, "loss": 0.3116, "step": 9710 }, { "epoch": 4.54856073016616, "grad_norm": 0.5066291689872742, "learning_rate": 2.2336853212425463e-05, "loss": 0.3082, "step": 9720 }, { "epoch": 4.5532412824713315, "grad_norm": 0.6405032277107239, "learning_rate": 2.2330995530795375e-05, "loss": 0.3235, "step": 9730 }, { "epoch": 4.557921834776503, "grad_norm": 1.0469591617584229, "learning_rate": 2.2325132281050453e-05, "loss": 0.3064, "step": 9740 }, { "epoch": 4.562602387081675, "grad_norm": 0.6130898594856262, "learning_rate": 2.2319263466995292e-05, "loss": 0.3054, "step": 9750 }, { "epoch": 4.567282939386848, "grad_norm": 0.5325705409049988, "learning_rate": 2.2313389092438104e-05, "loss": 0.3004, "step": 9760 }, { "epoch": 4.57196349169202, "grad_norm": 0.8929054737091064, "learning_rate": 2.23075091611907e-05, "loss": 0.3067, "step": 9770 }, { "epoch": 4.576644043997192, "grad_norm": 1.456038236618042, "learning_rate": 2.230162367706851e-05, "loss": 0.3029, "step": 9780 }, { "epoch": 4.581324596302363, "grad_norm": 0.5890605449676514, "learning_rate": 2.2295732643890562e-05, "loss": 0.2825, "step": 9790 }, { "epoch": 4.586005148607535, "grad_norm": 1.5248751640319824, "learning_rate": 2.228983606547948e-05, "loss": 0.3229, "step": 9800 }, { "epoch": 4.590685700912708, "grad_norm": 0.5402883291244507, "learning_rate": 2.228393394566149e-05, "loss": 0.2949, "step": 9810 }, { "epoch": 4.59536625321788, "grad_norm": 0.5734983682632446, "learning_rate": 2.2278026288266417e-05, "loss": 0.3019, "step": 9820 }, { "epoch": 4.600046805523052, "grad_norm": 0.8212027549743652, "learning_rate": 2.2272113097127674e-05, "loss": 0.3105, "step": 9830 }, { "epoch": 4.6047273578282235, "grad_norm": 1.139513611793518, "learning_rate": 2.226619437608226e-05, "loss": 0.3144, "step": 9840 }, { "epoch": 4.609407910133395, "grad_norm": 0.5081031322479248, "learning_rate": 2.2260270128970777e-05, "loss": 0.3263, "step": 9850 }, { "epoch": 4.614088462438568, "grad_norm": 0.6418612003326416, "learning_rate": 2.2254340359637403e-05, "loss": 0.3023, "step": 9860 }, { "epoch": 4.61876901474374, "grad_norm": 0.8758525848388672, "learning_rate": 2.22484050719299e-05, "loss": 0.3027, "step": 9870 }, { "epoch": 4.623449567048912, "grad_norm": 0.4839285612106323, "learning_rate": 2.224246426969961e-05, "loss": 0.2819, "step": 9880 }, { "epoch": 4.628130119354084, "grad_norm": 0.530853807926178, "learning_rate": 2.2236517956801457e-05, "loss": 0.3082, "step": 9890 }, { "epoch": 4.632810671659255, "grad_norm": 0.7967572212219238, "learning_rate": 2.223056613709394e-05, "loss": 0.2997, "step": 9900 }, { "epoch": 4.637491223964428, "grad_norm": 0.5045170783996582, "learning_rate": 2.2224608814439125e-05, "loss": 0.2966, "step": 9910 }, { "epoch": 4.6421717762696, "grad_norm": 0.6405930519104004, "learning_rate": 2.2218645992702666e-05, "loss": 0.3086, "step": 9920 }, { "epoch": 4.646852328574772, "grad_norm": 0.7978336811065674, "learning_rate": 2.221267767575376e-05, "loss": 0.311, "step": 9930 }, { "epoch": 4.651532880879944, "grad_norm": 0.6651483178138733, "learning_rate": 2.2206703867465185e-05, "loss": 0.316, "step": 9940 }, { "epoch": 4.6562134331851155, "grad_norm": 0.573940098285675, "learning_rate": 2.2200724571713285e-05, "loss": 0.3036, "step": 9950 }, { "epoch": 4.660893985490288, "grad_norm": 0.639828085899353, "learning_rate": 2.2194739792377963e-05, "loss": 0.3055, "step": 9960 }, { "epoch": 4.66557453779546, "grad_norm": 0.8312051296234131, "learning_rate": 2.218874953334267e-05, "loss": 0.3122, "step": 9970 }, { "epoch": 4.670255090100632, "grad_norm": 0.6265290379524231, "learning_rate": 2.2182753798494424e-05, "loss": 0.3063, "step": 9980 }, { "epoch": 4.674935642405804, "grad_norm": 0.6753984689712524, "learning_rate": 2.217675259172379e-05, "loss": 0.3234, "step": 9990 }, { "epoch": 4.679616194710976, "grad_norm": 0.7891761660575867, "learning_rate": 2.2170745916924886e-05, "loss": 0.3106, "step": 10000 }, { "epoch": 4.684296747016148, "grad_norm": 0.6338467001914978, "learning_rate": 2.2164733777995383e-05, "loss": 0.3017, "step": 10010 }, { "epoch": 4.68897729932132, "grad_norm": 0.8514565825462341, "learning_rate": 2.2158716178836488e-05, "loss": 0.3048, "step": 10020 }, { "epoch": 4.693657851626492, "grad_norm": 1.0406382083892822, "learning_rate": 2.2152693123352954e-05, "loss": 0.3168, "step": 10030 }, { "epoch": 4.698338403931664, "grad_norm": 0.6495448350906372, "learning_rate": 2.214666461545308e-05, "loss": 0.3079, "step": 10040 }, { "epoch": 4.703018956236836, "grad_norm": 0.8854176998138428, "learning_rate": 2.21406306590487e-05, "loss": 0.3109, "step": 10050 }, { "epoch": 4.707699508542008, "grad_norm": 0.8038166761398315, "learning_rate": 2.2134591258055183e-05, "loss": 0.3012, "step": 10060 }, { "epoch": 4.71238006084718, "grad_norm": 0.632312536239624, "learning_rate": 2.2128546416391425e-05, "loss": 0.2923, "step": 10070 }, { "epoch": 4.717060613152352, "grad_norm": 0.6757199764251709, "learning_rate": 2.212249613797986e-05, "loss": 0.2851, "step": 10080 }, { "epoch": 4.721741165457524, "grad_norm": 0.5782247185707092, "learning_rate": 2.211644042674645e-05, "loss": 0.3, "step": 10090 }, { "epoch": 4.726421717762696, "grad_norm": 0.6048909425735474, "learning_rate": 2.2110379286620686e-05, "loss": 0.3203, "step": 10100 }, { "epoch": 4.731102270067868, "grad_norm": 0.597554087638855, "learning_rate": 2.2104312721535567e-05, "loss": 0.312, "step": 10110 }, { "epoch": 4.73578282237304, "grad_norm": 1.0081018209457397, "learning_rate": 2.209824073542763e-05, "loss": 0.2982, "step": 10120 }, { "epoch": 4.740463374678212, "grad_norm": 0.6984496712684631, "learning_rate": 2.2092163332236907e-05, "loss": 0.296, "step": 10130 }, { "epoch": 4.745143926983384, "grad_norm": 0.5710594058036804, "learning_rate": 2.208608051590697e-05, "loss": 0.2988, "step": 10140 }, { "epoch": 4.749824479288556, "grad_norm": 0.8808846473693848, "learning_rate": 2.207999229038489e-05, "loss": 0.308, "step": 10150 }, { "epoch": 4.7545050315937285, "grad_norm": 0.6961671113967896, "learning_rate": 2.2073898659621246e-05, "loss": 0.2941, "step": 10160 }, { "epoch": 4.7591855838989, "grad_norm": 0.9580798745155334, "learning_rate": 2.206779962757014e-05, "loss": 0.3239, "step": 10170 }, { "epoch": 4.763866136204072, "grad_norm": 0.7978768348693848, "learning_rate": 2.2061695198189157e-05, "loss": 0.3063, "step": 10180 }, { "epoch": 4.768546688509244, "grad_norm": 0.9600862860679626, "learning_rate": 2.20555853754394e-05, "loss": 0.2975, "step": 10190 }, { "epoch": 4.773227240814416, "grad_norm": 0.628053605556488, "learning_rate": 2.204947016328547e-05, "loss": 0.2928, "step": 10200 }, { "epoch": 4.777907793119589, "grad_norm": 0.7714797258377075, "learning_rate": 2.2043349565695447e-05, "loss": 0.292, "step": 10210 }, { "epoch": 4.78258834542476, "grad_norm": 0.5832931995391846, "learning_rate": 2.2037223586640937e-05, "loss": 0.2999, "step": 10220 }, { "epoch": 4.787268897729932, "grad_norm": 0.6144176721572876, "learning_rate": 2.2031092230097013e-05, "loss": 0.3071, "step": 10230 }, { "epoch": 4.791949450035104, "grad_norm": 0.6781201958656311, "learning_rate": 2.2024955500042245e-05, "loss": 0.2955, "step": 10240 }, { "epoch": 4.796630002340276, "grad_norm": 0.8037640452384949, "learning_rate": 2.2018813400458697e-05, "loss": 0.2963, "step": 10250 }, { "epoch": 4.801310554645449, "grad_norm": 0.6932002305984497, "learning_rate": 2.2012665935331905e-05, "loss": 0.3039, "step": 10260 }, { "epoch": 4.8059911069506205, "grad_norm": 0.5244041681289673, "learning_rate": 2.2006513108650894e-05, "loss": 0.3085, "step": 10270 }, { "epoch": 4.810671659255792, "grad_norm": 0.9634856581687927, "learning_rate": 2.2000354924408168e-05, "loss": 0.2997, "step": 10280 }, { "epoch": 4.815352211560964, "grad_norm": 0.8545072674751282, "learning_rate": 2.1994191386599704e-05, "loss": 0.3067, "step": 10290 }, { "epoch": 4.820032763866136, "grad_norm": 0.5785797834396362, "learning_rate": 2.1988022499224953e-05, "loss": 0.3141, "step": 10300 }, { "epoch": 4.824713316171308, "grad_norm": 1.2812572717666626, "learning_rate": 2.198184826628684e-05, "loss": 0.2845, "step": 10310 }, { "epoch": 4.8293938684764806, "grad_norm": 0.49574413895606995, "learning_rate": 2.1975668691791764e-05, "loss": 0.2975, "step": 10320 }, { "epoch": 4.834074420781652, "grad_norm": 0.6668761968612671, "learning_rate": 2.1969483779749577e-05, "loss": 0.3086, "step": 10330 }, { "epoch": 4.838754973086824, "grad_norm": 0.6815934181213379, "learning_rate": 2.19632935341736e-05, "loss": 0.3072, "step": 10340 }, { "epoch": 4.843435525391996, "grad_norm": 0.7179082632064819, "learning_rate": 2.1957097959080622e-05, "loss": 0.3013, "step": 10350 }, { "epoch": 4.848116077697168, "grad_norm": 0.5954571962356567, "learning_rate": 2.195089705849088e-05, "loss": 0.3115, "step": 10360 }, { "epoch": 4.852796630002341, "grad_norm": 0.9413081407546997, "learning_rate": 2.1944690836428074e-05, "loss": 0.2958, "step": 10370 }, { "epoch": 4.8574771823075125, "grad_norm": 1.1789945363998413, "learning_rate": 2.1938479296919352e-05, "loss": 0.3097, "step": 10380 }, { "epoch": 4.862157734612684, "grad_norm": 1.1566299200057983, "learning_rate": 2.1932262443995317e-05, "loss": 0.3115, "step": 10390 }, { "epoch": 4.866838286917856, "grad_norm": 1.3222434520721436, "learning_rate": 2.1926040281690015e-05, "loss": 0.3113, "step": 10400 }, { "epoch": 4.871518839223028, "grad_norm": 0.562021791934967, "learning_rate": 2.1919812814040935e-05, "loss": 0.307, "step": 10410 }, { "epoch": 4.876199391528201, "grad_norm": 0.7405872344970703, "learning_rate": 2.1913580045089026e-05, "loss": 0.2927, "step": 10420 }, { "epoch": 4.8808799438333725, "grad_norm": 0.707518458366394, "learning_rate": 2.1907341978878652e-05, "loss": 0.3094, "step": 10430 }, { "epoch": 4.885560496138544, "grad_norm": 0.9147388935089111, "learning_rate": 2.1901098619457635e-05, "loss": 0.291, "step": 10440 }, { "epoch": 4.890241048443716, "grad_norm": 0.566011905670166, "learning_rate": 2.1894849970877214e-05, "loss": 0.3042, "step": 10450 }, { "epoch": 4.894921600748888, "grad_norm": 0.5678245425224304, "learning_rate": 2.1888596037192073e-05, "loss": 0.3033, "step": 10460 }, { "epoch": 4.89960215305406, "grad_norm": 0.9700899720191956, "learning_rate": 2.1882336822460326e-05, "loss": 0.3098, "step": 10470 }, { "epoch": 4.904282705359233, "grad_norm": 0.8686472177505493, "learning_rate": 2.187607233074351e-05, "loss": 0.3072, "step": 10480 }, { "epoch": 4.9089632576644044, "grad_norm": 0.8342567086219788, "learning_rate": 2.1869802566106577e-05, "loss": 0.2975, "step": 10490 }, { "epoch": 4.913643809969576, "grad_norm": 0.6434285044670105, "learning_rate": 2.1863527532617914e-05, "loss": 0.3085, "step": 10500 }, { "epoch": 4.918324362274748, "grad_norm": 1.4057658910751343, "learning_rate": 2.1857247234349324e-05, "loss": 0.2972, "step": 10510 }, { "epoch": 4.92300491457992, "grad_norm": 1.6400980949401855, "learning_rate": 2.1850961675376026e-05, "loss": 0.2886, "step": 10520 }, { "epoch": 4.927685466885093, "grad_norm": 0.6111001968383789, "learning_rate": 2.1844670859776645e-05, "loss": 0.2942, "step": 10530 }, { "epoch": 4.9323660191902645, "grad_norm": 1.0366156101226807, "learning_rate": 2.1838374791633225e-05, "loss": 0.2945, "step": 10540 }, { "epoch": 4.937046571495436, "grad_norm": 0.7356938719749451, "learning_rate": 2.1832073475031215e-05, "loss": 0.3043, "step": 10550 }, { "epoch": 4.941727123800608, "grad_norm": 0.591815710067749, "learning_rate": 2.1825766914059475e-05, "loss": 0.2849, "step": 10560 }, { "epoch": 4.94640767610578, "grad_norm": 0.56697016954422, "learning_rate": 2.1819455112810264e-05, "loss": 0.3045, "step": 10570 }, { "epoch": 4.951088228410953, "grad_norm": 0.6427369117736816, "learning_rate": 2.1813138075379234e-05, "loss": 0.2816, "step": 10580 }, { "epoch": 4.955768780716125, "grad_norm": 0.6340849995613098, "learning_rate": 2.1806815805865452e-05, "loss": 0.2968, "step": 10590 }, { "epoch": 4.960449333021296, "grad_norm": 0.5794550180435181, "learning_rate": 2.1800488308371363e-05, "loss": 0.303, "step": 10600 }, { "epoch": 4.965129885326468, "grad_norm": 0.504429042339325, "learning_rate": 2.1794155587002817e-05, "loss": 0.3019, "step": 10610 }, { "epoch": 4.96981043763164, "grad_norm": 0.5852867960929871, "learning_rate": 2.1787817645869038e-05, "loss": 0.294, "step": 10620 }, { "epoch": 4.974490989936813, "grad_norm": 0.5641798377037048, "learning_rate": 2.178147448908266e-05, "loss": 0.3051, "step": 10630 }, { "epoch": 4.979171542241985, "grad_norm": 0.6054232716560364, "learning_rate": 2.177512612075968e-05, "loss": 0.3194, "step": 10640 }, { "epoch": 4.9838520945471565, "grad_norm": 0.9099509716033936, "learning_rate": 2.1768772545019485e-05, "loss": 0.292, "step": 10650 }, { "epoch": 4.988532646852328, "grad_norm": 0.6770991683006287, "learning_rate": 2.176241376598485e-05, "loss": 0.2997, "step": 10660 }, { "epoch": 4.9932131991575, "grad_norm": 0.7100474238395691, "learning_rate": 2.175604978778191e-05, "loss": 0.3098, "step": 10670 }, { "epoch": 4.997893751462673, "grad_norm": 1.0454846620559692, "learning_rate": 2.1749680614540175e-05, "loss": 0.3044, "step": 10680 }, { "epoch": 5.002340276152586, "grad_norm": 0.6053687930107117, "learning_rate": 2.1743306250392553e-05, "loss": 0.2755, "step": 10690 }, { "epoch": 5.007020828457758, "grad_norm": 0.5955689549446106, "learning_rate": 2.173692669947528e-05, "loss": 0.2761, "step": 10700 }, { "epoch": 5.01170138076293, "grad_norm": 0.6039512753486633, "learning_rate": 2.1730541965927987e-05, "loss": 0.2757, "step": 10710 }, { "epoch": 5.016381933068102, "grad_norm": 1.0267727375030518, "learning_rate": 2.1724152053893657e-05, "loss": 0.2934, "step": 10720 }, { "epoch": 5.021062485373274, "grad_norm": 0.6068044304847717, "learning_rate": 2.171775696751863e-05, "loss": 0.276, "step": 10730 }, { "epoch": 5.025743037678446, "grad_norm": 0.7563526630401611, "learning_rate": 2.171135671095261e-05, "loss": 0.2793, "step": 10740 }, { "epoch": 5.030423589983618, "grad_norm": 1.2892018556594849, "learning_rate": 2.170495128834866e-05, "loss": 0.2885, "step": 10750 }, { "epoch": 5.03510414228879, "grad_norm": 0.5566613078117371, "learning_rate": 2.1698540703863185e-05, "loss": 0.2677, "step": 10760 }, { "epoch": 5.039784694593962, "grad_norm": 0.618682861328125, "learning_rate": 2.169212496165594e-05, "loss": 0.2677, "step": 10770 }, { "epoch": 5.044465246899134, "grad_norm": 0.5257024168968201, "learning_rate": 2.1685704065890035e-05, "loss": 0.2659, "step": 10780 }, { "epoch": 5.0491457992043065, "grad_norm": 0.5305277705192566, "learning_rate": 2.1679278020731918e-05, "loss": 0.2602, "step": 10790 }, { "epoch": 5.053826351509478, "grad_norm": 0.5429583191871643, "learning_rate": 2.1672846830351385e-05, "loss": 0.2608, "step": 10800 }, { "epoch": 5.05850690381465, "grad_norm": 0.5448108315467834, "learning_rate": 2.1666410498921553e-05, "loss": 0.2757, "step": 10810 }, { "epoch": 5.063187456119822, "grad_norm": 0.5928840637207031, "learning_rate": 2.1659969030618897e-05, "loss": 0.2697, "step": 10820 }, { "epoch": 5.067868008424994, "grad_norm": 0.7092738747596741, "learning_rate": 2.1653522429623217e-05, "loss": 0.282, "step": 10830 }, { "epoch": 5.072548560730167, "grad_norm": 0.5361157655715942, "learning_rate": 2.164707070011764e-05, "loss": 0.285, "step": 10840 }, { "epoch": 5.077229113035338, "grad_norm": 0.7162908315658569, "learning_rate": 2.1640613846288625e-05, "loss": 0.2859, "step": 10850 }, { "epoch": 5.08190966534051, "grad_norm": 0.6641838550567627, "learning_rate": 2.1634151872325947e-05, "loss": 0.2746, "step": 10860 }, { "epoch": 5.086590217645682, "grad_norm": 0.627221405506134, "learning_rate": 2.1627684782422717e-05, "loss": 0.2807, "step": 10870 }, { "epoch": 5.091270769950854, "grad_norm": 0.5610705614089966, "learning_rate": 2.1621212580775362e-05, "loss": 0.2766, "step": 10880 }, { "epoch": 5.095951322256026, "grad_norm": 0.7687805891036987, "learning_rate": 2.161473527158362e-05, "loss": 0.2671, "step": 10890 }, { "epoch": 5.1006318745611985, "grad_norm": 0.5057036876678467, "learning_rate": 2.1608252859050545e-05, "loss": 0.268, "step": 10900 }, { "epoch": 5.10531242686637, "grad_norm": 0.48849135637283325, "learning_rate": 2.16017653473825e-05, "loss": 0.2693, "step": 10910 }, { "epoch": 5.109992979171542, "grad_norm": 1.0769367218017578, "learning_rate": 2.1595272740789174e-05, "loss": 0.2851, "step": 10920 }, { "epoch": 5.114673531476714, "grad_norm": 0.5431815981864929, "learning_rate": 2.1588775043483535e-05, "loss": 0.2849, "step": 10930 }, { "epoch": 5.119354083781886, "grad_norm": 0.7572799324989319, "learning_rate": 2.158227225968188e-05, "loss": 0.2792, "step": 10940 }, { "epoch": 5.124034636087059, "grad_norm": 0.5130917429924011, "learning_rate": 2.157576439360379e-05, "loss": 0.2851, "step": 10950 }, { "epoch": 5.12871518839223, "grad_norm": 0.6854354739189148, "learning_rate": 2.1569251449472144e-05, "loss": 0.2669, "step": 10960 }, { "epoch": 5.133395740697402, "grad_norm": 0.7029089331626892, "learning_rate": 2.156273343151313e-05, "loss": 0.2831, "step": 10970 }, { "epoch": 5.138076293002574, "grad_norm": 0.5101448893547058, "learning_rate": 2.155621034395621e-05, "loss": 0.2639, "step": 10980 }, { "epoch": 5.142756845307746, "grad_norm": 0.529848039150238, "learning_rate": 2.1549682191034148e-05, "loss": 0.2696, "step": 10990 }, { "epoch": 5.147437397612919, "grad_norm": 0.5795655250549316, "learning_rate": 2.1543148976982996e-05, "loss": 0.2624, "step": 11000 }, { "epoch": 5.1521179499180905, "grad_norm": 0.8253093361854553, "learning_rate": 2.1536610706042077e-05, "loss": 0.2655, "step": 11010 }, { "epoch": 5.156798502223262, "grad_norm": 1.4185512065887451, "learning_rate": 2.1530067382454013e-05, "loss": 0.2693, "step": 11020 }, { "epoch": 5.161479054528434, "grad_norm": 0.5623319745063782, "learning_rate": 2.152351901046469e-05, "loss": 0.2758, "step": 11030 }, { "epoch": 5.166159606833606, "grad_norm": 0.7941523194313049, "learning_rate": 2.1516965594323275e-05, "loss": 0.294, "step": 11040 }, { "epoch": 5.170840159138779, "grad_norm": 0.8227137923240662, "learning_rate": 2.1510407138282214e-05, "loss": 0.2873, "step": 11050 }, { "epoch": 5.1755207114439505, "grad_norm": 0.5608475804328918, "learning_rate": 2.1503843646597213e-05, "loss": 0.2688, "step": 11060 }, { "epoch": 5.180201263749122, "grad_norm": 0.47194990515708923, "learning_rate": 2.149727512352725e-05, "loss": 0.2753, "step": 11070 }, { "epoch": 5.184881816054294, "grad_norm": 0.5589197874069214, "learning_rate": 2.1490701573334573e-05, "loss": 0.282, "step": 11080 }, { "epoch": 5.189562368359466, "grad_norm": 0.6374989748001099, "learning_rate": 2.1484123000284684e-05, "loss": 0.2768, "step": 11090 }, { "epoch": 5.194242920664639, "grad_norm": 0.5687602162361145, "learning_rate": 2.1477539408646345e-05, "loss": 0.2759, "step": 11100 }, { "epoch": 5.198923472969811, "grad_norm": 0.8030719757080078, "learning_rate": 2.1470950802691587e-05, "loss": 0.2796, "step": 11110 }, { "epoch": 5.2036040252749824, "grad_norm": 0.8852208852767944, "learning_rate": 2.1464357186695673e-05, "loss": 0.2912, "step": 11120 }, { "epoch": 5.208284577580154, "grad_norm": 0.5596963763237, "learning_rate": 2.1457758564937137e-05, "loss": 0.2661, "step": 11130 }, { "epoch": 5.212965129885326, "grad_norm": 0.6163317561149597, "learning_rate": 2.145115494169775e-05, "loss": 0.2695, "step": 11140 }, { "epoch": 5.217645682190499, "grad_norm": 0.536572277545929, "learning_rate": 2.1444546321262528e-05, "loss": 0.2801, "step": 11150 }, { "epoch": 5.222326234495671, "grad_norm": 0.666915774345398, "learning_rate": 2.143793270791974e-05, "loss": 0.2795, "step": 11160 }, { "epoch": 5.2270067868008425, "grad_norm": 0.6009789109230042, "learning_rate": 2.1431314105960885e-05, "loss": 0.284, "step": 11170 }, { "epoch": 5.231687339106014, "grad_norm": 0.5853307247161865, "learning_rate": 2.14246905196807e-05, "loss": 0.2857, "step": 11180 }, { "epoch": 5.236367891411186, "grad_norm": 0.6759030222892761, "learning_rate": 2.141806195337716e-05, "loss": 0.2735, "step": 11190 }, { "epoch": 5.241048443716359, "grad_norm": 0.7793769240379333, "learning_rate": 2.141142841135147e-05, "loss": 0.2723, "step": 11200 }, { "epoch": 5.245728996021531, "grad_norm": 0.546599805355072, "learning_rate": 2.1404789897908062e-05, "loss": 0.2836, "step": 11210 }, { "epoch": 5.250409548326703, "grad_norm": 0.8173686861991882, "learning_rate": 2.1398146417354594e-05, "loss": 0.2673, "step": 11220 }, { "epoch": 5.255090100631874, "grad_norm": 1.0125235319137573, "learning_rate": 2.1391497974001955e-05, "loss": 0.2927, "step": 11230 }, { "epoch": 5.259770652937046, "grad_norm": 0.7701604962348938, "learning_rate": 2.138484457216424e-05, "loss": 0.282, "step": 11240 }, { "epoch": 5.264451205242219, "grad_norm": 0.7389867901802063, "learning_rate": 2.1378186216158773e-05, "loss": 0.2845, "step": 11250 }, { "epoch": 5.269131757547391, "grad_norm": 0.6022249460220337, "learning_rate": 2.137152291030609e-05, "loss": 0.2775, "step": 11260 }, { "epoch": 5.273812309852563, "grad_norm": 0.6003959774971008, "learning_rate": 2.1364854658929935e-05, "loss": 0.2705, "step": 11270 }, { "epoch": 5.2784928621577345, "grad_norm": 0.6107620000839233, "learning_rate": 2.1358181466357262e-05, "loss": 0.276, "step": 11280 }, { "epoch": 5.283173414462906, "grad_norm": 0.5099548101425171, "learning_rate": 2.135150333691824e-05, "loss": 0.2707, "step": 11290 }, { "epoch": 5.287853966768079, "grad_norm": 0.6789670586585999, "learning_rate": 2.1344820274946235e-05, "loss": 0.2771, "step": 11300 }, { "epoch": 5.292534519073251, "grad_norm": 0.7570161819458008, "learning_rate": 2.1338132284777802e-05, "loss": 0.2754, "step": 11310 }, { "epoch": 5.297215071378423, "grad_norm": 0.7577111721038818, "learning_rate": 2.1331439370752715e-05, "loss": 0.2857, "step": 11320 }, { "epoch": 5.301895623683595, "grad_norm": 0.676343560218811, "learning_rate": 2.132474153721393e-05, "loss": 0.2811, "step": 11330 }, { "epoch": 5.306576175988766, "grad_norm": 0.7239387631416321, "learning_rate": 2.13180387885076e-05, "loss": 0.2808, "step": 11340 }, { "epoch": 5.311256728293939, "grad_norm": 0.5597242712974548, "learning_rate": 2.1311331128983065e-05, "loss": 0.2679, "step": 11350 }, { "epoch": 5.315937280599111, "grad_norm": 0.8891215920448303, "learning_rate": 2.1304618562992858e-05, "loss": 0.2764, "step": 11360 }, { "epoch": 5.320617832904283, "grad_norm": 0.5334022641181946, "learning_rate": 2.1297901094892682e-05, "loss": 0.2808, "step": 11370 }, { "epoch": 5.325298385209455, "grad_norm": 0.6772896647453308, "learning_rate": 2.1291178729041427e-05, "loss": 0.2782, "step": 11380 }, { "epoch": 5.3299789375146265, "grad_norm": 0.8392282128334045, "learning_rate": 2.128445146980117e-05, "loss": 0.2859, "step": 11390 }, { "epoch": 5.334659489819799, "grad_norm": 0.5249620079994202, "learning_rate": 2.1277719321537156e-05, "loss": 0.2658, "step": 11400 }, { "epoch": 5.339340042124971, "grad_norm": 0.6185750365257263, "learning_rate": 2.12709822886178e-05, "loss": 0.2748, "step": 11410 }, { "epoch": 5.344020594430143, "grad_norm": 0.58663409948349, "learning_rate": 2.1264240375414694e-05, "loss": 0.2735, "step": 11420 }, { "epoch": 5.348701146735315, "grad_norm": 1.1787241697311401, "learning_rate": 2.1257493586302584e-05, "loss": 0.2739, "step": 11430 }, { "epoch": 5.3533816990404866, "grad_norm": 0.48962539434432983, "learning_rate": 2.1250741925659402e-05, "loss": 0.2772, "step": 11440 }, { "epoch": 5.358062251345658, "grad_norm": 0.5597631335258484, "learning_rate": 2.1243985397866213e-05, "loss": 0.2825, "step": 11450 }, { "epoch": 5.362742803650831, "grad_norm": 0.7045869827270508, "learning_rate": 2.123722400730726e-05, "loss": 0.2629, "step": 11460 }, { "epoch": 5.367423355956003, "grad_norm": 0.6736089587211609, "learning_rate": 2.1230457758369938e-05, "loss": 0.2692, "step": 11470 }, { "epoch": 5.372103908261175, "grad_norm": 0.5544044971466064, "learning_rate": 2.122368665544479e-05, "loss": 0.2745, "step": 11480 }, { "epoch": 5.376784460566347, "grad_norm": 0.7533393502235413, "learning_rate": 2.121691070292551e-05, "loss": 0.2855, "step": 11490 }, { "epoch": 5.3814650128715185, "grad_norm": 0.7116189002990723, "learning_rate": 2.1210129905208947e-05, "loss": 0.2759, "step": 11500 }, { "epoch": 5.386145565176691, "grad_norm": 0.5797187685966492, "learning_rate": 2.120334426669508e-05, "loss": 0.2849, "step": 11510 }, { "epoch": 5.390826117481863, "grad_norm": 0.6084765195846558, "learning_rate": 2.1196553791787037e-05, "loss": 0.283, "step": 11520 }, { "epoch": 5.395506669787035, "grad_norm": 0.49055221676826477, "learning_rate": 2.1189758484891085e-05, "loss": 0.2708, "step": 11530 }, { "epoch": 5.400187222092207, "grad_norm": 0.4634683132171631, "learning_rate": 2.1182958350416628e-05, "loss": 0.2612, "step": 11540 }, { "epoch": 5.4048677743973785, "grad_norm": 0.9760159850120544, "learning_rate": 2.117615339277619e-05, "loss": 0.2798, "step": 11550 }, { "epoch": 5.409548326702551, "grad_norm": 0.5642823576927185, "learning_rate": 2.116934361638544e-05, "loss": 0.2741, "step": 11560 }, { "epoch": 5.414228879007723, "grad_norm": 1.0059454441070557, "learning_rate": 2.1162529025663173e-05, "loss": 0.2813, "step": 11570 }, { "epoch": 5.418909431312895, "grad_norm": 0.6380130052566528, "learning_rate": 2.1155709625031293e-05, "loss": 0.2639, "step": 11580 }, { "epoch": 5.423589983618067, "grad_norm": 0.8794243335723877, "learning_rate": 2.1148885418914844e-05, "loss": 0.278, "step": 11590 }, { "epoch": 5.428270535923239, "grad_norm": 0.6206068396568298, "learning_rate": 2.1142056411741972e-05, "loss": 0.2728, "step": 11600 }, { "epoch": 5.432951088228411, "grad_norm": 0.6550421714782715, "learning_rate": 2.113522260794395e-05, "loss": 0.2767, "step": 11610 }, { "epoch": 5.437631640533583, "grad_norm": 0.6506906151771545, "learning_rate": 2.112838401195516e-05, "loss": 0.2703, "step": 11620 }, { "epoch": 5.442312192838755, "grad_norm": 0.8731294274330139, "learning_rate": 2.1121540628213085e-05, "loss": 0.2682, "step": 11630 }, { "epoch": 5.446992745143927, "grad_norm": 0.5192842483520508, "learning_rate": 2.111469246115833e-05, "loss": 0.2731, "step": 11640 }, { "epoch": 5.451673297449099, "grad_norm": 0.5446183085441589, "learning_rate": 2.1107839515234602e-05, "loss": 0.278, "step": 11650 }, { "epoch": 5.456353849754271, "grad_norm": 1.1976028680801392, "learning_rate": 2.110098179488869e-05, "loss": 0.2746, "step": 11660 }, { "epoch": 5.461034402059443, "grad_norm": 0.45854318141937256, "learning_rate": 2.1094119304570497e-05, "loss": 0.2675, "step": 11670 }, { "epoch": 5.465714954364615, "grad_norm": 0.9387004375457764, "learning_rate": 2.1087252048733023e-05, "loss": 0.282, "step": 11680 }, { "epoch": 5.470395506669787, "grad_norm": 1.151279330253601, "learning_rate": 2.1080380031832353e-05, "loss": 0.2716, "step": 11690 }, { "epoch": 5.475076058974959, "grad_norm": 0.7151018381118774, "learning_rate": 2.107350325832767e-05, "loss": 0.2695, "step": 11700 }, { "epoch": 5.479756611280131, "grad_norm": 0.9995922446250916, "learning_rate": 2.1066621732681226e-05, "loss": 0.2813, "step": 11710 }, { "epoch": 5.484437163585303, "grad_norm": 0.5453210473060608, "learning_rate": 2.105973545935838e-05, "loss": 0.2834, "step": 11720 }, { "epoch": 5.489117715890475, "grad_norm": 0.6070712208747864, "learning_rate": 2.1052844442827554e-05, "loss": 0.2873, "step": 11730 }, { "epoch": 5.493798268195647, "grad_norm": 0.7904199361801147, "learning_rate": 2.1045948687560254e-05, "loss": 0.274, "step": 11740 }, { "epoch": 5.498478820500819, "grad_norm": 0.5574101805686951, "learning_rate": 2.1039048198031063e-05, "loss": 0.2709, "step": 11750 }, { "epoch": 5.503159372805991, "grad_norm": 0.6305064558982849, "learning_rate": 2.1032142978717632e-05, "loss": 0.2864, "step": 11760 }, { "epoch": 5.507839925111163, "grad_norm": 1.080420970916748, "learning_rate": 2.102523303410068e-05, "loss": 0.2796, "step": 11770 }, { "epoch": 5.512520477416335, "grad_norm": 1.033000111579895, "learning_rate": 2.1018318368664003e-05, "loss": 0.2872, "step": 11780 }, { "epoch": 5.517201029721507, "grad_norm": 0.6707507371902466, "learning_rate": 2.1011398986894446e-05, "loss": 0.2695, "step": 11790 }, { "epoch": 5.521881582026679, "grad_norm": 0.6412596702575684, "learning_rate": 2.100447489328192e-05, "loss": 0.2778, "step": 11800 }, { "epoch": 5.526562134331851, "grad_norm": 0.62774258852005, "learning_rate": 2.09975460923194e-05, "loss": 0.2801, "step": 11810 }, { "epoch": 5.5312426866370235, "grad_norm": 0.6212994456291199, "learning_rate": 2.0990612588502904e-05, "loss": 0.2682, "step": 11820 }, { "epoch": 5.535923238942195, "grad_norm": 0.7561893463134766, "learning_rate": 2.098367438633151e-05, "loss": 0.2833, "step": 11830 }, { "epoch": 5.540603791247367, "grad_norm": 0.6979212164878845, "learning_rate": 2.097673149030734e-05, "loss": 0.2671, "step": 11840 }, { "epoch": 5.545284343552539, "grad_norm": 0.7833976745605469, "learning_rate": 2.0969783904935568e-05, "loss": 0.2782, "step": 11850 }, { "epoch": 5.549964895857711, "grad_norm": 0.6076081991195679, "learning_rate": 2.09628316347244e-05, "loss": 0.257, "step": 11860 }, { "epoch": 5.5546454481628835, "grad_norm": 0.5729600191116333, "learning_rate": 2.09558746841851e-05, "loss": 0.286, "step": 11870 }, { "epoch": 5.559326000468055, "grad_norm": 0.6656695008277893, "learning_rate": 2.0948913057831944e-05, "loss": 0.2662, "step": 11880 }, { "epoch": 5.564006552773227, "grad_norm": 0.6053357124328613, "learning_rate": 2.0941946760182264e-05, "loss": 0.2584, "step": 11890 }, { "epoch": 5.568687105078399, "grad_norm": 0.6214652061462402, "learning_rate": 2.0934975795756416e-05, "loss": 0.2727, "step": 11900 }, { "epoch": 5.573367657383571, "grad_norm": 0.5597169399261475, "learning_rate": 2.0928000169077778e-05, "loss": 0.2707, "step": 11910 }, { "epoch": 5.578048209688744, "grad_norm": 0.7437730431556702, "learning_rate": 2.092101988467276e-05, "loss": 0.2744, "step": 11920 }, { "epoch": 5.582728761993915, "grad_norm": 0.7124475836753845, "learning_rate": 2.0914034947070797e-05, "loss": 0.2829, "step": 11930 }, { "epoch": 5.587409314299087, "grad_norm": 0.5633047223091125, "learning_rate": 2.0907045360804334e-05, "loss": 0.2875, "step": 11940 }, { "epoch": 5.592089866604259, "grad_norm": 0.5440492630004883, "learning_rate": 2.0900051130408836e-05, "loss": 0.2565, "step": 11950 }, { "epoch": 5.596770418909431, "grad_norm": 0.6957356333732605, "learning_rate": 2.0893052260422787e-05, "loss": 0.2626, "step": 11960 }, { "epoch": 5.601450971214604, "grad_norm": 0.6125474572181702, "learning_rate": 2.088604875538768e-05, "loss": 0.2675, "step": 11970 }, { "epoch": 5.6061315235197755, "grad_norm": 0.7755016684532166, "learning_rate": 2.0879040619848008e-05, "loss": 0.2743, "step": 11980 }, { "epoch": 5.610812075824947, "grad_norm": 0.8224356770515442, "learning_rate": 2.087202785835128e-05, "loss": 0.2821, "step": 11990 }, { "epoch": 5.615492628130119, "grad_norm": 0.6007598042488098, "learning_rate": 2.0865010475447994e-05, "loss": 0.2654, "step": 12000 }, { "epoch": 5.620173180435291, "grad_norm": 0.4969356060028076, "learning_rate": 2.0857988475691663e-05, "loss": 0.2636, "step": 12010 }, { "epoch": 5.624853732740464, "grad_norm": 0.8178346753120422, "learning_rate": 2.0850961863638775e-05, "loss": 0.2632, "step": 12020 }, { "epoch": 5.629534285045636, "grad_norm": 0.5893983244895935, "learning_rate": 2.0843930643848826e-05, "loss": 0.2772, "step": 12030 }, { "epoch": 5.634214837350807, "grad_norm": 0.9206592440605164, "learning_rate": 2.0836894820884302e-05, "loss": 0.2791, "step": 12040 }, { "epoch": 5.638895389655979, "grad_norm": 1.2072422504425049, "learning_rate": 2.0829854399310674e-05, "loss": 0.2754, "step": 12050 }, { "epoch": 5.643575941961151, "grad_norm": 0.9602249264717102, "learning_rate": 2.0822809383696385e-05, "loss": 0.2753, "step": 12060 }, { "epoch": 5.648256494266324, "grad_norm": 0.5788623690605164, "learning_rate": 2.0815759778612878e-05, "loss": 0.2843, "step": 12070 }, { "epoch": 5.652937046571496, "grad_norm": 0.5024712681770325, "learning_rate": 2.0808705588634562e-05, "loss": 0.2625, "step": 12080 }, { "epoch": 5.6576175988766675, "grad_norm": 0.47738540172576904, "learning_rate": 2.080164681833883e-05, "loss": 0.2636, "step": 12090 }, { "epoch": 5.662298151181839, "grad_norm": 0.7795660495758057, "learning_rate": 2.079458347230603e-05, "loss": 0.2776, "step": 12100 }, { "epoch": 5.666978703487011, "grad_norm": 1.179853916168213, "learning_rate": 2.07875155551195e-05, "loss": 0.2818, "step": 12110 }, { "epoch": 5.671659255792184, "grad_norm": 0.6344669461250305, "learning_rate": 2.078044307136553e-05, "loss": 0.2734, "step": 12120 }, { "epoch": 5.676339808097356, "grad_norm": 0.6462633609771729, "learning_rate": 2.077336602563338e-05, "loss": 0.2697, "step": 12130 }, { "epoch": 5.681020360402528, "grad_norm": 0.5380310416221619, "learning_rate": 2.0766284422515268e-05, "loss": 0.2762, "step": 12140 }, { "epoch": 5.685700912707699, "grad_norm": 0.5530344843864441, "learning_rate": 2.0759198266606365e-05, "loss": 0.278, "step": 12150 }, { "epoch": 5.690381465012871, "grad_norm": 0.5757228136062622, "learning_rate": 2.075210756250481e-05, "loss": 0.2677, "step": 12160 }, { "epoch": 5.695062017318044, "grad_norm": 0.5183617472648621, "learning_rate": 2.0745012314811675e-05, "loss": 0.2724, "step": 12170 }, { "epoch": 5.699742569623216, "grad_norm": 0.7009425759315491, "learning_rate": 2.0737912528130992e-05, "loss": 0.2662, "step": 12180 }, { "epoch": 5.704423121928388, "grad_norm": 0.45341354608535767, "learning_rate": 2.0730808207069737e-05, "loss": 0.2703, "step": 12190 }, { "epoch": 5.7091036742335595, "grad_norm": 0.567229688167572, "learning_rate": 2.072369935623783e-05, "loss": 0.2721, "step": 12200 }, { "epoch": 5.713784226538731, "grad_norm": 0.77081298828125, "learning_rate": 2.071658598024812e-05, "loss": 0.2737, "step": 12210 }, { "epoch": 5.718464778843904, "grad_norm": 0.47937020659446716, "learning_rate": 2.0709468083716398e-05, "loss": 0.2639, "step": 12220 }, { "epoch": 5.723145331149076, "grad_norm": 0.9305668473243713, "learning_rate": 2.07023456712614e-05, "loss": 0.271, "step": 12230 }, { "epoch": 5.727825883454248, "grad_norm": 0.7328349351882935, "learning_rate": 2.0695218747504774e-05, "loss": 0.2835, "step": 12240 }, { "epoch": 5.7325064357594195, "grad_norm": 0.502650260925293, "learning_rate": 2.0688087317071114e-05, "loss": 0.2798, "step": 12250 }, { "epoch": 5.737186988064591, "grad_norm": 1.1721916198730469, "learning_rate": 2.068095138458791e-05, "loss": 0.2702, "step": 12260 }, { "epoch": 5.741867540369764, "grad_norm": 0.6653527617454529, "learning_rate": 2.0673810954685603e-05, "loss": 0.2795, "step": 12270 }, { "epoch": 5.746548092674936, "grad_norm": 0.6164671182632446, "learning_rate": 2.066666603199754e-05, "loss": 0.2754, "step": 12280 }, { "epoch": 5.751228644980108, "grad_norm": 0.5062130689620972, "learning_rate": 2.065951662115998e-05, "loss": 0.2836, "step": 12290 }, { "epoch": 5.75590919728528, "grad_norm": 0.5975573062896729, "learning_rate": 2.06523627268121e-05, "loss": 0.2568, "step": 12300 }, { "epoch": 5.7605897495904514, "grad_norm": 0.553719162940979, "learning_rate": 2.064520435359599e-05, "loss": 0.2523, "step": 12310 }, { "epoch": 5.765270301895624, "grad_norm": 0.6743308305740356, "learning_rate": 2.0638041506156627e-05, "loss": 0.2687, "step": 12320 }, { "epoch": 5.769950854200796, "grad_norm": 0.5988495349884033, "learning_rate": 2.0630874189141917e-05, "loss": 0.2531, "step": 12330 }, { "epoch": 5.774631406505968, "grad_norm": 0.826839804649353, "learning_rate": 2.062370240720265e-05, "loss": 0.2796, "step": 12340 }, { "epoch": 5.77931195881114, "grad_norm": 0.6268085241317749, "learning_rate": 2.061652616499252e-05, "loss": 0.276, "step": 12350 }, { "epoch": 5.7839925111163115, "grad_norm": 0.8104456067085266, "learning_rate": 2.0609345467168106e-05, "loss": 0.2899, "step": 12360 }, { "epoch": 5.788673063421483, "grad_norm": 0.4970828890800476, "learning_rate": 2.060216031838889e-05, "loss": 0.2718, "step": 12370 }, { "epoch": 5.793353615726656, "grad_norm": 1.010885238647461, "learning_rate": 2.0594970723317236e-05, "loss": 0.2685, "step": 12380 }, { "epoch": 5.798034168031828, "grad_norm": 0.711955189704895, "learning_rate": 2.05877766866184e-05, "loss": 0.2504, "step": 12390 }, { "epoch": 5.802714720337, "grad_norm": 0.4871058166027069, "learning_rate": 2.0580578212960502e-05, "loss": 0.2655, "step": 12400 }, { "epoch": 5.807395272642172, "grad_norm": 0.7483100295066833, "learning_rate": 2.0573375307014563e-05, "loss": 0.2639, "step": 12410 }, { "epoch": 5.812075824947343, "grad_norm": 0.8890340924263, "learning_rate": 2.0566167973454467e-05, "loss": 0.2713, "step": 12420 }, { "epoch": 5.816756377252516, "grad_norm": 0.7185916900634766, "learning_rate": 2.055895621695698e-05, "loss": 0.2674, "step": 12430 }, { "epoch": 5.821436929557688, "grad_norm": 0.6790950298309326, "learning_rate": 2.055174004220172e-05, "loss": 0.291, "step": 12440 }, { "epoch": 5.82611748186286, "grad_norm": 0.6171966195106506, "learning_rate": 2.0544519453871192e-05, "loss": 0.2642, "step": 12450 }, { "epoch": 5.830798034168032, "grad_norm": 0.605247974395752, "learning_rate": 2.0537294456650764e-05, "loss": 0.2714, "step": 12460 }, { "epoch": 5.8354785864732035, "grad_norm": 0.6143172383308411, "learning_rate": 2.0530065055228648e-05, "loss": 0.2531, "step": 12470 }, { "epoch": 5.840159138778376, "grad_norm": 1.0937918424606323, "learning_rate": 2.0522831254295924e-05, "loss": 0.2782, "step": 12480 }, { "epoch": 5.844839691083548, "grad_norm": 0.589340090751648, "learning_rate": 2.0515593058546536e-05, "loss": 0.2603, "step": 12490 }, { "epoch": 5.84952024338872, "grad_norm": 0.7405624389648438, "learning_rate": 2.0508350472677265e-05, "loss": 0.2831, "step": 12500 }, { "epoch": 5.854200795693892, "grad_norm": 0.6368409395217896, "learning_rate": 2.0501103501387744e-05, "loss": 0.2763, "step": 12510 }, { "epoch": 5.858881347999064, "grad_norm": 0.6284805536270142, "learning_rate": 2.0493852149380456e-05, "loss": 0.277, "step": 12520 }, { "epoch": 5.863561900304235, "grad_norm": 0.5934088826179504, "learning_rate": 2.0486596421360728e-05, "loss": 0.268, "step": 12530 }, { "epoch": 5.868242452609408, "grad_norm": 0.9666668176651001, "learning_rate": 2.047933632203672e-05, "loss": 0.2773, "step": 12540 }, { "epoch": 5.87292300491458, "grad_norm": 0.583179771900177, "learning_rate": 2.0472071856119433e-05, "loss": 0.2859, "step": 12550 }, { "epoch": 5.877603557219752, "grad_norm": 0.7405192255973816, "learning_rate": 2.0464803028322697e-05, "loss": 0.2643, "step": 12560 }, { "epoch": 5.882284109524924, "grad_norm": 0.5196687579154968, "learning_rate": 2.0457529843363178e-05, "loss": 0.2713, "step": 12570 }, { "epoch": 5.8869646618300955, "grad_norm": 0.6103324294090271, "learning_rate": 2.0450252305960367e-05, "loss": 0.2616, "step": 12580 }, { "epoch": 5.891645214135268, "grad_norm": 0.9634934663772583, "learning_rate": 2.044297042083658e-05, "loss": 0.2563, "step": 12590 }, { "epoch": 5.89632576644044, "grad_norm": 0.5472492575645447, "learning_rate": 2.0435684192716947e-05, "loss": 0.2649, "step": 12600 }, { "epoch": 5.901006318745612, "grad_norm": 5.122225284576416, "learning_rate": 2.042839362632943e-05, "loss": 0.2697, "step": 12610 }, { "epoch": 5.905686871050784, "grad_norm": 0.4607537090778351, "learning_rate": 2.042109872640479e-05, "loss": 0.2726, "step": 12620 }, { "epoch": 5.9103674233559556, "grad_norm": 0.7830342054367065, "learning_rate": 2.0413799497676622e-05, "loss": 0.2692, "step": 12630 }, { "epoch": 5.915047975661128, "grad_norm": 0.5277116894721985, "learning_rate": 2.0406495944881304e-05, "loss": 0.278, "step": 12640 }, { "epoch": 5.9197285279663, "grad_norm": 0.610689640045166, "learning_rate": 2.0399188072758036e-05, "loss": 0.2531, "step": 12650 }, { "epoch": 5.924409080271472, "grad_norm": 1.0955737829208374, "learning_rate": 2.039187588604881e-05, "loss": 0.2829, "step": 12660 }, { "epoch": 5.929089632576644, "grad_norm": 0.6287468075752258, "learning_rate": 2.038455938949844e-05, "loss": 0.2638, "step": 12670 }, { "epoch": 5.933770184881816, "grad_norm": 0.6184262633323669, "learning_rate": 2.037723858785451e-05, "loss": 0.2764, "step": 12680 }, { "epoch": 5.938450737186988, "grad_norm": 0.6222171187400818, "learning_rate": 2.036991348586741e-05, "loss": 0.276, "step": 12690 }, { "epoch": 5.94313128949216, "grad_norm": 1.0037462711334229, "learning_rate": 2.0362584088290323e-05, "loss": 0.2669, "step": 12700 }, { "epoch": 5.947811841797332, "grad_norm": 0.5077148079872131, "learning_rate": 2.0355250399879214e-05, "loss": 0.2698, "step": 12710 }, { "epoch": 5.952492394102504, "grad_norm": 0.9561362862586975, "learning_rate": 2.034791242539283e-05, "loss": 0.2625, "step": 12720 }, { "epoch": 5.957172946407676, "grad_norm": 0.7409958839416504, "learning_rate": 2.03405701695927e-05, "loss": 0.2703, "step": 12730 }, { "epoch": 5.961853498712848, "grad_norm": 0.6210848689079285, "learning_rate": 2.033322363724315e-05, "loss": 0.2749, "step": 12740 }, { "epoch": 5.96653405101802, "grad_norm": 0.6347478032112122, "learning_rate": 2.032587283311125e-05, "loss": 0.2698, "step": 12750 }, { "epoch": 5.971214603323192, "grad_norm": 0.8770266175270081, "learning_rate": 2.0318517761966867e-05, "loss": 0.2627, "step": 12760 }, { "epoch": 5.975895155628364, "grad_norm": 1.056298017501831, "learning_rate": 2.031115842858261e-05, "loss": 0.2652, "step": 12770 }, { "epoch": 5.980575707933536, "grad_norm": 0.8183233737945557, "learning_rate": 2.030379483773389e-05, "loss": 0.2643, "step": 12780 }, { "epoch": 5.9852562602387085, "grad_norm": 0.5097810626029968, "learning_rate": 2.0296426994198852e-05, "loss": 0.2717, "step": 12790 }, { "epoch": 5.98993681254388, "grad_norm": 0.6399802565574646, "learning_rate": 2.0289054902758414e-05, "loss": 0.2629, "step": 12800 }, { "epoch": 5.994617364849052, "grad_norm": 0.5399758219718933, "learning_rate": 2.028167856819624e-05, "loss": 0.27, "step": 12810 }, { "epoch": 5.999297917154224, "grad_norm": 0.9235866665840149, "learning_rate": 2.0274297995298758e-05, "loss": 0.272, "step": 12820 }, { "epoch": 6.0037444418441375, "grad_norm": 0.7583636045455933, "learning_rate": 2.0266913188855136e-05, "loss": 0.2484, "step": 12830 }, { "epoch": 6.008424994149309, "grad_norm": 0.5695137977600098, "learning_rate": 2.0259524153657306e-05, "loss": 0.2452, "step": 12840 }, { "epoch": 6.013105546454482, "grad_norm": 0.6551746726036072, "learning_rate": 2.0252130894499922e-05, "loss": 0.2334, "step": 12850 }, { "epoch": 6.017786098759654, "grad_norm": 0.6922371983528137, "learning_rate": 2.0244733416180392e-05, "loss": 0.2381, "step": 12860 }, { "epoch": 6.022466651064826, "grad_norm": 0.8612797260284424, "learning_rate": 2.023733172349886e-05, "loss": 0.2439, "step": 12870 }, { "epoch": 6.0271472033699975, "grad_norm": 0.8979517221450806, "learning_rate": 2.0229925821258205e-05, "loss": 0.2565, "step": 12880 }, { "epoch": 6.031827755675169, "grad_norm": 0.5305075645446777, "learning_rate": 2.0222515714264035e-05, "loss": 0.24, "step": 12890 }, { "epoch": 6.036508307980342, "grad_norm": 0.6175830364227295, "learning_rate": 2.0215101407324695e-05, "loss": 0.2428, "step": 12900 }, { "epoch": 6.041188860285514, "grad_norm": 0.4556317627429962, "learning_rate": 2.0207682905251237e-05, "loss": 0.2334, "step": 12910 }, { "epoch": 6.045869412590686, "grad_norm": 0.5158896446228027, "learning_rate": 2.0200260212857453e-05, "loss": 0.2566, "step": 12920 }, { "epoch": 6.050549964895858, "grad_norm": 0.5680518746376038, "learning_rate": 2.0192833334959848e-05, "loss": 0.2479, "step": 12930 }, { "epoch": 6.0552305172010294, "grad_norm": 0.7149266600608826, "learning_rate": 2.018540227637764e-05, "loss": 0.2403, "step": 12940 }, { "epoch": 6.059911069506201, "grad_norm": 0.5462586879730225, "learning_rate": 2.017796704193276e-05, "loss": 0.238, "step": 12950 }, { "epoch": 6.064591621811374, "grad_norm": 0.5384701490402222, "learning_rate": 2.017052763644986e-05, "loss": 0.2454, "step": 12960 }, { "epoch": 6.069272174116546, "grad_norm": 0.8138917684555054, "learning_rate": 2.016308406475628e-05, "loss": 0.2529, "step": 12970 }, { "epoch": 6.073952726421718, "grad_norm": 0.5576090216636658, "learning_rate": 2.0155636331682072e-05, "loss": 0.2403, "step": 12980 }, { "epoch": 6.0786332787268895, "grad_norm": 0.6453830599784851, "learning_rate": 2.014818444206e-05, "loss": 0.2519, "step": 12990 }, { "epoch": 6.083313831032061, "grad_norm": 0.5377636551856995, "learning_rate": 2.0140728400725502e-05, "loss": 0.2469, "step": 13000 }, { "epoch": 6.087994383337234, "grad_norm": 1.1703904867172241, "learning_rate": 2.0133268212516733e-05, "loss": 0.2337, "step": 13010 }, { "epoch": 6.092674935642406, "grad_norm": 0.5678302645683289, "learning_rate": 2.012580388227452e-05, "loss": 0.2395, "step": 13020 }, { "epoch": 6.097355487947578, "grad_norm": 0.604201078414917, "learning_rate": 2.011833541484239e-05, "loss": 0.242, "step": 13030 }, { "epoch": 6.10203604025275, "grad_norm": 0.6231510639190674, "learning_rate": 2.0110862815066547e-05, "loss": 0.2659, "step": 13040 }, { "epoch": 6.106716592557921, "grad_norm": 0.7381459474563599, "learning_rate": 2.010338608779589e-05, "loss": 0.2462, "step": 13050 }, { "epoch": 6.111397144863094, "grad_norm": 0.656815230846405, "learning_rate": 2.0095905237881972e-05, "loss": 0.2447, "step": 13060 }, { "epoch": 6.116077697168266, "grad_norm": 0.5565825700759888, "learning_rate": 2.0088420270179038e-05, "loss": 0.2515, "step": 13070 }, { "epoch": 6.120758249473438, "grad_norm": 0.5770668387413025, "learning_rate": 2.0080931189544013e-05, "loss": 0.2459, "step": 13080 }, { "epoch": 6.12543880177861, "grad_norm": 0.5641608834266663, "learning_rate": 2.007343800083647e-05, "loss": 0.2623, "step": 13090 }, { "epoch": 6.1301193540837815, "grad_norm": 1.056819200515747, "learning_rate": 2.0065940708918662e-05, "loss": 0.2436, "step": 13100 }, { "epoch": 6.134799906388954, "grad_norm": 1.126328945159912, "learning_rate": 2.0058439318655497e-05, "loss": 0.2685, "step": 13110 }, { "epoch": 6.139480458694126, "grad_norm": 1.082535743713379, "learning_rate": 2.0050933834914554e-05, "loss": 0.2545, "step": 13120 }, { "epoch": 6.144161010999298, "grad_norm": 0.8558759093284607, "learning_rate": 2.0043424262566042e-05, "loss": 0.2455, "step": 13130 }, { "epoch": 6.14884156330447, "grad_norm": 0.6075338125228882, "learning_rate": 2.0035910606482864e-05, "loss": 0.2489, "step": 13140 }, { "epoch": 6.153522115609642, "grad_norm": 0.71807461977005, "learning_rate": 2.0028392871540532e-05, "loss": 0.2596, "step": 13150 }, { "epoch": 6.158202667914814, "grad_norm": 0.7039890289306641, "learning_rate": 2.0020871062617233e-05, "loss": 0.2495, "step": 13160 }, { "epoch": 6.162883220219986, "grad_norm": 0.7137534022331238, "learning_rate": 2.0013345184593782e-05, "loss": 0.2459, "step": 13170 }, { "epoch": 6.167563772525158, "grad_norm": 0.7055923342704773, "learning_rate": 2.000581524235364e-05, "loss": 0.2481, "step": 13180 }, { "epoch": 6.17224432483033, "grad_norm": 0.49837395548820496, "learning_rate": 1.9998281240782908e-05, "loss": 0.2496, "step": 13190 }, { "epoch": 6.176924877135502, "grad_norm": 1.1499333381652832, "learning_rate": 1.9990743184770313e-05, "loss": 0.2509, "step": 13200 }, { "epoch": 6.181605429440674, "grad_norm": 0.6603314876556396, "learning_rate": 1.998320107920722e-05, "loss": 0.2573, "step": 13210 }, { "epoch": 6.186285981745846, "grad_norm": 0.5288977026939392, "learning_rate": 1.9975654928987623e-05, "loss": 0.2392, "step": 13220 }, { "epoch": 6.190966534051018, "grad_norm": 0.810886561870575, "learning_rate": 1.9968104739008132e-05, "loss": 0.2318, "step": 13230 }, { "epoch": 6.19564708635619, "grad_norm": 0.9398677349090576, "learning_rate": 1.9960550514167983e-05, "loss": 0.2577, "step": 13240 }, { "epoch": 6.200327638661362, "grad_norm": 0.5204474329948425, "learning_rate": 1.9952992259369036e-05, "loss": 0.2454, "step": 13250 }, { "epoch": 6.205008190966534, "grad_norm": 1.025014042854309, "learning_rate": 1.994542997951575e-05, "loss": 0.2493, "step": 13260 }, { "epoch": 6.209688743271706, "grad_norm": 0.6104615330696106, "learning_rate": 1.9937863679515217e-05, "loss": 0.246, "step": 13270 }, { "epoch": 6.214369295576878, "grad_norm": 0.7140375971794128, "learning_rate": 1.9930293364277126e-05, "loss": 0.2511, "step": 13280 }, { "epoch": 6.21904984788205, "grad_norm": 0.494251549243927, "learning_rate": 1.9922719038713766e-05, "loss": 0.237, "step": 13290 }, { "epoch": 6.223730400187222, "grad_norm": 0.8539058566093445, "learning_rate": 1.991514070774004e-05, "loss": 0.2429, "step": 13300 }, { "epoch": 6.2284109524923945, "grad_norm": 0.7666001319885254, "learning_rate": 1.990755837627344e-05, "loss": 0.2557, "step": 13310 }, { "epoch": 6.233091504797566, "grad_norm": 0.6557801961898804, "learning_rate": 1.989997204923406e-05, "loss": 0.2543, "step": 13320 }, { "epoch": 6.237772057102738, "grad_norm": 0.6306620240211487, "learning_rate": 1.9892381731544593e-05, "loss": 0.2693, "step": 13330 }, { "epoch": 6.24245260940791, "grad_norm": 0.6589447855949402, "learning_rate": 1.9884787428130304e-05, "loss": 0.2576, "step": 13340 }, { "epoch": 6.247133161713082, "grad_norm": 1.2366838455200195, "learning_rate": 1.9877189143919056e-05, "loss": 0.2587, "step": 13350 }, { "epoch": 6.251813714018255, "grad_norm": 0.6010467410087585, "learning_rate": 1.9869586883841295e-05, "loss": 0.2615, "step": 13360 }, { "epoch": 6.256494266323426, "grad_norm": 0.624974250793457, "learning_rate": 1.9861980652830043e-05, "loss": 0.2487, "step": 13370 }, { "epoch": 6.261174818628598, "grad_norm": 0.6886376738548279, "learning_rate": 1.9854370455820906e-05, "loss": 0.2317, "step": 13380 }, { "epoch": 6.26585537093377, "grad_norm": 0.7037292122840881, "learning_rate": 1.984675629775205e-05, "loss": 0.2551, "step": 13390 }, { "epoch": 6.270535923238942, "grad_norm": 0.6688735485076904, "learning_rate": 1.983913818356423e-05, "loss": 0.2445, "step": 13400 }, { "epoch": 6.275216475544115, "grad_norm": 0.5642569661140442, "learning_rate": 1.983151611820075e-05, "loss": 0.253, "step": 13410 }, { "epoch": 6.2798970278492865, "grad_norm": 0.5614574551582336, "learning_rate": 1.9823890106607483e-05, "loss": 0.251, "step": 13420 }, { "epoch": 6.284577580154458, "grad_norm": 0.7556859850883484, "learning_rate": 1.981626015373287e-05, "loss": 0.2562, "step": 13430 }, { "epoch": 6.28925813245963, "grad_norm": 0.561943769454956, "learning_rate": 1.9808626264527908e-05, "loss": 0.2344, "step": 13440 }, { "epoch": 6.293938684764802, "grad_norm": 0.7138586044311523, "learning_rate": 1.9800988443946135e-05, "loss": 0.2501, "step": 13450 }, { "epoch": 6.298619237069974, "grad_norm": 0.7116035223007202, "learning_rate": 1.9793346696943657e-05, "loss": 0.2481, "step": 13460 }, { "epoch": 6.303299789375147, "grad_norm": 0.4667774736881256, "learning_rate": 1.978570102847912e-05, "loss": 0.2497, "step": 13470 }, { "epoch": 6.307980341680318, "grad_norm": 0.6189751029014587, "learning_rate": 1.977805144351371e-05, "loss": 0.2426, "step": 13480 }, { "epoch": 6.31266089398549, "grad_norm": 0.5911774635314941, "learning_rate": 1.9770397947011168e-05, "loss": 0.2416, "step": 13490 }, { "epoch": 6.317341446290662, "grad_norm": 0.7979702949523926, "learning_rate": 1.9762740543937764e-05, "loss": 0.2404, "step": 13500 }, { "epoch": 6.322021998595834, "grad_norm": 0.8758843541145325, "learning_rate": 1.9755079239262297e-05, "loss": 0.2537, "step": 13510 }, { "epoch": 6.326702550901007, "grad_norm": 0.5540998578071594, "learning_rate": 1.974741403795611e-05, "loss": 0.2512, "step": 13520 }, { "epoch": 6.3313831032061785, "grad_norm": 0.5090383887290955, "learning_rate": 1.973974494499306e-05, "loss": 0.2302, "step": 13530 }, { "epoch": 6.33606365551135, "grad_norm": 0.5635587573051453, "learning_rate": 1.9732071965349553e-05, "loss": 0.2422, "step": 13540 }, { "epoch": 6.340744207816522, "grad_norm": 0.9124117493629456, "learning_rate": 1.9724395104004485e-05, "loss": 0.2454, "step": 13550 }, { "epoch": 6.345424760121694, "grad_norm": 0.6194251775741577, "learning_rate": 1.9716714365939306e-05, "loss": 0.2656, "step": 13560 }, { "epoch": 6.350105312426867, "grad_norm": 1.1091364622116089, "learning_rate": 1.970902975613795e-05, "loss": 0.2438, "step": 13570 }, { "epoch": 6.3547858647320385, "grad_norm": 0.7087998390197754, "learning_rate": 1.9701341279586886e-05, "loss": 0.2515, "step": 13580 }, { "epoch": 6.35946641703721, "grad_norm": 0.4907483458518982, "learning_rate": 1.969364894127507e-05, "loss": 0.2525, "step": 13590 }, { "epoch": 6.364146969342382, "grad_norm": 0.8841670155525208, "learning_rate": 1.9685952746193996e-05, "loss": 0.2563, "step": 13600 }, { "epoch": 6.368827521647554, "grad_norm": 0.5128297805786133, "learning_rate": 1.9678252699337625e-05, "loss": 0.2464, "step": 13610 }, { "epoch": 6.373508073952727, "grad_norm": 0.5718302726745605, "learning_rate": 1.9670548805702438e-05, "loss": 0.2393, "step": 13620 }, { "epoch": 6.378188626257899, "grad_norm": 0.5370417237281799, "learning_rate": 1.9662841070287414e-05, "loss": 0.2546, "step": 13630 }, { "epoch": 6.3828691785630705, "grad_norm": 1.1714404821395874, "learning_rate": 1.965512949809401e-05, "loss": 0.2542, "step": 13640 }, { "epoch": 6.387549730868242, "grad_norm": 0.5633280277252197, "learning_rate": 1.9647414094126183e-05, "loss": 0.2518, "step": 13650 }, { "epoch": 6.392230283173414, "grad_norm": 0.7702293395996094, "learning_rate": 1.9639694863390385e-05, "loss": 0.2489, "step": 13660 }, { "epoch": 6.396910835478587, "grad_norm": 0.6361388564109802, "learning_rate": 1.9631971810895532e-05, "loss": 0.2534, "step": 13670 }, { "epoch": 6.401591387783759, "grad_norm": 0.619991660118103, "learning_rate": 1.9624244941653027e-05, "loss": 0.2599, "step": 13680 }, { "epoch": 6.4062719400889305, "grad_norm": 0.8226330876350403, "learning_rate": 1.9616514260676757e-05, "loss": 0.2441, "step": 13690 }, { "epoch": 6.410952492394102, "grad_norm": 0.6800082921981812, "learning_rate": 1.9608779772983078e-05, "loss": 0.2449, "step": 13700 }, { "epoch": 6.415633044699274, "grad_norm": 1.205610752105713, "learning_rate": 1.960104148359081e-05, "loss": 0.2366, "step": 13710 }, { "epoch": 6.420313597004447, "grad_norm": 0.6899071931838989, "learning_rate": 1.9593299397521244e-05, "loss": 0.2325, "step": 13720 }, { "epoch": 6.424994149309619, "grad_norm": 0.49789389967918396, "learning_rate": 1.9585553519798142e-05, "loss": 0.2509, "step": 13730 }, { "epoch": 6.429674701614791, "grad_norm": 0.9911171197891235, "learning_rate": 1.9577803855447717e-05, "loss": 0.2464, "step": 13740 }, { "epoch": 6.434355253919962, "grad_norm": 0.5230299234390259, "learning_rate": 1.9570050409498638e-05, "loss": 0.2423, "step": 13750 }, { "epoch": 6.439035806225134, "grad_norm": 0.6336504817008972, "learning_rate": 1.956229318698204e-05, "loss": 0.2414, "step": 13760 }, { "epoch": 6.443716358530306, "grad_norm": 1.4676645994186401, "learning_rate": 1.9554532192931494e-05, "loss": 0.2538, "step": 13770 }, { "epoch": 6.448396910835479, "grad_norm": 0.6005668640136719, "learning_rate": 1.954676743238303e-05, "loss": 0.2457, "step": 13780 }, { "epoch": 6.453077463140651, "grad_norm": 0.6972920298576355, "learning_rate": 1.9538998910375113e-05, "loss": 0.2392, "step": 13790 }, { "epoch": 6.4577580154458225, "grad_norm": 0.5351078510284424, "learning_rate": 1.9531226631948647e-05, "loss": 0.2436, "step": 13800 }, { "epoch": 6.462438567750994, "grad_norm": 0.8339813351631165, "learning_rate": 1.952345060214699e-05, "loss": 0.2616, "step": 13810 }, { "epoch": 6.467119120056166, "grad_norm": 0.8698364496231079, "learning_rate": 1.951567082601592e-05, "loss": 0.2596, "step": 13820 }, { "epoch": 6.471799672361339, "grad_norm": 0.9300540089607239, "learning_rate": 1.950788730860365e-05, "loss": 0.2469, "step": 13830 }, { "epoch": 6.476480224666511, "grad_norm": 0.5276472568511963, "learning_rate": 1.9500100054960813e-05, "loss": 0.2506, "step": 13840 }, { "epoch": 6.481160776971683, "grad_norm": 0.9485363364219666, "learning_rate": 1.949230907014048e-05, "loss": 0.2417, "step": 13850 }, { "epoch": 6.485841329276854, "grad_norm": 0.572791337966919, "learning_rate": 1.9484514359198133e-05, "loss": 0.2486, "step": 13860 }, { "epoch": 6.490521881582026, "grad_norm": 0.5722269415855408, "learning_rate": 1.9476715927191677e-05, "loss": 0.2614, "step": 13870 }, { "epoch": 6.495202433887199, "grad_norm": 0.8157618045806885, "learning_rate": 1.9468913779181432e-05, "loss": 0.2575, "step": 13880 }, { "epoch": 6.499882986192371, "grad_norm": 0.5308186411857605, "learning_rate": 1.9461107920230128e-05, "loss": 0.2314, "step": 13890 }, { "epoch": 6.504563538497543, "grad_norm": 0.8965286016464233, "learning_rate": 1.9453298355402895e-05, "loss": 0.2448, "step": 13900 }, { "epoch": 6.5092440908027145, "grad_norm": 0.514733612537384, "learning_rate": 1.944548508976728e-05, "loss": 0.2414, "step": 13910 }, { "epoch": 6.513924643107886, "grad_norm": 0.7687851786613464, "learning_rate": 1.9437668128393225e-05, "loss": 0.2443, "step": 13920 }, { "epoch": 6.518605195413059, "grad_norm": 0.5519185662269592, "learning_rate": 1.9429847476353068e-05, "loss": 0.2512, "step": 13930 }, { "epoch": 6.523285747718231, "grad_norm": 0.6093600392341614, "learning_rate": 1.9422023138721555e-05, "loss": 0.25, "step": 13940 }, { "epoch": 6.527966300023403, "grad_norm": 0.7438730001449585, "learning_rate": 1.9414195120575805e-05, "loss": 0.2441, "step": 13950 }, { "epoch": 6.532646852328575, "grad_norm": 0.5960105061531067, "learning_rate": 1.9406363426995336e-05, "loss": 0.2492, "step": 13960 }, { "epoch": 6.537327404633746, "grad_norm": 0.8657155632972717, "learning_rate": 1.939852806306205e-05, "loss": 0.2445, "step": 13970 }, { "epoch": 6.542007956938919, "grad_norm": 0.8149390816688538, "learning_rate": 1.9390689033860227e-05, "loss": 0.2349, "step": 13980 }, { "epoch": 6.546688509244091, "grad_norm": 0.5836580395698547, "learning_rate": 1.9382846344476532e-05, "loss": 0.247, "step": 13990 }, { "epoch": 6.551369061549263, "grad_norm": 0.6965262293815613, "learning_rate": 1.9375e-05, "loss": 0.2398, "step": 14000 }, { "epoch": 6.556049613854435, "grad_norm": 0.7260543704032898, "learning_rate": 1.9367150005522035e-05, "loss": 0.2368, "step": 14010 }, { "epoch": 6.5607301661596065, "grad_norm": 0.5752415657043457, "learning_rate": 1.9359296366136424e-05, "loss": 0.253, "step": 14020 }, { "epoch": 6.565410718464779, "grad_norm": 0.5372495055198669, "learning_rate": 1.935143908693929e-05, "loss": 0.2461, "step": 14030 }, { "epoch": 6.570091270769951, "grad_norm": 0.9101308584213257, "learning_rate": 1.934357817302915e-05, "loss": 0.2469, "step": 14040 }, { "epoch": 6.574771823075123, "grad_norm": 0.520999550819397, "learning_rate": 1.9335713629506867e-05, "loss": 0.2443, "step": 14050 }, { "epoch": 6.579452375380295, "grad_norm": 0.4562556743621826, "learning_rate": 1.9327845461475647e-05, "loss": 0.2421, "step": 14060 }, { "epoch": 6.5841329276854665, "grad_norm": 0.7562935948371887, "learning_rate": 1.9319973674041068e-05, "loss": 0.2573, "step": 14070 }, { "epoch": 6.588813479990639, "grad_norm": 1.0723003149032593, "learning_rate": 1.9312098272311042e-05, "loss": 0.2479, "step": 14080 }, { "epoch": 6.593494032295811, "grad_norm": 0.7346293926239014, "learning_rate": 1.930421926139583e-05, "loss": 0.2446, "step": 14090 }, { "epoch": 6.598174584600983, "grad_norm": 0.799443781375885, "learning_rate": 1.929633664640805e-05, "loss": 0.2448, "step": 14100 }, { "epoch": 6.602855136906155, "grad_norm": 0.8048286437988281, "learning_rate": 1.928845043246263e-05, "loss": 0.248, "step": 14110 }, { "epoch": 6.607535689211327, "grad_norm": 0.5633934736251831, "learning_rate": 1.9280560624676855e-05, "loss": 0.2673, "step": 14120 }, { "epoch": 6.612216241516499, "grad_norm": 1.021183729171753, "learning_rate": 1.9272667228170336e-05, "loss": 0.2448, "step": 14130 }, { "epoch": 6.616896793821671, "grad_norm": 0.6097493171691895, "learning_rate": 1.926477024806501e-05, "loss": 0.2388, "step": 14140 }, { "epoch": 6.621577346126843, "grad_norm": 0.5264994502067566, "learning_rate": 1.9256869689485145e-05, "loss": 0.2427, "step": 14150 }, { "epoch": 6.626257898432015, "grad_norm": 0.6458749771118164, "learning_rate": 1.9248965557557326e-05, "loss": 0.248, "step": 14160 }, { "epoch": 6.630938450737187, "grad_norm": 0.5264807939529419, "learning_rate": 1.9241057857410456e-05, "loss": 0.2448, "step": 14170 }, { "epoch": 6.635619003042359, "grad_norm": 0.632937490940094, "learning_rate": 1.9233146594175757e-05, "loss": 0.2504, "step": 14180 }, { "epoch": 6.640299555347531, "grad_norm": 0.7859373688697815, "learning_rate": 1.9225231772986765e-05, "loss": 0.2533, "step": 14190 }, { "epoch": 6.644980107652703, "grad_norm": 0.5849300026893616, "learning_rate": 1.9217313398979317e-05, "loss": 0.2573, "step": 14200 }, { "epoch": 6.649660659957875, "grad_norm": 0.5501264929771423, "learning_rate": 1.9209391477291565e-05, "loss": 0.2468, "step": 14210 }, { "epoch": 6.654341212263047, "grad_norm": 1.0711802244186401, "learning_rate": 1.9201466013063958e-05, "loss": 0.257, "step": 14220 }, { "epoch": 6.6590217645682195, "grad_norm": 0.5843037962913513, "learning_rate": 1.9193537011439234e-05, "loss": 0.2425, "step": 14230 }, { "epoch": 6.663702316873391, "grad_norm": 0.590034544467926, "learning_rate": 1.9185604477562447e-05, "loss": 0.2459, "step": 14240 }, { "epoch": 6.668382869178563, "grad_norm": 0.5160447955131531, "learning_rate": 1.9177668416580927e-05, "loss": 0.2436, "step": 14250 }, { "epoch": 6.673063421483735, "grad_norm": 0.6211285591125488, "learning_rate": 1.9169728833644298e-05, "loss": 0.2412, "step": 14260 }, { "epoch": 6.677743973788907, "grad_norm": 0.8035475611686707, "learning_rate": 1.9161785733904473e-05, "loss": 0.2315, "step": 14270 }, { "epoch": 6.6824245260940796, "grad_norm": 0.6578551530838013, "learning_rate": 1.9153839122515634e-05, "loss": 0.232, "step": 14280 }, { "epoch": 6.687105078399251, "grad_norm": 0.7346625328063965, "learning_rate": 1.914588900463426e-05, "loss": 0.2431, "step": 14290 }, { "epoch": 6.691785630704423, "grad_norm": 0.5772590041160583, "learning_rate": 1.9137935385419094e-05, "loss": 0.2585, "step": 14300 }, { "epoch": 6.696466183009595, "grad_norm": 0.5145176649093628, "learning_rate": 1.9129978270031154e-05, "loss": 0.2448, "step": 14310 }, { "epoch": 6.701146735314767, "grad_norm": 0.7332508563995361, "learning_rate": 1.9122017663633723e-05, "loss": 0.257, "step": 14320 }, { "epoch": 6.70582728761994, "grad_norm": 1.372212290763855, "learning_rate": 1.9114053571392355e-05, "loss": 0.2531, "step": 14330 }, { "epoch": 6.7105078399251115, "grad_norm": 0.49534645676612854, "learning_rate": 1.910608599847486e-05, "loss": 0.2325, "step": 14340 }, { "epoch": 6.715188392230283, "grad_norm": 0.7063036561012268, "learning_rate": 1.9098114950051308e-05, "loss": 0.2541, "step": 14350 }, { "epoch": 6.719868944535455, "grad_norm": 0.5166476368904114, "learning_rate": 1.9090140431294033e-05, "loss": 0.238, "step": 14360 }, { "epoch": 6.724549496840627, "grad_norm": 0.8153067231178284, "learning_rate": 1.9082162447377603e-05, "loss": 0.2605, "step": 14370 }, { "epoch": 6.7292300491458, "grad_norm": 0.751232922077179, "learning_rate": 1.9074181003478858e-05, "loss": 0.2427, "step": 14380 }, { "epoch": 6.7339106014509715, "grad_norm": 0.6696172952651978, "learning_rate": 1.906619610477686e-05, "loss": 0.2471, "step": 14390 }, { "epoch": 6.738591153756143, "grad_norm": 0.6142516732215881, "learning_rate": 1.9058207756452923e-05, "loss": 0.2439, "step": 14400 }, { "epoch": 6.743271706061315, "grad_norm": 0.635977566242218, "learning_rate": 1.905021596369061e-05, "loss": 0.249, "step": 14410 }, { "epoch": 6.747952258366487, "grad_norm": 0.5595885515213013, "learning_rate": 1.90422207316757e-05, "loss": 0.2355, "step": 14420 }, { "epoch": 6.752632810671659, "grad_norm": 0.5399144887924194, "learning_rate": 1.9034222065596212e-05, "loss": 0.2509, "step": 14430 }, { "epoch": 6.757313362976832, "grad_norm": 0.4941023290157318, "learning_rate": 1.90262199706424e-05, "loss": 0.2505, "step": 14440 }, { "epoch": 6.761993915282003, "grad_norm": 0.57335364818573, "learning_rate": 1.9018214452006727e-05, "loss": 0.2318, "step": 14450 }, { "epoch": 6.766674467587175, "grad_norm": 0.749226987361908, "learning_rate": 1.90102055148839e-05, "loss": 0.2473, "step": 14460 }, { "epoch": 6.771355019892347, "grad_norm": 0.5624325275421143, "learning_rate": 1.9002193164470813e-05, "loss": 0.2491, "step": 14470 }, { "epoch": 6.776035572197519, "grad_norm": 0.5521528720855713, "learning_rate": 1.8994177405966607e-05, "loss": 0.2427, "step": 14480 }, { "epoch": 6.780716124502692, "grad_norm": 0.49560272693634033, "learning_rate": 1.8986158244572625e-05, "loss": 0.2385, "step": 14490 }, { "epoch": 6.7853966768078635, "grad_norm": 1.143494963645935, "learning_rate": 1.8978135685492397e-05, "loss": 0.2533, "step": 14500 }, { "epoch": 6.790077229113035, "grad_norm": 0.6418824791908264, "learning_rate": 1.8970109733931688e-05, "loss": 0.2342, "step": 14510 }, { "epoch": 6.794757781418207, "grad_norm": 0.5065920948982239, "learning_rate": 1.896208039509845e-05, "loss": 0.2426, "step": 14520 }, { "epoch": 6.799438333723379, "grad_norm": 0.7141706943511963, "learning_rate": 1.895404767420282e-05, "loss": 0.2495, "step": 14530 }, { "epoch": 6.804118886028552, "grad_norm": 0.6789798736572266, "learning_rate": 1.894601157645716e-05, "loss": 0.2431, "step": 14540 }, { "epoch": 6.808799438333724, "grad_norm": 0.8050999641418457, "learning_rate": 1.8937972107076e-05, "loss": 0.2396, "step": 14550 }, { "epoch": 6.813479990638895, "grad_norm": 1.2313908338546753, "learning_rate": 1.8929929271276066e-05, "loss": 0.2471, "step": 14560 }, { "epoch": 6.818160542944067, "grad_norm": 0.6004180908203125, "learning_rate": 1.8921883074276268e-05, "loss": 0.2391, "step": 14570 }, { "epoch": 6.822841095249239, "grad_norm": 0.5695251226425171, "learning_rate": 1.8913833521297685e-05, "loss": 0.2457, "step": 14580 }, { "epoch": 6.827521647554411, "grad_norm": 0.5522708296775818, "learning_rate": 1.8905780617563597e-05, "loss": 0.2561, "step": 14590 }, { "epoch": 6.832202199859584, "grad_norm": 0.6044539213180542, "learning_rate": 1.889772436829945e-05, "loss": 0.2342, "step": 14600 }, { "epoch": 6.8368827521647555, "grad_norm": 0.7857436537742615, "learning_rate": 1.8889664778732836e-05, "loss": 0.2469, "step": 14610 }, { "epoch": 6.841563304469927, "grad_norm": 0.7694128751754761, "learning_rate": 1.8881601854093555e-05, "loss": 0.227, "step": 14620 }, { "epoch": 6.846243856775099, "grad_norm": 0.6611140370368958, "learning_rate": 1.8873535599613543e-05, "loss": 0.2493, "step": 14630 }, { "epoch": 6.850924409080271, "grad_norm": 0.4808056056499481, "learning_rate": 1.8865466020526903e-05, "loss": 0.253, "step": 14640 }, { "epoch": 6.855604961385444, "grad_norm": 0.8638703227043152, "learning_rate": 1.8857393122069903e-05, "loss": 0.2347, "step": 14650 }, { "epoch": 6.860285513690616, "grad_norm": 0.6729128360748291, "learning_rate": 1.8849316909480956e-05, "loss": 0.2414, "step": 14660 }, { "epoch": 6.864966065995787, "grad_norm": 0.4916436970233917, "learning_rate": 1.8841237388000626e-05, "loss": 0.2353, "step": 14670 }, { "epoch": 6.869646618300959, "grad_norm": 0.6481614708900452, "learning_rate": 1.8833154562871634e-05, "loss": 0.2405, "step": 14680 }, { "epoch": 6.874327170606131, "grad_norm": 0.5425630211830139, "learning_rate": 1.882506843933883e-05, "loss": 0.2441, "step": 14690 }, { "epoch": 6.879007722911304, "grad_norm": 1.009690523147583, "learning_rate": 1.8816979022649216e-05, "loss": 0.259, "step": 14700 }, { "epoch": 6.883688275216476, "grad_norm": 0.6043288707733154, "learning_rate": 1.8808886318051924e-05, "loss": 0.2438, "step": 14710 }, { "epoch": 6.8883688275216475, "grad_norm": 0.5980479121208191, "learning_rate": 1.8800790330798225e-05, "loss": 0.2297, "step": 14720 }, { "epoch": 6.893049379826819, "grad_norm": 0.7756524682044983, "learning_rate": 1.8792691066141516e-05, "loss": 0.2408, "step": 14730 }, { "epoch": 6.897729932131991, "grad_norm": 0.5070231556892395, "learning_rate": 1.878458852933732e-05, "loss": 0.2379, "step": 14740 }, { "epoch": 6.902410484437164, "grad_norm": 0.5400096774101257, "learning_rate": 1.877648272564329e-05, "loss": 0.2719, "step": 14750 }, { "epoch": 6.907091036742336, "grad_norm": 0.7874789834022522, "learning_rate": 1.8768373660319192e-05, "loss": 0.2535, "step": 14760 }, { "epoch": 6.9117715890475075, "grad_norm": 0.7198300957679749, "learning_rate": 1.8760261338626907e-05, "loss": 0.2517, "step": 14770 }, { "epoch": 6.916452141352679, "grad_norm": 0.5897890329360962, "learning_rate": 1.8752145765830432e-05, "loss": 0.2511, "step": 14780 }, { "epoch": 6.921132693657851, "grad_norm": 0.5896884202957153, "learning_rate": 1.8744026947195884e-05, "loss": 0.2363, "step": 14790 }, { "epoch": 6.925813245963024, "grad_norm": 1.5356378555297852, "learning_rate": 1.8735904887991468e-05, "loss": 0.2512, "step": 14800 }, { "epoch": 6.930493798268196, "grad_norm": 1.1086297035217285, "learning_rate": 1.87277795934875e-05, "loss": 0.2513, "step": 14810 }, { "epoch": 6.935174350573368, "grad_norm": 0.5525735020637512, "learning_rate": 1.8719651068956392e-05, "loss": 0.2406, "step": 14820 }, { "epoch": 6.9398549028785395, "grad_norm": 0.7730054259300232, "learning_rate": 1.8711519319672668e-05, "loss": 0.2503, "step": 14830 }, { "epoch": 6.944535455183711, "grad_norm": 0.49828454852104187, "learning_rate": 1.8703384350912917e-05, "loss": 0.2472, "step": 14840 }, { "epoch": 6.949216007488884, "grad_norm": 0.6297841668128967, "learning_rate": 1.8695246167955845e-05, "loss": 0.2437, "step": 14850 }, { "epoch": 6.953896559794056, "grad_norm": 0.6952335834503174, "learning_rate": 1.868710477608222e-05, "loss": 0.2599, "step": 14860 }, { "epoch": 6.958577112099228, "grad_norm": 0.5858743786811829, "learning_rate": 1.8678960180574908e-05, "loss": 0.2483, "step": 14870 }, { "epoch": 6.9632576644043995, "grad_norm": 0.8397697806358337, "learning_rate": 1.867081238671885e-05, "loss": 0.2447, "step": 14880 }, { "epoch": 6.967938216709571, "grad_norm": 0.7276701927185059, "learning_rate": 1.8662661399801055e-05, "loss": 0.2596, "step": 14890 }, { "epoch": 6.972618769014744, "grad_norm": 0.8733302354812622, "learning_rate": 1.8654507225110615e-05, "loss": 0.251, "step": 14900 }, { "epoch": 6.977299321319916, "grad_norm": 0.6175621151924133, "learning_rate": 1.8646349867938688e-05, "loss": 0.2425, "step": 14910 }, { "epoch": 6.981979873625088, "grad_norm": 0.49297502636909485, "learning_rate": 1.863818933357849e-05, "loss": 0.2446, "step": 14920 }, { "epoch": 6.98666042593026, "grad_norm": 0.5614346265792847, "learning_rate": 1.8630025627325294e-05, "loss": 0.243, "step": 14930 }, { "epoch": 6.991340978235431, "grad_norm": 0.6016019582748413, "learning_rate": 1.862185875447646e-05, "loss": 0.2317, "step": 14940 }, { "epoch": 6.996021530540604, "grad_norm": 1.0948785543441772, "learning_rate": 1.8613688720331372e-05, "loss": 0.242, "step": 14950 }, { "epoch": 7.000468055230518, "grad_norm": 0.6159272193908691, "learning_rate": 1.8605515530191472e-05, "loss": 0.2198, "step": 14960 }, { "epoch": 7.0051486075356895, "grad_norm": 0.509032130241394, "learning_rate": 1.8597339189360266e-05, "loss": 0.2271, "step": 14970 }, { "epoch": 7.009829159840861, "grad_norm": 0.7314696311950684, "learning_rate": 1.8589159703143286e-05, "loss": 0.2323, "step": 14980 }, { "epoch": 7.014509712146033, "grad_norm": 0.5788626074790955, "learning_rate": 1.8580977076848112e-05, "loss": 0.2226, "step": 14990 }, { "epoch": 7.019190264451205, "grad_norm": 0.8355691432952881, "learning_rate": 1.857279131578436e-05, "loss": 0.2347, "step": 15000 }, { "epoch": 7.023870816756377, "grad_norm": 0.5578908920288086, "learning_rate": 1.8564602425263686e-05, "loss": 0.2156, "step": 15010 }, { "epoch": 7.0285513690615495, "grad_norm": 1.1534175872802734, "learning_rate": 1.8556410410599768e-05, "loss": 0.2231, "step": 15020 }, { "epoch": 7.033231921366721, "grad_norm": 0.5614079833030701, "learning_rate": 1.854821527710832e-05, "loss": 0.2314, "step": 15030 }, { "epoch": 7.037912473671893, "grad_norm": 1.007794737815857, "learning_rate": 1.854001703010707e-05, "loss": 0.2243, "step": 15040 }, { "epoch": 7.042593025977065, "grad_norm": 0.6497473120689392, "learning_rate": 1.8531815674915773e-05, "loss": 0.227, "step": 15050 }, { "epoch": 7.047273578282237, "grad_norm": 0.5593804717063904, "learning_rate": 1.85236112168562e-05, "loss": 0.2292, "step": 15060 }, { "epoch": 7.05195413058741, "grad_norm": 0.515387773513794, "learning_rate": 1.8515403661252137e-05, "loss": 0.2221, "step": 15070 }, { "epoch": 7.0566346828925814, "grad_norm": 0.5055288076400757, "learning_rate": 1.850719301342937e-05, "loss": 0.2117, "step": 15080 }, { "epoch": 7.061315235197753, "grad_norm": 0.5345869064331055, "learning_rate": 1.849897927871571e-05, "loss": 0.2195, "step": 15090 }, { "epoch": 7.065995787502925, "grad_norm": 0.46943965554237366, "learning_rate": 1.8490762462440957e-05, "loss": 0.2251, "step": 15100 }, { "epoch": 7.070676339808097, "grad_norm": 0.5683743357658386, "learning_rate": 1.8482542569936906e-05, "loss": 0.22, "step": 15110 }, { "epoch": 7.07535689211327, "grad_norm": 0.5806471109390259, "learning_rate": 1.847431960653736e-05, "loss": 0.2079, "step": 15120 }, { "epoch": 7.0800374444184415, "grad_norm": 0.5650232434272766, "learning_rate": 1.846609357757812e-05, "loss": 0.2332, "step": 15130 }, { "epoch": 7.084717996723613, "grad_norm": 0.5602533221244812, "learning_rate": 1.8457864488396958e-05, "loss": 0.2323, "step": 15140 }, { "epoch": 7.089398549028785, "grad_norm": 0.6571400165557861, "learning_rate": 1.8449632344333636e-05, "loss": 0.2194, "step": 15150 }, { "epoch": 7.094079101333957, "grad_norm": 0.5369571447372437, "learning_rate": 1.8441397150729914e-05, "loss": 0.2176, "step": 15160 }, { "epoch": 7.09875965363913, "grad_norm": 0.7291166186332703, "learning_rate": 1.8433158912929515e-05, "loss": 0.2311, "step": 15170 }, { "epoch": 7.103440205944302, "grad_norm": 0.727276623249054, "learning_rate": 1.842491763627814e-05, "loss": 0.2105, "step": 15180 }, { "epoch": 7.108120758249473, "grad_norm": 0.5937271118164062, "learning_rate": 1.841667332612347e-05, "loss": 0.2254, "step": 15190 }, { "epoch": 7.112801310554645, "grad_norm": 0.7369917035102844, "learning_rate": 1.8408425987815136e-05, "loss": 0.2307, "step": 15200 }, { "epoch": 7.117481862859817, "grad_norm": 0.8618708252906799, "learning_rate": 1.8400175626704762e-05, "loss": 0.217, "step": 15210 }, { "epoch": 7.12216241516499, "grad_norm": 0.6192373633384705, "learning_rate": 1.8391922248145904e-05, "loss": 0.2495, "step": 15220 }, { "epoch": 7.126842967470162, "grad_norm": 0.7342912554740906, "learning_rate": 1.8383665857494095e-05, "loss": 0.2154, "step": 15230 }, { "epoch": 7.1315235197753335, "grad_norm": 0.7225720286369324, "learning_rate": 1.8375406460106823e-05, "loss": 0.2389, "step": 15240 }, { "epoch": 7.136204072080505, "grad_norm": 0.7131741642951965, "learning_rate": 1.8367144061343518e-05, "loss": 0.2051, "step": 15250 }, { "epoch": 7.140884624385677, "grad_norm": 0.6468222141265869, "learning_rate": 1.8358878666565552e-05, "loss": 0.2201, "step": 15260 }, { "epoch": 7.14556517669085, "grad_norm": 0.854997992515564, "learning_rate": 1.8350610281136263e-05, "loss": 0.221, "step": 15270 }, { "epoch": 7.150245728996022, "grad_norm": 1.0825238227844238, "learning_rate": 1.8342338910420908e-05, "loss": 0.2176, "step": 15280 }, { "epoch": 7.154926281301194, "grad_norm": 0.5493514537811279, "learning_rate": 1.8334064559786693e-05, "loss": 0.2164, "step": 15290 }, { "epoch": 7.159606833606365, "grad_norm": 0.5384340286254883, "learning_rate": 1.8325787234602754e-05, "loss": 0.2307, "step": 15300 }, { "epoch": 7.164287385911537, "grad_norm": 0.5602570176124573, "learning_rate": 1.8317506940240156e-05, "loss": 0.2096, "step": 15310 }, { "epoch": 7.16896793821671, "grad_norm": 0.7810906171798706, "learning_rate": 1.8309223682071895e-05, "loss": 0.2267, "step": 15320 }, { "epoch": 7.173648490521882, "grad_norm": 0.5323313474655151, "learning_rate": 1.8300937465472883e-05, "loss": 0.211, "step": 15330 }, { "epoch": 7.178329042827054, "grad_norm": 0.6011442542076111, "learning_rate": 1.8292648295819958e-05, "loss": 0.2275, "step": 15340 }, { "epoch": 7.1830095951322255, "grad_norm": 0.7840837836265564, "learning_rate": 1.828435617849188e-05, "loss": 0.2152, "step": 15350 }, { "epoch": 7.187690147437397, "grad_norm": 0.5787049531936646, "learning_rate": 1.8276061118869296e-05, "loss": 0.2194, "step": 15360 }, { "epoch": 7.19237069974257, "grad_norm": 1.6009138822555542, "learning_rate": 1.8267763122334797e-05, "loss": 0.2319, "step": 15370 }, { "epoch": 7.197051252047742, "grad_norm": 0.4903049170970917, "learning_rate": 1.8259462194272857e-05, "loss": 0.2142, "step": 15380 }, { "epoch": 7.201731804352914, "grad_norm": 0.5951883792877197, "learning_rate": 1.8251158340069855e-05, "loss": 0.2233, "step": 15390 }, { "epoch": 7.2064123566580855, "grad_norm": 0.8645315170288086, "learning_rate": 1.8242851565114074e-05, "loss": 0.2086, "step": 15400 }, { "epoch": 7.211092908963257, "grad_norm": 0.5232827067375183, "learning_rate": 1.8234541874795697e-05, "loss": 0.2281, "step": 15410 }, { "epoch": 7.21577346126843, "grad_norm": 1.0756291151046753, "learning_rate": 1.8226229274506777e-05, "loss": 0.2318, "step": 15420 }, { "epoch": 7.220454013573602, "grad_norm": 0.6031705737113953, "learning_rate": 1.821791376964128e-05, "loss": 0.2265, "step": 15430 }, { "epoch": 7.225134565878774, "grad_norm": 0.5812997817993164, "learning_rate": 1.8209595365595052e-05, "loss": 0.2101, "step": 15440 }, { "epoch": 7.229815118183946, "grad_norm": 0.5769907832145691, "learning_rate": 1.82012740677658e-05, "loss": 0.2196, "step": 15450 }, { "epoch": 7.2344956704891175, "grad_norm": 0.670404851436615, "learning_rate": 1.819294988155314e-05, "loss": 0.2232, "step": 15460 }, { "epoch": 7.23917622279429, "grad_norm": 0.48492878675460815, "learning_rate": 1.8184622812358534e-05, "loss": 0.2228, "step": 15470 }, { "epoch": 7.243856775099462, "grad_norm": 0.7273831963539124, "learning_rate": 1.817629286558533e-05, "loss": 0.2137, "step": 15480 }, { "epoch": 7.248537327404634, "grad_norm": 0.5730300545692444, "learning_rate": 1.8167960046638742e-05, "loss": 0.2183, "step": 15490 }, { "epoch": 7.253217879709806, "grad_norm": 1.0127036571502686, "learning_rate": 1.8159624360925845e-05, "loss": 0.2272, "step": 15500 }, { "epoch": 7.2578984320149775, "grad_norm": 0.7885344624519348, "learning_rate": 1.8151285813855574e-05, "loss": 0.2191, "step": 15510 }, { "epoch": 7.262578984320149, "grad_norm": 0.8864800333976746, "learning_rate": 1.8142944410838727e-05, "loss": 0.2231, "step": 15520 }, { "epoch": 7.267259536625322, "grad_norm": 0.6033172011375427, "learning_rate": 1.813460015728794e-05, "loss": 0.235, "step": 15530 }, { "epoch": 7.271940088930494, "grad_norm": 0.6103598475456238, "learning_rate": 1.8126253058617713e-05, "loss": 0.2186, "step": 15540 }, { "epoch": 7.276620641235666, "grad_norm": 0.8805299997329712, "learning_rate": 1.8117903120244394e-05, "loss": 0.2212, "step": 15550 }, { "epoch": 7.281301193540838, "grad_norm": 0.4972810447216034, "learning_rate": 1.810955034758616e-05, "loss": 0.2155, "step": 15560 }, { "epoch": 7.285981745846009, "grad_norm": 0.6420400738716125, "learning_rate": 1.8101194746063034e-05, "loss": 0.2076, "step": 15570 }, { "epoch": 7.290662298151182, "grad_norm": 0.8634189963340759, "learning_rate": 1.809283632109688e-05, "loss": 0.2056, "step": 15580 }, { "epoch": 7.295342850456354, "grad_norm": 0.5508739948272705, "learning_rate": 1.8084475078111387e-05, "loss": 0.2091, "step": 15590 }, { "epoch": 7.300023402761526, "grad_norm": 0.9245696663856506, "learning_rate": 1.8076111022532074e-05, "loss": 0.2152, "step": 15600 }, { "epoch": 7.304703955066698, "grad_norm": 0.5284427404403687, "learning_rate": 1.8067744159786284e-05, "loss": 0.2251, "step": 15610 }, { "epoch": 7.3093845073718695, "grad_norm": 0.6066507697105408, "learning_rate": 1.8059374495303184e-05, "loss": 0.2105, "step": 15620 }, { "epoch": 7.314065059677042, "grad_norm": 0.6392506957054138, "learning_rate": 1.805100203451377e-05, "loss": 0.2233, "step": 15630 }, { "epoch": 7.318745611982214, "grad_norm": 0.6169950366020203, "learning_rate": 1.804262678285082e-05, "loss": 0.2087, "step": 15640 }, { "epoch": 7.323426164287386, "grad_norm": 0.743990421295166, "learning_rate": 1.8034248745748964e-05, "loss": 0.2209, "step": 15650 }, { "epoch": 7.328106716592558, "grad_norm": 0.5910294651985168, "learning_rate": 1.8025867928644605e-05, "loss": 0.2297, "step": 15660 }, { "epoch": 7.33278726889773, "grad_norm": 0.7435188889503479, "learning_rate": 1.801748433697597e-05, "loss": 0.2247, "step": 15670 }, { "epoch": 7.337467821202902, "grad_norm": 0.5511724948883057, "learning_rate": 1.800909797618308e-05, "loss": 0.2167, "step": 15680 }, { "epoch": 7.342148373508074, "grad_norm": 0.6171037554740906, "learning_rate": 1.8000708851707756e-05, "loss": 0.2283, "step": 15690 }, { "epoch": 7.346828925813246, "grad_norm": 0.6177123188972473, "learning_rate": 1.7992316968993607e-05, "loss": 0.2204, "step": 15700 }, { "epoch": 7.351509478118418, "grad_norm": 0.5243306159973145, "learning_rate": 1.7983922333486035e-05, "loss": 0.2136, "step": 15710 }, { "epoch": 7.35619003042359, "grad_norm": 1.0150810480117798, "learning_rate": 1.7975524950632226e-05, "loss": 0.2053, "step": 15720 }, { "epoch": 7.360870582728762, "grad_norm": 0.7060448527336121, "learning_rate": 1.796712482588115e-05, "loss": 0.2153, "step": 15730 }, { "epoch": 7.365551135033934, "grad_norm": 0.5698014497756958, "learning_rate": 1.7958721964683566e-05, "loss": 0.2202, "step": 15740 }, { "epoch": 7.370231687339106, "grad_norm": 0.5396445989608765, "learning_rate": 1.795031637249199e-05, "loss": 0.2223, "step": 15750 }, { "epoch": 7.374912239644278, "grad_norm": 0.6524749994277954, "learning_rate": 1.7941908054760718e-05, "loss": 0.2243, "step": 15760 }, { "epoch": 7.37959279194945, "grad_norm": 0.6443986892700195, "learning_rate": 1.7933497016945825e-05, "loss": 0.2096, "step": 15770 }, { "epoch": 7.3842733442546225, "grad_norm": 0.93411785364151, "learning_rate": 1.7925083264505132e-05, "loss": 0.2225, "step": 15780 }, { "epoch": 7.388953896559794, "grad_norm": 0.6665774583816528, "learning_rate": 1.7916666802898242e-05, "loss": 0.2224, "step": 15790 }, { "epoch": 7.393634448864966, "grad_norm": 0.591867208480835, "learning_rate": 1.79082476375865e-05, "loss": 0.2166, "step": 15800 }, { "epoch": 7.398315001170138, "grad_norm": 0.8861029148101807, "learning_rate": 1.7899825774033008e-05, "loss": 0.2262, "step": 15810 }, { "epoch": 7.40299555347531, "grad_norm": 0.7086699604988098, "learning_rate": 1.789140121770263e-05, "loss": 0.2241, "step": 15820 }, { "epoch": 7.407676105780482, "grad_norm": 0.8372015357017517, "learning_rate": 1.7882973974061957e-05, "loss": 0.234, "step": 15830 }, { "epoch": 7.412356658085654, "grad_norm": 0.7444201707839966, "learning_rate": 1.787454404857935e-05, "loss": 0.2054, "step": 15840 }, { "epoch": 7.417037210390826, "grad_norm": 0.7671411037445068, "learning_rate": 1.786611144672489e-05, "loss": 0.2267, "step": 15850 }, { "epoch": 7.421717762695998, "grad_norm": 0.5359457731246948, "learning_rate": 1.7857676173970394e-05, "loss": 0.2417, "step": 15860 }, { "epoch": 7.42639831500117, "grad_norm": 0.7914373874664307, "learning_rate": 1.784923823578943e-05, "loss": 0.2119, "step": 15870 }, { "epoch": 7.431078867306342, "grad_norm": 0.570206880569458, "learning_rate": 1.784079763765728e-05, "loss": 0.209, "step": 15880 }, { "epoch": 7.435759419611514, "grad_norm": 0.7103692293167114, "learning_rate": 1.7832354385050952e-05, "loss": 0.2141, "step": 15890 }, { "epoch": 7.440439971916686, "grad_norm": 0.6371856927871704, "learning_rate": 1.782390848344919e-05, "loss": 0.2331, "step": 15900 }, { "epoch": 7.445120524221858, "grad_norm": 0.6715120077133179, "learning_rate": 1.781545993833244e-05, "loss": 0.2324, "step": 15910 }, { "epoch": 7.44980107652703, "grad_norm": 0.5529088377952576, "learning_rate": 1.7807008755182876e-05, "loss": 0.2208, "step": 15920 }, { "epoch": 7.454481628832202, "grad_norm": 0.8380878567695618, "learning_rate": 1.7798554939484374e-05, "loss": 0.2178, "step": 15930 }, { "epoch": 7.4591621811373745, "grad_norm": 0.6186197996139526, "learning_rate": 1.7790098496722525e-05, "loss": 0.2367, "step": 15940 }, { "epoch": 7.463842733442546, "grad_norm": 0.5096815824508667, "learning_rate": 1.7781639432384627e-05, "loss": 0.2247, "step": 15950 }, { "epoch": 7.468523285747718, "grad_norm": 0.6687304973602295, "learning_rate": 1.7773177751959674e-05, "loss": 0.2142, "step": 15960 }, { "epoch": 7.47320383805289, "grad_norm": 0.6912170648574829, "learning_rate": 1.7764713460938357e-05, "loss": 0.2149, "step": 15970 }, { "epoch": 7.477884390358062, "grad_norm": 1.0457619428634644, "learning_rate": 1.775624656481306e-05, "loss": 0.2343, "step": 15980 }, { "epoch": 7.482564942663235, "grad_norm": 0.6876556873321533, "learning_rate": 1.774777706907786e-05, "loss": 0.2152, "step": 15990 }, { "epoch": 7.487245494968406, "grad_norm": 0.6016440391540527, "learning_rate": 1.7739304979228528e-05, "loss": 0.2119, "step": 16000 }, { "epoch": 7.491926047273578, "grad_norm": 0.6822038292884827, "learning_rate": 1.7730830300762502e-05, "loss": 0.2126, "step": 16010 }, { "epoch": 7.49660659957875, "grad_norm": 0.7714473605155945, "learning_rate": 1.772235303917892e-05, "loss": 0.2179, "step": 16020 }, { "epoch": 7.501287151883922, "grad_norm": 0.601470410823822, "learning_rate": 1.7713873199978566e-05, "loss": 0.2212, "step": 16030 }, { "epoch": 7.505967704189095, "grad_norm": 0.5343827605247498, "learning_rate": 1.7705390788663935e-05, "loss": 0.2224, "step": 16040 }, { "epoch": 7.5106482564942665, "grad_norm": 1.0227444171905518, "learning_rate": 1.7696905810739168e-05, "loss": 0.2128, "step": 16050 }, { "epoch": 7.515328808799438, "grad_norm": 0.657282829284668, "learning_rate": 1.7688418271710064e-05, "loss": 0.2286, "step": 16060 }, { "epoch": 7.52000936110461, "grad_norm": 0.4957966208457947, "learning_rate": 1.7679928177084106e-05, "loss": 0.2198, "step": 16070 }, { "epoch": 7.524689913409782, "grad_norm": 0.6254600286483765, "learning_rate": 1.7671435532370423e-05, "loss": 0.2058, "step": 16080 }, { "epoch": 7.529370465714955, "grad_norm": 0.6553667187690735, "learning_rate": 1.766294034307979e-05, "loss": 0.2243, "step": 16090 }, { "epoch": 7.5340510180201266, "grad_norm": 0.4990363121032715, "learning_rate": 1.7654442614724658e-05, "loss": 0.2123, "step": 16100 }, { "epoch": 7.538731570325298, "grad_norm": 0.5518467426300049, "learning_rate": 1.76459423528191e-05, "loss": 0.2113, "step": 16110 }, { "epoch": 7.54341212263047, "grad_norm": 0.6161865592002869, "learning_rate": 1.763743956287885e-05, "loss": 0.2231, "step": 16120 }, { "epoch": 7.548092674935642, "grad_norm": 1.2340329885482788, "learning_rate": 1.7628934250421272e-05, "loss": 0.2267, "step": 16130 }, { "epoch": 7.552773227240815, "grad_norm": 0.598009467124939, "learning_rate": 1.7620426420965366e-05, "loss": 0.2113, "step": 16140 }, { "epoch": 7.557453779545987, "grad_norm": 0.6183295249938965, "learning_rate": 1.7611916080031783e-05, "loss": 0.2127, "step": 16150 }, { "epoch": 7.5621343318511585, "grad_norm": 0.5383630990982056, "learning_rate": 1.760340323314278e-05, "loss": 0.2228, "step": 16160 }, { "epoch": 7.56681488415633, "grad_norm": 0.6615634560585022, "learning_rate": 1.759488788582226e-05, "loss": 0.2288, "step": 16170 }, { "epoch": 7.571495436461502, "grad_norm": 0.5217282176017761, "learning_rate": 1.7586370043595725e-05, "loss": 0.217, "step": 16180 }, { "epoch": 7.576175988766675, "grad_norm": 0.5128883123397827, "learning_rate": 1.7577849711990326e-05, "loss": 0.2265, "step": 16190 }, { "epoch": 7.580856541071847, "grad_norm": 0.8143375515937805, "learning_rate": 1.756932689653481e-05, "loss": 0.2195, "step": 16200 }, { "epoch": 7.5855370933770185, "grad_norm": 0.6193900108337402, "learning_rate": 1.756080160275953e-05, "loss": 0.2122, "step": 16210 }, { "epoch": 7.59021764568219, "grad_norm": 0.7454729676246643, "learning_rate": 1.7552273836196466e-05, "loss": 0.2098, "step": 16220 }, { "epoch": 7.594898197987362, "grad_norm": 0.9602459669113159, "learning_rate": 1.7543743602379194e-05, "loss": 0.2326, "step": 16230 }, { "epoch": 7.599578750292535, "grad_norm": 0.8455100655555725, "learning_rate": 1.753521090684288e-05, "loss": 0.2212, "step": 16240 }, { "epoch": 7.604259302597707, "grad_norm": 0.6199435591697693, "learning_rate": 1.7526675755124307e-05, "loss": 0.2295, "step": 16250 }, { "epoch": 7.608939854902879, "grad_norm": 0.5798132419586182, "learning_rate": 1.7518138152761838e-05, "loss": 0.2159, "step": 16260 }, { "epoch": 7.6136204072080504, "grad_norm": 0.5343846678733826, "learning_rate": 1.7509598105295432e-05, "loss": 0.2217, "step": 16270 }, { "epoch": 7.618300959513222, "grad_norm": 0.5548704266548157, "learning_rate": 1.750105561826664e-05, "loss": 0.2209, "step": 16280 }, { "epoch": 7.622981511818395, "grad_norm": 0.5318810343742371, "learning_rate": 1.749251069721857e-05, "loss": 0.2108, "step": 16290 }, { "epoch": 7.627662064123567, "grad_norm": 0.6222618222236633, "learning_rate": 1.7483963347695945e-05, "loss": 0.2217, "step": 16300 }, { "epoch": 7.632342616428739, "grad_norm": 0.501157820224762, "learning_rate": 1.7475413575245046e-05, "loss": 0.2237, "step": 16310 }, { "epoch": 7.6370231687339105, "grad_norm": 0.8312394022941589, "learning_rate": 1.746686138541372e-05, "loss": 0.2159, "step": 16320 }, { "epoch": 7.641703721039082, "grad_norm": 0.5955852270126343, "learning_rate": 1.7458306783751395e-05, "loss": 0.233, "step": 16330 }, { "epoch": 7.646384273344255, "grad_norm": 0.7798283100128174, "learning_rate": 1.744974977580906e-05, "loss": 0.227, "step": 16340 }, { "epoch": 7.651064825649427, "grad_norm": 0.5496701002120972, "learning_rate": 1.744119036713927e-05, "loss": 0.2269, "step": 16350 }, { "epoch": 7.655745377954599, "grad_norm": 1.4436089992523193, "learning_rate": 1.7432628563296114e-05, "loss": 0.2196, "step": 16360 }, { "epoch": 7.660425930259771, "grad_norm": 0.628045916557312, "learning_rate": 1.742406436983527e-05, "loss": 0.2199, "step": 16370 }, { "epoch": 7.665106482564942, "grad_norm": 0.5682174563407898, "learning_rate": 1.7415497792313946e-05, "loss": 0.2187, "step": 16380 }, { "epoch": 7.669787034870115, "grad_norm": 0.8670811057090759, "learning_rate": 1.74069288362909e-05, "loss": 0.2296, "step": 16390 }, { "epoch": 7.674467587175287, "grad_norm": 0.61847323179245, "learning_rate": 1.739835750732643e-05, "loss": 0.2098, "step": 16400 }, { "epoch": 7.679148139480459, "grad_norm": 0.5495385527610779, "learning_rate": 1.738978381098239e-05, "loss": 0.2206, "step": 16410 }, { "epoch": 7.683828691785631, "grad_norm": 0.7259724736213684, "learning_rate": 1.738120775282215e-05, "loss": 0.2376, "step": 16420 }, { "epoch": 7.6885092440908025, "grad_norm": 0.576655387878418, "learning_rate": 1.7372629338410618e-05, "loss": 0.2408, "step": 16430 }, { "epoch": 7.693189796395975, "grad_norm": 0.4983188807964325, "learning_rate": 1.7364048573314243e-05, "loss": 0.2241, "step": 16440 }, { "epoch": 7.697870348701147, "grad_norm": 0.7253009080886841, "learning_rate": 1.7355465463100984e-05, "loss": 0.2137, "step": 16450 }, { "epoch": 7.702550901006319, "grad_norm": 0.5696832537651062, "learning_rate": 1.734688001334033e-05, "loss": 0.2349, "step": 16460 }, { "epoch": 7.707231453311491, "grad_norm": 0.543479859828949, "learning_rate": 1.733829222960329e-05, "loss": 0.2179, "step": 16470 }, { "epoch": 7.711912005616663, "grad_norm": 0.5527817010879517, "learning_rate": 1.7329702117462375e-05, "loss": 0.2136, "step": 16480 }, { "epoch": 7.716592557921834, "grad_norm": 0.6224623322486877, "learning_rate": 1.7321109682491624e-05, "loss": 0.2217, "step": 16490 }, { "epoch": 7.721273110227007, "grad_norm": 0.5793954133987427, "learning_rate": 1.7312514930266568e-05, "loss": 0.2104, "step": 16500 }, { "epoch": 7.725953662532179, "grad_norm": 0.8345975279808044, "learning_rate": 1.730391786636425e-05, "loss": 0.235, "step": 16510 }, { "epoch": 7.730634214837351, "grad_norm": 0.8464972972869873, "learning_rate": 1.7295318496363217e-05, "loss": 0.2229, "step": 16520 }, { "epoch": 7.735314767142523, "grad_norm": 1.1662299633026123, "learning_rate": 1.72867168258435e-05, "loss": 0.2289, "step": 16530 }, { "epoch": 7.7399953194476945, "grad_norm": 0.8126525282859802, "learning_rate": 1.7278112860386634e-05, "loss": 0.2206, "step": 16540 }, { "epoch": 7.744675871752867, "grad_norm": 0.6799127459526062, "learning_rate": 1.7269506605575636e-05, "loss": 0.2282, "step": 16550 }, { "epoch": 7.749356424058039, "grad_norm": 0.6594548225402832, "learning_rate": 1.726089806699501e-05, "loss": 0.2364, "step": 16560 }, { "epoch": 7.754036976363211, "grad_norm": 0.5750401616096497, "learning_rate": 1.725228725023075e-05, "loss": 0.2261, "step": 16570 }, { "epoch": 7.758717528668383, "grad_norm": 0.7956562042236328, "learning_rate": 1.7243674160870315e-05, "loss": 0.2149, "step": 16580 }, { "epoch": 7.7633980809735545, "grad_norm": 0.8455357551574707, "learning_rate": 1.7235058804502646e-05, "loss": 0.2054, "step": 16590 }, { "epoch": 7.768078633278727, "grad_norm": 1.1800440549850464, "learning_rate": 1.7226441186718158e-05, "loss": 0.215, "step": 16600 }, { "epoch": 7.772759185583899, "grad_norm": 0.9977177381515503, "learning_rate": 1.721782131310873e-05, "loss": 0.2208, "step": 16610 }, { "epoch": 7.777439737889071, "grad_norm": 0.648064374923706, "learning_rate": 1.7209199189267694e-05, "loss": 0.2106, "step": 16620 }, { "epoch": 7.782120290194243, "grad_norm": 1.0630425214767456, "learning_rate": 1.7200574820789866e-05, "loss": 0.2234, "step": 16630 }, { "epoch": 7.786800842499415, "grad_norm": 0.6137318015098572, "learning_rate": 1.7191948213271497e-05, "loss": 0.2195, "step": 16640 }, { "epoch": 7.7914813948045865, "grad_norm": 0.5875081419944763, "learning_rate": 1.71833193723103e-05, "loss": 0.2172, "step": 16650 }, { "epoch": 7.796161947109759, "grad_norm": 0.8082150816917419, "learning_rate": 1.7174688303505445e-05, "loss": 0.2388, "step": 16660 }, { "epoch": 7.800842499414931, "grad_norm": 0.5513693690299988, "learning_rate": 1.716605501245752e-05, "loss": 0.2387, "step": 16670 }, { "epoch": 7.805523051720103, "grad_norm": 0.6960384249687195, "learning_rate": 1.7157419504768588e-05, "loss": 0.2059, "step": 16680 }, { "epoch": 7.810203604025275, "grad_norm": 1.021820306777954, "learning_rate": 1.7148781786042135e-05, "loss": 0.2155, "step": 16690 }, { "epoch": 7.8148841563304465, "grad_norm": 0.6635968685150146, "learning_rate": 1.714014186188308e-05, "loss": 0.2109, "step": 16700 }, { "epoch": 7.819564708635619, "grad_norm": 0.6821480989456177, "learning_rate": 1.713149973789778e-05, "loss": 0.2232, "step": 16710 }, { "epoch": 7.824245260940791, "grad_norm": 0.8709922432899475, "learning_rate": 1.7122855419694012e-05, "loss": 0.226, "step": 16720 }, { "epoch": 7.828925813245963, "grad_norm": 1.0378779172897339, "learning_rate": 1.711420891288098e-05, "loss": 0.22, "step": 16730 }, { "epoch": 7.833606365551135, "grad_norm": 0.6803951263427734, "learning_rate": 1.7105560223069306e-05, "loss": 0.207, "step": 16740 }, { "epoch": 7.838286917856307, "grad_norm": 0.9429142475128174, "learning_rate": 1.7096909355871037e-05, "loss": 0.2174, "step": 16750 }, { "epoch": 7.842967470161479, "grad_norm": 0.6312472820281982, "learning_rate": 1.7088256316899617e-05, "loss": 0.2206, "step": 16760 }, { "epoch": 7.847648022466651, "grad_norm": 0.9193050861358643, "learning_rate": 1.707960111176993e-05, "loss": 0.2122, "step": 16770 }, { "epoch": 7.852328574771823, "grad_norm": 0.729135274887085, "learning_rate": 1.7070943746098218e-05, "loss": 0.2203, "step": 16780 }, { "epoch": 7.857009127076995, "grad_norm": 0.5685886740684509, "learning_rate": 1.706228422550217e-05, "loss": 0.204, "step": 16790 }, { "epoch": 7.861689679382167, "grad_norm": 0.4920748472213745, "learning_rate": 1.705362255560085e-05, "loss": 0.2261, "step": 16800 }, { "epoch": 7.866370231687339, "grad_norm": 0.5565910935401917, "learning_rate": 1.7044958742014715e-05, "loss": 0.2203, "step": 16810 }, { "epoch": 7.871050783992511, "grad_norm": 0.5655674338340759, "learning_rate": 1.7036292790365626e-05, "loss": 0.2097, "step": 16820 }, { "epoch": 7.875731336297683, "grad_norm": 0.5826112031936646, "learning_rate": 1.702762470627683e-05, "loss": 0.2052, "step": 16830 }, { "epoch": 7.880411888602855, "grad_norm": 0.4730232357978821, "learning_rate": 1.7018954495372943e-05, "loss": 0.2317, "step": 16840 }, { "epoch": 7.885092440908027, "grad_norm": 0.8247324228286743, "learning_rate": 1.7010282163279976e-05, "loss": 0.2103, "step": 16850 }, { "epoch": 7.8897729932131995, "grad_norm": 0.8716912269592285, "learning_rate": 1.7001607715625305e-05, "loss": 0.2087, "step": 16860 }, { "epoch": 7.894453545518371, "grad_norm": 0.6119325757026672, "learning_rate": 1.6992931158037692e-05, "loss": 0.2402, "step": 16870 }, { "epoch": 7.899134097823543, "grad_norm": 0.5803152322769165, "learning_rate": 1.6984252496147267e-05, "loss": 0.2187, "step": 16880 }, { "epoch": 7.903814650128715, "grad_norm": 1.047824501991272, "learning_rate": 1.69755717355855e-05, "loss": 0.2228, "step": 16890 }, { "epoch": 7.908495202433887, "grad_norm": 0.6097688674926758, "learning_rate": 1.6966888881985256e-05, "loss": 0.2069, "step": 16900 }, { "epoch": 7.9131757547390595, "grad_norm": 1.068161129951477, "learning_rate": 1.695820394098074e-05, "loss": 0.2133, "step": 16910 }, { "epoch": 7.917856307044231, "grad_norm": 0.7641195058822632, "learning_rate": 1.694951691820752e-05, "loss": 0.2138, "step": 16920 }, { "epoch": 7.922536859349403, "grad_norm": 0.6565209031105042, "learning_rate": 1.6940827819302514e-05, "loss": 0.2075, "step": 16930 }, { "epoch": 7.927217411654575, "grad_norm": 0.5277952551841736, "learning_rate": 1.693213664990397e-05, "loss": 0.2178, "step": 16940 }, { "epoch": 7.931897963959747, "grad_norm": 0.5801923871040344, "learning_rate": 1.6923443415651504e-05, "loss": 0.2244, "step": 16950 }, { "epoch": 7.93657851626492, "grad_norm": 0.7866443395614624, "learning_rate": 1.691474812218606e-05, "loss": 0.2059, "step": 16960 }, { "epoch": 7.9412590685700915, "grad_norm": 0.6912701725959778, "learning_rate": 1.690605077514992e-05, "loss": 0.2103, "step": 16970 }, { "epoch": 7.945939620875263, "grad_norm": 1.0827720165252686, "learning_rate": 1.6897351380186694e-05, "loss": 0.2074, "step": 16980 }, { "epoch": 7.950620173180435, "grad_norm": 0.6208859086036682, "learning_rate": 1.6888649942941333e-05, "loss": 0.2325, "step": 16990 }, { "epoch": 7.955300725485607, "grad_norm": 0.7841382026672363, "learning_rate": 1.6879946469060093e-05, "loss": 0.2131, "step": 17000 }, { "epoch": 7.95998127779078, "grad_norm": 0.6996336579322815, "learning_rate": 1.6871240964190575e-05, "loss": 0.2299, "step": 17010 }, { "epoch": 7.9646618300959515, "grad_norm": 0.5900646448135376, "learning_rate": 1.686253343398168e-05, "loss": 0.2176, "step": 17020 }, { "epoch": 7.969342382401123, "grad_norm": 0.5607137084007263, "learning_rate": 1.685382388408363e-05, "loss": 0.2251, "step": 17030 }, { "epoch": 7.974022934706295, "grad_norm": 0.4745372235774994, "learning_rate": 1.6845112320147966e-05, "loss": 0.2086, "step": 17040 }, { "epoch": 7.978703487011467, "grad_norm": 0.8173272609710693, "learning_rate": 1.6836398747827518e-05, "loss": 0.2207, "step": 17050 }, { "epoch": 7.98338403931664, "grad_norm": 0.5551992654800415, "learning_rate": 1.6827683172776427e-05, "loss": 0.2123, "step": 17060 }, { "epoch": 7.988064591621812, "grad_norm": 0.5929774045944214, "learning_rate": 1.6818965600650145e-05, "loss": 0.2187, "step": 17070 }, { "epoch": 7.992745143926983, "grad_norm": 0.5504363775253296, "learning_rate": 1.68102460371054e-05, "loss": 0.2084, "step": 17080 }, { "epoch": 7.997425696232155, "grad_norm": 0.5703722238540649, "learning_rate": 1.680152448780022e-05, "loss": 0.2165, "step": 17090 }, { "epoch": 8.001872220922069, "grad_norm": 0.5487920641899109, "learning_rate": 1.6792800958393944e-05, "loss": 0.1932, "step": 17100 }, { "epoch": 8.006552773227241, "grad_norm": 0.5663619041442871, "learning_rate": 1.6784075454547155e-05, "loss": 0.2066, "step": 17110 }, { "epoch": 8.011233325532412, "grad_norm": 1.0433664321899414, "learning_rate": 1.6775347981921738e-05, "loss": 0.2048, "step": 17120 }, { "epoch": 8.015913877837585, "grad_norm": 0.5513502359390259, "learning_rate": 1.676661854618087e-05, "loss": 0.1958, "step": 17130 }, { "epoch": 8.020594430142756, "grad_norm": 0.7351158261299133, "learning_rate": 1.6757887152988972e-05, "loss": 0.1908, "step": 17140 }, { "epoch": 8.025274982447929, "grad_norm": 0.8257043957710266, "learning_rate": 1.6749153808011763e-05, "loss": 0.2086, "step": 17150 }, { "epoch": 8.029955534753102, "grad_norm": 0.5494638085365295, "learning_rate": 1.6740418516916204e-05, "loss": 0.1978, "step": 17160 }, { "epoch": 8.034636087058272, "grad_norm": 0.6293280720710754, "learning_rate": 1.673168128537054e-05, "loss": 0.1872, "step": 17170 }, { "epoch": 8.039316639363445, "grad_norm": 0.5867990851402283, "learning_rate": 1.6722942119044254e-05, "loss": 0.1888, "step": 17180 }, { "epoch": 8.043997191668616, "grad_norm": 0.566412627696991, "learning_rate": 1.6714201023608112e-05, "loss": 0.1981, "step": 17190 }, { "epoch": 8.048677743973789, "grad_norm": 0.5915672183036804, "learning_rate": 1.6705458004734104e-05, "loss": 0.1957, "step": 17200 }, { "epoch": 8.053358296278962, "grad_norm": 0.5123924016952515, "learning_rate": 1.6696713068095484e-05, "loss": 0.1965, "step": 17210 }, { "epoch": 8.058038848584133, "grad_norm": 1.0926095247268677, "learning_rate": 1.6687966219366748e-05, "loss": 0.2121, "step": 17220 }, { "epoch": 8.062719400889305, "grad_norm": 0.5407959222793579, "learning_rate": 1.6679217464223624e-05, "loss": 0.2042, "step": 17230 }, { "epoch": 8.067399953194476, "grad_norm": 0.6248275637626648, "learning_rate": 1.6670466808343097e-05, "loss": 0.1969, "step": 17240 }, { "epoch": 8.072080505499649, "grad_norm": 0.8239249587059021, "learning_rate": 1.666171425740336e-05, "loss": 0.203, "step": 17250 }, { "epoch": 8.076761057804822, "grad_norm": 0.962350606918335, "learning_rate": 1.6652959817083854e-05, "loss": 0.2051, "step": 17260 }, { "epoch": 8.081441610109993, "grad_norm": 0.6880154609680176, "learning_rate": 1.6644203493065245e-05, "loss": 0.1992, "step": 17270 }, { "epoch": 8.086122162415165, "grad_norm": 0.8263044953346252, "learning_rate": 1.6635445291029404e-05, "loss": 0.1972, "step": 17280 }, { "epoch": 8.090802714720336, "grad_norm": 0.5736439228057861, "learning_rate": 1.662668521665944e-05, "loss": 0.1972, "step": 17290 }, { "epoch": 8.095483267025509, "grad_norm": 0.8065967559814453, "learning_rate": 1.6617923275639675e-05, "loss": 0.2169, "step": 17300 }, { "epoch": 8.100163819330682, "grad_norm": 0.585658848285675, "learning_rate": 1.660915947365563e-05, "loss": 0.2152, "step": 17310 }, { "epoch": 8.104844371635853, "grad_norm": 0.7794381976127625, "learning_rate": 1.6600393816394046e-05, "loss": 0.209, "step": 17320 }, { "epoch": 8.109524923941025, "grad_norm": 0.5614486932754517, "learning_rate": 1.659162630954286e-05, "loss": 0.201, "step": 17330 }, { "epoch": 8.114205476246196, "grad_norm": 0.9003437161445618, "learning_rate": 1.6582856958791216e-05, "loss": 0.1935, "step": 17340 }, { "epoch": 8.118886028551369, "grad_norm": 0.8390218615531921, "learning_rate": 1.6574085769829445e-05, "loss": 0.2007, "step": 17350 }, { "epoch": 8.123566580856542, "grad_norm": 0.6865512132644653, "learning_rate": 1.6565312748349077e-05, "loss": 0.1959, "step": 17360 }, { "epoch": 8.128247133161713, "grad_norm": 0.578255295753479, "learning_rate": 1.6556537900042835e-05, "loss": 0.2035, "step": 17370 }, { "epoch": 8.132927685466885, "grad_norm": 0.5532321929931641, "learning_rate": 1.654776123060462e-05, "loss": 0.2055, "step": 17380 }, { "epoch": 8.137608237772056, "grad_norm": 0.6991555690765381, "learning_rate": 1.6538982745729514e-05, "loss": 0.1947, "step": 17390 }, { "epoch": 8.14228879007723, "grad_norm": 0.6085647344589233, "learning_rate": 1.653020245111378e-05, "loss": 0.1961, "step": 17400 }, { "epoch": 8.146969342382402, "grad_norm": 0.5962769985198975, "learning_rate": 1.652142035245487e-05, "loss": 0.1912, "step": 17410 }, { "epoch": 8.151649894687573, "grad_norm": 0.8391318321228027, "learning_rate": 1.6512636455451378e-05, "loss": 0.1978, "step": 17420 }, { "epoch": 8.156330446992746, "grad_norm": 0.6222918033599854, "learning_rate": 1.6503850765803083e-05, "loss": 0.1984, "step": 17430 }, { "epoch": 8.161010999297917, "grad_norm": 0.5052781105041504, "learning_rate": 1.6495063289210922e-05, "loss": 0.2138, "step": 17440 }, { "epoch": 8.16569155160309, "grad_norm": 0.7160609364509583, "learning_rate": 1.6486274031376998e-05, "loss": 0.1973, "step": 17450 }, { "epoch": 8.170372103908262, "grad_norm": 0.5487427711486816, "learning_rate": 1.6477482998004563e-05, "loss": 0.201, "step": 17460 }, { "epoch": 8.175052656213433, "grad_norm": 0.5513998866081238, "learning_rate": 1.646869019479802e-05, "loss": 0.1967, "step": 17470 }, { "epoch": 8.179733208518606, "grad_norm": 0.7406542301177979, "learning_rate": 1.645989562746293e-05, "loss": 0.2138, "step": 17480 }, { "epoch": 8.184413760823777, "grad_norm": 0.5216871500015259, "learning_rate": 1.6451099301705983e-05, "loss": 0.1863, "step": 17490 }, { "epoch": 8.18909431312895, "grad_norm": 0.5149765014648438, "learning_rate": 1.6442301223235028e-05, "loss": 0.1905, "step": 17500 }, { "epoch": 8.193774865434122, "grad_norm": 0.6732597351074219, "learning_rate": 1.6433501397759036e-05, "loss": 0.1959, "step": 17510 }, { "epoch": 8.198455417739293, "grad_norm": 0.6451987028121948, "learning_rate": 1.6424699830988123e-05, "loss": 0.1957, "step": 17520 }, { "epoch": 8.203135970044466, "grad_norm": 0.6118354797363281, "learning_rate": 1.641589652863353e-05, "loss": 0.1977, "step": 17530 }, { "epoch": 8.207816522349637, "grad_norm": 0.6570689678192139, "learning_rate": 1.6407091496407622e-05, "loss": 0.2006, "step": 17540 }, { "epoch": 8.21249707465481, "grad_norm": 0.8089761137962341, "learning_rate": 1.639828474002389e-05, "loss": 0.196, "step": 17550 }, { "epoch": 8.217177626959982, "grad_norm": 0.565295398235321, "learning_rate": 1.6389476265196936e-05, "loss": 0.2037, "step": 17560 }, { "epoch": 8.221858179265153, "grad_norm": 0.741314172744751, "learning_rate": 1.63806660776425e-05, "loss": 0.2023, "step": 17570 }, { "epoch": 8.226538731570326, "grad_norm": 0.4988119602203369, "learning_rate": 1.6371854183077402e-05, "loss": 0.2021, "step": 17580 }, { "epoch": 8.231219283875497, "grad_norm": 0.727222740650177, "learning_rate": 1.6363040587219592e-05, "loss": 0.1997, "step": 17590 }, { "epoch": 8.23589983618067, "grad_norm": 0.7387743592262268, "learning_rate": 1.635422529578811e-05, "loss": 0.1943, "step": 17600 }, { "epoch": 8.240580388485842, "grad_norm": 0.7142209410667419, "learning_rate": 1.634540831450311e-05, "loss": 0.2103, "step": 17610 }, { "epoch": 8.245260940791013, "grad_norm": 0.7132827043533325, "learning_rate": 1.6336589649085832e-05, "loss": 0.2045, "step": 17620 }, { "epoch": 8.249941493096186, "grad_norm": 0.5865382552146912, "learning_rate": 1.6327769305258614e-05, "loss": 0.1897, "step": 17630 }, { "epoch": 8.254622045401357, "grad_norm": 0.8972445726394653, "learning_rate": 1.6318947288744878e-05, "loss": 0.1878, "step": 17640 }, { "epoch": 8.25930259770653, "grad_norm": 1.0295881032943726, "learning_rate": 1.6310123605269135e-05, "loss": 0.1849, "step": 17650 }, { "epoch": 8.2639831500117, "grad_norm": 0.5168728232383728, "learning_rate": 1.6301298260556988e-05, "loss": 0.2015, "step": 17660 }, { "epoch": 8.268663702316873, "grad_norm": 0.5525350570678711, "learning_rate": 1.6292471260335093e-05, "loss": 0.1986, "step": 17670 }, { "epoch": 8.273344254622046, "grad_norm": 0.8121612071990967, "learning_rate": 1.6283642610331202e-05, "loss": 0.1997, "step": 17680 }, { "epoch": 8.278024806927217, "grad_norm": 0.5691242814064026, "learning_rate": 1.6274812316274125e-05, "loss": 0.2032, "step": 17690 }, { "epoch": 8.28270535923239, "grad_norm": 0.5862091183662415, "learning_rate": 1.626598038389375e-05, "loss": 0.1899, "step": 17700 }, { "epoch": 8.287385911537562, "grad_norm": 0.6377902030944824, "learning_rate": 1.625714681892102e-05, "loss": 0.1908, "step": 17710 }, { "epoch": 8.292066463842733, "grad_norm": 0.5718311071395874, "learning_rate": 1.624831162708794e-05, "loss": 0.1952, "step": 17720 }, { "epoch": 8.296747016147906, "grad_norm": 1.4448940753936768, "learning_rate": 1.6239474814127565e-05, "loss": 0.212, "step": 17730 }, { "epoch": 8.301427568453077, "grad_norm": 0.6364192366600037, "learning_rate": 1.6230636385774016e-05, "loss": 0.2173, "step": 17740 }, { "epoch": 8.30610812075825, "grad_norm": 0.503524661064148, "learning_rate": 1.6221796347762445e-05, "loss": 0.2075, "step": 17750 }, { "epoch": 8.31078867306342, "grad_norm": 0.6923774480819702, "learning_rate": 1.6212954705829058e-05, "loss": 0.1975, "step": 17760 }, { "epoch": 8.315469225368593, "grad_norm": 0.7466774582862854, "learning_rate": 1.6204111465711104e-05, "loss": 0.1954, "step": 17770 }, { "epoch": 8.320149777673766, "grad_norm": 0.7049933671951294, "learning_rate": 1.6195266633146866e-05, "loss": 0.1988, "step": 17780 }, { "epoch": 8.324830329978937, "grad_norm": 0.7495594024658203, "learning_rate": 1.6186420213875653e-05, "loss": 0.203, "step": 17790 }, { "epoch": 8.32951088228411, "grad_norm": 0.5477129817008972, "learning_rate": 1.617757221363782e-05, "loss": 0.21, "step": 17800 }, { "epoch": 8.33419143458928, "grad_norm": 0.8126005530357361, "learning_rate": 1.616872263817473e-05, "loss": 0.2015, "step": 17810 }, { "epoch": 8.338871986894453, "grad_norm": 0.6757808327674866, "learning_rate": 1.615987149322878e-05, "loss": 0.1945, "step": 17820 }, { "epoch": 8.343552539199626, "grad_norm": 0.5201929807662964, "learning_rate": 1.6151018784543387e-05, "loss": 0.2034, "step": 17830 }, { "epoch": 8.348233091504797, "grad_norm": 0.7576472759246826, "learning_rate": 1.614216451786297e-05, "loss": 0.2082, "step": 17840 }, { "epoch": 8.35291364380997, "grad_norm": 0.6445069909095764, "learning_rate": 1.6133308698932965e-05, "loss": 0.2047, "step": 17850 }, { "epoch": 8.35759419611514, "grad_norm": 0.6340272426605225, "learning_rate": 1.612445133349982e-05, "loss": 0.1915, "step": 17860 }, { "epoch": 8.362274748420313, "grad_norm": 0.57203209400177, "learning_rate": 1.6115592427310988e-05, "loss": 0.1987, "step": 17870 }, { "epoch": 8.366955300725486, "grad_norm": 0.5827383399009705, "learning_rate": 1.6106731986114908e-05, "loss": 0.2067, "step": 17880 }, { "epoch": 8.371635853030657, "grad_norm": 0.5522224307060242, "learning_rate": 1.609787001566103e-05, "loss": 0.1975, "step": 17890 }, { "epoch": 8.37631640533583, "grad_norm": 1.1273924112319946, "learning_rate": 1.6089006521699794e-05, "loss": 0.211, "step": 17900 }, { "epoch": 8.380996957641, "grad_norm": 0.8888628482818604, "learning_rate": 1.6080141509982614e-05, "loss": 0.1996, "step": 17910 }, { "epoch": 8.385677509946174, "grad_norm": 0.528516411781311, "learning_rate": 1.6071274986261913e-05, "loss": 0.1941, "step": 17920 }, { "epoch": 8.390358062251346, "grad_norm": 0.635844886302948, "learning_rate": 1.606240695629107e-05, "loss": 0.2072, "step": 17930 }, { "epoch": 8.395038614556517, "grad_norm": 1.0586057901382446, "learning_rate": 1.605353742582446e-05, "loss": 0.205, "step": 17940 }, { "epoch": 8.39971916686169, "grad_norm": 0.692442774772644, "learning_rate": 1.6044666400617425e-05, "loss": 0.1988, "step": 17950 }, { "epoch": 8.404399719166861, "grad_norm": 0.5414044857025146, "learning_rate": 1.6035793886426272e-05, "loss": 0.1801, "step": 17960 }, { "epoch": 8.409080271472034, "grad_norm": 1.0629262924194336, "learning_rate": 1.6026919889008288e-05, "loss": 0.2063, "step": 17970 }, { "epoch": 8.413760823777206, "grad_norm": 2.3731377124786377, "learning_rate": 1.60180444141217e-05, "loss": 0.1964, "step": 17980 }, { "epoch": 8.418441376082377, "grad_norm": 0.4686722755432129, "learning_rate": 1.6009167467525723e-05, "loss": 0.197, "step": 17990 }, { "epoch": 8.42312192838755, "grad_norm": 0.707685112953186, "learning_rate": 1.6000289054980502e-05, "loss": 0.1978, "step": 18000 }, { "epoch": 8.427802480692721, "grad_norm": 0.595973551273346, "learning_rate": 1.599140918224714e-05, "loss": 0.1964, "step": 18010 }, { "epoch": 8.432483032997894, "grad_norm": 0.5438023209571838, "learning_rate": 1.59825278550877e-05, "loss": 0.202, "step": 18020 }, { "epoch": 8.437163585303066, "grad_norm": 0.6349436640739441, "learning_rate": 1.597364507926517e-05, "loss": 0.1826, "step": 18030 }, { "epoch": 8.441844137608237, "grad_norm": 0.9415587186813354, "learning_rate": 1.596476086054349e-05, "loss": 0.1908, "step": 18040 }, { "epoch": 8.44652468991341, "grad_norm": 0.5518685579299927, "learning_rate": 1.5955875204687534e-05, "loss": 0.1908, "step": 18050 }, { "epoch": 8.451205242218581, "grad_norm": 0.6669245958328247, "learning_rate": 1.5946988117463106e-05, "loss": 0.207, "step": 18060 }, { "epoch": 8.455885794523754, "grad_norm": 0.7358018755912781, "learning_rate": 1.5938099604636942e-05, "loss": 0.2026, "step": 18070 }, { "epoch": 8.460566346828926, "grad_norm": 1.000209927558899, "learning_rate": 1.59292096719767e-05, "loss": 0.2164, "step": 18080 }, { "epoch": 8.465246899134097, "grad_norm": 0.6907699108123779, "learning_rate": 1.592031832525096e-05, "loss": 0.212, "step": 18090 }, { "epoch": 8.46992745143927, "grad_norm": 0.6223982572555542, "learning_rate": 1.591142557022922e-05, "loss": 0.1943, "step": 18100 }, { "epoch": 8.474608003744441, "grad_norm": 0.6072043180465698, "learning_rate": 1.5902531412681896e-05, "loss": 0.1992, "step": 18110 }, { "epoch": 8.479288556049614, "grad_norm": 0.940746009349823, "learning_rate": 1.5893635858380304e-05, "loss": 0.2038, "step": 18120 }, { "epoch": 8.483969108354787, "grad_norm": 0.5847403407096863, "learning_rate": 1.5884738913096683e-05, "loss": 0.1921, "step": 18130 }, { "epoch": 8.488649660659958, "grad_norm": 0.8999812006950378, "learning_rate": 1.5875840582604156e-05, "loss": 0.205, "step": 18140 }, { "epoch": 8.49333021296513, "grad_norm": 0.7508083581924438, "learning_rate": 1.5866940872676753e-05, "loss": 0.1827, "step": 18150 }, { "epoch": 8.498010765270301, "grad_norm": 0.5019994378089905, "learning_rate": 1.5858039789089405e-05, "loss": 0.1978, "step": 18160 }, { "epoch": 8.502691317575474, "grad_norm": 0.5321983695030212, "learning_rate": 1.5849137337617915e-05, "loss": 0.1988, "step": 18170 }, { "epoch": 8.507371869880647, "grad_norm": 0.7220326662063599, "learning_rate": 1.5840233524039008e-05, "loss": 0.1998, "step": 18180 }, { "epoch": 8.512052422185818, "grad_norm": 0.7304027676582336, "learning_rate": 1.5831328354130257e-05, "loss": 0.1937, "step": 18190 }, { "epoch": 8.51673297449099, "grad_norm": 0.930352509021759, "learning_rate": 1.5822421833670132e-05, "loss": 0.1935, "step": 18200 }, { "epoch": 8.521413526796161, "grad_norm": 0.6743907332420349, "learning_rate": 1.5813513968437982e-05, "loss": 0.2062, "step": 18210 }, { "epoch": 8.526094079101334, "grad_norm": 0.559935450553894, "learning_rate": 1.5804604764214015e-05, "loss": 0.1856, "step": 18220 }, { "epoch": 8.530774631406507, "grad_norm": 0.5745419859886169, "learning_rate": 1.5795694226779325e-05, "loss": 0.1874, "step": 18230 }, { "epoch": 8.535455183711678, "grad_norm": 0.9124456644058228, "learning_rate": 1.5786782361915867e-05, "loss": 0.1904, "step": 18240 }, { "epoch": 8.54013573601685, "grad_norm": 0.5357590913772583, "learning_rate": 1.577786917540644e-05, "loss": 0.2121, "step": 18250 }, { "epoch": 8.544816288322021, "grad_norm": 0.6190833449363708, "learning_rate": 1.5768954673034726e-05, "loss": 0.1929, "step": 18260 }, { "epoch": 8.549496840627194, "grad_norm": 0.7221451997756958, "learning_rate": 1.5760038860585243e-05, "loss": 0.211, "step": 18270 }, { "epoch": 8.554177392932367, "grad_norm": 0.5702475905418396, "learning_rate": 1.5751121743843365e-05, "loss": 0.2021, "step": 18280 }, { "epoch": 8.558857945237538, "grad_norm": 0.6748855710029602, "learning_rate": 1.5742203328595313e-05, "loss": 0.1991, "step": 18290 }, { "epoch": 8.56353849754271, "grad_norm": 0.5505242943763733, "learning_rate": 1.5733283620628153e-05, "loss": 0.186, "step": 18300 }, { "epoch": 8.568219049847881, "grad_norm": 0.6877039074897766, "learning_rate": 1.572436262572978e-05, "loss": 0.2022, "step": 18310 }, { "epoch": 8.572899602153054, "grad_norm": 0.570561945438385, "learning_rate": 1.5715440349688938e-05, "loss": 0.2057, "step": 18320 }, { "epoch": 8.577580154458227, "grad_norm": 0.576374888420105, "learning_rate": 1.5706516798295186e-05, "loss": 0.2028, "step": 18330 }, { "epoch": 8.582260706763398, "grad_norm": 0.7697909474372864, "learning_rate": 1.569759197733893e-05, "loss": 0.1906, "step": 18340 }, { "epoch": 8.58694125906857, "grad_norm": 0.6249477863311768, "learning_rate": 1.5688665892611386e-05, "loss": 0.1839, "step": 18350 }, { "epoch": 8.591621811373741, "grad_norm": 0.49159789085388184, "learning_rate": 1.5679738549904586e-05, "loss": 0.192, "step": 18360 }, { "epoch": 8.596302363678914, "grad_norm": 0.8054395914077759, "learning_rate": 1.5670809955011397e-05, "loss": 0.1887, "step": 18370 }, { "epoch": 8.600982915984087, "grad_norm": 0.9482566714286804, "learning_rate": 1.566188011372548e-05, "loss": 0.1916, "step": 18380 }, { "epoch": 8.605663468289258, "grad_norm": 1.0352697372436523, "learning_rate": 1.5652949031841315e-05, "loss": 0.1855, "step": 18390 }, { "epoch": 8.61034402059443, "grad_norm": 0.5629974007606506, "learning_rate": 1.564401671515418e-05, "loss": 0.1968, "step": 18400 }, { "epoch": 8.615024572899602, "grad_norm": 0.6988675594329834, "learning_rate": 1.563508316946017e-05, "loss": 0.1852, "step": 18410 }, { "epoch": 8.619705125204774, "grad_norm": 0.8802774548530579, "learning_rate": 1.5626148400556152e-05, "loss": 0.2007, "step": 18420 }, { "epoch": 8.624385677509947, "grad_norm": 0.6155832409858704, "learning_rate": 1.5617212414239806e-05, "loss": 0.196, "step": 18430 }, { "epoch": 8.629066229815118, "grad_norm": 0.517564058303833, "learning_rate": 1.5608275216309598e-05, "loss": 0.1986, "step": 18440 }, { "epoch": 8.63374678212029, "grad_norm": 0.6603909134864807, "learning_rate": 1.559933681256477e-05, "loss": 0.2139, "step": 18450 }, { "epoch": 8.638427334425462, "grad_norm": 0.7864003777503967, "learning_rate": 1.5590397208805372e-05, "loss": 0.2055, "step": 18460 }, { "epoch": 8.643107886730634, "grad_norm": 0.7669918537139893, "learning_rate": 1.5581456410832206e-05, "loss": 0.209, "step": 18470 }, { "epoch": 8.647788439035807, "grad_norm": 0.9717556238174438, "learning_rate": 1.557251442444685e-05, "loss": 0.1951, "step": 18480 }, { "epoch": 8.652468991340978, "grad_norm": 1.3998225927352905, "learning_rate": 1.5563571255451678e-05, "loss": 0.1994, "step": 18490 }, { "epoch": 8.65714954364615, "grad_norm": 0.7414512038230896, "learning_rate": 1.5554626909649803e-05, "loss": 0.2024, "step": 18500 }, { "epoch": 8.661830095951322, "grad_norm": 0.6624657511711121, "learning_rate": 1.554568139284512e-05, "loss": 0.1849, "step": 18510 }, { "epoch": 8.666510648256494, "grad_norm": 1.0509225130081177, "learning_rate": 1.5536734710842283e-05, "loss": 0.1881, "step": 18520 }, { "epoch": 8.671191200561665, "grad_norm": 0.5837467312812805, "learning_rate": 1.552778686944668e-05, "loss": 0.1951, "step": 18530 }, { "epoch": 8.675871752866838, "grad_norm": 0.6522549986839294, "learning_rate": 1.5518837874464488e-05, "loss": 0.1998, "step": 18540 }, { "epoch": 8.68055230517201, "grad_norm": 0.5137141346931458, "learning_rate": 1.5509887731702598e-05, "loss": 0.1766, "step": 18550 }, { "epoch": 8.685232857477182, "grad_norm": 0.5123811960220337, "learning_rate": 1.5500936446968666e-05, "loss": 0.1888, "step": 18560 }, { "epoch": 8.689913409782354, "grad_norm": 0.5945032238960266, "learning_rate": 1.5491984026071084e-05, "loss": 0.1991, "step": 18570 }, { "epoch": 8.694593962087527, "grad_norm": 0.710303783416748, "learning_rate": 1.5483030474818982e-05, "loss": 0.1866, "step": 18580 }, { "epoch": 8.699274514392698, "grad_norm": 0.5862831473350525, "learning_rate": 1.5474075799022218e-05, "loss": 0.1813, "step": 18590 }, { "epoch": 8.703955066697871, "grad_norm": 0.53226637840271, "learning_rate": 1.546512000449139e-05, "loss": 0.201, "step": 18600 }, { "epoch": 8.708635619003042, "grad_norm": 0.7463897466659546, "learning_rate": 1.5456163097037804e-05, "loss": 0.192, "step": 18610 }, { "epoch": 8.713316171308215, "grad_norm": 0.6590136289596558, "learning_rate": 1.5447205082473508e-05, "loss": 0.1911, "step": 18620 }, { "epoch": 8.717996723613386, "grad_norm": 0.5896122455596924, "learning_rate": 1.5438245966611272e-05, "loss": 0.1916, "step": 18630 }, { "epoch": 8.722677275918558, "grad_norm": 0.9076349139213562, "learning_rate": 1.5429285755264548e-05, "loss": 0.1975, "step": 18640 }, { "epoch": 8.727357828223731, "grad_norm": 0.6340839862823486, "learning_rate": 1.5420324454247528e-05, "loss": 0.2025, "step": 18650 }, { "epoch": 8.732038380528902, "grad_norm": 0.598698079586029, "learning_rate": 1.541136206937511e-05, "loss": 0.198, "step": 18660 }, { "epoch": 8.736718932834075, "grad_norm": 0.6346487998962402, "learning_rate": 1.5402398606462876e-05, "loss": 0.1918, "step": 18670 }, { "epoch": 8.741399485139247, "grad_norm": 0.5139946937561035, "learning_rate": 1.539343407132713e-05, "loss": 0.2092, "step": 18680 }, { "epoch": 8.746080037444418, "grad_norm": 0.5667792558670044, "learning_rate": 1.538446846978486e-05, "loss": 0.2017, "step": 18690 }, { "epoch": 8.750760589749591, "grad_norm": 0.8776169419288635, "learning_rate": 1.537550180765374e-05, "loss": 0.1934, "step": 18700 }, { "epoch": 8.755441142054762, "grad_norm": 0.7074890732765198, "learning_rate": 1.5366534090752146e-05, "loss": 0.1937, "step": 18710 }, { "epoch": 8.760121694359935, "grad_norm": 0.5618233680725098, "learning_rate": 1.5357565324899132e-05, "loss": 0.1988, "step": 18720 }, { "epoch": 8.764802246665106, "grad_norm": 0.5169440507888794, "learning_rate": 1.5348595515914436e-05, "loss": 0.1985, "step": 18730 }, { "epoch": 8.769482798970278, "grad_norm": 0.6666449904441833, "learning_rate": 1.533962466961846e-05, "loss": 0.1932, "step": 18740 }, { "epoch": 8.774163351275451, "grad_norm": 0.5933835506439209, "learning_rate": 1.5330652791832295e-05, "loss": 0.1922, "step": 18750 }, { "epoch": 8.778843903580622, "grad_norm": 0.5872207880020142, "learning_rate": 1.5321679888377695e-05, "loss": 0.1798, "step": 18760 }, { "epoch": 8.783524455885795, "grad_norm": 0.5674715042114258, "learning_rate": 1.531270596507708e-05, "loss": 0.1984, "step": 18770 }, { "epoch": 8.788205008190966, "grad_norm": 0.5657553672790527, "learning_rate": 1.5303731027753524e-05, "loss": 0.1964, "step": 18780 }, { "epoch": 8.792885560496138, "grad_norm": 1.0846128463745117, "learning_rate": 1.529475508223078e-05, "loss": 0.1854, "step": 18790 }, { "epoch": 8.797566112801311, "grad_norm": 0.503665030002594, "learning_rate": 1.5285778134333234e-05, "loss": 0.2022, "step": 18800 }, { "epoch": 8.802246665106482, "grad_norm": 0.9628728032112122, "learning_rate": 1.5276800189885924e-05, "loss": 0.2059, "step": 18810 }, { "epoch": 8.806927217411655, "grad_norm": 0.5476600527763367, "learning_rate": 1.5267821254714554e-05, "loss": 0.1976, "step": 18820 }, { "epoch": 8.811607769716826, "grad_norm": 0.8080584406852722, "learning_rate": 1.5258841334645447e-05, "loss": 0.1939, "step": 18830 }, { "epoch": 8.816288322021999, "grad_norm": 0.6341618299484253, "learning_rate": 1.5249860435505578e-05, "loss": 0.1847, "step": 18840 }, { "epoch": 8.820968874327171, "grad_norm": 0.7243251204490662, "learning_rate": 1.5240878563122562e-05, "loss": 0.2013, "step": 18850 }, { "epoch": 8.825649426632342, "grad_norm": 0.8071234226226807, "learning_rate": 1.5231895723324628e-05, "loss": 0.2033, "step": 18860 }, { "epoch": 8.830329978937515, "grad_norm": 1.0044448375701904, "learning_rate": 1.5222911921940646e-05, "loss": 0.1928, "step": 18870 }, { "epoch": 8.835010531242686, "grad_norm": 0.7434523701667786, "learning_rate": 1.5213927164800113e-05, "loss": 0.1931, "step": 18880 }, { "epoch": 8.839691083547859, "grad_norm": 0.6961126327514648, "learning_rate": 1.5204941457733133e-05, "loss": 0.1987, "step": 18890 }, { "epoch": 8.844371635853031, "grad_norm": 0.7147197127342224, "learning_rate": 1.519595480657044e-05, "loss": 0.1918, "step": 18900 }, { "epoch": 8.849052188158202, "grad_norm": 0.5392239689826965, "learning_rate": 1.5186967217143373e-05, "loss": 0.1826, "step": 18910 }, { "epoch": 8.853732740463375, "grad_norm": 0.6056162714958191, "learning_rate": 1.5177978695283873e-05, "loss": 0.1879, "step": 18920 }, { "epoch": 8.858413292768546, "grad_norm": 0.5024731159210205, "learning_rate": 1.5168989246824508e-05, "loss": 0.2006, "step": 18930 }, { "epoch": 8.863093845073719, "grad_norm": 0.5041071176528931, "learning_rate": 1.5159998877598424e-05, "loss": 0.2037, "step": 18940 }, { "epoch": 8.867774397378891, "grad_norm": 1.5313140153884888, "learning_rate": 1.5151007593439376e-05, "loss": 0.1898, "step": 18950 }, { "epoch": 8.872454949684062, "grad_norm": 0.48913198709487915, "learning_rate": 1.5142015400181713e-05, "loss": 0.1789, "step": 18960 }, { "epoch": 8.877135501989235, "grad_norm": 0.8180843591690063, "learning_rate": 1.5133022303660365e-05, "loss": 0.1917, "step": 18970 }, { "epoch": 8.881816054294406, "grad_norm": 0.5254727005958557, "learning_rate": 1.5124028309710864e-05, "loss": 0.185, "step": 18980 }, { "epoch": 8.886496606599579, "grad_norm": 0.594558835029602, "learning_rate": 1.511503342416931e-05, "loss": 0.1976, "step": 18990 }, { "epoch": 8.891177158904751, "grad_norm": 0.6450402140617371, "learning_rate": 1.5106037652872384e-05, "loss": 0.2034, "step": 19000 }, { "epoch": 8.895857711209922, "grad_norm": 0.6091890335083008, "learning_rate": 1.5097041001657352e-05, "loss": 0.1922, "step": 19010 }, { "epoch": 8.900538263515095, "grad_norm": 0.5556630492210388, "learning_rate": 1.5088043476362035e-05, "loss": 0.1822, "step": 19020 }, { "epoch": 8.905218815820266, "grad_norm": 0.6024225950241089, "learning_rate": 1.5079045082824835e-05, "loss": 0.1976, "step": 19030 }, { "epoch": 8.909899368125439, "grad_norm": 0.7211591005325317, "learning_rate": 1.5070045826884708e-05, "loss": 0.1866, "step": 19040 }, { "epoch": 8.914579920430612, "grad_norm": 0.5622275471687317, "learning_rate": 1.5061045714381177e-05, "loss": 0.1797, "step": 19050 }, { "epoch": 8.919260472735782, "grad_norm": 0.6139369010925293, "learning_rate": 1.5052044751154318e-05, "loss": 0.2044, "step": 19060 }, { "epoch": 8.923941025040955, "grad_norm": 0.6242719888687134, "learning_rate": 1.504304294304475e-05, "loss": 0.1911, "step": 19070 }, { "epoch": 8.928621577346126, "grad_norm": 0.6522964239120483, "learning_rate": 1.5034040295893658e-05, "loss": 0.1739, "step": 19080 }, { "epoch": 8.933302129651299, "grad_norm": 0.6306370496749878, "learning_rate": 1.5025036815542759e-05, "loss": 0.1907, "step": 19090 }, { "epoch": 8.937982681956472, "grad_norm": 0.6872206330299377, "learning_rate": 1.5016032507834313e-05, "loss": 0.1922, "step": 19100 }, { "epoch": 8.942663234261643, "grad_norm": 0.5520445704460144, "learning_rate": 1.500702737861112e-05, "loss": 0.1925, "step": 19110 }, { "epoch": 8.947343786566815, "grad_norm": 0.679347813129425, "learning_rate": 1.4998021433716506e-05, "loss": 0.1911, "step": 19120 }, { "epoch": 8.952024338871986, "grad_norm": 0.78802889585495, "learning_rate": 1.4989014678994331e-05, "loss": 0.1742, "step": 19130 }, { "epoch": 8.956704891177159, "grad_norm": 0.6569318771362305, "learning_rate": 1.4980007120288987e-05, "loss": 0.1927, "step": 19140 }, { "epoch": 8.961385443482332, "grad_norm": 0.6577615141868591, "learning_rate": 1.4970998763445373e-05, "loss": 0.1943, "step": 19150 }, { "epoch": 8.966065995787503, "grad_norm": 0.5828383564949036, "learning_rate": 1.4961989614308922e-05, "loss": 0.199, "step": 19160 }, { "epoch": 8.970746548092675, "grad_norm": 0.5133561491966248, "learning_rate": 1.4952979678725564e-05, "loss": 0.2053, "step": 19170 }, { "epoch": 8.975427100397846, "grad_norm": 0.5556127429008484, "learning_rate": 1.4943968962541755e-05, "loss": 0.1895, "step": 19180 }, { "epoch": 8.980107652703019, "grad_norm": 0.6363630890846252, "learning_rate": 1.4934957471604446e-05, "loss": 0.1891, "step": 19190 }, { "epoch": 8.984788205008192, "grad_norm": 0.9434332847595215, "learning_rate": 1.4925945211761102e-05, "loss": 0.1963, "step": 19200 }, { "epoch": 8.989468757313363, "grad_norm": 0.6762727499008179, "learning_rate": 1.4916932188859677e-05, "loss": 0.2146, "step": 19210 }, { "epoch": 8.994149309618535, "grad_norm": 0.6007422208786011, "learning_rate": 1.4907918408748625e-05, "loss": 0.1864, "step": 19220 }, { "epoch": 8.998829861923706, "grad_norm": 0.5944421887397766, "learning_rate": 1.4898903877276884e-05, "loss": 0.2086, "step": 19230 }, { "epoch": 9.00327638661362, "grad_norm": 0.8908073902130127, "learning_rate": 1.4889888600293896e-05, "loss": 0.1797, "step": 19240 }, { "epoch": 9.007956938918792, "grad_norm": 0.5756704807281494, "learning_rate": 1.4880872583649569e-05, "loss": 0.1787, "step": 19250 }, { "epoch": 9.012637491223964, "grad_norm": 0.5032514333724976, "learning_rate": 1.4871855833194298e-05, "loss": 0.1744, "step": 19260 }, { "epoch": 9.017318043529137, "grad_norm": 0.60345059633255, "learning_rate": 1.486283835477896e-05, "loss": 0.1851, "step": 19270 }, { "epoch": 9.021998595834308, "grad_norm": 0.5423219203948975, "learning_rate": 1.4853820154254896e-05, "loss": 0.1705, "step": 19280 }, { "epoch": 9.02667914813948, "grad_norm": 0.9632617235183716, "learning_rate": 1.4844801237473913e-05, "loss": 0.1746, "step": 19290 }, { "epoch": 9.031359700444652, "grad_norm": 0.599894106388092, "learning_rate": 1.4835781610288296e-05, "loss": 0.1676, "step": 19300 }, { "epoch": 9.036040252749824, "grad_norm": 0.9172232747077942, "learning_rate": 1.482676127855078e-05, "loss": 0.1794, "step": 19310 }, { "epoch": 9.040720805054997, "grad_norm": 0.5193334221839905, "learning_rate": 1.4817740248114561e-05, "loss": 0.1746, "step": 19320 }, { "epoch": 9.045401357360168, "grad_norm": 0.5033785104751587, "learning_rate": 1.480871852483329e-05, "loss": 0.1736, "step": 19330 }, { "epoch": 9.05008190966534, "grad_norm": 1.257498860359192, "learning_rate": 1.4799696114561056e-05, "loss": 0.1922, "step": 19340 }, { "epoch": 9.054762461970512, "grad_norm": 0.6723366379737854, "learning_rate": 1.4790673023152416e-05, "loss": 0.1707, "step": 19350 }, { "epoch": 9.059443014275685, "grad_norm": 0.6602997183799744, "learning_rate": 1.4781649256462345e-05, "loss": 0.1661, "step": 19360 }, { "epoch": 9.064123566580857, "grad_norm": 0.5669138431549072, "learning_rate": 1.4772624820346273e-05, "loss": 0.1935, "step": 19370 }, { "epoch": 9.068804118886028, "grad_norm": 1.0748611688613892, "learning_rate": 1.4763599720660059e-05, "loss": 0.1676, "step": 19380 }, { "epoch": 9.073484671191201, "grad_norm": 0.6427395343780518, "learning_rate": 1.475457396325999e-05, "loss": 0.1777, "step": 19390 }, { "epoch": 9.078165223496372, "grad_norm": 0.5571708679199219, "learning_rate": 1.4745547554002783e-05, "loss": 0.1728, "step": 19400 }, { "epoch": 9.082845775801545, "grad_norm": 0.5040459632873535, "learning_rate": 1.473652049874558e-05, "loss": 0.1801, "step": 19410 }, { "epoch": 9.087526328106717, "grad_norm": 0.6334531903266907, "learning_rate": 1.472749280334593e-05, "loss": 0.1837, "step": 19420 }, { "epoch": 9.092206880411888, "grad_norm": 0.6148951053619385, "learning_rate": 1.4718464473661813e-05, "loss": 0.1917, "step": 19430 }, { "epoch": 9.096887432717061, "grad_norm": 0.7001243233680725, "learning_rate": 1.4709435515551617e-05, "loss": 0.1833, "step": 19440 }, { "epoch": 9.101567985022232, "grad_norm": 0.5488430857658386, "learning_rate": 1.470040593487413e-05, "loss": 0.1723, "step": 19450 }, { "epoch": 9.106248537327405, "grad_norm": 0.5443117022514343, "learning_rate": 1.4691375737488553e-05, "loss": 0.1787, "step": 19460 }, { "epoch": 9.110929089632577, "grad_norm": 0.5497661828994751, "learning_rate": 1.4682344929254476e-05, "loss": 0.1727, "step": 19470 }, { "epoch": 9.115609641937748, "grad_norm": 0.5882095098495483, "learning_rate": 1.4673313516031895e-05, "loss": 0.1726, "step": 19480 }, { "epoch": 9.120290194242921, "grad_norm": 0.7422622442245483, "learning_rate": 1.4664281503681204e-05, "loss": 0.1878, "step": 19490 }, { "epoch": 9.124970746548092, "grad_norm": 0.8199818730354309, "learning_rate": 1.465524889806317e-05, "loss": 0.1755, "step": 19500 }, { "epoch": 9.129651298853265, "grad_norm": 0.6691847443580627, "learning_rate": 1.4646215705038951e-05, "loss": 0.1685, "step": 19510 }, { "epoch": 9.134331851158437, "grad_norm": 0.6538181900978088, "learning_rate": 1.4637181930470093e-05, "loss": 0.1768, "step": 19520 }, { "epoch": 9.139012403463608, "grad_norm": 0.4768422245979309, "learning_rate": 1.4628147580218509e-05, "loss": 0.1758, "step": 19530 }, { "epoch": 9.143692955768781, "grad_norm": 0.9785133004188538, "learning_rate": 1.46191126601465e-05, "loss": 0.1828, "step": 19540 }, { "epoch": 9.148373508073952, "grad_norm": 0.9485018253326416, "learning_rate": 1.4610077176116718e-05, "loss": 0.1741, "step": 19550 }, { "epoch": 9.153054060379125, "grad_norm": 0.5446116924285889, "learning_rate": 1.4601041133992196e-05, "loss": 0.178, "step": 19560 }, { "epoch": 9.157734612684298, "grad_norm": 0.8625629544258118, "learning_rate": 1.459200453963632e-05, "loss": 0.1823, "step": 19570 }, { "epoch": 9.162415164989469, "grad_norm": 0.6328079700469971, "learning_rate": 1.4582967398912839e-05, "loss": 0.1703, "step": 19580 }, { "epoch": 9.167095717294641, "grad_norm": 0.6821820735931396, "learning_rate": 1.4573929717685858e-05, "loss": 0.18, "step": 19590 }, { "epoch": 9.171776269599812, "grad_norm": 0.6607327461242676, "learning_rate": 1.456489150181983e-05, "loss": 0.1745, "step": 19600 }, { "epoch": 9.176456821904985, "grad_norm": 0.6440249681472778, "learning_rate": 1.4555852757179553e-05, "loss": 0.1667, "step": 19610 }, { "epoch": 9.181137374210158, "grad_norm": 0.5863327980041504, "learning_rate": 1.4546813489630165e-05, "loss": 0.1844, "step": 19620 }, { "epoch": 9.185817926515329, "grad_norm": 0.5817156434059143, "learning_rate": 1.4537773705037162e-05, "loss": 0.1661, "step": 19630 }, { "epoch": 9.190498478820501, "grad_norm": 0.5367399454116821, "learning_rate": 1.4528733409266351e-05, "loss": 0.2, "step": 19640 }, { "epoch": 9.195179031125672, "grad_norm": 1.0753389596939087, "learning_rate": 1.4519692608183882e-05, "loss": 0.1785, "step": 19650 }, { "epoch": 9.199859583430845, "grad_norm": 0.5881237983703613, "learning_rate": 1.4510651307656245e-05, "loss": 0.1784, "step": 19660 }, { "epoch": 9.204540135736018, "grad_norm": 0.6421921849250793, "learning_rate": 1.4501609513550226e-05, "loss": 0.17, "step": 19670 }, { "epoch": 9.209220688041189, "grad_norm": 0.5239199995994568, "learning_rate": 1.4492567231732956e-05, "loss": 0.175, "step": 19680 }, { "epoch": 9.213901240346361, "grad_norm": 0.5323271155357361, "learning_rate": 1.4483524468071869e-05, "loss": 0.1756, "step": 19690 }, { "epoch": 9.218581792651532, "grad_norm": 0.616630494594574, "learning_rate": 1.447448122843472e-05, "loss": 0.1725, "step": 19700 }, { "epoch": 9.223262344956705, "grad_norm": 0.5970675349235535, "learning_rate": 1.446543751868957e-05, "loss": 0.1624, "step": 19710 }, { "epoch": 9.227942897261878, "grad_norm": 1.063173532485962, "learning_rate": 1.4456393344704778e-05, "loss": 0.1726, "step": 19720 }, { "epoch": 9.232623449567049, "grad_norm": 0.5475329160690308, "learning_rate": 1.4447348712349016e-05, "loss": 0.1864, "step": 19730 }, { "epoch": 9.237304001872221, "grad_norm": 0.5009306073188782, "learning_rate": 1.4438303627491242e-05, "loss": 0.1603, "step": 19740 }, { "epoch": 9.241984554177392, "grad_norm": 0.48211580514907837, "learning_rate": 1.442925809600072e-05, "loss": 0.187, "step": 19750 }, { "epoch": 9.246665106482565, "grad_norm": 0.6101060509681702, "learning_rate": 1.4420212123746993e-05, "loss": 0.173, "step": 19760 }, { "epoch": 9.251345658787738, "grad_norm": 0.6153058409690857, "learning_rate": 1.4411165716599898e-05, "loss": 0.179, "step": 19770 }, { "epoch": 9.256026211092909, "grad_norm": 0.6685967445373535, "learning_rate": 1.4402118880429544e-05, "loss": 0.1797, "step": 19780 }, { "epoch": 9.260706763398082, "grad_norm": 0.7350291609764099, "learning_rate": 1.4393071621106328e-05, "loss": 0.1752, "step": 19790 }, { "epoch": 9.265387315703252, "grad_norm": 0.9307417273521423, "learning_rate": 1.4384023944500921e-05, "loss": 0.1777, "step": 19800 }, { "epoch": 9.270067868008425, "grad_norm": 0.4952935576438904, "learning_rate": 1.437497585648426e-05, "loss": 0.1546, "step": 19810 }, { "epoch": 9.274748420313596, "grad_norm": 0.6107485294342041, "learning_rate": 1.4365927362927551e-05, "loss": 0.1691, "step": 19820 }, { "epoch": 9.279428972618769, "grad_norm": 0.6259174942970276, "learning_rate": 1.4356878469702263e-05, "loss": 0.1735, "step": 19830 }, { "epoch": 9.284109524923942, "grad_norm": 0.9364478588104248, "learning_rate": 1.4347829182680122e-05, "loss": 0.1681, "step": 19840 }, { "epoch": 9.288790077229113, "grad_norm": 0.6316061019897461, "learning_rate": 1.4338779507733122e-05, "loss": 0.17, "step": 19850 }, { "epoch": 9.293470629534285, "grad_norm": 0.6004688143730164, "learning_rate": 1.4329729450733484e-05, "loss": 0.1761, "step": 19860 }, { "epoch": 9.298151181839456, "grad_norm": 0.5681875944137573, "learning_rate": 1.4320679017553703e-05, "loss": 0.1696, "step": 19870 }, { "epoch": 9.302831734144629, "grad_norm": 0.6632291674613953, "learning_rate": 1.4311628214066505e-05, "loss": 0.1674, "step": 19880 }, { "epoch": 9.307512286449802, "grad_norm": 0.6508898735046387, "learning_rate": 1.4302577046144852e-05, "loss": 0.1588, "step": 19890 }, { "epoch": 9.312192838754973, "grad_norm": 0.6984019875526428, "learning_rate": 1.4293525519661955e-05, "loss": 0.1664, "step": 19900 }, { "epoch": 9.316873391060145, "grad_norm": 0.8475158214569092, "learning_rate": 1.428447364049125e-05, "loss": 0.1788, "step": 19910 }, { "epoch": 9.321553943365316, "grad_norm": 0.46103769540786743, "learning_rate": 1.4275421414506401e-05, "loss": 0.1802, "step": 19920 }, { "epoch": 9.326234495670489, "grad_norm": 0.6400139331817627, "learning_rate": 1.4266368847581301e-05, "loss": 0.1922, "step": 19930 }, { "epoch": 9.330915047975662, "grad_norm": 0.45123761892318726, "learning_rate": 1.4257315945590066e-05, "loss": 0.1756, "step": 19940 }, { "epoch": 9.335595600280833, "grad_norm": 0.7799246907234192, "learning_rate": 1.4248262714407018e-05, "loss": 0.1849, "step": 19950 }, { "epoch": 9.340276152586005, "grad_norm": 0.8667450547218323, "learning_rate": 1.4239209159906705e-05, "loss": 0.1697, "step": 19960 }, { "epoch": 9.344956704891176, "grad_norm": 0.5584301948547363, "learning_rate": 1.4230155287963875e-05, "loss": 0.175, "step": 19970 }, { "epoch": 9.349637257196349, "grad_norm": 0.7900187373161316, "learning_rate": 1.4221101104453497e-05, "loss": 0.1769, "step": 19980 }, { "epoch": 9.354317809501522, "grad_norm": 0.6469388604164124, "learning_rate": 1.4212046615250726e-05, "loss": 0.164, "step": 19990 }, { "epoch": 9.358998361806693, "grad_norm": 0.8280735611915588, "learning_rate": 1.420299182623092e-05, "loss": 0.1991, "step": 20000 }, { "epoch": 9.363678914111865, "grad_norm": 0.6858882904052734, "learning_rate": 1.4193936743269639e-05, "loss": 0.1719, "step": 20010 }, { "epoch": 9.368359466417036, "grad_norm": 0.7309482097625732, "learning_rate": 1.4184881372242625e-05, "loss": 0.1756, "step": 20020 }, { "epoch": 9.37304001872221, "grad_norm": 0.6894615292549133, "learning_rate": 1.4175825719025809e-05, "loss": 0.1756, "step": 20030 }, { "epoch": 9.377720571027382, "grad_norm": 0.6052764058113098, "learning_rate": 1.4166769789495307e-05, "loss": 0.1741, "step": 20040 }, { "epoch": 9.382401123332553, "grad_norm": 0.5464836955070496, "learning_rate": 1.4157713589527415e-05, "loss": 0.1748, "step": 20050 }, { "epoch": 9.387081675637726, "grad_norm": 0.5768723487854004, "learning_rate": 1.4148657124998598e-05, "loss": 0.1757, "step": 20060 }, { "epoch": 9.391762227942897, "grad_norm": 1.0329021215438843, "learning_rate": 1.4139600401785506e-05, "loss": 0.1809, "step": 20070 }, { "epoch": 9.39644278024807, "grad_norm": 0.6342684626579285, "learning_rate": 1.4130543425764937e-05, "loss": 0.1862, "step": 20080 }, { "epoch": 9.401123332553242, "grad_norm": 0.7494005560874939, "learning_rate": 1.4121486202813874e-05, "loss": 0.1763, "step": 20090 }, { "epoch": 9.405803884858413, "grad_norm": 0.4835908114910126, "learning_rate": 1.4112428738809449e-05, "loss": 0.1885, "step": 20100 }, { "epoch": 9.410484437163586, "grad_norm": 0.5431362986564636, "learning_rate": 1.4103371039628943e-05, "loss": 0.1819, "step": 20110 }, { "epoch": 9.415164989468757, "grad_norm": 0.4829239249229431, "learning_rate": 1.4094313111149812e-05, "loss": 0.1812, "step": 20120 }, { "epoch": 9.41984554177393, "grad_norm": 0.49065089225769043, "learning_rate": 1.4085254959249642e-05, "loss": 0.1742, "step": 20130 }, { "epoch": 9.424526094079102, "grad_norm": 0.5777250528335571, "learning_rate": 1.4076196589806165e-05, "loss": 0.1637, "step": 20140 }, { "epoch": 9.429206646384273, "grad_norm": 0.6064701080322266, "learning_rate": 1.4067138008697265e-05, "loss": 0.1716, "step": 20150 }, { "epoch": 9.433887198689446, "grad_norm": 1.1319832801818848, "learning_rate": 1.4058079221800952e-05, "loss": 0.162, "step": 20160 }, { "epoch": 9.438567750994617, "grad_norm": 0.8844035267829895, "learning_rate": 1.4049020234995378e-05, "loss": 0.1659, "step": 20170 }, { "epoch": 9.44324830329979, "grad_norm": 0.7983923554420471, "learning_rate": 1.403996105415882e-05, "loss": 0.1837, "step": 20180 }, { "epoch": 9.447928855604962, "grad_norm": 0.577618420124054, "learning_rate": 1.4030901685169682e-05, "loss": 0.188, "step": 20190 }, { "epoch": 9.452609407910133, "grad_norm": 0.6464549899101257, "learning_rate": 1.402184213390649e-05, "loss": 0.1693, "step": 20200 }, { "epoch": 9.457289960215306, "grad_norm": 0.5241626501083374, "learning_rate": 1.401278240624789e-05, "loss": 0.1698, "step": 20210 }, { "epoch": 9.461970512520477, "grad_norm": 0.9782401323318481, "learning_rate": 1.4003722508072637e-05, "loss": 0.1962, "step": 20220 }, { "epoch": 9.46665106482565, "grad_norm": 0.6084905862808228, "learning_rate": 1.3994662445259604e-05, "loss": 0.1747, "step": 20230 }, { "epoch": 9.471331617130822, "grad_norm": 0.4708838164806366, "learning_rate": 1.3985602223687767e-05, "loss": 0.1789, "step": 20240 }, { "epoch": 9.476012169435993, "grad_norm": 0.619002103805542, "learning_rate": 1.3976541849236205e-05, "loss": 0.1622, "step": 20250 }, { "epoch": 9.480692721741166, "grad_norm": 0.5262901782989502, "learning_rate": 1.3967481327784096e-05, "loss": 0.1647, "step": 20260 }, { "epoch": 9.485373274046337, "grad_norm": 0.5504419803619385, "learning_rate": 1.3958420665210716e-05, "loss": 0.1661, "step": 20270 }, { "epoch": 9.49005382635151, "grad_norm": 0.6240934133529663, "learning_rate": 1.3949359867395428e-05, "loss": 0.1868, "step": 20280 }, { "epoch": 9.494734378656682, "grad_norm": 0.8998789191246033, "learning_rate": 1.3940298940217683e-05, "loss": 0.1844, "step": 20290 }, { "epoch": 9.499414930961853, "grad_norm": 0.574389636516571, "learning_rate": 1.3931237889557028e-05, "loss": 0.1692, "step": 20300 }, { "epoch": 9.504095483267026, "grad_norm": 0.5695172548294067, "learning_rate": 1.3922176721293073e-05, "loss": 0.172, "step": 20310 }, { "epoch": 9.508776035572197, "grad_norm": 0.5780273675918579, "learning_rate": 1.391311544130551e-05, "loss": 0.1719, "step": 20320 }, { "epoch": 9.51345658787737, "grad_norm": 1.207268476486206, "learning_rate": 1.3904054055474111e-05, "loss": 0.1775, "step": 20330 }, { "epoch": 9.518137140182542, "grad_norm": 0.4937889873981476, "learning_rate": 1.3894992569678705e-05, "loss": 0.1665, "step": 20340 }, { "epoch": 9.522817692487713, "grad_norm": 0.5967661738395691, "learning_rate": 1.3885930989799196e-05, "loss": 0.1789, "step": 20350 }, { "epoch": 9.527498244792886, "grad_norm": 0.5558335185050964, "learning_rate": 1.3876869321715544e-05, "loss": 0.1731, "step": 20360 }, { "epoch": 9.532178797098057, "grad_norm": 0.4869462251663208, "learning_rate": 1.3867807571307767e-05, "loss": 0.176, "step": 20370 }, { "epoch": 9.53685934940323, "grad_norm": 0.5166340470314026, "learning_rate": 1.3858745744455936e-05, "loss": 0.1745, "step": 20380 }, { "epoch": 9.541539901708402, "grad_norm": 0.8750209808349609, "learning_rate": 1.3849683847040171e-05, "loss": 0.1733, "step": 20390 }, { "epoch": 9.546220454013573, "grad_norm": 0.5490562319755554, "learning_rate": 1.3840621884940638e-05, "loss": 0.1729, "step": 20400 }, { "epoch": 9.550901006318746, "grad_norm": 0.570711076259613, "learning_rate": 1.383155986403755e-05, "loss": 0.1753, "step": 20410 }, { "epoch": 9.555581558623917, "grad_norm": 0.7319356203079224, "learning_rate": 1.3822497790211155e-05, "loss": 0.1723, "step": 20420 }, { "epoch": 9.56026211092909, "grad_norm": 0.5639404654502869, "learning_rate": 1.3813435669341723e-05, "loss": 0.1759, "step": 20430 }, { "epoch": 9.564942663234262, "grad_norm": 0.7344599962234497, "learning_rate": 1.3804373507309581e-05, "loss": 0.1789, "step": 20440 }, { "epoch": 9.569623215539433, "grad_norm": 0.5603479146957397, "learning_rate": 1.3795311309995056e-05, "loss": 0.1771, "step": 20450 }, { "epoch": 9.574303767844606, "grad_norm": 0.8047216534614563, "learning_rate": 1.3786249083278519e-05, "loss": 0.1828, "step": 20460 }, { "epoch": 9.578984320149777, "grad_norm": 0.5735139846801758, "learning_rate": 1.377718683304034e-05, "loss": 0.184, "step": 20470 }, { "epoch": 9.58366487245495, "grad_norm": 0.5279703140258789, "learning_rate": 1.3768124565160923e-05, "loss": 0.1842, "step": 20480 }, { "epoch": 9.588345424760123, "grad_norm": 0.5627964735031128, "learning_rate": 1.3759062285520674e-05, "loss": 0.1838, "step": 20490 }, { "epoch": 9.593025977065293, "grad_norm": 0.6149129867553711, "learning_rate": 1.3750000000000002e-05, "loss": 0.1575, "step": 20500 }, { "epoch": 9.597706529370466, "grad_norm": 0.6524822115898132, "learning_rate": 1.3740937714479332e-05, "loss": 0.1797, "step": 20510 }, { "epoch": 9.602387081675637, "grad_norm": 0.5263031721115112, "learning_rate": 1.3731875434839081e-05, "loss": 0.1783, "step": 20520 }, { "epoch": 9.60706763398081, "grad_norm": 0.5853891968727112, "learning_rate": 1.3722813166959664e-05, "loss": 0.1729, "step": 20530 }, { "epoch": 9.611748186285983, "grad_norm": 0.5934178233146667, "learning_rate": 1.3713750916721486e-05, "loss": 0.1579, "step": 20540 }, { "epoch": 9.616428738591154, "grad_norm": 0.5239492058753967, "learning_rate": 1.3704688690004946e-05, "loss": 0.1775, "step": 20550 }, { "epoch": 9.621109290896326, "grad_norm": 1.05600106716156, "learning_rate": 1.3695626492690425e-05, "loss": 0.1698, "step": 20560 }, { "epoch": 9.625789843201497, "grad_norm": 1.1222347021102905, "learning_rate": 1.3686564330658278e-05, "loss": 0.1936, "step": 20570 }, { "epoch": 9.63047039550667, "grad_norm": 0.6623438596725464, "learning_rate": 1.3677502209788853e-05, "loss": 0.1696, "step": 20580 }, { "epoch": 9.635150947811841, "grad_norm": 1.0344915390014648, "learning_rate": 1.3668440135962448e-05, "loss": 0.169, "step": 20590 }, { "epoch": 9.639831500117014, "grad_norm": 1.0145000219345093, "learning_rate": 1.3659378115059365e-05, "loss": 0.1751, "step": 20600 }, { "epoch": 9.644512052422186, "grad_norm": 0.6257618069648743, "learning_rate": 1.3650316152959836e-05, "loss": 0.1649, "step": 20610 }, { "epoch": 9.649192604727357, "grad_norm": 0.5026218295097351, "learning_rate": 1.3641254255544067e-05, "loss": 0.18, "step": 20620 }, { "epoch": 9.65387315703253, "grad_norm": 0.5575605034828186, "learning_rate": 1.3632192428692239e-05, "loss": 0.1765, "step": 20630 }, { "epoch": 9.658553709337703, "grad_norm": 0.55947345495224, "learning_rate": 1.3623130678284457e-05, "loss": 0.1931, "step": 20640 }, { "epoch": 9.663234261642874, "grad_norm": 0.7472094893455505, "learning_rate": 1.3614069010200805e-05, "loss": 0.1814, "step": 20650 }, { "epoch": 9.667914813948046, "grad_norm": 0.7312115430831909, "learning_rate": 1.3605007430321296e-05, "loss": 0.182, "step": 20660 }, { "epoch": 9.672595366253217, "grad_norm": 0.5715276002883911, "learning_rate": 1.3595945944525892e-05, "loss": 0.181, "step": 20670 }, { "epoch": 9.67727591855839, "grad_norm": 0.6868329048156738, "learning_rate": 1.3586884558694494e-05, "loss": 0.1811, "step": 20680 }, { "epoch": 9.681956470863561, "grad_norm": 0.610761821269989, "learning_rate": 1.357782327870693e-05, "loss": 0.1722, "step": 20690 }, { "epoch": 9.686637023168734, "grad_norm": 0.862671971321106, "learning_rate": 1.3568762110442975e-05, "loss": 0.1712, "step": 20700 }, { "epoch": 9.691317575473906, "grad_norm": 0.6187286972999573, "learning_rate": 1.3559701059782314e-05, "loss": 0.1873, "step": 20710 }, { "epoch": 9.695998127779077, "grad_norm": 0.6478118300437927, "learning_rate": 1.3550640132604575e-05, "loss": 0.1704, "step": 20720 }, { "epoch": 9.70067868008425, "grad_norm": 0.5458742380142212, "learning_rate": 1.354157933478929e-05, "loss": 0.1668, "step": 20730 }, { "epoch": 9.705359232389423, "grad_norm": 0.5584373474121094, "learning_rate": 1.3532518672215905e-05, "loss": 0.1775, "step": 20740 }, { "epoch": 9.710039784694594, "grad_norm": 0.6496391892433167, "learning_rate": 1.3523458150763798e-05, "loss": 0.1726, "step": 20750 }, { "epoch": 9.714720336999767, "grad_norm": 0.5480362176895142, "learning_rate": 1.3514397776312234e-05, "loss": 0.1787, "step": 20760 }, { "epoch": 9.719400889304938, "grad_norm": 0.5135990381240845, "learning_rate": 1.3505337554740399e-05, "loss": 0.188, "step": 20770 }, { "epoch": 9.72408144161011, "grad_norm": 0.7390865087509155, "learning_rate": 1.3496277491927368e-05, "loss": 0.1831, "step": 20780 }, { "epoch": 9.728761993915281, "grad_norm": 0.5169256329536438, "learning_rate": 1.3487217593752117e-05, "loss": 0.1686, "step": 20790 }, { "epoch": 9.733442546220454, "grad_norm": 0.6813758015632629, "learning_rate": 1.3478157866093516e-05, "loss": 0.1633, "step": 20800 }, { "epoch": 9.738123098525627, "grad_norm": 0.5976643562316895, "learning_rate": 1.346909831483032e-05, "loss": 0.1853, "step": 20810 }, { "epoch": 9.742803650830798, "grad_norm": 0.5875235199928284, "learning_rate": 1.3460038945841183e-05, "loss": 0.1756, "step": 20820 }, { "epoch": 9.74748420313597, "grad_norm": 1.3181053400039673, "learning_rate": 1.3450979765004628e-05, "loss": 0.1822, "step": 20830 }, { "epoch": 9.752164755441141, "grad_norm": 1.185268521308899, "learning_rate": 1.3441920778199049e-05, "loss": 0.1708, "step": 20840 }, { "epoch": 9.756845307746314, "grad_norm": 0.6765438914299011, "learning_rate": 1.343286199130274e-05, "loss": 0.1801, "step": 20850 }, { "epoch": 9.761525860051487, "grad_norm": 0.5367382168769836, "learning_rate": 1.3423803410193836e-05, "loss": 0.1835, "step": 20860 }, { "epoch": 9.766206412356658, "grad_norm": 0.7435438632965088, "learning_rate": 1.3414745040750362e-05, "loss": 0.1773, "step": 20870 }, { "epoch": 9.77088696466183, "grad_norm": 0.6452366709709167, "learning_rate": 1.3405686888850189e-05, "loss": 0.1638, "step": 20880 }, { "epoch": 9.775567516967001, "grad_norm": 0.6083431243896484, "learning_rate": 1.3396628960371058e-05, "loss": 0.1915, "step": 20890 }, { "epoch": 9.780248069272174, "grad_norm": 0.7805378437042236, "learning_rate": 1.3387571261190557e-05, "loss": 0.1733, "step": 20900 }, { "epoch": 9.784928621577347, "grad_norm": 0.8372702598571777, "learning_rate": 1.3378513797186129e-05, "loss": 0.1764, "step": 20910 }, { "epoch": 9.789609173882518, "grad_norm": 0.6774083375930786, "learning_rate": 1.3369456574235064e-05, "loss": 0.1746, "step": 20920 }, { "epoch": 9.79428972618769, "grad_norm": 0.5712336301803589, "learning_rate": 1.3360399598214499e-05, "loss": 0.1866, "step": 20930 }, { "epoch": 9.798970278492861, "grad_norm": 0.5004392862319946, "learning_rate": 1.3351342875001405e-05, "loss": 0.1691, "step": 20940 }, { "epoch": 9.803650830798034, "grad_norm": 0.6846486926078796, "learning_rate": 1.334228641047259e-05, "loss": 0.1847, "step": 20950 }, { "epoch": 9.808331383103207, "grad_norm": 0.5421280860900879, "learning_rate": 1.3333230210504696e-05, "loss": 0.1773, "step": 20960 }, { "epoch": 9.813011935408378, "grad_norm": 0.661289393901825, "learning_rate": 1.3324174280974194e-05, "loss": 0.1706, "step": 20970 }, { "epoch": 9.81769248771355, "grad_norm": 0.6645926833152771, "learning_rate": 1.3315118627757376e-05, "loss": 0.182, "step": 20980 }, { "epoch": 9.822373040018721, "grad_norm": 0.7211066484451294, "learning_rate": 1.3306063256730364e-05, "loss": 0.1718, "step": 20990 }, { "epoch": 9.827053592323894, "grad_norm": 0.7198243141174316, "learning_rate": 1.3297008173769085e-05, "loss": 0.1883, "step": 21000 }, { "epoch": 9.831734144629067, "grad_norm": 1.420576810836792, "learning_rate": 1.3287953384749277e-05, "loss": 0.1863, "step": 21010 }, { "epoch": 9.836414696934238, "grad_norm": 0.6058073043823242, "learning_rate": 1.3278898895546507e-05, "loss": 0.177, "step": 21020 }, { "epoch": 9.84109524923941, "grad_norm": 0.5949601531028748, "learning_rate": 1.3269844712036123e-05, "loss": 0.1823, "step": 21030 }, { "epoch": 9.845775801544582, "grad_norm": 0.6268157362937927, "learning_rate": 1.3260790840093299e-05, "loss": 0.18, "step": 21040 }, { "epoch": 9.850456353849754, "grad_norm": 0.5643357634544373, "learning_rate": 1.3251737285592988e-05, "loss": 0.166, "step": 21050 }, { "epoch": 9.855136906154927, "grad_norm": 0.6154606938362122, "learning_rate": 1.3242684054409938e-05, "loss": 0.1764, "step": 21060 }, { "epoch": 9.859817458460098, "grad_norm": 0.5545603632926941, "learning_rate": 1.32336311524187e-05, "loss": 0.163, "step": 21070 }, { "epoch": 9.86449801076527, "grad_norm": 0.506046712398529, "learning_rate": 1.32245785854936e-05, "loss": 0.1838, "step": 21080 }, { "epoch": 9.869178563070442, "grad_norm": 0.4729001224040985, "learning_rate": 1.3215526359508754e-05, "loss": 0.166, "step": 21090 }, { "epoch": 9.873859115375614, "grad_norm": 0.7111366391181946, "learning_rate": 1.3206474480338046e-05, "loss": 0.1763, "step": 21100 }, { "epoch": 9.878539667680787, "grad_norm": 0.6019900441169739, "learning_rate": 1.3197422953855149e-05, "loss": 0.174, "step": 21110 }, { "epoch": 9.883220219985958, "grad_norm": 0.9198846220970154, "learning_rate": 1.3188371785933501e-05, "loss": 0.1756, "step": 21120 }, { "epoch": 9.88790077229113, "grad_norm": 0.5883786678314209, "learning_rate": 1.31793209824463e-05, "loss": 0.1584, "step": 21130 }, { "epoch": 9.892581324596302, "grad_norm": 0.7118514180183411, "learning_rate": 1.317027054926652e-05, "loss": 0.1705, "step": 21140 }, { "epoch": 9.897261876901474, "grad_norm": 0.531730592250824, "learning_rate": 1.3161220492266882e-05, "loss": 0.1739, "step": 21150 }, { "epoch": 9.901942429206647, "grad_norm": 0.5332835912704468, "learning_rate": 1.3152170817319879e-05, "loss": 0.1712, "step": 21160 }, { "epoch": 9.906622981511818, "grad_norm": 0.6198727488517761, "learning_rate": 1.3143121530297742e-05, "loss": 0.179, "step": 21170 }, { "epoch": 9.91130353381699, "grad_norm": 0.6934273838996887, "learning_rate": 1.3134072637072453e-05, "loss": 0.1855, "step": 21180 }, { "epoch": 9.915984086122162, "grad_norm": 0.496284544467926, "learning_rate": 1.3125024143515743e-05, "loss": 0.1849, "step": 21190 }, { "epoch": 9.920664638427334, "grad_norm": 0.8882848024368286, "learning_rate": 1.311597605549908e-05, "loss": 0.1824, "step": 21200 }, { "epoch": 9.925345190732507, "grad_norm": 0.5388554930686951, "learning_rate": 1.3106928378893673e-05, "loss": 0.1711, "step": 21210 }, { "epoch": 9.930025743037678, "grad_norm": 0.6906712651252747, "learning_rate": 1.3097881119570463e-05, "loss": 0.1671, "step": 21220 }, { "epoch": 9.93470629534285, "grad_norm": 0.5911359786987305, "learning_rate": 1.3088834283400106e-05, "loss": 0.1745, "step": 21230 }, { "epoch": 9.939386847648022, "grad_norm": 0.581566572189331, "learning_rate": 1.3079787876253011e-05, "loss": 0.1629, "step": 21240 }, { "epoch": 9.944067399953195, "grad_norm": 1.11481773853302, "learning_rate": 1.3070741903999281e-05, "loss": 0.1682, "step": 21250 }, { "epoch": 9.948747952258367, "grad_norm": 0.592535674571991, "learning_rate": 1.306169637250876e-05, "loss": 0.1682, "step": 21260 }, { "epoch": 9.953428504563538, "grad_norm": 0.6681244969367981, "learning_rate": 1.3052651287650992e-05, "loss": 0.1789, "step": 21270 }, { "epoch": 9.958109056868711, "grad_norm": 0.4960007965564728, "learning_rate": 1.3043606655295227e-05, "loss": 0.1925, "step": 21280 }, { "epoch": 9.962789609173882, "grad_norm": 0.6144018769264221, "learning_rate": 1.3034562481310436e-05, "loss": 0.1724, "step": 21290 }, { "epoch": 9.967470161479055, "grad_norm": 0.6647139191627502, "learning_rate": 1.3025518771565281e-05, "loss": 0.1844, "step": 21300 }, { "epoch": 9.972150713784227, "grad_norm": 0.645695149898529, "learning_rate": 1.3016475531928132e-05, "loss": 0.178, "step": 21310 }, { "epoch": 9.976831266089398, "grad_norm": 0.666382908821106, "learning_rate": 1.300743276826705e-05, "loss": 0.1887, "step": 21320 }, { "epoch": 9.981511818394571, "grad_norm": 0.6798974871635437, "learning_rate": 1.2998390486449777e-05, "loss": 0.191, "step": 21330 }, { "epoch": 9.986192370699742, "grad_norm": 0.5745602250099182, "learning_rate": 1.2989348692343761e-05, "loss": 0.1801, "step": 21340 }, { "epoch": 9.990872923004915, "grad_norm": 0.6407061815261841, "learning_rate": 1.2980307391816119e-05, "loss": 0.1677, "step": 21350 }, { "epoch": 9.995553475310087, "grad_norm": 0.5243898034095764, "learning_rate": 1.2971266590733655e-05, "loss": 0.1679, "step": 21360 }, { "epoch": 10.0, "grad_norm": 0.601761519908905, "learning_rate": 1.2962226294962843e-05, "loss": 0.1583, "step": 21370 }, { "epoch": 10.004680552305173, "grad_norm": 0.5377545952796936, "learning_rate": 1.2953186510369838e-05, "loss": 0.1611, "step": 21380 }, { "epoch": 10.009361104610344, "grad_norm": 0.48052462935447693, "learning_rate": 1.2944147242820457e-05, "loss": 0.1491, "step": 21390 }, { "epoch": 10.014041656915516, "grad_norm": 0.7522516846656799, "learning_rate": 1.2935108498180176e-05, "loss": 0.1423, "step": 21400 }, { "epoch": 10.018722209220687, "grad_norm": 0.8992882966995239, "learning_rate": 1.2926070282314148e-05, "loss": 0.1512, "step": 21410 }, { "epoch": 10.02340276152586, "grad_norm": 0.8793779015541077, "learning_rate": 1.2917032601087162e-05, "loss": 0.1513, "step": 21420 }, { "epoch": 10.028083313831033, "grad_norm": 0.86927729845047, "learning_rate": 1.2907995460363681e-05, "loss": 0.1489, "step": 21430 }, { "epoch": 10.032763866136204, "grad_norm": 0.5801944732666016, "learning_rate": 1.2898958866007813e-05, "loss": 0.1538, "step": 21440 }, { "epoch": 10.037444418441376, "grad_norm": 0.46760591864585876, "learning_rate": 1.2889922823883283e-05, "loss": 0.1416, "step": 21450 }, { "epoch": 10.042124970746547, "grad_norm": 0.5835723876953125, "learning_rate": 1.2880887339853503e-05, "loss": 0.1616, "step": 21460 }, { "epoch": 10.04680552305172, "grad_norm": 0.6963253617286682, "learning_rate": 1.287185241978149e-05, "loss": 0.1537, "step": 21470 }, { "epoch": 10.051486075356893, "grad_norm": 0.5360548496246338, "learning_rate": 1.2862818069529912e-05, "loss": 0.1494, "step": 21480 }, { "epoch": 10.056166627662064, "grad_norm": 0.5437682867050171, "learning_rate": 1.2853784294961052e-05, "loss": 0.1457, "step": 21490 }, { "epoch": 10.060847179967237, "grad_norm": 0.5209912061691284, "learning_rate": 1.2844751101936836e-05, "loss": 0.1616, "step": 21500 }, { "epoch": 10.065527732272407, "grad_norm": 0.5523772835731506, "learning_rate": 1.2835718496318799e-05, "loss": 0.1576, "step": 21510 }, { "epoch": 10.07020828457758, "grad_norm": 0.5506686568260193, "learning_rate": 1.2826686483968106e-05, "loss": 0.1529, "step": 21520 }, { "epoch": 10.074888836882753, "grad_norm": 0.7049039006233215, "learning_rate": 1.2817655070745526e-05, "loss": 0.1435, "step": 21530 }, { "epoch": 10.079569389187924, "grad_norm": 0.6156631708145142, "learning_rate": 1.2808624262511454e-05, "loss": 0.1562, "step": 21540 }, { "epoch": 10.084249941493097, "grad_norm": 0.5665584206581116, "learning_rate": 1.2799594065125872e-05, "loss": 0.143, "step": 21550 }, { "epoch": 10.088930493798268, "grad_norm": 0.6938361525535583, "learning_rate": 1.2790564484448386e-05, "loss": 0.1503, "step": 21560 }, { "epoch": 10.09361104610344, "grad_norm": 0.9654330611228943, "learning_rate": 1.2781535526338186e-05, "loss": 0.1586, "step": 21570 }, { "epoch": 10.098291598408613, "grad_norm": 0.6676629185676575, "learning_rate": 1.2772507196654073e-05, "loss": 0.1534, "step": 21580 }, { "epoch": 10.102972150713784, "grad_norm": 0.7845817804336548, "learning_rate": 1.2763479501254427e-05, "loss": 0.1703, "step": 21590 }, { "epoch": 10.107652703018957, "grad_norm": 0.708331823348999, "learning_rate": 1.275445244599722e-05, "loss": 0.1669, "step": 21600 }, { "epoch": 10.112333255324128, "grad_norm": 0.726787269115448, "learning_rate": 1.2745426036740013e-05, "loss": 0.1567, "step": 21610 }, { "epoch": 10.1170138076293, "grad_norm": 0.9793049097061157, "learning_rate": 1.2736400279339942e-05, "loss": 0.1581, "step": 21620 }, { "epoch": 10.121694359934473, "grad_norm": 0.9180346727371216, "learning_rate": 1.2727375179653731e-05, "loss": 0.1541, "step": 21630 }, { "epoch": 10.126374912239644, "grad_norm": 0.5720454454421997, "learning_rate": 1.2718350743537654e-05, "loss": 0.1461, "step": 21640 }, { "epoch": 10.131055464544817, "grad_norm": 0.6891485452651978, "learning_rate": 1.2709326976847589e-05, "loss": 0.1599, "step": 21650 }, { "epoch": 10.135736016849988, "grad_norm": 0.95766681432724, "learning_rate": 1.2700303885438947e-05, "loss": 0.1631, "step": 21660 }, { "epoch": 10.14041656915516, "grad_norm": 0.5713412165641785, "learning_rate": 1.2691281475166713e-05, "loss": 0.1565, "step": 21670 }, { "epoch": 10.145097121460333, "grad_norm": 0.687033474445343, "learning_rate": 1.2682259751885442e-05, "loss": 0.1516, "step": 21680 }, { "epoch": 10.149777673765504, "grad_norm": 0.8766445517539978, "learning_rate": 1.2673238721449222e-05, "loss": 0.1574, "step": 21690 }, { "epoch": 10.154458226070677, "grad_norm": 0.8439580202102661, "learning_rate": 1.2664218389711705e-05, "loss": 0.159, "step": 21700 }, { "epoch": 10.159138778375848, "grad_norm": 0.8456753492355347, "learning_rate": 1.2655198762526091e-05, "loss": 0.1665, "step": 21710 }, { "epoch": 10.16381933068102, "grad_norm": 0.6809765696525574, "learning_rate": 1.264617984574511e-05, "loss": 0.1529, "step": 21720 }, { "epoch": 10.168499882986193, "grad_norm": 0.4891361892223358, "learning_rate": 1.2637161645221044e-05, "loss": 0.1425, "step": 21730 }, { "epoch": 10.173180435291364, "grad_norm": 0.5059837698936462, "learning_rate": 1.2628144166805703e-05, "loss": 0.1528, "step": 21740 }, { "epoch": 10.177860987596537, "grad_norm": 1.294456958770752, "learning_rate": 1.2619127416350434e-05, "loss": 0.1607, "step": 21750 }, { "epoch": 10.182541539901708, "grad_norm": 0.6055684685707092, "learning_rate": 1.261011139970611e-05, "loss": 0.1485, "step": 21760 }, { "epoch": 10.18722209220688, "grad_norm": 0.5353481769561768, "learning_rate": 1.2601096122723117e-05, "loss": 0.1573, "step": 21770 }, { "epoch": 10.191902644512052, "grad_norm": 0.7606616616249084, "learning_rate": 1.2592081591251382e-05, "loss": 0.1541, "step": 21780 }, { "epoch": 10.196583196817224, "grad_norm": 0.60831218957901, "learning_rate": 1.2583067811140325e-05, "loss": 0.1613, "step": 21790 }, { "epoch": 10.201263749122397, "grad_norm": 0.8810595273971558, "learning_rate": 1.2574054788238901e-05, "loss": 0.1636, "step": 21800 }, { "epoch": 10.205944301427568, "grad_norm": 0.5331091284751892, "learning_rate": 1.2565042528395557e-05, "loss": 0.1538, "step": 21810 }, { "epoch": 10.21062485373274, "grad_norm": 0.6262661218643188, "learning_rate": 1.255603103745825e-05, "loss": 0.1554, "step": 21820 }, { "epoch": 10.215305406037913, "grad_norm": 0.5517286658287048, "learning_rate": 1.2547020321274442e-05, "loss": 0.1599, "step": 21830 }, { "epoch": 10.219985958343084, "grad_norm": 0.663687527179718, "learning_rate": 1.2538010385691082e-05, "loss": 0.1679, "step": 21840 }, { "epoch": 10.224666510648257, "grad_norm": 0.8001555800437927, "learning_rate": 1.2529001236554633e-05, "loss": 0.1782, "step": 21850 }, { "epoch": 10.229347062953428, "grad_norm": 1.1372547149658203, "learning_rate": 1.2519992879711014e-05, "loss": 0.1562, "step": 21860 }, { "epoch": 10.2340276152586, "grad_norm": 0.6557598114013672, "learning_rate": 1.251098532100567e-05, "loss": 0.1585, "step": 21870 }, { "epoch": 10.238708167563772, "grad_norm": 0.6259006261825562, "learning_rate": 1.25019785662835e-05, "loss": 0.1552, "step": 21880 }, { "epoch": 10.243388719868944, "grad_norm": 0.5069560408592224, "learning_rate": 1.2492972621388885e-05, "loss": 0.1605, "step": 21890 }, { "epoch": 10.248069272174117, "grad_norm": 0.7337058782577515, "learning_rate": 1.2483967492165691e-05, "loss": 0.1631, "step": 21900 }, { "epoch": 10.252749824479288, "grad_norm": 1.3105257749557495, "learning_rate": 1.2474963184457242e-05, "loss": 0.1419, "step": 21910 }, { "epoch": 10.25743037678446, "grad_norm": 0.896395206451416, "learning_rate": 1.2465959704106343e-05, "loss": 0.1563, "step": 21920 }, { "epoch": 10.262110929089632, "grad_norm": 0.5129675269126892, "learning_rate": 1.2456957056955255e-05, "loss": 0.1578, "step": 21930 }, { "epoch": 10.266791481394804, "grad_norm": 0.542823076248169, "learning_rate": 1.2447955248845688e-05, "loss": 0.1532, "step": 21940 }, { "epoch": 10.271472033699977, "grad_norm": 0.6554822325706482, "learning_rate": 1.2438954285618825e-05, "loss": 0.1533, "step": 21950 }, { "epoch": 10.276152586005148, "grad_norm": 0.4810051918029785, "learning_rate": 1.2429954173115293e-05, "loss": 0.1495, "step": 21960 }, { "epoch": 10.28083313831032, "grad_norm": 0.6356202363967896, "learning_rate": 1.242095491717517e-05, "loss": 0.1552, "step": 21970 }, { "epoch": 10.285513690615492, "grad_norm": 0.5383186936378479, "learning_rate": 1.2411956523637969e-05, "loss": 0.147, "step": 21980 }, { "epoch": 10.290194242920665, "grad_norm": 0.58808833360672, "learning_rate": 1.2402958998342654e-05, "loss": 0.1602, "step": 21990 }, { "epoch": 10.294874795225837, "grad_norm": 1.0776275396347046, "learning_rate": 1.2393962347127618e-05, "loss": 0.1676, "step": 22000 }, { "epoch": 10.299555347531008, "grad_norm": 0.871342122554779, "learning_rate": 1.2384966575830692e-05, "loss": 0.1691, "step": 22010 }, { "epoch": 10.304235899836181, "grad_norm": 0.4834568500518799, "learning_rate": 1.237597169028914e-05, "loss": 0.1441, "step": 22020 }, { "epoch": 10.308916452141352, "grad_norm": 0.6864032745361328, "learning_rate": 1.2366977696339637e-05, "loss": 0.1703, "step": 22030 }, { "epoch": 10.313597004446525, "grad_norm": 0.7755934000015259, "learning_rate": 1.235798459981829e-05, "loss": 0.1584, "step": 22040 }, { "epoch": 10.318277556751697, "grad_norm": 0.5442607998847961, "learning_rate": 1.2348992406560627e-05, "loss": 0.1548, "step": 22050 }, { "epoch": 10.322958109056868, "grad_norm": 0.6028562188148499, "learning_rate": 1.2340001122401577e-05, "loss": 0.1532, "step": 22060 }, { "epoch": 10.327638661362041, "grad_norm": 0.5105697512626648, "learning_rate": 1.2331010753175493e-05, "loss": 0.1513, "step": 22070 }, { "epoch": 10.332319213667212, "grad_norm": 0.6275849342346191, "learning_rate": 1.2322021304716131e-05, "loss": 0.1555, "step": 22080 }, { "epoch": 10.336999765972385, "grad_norm": 0.592749834060669, "learning_rate": 1.2313032782856632e-05, "loss": 0.1611, "step": 22090 }, { "epoch": 10.341680318277557, "grad_norm": 0.5525261759757996, "learning_rate": 1.2304045193429561e-05, "loss": 0.1578, "step": 22100 }, { "epoch": 10.346360870582728, "grad_norm": 1.063340187072754, "learning_rate": 1.2295058542266866e-05, "loss": 0.1706, "step": 22110 }, { "epoch": 10.351041422887901, "grad_norm": 0.564104437828064, "learning_rate": 1.2286072835199891e-05, "loss": 0.1537, "step": 22120 }, { "epoch": 10.355721975193072, "grad_norm": 0.4869387447834015, "learning_rate": 1.2277088078059353e-05, "loss": 0.1387, "step": 22130 }, { "epoch": 10.360402527498245, "grad_norm": 0.69718998670578, "learning_rate": 1.2268104276675377e-05, "loss": 0.1663, "step": 22140 }, { "epoch": 10.365083079803417, "grad_norm": 0.622556746006012, "learning_rate": 1.2259121436877444e-05, "loss": 0.16, "step": 22150 }, { "epoch": 10.369763632108588, "grad_norm": 0.5721147060394287, "learning_rate": 1.2250139564494423e-05, "loss": 0.1607, "step": 22160 }, { "epoch": 10.374444184413761, "grad_norm": 0.585877537727356, "learning_rate": 1.2241158665354557e-05, "loss": 0.1718, "step": 22170 }, { "epoch": 10.379124736718932, "grad_norm": 0.7246363759040833, "learning_rate": 1.223217874528545e-05, "loss": 0.157, "step": 22180 }, { "epoch": 10.383805289024105, "grad_norm": 0.9798051118850708, "learning_rate": 1.2223199810114078e-05, "loss": 0.1562, "step": 22190 }, { "epoch": 10.388485841329278, "grad_norm": 0.7455364465713501, "learning_rate": 1.2214221865666772e-05, "loss": 0.1693, "step": 22200 }, { "epoch": 10.393166393634448, "grad_norm": 0.5685511827468872, "learning_rate": 1.2205244917769224e-05, "loss": 0.1635, "step": 22210 }, { "epoch": 10.397846945939621, "grad_norm": 0.627140998840332, "learning_rate": 1.2196268972246477e-05, "loss": 0.1594, "step": 22220 }, { "epoch": 10.402527498244792, "grad_norm": 0.5940977931022644, "learning_rate": 1.2187294034922922e-05, "loss": 0.1539, "step": 22230 }, { "epoch": 10.407208050549965, "grad_norm": 0.9136936664581299, "learning_rate": 1.217832011162231e-05, "loss": 0.152, "step": 22240 }, { "epoch": 10.411888602855138, "grad_norm": 0.6217501163482666, "learning_rate": 1.216934720816771e-05, "loss": 0.1455, "step": 22250 }, { "epoch": 10.416569155160309, "grad_norm": 0.6497983336448669, "learning_rate": 1.2160375330381543e-05, "loss": 0.1625, "step": 22260 }, { "epoch": 10.421249707465481, "grad_norm": 0.7224992513656616, "learning_rate": 1.2151404484085571e-05, "loss": 0.1536, "step": 22270 }, { "epoch": 10.425930259770652, "grad_norm": 0.5658358335494995, "learning_rate": 1.2142434675100867e-05, "loss": 0.1583, "step": 22280 }, { "epoch": 10.430610812075825, "grad_norm": 0.56265789270401, "learning_rate": 1.2133465909247854e-05, "loss": 0.1588, "step": 22290 }, { "epoch": 10.435291364380998, "grad_norm": 0.9571330547332764, "learning_rate": 1.2124498192346264e-05, "loss": 0.1528, "step": 22300 }, { "epoch": 10.439971916686169, "grad_norm": 0.516566812992096, "learning_rate": 1.2115531530215143e-05, "loss": 0.1689, "step": 22310 }, { "epoch": 10.444652468991341, "grad_norm": 0.5235921740531921, "learning_rate": 1.2106565928672872e-05, "loss": 0.1513, "step": 22320 }, { "epoch": 10.449333021296512, "grad_norm": 0.5545494556427002, "learning_rate": 1.2097601393537122e-05, "loss": 0.1479, "step": 22330 }, { "epoch": 10.454013573601685, "grad_norm": 0.7726820111274719, "learning_rate": 1.2088637930624893e-05, "loss": 0.1567, "step": 22340 }, { "epoch": 10.458694125906858, "grad_norm": 0.7698956727981567, "learning_rate": 1.2079675545752473e-05, "loss": 0.1588, "step": 22350 }, { "epoch": 10.463374678212029, "grad_norm": 0.7036251425743103, "learning_rate": 1.2070714244735457e-05, "loss": 0.148, "step": 22360 }, { "epoch": 10.468055230517201, "grad_norm": 0.8127485513687134, "learning_rate": 1.2061754033388734e-05, "loss": 0.1586, "step": 22370 }, { "epoch": 10.472735782822372, "grad_norm": 0.9103930592536926, "learning_rate": 1.205279491752649e-05, "loss": 0.1571, "step": 22380 }, { "epoch": 10.477416335127545, "grad_norm": 0.6346786022186279, "learning_rate": 1.2043836902962197e-05, "loss": 0.1641, "step": 22390 }, { "epoch": 10.482096887432718, "grad_norm": 0.9024659395217896, "learning_rate": 1.2034879995508615e-05, "loss": 0.1686, "step": 22400 }, { "epoch": 10.486777439737889, "grad_norm": 0.8859565854072571, "learning_rate": 1.2025924200977784e-05, "loss": 0.1508, "step": 22410 }, { "epoch": 10.491457992043062, "grad_norm": 0.48661088943481445, "learning_rate": 1.2016969525181024e-05, "loss": 0.1698, "step": 22420 }, { "epoch": 10.496138544348232, "grad_norm": 0.4672151207923889, "learning_rate": 1.2008015973928915e-05, "loss": 0.1576, "step": 22430 }, { "epoch": 10.500819096653405, "grad_norm": 0.5353006720542908, "learning_rate": 1.1999063553031337e-05, "loss": 0.1646, "step": 22440 }, { "epoch": 10.505499648958578, "grad_norm": 0.5763953924179077, "learning_rate": 1.1990112268297402e-05, "loss": 0.157, "step": 22450 }, { "epoch": 10.510180201263749, "grad_norm": 0.49693024158477783, "learning_rate": 1.1981162125535517e-05, "loss": 0.1418, "step": 22460 }, { "epoch": 10.514860753568922, "grad_norm": 0.5667943358421326, "learning_rate": 1.1972213130553323e-05, "loss": 0.1551, "step": 22470 }, { "epoch": 10.519541305874093, "grad_norm": 0.4362475574016571, "learning_rate": 1.1963265289157721e-05, "loss": 0.152, "step": 22480 }, { "epoch": 10.524221858179265, "grad_norm": 0.5627264380455017, "learning_rate": 1.1954318607154883e-05, "loss": 0.1525, "step": 22490 }, { "epoch": 10.528902410484438, "grad_norm": 0.5540770292282104, "learning_rate": 1.1945373090350198e-05, "loss": 0.155, "step": 22500 }, { "epoch": 10.533582962789609, "grad_norm": 0.5287156105041504, "learning_rate": 1.1936428744548325e-05, "loss": 0.1624, "step": 22510 }, { "epoch": 10.538263515094782, "grad_norm": 0.6784474849700928, "learning_rate": 1.1927485575553154e-05, "loss": 0.1563, "step": 22520 }, { "epoch": 10.542944067399953, "grad_norm": 0.6877347826957703, "learning_rate": 1.1918543589167799e-05, "loss": 0.1584, "step": 22530 }, { "epoch": 10.547624619705125, "grad_norm": 0.6722036600112915, "learning_rate": 1.1909602791194632e-05, "loss": 0.1634, "step": 22540 }, { "epoch": 10.552305172010296, "grad_norm": 0.5817556381225586, "learning_rate": 1.190066318743523e-05, "loss": 0.1523, "step": 22550 }, { "epoch": 10.556985724315469, "grad_norm": 0.6082693934440613, "learning_rate": 1.1891724783690408e-05, "loss": 0.1602, "step": 22560 }, { "epoch": 10.561666276620642, "grad_norm": 1.0162500143051147, "learning_rate": 1.1882787585760197e-05, "loss": 0.1496, "step": 22570 }, { "epoch": 10.566346828925813, "grad_norm": 0.5324554443359375, "learning_rate": 1.187385159944385e-05, "loss": 0.1589, "step": 22580 }, { "epoch": 10.571027381230985, "grad_norm": 0.7678310871124268, "learning_rate": 1.1864916830539834e-05, "loss": 0.1561, "step": 22590 }, { "epoch": 10.575707933536158, "grad_norm": 0.5132659673690796, "learning_rate": 1.1855983284845818e-05, "loss": 0.1526, "step": 22600 }, { "epoch": 10.580388485841329, "grad_norm": 0.6820417642593384, "learning_rate": 1.1847050968158688e-05, "loss": 0.1602, "step": 22610 }, { "epoch": 10.585069038146502, "grad_norm": 0.5832083821296692, "learning_rate": 1.1838119886274521e-05, "loss": 0.1633, "step": 22620 }, { "epoch": 10.589749590451673, "grad_norm": 0.839650571346283, "learning_rate": 1.1829190044988606e-05, "loss": 0.1401, "step": 22630 }, { "epoch": 10.594430142756845, "grad_norm": 0.6438979506492615, "learning_rate": 1.1820261450095416e-05, "loss": 0.1571, "step": 22640 }, { "epoch": 10.599110695062016, "grad_norm": 0.6404163241386414, "learning_rate": 1.1811334107388619e-05, "loss": 0.1458, "step": 22650 }, { "epoch": 10.60379124736719, "grad_norm": 0.5374692678451538, "learning_rate": 1.1802408022661073e-05, "loss": 0.1499, "step": 22660 }, { "epoch": 10.608471799672362, "grad_norm": 0.81978440284729, "learning_rate": 1.1793483201704811e-05, "loss": 0.171, "step": 22670 }, { "epoch": 10.613152351977533, "grad_norm": 1.3800994157791138, "learning_rate": 1.1784559650311063e-05, "loss": 0.1777, "step": 22680 }, { "epoch": 10.617832904282706, "grad_norm": 0.9255260825157166, "learning_rate": 1.1775637374270223e-05, "loss": 0.1502, "step": 22690 }, { "epoch": 10.622513456587878, "grad_norm": 0.8851323127746582, "learning_rate": 1.176671637937185e-05, "loss": 0.1409, "step": 22700 }, { "epoch": 10.62719400889305, "grad_norm": 0.6165180802345276, "learning_rate": 1.175779667140469e-05, "loss": 0.1497, "step": 22710 }, { "epoch": 10.631874561198222, "grad_norm": 0.6903367638587952, "learning_rate": 1.1748878256156636e-05, "loss": 0.1547, "step": 22720 }, { "epoch": 10.636555113503393, "grad_norm": 0.5377488136291504, "learning_rate": 1.173996113941476e-05, "loss": 0.1599, "step": 22730 }, { "epoch": 10.641235665808566, "grad_norm": 0.5022105574607849, "learning_rate": 1.1731045326965278e-05, "loss": 0.1732, "step": 22740 }, { "epoch": 10.645916218113737, "grad_norm": 0.602359414100647, "learning_rate": 1.172213082459356e-05, "loss": 0.158, "step": 22750 }, { "epoch": 10.65059677041891, "grad_norm": 0.6461465358734131, "learning_rate": 1.1713217638084139e-05, "loss": 0.1575, "step": 22760 }, { "epoch": 10.655277322724082, "grad_norm": 0.5810430645942688, "learning_rate": 1.1704305773220675e-05, "loss": 0.1515, "step": 22770 }, { "epoch": 10.659957875029253, "grad_norm": 0.5694592595100403, "learning_rate": 1.169539523578599e-05, "loss": 0.1556, "step": 22780 }, { "epoch": 10.664638427334426, "grad_norm": 0.5303595662117004, "learning_rate": 1.1686486031562024e-05, "loss": 0.1463, "step": 22790 }, { "epoch": 10.669318979639598, "grad_norm": 0.9007068872451782, "learning_rate": 1.1677578166329872e-05, "loss": 0.1537, "step": 22800 }, { "epoch": 10.67399953194477, "grad_norm": 0.5557231903076172, "learning_rate": 1.166867164586975e-05, "loss": 0.148, "step": 22810 }, { "epoch": 10.678680084249942, "grad_norm": 0.7783827781677246, "learning_rate": 1.1659766475960995e-05, "loss": 0.1703, "step": 22820 }, { "epoch": 10.683360636555113, "grad_norm": 0.6157659888267517, "learning_rate": 1.1650862662382084e-05, "loss": 0.1538, "step": 22830 }, { "epoch": 10.688041188860286, "grad_norm": 0.7492433786392212, "learning_rate": 1.1641960210910596e-05, "loss": 0.1629, "step": 22840 }, { "epoch": 10.692721741165457, "grad_norm": 0.5983368754386902, "learning_rate": 1.1633059127323248e-05, "loss": 0.1694, "step": 22850 }, { "epoch": 10.69740229347063, "grad_norm": 0.4760008156299591, "learning_rate": 1.162415941739585e-05, "loss": 0.1616, "step": 22860 }, { "epoch": 10.702082845775802, "grad_norm": 0.5451993346214294, "learning_rate": 1.1615261086903318e-05, "loss": 0.1368, "step": 22870 }, { "epoch": 10.706763398080973, "grad_norm": 1.1065813302993774, "learning_rate": 1.1606364141619697e-05, "loss": 0.1602, "step": 22880 }, { "epoch": 10.711443950386146, "grad_norm": 0.7130110859870911, "learning_rate": 1.1597468587318105e-05, "loss": 0.1489, "step": 22890 }, { "epoch": 10.716124502691317, "grad_norm": 0.5563497543334961, "learning_rate": 1.1588574429770782e-05, "loss": 0.1528, "step": 22900 }, { "epoch": 10.72080505499649, "grad_norm": 0.6842336654663086, "learning_rate": 1.1579681674749046e-05, "loss": 0.1549, "step": 22910 }, { "epoch": 10.725485607301662, "grad_norm": 0.6269249320030212, "learning_rate": 1.1570790328023304e-05, "loss": 0.1571, "step": 22920 }, { "epoch": 10.730166159606833, "grad_norm": 0.6225173473358154, "learning_rate": 1.1561900395363062e-05, "loss": 0.1666, "step": 22930 }, { "epoch": 10.734846711912006, "grad_norm": 0.8129770159721375, "learning_rate": 1.1553011882536897e-05, "loss": 0.1665, "step": 22940 }, { "epoch": 10.739527264217177, "grad_norm": 0.5800447463989258, "learning_rate": 1.154412479531247e-05, "loss": 0.1586, "step": 22950 }, { "epoch": 10.74420781652235, "grad_norm": 0.5157530307769775, "learning_rate": 1.1535239139456513e-05, "loss": 0.1488, "step": 22960 }, { "epoch": 10.748888368827522, "grad_norm": 0.724309504032135, "learning_rate": 1.152635492073483e-05, "loss": 0.145, "step": 22970 }, { "epoch": 10.753568921132693, "grad_norm": 0.5770343542098999, "learning_rate": 1.1517472144912304e-05, "loss": 0.1459, "step": 22980 }, { "epoch": 10.758249473437866, "grad_norm": 0.6684057712554932, "learning_rate": 1.150859081775286e-05, "loss": 0.1645, "step": 22990 }, { "epoch": 10.762930025743037, "grad_norm": 0.7144168019294739, "learning_rate": 1.1499710945019504e-05, "loss": 0.1628, "step": 23000 }, { "epoch": 10.76761057804821, "grad_norm": 0.6016170978546143, "learning_rate": 1.149083253247428e-05, "loss": 0.1373, "step": 23010 }, { "epoch": 10.772291130353382, "grad_norm": 0.5320923328399658, "learning_rate": 1.1481955585878302e-05, "loss": 0.1577, "step": 23020 }, { "epoch": 10.776971682658553, "grad_norm": 0.6177768111228943, "learning_rate": 1.1473080110991718e-05, "loss": 0.1394, "step": 23030 }, { "epoch": 10.781652234963726, "grad_norm": 0.8620502352714539, "learning_rate": 1.1464206113573728e-05, "loss": 0.1503, "step": 23040 }, { "epoch": 10.786332787268897, "grad_norm": 0.6578874588012695, "learning_rate": 1.1455333599382578e-05, "loss": 0.1542, "step": 23050 }, { "epoch": 10.79101333957407, "grad_norm": 0.6389061212539673, "learning_rate": 1.144646257417554e-05, "loss": 0.1621, "step": 23060 }, { "epoch": 10.795693891879242, "grad_norm": 0.926965594291687, "learning_rate": 1.1437593043708933e-05, "loss": 0.1637, "step": 23070 }, { "epoch": 10.800374444184413, "grad_norm": 0.587453305721283, "learning_rate": 1.1428725013738093e-05, "loss": 0.1705, "step": 23080 }, { "epoch": 10.805054996489586, "grad_norm": 0.8101313710212708, "learning_rate": 1.1419858490017385e-05, "loss": 0.138, "step": 23090 }, { "epoch": 10.809735548794757, "grad_norm": 0.6024953126907349, "learning_rate": 1.141099347830021e-05, "loss": 0.1559, "step": 23100 }, { "epoch": 10.81441610109993, "grad_norm": 0.633702278137207, "learning_rate": 1.1402129984338966e-05, "loss": 0.1783, "step": 23110 }, { "epoch": 10.819096653405103, "grad_norm": 0.5364848971366882, "learning_rate": 1.1393268013885092e-05, "loss": 0.167, "step": 23120 }, { "epoch": 10.823777205710273, "grad_norm": 0.5143747925758362, "learning_rate": 1.1384407572689018e-05, "loss": 0.1608, "step": 23130 }, { "epoch": 10.828457758015446, "grad_norm": 0.5438597202301025, "learning_rate": 1.137554866650018e-05, "loss": 0.1475, "step": 23140 }, { "epoch": 10.833138310320617, "grad_norm": 0.6237771511077881, "learning_rate": 1.1366691301067039e-05, "loss": 0.1435, "step": 23150 }, { "epoch": 10.83781886262579, "grad_norm": 0.6074762344360352, "learning_rate": 1.1357835482137036e-05, "loss": 0.1533, "step": 23160 }, { "epoch": 10.842499414930963, "grad_norm": 0.670236349105835, "learning_rate": 1.1348981215456617e-05, "loss": 0.1489, "step": 23170 }, { "epoch": 10.847179967236134, "grad_norm": 0.5666065216064453, "learning_rate": 1.134012850677122e-05, "loss": 0.174, "step": 23180 }, { "epoch": 10.851860519541306, "grad_norm": 0.7535435557365417, "learning_rate": 1.1331277361825272e-05, "loss": 0.1543, "step": 23190 }, { "epoch": 10.856541071846477, "grad_norm": 1.0046563148498535, "learning_rate": 1.1322427786362184e-05, "loss": 0.147, "step": 23200 }, { "epoch": 10.86122162415165, "grad_norm": 0.565543532371521, "learning_rate": 1.1313579786124348e-05, "loss": 0.1507, "step": 23210 }, { "epoch": 10.865902176456823, "grad_norm": 0.6196426153182983, "learning_rate": 1.1304733366853138e-05, "loss": 0.1602, "step": 23220 }, { "epoch": 10.870582728761994, "grad_norm": 0.7255396246910095, "learning_rate": 1.1295888534288902e-05, "loss": 0.159, "step": 23230 }, { "epoch": 10.875263281067166, "grad_norm": 0.6040448546409607, "learning_rate": 1.1287045294170945e-05, "loss": 0.169, "step": 23240 }, { "epoch": 10.879943833372337, "grad_norm": 0.6223192811012268, "learning_rate": 1.127820365223756e-05, "loss": 0.1691, "step": 23250 }, { "epoch": 10.88462438567751, "grad_norm": 0.5082964301109314, "learning_rate": 1.1269363614225989e-05, "loss": 0.1607, "step": 23260 }, { "epoch": 10.889304937982683, "grad_norm": 0.6429381370544434, "learning_rate": 1.1260525185872439e-05, "loss": 0.1505, "step": 23270 }, { "epoch": 10.893985490287854, "grad_norm": 0.6153817176818848, "learning_rate": 1.1251688372912067e-05, "loss": 0.1524, "step": 23280 }, { "epoch": 10.898666042593026, "grad_norm": 0.7271292805671692, "learning_rate": 1.1242853181078983e-05, "loss": 0.1566, "step": 23290 }, { "epoch": 10.903346594898197, "grad_norm": 0.9760752320289612, "learning_rate": 1.1234019616106253e-05, "loss": 0.1507, "step": 23300 }, { "epoch": 10.90802714720337, "grad_norm": 0.5703684687614441, "learning_rate": 1.1225187683725874e-05, "loss": 0.1554, "step": 23310 }, { "epoch": 10.912707699508543, "grad_norm": 1.0492792129516602, "learning_rate": 1.1216357389668802e-05, "loss": 0.1512, "step": 23320 }, { "epoch": 10.917388251813714, "grad_norm": 0.5803138613700867, "learning_rate": 1.120752873966491e-05, "loss": 0.1606, "step": 23330 }, { "epoch": 10.922068804118886, "grad_norm": 0.5690540075302124, "learning_rate": 1.1198701739443015e-05, "loss": 0.1497, "step": 23340 }, { "epoch": 10.926749356424057, "grad_norm": 1.3112118244171143, "learning_rate": 1.1189876394730864e-05, "loss": 0.1479, "step": 23350 }, { "epoch": 10.93142990872923, "grad_norm": 0.5878019332885742, "learning_rate": 1.1181052711255125e-05, "loss": 0.1613, "step": 23360 }, { "epoch": 10.936110461034403, "grad_norm": 0.6228576898574829, "learning_rate": 1.117223069474139e-05, "loss": 0.1621, "step": 23370 }, { "epoch": 10.940791013339574, "grad_norm": 0.6768685579299927, "learning_rate": 1.1163410350914169e-05, "loss": 0.1582, "step": 23380 }, { "epoch": 10.945471565644747, "grad_norm": 0.6785877346992493, "learning_rate": 1.1154591685496893e-05, "loss": 0.1546, "step": 23390 }, { "epoch": 10.950152117949917, "grad_norm": 0.6186737418174744, "learning_rate": 1.1145774704211894e-05, "loss": 0.1776, "step": 23400 }, { "epoch": 10.95483267025509, "grad_norm": 0.598477840423584, "learning_rate": 1.1136959412780412e-05, "loss": 0.1566, "step": 23410 }, { "epoch": 10.959513222560261, "grad_norm": 0.6043705940246582, "learning_rate": 1.1128145816922602e-05, "loss": 0.1603, "step": 23420 }, { "epoch": 10.964193774865434, "grad_norm": 0.6676538586616516, "learning_rate": 1.1119333922357503e-05, "loss": 0.1541, "step": 23430 }, { "epoch": 10.968874327170607, "grad_norm": 0.6170877814292908, "learning_rate": 1.1110523734803063e-05, "loss": 0.155, "step": 23440 }, { "epoch": 10.973554879475778, "grad_norm": 0.5216424465179443, "learning_rate": 1.1101715259976118e-05, "loss": 0.1417, "step": 23450 }, { "epoch": 10.97823543178095, "grad_norm": 0.6253873109817505, "learning_rate": 1.1092908503592384e-05, "loss": 0.1463, "step": 23460 }, { "epoch": 10.982915984086123, "grad_norm": 0.5488091111183167, "learning_rate": 1.1084103471366475e-05, "loss": 0.1471, "step": 23470 }, { "epoch": 10.987596536391294, "grad_norm": 0.6806795001029968, "learning_rate": 1.1075300169011876e-05, "loss": 0.1514, "step": 23480 }, { "epoch": 10.992277088696467, "grad_norm": 0.774299144744873, "learning_rate": 1.1066498602240969e-05, "loss": 0.1547, "step": 23490 }, { "epoch": 10.996957641001638, "grad_norm": 0.5439617037773132, "learning_rate": 1.105769877676498e-05, "loss": 0.1639, "step": 23500 }, { "epoch": 11.001404165691552, "grad_norm": 0.7181140184402466, "learning_rate": 1.1048900698294018e-05, "loss": 0.1295, "step": 23510 }, { "epoch": 11.006084717996723, "grad_norm": 0.580805242061615, "learning_rate": 1.1040104372537078e-05, "loss": 0.1355, "step": 23520 }, { "epoch": 11.010765270301896, "grad_norm": 0.6891374588012695, "learning_rate": 1.1031309805201981e-05, "loss": 0.1475, "step": 23530 }, { "epoch": 11.015445822607068, "grad_norm": 0.8347881436347961, "learning_rate": 1.1022517001995442e-05, "loss": 0.1419, "step": 23540 }, { "epoch": 11.02012637491224, "grad_norm": 0.8059455156326294, "learning_rate": 1.1013725968623003e-05, "loss": 0.1374, "step": 23550 }, { "epoch": 11.024806927217412, "grad_norm": 0.8565636873245239, "learning_rate": 1.1004936710789079e-05, "loss": 0.1428, "step": 23560 }, { "epoch": 11.029487479522583, "grad_norm": 0.7815762162208557, "learning_rate": 1.0996149234196923e-05, "loss": 0.1378, "step": 23570 }, { "epoch": 11.034168031827756, "grad_norm": 0.834667444229126, "learning_rate": 1.0987363544548627e-05, "loss": 0.1634, "step": 23580 }, { "epoch": 11.038848584132928, "grad_norm": 0.9868204593658447, "learning_rate": 1.0978579647545134e-05, "loss": 0.1431, "step": 23590 }, { "epoch": 11.0435291364381, "grad_norm": 0.5912026762962341, "learning_rate": 1.0969797548886218e-05, "loss": 0.1502, "step": 23600 }, { "epoch": 11.048209688743272, "grad_norm": 0.5726405382156372, "learning_rate": 1.0961017254270489e-05, "loss": 0.1338, "step": 23610 }, { "epoch": 11.052890241048443, "grad_norm": 0.6000029444694519, "learning_rate": 1.0952238769395387e-05, "loss": 0.1393, "step": 23620 }, { "epoch": 11.057570793353616, "grad_norm": 0.6345407366752625, "learning_rate": 1.094346209995717e-05, "loss": 0.1531, "step": 23630 }, { "epoch": 11.062251345658789, "grad_norm": 0.6605687141418457, "learning_rate": 1.0934687251650927e-05, "loss": 0.1349, "step": 23640 }, { "epoch": 11.06693189796396, "grad_norm": 0.5875235795974731, "learning_rate": 1.0925914230170556e-05, "loss": 0.1372, "step": 23650 }, { "epoch": 11.071612450269132, "grad_norm": 0.9291567802429199, "learning_rate": 1.0917143041208785e-05, "loss": 0.1422, "step": 23660 }, { "epoch": 11.076293002574303, "grad_norm": 0.5706049203872681, "learning_rate": 1.0908373690457142e-05, "loss": 0.1492, "step": 23670 }, { "epoch": 11.080973554879476, "grad_norm": 0.6742461323738098, "learning_rate": 1.0899606183605955e-05, "loss": 0.1368, "step": 23680 }, { "epoch": 11.085654107184649, "grad_norm": 0.5235504508018494, "learning_rate": 1.0890840526344373e-05, "loss": 0.1301, "step": 23690 }, { "epoch": 11.09033465948982, "grad_norm": 0.7384955286979675, "learning_rate": 1.0882076724360325e-05, "loss": 0.1327, "step": 23700 }, { "epoch": 11.095015211794992, "grad_norm": 0.7507896423339844, "learning_rate": 1.0873314783340564e-05, "loss": 0.1428, "step": 23710 }, { "epoch": 11.099695764100163, "grad_norm": 0.5008641481399536, "learning_rate": 1.0864554708970604e-05, "loss": 0.1499, "step": 23720 }, { "epoch": 11.104376316405336, "grad_norm": 0.6289319396018982, "learning_rate": 1.085579650693476e-05, "loss": 0.1433, "step": 23730 }, { "epoch": 11.109056868710509, "grad_norm": 0.69915771484375, "learning_rate": 1.0847040182916152e-05, "loss": 0.1191, "step": 23740 }, { "epoch": 11.11373742101568, "grad_norm": 0.5134388208389282, "learning_rate": 1.0838285742596641e-05, "loss": 0.1541, "step": 23750 }, { "epoch": 11.118417973320852, "grad_norm": 0.6164541840553284, "learning_rate": 1.0829533191656907e-05, "loss": 0.1307, "step": 23760 }, { "epoch": 11.123098525626023, "grad_norm": 0.5147684216499329, "learning_rate": 1.082078253577638e-05, "loss": 0.1376, "step": 23770 }, { "epoch": 11.127779077931196, "grad_norm": 0.5771976709365845, "learning_rate": 1.0812033780633258e-05, "loss": 0.1419, "step": 23780 }, { "epoch": 11.132459630236369, "grad_norm": 0.7668352723121643, "learning_rate": 1.0803286931904519e-05, "loss": 0.1416, "step": 23790 }, { "epoch": 11.13714018254154, "grad_norm": 0.7396911978721619, "learning_rate": 1.0794541995265897e-05, "loss": 0.1282, "step": 23800 }, { "epoch": 11.141820734846712, "grad_norm": 0.5870901346206665, "learning_rate": 1.0785798976391892e-05, "loss": 0.1363, "step": 23810 }, { "epoch": 11.146501287151883, "grad_norm": 0.6051260232925415, "learning_rate": 1.0777057880955744e-05, "loss": 0.1408, "step": 23820 }, { "epoch": 11.151181839457056, "grad_norm": 0.5316891074180603, "learning_rate": 1.0768318714629466e-05, "loss": 0.1533, "step": 23830 }, { "epoch": 11.155862391762227, "grad_norm": 0.5542880892753601, "learning_rate": 1.07595814830838e-05, "loss": 0.1514, "step": 23840 }, { "epoch": 11.1605429440674, "grad_norm": 0.7158204913139343, "learning_rate": 1.0750846191988242e-05, "loss": 0.1273, "step": 23850 }, { "epoch": 11.165223496372572, "grad_norm": 1.1740988492965698, "learning_rate": 1.0742112847011029e-05, "loss": 0.1435, "step": 23860 }, { "epoch": 11.169904048677743, "grad_norm": 0.4748194217681885, "learning_rate": 1.0733381453819132e-05, "loss": 0.1389, "step": 23870 }, { "epoch": 11.174584600982916, "grad_norm": 0.7532009482383728, "learning_rate": 1.0724652018078263e-05, "loss": 0.1426, "step": 23880 }, { "epoch": 11.179265153288087, "grad_norm": 0.5450142025947571, "learning_rate": 1.0715924545452854e-05, "loss": 0.1533, "step": 23890 }, { "epoch": 11.18394570559326, "grad_norm": 0.6606458425521851, "learning_rate": 1.0707199041606059e-05, "loss": 0.1389, "step": 23900 }, { "epoch": 11.188626257898433, "grad_norm": 0.5599029064178467, "learning_rate": 1.0698475512199779e-05, "loss": 0.1363, "step": 23910 }, { "epoch": 11.193306810203604, "grad_norm": 0.5552523136138916, "learning_rate": 1.0689753962894602e-05, "loss": 0.1433, "step": 23920 }, { "epoch": 11.197987362508776, "grad_norm": 0.6852610111236572, "learning_rate": 1.068103439934986e-05, "loss": 0.1411, "step": 23930 }, { "epoch": 11.202667914813947, "grad_norm": 0.7648048996925354, "learning_rate": 1.0672316827223577e-05, "loss": 0.1385, "step": 23940 }, { "epoch": 11.20734846711912, "grad_norm": 0.5039881467819214, "learning_rate": 1.0663601252172486e-05, "loss": 0.1558, "step": 23950 }, { "epoch": 11.212029019424293, "grad_norm": 0.5538755655288696, "learning_rate": 1.0654887679852038e-05, "loss": 0.1384, "step": 23960 }, { "epoch": 11.216709571729464, "grad_norm": 0.47445690631866455, "learning_rate": 1.0646176115916368e-05, "loss": 0.1506, "step": 23970 }, { "epoch": 11.221390124034636, "grad_norm": 0.8145033121109009, "learning_rate": 1.0637466566018323e-05, "loss": 0.1483, "step": 23980 }, { "epoch": 11.226070676339807, "grad_norm": 0.5451047420501709, "learning_rate": 1.062875903580943e-05, "loss": 0.1395, "step": 23990 }, { "epoch": 11.23075122864498, "grad_norm": 0.7811582684516907, "learning_rate": 1.0620053530939908e-05, "loss": 0.1218, "step": 24000 }, { "epoch": 11.235431780950153, "grad_norm": 0.5669838190078735, "learning_rate": 1.0611350057058672e-05, "loss": 0.1339, "step": 24010 }, { "epoch": 11.240112333255324, "grad_norm": 0.5740223526954651, "learning_rate": 1.0602648619813307e-05, "loss": 0.1329, "step": 24020 }, { "epoch": 11.244792885560496, "grad_norm": 0.5339981913566589, "learning_rate": 1.0593949224850082e-05, "loss": 0.1442, "step": 24030 }, { "epoch": 11.249473437865667, "grad_norm": 0.5272551774978638, "learning_rate": 1.058525187781394e-05, "loss": 0.1454, "step": 24040 }, { "epoch": 11.25415399017084, "grad_norm": 0.5015747547149658, "learning_rate": 1.0576556584348499e-05, "loss": 0.1376, "step": 24050 }, { "epoch": 11.258834542476013, "grad_norm": 0.6302873492240906, "learning_rate": 1.0567863350096034e-05, "loss": 0.1306, "step": 24060 }, { "epoch": 11.263515094781184, "grad_norm": 3.025425910949707, "learning_rate": 1.055917218069749e-05, "loss": 0.1495, "step": 24070 }, { "epoch": 11.268195647086356, "grad_norm": 0.5895903706550598, "learning_rate": 1.0550483081792481e-05, "loss": 0.1487, "step": 24080 }, { "epoch": 11.272876199391527, "grad_norm": 0.5186648368835449, "learning_rate": 1.0541796059019257e-05, "loss": 0.1454, "step": 24090 }, { "epoch": 11.2775567516967, "grad_norm": 0.8565667271614075, "learning_rate": 1.0533111118014748e-05, "loss": 0.1339, "step": 24100 }, { "epoch": 11.282237304001873, "grad_norm": 0.512869656085968, "learning_rate": 1.0524428264414506e-05, "loss": 0.1434, "step": 24110 }, { "epoch": 11.286917856307044, "grad_norm": 0.6559303998947144, "learning_rate": 1.0515747503852738e-05, "loss": 0.1285, "step": 24120 }, { "epoch": 11.291598408612217, "grad_norm": 0.6919389963150024, "learning_rate": 1.0507068841962309e-05, "loss": 0.1323, "step": 24130 }, { "epoch": 11.296278960917387, "grad_norm": 0.7362671494483948, "learning_rate": 1.0498392284374694e-05, "loss": 0.1468, "step": 24140 }, { "epoch": 11.30095951322256, "grad_norm": 0.6311518549919128, "learning_rate": 1.0489717836720028e-05, "loss": 0.1489, "step": 24150 }, { "epoch": 11.305640065527733, "grad_norm": 1.011696457862854, "learning_rate": 1.0481045504627063e-05, "loss": 0.1594, "step": 24160 }, { "epoch": 11.310320617832904, "grad_norm": 0.6294897794723511, "learning_rate": 1.0472375293723172e-05, "loss": 0.1387, "step": 24170 }, { "epoch": 11.315001170138077, "grad_norm": 0.837202787399292, "learning_rate": 1.0463707209634375e-05, "loss": 0.1453, "step": 24180 }, { "epoch": 11.319681722443248, "grad_norm": 0.5499151945114136, "learning_rate": 1.0455041257985285e-05, "loss": 0.1306, "step": 24190 }, { "epoch": 11.32436227474842, "grad_norm": 0.5573265552520752, "learning_rate": 1.0446377444399155e-05, "loss": 0.1338, "step": 24200 }, { "epoch": 11.329042827053593, "grad_norm": 0.5401045680046082, "learning_rate": 1.0437715774497832e-05, "loss": 0.1287, "step": 24210 }, { "epoch": 11.333723379358764, "grad_norm": 0.5329187512397766, "learning_rate": 1.0429056253901785e-05, "loss": 0.1336, "step": 24220 }, { "epoch": 11.338403931663937, "grad_norm": 0.8659389615058899, "learning_rate": 1.0420398888230076e-05, "loss": 0.144, "step": 24230 }, { "epoch": 11.343084483969108, "grad_norm": 0.5318187475204468, "learning_rate": 1.0411743683100381e-05, "loss": 0.1352, "step": 24240 }, { "epoch": 11.34776503627428, "grad_norm": 0.5403832793235779, "learning_rate": 1.0403090644128967e-05, "loss": 0.1361, "step": 24250 }, { "epoch": 11.352445588579453, "grad_norm": 0.5844314098358154, "learning_rate": 1.0394439776930701e-05, "loss": 0.1345, "step": 24260 }, { "epoch": 11.357126140884624, "grad_norm": 0.5933575630187988, "learning_rate": 1.0385791087119026e-05, "loss": 0.137, "step": 24270 }, { "epoch": 11.361806693189797, "grad_norm": 0.4842611253261566, "learning_rate": 1.0377144580305995e-05, "loss": 0.1346, "step": 24280 }, { "epoch": 11.366487245494968, "grad_norm": 1.0972256660461426, "learning_rate": 1.0368500262102224e-05, "loss": 0.1533, "step": 24290 }, { "epoch": 11.37116779780014, "grad_norm": 0.5129645466804504, "learning_rate": 1.0359858138116924e-05, "loss": 0.1287, "step": 24300 }, { "epoch": 11.375848350105313, "grad_norm": 0.5239018797874451, "learning_rate": 1.0351218213957864e-05, "loss": 0.1561, "step": 24310 }, { "epoch": 11.380528902410484, "grad_norm": 0.5535427331924438, "learning_rate": 1.0342580495231416e-05, "loss": 0.1427, "step": 24320 }, { "epoch": 11.385209454715657, "grad_norm": 1.009031057357788, "learning_rate": 1.0333944987542484e-05, "loss": 0.1389, "step": 24330 }, { "epoch": 11.389890007020828, "grad_norm": 0.6138202548027039, "learning_rate": 1.0325311696494563e-05, "loss": 0.1348, "step": 24340 }, { "epoch": 11.394570559326, "grad_norm": 0.481876015663147, "learning_rate": 1.0316680627689704e-05, "loss": 0.1384, "step": 24350 }, { "epoch": 11.399251111631173, "grad_norm": 0.5116512179374695, "learning_rate": 1.0308051786728504e-05, "loss": 0.1419, "step": 24360 }, { "epoch": 11.403931663936344, "grad_norm": 0.6917710900306702, "learning_rate": 1.0299425179210137e-05, "loss": 0.1376, "step": 24370 }, { "epoch": 11.408612216241517, "grad_norm": 0.8714600205421448, "learning_rate": 1.029080081073231e-05, "loss": 0.1472, "step": 24380 }, { "epoch": 11.413292768546688, "grad_norm": 0.6735618114471436, "learning_rate": 1.0282178686891275e-05, "loss": 0.1457, "step": 24390 }, { "epoch": 11.41797332085186, "grad_norm": 0.5392712950706482, "learning_rate": 1.0273558813281845e-05, "loss": 0.1392, "step": 24400 }, { "epoch": 11.422653873157033, "grad_norm": 0.5931081175804138, "learning_rate": 1.0264941195497352e-05, "loss": 0.1326, "step": 24410 }, { "epoch": 11.427334425462204, "grad_norm": 0.5912725329399109, "learning_rate": 1.0256325839129688e-05, "loss": 0.1399, "step": 24420 }, { "epoch": 11.432014977767377, "grad_norm": 0.5445056557655334, "learning_rate": 1.0247712749769253e-05, "loss": 0.146, "step": 24430 }, { "epoch": 11.436695530072548, "grad_norm": 0.5706822872161865, "learning_rate": 1.0239101933004992e-05, "loss": 0.1465, "step": 24440 }, { "epoch": 11.44137608237772, "grad_norm": 0.654607355594635, "learning_rate": 1.0230493394424367e-05, "loss": 0.1319, "step": 24450 }, { "epoch": 11.446056634682893, "grad_norm": 0.5827822089195251, "learning_rate": 1.0221887139613368e-05, "loss": 0.1321, "step": 24460 }, { "epoch": 11.450737186988064, "grad_norm": 0.760961651802063, "learning_rate": 1.0213283174156502e-05, "loss": 0.1366, "step": 24470 }, { "epoch": 11.455417739293237, "grad_norm": 0.47783663868904114, "learning_rate": 1.0204681503636787e-05, "loss": 0.1285, "step": 24480 }, { "epoch": 11.460098291598408, "grad_norm": 0.5858319401741028, "learning_rate": 1.019608213363575e-05, "loss": 0.1494, "step": 24490 }, { "epoch": 11.46477884390358, "grad_norm": 0.8314414620399475, "learning_rate": 1.0187485069733438e-05, "loss": 0.1452, "step": 24500 }, { "epoch": 11.469459396208753, "grad_norm": 0.5414323806762695, "learning_rate": 1.017889031750838e-05, "loss": 0.1382, "step": 24510 }, { "epoch": 11.474139948513924, "grad_norm": 0.6516498327255249, "learning_rate": 1.017029788253763e-05, "loss": 0.1297, "step": 24520 }, { "epoch": 11.478820500819097, "grad_norm": 0.5829859375953674, "learning_rate": 1.0161707770396713e-05, "loss": 0.1306, "step": 24530 }, { "epoch": 11.483501053124268, "grad_norm": 0.6239098310470581, "learning_rate": 1.015311998665967e-05, "loss": 0.1365, "step": 24540 }, { "epoch": 11.48818160542944, "grad_norm": 0.6185608506202698, "learning_rate": 1.014453453689902e-05, "loss": 0.1425, "step": 24550 }, { "epoch": 11.492862157734614, "grad_norm": 0.5587952733039856, "learning_rate": 1.0135951426685758e-05, "loss": 0.1345, "step": 24560 }, { "epoch": 11.497542710039784, "grad_norm": 0.6198165416717529, "learning_rate": 1.0127370661589383e-05, "loss": 0.1459, "step": 24570 }, { "epoch": 11.502223262344957, "grad_norm": 0.8172576427459717, "learning_rate": 1.0118792247177855e-05, "loss": 0.1394, "step": 24580 }, { "epoch": 11.506903814650128, "grad_norm": 1.1749573945999146, "learning_rate": 1.0110216189017614e-05, "loss": 0.1487, "step": 24590 }, { "epoch": 11.5115843669553, "grad_norm": 0.49150627851486206, "learning_rate": 1.0101642492673571e-05, "loss": 0.1453, "step": 24600 }, { "epoch": 11.516264919260472, "grad_norm": 0.5862083435058594, "learning_rate": 1.0093071163709106e-05, "loss": 0.1379, "step": 24610 }, { "epoch": 11.520945471565645, "grad_norm": 0.5415554642677307, "learning_rate": 1.0084502207686058e-05, "loss": 0.1426, "step": 24620 }, { "epoch": 11.525626023870817, "grad_norm": 0.6262754797935486, "learning_rate": 1.0075935630164732e-05, "loss": 0.1375, "step": 24630 }, { "epoch": 11.530306576175988, "grad_norm": 0.4973209798336029, "learning_rate": 1.0067371436703887e-05, "loss": 0.1266, "step": 24640 }, { "epoch": 11.534987128481161, "grad_norm": 0.48806580901145935, "learning_rate": 1.0058809632860738e-05, "loss": 0.1321, "step": 24650 }, { "epoch": 11.539667680786334, "grad_norm": 0.6491429209709167, "learning_rate": 1.0050250224190942e-05, "loss": 0.1456, "step": 24660 }, { "epoch": 11.544348233091505, "grad_norm": 0.5335844159126282, "learning_rate": 1.0041693216248604e-05, "loss": 0.1428, "step": 24670 }, { "epoch": 11.549028785396677, "grad_norm": 0.4843107759952545, "learning_rate": 1.0033138614586283e-05, "loss": 0.1322, "step": 24680 }, { "epoch": 11.553709337701848, "grad_norm": 0.49220794439315796, "learning_rate": 1.0024586424754958e-05, "loss": 0.129, "step": 24690 }, { "epoch": 11.558389890007021, "grad_norm": 0.6574116349220276, "learning_rate": 1.001603665230406e-05, "loss": 0.1398, "step": 24700 }, { "epoch": 11.563070442312192, "grad_norm": 0.6693962216377258, "learning_rate": 1.0007489302781434e-05, "loss": 0.1354, "step": 24710 }, { "epoch": 11.567750994617365, "grad_norm": 0.5521634221076965, "learning_rate": 9.998944381733372e-06, "loss": 0.1424, "step": 24720 }, { "epoch": 11.572431546922537, "grad_norm": 0.7097670435905457, "learning_rate": 9.990401894704567e-06, "loss": 0.1338, "step": 24730 }, { "epoch": 11.577112099227708, "grad_norm": 0.5928532481193542, "learning_rate": 9.981861847238166e-06, "loss": 0.141, "step": 24740 }, { "epoch": 11.581792651532881, "grad_norm": 0.5312638282775879, "learning_rate": 9.973324244875701e-06, "loss": 0.1348, "step": 24750 }, { "epoch": 11.586473203838054, "grad_norm": 0.7299242615699768, "learning_rate": 9.964789093157124e-06, "loss": 0.1406, "step": 24760 }, { "epoch": 11.591153756143225, "grad_norm": 0.5201556086540222, "learning_rate": 9.956256397620818e-06, "loss": 0.1596, "step": 24770 }, { "epoch": 11.595834308448397, "grad_norm": 0.5158522129058838, "learning_rate": 9.947726163803537e-06, "loss": 0.1634, "step": 24780 }, { "epoch": 11.600514860753568, "grad_norm": 0.7739445567131042, "learning_rate": 9.939198397240474e-06, "loss": 0.1431, "step": 24790 }, { "epoch": 11.605195413058741, "grad_norm": 0.5374662280082703, "learning_rate": 9.930673103465196e-06, "loss": 0.1317, "step": 24800 }, { "epoch": 11.609875965363912, "grad_norm": 0.5742225050926208, "learning_rate": 9.922150288009674e-06, "loss": 0.1401, "step": 24810 }, { "epoch": 11.614556517669085, "grad_norm": 0.6491795182228088, "learning_rate": 9.913629956404278e-06, "loss": 0.1474, "step": 24820 }, { "epoch": 11.619237069974258, "grad_norm": 1.1620259284973145, "learning_rate": 9.905112114177745e-06, "loss": 0.1305, "step": 24830 }, { "epoch": 11.623917622279428, "grad_norm": 0.7938812971115112, "learning_rate": 9.896596766857222e-06, "loss": 0.1435, "step": 24840 }, { "epoch": 11.628598174584601, "grad_norm": 0.5533220767974854, "learning_rate": 9.88808391996822e-06, "loss": 0.1262, "step": 24850 }, { "epoch": 11.633278726889774, "grad_norm": 0.6481633186340332, "learning_rate": 9.879573579034634e-06, "loss": 0.1504, "step": 24860 }, { "epoch": 11.637959279194945, "grad_norm": 0.5254421830177307, "learning_rate": 9.871065749578734e-06, "loss": 0.1384, "step": 24870 }, { "epoch": 11.642639831500118, "grad_norm": 0.4522911012172699, "learning_rate": 9.862560437121151e-06, "loss": 0.1535, "step": 24880 }, { "epoch": 11.647320383805289, "grad_norm": 0.6572921276092529, "learning_rate": 9.854057647180904e-06, "loss": 0.1417, "step": 24890 }, { "epoch": 11.652000936110461, "grad_norm": 0.6262891888618469, "learning_rate": 9.845557385275342e-06, "loss": 0.1362, "step": 24900 }, { "epoch": 11.656681488415632, "grad_norm": 0.6141103506088257, "learning_rate": 9.837059656920208e-06, "loss": 0.1425, "step": 24910 }, { "epoch": 11.661362040720805, "grad_norm": 0.7454243898391724, "learning_rate": 9.828564467629581e-06, "loss": 0.1349, "step": 24920 }, { "epoch": 11.666042593025978, "grad_norm": 0.6422976851463318, "learning_rate": 9.820071822915893e-06, "loss": 0.1271, "step": 24930 }, { "epoch": 11.670723145331149, "grad_norm": 0.8195533156394958, "learning_rate": 9.811581728289937e-06, "loss": 0.1377, "step": 24940 }, { "epoch": 11.675403697636321, "grad_norm": 0.5858240127563477, "learning_rate": 9.803094189260833e-06, "loss": 0.138, "step": 24950 }, { "epoch": 11.680084249941492, "grad_norm": 0.7770808339118958, "learning_rate": 9.794609211336068e-06, "loss": 0.134, "step": 24960 }, { "epoch": 11.684764802246665, "grad_norm": 1.0127381086349487, "learning_rate": 9.786126800021437e-06, "loss": 0.1244, "step": 24970 }, { "epoch": 11.689445354551838, "grad_norm": 0.5278828144073486, "learning_rate": 9.777646960821085e-06, "loss": 0.1321, "step": 24980 }, { "epoch": 11.694125906857009, "grad_norm": 0.5508668422698975, "learning_rate": 9.769169699237502e-06, "loss": 0.1357, "step": 24990 }, { "epoch": 11.698806459162181, "grad_norm": 0.5961797833442688, "learning_rate": 9.760695020771473e-06, "loss": 0.138, "step": 25000 }, { "epoch": 11.703487011467352, "grad_norm": 0.7714942097663879, "learning_rate": 9.752222930922141e-06, "loss": 0.1438, "step": 25010 }, { "epoch": 11.708167563772525, "grad_norm": 0.6039389371871948, "learning_rate": 9.743753435186942e-06, "loss": 0.1395, "step": 25020 }, { "epoch": 11.712848116077698, "grad_norm": 0.49907952547073364, "learning_rate": 9.735286539061647e-06, "loss": 0.1368, "step": 25030 }, { "epoch": 11.717528668382869, "grad_norm": 0.5381789207458496, "learning_rate": 9.726822248040329e-06, "loss": 0.134, "step": 25040 }, { "epoch": 11.722209220688041, "grad_norm": 0.5737005472183228, "learning_rate": 9.718360567615373e-06, "loss": 0.14, "step": 25050 }, { "epoch": 11.726889772993212, "grad_norm": 0.7351410388946533, "learning_rate": 9.709901503277478e-06, "loss": 0.1499, "step": 25060 }, { "epoch": 11.731570325298385, "grad_norm": 0.5815437436103821, "learning_rate": 9.701445060515627e-06, "loss": 0.1315, "step": 25070 }, { "epoch": 11.736250877603558, "grad_norm": 0.9904586672782898, "learning_rate": 9.692991244817129e-06, "loss": 0.1367, "step": 25080 }, { "epoch": 11.740931429908729, "grad_norm": 0.5564830899238586, "learning_rate": 9.684540061667562e-06, "loss": 0.1338, "step": 25090 }, { "epoch": 11.745611982213902, "grad_norm": 0.648100733757019, "learning_rate": 9.676091516550814e-06, "loss": 0.1467, "step": 25100 }, { "epoch": 11.750292534519073, "grad_norm": 0.799278736114502, "learning_rate": 9.66764561494905e-06, "loss": 0.156, "step": 25110 }, { "epoch": 11.754973086824245, "grad_norm": 0.5639044046401978, "learning_rate": 9.65920236234272e-06, "loss": 0.1479, "step": 25120 }, { "epoch": 11.759653639129418, "grad_norm": 0.6250486373901367, "learning_rate": 9.650761764210573e-06, "loss": 0.1396, "step": 25130 }, { "epoch": 11.764334191434589, "grad_norm": 0.5543119311332703, "learning_rate": 9.642323826029609e-06, "loss": 0.1258, "step": 25140 }, { "epoch": 11.769014743739762, "grad_norm": 1.1604453325271606, "learning_rate": 9.633888553275114e-06, "loss": 0.1322, "step": 25150 }, { "epoch": 11.773695296044933, "grad_norm": 0.5603232383728027, "learning_rate": 9.625455951420654e-06, "loss": 0.133, "step": 25160 }, { "epoch": 11.778375848350105, "grad_norm": 0.9931365847587585, "learning_rate": 9.617026025938038e-06, "loss": 0.131, "step": 25170 }, { "epoch": 11.783056400655278, "grad_norm": 0.5437849164009094, "learning_rate": 9.608598782297373e-06, "loss": 0.1468, "step": 25180 }, { "epoch": 11.787736952960449, "grad_norm": 0.6795463562011719, "learning_rate": 9.600174225966997e-06, "loss": 0.1201, "step": 25190 }, { "epoch": 11.792417505265622, "grad_norm": 0.6419689655303955, "learning_rate": 9.591752362413503e-06, "loss": 0.1447, "step": 25200 }, { "epoch": 11.797098057570793, "grad_norm": 0.9037359952926636, "learning_rate": 9.58333319710176e-06, "loss": 0.1362, "step": 25210 }, { "epoch": 11.801778609875965, "grad_norm": 0.5876637697219849, "learning_rate": 9.574916735494869e-06, "loss": 0.138, "step": 25220 }, { "epoch": 11.806459162181138, "grad_norm": 0.5356689691543579, "learning_rate": 9.56650298305418e-06, "loss": 0.1244, "step": 25230 }, { "epoch": 11.811139714486309, "grad_norm": 0.7173153758049011, "learning_rate": 9.558091945239287e-06, "loss": 0.1506, "step": 25240 }, { "epoch": 11.815820266791482, "grad_norm": 0.5759078860282898, "learning_rate": 9.549683627508016e-06, "loss": 0.1414, "step": 25250 }, { "epoch": 11.820500819096653, "grad_norm": 0.767656147480011, "learning_rate": 9.54127803531644e-06, "loss": 0.1381, "step": 25260 }, { "epoch": 11.825181371401825, "grad_norm": 0.5948965549468994, "learning_rate": 9.532875174118852e-06, "loss": 0.1525, "step": 25270 }, { "epoch": 11.829861923706998, "grad_norm": 0.6233130097389221, "learning_rate": 9.524475049367779e-06, "loss": 0.1378, "step": 25280 }, { "epoch": 11.83454247601217, "grad_norm": 0.6874606609344482, "learning_rate": 9.516077666513967e-06, "loss": 0.1323, "step": 25290 }, { "epoch": 11.839223028317342, "grad_norm": 0.8401157855987549, "learning_rate": 9.507683031006396e-06, "loss": 0.1395, "step": 25300 }, { "epoch": 11.843903580622513, "grad_norm": 0.7652893662452698, "learning_rate": 9.499291148292246e-06, "loss": 0.1258, "step": 25310 }, { "epoch": 11.848584132927686, "grad_norm": 0.944078803062439, "learning_rate": 9.49090202381692e-06, "loss": 0.1447, "step": 25320 }, { "epoch": 11.853264685232858, "grad_norm": 0.5664803385734558, "learning_rate": 9.482515663024034e-06, "loss": 0.1406, "step": 25330 }, { "epoch": 11.85794523753803, "grad_norm": 0.5564867854118347, "learning_rate": 9.474132071355396e-06, "loss": 0.1335, "step": 25340 }, { "epoch": 11.862625789843202, "grad_norm": 0.6334564089775085, "learning_rate": 9.46575125425104e-06, "loss": 0.1545, "step": 25350 }, { "epoch": 11.867306342148373, "grad_norm": 0.617531955242157, "learning_rate": 9.457373217149181e-06, "loss": 0.1491, "step": 25360 }, { "epoch": 11.871986894453546, "grad_norm": 0.6017510294914246, "learning_rate": 9.448997965486233e-06, "loss": 0.1373, "step": 25370 }, { "epoch": 11.876667446758718, "grad_norm": 0.9798033237457275, "learning_rate": 9.440625504696815e-06, "loss": 0.1252, "step": 25380 }, { "epoch": 11.88134799906389, "grad_norm": 0.5222119688987732, "learning_rate": 9.432255840213717e-06, "loss": 0.1457, "step": 25390 }, { "epoch": 11.886028551369062, "grad_norm": 0.5192803144454956, "learning_rate": 9.423888977467929e-06, "loss": 0.1394, "step": 25400 }, { "epoch": 11.890709103674233, "grad_norm": 0.5817142128944397, "learning_rate": 9.415524921888619e-06, "loss": 0.1454, "step": 25410 }, { "epoch": 11.895389655979406, "grad_norm": 0.6191873550415039, "learning_rate": 9.407163678903122e-06, "loss": 0.1229, "step": 25420 }, { "epoch": 11.900070208284578, "grad_norm": 0.48862424492836, "learning_rate": 9.398805253936968e-06, "loss": 0.1322, "step": 25430 }, { "epoch": 11.90475076058975, "grad_norm": 0.5194814801216125, "learning_rate": 9.390449652413844e-06, "loss": 0.1257, "step": 25440 }, { "epoch": 11.909431312894922, "grad_norm": 0.8085774183273315, "learning_rate": 9.382096879755609e-06, "loss": 0.1319, "step": 25450 }, { "epoch": 11.914111865200093, "grad_norm": 0.660165548324585, "learning_rate": 9.37374694138229e-06, "loss": 0.147, "step": 25460 }, { "epoch": 11.918792417505266, "grad_norm": 0.545522928237915, "learning_rate": 9.36539984271206e-06, "loss": 0.1375, "step": 25470 }, { "epoch": 11.923472969810437, "grad_norm": 0.6464325189590454, "learning_rate": 9.357055589161279e-06, "loss": 0.1492, "step": 25480 }, { "epoch": 11.92815352211561, "grad_norm": 0.5750061869621277, "learning_rate": 9.348714186144425e-06, "loss": 0.135, "step": 25490 }, { "epoch": 11.932834074420782, "grad_norm": 0.5737521052360535, "learning_rate": 9.34037563907416e-06, "loss": 0.1233, "step": 25500 }, { "epoch": 11.937514626725953, "grad_norm": 1.042099118232727, "learning_rate": 9.33203995336126e-06, "loss": 0.1394, "step": 25510 }, { "epoch": 11.942195179031126, "grad_norm": 0.6476372480392456, "learning_rate": 9.323707134414672e-06, "loss": 0.1397, "step": 25520 }, { "epoch": 11.946875731336299, "grad_norm": 0.5023000240325928, "learning_rate": 9.315377187641472e-06, "loss": 0.1411, "step": 25530 }, { "epoch": 11.95155628364147, "grad_norm": 0.7695915699005127, "learning_rate": 9.307050118446864e-06, "loss": 0.1423, "step": 25540 }, { "epoch": 11.956236835946642, "grad_norm": 0.9484490752220154, "learning_rate": 9.2987259322342e-06, "loss": 0.1429, "step": 25550 }, { "epoch": 11.960917388251813, "grad_norm": 1.0880632400512695, "learning_rate": 9.290404634404947e-06, "loss": 0.1534, "step": 25560 }, { "epoch": 11.965597940556986, "grad_norm": 0.6154616475105286, "learning_rate": 9.28208623035872e-06, "loss": 0.1392, "step": 25570 }, { "epoch": 11.970278492862157, "grad_norm": 0.9615464210510254, "learning_rate": 9.273770725493227e-06, "loss": 0.135, "step": 25580 }, { "epoch": 11.97495904516733, "grad_norm": 0.648632824420929, "learning_rate": 9.265458125204307e-06, "loss": 0.138, "step": 25590 }, { "epoch": 11.979639597472502, "grad_norm": 0.6197823286056519, "learning_rate": 9.257148434885929e-06, "loss": 0.1362, "step": 25600 }, { "epoch": 11.984320149777673, "grad_norm": 0.48030343651771545, "learning_rate": 9.248841659930143e-06, "loss": 0.1346, "step": 25610 }, { "epoch": 11.989000702082846, "grad_norm": 0.5882071256637573, "learning_rate": 9.240537805727145e-06, "loss": 0.1416, "step": 25620 }, { "epoch": 11.993681254388019, "grad_norm": 0.6748385429382324, "learning_rate": 9.232236877665207e-06, "loss": 0.1345, "step": 25630 }, { "epoch": 11.99836180669319, "grad_norm": 0.7447463870048523, "learning_rate": 9.223938881130703e-06, "loss": 0.1407, "step": 25640 }, { "epoch": 12.002808331383104, "grad_norm": 0.6428523659706116, "learning_rate": 9.215643821508127e-06, "loss": 0.1317, "step": 25650 }, { "epoch": 12.007488883688275, "grad_norm": 0.48481285572052, "learning_rate": 9.207351704180043e-06, "loss": 0.1302, "step": 25660 }, { "epoch": 12.012169435993448, "grad_norm": 0.5295855402946472, "learning_rate": 9.19906253452712e-06, "loss": 0.1338, "step": 25670 }, { "epoch": 12.016849988298619, "grad_norm": 0.5646043419837952, "learning_rate": 9.190776317928109e-06, "loss": 0.1294, "step": 25680 }, { "epoch": 12.021530540603791, "grad_norm": 0.5252078771591187, "learning_rate": 9.182493059759848e-06, "loss": 0.126, "step": 25690 }, { "epoch": 12.026211092908964, "grad_norm": 0.8693196773529053, "learning_rate": 9.17421276539725e-06, "loss": 0.136, "step": 25700 }, { "epoch": 12.030891645214135, "grad_norm": 0.45628079771995544, "learning_rate": 9.165935440213311e-06, "loss": 0.125, "step": 25710 }, { "epoch": 12.035572197519308, "grad_norm": 0.7197898030281067, "learning_rate": 9.157661089579098e-06, "loss": 0.131, "step": 25720 }, { "epoch": 12.040252749824479, "grad_norm": 0.6063697934150696, "learning_rate": 9.149389718863743e-06, "loss": 0.1416, "step": 25730 }, { "epoch": 12.044933302129651, "grad_norm": 0.5381876230239868, "learning_rate": 9.141121333434449e-06, "loss": 0.1171, "step": 25740 }, { "epoch": 12.049613854434824, "grad_norm": 1.0357701778411865, "learning_rate": 9.13285593865649e-06, "loss": 0.1317, "step": 25750 }, { "epoch": 12.054294406739995, "grad_norm": 0.520754873752594, "learning_rate": 9.124593539893178e-06, "loss": 0.1253, "step": 25760 }, { "epoch": 12.058974959045168, "grad_norm": 0.6943230628967285, "learning_rate": 9.116334142505908e-06, "loss": 0.1027, "step": 25770 }, { "epoch": 12.063655511350339, "grad_norm": 0.7646487355232239, "learning_rate": 9.1080777518541e-06, "loss": 0.1388, "step": 25780 }, { "epoch": 12.068336063655511, "grad_norm": 0.549079954624176, "learning_rate": 9.099824373295243e-06, "loss": 0.1292, "step": 25790 }, { "epoch": 12.073016615960684, "grad_norm": 0.5200909376144409, "learning_rate": 9.09157401218487e-06, "loss": 0.1167, "step": 25800 }, { "epoch": 12.077697168265855, "grad_norm": 1.0391125679016113, "learning_rate": 9.083326673876536e-06, "loss": 0.125, "step": 25810 }, { "epoch": 12.082377720571028, "grad_norm": 0.5312511324882507, "learning_rate": 9.075082363721863e-06, "loss": 0.1266, "step": 25820 }, { "epoch": 12.087058272876199, "grad_norm": 0.8059483766555786, "learning_rate": 9.066841087070487e-06, "loss": 0.1192, "step": 25830 }, { "epoch": 12.091738825181372, "grad_norm": 0.5847684741020203, "learning_rate": 9.058602849270087e-06, "loss": 0.1236, "step": 25840 }, { "epoch": 12.096419377486544, "grad_norm": 0.48469865322113037, "learning_rate": 9.050367655666367e-06, "loss": 0.1211, "step": 25850 }, { "epoch": 12.101099929791715, "grad_norm": 0.5778334140777588, "learning_rate": 9.04213551160305e-06, "loss": 0.1415, "step": 25860 }, { "epoch": 12.105780482096888, "grad_norm": 0.6291409730911255, "learning_rate": 9.033906422421883e-06, "loss": 0.1255, "step": 25870 }, { "epoch": 12.110461034402059, "grad_norm": 0.631270706653595, "learning_rate": 9.025680393462638e-06, "loss": 0.1097, "step": 25880 }, { "epoch": 12.115141586707232, "grad_norm": 0.5165445804595947, "learning_rate": 9.0174574300631e-06, "loss": 0.1185, "step": 25890 }, { "epoch": 12.119822139012403, "grad_norm": 0.5099138021469116, "learning_rate": 9.009237537559051e-06, "loss": 0.126, "step": 25900 }, { "epoch": 12.124502691317575, "grad_norm": 0.5590340495109558, "learning_rate": 9.001020721284293e-06, "loss": 0.1195, "step": 25910 }, { "epoch": 12.129183243622748, "grad_norm": 0.548865795135498, "learning_rate": 8.992806986570634e-06, "loss": 0.1269, "step": 25920 }, { "epoch": 12.133863795927919, "grad_norm": 0.47220736742019653, "learning_rate": 8.984596338747867e-06, "loss": 0.1152, "step": 25930 }, { "epoch": 12.138544348233092, "grad_norm": 0.6363371014595032, "learning_rate": 8.976388783143805e-06, "loss": 0.1064, "step": 25940 }, { "epoch": 12.143224900538263, "grad_norm": 0.5463108420372009, "learning_rate": 8.968184325084231e-06, "loss": 0.1284, "step": 25950 }, { "epoch": 12.147905452843435, "grad_norm": 0.6049662828445435, "learning_rate": 8.959982969892933e-06, "loss": 0.1259, "step": 25960 }, { "epoch": 12.152586005148608, "grad_norm": 0.4770444929599762, "learning_rate": 8.951784722891684e-06, "loss": 0.1224, "step": 25970 }, { "epoch": 12.157266557453779, "grad_norm": 0.5458731651306152, "learning_rate": 8.943589589400233e-06, "loss": 0.12, "step": 25980 }, { "epoch": 12.161947109758952, "grad_norm": 0.4931269586086273, "learning_rate": 8.935397574736318e-06, "loss": 0.1351, "step": 25990 }, { "epoch": 12.166627662064123, "grad_norm": 0.8974202871322632, "learning_rate": 8.92720868421564e-06, "loss": 0.1362, "step": 26000 }, { "epoch": 12.171308214369295, "grad_norm": 0.5371221899986267, "learning_rate": 8.919022923151892e-06, "loss": 0.1155, "step": 26010 }, { "epoch": 12.175988766674468, "grad_norm": 0.6593049168586731, "learning_rate": 8.910840296856717e-06, "loss": 0.1373, "step": 26020 }, { "epoch": 12.18066931897964, "grad_norm": 0.5123255848884583, "learning_rate": 8.902660810639735e-06, "loss": 0.1222, "step": 26030 }, { "epoch": 12.185349871284812, "grad_norm": 0.5594803690910339, "learning_rate": 8.89448446980853e-06, "loss": 0.1279, "step": 26040 }, { "epoch": 12.190030423589983, "grad_norm": 0.6453884840011597, "learning_rate": 8.886311279668629e-06, "loss": 0.1126, "step": 26050 }, { "epoch": 12.194710975895156, "grad_norm": 1.00872802734375, "learning_rate": 8.878141245523541e-06, "loss": 0.1291, "step": 26060 }, { "epoch": 12.199391528200328, "grad_norm": 0.48426416516304016, "learning_rate": 8.869974372674709e-06, "loss": 0.1222, "step": 26070 }, { "epoch": 12.2040720805055, "grad_norm": 0.594674289226532, "learning_rate": 8.861810666421516e-06, "loss": 0.1266, "step": 26080 }, { "epoch": 12.208752632810672, "grad_norm": 0.6022767424583435, "learning_rate": 8.853650132061316e-06, "loss": 0.1237, "step": 26090 }, { "epoch": 12.213433185115843, "grad_norm": 0.5594739317893982, "learning_rate": 8.845492774889386e-06, "loss": 0.1305, "step": 26100 }, { "epoch": 12.218113737421016, "grad_norm": 0.9785751700401306, "learning_rate": 8.837338600198947e-06, "loss": 0.1131, "step": 26110 }, { "epoch": 12.222794289726188, "grad_norm": 1.039942979812622, "learning_rate": 8.829187613281156e-06, "loss": 0.1271, "step": 26120 }, { "epoch": 12.22747484203136, "grad_norm": 0.6457395553588867, "learning_rate": 8.821039819425095e-06, "loss": 0.1327, "step": 26130 }, { "epoch": 12.232155394336532, "grad_norm": 0.4894373118877411, "learning_rate": 8.812895223917782e-06, "loss": 0.1372, "step": 26140 }, { "epoch": 12.236835946641703, "grad_norm": 0.6200950741767883, "learning_rate": 8.804753832044158e-06, "loss": 0.1248, "step": 26150 }, { "epoch": 12.241516498946876, "grad_norm": 0.718425989151001, "learning_rate": 8.796615649087086e-06, "loss": 0.1209, "step": 26160 }, { "epoch": 12.246197051252048, "grad_norm": 0.543153703212738, "learning_rate": 8.788480680327338e-06, "loss": 0.1263, "step": 26170 }, { "epoch": 12.25087760355722, "grad_norm": 0.5759413838386536, "learning_rate": 8.780348931043605e-06, "loss": 0.1206, "step": 26180 }, { "epoch": 12.255558155862392, "grad_norm": 0.5361744165420532, "learning_rate": 8.772220406512508e-06, "loss": 0.1324, "step": 26190 }, { "epoch": 12.260238708167563, "grad_norm": 0.5895363688468933, "learning_rate": 8.764095112008536e-06, "loss": 0.1248, "step": 26200 }, { "epoch": 12.264919260472736, "grad_norm": 0.5650883913040161, "learning_rate": 8.755973052804117e-06, "loss": 0.1349, "step": 26210 }, { "epoch": 12.269599812777908, "grad_norm": 0.5309141278266907, "learning_rate": 8.74785423416957e-06, "loss": 0.1336, "step": 26220 }, { "epoch": 12.27428036508308, "grad_norm": 0.5785933136940002, "learning_rate": 8.739738661373096e-06, "loss": 0.111, "step": 26230 }, { "epoch": 12.278960917388252, "grad_norm": 0.493420273065567, "learning_rate": 8.731626339680812e-06, "loss": 0.1304, "step": 26240 }, { "epoch": 12.283641469693423, "grad_norm": 0.5622503161430359, "learning_rate": 8.72351727435671e-06, "loss": 0.1183, "step": 26250 }, { "epoch": 12.288322021998596, "grad_norm": 0.4386247396469116, "learning_rate": 8.715411470662683e-06, "loss": 0.1264, "step": 26260 }, { "epoch": 12.293002574303769, "grad_norm": 0.6515147686004639, "learning_rate": 8.707308933858482e-06, "loss": 0.1161, "step": 26270 }, { "epoch": 12.29768312660894, "grad_norm": 0.8035599589347839, "learning_rate": 8.699209669201778e-06, "loss": 0.123, "step": 26280 }, { "epoch": 12.302363678914112, "grad_norm": 0.5069525241851807, "learning_rate": 8.691113681948082e-06, "loss": 0.1234, "step": 26290 }, { "epoch": 12.307044231219283, "grad_norm": 0.7768059968948364, "learning_rate": 8.683020977350787e-06, "loss": 0.1227, "step": 26300 }, { "epoch": 12.311724783524456, "grad_norm": 0.5704174041748047, "learning_rate": 8.674931560661174e-06, "loss": 0.1304, "step": 26310 }, { "epoch": 12.316405335829629, "grad_norm": 0.5571597814559937, "learning_rate": 8.66684543712837e-06, "loss": 0.1264, "step": 26320 }, { "epoch": 12.3210858881348, "grad_norm": 0.5103576183319092, "learning_rate": 8.658762611999374e-06, "loss": 0.12, "step": 26330 }, { "epoch": 12.325766440439972, "grad_norm": 0.6187427639961243, "learning_rate": 8.650683090519049e-06, "loss": 0.1332, "step": 26340 }, { "epoch": 12.330446992745143, "grad_norm": 0.6269598007202148, "learning_rate": 8.6426068779301e-06, "loss": 0.1238, "step": 26350 }, { "epoch": 12.335127545050316, "grad_norm": 0.4867895543575287, "learning_rate": 8.634533979473096e-06, "loss": 0.1236, "step": 26360 }, { "epoch": 12.339808097355489, "grad_norm": 0.5190547108650208, "learning_rate": 8.626464400386458e-06, "loss": 0.1231, "step": 26370 }, { "epoch": 12.34448864966066, "grad_norm": 1.0548393726348877, "learning_rate": 8.618398145906448e-06, "loss": 0.1234, "step": 26380 }, { "epoch": 12.349169201965832, "grad_norm": 0.5840358138084412, "learning_rate": 8.610335221267163e-06, "loss": 0.1275, "step": 26390 }, { "epoch": 12.353849754271003, "grad_norm": 0.6325676441192627, "learning_rate": 8.602275631700554e-06, "loss": 0.1235, "step": 26400 }, { "epoch": 12.358530306576176, "grad_norm": 0.5469036102294922, "learning_rate": 8.594219382436402e-06, "loss": 0.1357, "step": 26410 }, { "epoch": 12.363210858881349, "grad_norm": 0.6066882610321045, "learning_rate": 8.586166478702315e-06, "loss": 0.114, "step": 26420 }, { "epoch": 12.36789141118652, "grad_norm": 0.5539042949676514, "learning_rate": 8.578116925723737e-06, "loss": 0.1311, "step": 26430 }, { "epoch": 12.372571963491692, "grad_norm": 0.9907528162002563, "learning_rate": 8.57007072872394e-06, "loss": 0.1242, "step": 26440 }, { "epoch": 12.377252515796863, "grad_norm": 0.7679982781410217, "learning_rate": 8.562027892924002e-06, "loss": 0.119, "step": 26450 }, { "epoch": 12.381933068102036, "grad_norm": 0.5962138175964355, "learning_rate": 8.553988423542842e-06, "loss": 0.1332, "step": 26460 }, { "epoch": 12.386613620407209, "grad_norm": 0.4446924328804016, "learning_rate": 8.545952325797179e-06, "loss": 0.1265, "step": 26470 }, { "epoch": 12.39129417271238, "grad_norm": 0.5226466059684753, "learning_rate": 8.53791960490156e-06, "loss": 0.1235, "step": 26480 }, { "epoch": 12.395974725017552, "grad_norm": 0.7524799704551697, "learning_rate": 8.529890266068313e-06, "loss": 0.1289, "step": 26490 }, { "epoch": 12.400655277322723, "grad_norm": 0.6717196106910706, "learning_rate": 8.521864314507604e-06, "loss": 0.1418, "step": 26500 }, { "epoch": 12.405335829627896, "grad_norm": 0.5526494979858398, "learning_rate": 8.513841755427383e-06, "loss": 0.1225, "step": 26510 }, { "epoch": 12.410016381933069, "grad_norm": 0.5007863640785217, "learning_rate": 8.505822594033392e-06, "loss": 0.1193, "step": 26520 }, { "epoch": 12.41469693423824, "grad_norm": 0.6078910231590271, "learning_rate": 8.49780683552919e-06, "loss": 0.1151, "step": 26530 }, { "epoch": 12.419377486543413, "grad_norm": 0.5840892791748047, "learning_rate": 8.489794485116106e-06, "loss": 0.108, "step": 26540 }, { "epoch": 12.424058038848584, "grad_norm": 0.6002477407455444, "learning_rate": 8.481785547993274e-06, "loss": 0.1196, "step": 26550 }, { "epoch": 12.428738591153756, "grad_norm": 0.6239868998527527, "learning_rate": 8.473780029357607e-06, "loss": 0.1264, "step": 26560 }, { "epoch": 12.433419143458929, "grad_norm": 0.7448220252990723, "learning_rate": 8.465777934403791e-06, "loss": 0.1237, "step": 26570 }, { "epoch": 12.4380996957641, "grad_norm": 0.7120158672332764, "learning_rate": 8.457779268324301e-06, "loss": 0.1327, "step": 26580 }, { "epoch": 12.442780248069273, "grad_norm": 0.7988696098327637, "learning_rate": 8.44978403630939e-06, "loss": 0.1341, "step": 26590 }, { "epoch": 12.447460800374444, "grad_norm": 0.8093528151512146, "learning_rate": 8.441792243547078e-06, "loss": 0.1304, "step": 26600 }, { "epoch": 12.452141352679616, "grad_norm": 0.5878836512565613, "learning_rate": 8.433803895223144e-06, "loss": 0.1313, "step": 26610 }, { "epoch": 12.456821904984789, "grad_norm": 0.6388691067695618, "learning_rate": 8.425818996521146e-06, "loss": 0.1217, "step": 26620 }, { "epoch": 12.46150245728996, "grad_norm": 0.4821789264678955, "learning_rate": 8.4178375526224e-06, "loss": 0.1169, "step": 26630 }, { "epoch": 12.466183009595133, "grad_norm": 0.6697648763656616, "learning_rate": 8.409859568705973e-06, "loss": 0.1174, "step": 26640 }, { "epoch": 12.470863561900304, "grad_norm": 0.6852846145629883, "learning_rate": 8.401885049948693e-06, "loss": 0.119, "step": 26650 }, { "epoch": 12.475544114205476, "grad_norm": 0.5851134061813354, "learning_rate": 8.393914001525148e-06, "loss": 0.1292, "step": 26660 }, { "epoch": 12.480224666510649, "grad_norm": 0.5340732336044312, "learning_rate": 8.38594642860765e-06, "loss": 0.121, "step": 26670 }, { "epoch": 12.48490521881582, "grad_norm": 0.7630258798599243, "learning_rate": 8.37798233636628e-06, "loss": 0.1194, "step": 26680 }, { "epoch": 12.489585771120993, "grad_norm": 0.5588361620903015, "learning_rate": 8.370021729968847e-06, "loss": 0.1315, "step": 26690 }, { "epoch": 12.494266323426164, "grad_norm": 0.4845326244831085, "learning_rate": 8.362064614580908e-06, "loss": 0.1237, "step": 26700 }, { "epoch": 12.498946875731336, "grad_norm": 0.562552809715271, "learning_rate": 8.35411099536574e-06, "loss": 0.1392, "step": 26710 }, { "epoch": 12.50362742803651, "grad_norm": 0.6355177760124207, "learning_rate": 8.346160877484367e-06, "loss": 0.1258, "step": 26720 }, { "epoch": 12.50830798034168, "grad_norm": 0.5872390270233154, "learning_rate": 8.338214266095535e-06, "loss": 0.1268, "step": 26730 }, { "epoch": 12.512988532646853, "grad_norm": 0.523926854133606, "learning_rate": 8.330271166355702e-06, "loss": 0.1151, "step": 26740 }, { "epoch": 12.517669084952024, "grad_norm": 0.5703585147857666, "learning_rate": 8.322331583419077e-06, "loss": 0.1096, "step": 26750 }, { "epoch": 12.522349637257197, "grad_norm": 0.6009045243263245, "learning_rate": 8.314395522437556e-06, "loss": 0.1358, "step": 26760 }, { "epoch": 12.527030189562367, "grad_norm": 0.6358831524848938, "learning_rate": 8.306462988560765e-06, "loss": 0.1245, "step": 26770 }, { "epoch": 12.53171074186754, "grad_norm": 0.5196146965026855, "learning_rate": 8.29853398693605e-06, "loss": 0.1014, "step": 26780 }, { "epoch": 12.536391294172713, "grad_norm": 0.5494430065155029, "learning_rate": 8.290608522708435e-06, "loss": 0.1196, "step": 26790 }, { "epoch": 12.541071846477884, "grad_norm": 0.5019586682319641, "learning_rate": 8.282686601020682e-06, "loss": 0.1214, "step": 26800 }, { "epoch": 12.545752398783057, "grad_norm": 0.47387439012527466, "learning_rate": 8.274768227013234e-06, "loss": 0.1256, "step": 26810 }, { "epoch": 12.55043295108823, "grad_norm": 0.5432007312774658, "learning_rate": 8.266853405824246e-06, "loss": 0.1158, "step": 26820 }, { "epoch": 12.5551135033934, "grad_norm": 0.9768918752670288, "learning_rate": 8.258942142589546e-06, "loss": 0.1273, "step": 26830 }, { "epoch": 12.559794055698573, "grad_norm": 0.5289208889007568, "learning_rate": 8.251034442442677e-06, "loss": 0.128, "step": 26840 }, { "epoch": 12.564474608003744, "grad_norm": 0.5907011032104492, "learning_rate": 8.24313031051486e-06, "loss": 0.1307, "step": 26850 }, { "epoch": 12.569155160308917, "grad_norm": 0.6877530813217163, "learning_rate": 8.235229751934993e-06, "loss": 0.1292, "step": 26860 }, { "epoch": 12.573835712614088, "grad_norm": 0.5478507876396179, "learning_rate": 8.227332771829665e-06, "loss": 0.1258, "step": 26870 }, { "epoch": 12.57851626491926, "grad_norm": 0.8992823362350464, "learning_rate": 8.219439375323151e-06, "loss": 0.113, "step": 26880 }, { "epoch": 12.583196817224433, "grad_norm": 0.5356893539428711, "learning_rate": 8.211549567537374e-06, "loss": 0.11, "step": 26890 }, { "epoch": 12.587877369529604, "grad_norm": 0.5422617197036743, "learning_rate": 8.203663353591955e-06, "loss": 0.1247, "step": 26900 }, { "epoch": 12.592557921834777, "grad_norm": 0.46571996808052063, "learning_rate": 8.195780738604169e-06, "loss": 0.1125, "step": 26910 }, { "epoch": 12.597238474139948, "grad_norm": 0.5750419497489929, "learning_rate": 8.187901727688966e-06, "loss": 0.123, "step": 26920 }, { "epoch": 12.60191902644512, "grad_norm": 0.5302878618240356, "learning_rate": 8.18002632595894e-06, "loss": 0.1211, "step": 26930 }, { "epoch": 12.606599578750293, "grad_norm": 0.4877989590167999, "learning_rate": 8.172154538524358e-06, "loss": 0.1259, "step": 26940 }, { "epoch": 12.611280131055464, "grad_norm": 1.0770574808120728, "learning_rate": 8.16428637049314e-06, "loss": 0.1495, "step": 26950 }, { "epoch": 12.615960683360637, "grad_norm": 0.5288820266723633, "learning_rate": 8.156421826970848e-06, "loss": 0.1143, "step": 26960 }, { "epoch": 12.620641235665808, "grad_norm": 0.5483841896057129, "learning_rate": 8.148560913060713e-06, "loss": 0.1291, "step": 26970 }, { "epoch": 12.62532178797098, "grad_norm": 0.6032969951629639, "learning_rate": 8.14070363386358e-06, "loss": 0.1241, "step": 26980 }, { "epoch": 12.630002340276153, "grad_norm": 0.5148544907569885, "learning_rate": 8.132849994477963e-06, "loss": 0.1294, "step": 26990 }, { "epoch": 12.634682892581324, "grad_norm": 0.8227509260177612, "learning_rate": 8.125000000000003e-06, "loss": 0.1186, "step": 27000 }, { "epoch": 12.639363444886497, "grad_norm": 0.6203362345695496, "learning_rate": 8.117153655523469e-06, "loss": 0.1217, "step": 27010 }, { "epoch": 12.644043997191668, "grad_norm": 0.46270471811294556, "learning_rate": 8.109310966139772e-06, "loss": 0.1181, "step": 27020 }, { "epoch": 12.64872454949684, "grad_norm": 0.5240904688835144, "learning_rate": 8.101471936937952e-06, "loss": 0.1314, "step": 27030 }, { "epoch": 12.653405101802013, "grad_norm": 0.9582726955413818, "learning_rate": 8.093636573004668e-06, "loss": 0.1339, "step": 27040 }, { "epoch": 12.658085654107184, "grad_norm": 0.6237166523933411, "learning_rate": 8.085804879424198e-06, "loss": 0.1217, "step": 27050 }, { "epoch": 12.662766206412357, "grad_norm": 0.4959319531917572, "learning_rate": 8.077976861278446e-06, "loss": 0.1131, "step": 27060 }, { "epoch": 12.667446758717528, "grad_norm": 0.8047231435775757, "learning_rate": 8.070152523646935e-06, "loss": 0.1248, "step": 27070 }, { "epoch": 12.6721273110227, "grad_norm": 0.4976648688316345, "learning_rate": 8.062331871606781e-06, "loss": 0.1127, "step": 27080 }, { "epoch": 12.676807863327873, "grad_norm": 0.7784335017204285, "learning_rate": 8.054514910232724e-06, "loss": 0.1297, "step": 27090 }, { "epoch": 12.681488415633044, "grad_norm": 0.6023530960083008, "learning_rate": 8.046701644597112e-06, "loss": 0.1197, "step": 27100 }, { "epoch": 12.686168967938217, "grad_norm": 0.6927734613418579, "learning_rate": 8.038892079769877e-06, "loss": 0.1341, "step": 27110 }, { "epoch": 12.690849520243388, "grad_norm": 0.5181878805160522, "learning_rate": 8.03108622081857e-06, "loss": 0.1229, "step": 27120 }, { "epoch": 12.69553007254856, "grad_norm": 0.526614785194397, "learning_rate": 8.023284072808324e-06, "loss": 0.1239, "step": 27130 }, { "epoch": 12.700210624853733, "grad_norm": 0.5987588763237, "learning_rate": 8.015485640801874e-06, "loss": 0.1221, "step": 27140 }, { "epoch": 12.704891177158904, "grad_norm": 0.6152986884117126, "learning_rate": 8.007690929859526e-06, "loss": 0.1175, "step": 27150 }, { "epoch": 12.709571729464077, "grad_norm": 0.49337151646614075, "learning_rate": 7.999899945039193e-06, "loss": 0.1196, "step": 27160 }, { "epoch": 12.714252281769248, "grad_norm": 0.607397198677063, "learning_rate": 7.992112691396358e-06, "loss": 0.1345, "step": 27170 }, { "epoch": 12.71893283407442, "grad_norm": 0.6566395163536072, "learning_rate": 7.984329173984081e-06, "loss": 0.1253, "step": 27180 }, { "epoch": 12.723613386379593, "grad_norm": 0.5280337929725647, "learning_rate": 7.976549397853013e-06, "loss": 0.1329, "step": 27190 }, { "epoch": 12.728293938684764, "grad_norm": 0.5853262543678284, "learning_rate": 7.968773368051355e-06, "loss": 0.1305, "step": 27200 }, { "epoch": 12.732974490989937, "grad_norm": 0.6586044430732727, "learning_rate": 7.961001089624891e-06, "loss": 0.121, "step": 27210 }, { "epoch": 12.737655043295108, "grad_norm": 0.5458307266235352, "learning_rate": 7.953232567616976e-06, "loss": 0.1228, "step": 27220 }, { "epoch": 12.74233559560028, "grad_norm": 1.0192819833755493, "learning_rate": 7.945467807068507e-06, "loss": 0.1112, "step": 27230 }, { "epoch": 12.747016147905454, "grad_norm": 0.5328567028045654, "learning_rate": 7.93770681301796e-06, "loss": 0.1216, "step": 27240 }, { "epoch": 12.751696700210625, "grad_norm": 0.6156130433082581, "learning_rate": 7.929949590501361e-06, "loss": 0.1203, "step": 27250 }, { "epoch": 12.756377252515797, "grad_norm": 0.5905546545982361, "learning_rate": 7.922196144552287e-06, "loss": 0.1283, "step": 27260 }, { "epoch": 12.761057804820968, "grad_norm": 0.547173798084259, "learning_rate": 7.914446480201862e-06, "loss": 0.1209, "step": 27270 }, { "epoch": 12.765738357126141, "grad_norm": 0.6498005986213684, "learning_rate": 7.906700602478755e-06, "loss": 0.122, "step": 27280 }, { "epoch": 12.770418909431314, "grad_norm": 0.7087777256965637, "learning_rate": 7.898958516409196e-06, "loss": 0.1163, "step": 27290 }, { "epoch": 12.775099461736485, "grad_norm": 0.6388469338417053, "learning_rate": 7.891220227016923e-06, "loss": 0.1293, "step": 27300 }, { "epoch": 12.779780014041657, "grad_norm": 0.6544768810272217, "learning_rate": 7.883485739323244e-06, "loss": 0.1374, "step": 27310 }, { "epoch": 12.784460566346828, "grad_norm": 0.5425980687141418, "learning_rate": 7.875755058346979e-06, "loss": 0.1265, "step": 27320 }, { "epoch": 12.789141118652001, "grad_norm": 3.076070785522461, "learning_rate": 7.86802818910447e-06, "loss": 0.1242, "step": 27330 }, { "epoch": 12.793821670957174, "grad_norm": 0.6139213442802429, "learning_rate": 7.860305136609618e-06, "loss": 0.1202, "step": 27340 }, { "epoch": 12.798502223262345, "grad_norm": 0.6341770887374878, "learning_rate": 7.852585905873814e-06, "loss": 0.123, "step": 27350 }, { "epoch": 12.803182775567517, "grad_norm": 1.1541223526000977, "learning_rate": 7.844870501905991e-06, "loss": 0.1292, "step": 27360 }, { "epoch": 12.807863327872688, "grad_norm": 0.676160454750061, "learning_rate": 7.837158929712592e-06, "loss": 0.1181, "step": 27370 }, { "epoch": 12.812543880177861, "grad_norm": 0.5679797530174255, "learning_rate": 7.829451194297563e-06, "loss": 0.1182, "step": 27380 }, { "epoch": 12.817224432483034, "grad_norm": 0.5915672183036804, "learning_rate": 7.821747300662376e-06, "loss": 0.1418, "step": 27390 }, { "epoch": 12.821904984788205, "grad_norm": 0.5673067569732666, "learning_rate": 7.814047253806006e-06, "loss": 0.1167, "step": 27400 }, { "epoch": 12.826585537093377, "grad_norm": 0.5612977743148804, "learning_rate": 7.806351058724928e-06, "loss": 0.1165, "step": 27410 }, { "epoch": 12.831266089398548, "grad_norm": 0.5202139019966125, "learning_rate": 7.798658720413118e-06, "loss": 0.1053, "step": 27420 }, { "epoch": 12.835946641703721, "grad_norm": 0.6353538632392883, "learning_rate": 7.790970243862051e-06, "loss": 0.1211, "step": 27430 }, { "epoch": 12.840627194008894, "grad_norm": 0.5004377961158752, "learning_rate": 7.783285634060697e-06, "loss": 0.1118, "step": 27440 }, { "epoch": 12.845307746314065, "grad_norm": 0.6072843670845032, "learning_rate": 7.775604895995514e-06, "loss": 0.124, "step": 27450 }, { "epoch": 12.849988298619238, "grad_norm": 0.5600860118865967, "learning_rate": 7.767928034650451e-06, "loss": 0.1086, "step": 27460 }, { "epoch": 12.854668850924408, "grad_norm": 0.5983568429946899, "learning_rate": 7.760255055006937e-06, "loss": 0.1155, "step": 27470 }, { "epoch": 12.859349403229581, "grad_norm": 0.8033786416053772, "learning_rate": 7.752585962043896e-06, "loss": 0.1168, "step": 27480 }, { "epoch": 12.864029955534754, "grad_norm": 0.5554651618003845, "learning_rate": 7.744920760737706e-06, "loss": 0.1339, "step": 27490 }, { "epoch": 12.868710507839925, "grad_norm": 0.49755051732063293, "learning_rate": 7.737259456062238e-06, "loss": 0.1142, "step": 27500 }, { "epoch": 12.873391060145098, "grad_norm": 0.5673978328704834, "learning_rate": 7.729602052988833e-06, "loss": 0.1172, "step": 27510 }, { "epoch": 12.878071612450269, "grad_norm": 0.6387183666229248, "learning_rate": 7.721948556486285e-06, "loss": 0.1347, "step": 27520 }, { "epoch": 12.882752164755441, "grad_norm": 0.58409184217453, "learning_rate": 7.714298971520884e-06, "loss": 0.1187, "step": 27530 }, { "epoch": 12.887432717060612, "grad_norm": 0.7768762707710266, "learning_rate": 7.706653303056349e-06, "loss": 0.1162, "step": 27540 }, { "epoch": 12.892113269365785, "grad_norm": 0.6055747270584106, "learning_rate": 7.699011556053867e-06, "loss": 0.113, "step": 27550 }, { "epoch": 12.896793821670958, "grad_norm": 0.6403014063835144, "learning_rate": 7.691373735472098e-06, "loss": 0.1317, "step": 27560 }, { "epoch": 12.901474373976129, "grad_norm": 0.5135539174079895, "learning_rate": 7.683739846267132e-06, "loss": 0.1194, "step": 27570 }, { "epoch": 12.906154926281301, "grad_norm": 0.6970255374908447, "learning_rate": 7.67610989339252e-06, "loss": 0.1363, "step": 27580 }, { "epoch": 12.910835478586474, "grad_norm": 0.7787900567054749, "learning_rate": 7.668483881799257e-06, "loss": 0.1237, "step": 27590 }, { "epoch": 12.915516030891645, "grad_norm": 0.6107934713363647, "learning_rate": 7.660861816435774e-06, "loss": 0.1215, "step": 27600 }, { "epoch": 12.920196583196818, "grad_norm": 0.6558959484100342, "learning_rate": 7.65324370224795e-06, "loss": 0.1243, "step": 27610 }, { "epoch": 12.924877135501989, "grad_norm": 0.5602393746376038, "learning_rate": 7.645629544179095e-06, "loss": 0.1296, "step": 27620 }, { "epoch": 12.929557687807161, "grad_norm": 0.5386217832565308, "learning_rate": 7.63801934716996e-06, "loss": 0.1046, "step": 27630 }, { "epoch": 12.934238240112332, "grad_norm": 0.703248143196106, "learning_rate": 7.630413116158708e-06, "loss": 0.1233, "step": 27640 }, { "epoch": 12.938918792417505, "grad_norm": 0.7725449800491333, "learning_rate": 7.622810856080946e-06, "loss": 0.1359, "step": 27650 }, { "epoch": 12.943599344722678, "grad_norm": 0.7008839249610901, "learning_rate": 7.615212571869701e-06, "loss": 0.1353, "step": 27660 }, { "epoch": 12.948279897027849, "grad_norm": 1.103939414024353, "learning_rate": 7.60761826845541e-06, "loss": 0.122, "step": 27670 }, { "epoch": 12.952960449333021, "grad_norm": 0.5369539260864258, "learning_rate": 7.60002795076594e-06, "loss": 0.1126, "step": 27680 }, { "epoch": 12.957641001638194, "grad_norm": 0.806370735168457, "learning_rate": 7.592441623726565e-06, "loss": 0.1256, "step": 27690 }, { "epoch": 12.962321553943365, "grad_norm": 0.8107337951660156, "learning_rate": 7.5848592922599656e-06, "loss": 0.1202, "step": 27700 }, { "epoch": 12.967002106248538, "grad_norm": 0.6693693399429321, "learning_rate": 7.577280961286237e-06, "loss": 0.1281, "step": 27710 }, { "epoch": 12.971682658553709, "grad_norm": 0.5639959573745728, "learning_rate": 7.569706635722877e-06, "loss": 0.1038, "step": 27720 }, { "epoch": 12.976363210858882, "grad_norm": 0.6971385478973389, "learning_rate": 7.562136320484786e-06, "loss": 0.129, "step": 27730 }, { "epoch": 12.981043763164053, "grad_norm": 0.7278268933296204, "learning_rate": 7.5545700204842485e-06, "loss": 0.1325, "step": 27740 }, { "epoch": 12.985724315469225, "grad_norm": 0.6543113589286804, "learning_rate": 7.54700774063097e-06, "loss": 0.1332, "step": 27750 }, { "epoch": 12.990404867774398, "grad_norm": 0.5674525499343872, "learning_rate": 7.539449485832023e-06, "loss": 0.1239, "step": 27760 }, { "epoch": 12.995085420079569, "grad_norm": 0.6569710373878479, "learning_rate": 7.53189526099187e-06, "loss": 0.1284, "step": 27770 }, { "epoch": 12.999765972384742, "grad_norm": 0.9371833205223083, "learning_rate": 7.524345071012382e-06, "loss": 0.1206, "step": 27780 }, { "epoch": 13.004212497074654, "grad_norm": 0.8213188052177429, "learning_rate": 7.5167989207927806e-06, "loss": 0.0955, "step": 27790 }, { "epoch": 13.008893049379827, "grad_norm": 0.6461662650108337, "learning_rate": 7.509256815229687e-06, "loss": 0.1097, "step": 27800 }, { "epoch": 13.013573601685, "grad_norm": 0.6470386981964111, "learning_rate": 7.501718759217097e-06, "loss": 0.122, "step": 27810 }, { "epoch": 13.01825415399017, "grad_norm": 0.7956523299217224, "learning_rate": 7.4941847576463605e-06, "loss": 0.1056, "step": 27820 }, { "epoch": 13.022934706295343, "grad_norm": 0.5096884965896606, "learning_rate": 7.486654815406219e-06, "loss": 0.1109, "step": 27830 }, { "epoch": 13.027615258600514, "grad_norm": 0.8408272862434387, "learning_rate": 7.479128937382768e-06, "loss": 0.0996, "step": 27840 }, { "epoch": 13.032295810905687, "grad_norm": 0.531853973865509, "learning_rate": 7.471607128459471e-06, "loss": 0.1266, "step": 27850 }, { "epoch": 13.03697636321086, "grad_norm": 0.5832664370536804, "learning_rate": 7.46408939351714e-06, "loss": 0.1078, "step": 27860 }, { "epoch": 13.04165691551603, "grad_norm": 1.0546294450759888, "learning_rate": 7.456575737433956e-06, "loss": 0.1006, "step": 27870 }, { "epoch": 13.046337467821203, "grad_norm": 0.6204822659492493, "learning_rate": 7.449066165085454e-06, "loss": 0.1118, "step": 27880 }, { "epoch": 13.051018020126374, "grad_norm": 0.47667157649993896, "learning_rate": 7.441560681344504e-06, "loss": 0.1016, "step": 27890 }, { "epoch": 13.055698572431547, "grad_norm": 0.7200061678886414, "learning_rate": 7.434059291081341e-06, "loss": 0.1077, "step": 27900 }, { "epoch": 13.06037912473672, "grad_norm": 0.6958701014518738, "learning_rate": 7.426561999163534e-06, "loss": 0.1165, "step": 27910 }, { "epoch": 13.06505967704189, "grad_norm": 0.4830981492996216, "learning_rate": 7.419068810455991e-06, "loss": 0.1011, "step": 27920 }, { "epoch": 13.069740229347063, "grad_norm": 0.5759109258651733, "learning_rate": 7.411579729820961e-06, "loss": 0.1155, "step": 27930 }, { "epoch": 13.074420781652234, "grad_norm": 0.5646435618400574, "learning_rate": 7.40409476211803e-06, "loss": 0.1119, "step": 27940 }, { "epoch": 13.079101333957407, "grad_norm": 0.493308424949646, "learning_rate": 7.396613912204117e-06, "loss": 0.108, "step": 27950 }, { "epoch": 13.083781886262578, "grad_norm": 0.5212733745574951, "learning_rate": 7.389137184933449e-06, "loss": 0.1135, "step": 27960 }, { "epoch": 13.08846243856775, "grad_norm": 0.5449308156967163, "learning_rate": 7.3816645851576105e-06, "loss": 0.1182, "step": 27970 }, { "epoch": 13.093142990872924, "grad_norm": 0.9705421924591064, "learning_rate": 7.374196117725484e-06, "loss": 0.1181, "step": 27980 }, { "epoch": 13.097823543178094, "grad_norm": 0.520936131477356, "learning_rate": 7.3667317874832665e-06, "loss": 0.1031, "step": 27990 }, { "epoch": 13.102504095483267, "grad_norm": 0.6363201141357422, "learning_rate": 7.359271599274499e-06, "loss": 0.1097, "step": 28000 }, { "epoch": 13.107184647788438, "grad_norm": 0.555008053779602, "learning_rate": 7.351815557940003e-06, "loss": 0.1209, "step": 28010 }, { "epoch": 13.111865200093611, "grad_norm": 0.5021818280220032, "learning_rate": 7.344363668317927e-06, "loss": 0.1174, "step": 28020 }, { "epoch": 13.116545752398784, "grad_norm": 0.6422640085220337, "learning_rate": 7.336915935243727e-06, "loss": 0.1301, "step": 28030 }, { "epoch": 13.121226304703955, "grad_norm": 0.6254779100418091, "learning_rate": 7.329472363550145e-06, "loss": 0.1058, "step": 28040 }, { "epoch": 13.125906857009127, "grad_norm": 0.5680731534957886, "learning_rate": 7.322032958067241e-06, "loss": 0.1176, "step": 28050 }, { "epoch": 13.130587409314298, "grad_norm": 0.9283885955810547, "learning_rate": 7.314597723622363e-06, "loss": 0.1286, "step": 28060 }, { "epoch": 13.135267961619471, "grad_norm": 0.6364361047744751, "learning_rate": 7.307166665040156e-06, "loss": 0.1029, "step": 28070 }, { "epoch": 13.139948513924644, "grad_norm": 0.5144299268722534, "learning_rate": 7.299739787142548e-06, "loss": 0.117, "step": 28080 }, { "epoch": 13.144629066229815, "grad_norm": 0.5765814185142517, "learning_rate": 7.292317094748763e-06, "loss": 0.1028, "step": 28090 }, { "epoch": 13.149309618534987, "grad_norm": 0.5421374440193176, "learning_rate": 7.28489859267531e-06, "loss": 0.1037, "step": 28100 }, { "epoch": 13.153990170840158, "grad_norm": 0.687637209892273, "learning_rate": 7.277484285735965e-06, "loss": 0.1112, "step": 28110 }, { "epoch": 13.158670723145331, "grad_norm": 0.5480566024780273, "learning_rate": 7.270074178741797e-06, "loss": 0.1211, "step": 28120 }, { "epoch": 13.163351275450504, "grad_norm": 0.6252624988555908, "learning_rate": 7.262668276501144e-06, "loss": 0.1051, "step": 28130 }, { "epoch": 13.168031827755675, "grad_norm": 0.5819523334503174, "learning_rate": 7.255266583819613e-06, "loss": 0.1204, "step": 28140 }, { "epoch": 13.172712380060847, "grad_norm": 0.6495111584663391, "learning_rate": 7.247869105500082e-06, "loss": 0.1056, "step": 28150 }, { "epoch": 13.177392932366018, "grad_norm": 0.5784812569618225, "learning_rate": 7.240475846342698e-06, "loss": 0.108, "step": 28160 }, { "epoch": 13.182073484671191, "grad_norm": 0.6435672640800476, "learning_rate": 7.2330868111448655e-06, "loss": 0.099, "step": 28170 }, { "epoch": 13.186754036976364, "grad_norm": 0.7394602298736572, "learning_rate": 7.225702004701243e-06, "loss": 0.1129, "step": 28180 }, { "epoch": 13.191434589281535, "grad_norm": 0.5468359589576721, "learning_rate": 7.218321431803763e-06, "loss": 0.102, "step": 28190 }, { "epoch": 13.196115141586708, "grad_norm": 0.638592541217804, "learning_rate": 7.210945097241592e-06, "loss": 0.1265, "step": 28200 }, { "epoch": 13.200795693891878, "grad_norm": 0.9249528050422668, "learning_rate": 7.203573005801145e-06, "loss": 0.1228, "step": 28210 }, { "epoch": 13.205476246197051, "grad_norm": 0.49686092138290405, "learning_rate": 7.196205162266112e-06, "loss": 0.1139, "step": 28220 }, { "epoch": 13.210156798502224, "grad_norm": 0.49316367506980896, "learning_rate": 7.18884157141739e-06, "loss": 0.1065, "step": 28230 }, { "epoch": 13.214837350807395, "grad_norm": 0.6646007299423218, "learning_rate": 7.181482238033138e-06, "loss": 0.1011, "step": 28240 }, { "epoch": 13.219517903112568, "grad_norm": 0.5297843217849731, "learning_rate": 7.174127166888753e-06, "loss": 0.0966, "step": 28250 }, { "epoch": 13.224198455417739, "grad_norm": 0.6464483737945557, "learning_rate": 7.166776362756852e-06, "loss": 0.1109, "step": 28260 }, { "epoch": 13.228879007722911, "grad_norm": 0.8940072059631348, "learning_rate": 7.159429830407298e-06, "loss": 0.1193, "step": 28270 }, { "epoch": 13.233559560028084, "grad_norm": 0.6095129251480103, "learning_rate": 7.1520875746071716e-06, "loss": 0.1202, "step": 28280 }, { "epoch": 13.238240112333255, "grad_norm": 0.7277082800865173, "learning_rate": 7.144749600120791e-06, "loss": 0.1117, "step": 28290 }, { "epoch": 13.242920664638428, "grad_norm": 0.5721502900123596, "learning_rate": 7.13741591170968e-06, "loss": 0.1162, "step": 28300 }, { "epoch": 13.247601216943599, "grad_norm": 0.535836935043335, "learning_rate": 7.130086514132589e-06, "loss": 0.1013, "step": 28310 }, { "epoch": 13.252281769248771, "grad_norm": 0.7994971871376038, "learning_rate": 7.122761412145493e-06, "loss": 0.1124, "step": 28320 }, { "epoch": 13.256962321553944, "grad_norm": 0.5976752638816833, "learning_rate": 7.115440610501563e-06, "loss": 0.1243, "step": 28330 }, { "epoch": 13.261642873859115, "grad_norm": 0.5184476971626282, "learning_rate": 7.108124113951188e-06, "loss": 0.1106, "step": 28340 }, { "epoch": 13.266323426164288, "grad_norm": 0.536942183971405, "learning_rate": 7.100811927241972e-06, "loss": 0.128, "step": 28350 }, { "epoch": 13.271003978469459, "grad_norm": 0.5638306140899658, "learning_rate": 7.093504055118702e-06, "loss": 0.0966, "step": 28360 }, { "epoch": 13.275684530774631, "grad_norm": 0.5139361619949341, "learning_rate": 7.086200502323382e-06, "loss": 0.106, "step": 28370 }, { "epoch": 13.280365083079804, "grad_norm": 0.6195312142372131, "learning_rate": 7.078901273595209e-06, "loss": 0.1228, "step": 28380 }, { "epoch": 13.285045635384975, "grad_norm": 0.48559871315956116, "learning_rate": 7.071606373670575e-06, "loss": 0.1076, "step": 28390 }, { "epoch": 13.289726187690148, "grad_norm": 0.4513740539550781, "learning_rate": 7.064315807283056e-06, "loss": 0.1116, "step": 28400 }, { "epoch": 13.294406739995319, "grad_norm": 0.4678407311439514, "learning_rate": 7.057029579163424e-06, "loss": 0.1116, "step": 28410 }, { "epoch": 13.299087292300491, "grad_norm": 0.5845938324928284, "learning_rate": 7.049747694039635e-06, "loss": 0.1106, "step": 28420 }, { "epoch": 13.303767844605664, "grad_norm": 0.47690558433532715, "learning_rate": 7.042470156636821e-06, "loss": 0.1087, "step": 28430 }, { "epoch": 13.308448396910835, "grad_norm": 1.0844213962554932, "learning_rate": 7.035196971677305e-06, "loss": 0.1073, "step": 28440 }, { "epoch": 13.313128949216008, "grad_norm": 0.5102370381355286, "learning_rate": 7.027928143880569e-06, "loss": 0.1201, "step": 28450 }, { "epoch": 13.317809501521179, "grad_norm": 0.5280824899673462, "learning_rate": 7.0206636779632796e-06, "loss": 0.107, "step": 28460 }, { "epoch": 13.322490053826352, "grad_norm": 0.5454416871070862, "learning_rate": 7.013403578639274e-06, "loss": 0.1131, "step": 28470 }, { "epoch": 13.327170606131524, "grad_norm": 0.6761260628700256, "learning_rate": 7.0061478506195464e-06, "loss": 0.1118, "step": 28480 }, { "epoch": 13.331851158436695, "grad_norm": 0.6813755631446838, "learning_rate": 6.998896498612257e-06, "loss": 0.0971, "step": 28490 }, { "epoch": 13.336531710741868, "grad_norm": 0.7736191153526306, "learning_rate": 6.9916495273227355e-06, "loss": 0.1045, "step": 28500 }, { "epoch": 13.341212263047039, "grad_norm": 0.5244203209877014, "learning_rate": 6.9844069414534685e-06, "loss": 0.1035, "step": 28510 }, { "epoch": 13.345892815352212, "grad_norm": 0.5961300134658813, "learning_rate": 6.977168745704077e-06, "loss": 0.104, "step": 28520 }, { "epoch": 13.350573367657384, "grad_norm": 0.5614168643951416, "learning_rate": 6.969934944771356e-06, "loss": 0.1139, "step": 28530 }, { "epoch": 13.355253919962555, "grad_norm": 0.600210428237915, "learning_rate": 6.962705543349242e-06, "loss": 0.1188, "step": 28540 }, { "epoch": 13.359934472267728, "grad_norm": 0.4537530243396759, "learning_rate": 6.955480546128807e-06, "loss": 0.1223, "step": 28550 }, { "epoch": 13.364615024572899, "grad_norm": 0.6134674549102783, "learning_rate": 6.948259957798284e-06, "loss": 0.1039, "step": 28560 }, { "epoch": 13.369295576878072, "grad_norm": 0.7559189200401306, "learning_rate": 6.94104378304303e-06, "loss": 0.1092, "step": 28570 }, { "epoch": 13.373976129183244, "grad_norm": 0.6068590879440308, "learning_rate": 6.933832026545534e-06, "loss": 0.1272, "step": 28580 }, { "epoch": 13.378656681488415, "grad_norm": 0.7367731332778931, "learning_rate": 6.926624692985439e-06, "loss": 0.1092, "step": 28590 }, { "epoch": 13.383337233793588, "grad_norm": 0.5876941680908203, "learning_rate": 6.919421787039499e-06, "loss": 0.1137, "step": 28600 }, { "epoch": 13.388017786098759, "grad_norm": 0.6520528793334961, "learning_rate": 6.9122233133816026e-06, "loss": 0.1076, "step": 28610 }, { "epoch": 13.392698338403932, "grad_norm": 0.6459787487983704, "learning_rate": 6.905029276682766e-06, "loss": 0.1187, "step": 28620 }, { "epoch": 13.397378890709104, "grad_norm": 0.6383126378059387, "learning_rate": 6.897839681611113e-06, "loss": 0.0964, "step": 28630 }, { "epoch": 13.402059443014275, "grad_norm": 0.5247782468795776, "learning_rate": 6.890654532831897e-06, "loss": 0.1067, "step": 28640 }, { "epoch": 13.406739995319448, "grad_norm": 0.5247334241867065, "learning_rate": 6.883473835007484e-06, "loss": 0.1156, "step": 28650 }, { "epoch": 13.411420547624619, "grad_norm": 0.9214664101600647, "learning_rate": 6.876297592797354e-06, "loss": 0.1255, "step": 28660 }, { "epoch": 13.416101099929792, "grad_norm": 0.4936921298503876, "learning_rate": 6.869125810858086e-06, "loss": 0.108, "step": 28670 }, { "epoch": 13.420781652234965, "grad_norm": 0.6550752520561218, "learning_rate": 6.8619584938433736e-06, "loss": 0.1136, "step": 28680 }, { "epoch": 13.425462204540136, "grad_norm": 0.6983242034912109, "learning_rate": 6.8547956464040174e-06, "loss": 0.1115, "step": 28690 }, { "epoch": 13.430142756845308, "grad_norm": 0.8945489525794983, "learning_rate": 6.847637273187901e-06, "loss": 0.1136, "step": 28700 }, { "epoch": 13.43482330915048, "grad_norm": 0.5918654203414917, "learning_rate": 6.840483378840021e-06, "loss": 0.1046, "step": 28710 }, { "epoch": 13.439503861455652, "grad_norm": 0.7428432703018188, "learning_rate": 6.833333968002461e-06, "loss": 0.1215, "step": 28720 }, { "epoch": 13.444184413760825, "grad_norm": 0.6372256278991699, "learning_rate": 6.8261890453144e-06, "loss": 0.1096, "step": 28730 }, { "epoch": 13.448864966065996, "grad_norm": 0.6265025734901428, "learning_rate": 6.8190486154120935e-06, "loss": 0.1043, "step": 28740 }, { "epoch": 13.453545518371168, "grad_norm": 0.5585861206054688, "learning_rate": 6.811912682928891e-06, "loss": 0.1218, "step": 28750 }, { "epoch": 13.45822607067634, "grad_norm": 0.5812614560127258, "learning_rate": 6.804781252495226e-06, "loss": 0.1152, "step": 28760 }, { "epoch": 13.462906622981512, "grad_norm": 0.7227446436882019, "learning_rate": 6.797654328738598e-06, "loss": 0.1041, "step": 28770 }, { "epoch": 13.467587175286685, "grad_norm": 0.8678969144821167, "learning_rate": 6.790531916283602e-06, "loss": 0.1074, "step": 28780 }, { "epoch": 13.472267727591856, "grad_norm": 0.6077746748924255, "learning_rate": 6.783414019751887e-06, "loss": 0.098, "step": 28790 }, { "epoch": 13.476948279897028, "grad_norm": 0.7684412002563477, "learning_rate": 6.776300643762171e-06, "loss": 0.1051, "step": 28800 }, { "epoch": 13.4816288322022, "grad_norm": 0.6422973275184631, "learning_rate": 6.769191792930263e-06, "loss": 0.1093, "step": 28810 }, { "epoch": 13.486309384507372, "grad_norm": 1.416799545288086, "learning_rate": 6.762087471869009e-06, "loss": 0.1104, "step": 28820 }, { "epoch": 13.490989936812543, "grad_norm": 0.6060672998428345, "learning_rate": 6.7549876851883274e-06, "loss": 0.1177, "step": 28830 }, { "epoch": 13.495670489117716, "grad_norm": 0.862733781337738, "learning_rate": 6.747892437495196e-06, "loss": 0.1107, "step": 28840 }, { "epoch": 13.500351041422888, "grad_norm": 0.602660059928894, "learning_rate": 6.740801733393637e-06, "loss": 0.1035, "step": 28850 }, { "epoch": 13.50503159372806, "grad_norm": 0.5294807553291321, "learning_rate": 6.733715577484736e-06, "loss": 0.0979, "step": 28860 }, { "epoch": 13.509712146033232, "grad_norm": 0.7949925661087036, "learning_rate": 6.726633974366622e-06, "loss": 0.1109, "step": 28870 }, { "epoch": 13.514392698338405, "grad_norm": 1.0088398456573486, "learning_rate": 6.719556928634474e-06, "loss": 0.1227, "step": 28880 }, { "epoch": 13.519073250643576, "grad_norm": 1.0863347053527832, "learning_rate": 6.712484444880504e-06, "loss": 0.1286, "step": 28890 }, { "epoch": 13.523753802948749, "grad_norm": 0.4877675473690033, "learning_rate": 6.705416527693972e-06, "loss": 0.1115, "step": 28900 }, { "epoch": 13.52843435525392, "grad_norm": 0.5916874408721924, "learning_rate": 6.6983531816611765e-06, "loss": 0.1005, "step": 28910 }, { "epoch": 13.533114907559092, "grad_norm": 0.5387170314788818, "learning_rate": 6.691294411365438e-06, "loss": 0.1111, "step": 28920 }, { "epoch": 13.537795459864263, "grad_norm": 0.6782585382461548, "learning_rate": 6.6842402213871224e-06, "loss": 0.1101, "step": 28930 }, { "epoch": 13.542476012169436, "grad_norm": 0.48551028966903687, "learning_rate": 6.677190616303614e-06, "loss": 0.1115, "step": 28940 }, { "epoch": 13.547156564474609, "grad_norm": 0.49522969126701355, "learning_rate": 6.670145600689332e-06, "loss": 0.105, "step": 28950 }, { "epoch": 13.55183711677978, "grad_norm": 0.6549906730651855, "learning_rate": 6.6631051791157005e-06, "loss": 0.1058, "step": 28960 }, { "epoch": 13.556517669084952, "grad_norm": 0.5765702724456787, "learning_rate": 6.656069356151176e-06, "loss": 0.1046, "step": 28970 }, { "epoch": 13.561198221390123, "grad_norm": 0.5478165149688721, "learning_rate": 6.649038136361231e-06, "loss": 0.1065, "step": 28980 }, { "epoch": 13.565878773695296, "grad_norm": 0.857391893863678, "learning_rate": 6.6420115243083395e-06, "loss": 0.1116, "step": 28990 }, { "epoch": 13.570559326000469, "grad_norm": 0.5299837589263916, "learning_rate": 6.634989524552007e-06, "loss": 0.1069, "step": 29000 }, { "epoch": 13.57523987830564, "grad_norm": 0.6117365956306458, "learning_rate": 6.627972141648725e-06, "loss": 0.0987, "step": 29010 }, { "epoch": 13.579920430610812, "grad_norm": 0.5605888962745667, "learning_rate": 6.620959380151991e-06, "loss": 0.1065, "step": 29020 }, { "epoch": 13.584600982915983, "grad_norm": 0.5516433119773865, "learning_rate": 6.613951244612324e-06, "loss": 0.108, "step": 29030 }, { "epoch": 13.589281535221156, "grad_norm": 0.6630405187606812, "learning_rate": 6.6069477395772155e-06, "loss": 0.0975, "step": 29040 }, { "epoch": 13.593962087526329, "grad_norm": 0.5825487971305847, "learning_rate": 6.599948869591166e-06, "loss": 0.1147, "step": 29050 }, { "epoch": 13.5986426398315, "grad_norm": 0.5380171537399292, "learning_rate": 6.592954639195673e-06, "loss": 0.1204, "step": 29060 }, { "epoch": 13.603323192136672, "grad_norm": 0.508778989315033, "learning_rate": 6.585965052929208e-06, "loss": 0.1018, "step": 29070 }, { "epoch": 13.608003744441843, "grad_norm": 0.5284372568130493, "learning_rate": 6.578980115327241e-06, "loss": 0.1134, "step": 29080 }, { "epoch": 13.612684296747016, "grad_norm": 0.6346684098243713, "learning_rate": 6.571999830922224e-06, "loss": 0.1123, "step": 29090 }, { "epoch": 13.617364849052189, "grad_norm": 0.7146583199501038, "learning_rate": 6.565024204243589e-06, "loss": 0.1124, "step": 29100 }, { "epoch": 13.62204540135736, "grad_norm": 0.6061592102050781, "learning_rate": 6.558053239817738e-06, "loss": 0.1141, "step": 29110 }, { "epoch": 13.626725953662532, "grad_norm": 0.6127926707267761, "learning_rate": 6.5510869421680565e-06, "loss": 0.1093, "step": 29120 }, { "epoch": 13.631406505967703, "grad_norm": 0.4976590573787689, "learning_rate": 6.544125315814906e-06, "loss": 0.1153, "step": 29130 }, { "epoch": 13.636087058272876, "grad_norm": 0.9539696574211121, "learning_rate": 6.5371683652756e-06, "loss": 0.1112, "step": 29140 }, { "epoch": 13.640767610578049, "grad_norm": 1.128508448600769, "learning_rate": 6.530216095064434e-06, "loss": 0.0982, "step": 29150 }, { "epoch": 13.64544816288322, "grad_norm": 0.6392227411270142, "learning_rate": 6.523268509692659e-06, "loss": 0.1023, "step": 29160 }, { "epoch": 13.650128715188393, "grad_norm": 0.6067737936973572, "learning_rate": 6.516325613668495e-06, "loss": 0.1066, "step": 29170 }, { "epoch": 13.654809267493563, "grad_norm": 0.6290593147277832, "learning_rate": 6.509387411497099e-06, "loss": 0.0937, "step": 29180 }, { "epoch": 13.659489819798736, "grad_norm": 0.5201486349105835, "learning_rate": 6.502453907680603e-06, "loss": 0.1087, "step": 29190 }, { "epoch": 13.664170372103909, "grad_norm": 0.7882075905799866, "learning_rate": 6.495525106718084e-06, "loss": 0.1174, "step": 29200 }, { "epoch": 13.66885092440908, "grad_norm": 0.8832878470420837, "learning_rate": 6.488601013105555e-06, "loss": 0.1088, "step": 29210 }, { "epoch": 13.673531476714253, "grad_norm": 0.5483162999153137, "learning_rate": 6.481681631336001e-06, "loss": 0.1051, "step": 29220 }, { "epoch": 13.678212029019424, "grad_norm": 0.5229132771492004, "learning_rate": 6.474766965899323e-06, "loss": 0.1, "step": 29230 }, { "epoch": 13.682892581324596, "grad_norm": 1.0346457958221436, "learning_rate": 6.46785702128237e-06, "loss": 0.1169, "step": 29240 }, { "epoch": 13.687573133629769, "grad_norm": 0.8548501133918762, "learning_rate": 6.46095180196894e-06, "loss": 0.1156, "step": 29250 }, { "epoch": 13.69225368593494, "grad_norm": 0.5056101679801941, "learning_rate": 6.454051312439749e-06, "loss": 0.1018, "step": 29260 }, { "epoch": 13.696934238240113, "grad_norm": 0.5236080884933472, "learning_rate": 6.447155557172446e-06, "loss": 0.1068, "step": 29270 }, { "epoch": 13.701614790545284, "grad_norm": 0.9611905217170715, "learning_rate": 6.440264540641625e-06, "loss": 0.111, "step": 29280 }, { "epoch": 13.706295342850456, "grad_norm": 0.9217109084129333, "learning_rate": 6.433378267318775e-06, "loss": 0.0981, "step": 29290 }, { "epoch": 13.710975895155629, "grad_norm": 0.46564093232154846, "learning_rate": 6.426496741672333e-06, "loss": 0.1081, "step": 29300 }, { "epoch": 13.7156564474608, "grad_norm": 0.5618085861206055, "learning_rate": 6.419619968167646e-06, "loss": 0.1092, "step": 29310 }, { "epoch": 13.720336999765973, "grad_norm": 0.6348780989646912, "learning_rate": 6.41274795126698e-06, "loss": 0.1078, "step": 29320 }, { "epoch": 13.725017552071144, "grad_norm": 0.5360630750656128, "learning_rate": 6.405880695429506e-06, "loss": 0.1123, "step": 29330 }, { "epoch": 13.729698104376316, "grad_norm": 0.5570269823074341, "learning_rate": 6.399018205111314e-06, "loss": 0.118, "step": 29340 }, { "epoch": 13.73437865668149, "grad_norm": 0.6389529705047607, "learning_rate": 6.392160484765404e-06, "loss": 0.1043, "step": 29350 }, { "epoch": 13.73905920898666, "grad_norm": 0.6004288792610168, "learning_rate": 6.38530753884167e-06, "loss": 0.1061, "step": 29360 }, { "epoch": 13.743739761291833, "grad_norm": 0.5381559133529663, "learning_rate": 6.378459371786916e-06, "loss": 0.1117, "step": 29370 }, { "epoch": 13.748420313597004, "grad_norm": 0.6852381229400635, "learning_rate": 6.371615988044846e-06, "loss": 0.1147, "step": 29380 }, { "epoch": 13.753100865902177, "grad_norm": 0.547281801700592, "learning_rate": 6.364777392056054e-06, "loss": 0.1121, "step": 29390 }, { "epoch": 13.75778141820735, "grad_norm": 0.4900471568107605, "learning_rate": 6.357943588258031e-06, "loss": 0.1073, "step": 29400 }, { "epoch": 13.76246197051252, "grad_norm": 0.6202996969223022, "learning_rate": 6.35111458108516e-06, "loss": 0.1072, "step": 29410 }, { "epoch": 13.767142522817693, "grad_norm": 0.6086934208869934, "learning_rate": 6.344290374968711e-06, "loss": 0.1197, "step": 29420 }, { "epoch": 13.771823075122864, "grad_norm": 0.4860411286354065, "learning_rate": 6.337470974336828e-06, "loss": 0.0954, "step": 29430 }, { "epoch": 13.776503627428037, "grad_norm": 1.0715433359146118, "learning_rate": 6.3306563836145614e-06, "loss": 0.1108, "step": 29440 }, { "epoch": 13.78118417973321, "grad_norm": 0.6682178378105164, "learning_rate": 6.3238466072238154e-06, "loss": 0.1029, "step": 29450 }, { "epoch": 13.78586473203838, "grad_norm": 0.8601731061935425, "learning_rate": 6.317041649583376e-06, "loss": 0.1131, "step": 29460 }, { "epoch": 13.790545284343553, "grad_norm": 0.5755960941314697, "learning_rate": 6.310241515108919e-06, "loss": 0.1048, "step": 29470 }, { "epoch": 13.795225836648724, "grad_norm": 0.49190545082092285, "learning_rate": 6.303446208212966e-06, "loss": 0.1028, "step": 29480 }, { "epoch": 13.799906388953897, "grad_norm": 0.810870885848999, "learning_rate": 6.2966557333049205e-06, "loss": 0.1187, "step": 29490 }, { "epoch": 13.80458694125907, "grad_norm": 0.5147506594657898, "learning_rate": 6.2898700947910575e-06, "loss": 0.1133, "step": 29500 }, { "epoch": 13.80926749356424, "grad_norm": 0.4786587059497833, "learning_rate": 6.28308929707449e-06, "loss": 0.107, "step": 29510 }, { "epoch": 13.813948045869413, "grad_norm": 0.4752202332019806, "learning_rate": 6.276313344555213e-06, "loss": 0.1026, "step": 29520 }, { "epoch": 13.818628598174584, "grad_norm": 0.5831592082977295, "learning_rate": 6.269542241630064e-06, "loss": 0.1074, "step": 29530 }, { "epoch": 13.823309150479757, "grad_norm": 0.6276861429214478, "learning_rate": 6.262775992692743e-06, "loss": 0.1156, "step": 29540 }, { "epoch": 13.82798970278493, "grad_norm": 0.589860200881958, "learning_rate": 6.256014602133789e-06, "loss": 0.1135, "step": 29550 }, { "epoch": 13.8326702550901, "grad_norm": 0.5283031463623047, "learning_rate": 6.249258074340601e-06, "loss": 0.1147, "step": 29560 }, { "epoch": 13.837350807395273, "grad_norm": 0.665964663028717, "learning_rate": 6.2425064136974175e-06, "loss": 0.1042, "step": 29570 }, { "epoch": 13.842031359700444, "grad_norm": 0.8763574361801147, "learning_rate": 6.23575962458531e-06, "loss": 0.1124, "step": 29580 }, { "epoch": 13.846711912005617, "grad_norm": 0.7180102467536926, "learning_rate": 6.2290177113822e-06, "loss": 0.1177, "step": 29590 }, { "epoch": 13.851392464310788, "grad_norm": 0.9548866152763367, "learning_rate": 6.2222806784628486e-06, "loss": 0.1051, "step": 29600 }, { "epoch": 13.85607301661596, "grad_norm": 0.588001549243927, "learning_rate": 6.215548530198833e-06, "loss": 0.1073, "step": 29610 }, { "epoch": 13.860753568921133, "grad_norm": 0.5882920026779175, "learning_rate": 6.208821270958576e-06, "loss": 0.1011, "step": 29620 }, { "epoch": 13.865434121226304, "grad_norm": 0.5205572247505188, "learning_rate": 6.202098905107324e-06, "loss": 0.1163, "step": 29630 }, { "epoch": 13.870114673531477, "grad_norm": 0.5789608955383301, "learning_rate": 6.195381437007145e-06, "loss": 0.1172, "step": 29640 }, { "epoch": 13.87479522583665, "grad_norm": 1.136304259300232, "learning_rate": 6.188668871016931e-06, "loss": 0.1022, "step": 29650 }, { "epoch": 13.87947577814182, "grad_norm": 0.8274648785591125, "learning_rate": 6.181961211492401e-06, "loss": 0.1224, "step": 29660 }, { "epoch": 13.884156330446993, "grad_norm": 0.5341648459434509, "learning_rate": 6.175258462786071e-06, "loss": 0.1053, "step": 29670 }, { "epoch": 13.888836882752164, "grad_norm": 0.5087756514549255, "learning_rate": 6.168560629247286e-06, "loss": 0.1096, "step": 29680 }, { "epoch": 13.893517435057337, "grad_norm": 1.2457634210586548, "learning_rate": 6.1618677152222014e-06, "loss": 0.1049, "step": 29690 }, { "epoch": 13.898197987362508, "grad_norm": 0.5195053219795227, "learning_rate": 6.1551797250537705e-06, "loss": 0.0937, "step": 29700 }, { "epoch": 13.90287853966768, "grad_norm": 0.5403427481651306, "learning_rate": 6.148496663081759e-06, "loss": 0.1081, "step": 29710 }, { "epoch": 13.907559091972853, "grad_norm": 0.4837659001350403, "learning_rate": 6.141818533642739e-06, "loss": 0.1067, "step": 29720 }, { "epoch": 13.912239644278024, "grad_norm": 0.5239707827568054, "learning_rate": 6.135145341070068e-06, "loss": 0.1048, "step": 29730 }, { "epoch": 13.916920196583197, "grad_norm": 0.6101850867271423, "learning_rate": 6.1284770896939115e-06, "loss": 0.0986, "step": 29740 }, { "epoch": 13.92160074888837, "grad_norm": 0.5562407374382019, "learning_rate": 6.1218137838412275e-06, "loss": 0.1004, "step": 29750 }, { "epoch": 13.92628130119354, "grad_norm": 0.9209245443344116, "learning_rate": 6.1151554278357646e-06, "loss": 0.1094, "step": 29760 }, { "epoch": 13.930961853498713, "grad_norm": 0.8354862928390503, "learning_rate": 6.108502025998049e-06, "loss": 0.1067, "step": 29770 }, { "epoch": 13.935642405803884, "grad_norm": 0.5126202702522278, "learning_rate": 6.101853582645406e-06, "loss": 0.1044, "step": 29780 }, { "epoch": 13.940322958109057, "grad_norm": 0.5262435674667358, "learning_rate": 6.095210102091943e-06, "loss": 0.0997, "step": 29790 }, { "epoch": 13.945003510414228, "grad_norm": 1.1300545930862427, "learning_rate": 6.088571588648531e-06, "loss": 0.1007, "step": 29800 }, { "epoch": 13.9496840627194, "grad_norm": 0.6726383566856384, "learning_rate": 6.081938046622842e-06, "loss": 0.1046, "step": 29810 }, { "epoch": 13.954364615024573, "grad_norm": 0.633969247341156, "learning_rate": 6.075309480319305e-06, "loss": 0.1222, "step": 29820 }, { "epoch": 13.959045167329744, "grad_norm": 0.5877478122711182, "learning_rate": 6.068685894039115e-06, "loss": 0.1051, "step": 29830 }, { "epoch": 13.963725719634917, "grad_norm": 0.497861385345459, "learning_rate": 6.062067292080263e-06, "loss": 0.0912, "step": 29840 }, { "epoch": 13.968406271940088, "grad_norm": 0.8151667714118958, "learning_rate": 6.055453678737472e-06, "loss": 0.1185, "step": 29850 }, { "epoch": 13.97308682424526, "grad_norm": 0.5649687051773071, "learning_rate": 6.048845058302252e-06, "loss": 0.1112, "step": 29860 }, { "epoch": 13.977767376550434, "grad_norm": 0.6636674404144287, "learning_rate": 6.042241435062867e-06, "loss": 0.1183, "step": 29870 }, { "epoch": 13.982447928855605, "grad_norm": 0.4948883354663849, "learning_rate": 6.035642813304328e-06, "loss": 0.1055, "step": 29880 }, { "epoch": 13.987128481160777, "grad_norm": 0.5379335880279541, "learning_rate": 6.0290491973084156e-06, "loss": 0.1056, "step": 29890 }, { "epoch": 13.991809033465948, "grad_norm": 0.4670410454273224, "learning_rate": 6.0224605913536534e-06, "loss": 0.1009, "step": 29900 }, { "epoch": 13.996489585771121, "grad_norm": 0.6212366819381714, "learning_rate": 6.0158769997153206e-06, "loss": 0.1063, "step": 29910 }, { "epoch": 14.000936110461035, "grad_norm": 0.6047713160514832, "learning_rate": 6.00929842666543e-06, "loss": 0.0932, "step": 29920 }, { "epoch": 14.005616662766206, "grad_norm": 0.5413215160369873, "learning_rate": 6.002724876472751e-06, "loss": 0.0945, "step": 29930 }, { "epoch": 14.010297215071379, "grad_norm": 0.541797935962677, "learning_rate": 5.996156353402792e-06, "loss": 0.1057, "step": 29940 }, { "epoch": 14.01497776737655, "grad_norm": 0.5656141638755798, "learning_rate": 5.98959286171779e-06, "loss": 0.1203, "step": 29950 }, { "epoch": 14.019658319681723, "grad_norm": 0.7349773645401001, "learning_rate": 5.983034405676727e-06, "loss": 0.096, "step": 29960 }, { "epoch": 14.024338871986895, "grad_norm": 0.5177677869796753, "learning_rate": 5.976480989535313e-06, "loss": 0.0999, "step": 29970 }, { "epoch": 14.029019424292066, "grad_norm": 0.4947598874568939, "learning_rate": 5.969932617545991e-06, "loss": 0.0837, "step": 29980 }, { "epoch": 14.033699976597239, "grad_norm": 0.558527946472168, "learning_rate": 5.963389293957925e-06, "loss": 0.1044, "step": 29990 }, { "epoch": 14.03838052890241, "grad_norm": 0.6315997242927551, "learning_rate": 5.956851023017008e-06, "loss": 0.092, "step": 30000 }, { "epoch": 14.043061081207583, "grad_norm": 0.567575991153717, "learning_rate": 5.950317808965855e-06, "loss": 0.0925, "step": 30010 }, { "epoch": 14.047741633512754, "grad_norm": 0.6923149228096008, "learning_rate": 5.94378965604379e-06, "loss": 0.1002, "step": 30020 }, { "epoch": 14.052422185817926, "grad_norm": 0.529094398021698, "learning_rate": 5.937266568486874e-06, "loss": 0.0838, "step": 30030 }, { "epoch": 14.057102738123099, "grad_norm": 0.502872109413147, "learning_rate": 5.930748550527859e-06, "loss": 0.0768, "step": 30040 }, { "epoch": 14.06178329042827, "grad_norm": 0.5799567699432373, "learning_rate": 5.9242356063962096e-06, "loss": 0.0912, "step": 30050 }, { "epoch": 14.066463842733443, "grad_norm": 0.6123964786529541, "learning_rate": 5.917727740318121e-06, "loss": 0.0863, "step": 30060 }, { "epoch": 14.071144395038614, "grad_norm": 0.794944167137146, "learning_rate": 5.911224956516463e-06, "loss": 0.1006, "step": 30070 }, { "epoch": 14.075824947343786, "grad_norm": 0.672288715839386, "learning_rate": 5.904727259210827e-06, "loss": 0.1047, "step": 30080 }, { "epoch": 14.08050549964896, "grad_norm": 0.40370047092437744, "learning_rate": 5.898234652617501e-06, "loss": 0.0865, "step": 30090 }, { "epoch": 14.08518605195413, "grad_norm": 0.7073696255683899, "learning_rate": 5.891747140949461e-06, "loss": 0.1019, "step": 30100 }, { "epoch": 14.089866604259303, "grad_norm": 0.6468791365623474, "learning_rate": 5.885264728416384e-06, "loss": 0.1012, "step": 30110 }, { "epoch": 14.094547156564474, "grad_norm": 0.662376344203949, "learning_rate": 5.878787419224639e-06, "loss": 0.1068, "step": 30120 }, { "epoch": 14.099227708869646, "grad_norm": 0.8490667343139648, "learning_rate": 5.872315217577284e-06, "loss": 0.0911, "step": 30130 }, { "epoch": 14.10390826117482, "grad_norm": 0.5086355209350586, "learning_rate": 5.865848127674054e-06, "loss": 0.0998, "step": 30140 }, { "epoch": 14.10858881347999, "grad_norm": 0.528449535369873, "learning_rate": 5.859386153711378e-06, "loss": 0.0836, "step": 30150 }, { "epoch": 14.113269365785163, "grad_norm": 0.6195168495178223, "learning_rate": 5.852929299882363e-06, "loss": 0.0921, "step": 30160 }, { "epoch": 14.117949918090334, "grad_norm": 0.4605462849140167, "learning_rate": 5.846477570376784e-06, "loss": 0.0972, "step": 30170 }, { "epoch": 14.122630470395507, "grad_norm": 0.5535330772399902, "learning_rate": 5.840030969381102e-06, "loss": 0.0979, "step": 30180 }, { "epoch": 14.12731102270068, "grad_norm": 0.656754195690155, "learning_rate": 5.833589501078447e-06, "loss": 0.1046, "step": 30190 }, { "epoch": 14.13199157500585, "grad_norm": 0.5995354652404785, "learning_rate": 5.827153169648622e-06, "loss": 0.1088, "step": 30200 }, { "epoch": 14.136672127311023, "grad_norm": 0.8066332936286926, "learning_rate": 5.8207219792680845e-06, "loss": 0.1013, "step": 30210 }, { "epoch": 14.141352679616194, "grad_norm": 0.46230894327163696, "learning_rate": 5.814295934109966e-06, "loss": 0.0873, "step": 30220 }, { "epoch": 14.146033231921367, "grad_norm": 0.9334045648574829, "learning_rate": 5.807875038344064e-06, "loss": 0.0886, "step": 30230 }, { "epoch": 14.15071378422654, "grad_norm": 0.5944771766662598, "learning_rate": 5.801459296136816e-06, "loss": 0.0887, "step": 30240 }, { "epoch": 14.15539433653171, "grad_norm": 0.5084375739097595, "learning_rate": 5.795048711651342e-06, "loss": 0.1118, "step": 30250 }, { "epoch": 14.160074888836883, "grad_norm": 0.5610840320587158, "learning_rate": 5.788643289047393e-06, "loss": 0.1112, "step": 30260 }, { "epoch": 14.164755441142054, "grad_norm": 0.6180558204650879, "learning_rate": 5.782243032481371e-06, "loss": 0.0893, "step": 30270 }, { "epoch": 14.169435993447227, "grad_norm": 0.5486648082733154, "learning_rate": 5.775847946106348e-06, "loss": 0.0945, "step": 30280 }, { "epoch": 14.1741165457524, "grad_norm": 0.6775556802749634, "learning_rate": 5.769458034072015e-06, "loss": 0.1071, "step": 30290 }, { "epoch": 14.17879709805757, "grad_norm": 0.7920193076133728, "learning_rate": 5.763073300524721e-06, "loss": 0.0993, "step": 30300 }, { "epoch": 14.183477650362743, "grad_norm": 0.5441699028015137, "learning_rate": 5.756693749607452e-06, "loss": 0.0889, "step": 30310 }, { "epoch": 14.188158202667914, "grad_norm": 0.5335842370986938, "learning_rate": 5.750319385459824e-06, "loss": 0.0897, "step": 30320 }, { "epoch": 14.192838754973087, "grad_norm": 0.7490527033805847, "learning_rate": 5.743950212218094e-06, "loss": 0.0913, "step": 30330 }, { "epoch": 14.19751930727826, "grad_norm": 0.5118102431297302, "learning_rate": 5.737586234015152e-06, "loss": 0.1051, "step": 30340 }, { "epoch": 14.20219985958343, "grad_norm": 0.5335443019866943, "learning_rate": 5.731227454980516e-06, "loss": 0.0982, "step": 30350 }, { "epoch": 14.206880411888603, "grad_norm": 0.6555582880973816, "learning_rate": 5.7248738792403225e-06, "loss": 0.104, "step": 30360 }, { "epoch": 14.211560964193774, "grad_norm": 0.7993584871292114, "learning_rate": 5.718525510917342e-06, "loss": 0.0967, "step": 30370 }, { "epoch": 14.216241516498947, "grad_norm": 0.5273666381835938, "learning_rate": 5.712182354130965e-06, "loss": 0.104, "step": 30380 }, { "epoch": 14.22092206880412, "grad_norm": 0.46554961800575256, "learning_rate": 5.705844412997189e-06, "loss": 0.0862, "step": 30390 }, { "epoch": 14.22560262110929, "grad_norm": 0.9679765105247498, "learning_rate": 5.6995116916286395e-06, "loss": 0.1043, "step": 30400 }, { "epoch": 14.230283173414463, "grad_norm": 0.5047277212142944, "learning_rate": 5.693184194134549e-06, "loss": 0.1039, "step": 30410 }, { "epoch": 14.234963725719634, "grad_norm": 0.6288118958473206, "learning_rate": 5.6868619246207685e-06, "loss": 0.0893, "step": 30420 }, { "epoch": 14.239644278024807, "grad_norm": 0.58355712890625, "learning_rate": 5.6805448871897396e-06, "loss": 0.1007, "step": 30430 }, { "epoch": 14.24432483032998, "grad_norm": 0.5598286986351013, "learning_rate": 5.674233085940526e-06, "loss": 0.1045, "step": 30440 }, { "epoch": 14.24900538263515, "grad_norm": 0.5576395392417908, "learning_rate": 5.66792652496879e-06, "loss": 0.095, "step": 30450 }, { "epoch": 14.253685934940323, "grad_norm": 0.5244736671447754, "learning_rate": 5.661625208366777e-06, "loss": 0.0995, "step": 30460 }, { "epoch": 14.258366487245494, "grad_norm": 0.5576403737068176, "learning_rate": 5.65532914022336e-06, "loss": 0.0978, "step": 30470 }, { "epoch": 14.263047039550667, "grad_norm": 0.6169469356536865, "learning_rate": 5.649038324623981e-06, "loss": 0.0815, "step": 30480 }, { "epoch": 14.26772759185584, "grad_norm": 0.5700797438621521, "learning_rate": 5.6427527656506755e-06, "loss": 0.0937, "step": 30490 }, { "epoch": 14.27240814416101, "grad_norm": 0.47132882475852966, "learning_rate": 5.6364724673820884e-06, "loss": 0.0873, "step": 30500 }, { "epoch": 14.277088696466183, "grad_norm": 0.7929307222366333, "learning_rate": 5.630197433893426e-06, "loss": 0.1023, "step": 30510 }, { "epoch": 14.281769248771354, "grad_norm": 0.5858623385429382, "learning_rate": 5.623927669256494e-06, "loss": 0.105, "step": 30520 }, { "epoch": 14.286449801076527, "grad_norm": 1.1676113605499268, "learning_rate": 5.617663177539677e-06, "loss": 0.0951, "step": 30530 }, { "epoch": 14.2911303533817, "grad_norm": 0.7878853678703308, "learning_rate": 5.611403962807929e-06, "loss": 0.0942, "step": 30540 }, { "epoch": 14.29581090568687, "grad_norm": 0.46795716881752014, "learning_rate": 5.605150029122791e-06, "loss": 0.0951, "step": 30550 }, { "epoch": 14.300491457992043, "grad_norm": 0.7570139169692993, "learning_rate": 5.59890138054237e-06, "loss": 0.1012, "step": 30560 }, { "epoch": 14.305172010297214, "grad_norm": 0.5063762068748474, "learning_rate": 5.5926580211213535e-06, "loss": 0.091, "step": 30570 }, { "epoch": 14.309852562602387, "grad_norm": 0.7474108934402466, "learning_rate": 5.5864199549109794e-06, "loss": 0.0959, "step": 30580 }, { "epoch": 14.31453311490756, "grad_norm": 0.4751439094543457, "learning_rate": 5.580187185959065e-06, "loss": 0.1033, "step": 30590 }, { "epoch": 14.31921366721273, "grad_norm": 0.5756643414497375, "learning_rate": 5.573959718309991e-06, "loss": 0.095, "step": 30600 }, { "epoch": 14.323894219517904, "grad_norm": 0.6345013380050659, "learning_rate": 5.567737556004687e-06, "loss": 0.0909, "step": 30610 }, { "epoch": 14.328574771823074, "grad_norm": 0.5873569250106812, "learning_rate": 5.56152070308065e-06, "loss": 0.1065, "step": 30620 }, { "epoch": 14.333255324128247, "grad_norm": 0.6074391603469849, "learning_rate": 5.555309163571927e-06, "loss": 0.0956, "step": 30630 }, { "epoch": 14.33793587643342, "grad_norm": 0.57746422290802, "learning_rate": 5.549102941509122e-06, "loss": 0.0898, "step": 30640 }, { "epoch": 14.34261642873859, "grad_norm": 0.7796055674552917, "learning_rate": 5.542902040919381e-06, "loss": 0.0874, "step": 30650 }, { "epoch": 14.347296981043764, "grad_norm": 0.6151593327522278, "learning_rate": 5.536706465826401e-06, "loss": 0.109, "step": 30660 }, { "epoch": 14.351977533348935, "grad_norm": 0.5001480579376221, "learning_rate": 5.5305162202504285e-06, "loss": 0.101, "step": 30670 }, { "epoch": 14.356658085654107, "grad_norm": 0.8041204214096069, "learning_rate": 5.524331308208236e-06, "loss": 0.1007, "step": 30680 }, { "epoch": 14.36133863795928, "grad_norm": 0.827435314655304, "learning_rate": 5.518151733713159e-06, "loss": 0.1106, "step": 30690 }, { "epoch": 14.366019190264451, "grad_norm": 0.6632537841796875, "learning_rate": 5.511977500775052e-06, "loss": 0.0972, "step": 30700 }, { "epoch": 14.370699742569624, "grad_norm": 0.5315645933151245, "learning_rate": 5.505808613400298e-06, "loss": 0.0953, "step": 30710 }, { "epoch": 14.375380294874795, "grad_norm": 0.4803508222103119, "learning_rate": 5.499645075591835e-06, "loss": 0.0939, "step": 30720 }, { "epoch": 14.380060847179967, "grad_norm": 0.5764497518539429, "learning_rate": 5.493486891349107e-06, "loss": 0.0933, "step": 30730 }, { "epoch": 14.38474139948514, "grad_norm": 0.6478898525238037, "learning_rate": 5.487334064668095e-06, "loss": 0.1063, "step": 30740 }, { "epoch": 14.389421951790311, "grad_norm": 0.7254159450531006, "learning_rate": 5.481186599541306e-06, "loss": 0.0931, "step": 30750 }, { "epoch": 14.394102504095484, "grad_norm": 0.5309240221977234, "learning_rate": 5.475044499957756e-06, "loss": 0.0984, "step": 30760 }, { "epoch": 14.398783056400655, "grad_norm": 0.6271471381187439, "learning_rate": 5.46890776990299e-06, "loss": 0.0995, "step": 30770 }, { "epoch": 14.403463608705827, "grad_norm": 0.5878830552101135, "learning_rate": 5.462776413359064e-06, "loss": 0.087, "step": 30780 }, { "epoch": 14.408144161011, "grad_norm": 0.5397071838378906, "learning_rate": 5.456650434304555e-06, "loss": 0.0873, "step": 30790 }, { "epoch": 14.412824713316171, "grad_norm": 0.4764270484447479, "learning_rate": 5.450529836714537e-06, "loss": 0.0938, "step": 30800 }, { "epoch": 14.417505265621344, "grad_norm": 0.45013973116874695, "learning_rate": 5.444414624560602e-06, "loss": 0.0887, "step": 30810 }, { "epoch": 14.422185817926515, "grad_norm": 0.5932868123054504, "learning_rate": 5.438304801810847e-06, "loss": 0.1033, "step": 30820 }, { "epoch": 14.426866370231687, "grad_norm": 0.5468652844429016, "learning_rate": 5.432200372429863e-06, "loss": 0.0885, "step": 30830 }, { "epoch": 14.43154692253686, "grad_norm": 0.6420567035675049, "learning_rate": 5.426101340378753e-06, "loss": 0.0905, "step": 30840 }, { "epoch": 14.436227474842031, "grad_norm": 0.4970882534980774, "learning_rate": 5.420007709615117e-06, "loss": 0.0879, "step": 30850 }, { "epoch": 14.440908027147204, "grad_norm": 0.5941027998924255, "learning_rate": 5.4139194840930355e-06, "loss": 0.1014, "step": 30860 }, { "epoch": 14.445588579452375, "grad_norm": 1.041183590888977, "learning_rate": 5.407836667763097e-06, "loss": 0.0877, "step": 30870 }, { "epoch": 14.450269131757548, "grad_norm": 0.5097264051437378, "learning_rate": 5.4017592645723776e-06, "loss": 0.0928, "step": 30880 }, { "epoch": 14.454949684062719, "grad_norm": 0.4802941083908081, "learning_rate": 5.395687278464434e-06, "loss": 0.0947, "step": 30890 }, { "epoch": 14.459630236367891, "grad_norm": 0.580028772354126, "learning_rate": 5.389620713379314e-06, "loss": 0.1064, "step": 30900 }, { "epoch": 14.464310788673064, "grad_norm": 0.8046427965164185, "learning_rate": 5.3835595732535485e-06, "loss": 0.0929, "step": 30910 }, { "epoch": 14.468991340978235, "grad_norm": 0.6295172572135925, "learning_rate": 5.377503862020142e-06, "loss": 0.097, "step": 30920 }, { "epoch": 14.473671893283408, "grad_norm": 0.5761168003082275, "learning_rate": 5.371453583608578e-06, "loss": 0.0862, "step": 30930 }, { "epoch": 14.47835244558858, "grad_norm": 0.6346575617790222, "learning_rate": 5.3654087419448235e-06, "loss": 0.114, "step": 30940 }, { "epoch": 14.483032997893751, "grad_norm": 0.5878459215164185, "learning_rate": 5.359369340951302e-06, "loss": 0.0931, "step": 30950 }, { "epoch": 14.487713550198924, "grad_norm": 0.5281968116760254, "learning_rate": 5.35333538454692e-06, "loss": 0.0817, "step": 30960 }, { "epoch": 14.492394102504095, "grad_norm": 0.56662517786026, "learning_rate": 5.347306876647049e-06, "loss": 0.0992, "step": 30970 }, { "epoch": 14.497074654809268, "grad_norm": 0.6261023283004761, "learning_rate": 5.341283821163514e-06, "loss": 0.1002, "step": 30980 }, { "epoch": 14.501755207114439, "grad_norm": 0.5538915395736694, "learning_rate": 5.335266222004618e-06, "loss": 0.0923, "step": 30990 }, { "epoch": 14.506435759419611, "grad_norm": 0.9364407062530518, "learning_rate": 5.329254083075112e-06, "loss": 0.0887, "step": 31000 }, { "epoch": 14.511116311724784, "grad_norm": 0.7059298157691956, "learning_rate": 5.3232474082762125e-06, "loss": 0.1069, "step": 31010 }, { "epoch": 14.515796864029955, "grad_norm": 0.6910147666931152, "learning_rate": 5.317246201505579e-06, "loss": 0.0961, "step": 31020 }, { "epoch": 14.520477416335128, "grad_norm": 0.5585756897926331, "learning_rate": 5.3112504666573306e-06, "loss": 0.096, "step": 31030 }, { "epoch": 14.525157968640299, "grad_norm": 0.5435622930526733, "learning_rate": 5.305260207622041e-06, "loss": 0.1086, "step": 31040 }, { "epoch": 14.529838520945471, "grad_norm": 0.6893687844276428, "learning_rate": 5.299275428286711e-06, "loss": 0.11, "step": 31050 }, { "epoch": 14.534519073250644, "grad_norm": 0.7173871397972107, "learning_rate": 5.2932961325348155e-06, "loss": 0.0938, "step": 31060 }, { "epoch": 14.539199625555815, "grad_norm": 0.7493114471435547, "learning_rate": 5.2873223242462455e-06, "loss": 0.1119, "step": 31070 }, { "epoch": 14.543880177860988, "grad_norm": 0.6733130216598511, "learning_rate": 5.281354007297336e-06, "loss": 0.0946, "step": 31080 }, { "epoch": 14.548560730166159, "grad_norm": 0.534204363822937, "learning_rate": 5.275391185560876e-06, "loss": 0.0996, "step": 31090 }, { "epoch": 14.553241282471332, "grad_norm": 0.556819498538971, "learning_rate": 5.269433862906062e-06, "loss": 0.0879, "step": 31100 }, { "epoch": 14.557921834776504, "grad_norm": 0.5296688675880432, "learning_rate": 5.2634820431985435e-06, "loss": 0.0937, "step": 31110 }, { "epoch": 14.562602387081675, "grad_norm": 0.5684268474578857, "learning_rate": 5.257535730300389e-06, "loss": 0.1001, "step": 31120 }, { "epoch": 14.567282939386848, "grad_norm": 0.7188594341278076, "learning_rate": 5.251594928070103e-06, "loss": 0.0899, "step": 31130 }, { "epoch": 14.571963491692019, "grad_norm": 0.5267608165740967, "learning_rate": 5.245659640362598e-06, "loss": 0.0825, "step": 31140 }, { "epoch": 14.576644043997192, "grad_norm": 0.5253056883811951, "learning_rate": 5.239729871029222e-06, "loss": 0.0972, "step": 31150 }, { "epoch": 14.581324596302364, "grad_norm": 0.567699670791626, "learning_rate": 5.233805623917743e-06, "loss": 0.0897, "step": 31160 }, { "epoch": 14.586005148607535, "grad_norm": 1.030904769897461, "learning_rate": 5.227886902872331e-06, "loss": 0.1036, "step": 31170 }, { "epoch": 14.590685700912708, "grad_norm": 0.683777391910553, "learning_rate": 5.221973711733583e-06, "loss": 0.0921, "step": 31180 }, { "epoch": 14.595366253217879, "grad_norm": 0.5706810355186462, "learning_rate": 5.2160660543385114e-06, "loss": 0.1081, "step": 31190 }, { "epoch": 14.600046805523052, "grad_norm": 0.6245619654655457, "learning_rate": 5.210163934520522e-06, "loss": 0.1001, "step": 31200 }, { "epoch": 14.604727357828224, "grad_norm": 0.5766677260398865, "learning_rate": 5.204267356109439e-06, "loss": 0.0904, "step": 31210 }, { "epoch": 14.609407910133395, "grad_norm": 0.7092419266700745, "learning_rate": 5.19837632293149e-06, "loss": 0.1004, "step": 31220 }, { "epoch": 14.614088462438568, "grad_norm": 0.8922508955001831, "learning_rate": 5.192490838809303e-06, "loss": 0.0977, "step": 31230 }, { "epoch": 14.618769014743739, "grad_norm": 0.5289874076843262, "learning_rate": 5.186610907561901e-06, "loss": 0.1033, "step": 31240 }, { "epoch": 14.623449567048912, "grad_norm": 0.604262113571167, "learning_rate": 5.18073653300471e-06, "loss": 0.0936, "step": 31250 }, { "epoch": 14.628130119354084, "grad_norm": 0.5321982502937317, "learning_rate": 5.17486771894955e-06, "loss": 0.0926, "step": 31260 }, { "epoch": 14.632810671659255, "grad_norm": 0.5287041664123535, "learning_rate": 5.169004469204624e-06, "loss": 0.1055, "step": 31270 }, { "epoch": 14.637491223964428, "grad_norm": 0.7758051753044128, "learning_rate": 5.163146787574539e-06, "loss": 0.1082, "step": 31280 }, { "epoch": 14.642171776269599, "grad_norm": 0.7420254349708557, "learning_rate": 5.157294677860279e-06, "loss": 0.0854, "step": 31290 }, { "epoch": 14.646852328574772, "grad_norm": 0.6317064166069031, "learning_rate": 5.151448143859206e-06, "loss": 0.0793, "step": 31300 }, { "epoch": 14.651532880879945, "grad_norm": 0.5277919173240662, "learning_rate": 5.145607189365086e-06, "loss": 0.0974, "step": 31310 }, { "epoch": 14.656213433185115, "grad_norm": 0.5116921067237854, "learning_rate": 5.139771818168041e-06, "loss": 0.0934, "step": 31320 }, { "epoch": 14.660893985490288, "grad_norm": 0.9807092547416687, "learning_rate": 5.133942034054583e-06, "loss": 0.0964, "step": 31330 }, { "epoch": 14.66557453779546, "grad_norm": 1.02545166015625, "learning_rate": 5.128117840807599e-06, "loss": 0.0882, "step": 31340 }, { "epoch": 14.670255090100632, "grad_norm": 0.9866653680801392, "learning_rate": 5.1222992422063386e-06, "loss": 0.1025, "step": 31350 }, { "epoch": 14.674935642405805, "grad_norm": 0.6445845365524292, "learning_rate": 5.116486242026431e-06, "loss": 0.0972, "step": 31360 }, { "epoch": 14.679616194710976, "grad_norm": 0.7904019355773926, "learning_rate": 5.110678844039868e-06, "loss": 0.0918, "step": 31370 }, { "epoch": 14.684296747016148, "grad_norm": 0.530304491519928, "learning_rate": 5.10487705201501e-06, "loss": 0.0959, "step": 31380 }, { "epoch": 14.68897729932132, "grad_norm": 0.5745549201965332, "learning_rate": 5.09908086971657e-06, "loss": 0.0879, "step": 31390 }, { "epoch": 14.693657851626492, "grad_norm": 0.6546233296394348, "learning_rate": 5.093290300905633e-06, "loss": 0.1033, "step": 31400 }, { "epoch": 14.698338403931665, "grad_norm": 0.5723467469215393, "learning_rate": 5.087505349339639e-06, "loss": 0.0819, "step": 31410 }, { "epoch": 14.703018956236836, "grad_norm": 0.6146552562713623, "learning_rate": 5.081726018772373e-06, "loss": 0.0974, "step": 31420 }, { "epoch": 14.707699508542008, "grad_norm": 0.6028467416763306, "learning_rate": 5.075952312953983e-06, "loss": 0.1028, "step": 31430 }, { "epoch": 14.71238006084718, "grad_norm": 0.5191776752471924, "learning_rate": 5.070184235630964e-06, "loss": 0.0976, "step": 31440 }, { "epoch": 14.717060613152352, "grad_norm": 0.4525109827518463, "learning_rate": 5.064421790546164e-06, "loss": 0.0936, "step": 31450 }, { "epoch": 14.721741165457525, "grad_norm": 0.7972765564918518, "learning_rate": 5.058664981438761e-06, "loss": 0.0832, "step": 31460 }, { "epoch": 14.726421717762696, "grad_norm": 0.4856254756450653, "learning_rate": 5.052913812044291e-06, "loss": 0.0987, "step": 31470 }, { "epoch": 14.731102270067868, "grad_norm": 0.5074937343597412, "learning_rate": 5.047168286094631e-06, "loss": 0.0893, "step": 31480 }, { "epoch": 14.73578282237304, "grad_norm": 0.5001823902130127, "learning_rate": 5.041428407317976e-06, "loss": 0.1047, "step": 31490 }, { "epoch": 14.740463374678212, "grad_norm": 0.5255525708198547, "learning_rate": 5.035694179438888e-06, "loss": 0.0996, "step": 31500 }, { "epoch": 14.745143926983385, "grad_norm": 0.9100715517997742, "learning_rate": 5.029965606178235e-06, "loss": 0.1, "step": 31510 }, { "epoch": 14.749824479288556, "grad_norm": 0.5189256072044373, "learning_rate": 5.024242691253224e-06, "loss": 0.0978, "step": 31520 }, { "epoch": 14.754505031593729, "grad_norm": 0.7726461291313171, "learning_rate": 5.018525438377401e-06, "loss": 0.0884, "step": 31530 }, { "epoch": 14.7591855838989, "grad_norm": 1.1719928979873657, "learning_rate": 5.012813851260623e-06, "loss": 0.0966, "step": 31540 }, { "epoch": 14.763866136204072, "grad_norm": 0.47473448514938354, "learning_rate": 5.0071079336090774e-06, "loss": 0.08, "step": 31550 }, { "epoch": 14.768546688509245, "grad_norm": 0.6383591890335083, "learning_rate": 5.001407689125277e-06, "loss": 0.1027, "step": 31560 }, { "epoch": 14.773227240814416, "grad_norm": 0.6888076066970825, "learning_rate": 4.995713121508043e-06, "loss": 0.0976, "step": 31570 }, { "epoch": 14.777907793119589, "grad_norm": 0.528263509273529, "learning_rate": 4.99002423445252e-06, "loss": 0.1096, "step": 31580 }, { "epoch": 14.78258834542476, "grad_norm": 0.7474657297134399, "learning_rate": 4.984341031650165e-06, "loss": 0.1014, "step": 31590 }, { "epoch": 14.787268897729932, "grad_norm": 0.49925997853279114, "learning_rate": 4.978663516788749e-06, "loss": 0.1041, "step": 31600 }, { "epoch": 14.791949450035105, "grad_norm": 0.4725641906261444, "learning_rate": 4.9729916935523475e-06, "loss": 0.0898, "step": 31610 }, { "epoch": 14.796630002340276, "grad_norm": 0.5474806427955627, "learning_rate": 4.9673255656213446e-06, "loss": 0.0988, "step": 31620 }, { "epoch": 14.801310554645449, "grad_norm": 0.5200232863426208, "learning_rate": 4.961665136672433e-06, "loss": 0.0855, "step": 31630 }, { "epoch": 14.80599110695062, "grad_norm": 0.7717652320861816, "learning_rate": 4.956010410378597e-06, "loss": 0.0964, "step": 31640 }, { "epoch": 14.810671659255792, "grad_norm": 0.600946843624115, "learning_rate": 4.9503613904091305e-06, "loss": 0.0954, "step": 31650 }, { "epoch": 14.815352211560963, "grad_norm": 0.5399541258811951, "learning_rate": 4.9447180804296225e-06, "loss": 0.1087, "step": 31660 }, { "epoch": 14.820032763866136, "grad_norm": 0.6895051002502441, "learning_rate": 4.939080484101957e-06, "loss": 0.1043, "step": 31670 }, { "epoch": 14.824713316171309, "grad_norm": 0.6532057523727417, "learning_rate": 4.933448605084304e-06, "loss": 0.1022, "step": 31680 }, { "epoch": 14.82939386847648, "grad_norm": 0.5074564218521118, "learning_rate": 4.9278224470311304e-06, "loss": 0.0839, "step": 31690 }, { "epoch": 14.834074420781652, "grad_norm": 0.5184087157249451, "learning_rate": 4.922202013593194e-06, "loss": 0.0924, "step": 31700 }, { "epoch": 14.838754973086825, "grad_norm": 0.42396363615989685, "learning_rate": 4.916587308417523e-06, "loss": 0.1006, "step": 31710 }, { "epoch": 14.843435525391996, "grad_norm": 0.5943351984024048, "learning_rate": 4.91097833514745e-06, "loss": 0.1017, "step": 31720 }, { "epoch": 14.848116077697169, "grad_norm": 0.46193456649780273, "learning_rate": 4.905375097422571e-06, "loss": 0.0883, "step": 31730 }, { "epoch": 14.85279663000234, "grad_norm": 0.531629204750061, "learning_rate": 4.899777598878761e-06, "loss": 0.0915, "step": 31740 }, { "epoch": 14.857477182307512, "grad_norm": 0.8632882833480835, "learning_rate": 4.894185843148187e-06, "loss": 0.0929, "step": 31750 }, { "epoch": 14.862157734612683, "grad_norm": 0.7003263831138611, "learning_rate": 4.8885998338592715e-06, "loss": 0.1029, "step": 31760 }, { "epoch": 14.866838286917856, "grad_norm": 0.510157585144043, "learning_rate": 4.883019574636716e-06, "loss": 0.0865, "step": 31770 }, { "epoch": 14.871518839223029, "grad_norm": 0.901340663433075, "learning_rate": 4.877445069101494e-06, "loss": 0.1051, "step": 31780 }, { "epoch": 14.8761993915282, "grad_norm": 0.7755205035209656, "learning_rate": 4.871876320870837e-06, "loss": 0.0918, "step": 31790 }, { "epoch": 14.880879943833373, "grad_norm": 0.5546610355377197, "learning_rate": 4.866313333558245e-06, "loss": 0.0999, "step": 31800 }, { "epoch": 14.885560496138545, "grad_norm": 0.4806882441043854, "learning_rate": 4.860756110773483e-06, "loss": 0.0876, "step": 31810 }, { "epoch": 14.890241048443716, "grad_norm": 1.529013752937317, "learning_rate": 4.855204656122575e-06, "loss": 0.0934, "step": 31820 }, { "epoch": 14.894921600748889, "grad_norm": 1.0794249773025513, "learning_rate": 4.849658973207792e-06, "loss": 0.0969, "step": 31830 }, { "epoch": 14.89960215305406, "grad_norm": 0.6230061650276184, "learning_rate": 4.844119065627674e-06, "loss": 0.0969, "step": 31840 }, { "epoch": 14.904282705359233, "grad_norm": 0.6597945094108582, "learning_rate": 4.838584936977006e-06, "loss": 0.104, "step": 31850 }, { "epoch": 14.908963257664404, "grad_norm": 0.6180499196052551, "learning_rate": 4.833056590846819e-06, "loss": 0.089, "step": 31860 }, { "epoch": 14.913643809969576, "grad_norm": 0.6538025736808777, "learning_rate": 4.827534030824402e-06, "loss": 0.0893, "step": 31870 }, { "epoch": 14.918324362274749, "grad_norm": 0.4915316104888916, "learning_rate": 4.822017260493281e-06, "loss": 0.0991, "step": 31880 }, { "epoch": 14.92300491457992, "grad_norm": 0.8425338864326477, "learning_rate": 4.816506283433233e-06, "loss": 0.1095, "step": 31890 }, { "epoch": 14.927685466885093, "grad_norm": 0.6024582982063293, "learning_rate": 4.811001103220267e-06, "loss": 0.1001, "step": 31900 }, { "epoch": 14.932366019190264, "grad_norm": 0.5449537038803101, "learning_rate": 4.805501723426634e-06, "loss": 0.0982, "step": 31910 }, { "epoch": 14.937046571495436, "grad_norm": 0.5940849184989929, "learning_rate": 4.800008147620827e-06, "loss": 0.0975, "step": 31920 }, { "epoch": 14.941727123800609, "grad_norm": 0.8166285157203674, "learning_rate": 4.794520379367558e-06, "loss": 0.112, "step": 31930 }, { "epoch": 14.94640767610578, "grad_norm": 0.6287662386894226, "learning_rate": 4.789038422227793e-06, "loss": 0.0958, "step": 31940 }, { "epoch": 14.951088228410953, "grad_norm": 0.6787479519844055, "learning_rate": 4.783562279758709e-06, "loss": 0.0864, "step": 31950 }, { "epoch": 14.955768780716124, "grad_norm": 0.48715975880622864, "learning_rate": 4.778091955513707e-06, "loss": 0.0839, "step": 31960 }, { "epoch": 14.960449333021296, "grad_norm": 0.5292181372642517, "learning_rate": 4.772627453042435e-06, "loss": 0.0961, "step": 31970 }, { "epoch": 14.96512988532647, "grad_norm": 0.5887759923934937, "learning_rate": 4.76716877589074e-06, "loss": 0.0944, "step": 31980 }, { "epoch": 14.96981043763164, "grad_norm": 0.6721293330192566, "learning_rate": 4.7617159276007e-06, "loss": 0.0939, "step": 31990 }, { "epoch": 14.974490989936813, "grad_norm": 0.7314438819885254, "learning_rate": 4.756268911710615e-06, "loss": 0.0858, "step": 32000 }, { "epoch": 14.979171542241984, "grad_norm": 0.4918350875377655, "learning_rate": 4.750827731754985e-06, "loss": 0.0845, "step": 32010 }, { "epoch": 14.983852094547156, "grad_norm": 0.7888549566268921, "learning_rate": 4.745392391264538e-06, "loss": 0.0854, "step": 32020 }, { "epoch": 14.98853264685233, "grad_norm": 0.6289386749267578, "learning_rate": 4.739962893766206e-06, "loss": 0.0827, "step": 32030 }, { "epoch": 14.9932131991575, "grad_norm": 0.652318000793457, "learning_rate": 4.734539242783136e-06, "loss": 0.0988, "step": 32040 }, { "epoch": 14.997893751462673, "grad_norm": 0.6464564800262451, "learning_rate": 4.7291214418346675e-06, "loss": 0.0842, "step": 32050 }, { "epoch": 15.002340276152585, "grad_norm": 0.5040739178657532, "learning_rate": 4.7237094944363585e-06, "loss": 0.0867, "step": 32060 }, { "epoch": 15.007020828457758, "grad_norm": 0.8300098776817322, "learning_rate": 4.718303404099964e-06, "loss": 0.0838, "step": 32070 }, { "epoch": 15.01170138076293, "grad_norm": 0.5044800639152527, "learning_rate": 4.712903174333431e-06, "loss": 0.0956, "step": 32080 }, { "epoch": 15.016381933068102, "grad_norm": 0.5881170034408569, "learning_rate": 4.707508808640915e-06, "loss": 0.0882, "step": 32090 }, { "epoch": 15.021062485373275, "grad_norm": 0.5130196809768677, "learning_rate": 4.70212031052276e-06, "loss": 0.0876, "step": 32100 }, { "epoch": 15.025743037678446, "grad_norm": 1.0781784057617188, "learning_rate": 4.696737683475505e-06, "loss": 0.0878, "step": 32110 }, { "epoch": 15.030423589983618, "grad_norm": 0.6023010611534119, "learning_rate": 4.691360930991878e-06, "loss": 0.0771, "step": 32120 }, { "epoch": 15.03510414228879, "grad_norm": 0.7706753611564636, "learning_rate": 4.6859900565607934e-06, "loss": 0.0781, "step": 32130 }, { "epoch": 15.039784694593962, "grad_norm": 0.6556843519210815, "learning_rate": 4.680625063667354e-06, "loss": 0.0879, "step": 32140 }, { "epoch": 15.044465246899135, "grad_norm": 0.516862690448761, "learning_rate": 4.675265955792846e-06, "loss": 0.0862, "step": 32150 }, { "epoch": 15.049145799204306, "grad_norm": 0.5581883788108826, "learning_rate": 4.669912736414739e-06, "loss": 0.0816, "step": 32160 }, { "epoch": 15.053826351509478, "grad_norm": 0.44654303789138794, "learning_rate": 4.6645654090066725e-06, "loss": 0.0826, "step": 32170 }, { "epoch": 15.05850690381465, "grad_norm": 0.49416109919548035, "learning_rate": 4.659223977038474e-06, "loss": 0.0753, "step": 32180 }, { "epoch": 15.063187456119822, "grad_norm": 0.6370562314987183, "learning_rate": 4.653888443976143e-06, "loss": 0.0868, "step": 32190 }, { "epoch": 15.067868008424995, "grad_norm": 0.444913387298584, "learning_rate": 4.648558813281844e-06, "loss": 0.0866, "step": 32200 }, { "epoch": 15.072548560730166, "grad_norm": 0.6871306896209717, "learning_rate": 4.6432350884139154e-06, "loss": 0.0903, "step": 32210 }, { "epoch": 15.077229113035338, "grad_norm": 0.5632322430610657, "learning_rate": 4.637917272826871e-06, "loss": 0.0861, "step": 32220 }, { "epoch": 15.08190966534051, "grad_norm": 0.47950562834739685, "learning_rate": 4.632605369971376e-06, "loss": 0.0755, "step": 32230 }, { "epoch": 15.086590217645682, "grad_norm": 0.5819576978683472, "learning_rate": 4.627299383294272e-06, "loss": 0.0946, "step": 32240 }, { "epoch": 15.091270769950855, "grad_norm": 0.5902643799781799, "learning_rate": 4.621999316238553e-06, "loss": 0.0928, "step": 32250 }, { "epoch": 15.095951322256026, "grad_norm": 0.9052016139030457, "learning_rate": 4.616705172243381e-06, "loss": 0.0815, "step": 32260 }, { "epoch": 15.100631874561198, "grad_norm": 0.9230824708938599, "learning_rate": 4.61141695474406e-06, "loss": 0.0893, "step": 32270 }, { "epoch": 15.10531242686637, "grad_norm": 0.4601052701473236, "learning_rate": 4.606134667172061e-06, "loss": 0.096, "step": 32280 }, { "epoch": 15.109992979171542, "grad_norm": 0.5361459851264954, "learning_rate": 4.600858312955005e-06, "loss": 0.0839, "step": 32290 }, { "epoch": 15.114673531476715, "grad_norm": 0.5299375653266907, "learning_rate": 4.595587895516654e-06, "loss": 0.0754, "step": 32300 }, { "epoch": 15.119354083781886, "grad_norm": 0.6181790828704834, "learning_rate": 4.590323418276936e-06, "loss": 0.1023, "step": 32310 }, { "epoch": 15.124034636087059, "grad_norm": 0.4760109782218933, "learning_rate": 4.585064884651906e-06, "loss": 0.0764, "step": 32320 }, { "epoch": 15.12871518839223, "grad_norm": 0.5768361687660217, "learning_rate": 4.579812298053765e-06, "loss": 0.0953, "step": 32330 }, { "epoch": 15.133395740697402, "grad_norm": 0.5862269401550293, "learning_rate": 4.574565661890869e-06, "loss": 0.0916, "step": 32340 }, { "epoch": 15.138076293002575, "grad_norm": 0.5494647026062012, "learning_rate": 4.569324979567697e-06, "loss": 0.0876, "step": 32350 }, { "epoch": 15.142756845307746, "grad_norm": 0.5739303231239319, "learning_rate": 4.564090254484871e-06, "loss": 0.0726, "step": 32360 }, { "epoch": 15.147437397612919, "grad_norm": 0.5530461668968201, "learning_rate": 4.55886149003915e-06, "loss": 0.0834, "step": 32370 }, { "epoch": 15.15211794991809, "grad_norm": 0.42885780334472656, "learning_rate": 4.553638689623425e-06, "loss": 0.0851, "step": 32380 }, { "epoch": 15.156798502223262, "grad_norm": 0.43511635065078735, "learning_rate": 4.548421856626707e-06, "loss": 0.0956, "step": 32390 }, { "epoch": 15.161479054528435, "grad_norm": 0.5451171398162842, "learning_rate": 4.543210994434146e-06, "loss": 0.0842, "step": 32400 }, { "epoch": 15.166159606833606, "grad_norm": 0.49135929346084595, "learning_rate": 4.538006106427018e-06, "loss": 0.0847, "step": 32410 }, { "epoch": 15.170840159138779, "grad_norm": 0.5282912850379944, "learning_rate": 4.532807195982711e-06, "loss": 0.0871, "step": 32420 }, { "epoch": 15.17552071144395, "grad_norm": 0.7435702681541443, "learning_rate": 4.527614266474743e-06, "loss": 0.0946, "step": 32430 }, { "epoch": 15.180201263749122, "grad_norm": 0.6627683043479919, "learning_rate": 4.522427321272757e-06, "loss": 0.0873, "step": 32440 }, { "epoch": 15.184881816054295, "grad_norm": 0.5530809164047241, "learning_rate": 4.517246363742495e-06, "loss": 0.0744, "step": 32450 }, { "epoch": 15.189562368359466, "grad_norm": 0.5400897860527039, "learning_rate": 4.51207139724583e-06, "loss": 0.0906, "step": 32460 }, { "epoch": 15.194242920664639, "grad_norm": 0.6159685850143433, "learning_rate": 4.506902425140741e-06, "loss": 0.1013, "step": 32470 }, { "epoch": 15.19892347296981, "grad_norm": 0.7217304110527039, "learning_rate": 4.5017394507813195e-06, "loss": 0.0899, "step": 32480 }, { "epoch": 15.203604025274982, "grad_norm": 0.6271686553955078, "learning_rate": 4.496582477517759e-06, "loss": 0.0795, "step": 32490 }, { "epoch": 15.208284577580155, "grad_norm": 0.5299124717712402, "learning_rate": 4.491431508696366e-06, "loss": 0.0775, "step": 32500 }, { "epoch": 15.212965129885326, "grad_norm": 0.4493083655834198, "learning_rate": 4.48628654765955e-06, "loss": 0.0898, "step": 32510 }, { "epoch": 15.217645682190499, "grad_norm": 0.4822896122932434, "learning_rate": 4.481147597745816e-06, "loss": 0.0971, "step": 32520 }, { "epoch": 15.22232623449567, "grad_norm": 0.5318158864974976, "learning_rate": 4.476014662289777e-06, "loss": 0.0723, "step": 32530 }, { "epoch": 15.227006786800843, "grad_norm": 0.5735426545143127, "learning_rate": 4.47088774462214e-06, "loss": 0.0873, "step": 32540 }, { "epoch": 15.231687339106015, "grad_norm": 0.5624838471412659, "learning_rate": 4.465766848069697e-06, "loss": 0.0916, "step": 32550 }, { "epoch": 15.236367891411186, "grad_norm": 0.7199699282646179, "learning_rate": 4.460651975955354e-06, "loss": 0.0868, "step": 32560 }, { "epoch": 15.241048443716359, "grad_norm": 0.5143914222717285, "learning_rate": 4.45554313159809e-06, "loss": 0.0768, "step": 32570 }, { "epoch": 15.24572899602153, "grad_norm": 0.5830431580543518, "learning_rate": 4.4504403183129775e-06, "loss": 0.0848, "step": 32580 }, { "epoch": 15.250409548326703, "grad_norm": 0.48283231258392334, "learning_rate": 4.445343539411179e-06, "loss": 0.0868, "step": 32590 }, { "epoch": 15.255090100631875, "grad_norm": 0.5240394473075867, "learning_rate": 4.440252798199941e-06, "loss": 0.0872, "step": 32600 }, { "epoch": 15.259770652937046, "grad_norm": 0.6535778641700745, "learning_rate": 4.4351680979825854e-06, "loss": 0.0884, "step": 32610 }, { "epoch": 15.264451205242219, "grad_norm": 0.587691605091095, "learning_rate": 4.430089442058521e-06, "loss": 0.0962, "step": 32620 }, { "epoch": 15.26913175754739, "grad_norm": 0.5294588208198547, "learning_rate": 4.425016833723237e-06, "loss": 0.084, "step": 32630 }, { "epoch": 15.273812309852563, "grad_norm": 0.5421773195266724, "learning_rate": 4.4199502762682855e-06, "loss": 0.0869, "step": 32640 }, { "epoch": 15.278492862157735, "grad_norm": 0.5703675150871277, "learning_rate": 4.414889772981306e-06, "loss": 0.0949, "step": 32650 }, { "epoch": 15.283173414462906, "grad_norm": 0.48247969150543213, "learning_rate": 4.409835327146004e-06, "loss": 0.0774, "step": 32660 }, { "epoch": 15.287853966768079, "grad_norm": 0.46745753288269043, "learning_rate": 4.404786942042153e-06, "loss": 0.0808, "step": 32670 }, { "epoch": 15.29253451907325, "grad_norm": 0.48172882199287415, "learning_rate": 4.399744620945594e-06, "loss": 0.0808, "step": 32680 }, { "epoch": 15.297215071378423, "grad_norm": 0.6405057311058044, "learning_rate": 4.394708367128237e-06, "loss": 0.0875, "step": 32690 }, { "epoch": 15.301895623683595, "grad_norm": 0.5689920783042908, "learning_rate": 4.3896781838580535e-06, "loss": 0.0833, "step": 32700 }, { "epoch": 15.306576175988766, "grad_norm": 0.6442103385925293, "learning_rate": 4.384654074399072e-06, "loss": 0.0814, "step": 32710 }, { "epoch": 15.31125672829394, "grad_norm": 0.4763392508029938, "learning_rate": 4.379636042011381e-06, "loss": 0.0962, "step": 32720 }, { "epoch": 15.31593728059911, "grad_norm": 0.6359899640083313, "learning_rate": 4.374624089951133e-06, "loss": 0.0803, "step": 32730 }, { "epoch": 15.320617832904283, "grad_norm": 0.600735604763031, "learning_rate": 4.369618221470523e-06, "loss": 0.0746, "step": 32740 }, { "epoch": 15.325298385209456, "grad_norm": 0.6247528195381165, "learning_rate": 4.3646184398178105e-06, "loss": 0.0977, "step": 32750 }, { "epoch": 15.329978937514626, "grad_norm": 0.5125231742858887, "learning_rate": 4.359624748237298e-06, "loss": 0.0778, "step": 32760 }, { "epoch": 15.3346594898198, "grad_norm": 0.4848078191280365, "learning_rate": 4.3546371499693316e-06, "loss": 0.0936, "step": 32770 }, { "epoch": 15.33934004212497, "grad_norm": 0.6342813968658447, "learning_rate": 4.3496556482503195e-06, "loss": 0.0957, "step": 32780 }, { "epoch": 15.344020594430143, "grad_norm": 0.4963489770889282, "learning_rate": 4.344680246312696e-06, "loss": 0.0877, "step": 32790 }, { "epoch": 15.348701146735316, "grad_norm": 0.5625659823417664, "learning_rate": 4.3397109473849495e-06, "loss": 0.0825, "step": 32800 }, { "epoch": 15.353381699040487, "grad_norm": 0.6168390512466431, "learning_rate": 4.334747754691602e-06, "loss": 0.103, "step": 32810 }, { "epoch": 15.35806225134566, "grad_norm": 0.5598645210266113, "learning_rate": 4.3297906714532196e-06, "loss": 0.0818, "step": 32820 }, { "epoch": 15.36274280365083, "grad_norm": 0.648432731628418, "learning_rate": 4.3248397008863955e-06, "loss": 0.09, "step": 32830 }, { "epoch": 15.367423355956003, "grad_norm": 0.4513092339038849, "learning_rate": 4.319894846203761e-06, "loss": 0.0862, "step": 32840 }, { "epoch": 15.372103908261176, "grad_norm": 0.7161529064178467, "learning_rate": 4.314956110613981e-06, "loss": 0.0877, "step": 32850 }, { "epoch": 15.376784460566347, "grad_norm": 0.5664865374565125, "learning_rate": 4.310023497321745e-06, "loss": 0.0789, "step": 32860 }, { "epoch": 15.38146501287152, "grad_norm": 0.5498294234275818, "learning_rate": 4.305097009527773e-06, "loss": 0.0895, "step": 32870 }, { "epoch": 15.38614556517669, "grad_norm": 0.42804303765296936, "learning_rate": 4.300176650428812e-06, "loss": 0.0741, "step": 32880 }, { "epoch": 15.390826117481863, "grad_norm": 0.5918130874633789, "learning_rate": 4.295262423217625e-06, "loss": 0.0756, "step": 32890 }, { "epoch": 15.395506669787036, "grad_norm": 0.8255086541175842, "learning_rate": 4.290354331083002e-06, "loss": 0.0874, "step": 32900 }, { "epoch": 15.400187222092207, "grad_norm": 0.592860221862793, "learning_rate": 4.285452377209752e-06, "loss": 0.08, "step": 32910 }, { "epoch": 15.40486777439738, "grad_norm": 0.4948118329048157, "learning_rate": 4.2805565647787025e-06, "loss": 0.0907, "step": 32920 }, { "epoch": 15.40954832670255, "grad_norm": 0.5606045126914978, "learning_rate": 4.275666896966687e-06, "loss": 0.0811, "step": 32930 }, { "epoch": 15.414228879007723, "grad_norm": 0.49244681000709534, "learning_rate": 4.270783376946561e-06, "loss": 0.0973, "step": 32940 }, { "epoch": 15.418909431312894, "grad_norm": 0.6182571053504944, "learning_rate": 4.2659060078871895e-06, "loss": 0.0818, "step": 32950 }, { "epoch": 15.423589983618067, "grad_norm": 0.4866383969783783, "learning_rate": 4.261034792953438e-06, "loss": 0.0822, "step": 32960 }, { "epoch": 15.42827053592324, "grad_norm": 0.4705103635787964, "learning_rate": 4.256169735306192e-06, "loss": 0.0812, "step": 32970 }, { "epoch": 15.43295108822841, "grad_norm": 0.5039820075035095, "learning_rate": 4.2513108381023334e-06, "loss": 0.0906, "step": 32980 }, { "epoch": 15.437631640533583, "grad_norm": 0.6126160621643066, "learning_rate": 4.246458104494741e-06, "loss": 0.0738, "step": 32990 }, { "epoch": 15.442312192838754, "grad_norm": 0.6646852493286133, "learning_rate": 4.241611537632311e-06, "loss": 0.0843, "step": 33000 }, { "epoch": 15.446992745143927, "grad_norm": 0.4539211094379425, "learning_rate": 4.2367711406599206e-06, "loss": 0.0844, "step": 33010 }, { "epoch": 15.4516732974491, "grad_norm": 0.67006516456604, "learning_rate": 4.231936916718453e-06, "loss": 0.0952, "step": 33020 }, { "epoch": 15.45635384975427, "grad_norm": 0.5803046226501465, "learning_rate": 4.227108868944786e-06, "loss": 0.0919, "step": 33030 }, { "epoch": 15.461034402059443, "grad_norm": 0.5729363560676575, "learning_rate": 4.222287000471785e-06, "loss": 0.0868, "step": 33040 }, { "epoch": 15.465714954364614, "grad_norm": 0.6636085510253906, "learning_rate": 4.217471314428307e-06, "loss": 0.0785, "step": 33050 }, { "epoch": 15.470395506669787, "grad_norm": 0.5897777676582336, "learning_rate": 4.2126618139392e-06, "loss": 0.0929, "step": 33060 }, { "epoch": 15.47507605897496, "grad_norm": 0.6185234189033508, "learning_rate": 4.207858502125299e-06, "loss": 0.0727, "step": 33070 }, { "epoch": 15.47975661128013, "grad_norm": 0.48960888385772705, "learning_rate": 4.203061382103416e-06, "loss": 0.0928, "step": 33080 }, { "epoch": 15.484437163585303, "grad_norm": 0.5379383563995361, "learning_rate": 4.198270456986353e-06, "loss": 0.082, "step": 33090 }, { "epoch": 15.489117715890474, "grad_norm": 0.587379515171051, "learning_rate": 4.19348572988289e-06, "loss": 0.0961, "step": 33100 }, { "epoch": 15.493798268195647, "grad_norm": 0.5178543925285339, "learning_rate": 4.188707203897783e-06, "loss": 0.0804, "step": 33110 }, { "epoch": 15.49847882050082, "grad_norm": 0.9580016136169434, "learning_rate": 4.183934882131764e-06, "loss": 0.0994, "step": 33120 }, { "epoch": 15.50315937280599, "grad_norm": 0.6477739214897156, "learning_rate": 4.1791687676815435e-06, "loss": 0.088, "step": 33130 }, { "epoch": 15.507839925111163, "grad_norm": 0.44761261343955994, "learning_rate": 4.174408863639803e-06, "loss": 0.084, "step": 33140 }, { "epoch": 15.512520477416334, "grad_norm": 0.5728349685668945, "learning_rate": 4.169655173095188e-06, "loss": 0.0789, "step": 33150 }, { "epoch": 15.517201029721507, "grad_norm": 0.8717681765556335, "learning_rate": 4.164907699132318e-06, "loss": 0.1092, "step": 33160 }, { "epoch": 15.52188158202668, "grad_norm": 0.5164434909820557, "learning_rate": 4.1601664448317815e-06, "loss": 0.0738, "step": 33170 }, { "epoch": 15.52656213433185, "grad_norm": 0.4614744484424591, "learning_rate": 4.155431413270118e-06, "loss": 0.0867, "step": 33180 }, { "epoch": 15.531242686637023, "grad_norm": 0.5678207874298096, "learning_rate": 4.150702607519849e-06, "loss": 0.0886, "step": 33190 }, { "epoch": 15.535923238942194, "grad_norm": 0.5282150506973267, "learning_rate": 4.145980030649441e-06, "loss": 0.0891, "step": 33200 }, { "epoch": 15.540603791247367, "grad_norm": 0.6429859399795532, "learning_rate": 4.141263685723315e-06, "loss": 0.084, "step": 33210 }, { "epoch": 15.54528434355254, "grad_norm": 0.5838925838470459, "learning_rate": 4.1365535758018685e-06, "loss": 0.0868, "step": 33220 }, { "epoch": 15.54996489585771, "grad_norm": 0.6758358478546143, "learning_rate": 4.131849703941432e-06, "loss": 0.0766, "step": 33230 }, { "epoch": 15.554645448162884, "grad_norm": 0.6407397985458374, "learning_rate": 4.127152073194301e-06, "loss": 0.0856, "step": 33240 }, { "epoch": 15.559326000468054, "grad_norm": 0.5233557820320129, "learning_rate": 4.1224606866087175e-06, "loss": 0.0793, "step": 33250 }, { "epoch": 15.564006552773227, "grad_norm": 0.4700870215892792, "learning_rate": 4.117775547228867e-06, "loss": 0.0774, "step": 33260 }, { "epoch": 15.5686871050784, "grad_norm": 0.5826678276062012, "learning_rate": 4.11309665809489e-06, "loss": 0.0828, "step": 33270 }, { "epoch": 15.57336765738357, "grad_norm": 0.6534188985824585, "learning_rate": 4.108424022242863e-06, "loss": 0.0783, "step": 33280 }, { "epoch": 15.578048209688744, "grad_norm": 0.47702500224113464, "learning_rate": 4.103757642704814e-06, "loss": 0.0823, "step": 33290 }, { "epoch": 15.582728761993915, "grad_norm": 0.3874185383319855, "learning_rate": 4.099097522508702e-06, "loss": 0.0732, "step": 33300 }, { "epoch": 15.587409314299087, "grad_norm": 0.5516690611839294, "learning_rate": 4.094443664678428e-06, "loss": 0.0989, "step": 33310 }, { "epoch": 15.59208986660426, "grad_norm": 0.599694013595581, "learning_rate": 4.089796072233834e-06, "loss": 0.0789, "step": 33320 }, { "epoch": 15.596770418909431, "grad_norm": 0.7988025546073914, "learning_rate": 4.085154748190684e-06, "loss": 0.076, "step": 33330 }, { "epoch": 15.601450971214604, "grad_norm": 0.6222716569900513, "learning_rate": 4.080519695560693e-06, "loss": 0.0878, "step": 33340 }, { "epoch": 15.606131523519775, "grad_norm": 0.7569586634635925, "learning_rate": 4.075890917351489e-06, "loss": 0.0829, "step": 33350 }, { "epoch": 15.610812075824947, "grad_norm": 0.593877375125885, "learning_rate": 4.071268416566638e-06, "loss": 0.0896, "step": 33360 }, { "epoch": 15.61549262813012, "grad_norm": 0.6518276929855347, "learning_rate": 4.066652196205636e-06, "loss": 0.0861, "step": 33370 }, { "epoch": 15.620173180435291, "grad_norm": 0.5684464573860168, "learning_rate": 4.062042259263888e-06, "loss": 0.0853, "step": 33380 }, { "epoch": 15.624853732740464, "grad_norm": 0.604582667350769, "learning_rate": 4.057438608732738e-06, "loss": 0.0874, "step": 33390 }, { "epoch": 15.629534285045635, "grad_norm": 0.46331876516342163, "learning_rate": 4.052841247599446e-06, "loss": 0.0714, "step": 33400 }, { "epoch": 15.634214837350807, "grad_norm": 0.548893392086029, "learning_rate": 4.048250178847188e-06, "loss": 0.0924, "step": 33410 }, { "epoch": 15.63889538965598, "grad_norm": 0.592665433883667, "learning_rate": 4.043665405455058e-06, "loss": 0.0858, "step": 33420 }, { "epoch": 15.643575941961151, "grad_norm": 0.6334443688392639, "learning_rate": 4.039086930398065e-06, "loss": 0.0748, "step": 33430 }, { "epoch": 15.648256494266324, "grad_norm": 0.611028254032135, "learning_rate": 4.0345147566471355e-06, "loss": 0.0798, "step": 33440 }, { "epoch": 15.652937046571495, "grad_norm": 0.6038419008255005, "learning_rate": 4.029948887169099e-06, "loss": 0.0953, "step": 33450 }, { "epoch": 15.657617598876667, "grad_norm": 0.48017263412475586, "learning_rate": 4.0253893249267e-06, "loss": 0.0971, "step": 33460 }, { "epoch": 15.66229815118184, "grad_norm": 0.6227498054504395, "learning_rate": 4.020836072878592e-06, "loss": 0.0942, "step": 33470 }, { "epoch": 15.666978703487011, "grad_norm": 0.49661919474601746, "learning_rate": 4.016289133979326e-06, "loss": 0.0838, "step": 33480 }, { "epoch": 15.671659255792184, "grad_norm": 0.8223468065261841, "learning_rate": 4.011748511179363e-06, "loss": 0.0975, "step": 33490 }, { "epoch": 15.676339808097355, "grad_norm": 0.6367949843406677, "learning_rate": 4.007214207425065e-06, "loss": 0.0857, "step": 33500 }, { "epoch": 15.681020360402528, "grad_norm": 0.5758256912231445, "learning_rate": 4.002686225658695e-06, "loss": 0.0754, "step": 33510 }, { "epoch": 15.6857009127077, "grad_norm": 0.622698962688446, "learning_rate": 3.998164568818405e-06, "loss": 0.0981, "step": 33520 }, { "epoch": 15.690381465012871, "grad_norm": 0.5719441771507263, "learning_rate": 3.9936492398382515e-06, "loss": 0.0821, "step": 33530 }, { "epoch": 15.695062017318044, "grad_norm": 0.7115436792373657, "learning_rate": 3.9891402416481854e-06, "loss": 0.0906, "step": 33540 }, { "epoch": 15.699742569623215, "grad_norm": 0.49906086921691895, "learning_rate": 3.984637577174041e-06, "loss": 0.0804, "step": 33550 }, { "epoch": 15.704423121928388, "grad_norm": 0.7171885967254639, "learning_rate": 3.9801412493375564e-06, "loss": 0.0944, "step": 33560 }, { "epoch": 15.70910367423356, "grad_norm": 0.5597479343414307, "learning_rate": 3.975651261056342e-06, "loss": 0.1034, "step": 33570 }, { "epoch": 15.713784226538731, "grad_norm": 0.5517081022262573, "learning_rate": 3.971167615243906e-06, "loss": 0.0844, "step": 33580 }, { "epoch": 15.718464778843904, "grad_norm": 0.48286527395248413, "learning_rate": 3.96669031480964e-06, "loss": 0.0855, "step": 33590 }, { "epoch": 15.723145331149075, "grad_norm": 0.5700421333312988, "learning_rate": 3.962219362658809e-06, "loss": 0.0921, "step": 33600 }, { "epoch": 15.727825883454248, "grad_norm": 0.4681372046470642, "learning_rate": 3.9577547616925716e-06, "loss": 0.0811, "step": 33610 }, { "epoch": 15.73250643575942, "grad_norm": 0.5076770186424255, "learning_rate": 3.953296514807954e-06, "loss": 0.084, "step": 33620 }, { "epoch": 15.737186988064591, "grad_norm": 0.7069360613822937, "learning_rate": 3.94884462489787e-06, "loss": 0.0839, "step": 33630 }, { "epoch": 15.741867540369764, "grad_norm": 0.5022967457771301, "learning_rate": 3.944399094851097e-06, "loss": 0.0903, "step": 33640 }, { "epoch": 15.746548092674935, "grad_norm": 0.7123374342918396, "learning_rate": 3.939959927552294e-06, "loss": 0.0923, "step": 33650 }, { "epoch": 15.751228644980108, "grad_norm": 0.5305432081222534, "learning_rate": 3.935527125881989e-06, "loss": 0.089, "step": 33660 }, { "epoch": 15.75590919728528, "grad_norm": 0.5764418244361877, "learning_rate": 3.931100692716576e-06, "loss": 0.0848, "step": 33670 }, { "epoch": 15.760589749590451, "grad_norm": 0.5360378623008728, "learning_rate": 3.926680630928321e-06, "loss": 0.087, "step": 33680 }, { "epoch": 15.765270301895624, "grad_norm": 0.5218952894210815, "learning_rate": 3.922266943385359e-06, "loss": 0.0748, "step": 33690 }, { "epoch": 15.769950854200795, "grad_norm": 0.5597801208496094, "learning_rate": 3.917859632951674e-06, "loss": 0.0901, "step": 33700 }, { "epoch": 15.774631406505968, "grad_norm": 0.6128599047660828, "learning_rate": 3.913458702487129e-06, "loss": 0.0861, "step": 33710 }, { "epoch": 15.779311958811139, "grad_norm": 0.5023720860481262, "learning_rate": 3.909064154847437e-06, "loss": 0.0705, "step": 33720 }, { "epoch": 15.783992511116312, "grad_norm": 0.5053625702857971, "learning_rate": 3.904675992884176e-06, "loss": 0.0823, "step": 33730 }, { "epoch": 15.788673063421484, "grad_norm": 0.5896227359771729, "learning_rate": 3.900294219444772e-06, "loss": 0.0887, "step": 33740 }, { "epoch": 15.793353615726655, "grad_norm": 1.0748409032821655, "learning_rate": 3.895918837372512e-06, "loss": 0.0803, "step": 33750 }, { "epoch": 15.798034168031828, "grad_norm": 0.7122445106506348, "learning_rate": 3.891549849506537e-06, "loss": 0.0768, "step": 33760 }, { "epoch": 15.802714720337, "grad_norm": 0.6934918165206909, "learning_rate": 3.887187258681828e-06, "loss": 0.0876, "step": 33770 }, { "epoch": 15.807395272642172, "grad_norm": 0.6493659019470215, "learning_rate": 3.882831067729232e-06, "loss": 0.0846, "step": 33780 }, { "epoch": 15.812075824947344, "grad_norm": 0.6176877021789551, "learning_rate": 3.878481279475429e-06, "loss": 0.0753, "step": 33790 }, { "epoch": 15.816756377252515, "grad_norm": 0.591173529624939, "learning_rate": 3.874137896742949e-06, "loss": 0.0922, "step": 33800 }, { "epoch": 15.821436929557688, "grad_norm": 0.4655899107456207, "learning_rate": 3.869800922350169e-06, "loss": 0.0773, "step": 33810 }, { "epoch": 15.826117481862859, "grad_norm": 0.6605748534202576, "learning_rate": 3.8654703591113e-06, "loss": 0.0955, "step": 33820 }, { "epoch": 15.830798034168032, "grad_norm": 0.5130100250244141, "learning_rate": 3.861146209836402e-06, "loss": 0.0747, "step": 33830 }, { "epoch": 15.835478586473204, "grad_norm": 0.6904376745223999, "learning_rate": 3.856828477331367e-06, "loss": 0.081, "step": 33840 }, { "epoch": 15.840159138778375, "grad_norm": 0.5717787146568298, "learning_rate": 3.852517164397926e-06, "loss": 0.0909, "step": 33850 }, { "epoch": 15.844839691083548, "grad_norm": 0.5147955417633057, "learning_rate": 3.848212273833638e-06, "loss": 0.081, "step": 33860 }, { "epoch": 15.84952024338872, "grad_norm": 0.48573940992355347, "learning_rate": 3.8439138084319045e-06, "loss": 0.0869, "step": 33870 }, { "epoch": 15.854200795693892, "grad_norm": 0.8753173351287842, "learning_rate": 3.839621770981954e-06, "loss": 0.0818, "step": 33880 }, { "epoch": 15.858881347999064, "grad_norm": 0.5830662846565247, "learning_rate": 3.835336164268839e-06, "loss": 0.0774, "step": 33890 }, { "epoch": 15.863561900304235, "grad_norm": 0.5856072902679443, "learning_rate": 3.831056991073444e-06, "loss": 0.0853, "step": 33900 }, { "epoch": 15.868242452609408, "grad_norm": 0.5246248841285706, "learning_rate": 3.826784254172483e-06, "loss": 0.083, "step": 33910 }, { "epoch": 15.872923004914579, "grad_norm": 0.6329289674758911, "learning_rate": 3.82251795633848e-06, "loss": 0.0785, "step": 33920 }, { "epoch": 15.877603557219752, "grad_norm": 0.6734013557434082, "learning_rate": 3.818258100339796e-06, "loss": 0.0829, "step": 33930 }, { "epoch": 15.882284109524925, "grad_norm": 0.5919564962387085, "learning_rate": 3.814004688940603e-06, "loss": 0.0834, "step": 33940 }, { "epoch": 15.886964661830095, "grad_norm": 0.5477489233016968, "learning_rate": 3.8097577249008957e-06, "loss": 0.0794, "step": 33950 }, { "epoch": 15.891645214135268, "grad_norm": 0.7549708485603333, "learning_rate": 3.805517210976479e-06, "loss": 0.0888, "step": 33960 }, { "epoch": 15.89632576644044, "grad_norm": 0.5710998177528381, "learning_rate": 3.801283149918978e-06, "loss": 0.0885, "step": 33970 }, { "epoch": 15.901006318745612, "grad_norm": 0.5289676189422607, "learning_rate": 3.797055544475832e-06, "loss": 0.0892, "step": 33980 }, { "epoch": 15.905686871050785, "grad_norm": 0.575468122959137, "learning_rate": 3.7928343973902802e-06, "loss": 0.088, "step": 33990 }, { "epoch": 15.910367423355956, "grad_norm": 0.5040895342826843, "learning_rate": 3.78861971140139e-06, "loss": 0.0818, "step": 34000 }, { "epoch": 15.915047975661128, "grad_norm": 1.1305022239685059, "learning_rate": 3.784411489244019e-06, "loss": 0.0916, "step": 34010 }, { "epoch": 15.9197285279663, "grad_norm": 0.7029111981391907, "learning_rate": 3.7802097336488347e-06, "loss": 0.0688, "step": 34020 }, { "epoch": 15.924409080271472, "grad_norm": 0.5313442945480347, "learning_rate": 3.7760144473423173e-06, "loss": 0.0809, "step": 34030 }, { "epoch": 15.929089632576645, "grad_norm": 0.7061212062835693, "learning_rate": 3.771825633046737e-06, "loss": 0.0915, "step": 34040 }, { "epoch": 15.933770184881816, "grad_norm": 0.7097363471984863, "learning_rate": 3.767643293480172e-06, "loss": 0.0738, "step": 34050 }, { "epoch": 15.938450737186988, "grad_norm": 0.5283185839653015, "learning_rate": 3.7634674313564973e-06, "loss": 0.0789, "step": 34060 }, { "epoch": 15.94313128949216, "grad_norm": 0.5219956040382385, "learning_rate": 3.759298049385386e-06, "loss": 0.0805, "step": 34070 }, { "epoch": 15.947811841797332, "grad_norm": 0.4153451919555664, "learning_rate": 3.7551351502723e-06, "loss": 0.0872, "step": 34080 }, { "epoch": 15.952492394102505, "grad_norm": 0.6318984031677246, "learning_rate": 3.750978736718503e-06, "loss": 0.0754, "step": 34090 }, { "epoch": 15.957172946407676, "grad_norm": 0.8870299458503723, "learning_rate": 3.7468288114210494e-06, "loss": 0.1023, "step": 34100 }, { "epoch": 15.961853498712848, "grad_norm": 0.6316614151000977, "learning_rate": 3.742685377072773e-06, "loss": 0.092, "step": 34110 }, { "epoch": 15.96653405101802, "grad_norm": 0.6673797965049744, "learning_rate": 3.7385484363623076e-06, "loss": 0.0826, "step": 34120 }, { "epoch": 15.971214603323192, "grad_norm": 0.5302522778511047, "learning_rate": 3.734417991974073e-06, "loss": 0.0748, "step": 34130 }, { "epoch": 15.975895155628365, "grad_norm": 0.5906848311424255, "learning_rate": 3.7302940465882617e-06, "loss": 0.0789, "step": 34140 }, { "epoch": 15.980575707933536, "grad_norm": 0.4773911237716675, "learning_rate": 3.7261766028808626e-06, "loss": 0.0833, "step": 34150 }, { "epoch": 15.985256260238708, "grad_norm": 0.48440080881118774, "learning_rate": 3.7220656635236383e-06, "loss": 0.0867, "step": 34160 }, { "epoch": 15.98993681254388, "grad_norm": 1.0644090175628662, "learning_rate": 3.7179612311841347e-06, "loss": 0.0755, "step": 34170 }, { "epoch": 15.994617364849052, "grad_norm": 0.7391273975372314, "learning_rate": 3.7138633085256704e-06, "loss": 0.0893, "step": 34180 }, { "epoch": 15.999297917154225, "grad_norm": 0.5005290508270264, "learning_rate": 3.7097718982073447e-06, "loss": 0.075, "step": 34190 }, { "epoch": 16.003744441844137, "grad_norm": 0.6088548898696899, "learning_rate": 3.7056870028840305e-06, "loss": 0.0764, "step": 34200 }, { "epoch": 16.00842499414931, "grad_norm": 0.8028940558433533, "learning_rate": 3.7016086252063683e-06, "loss": 0.0684, "step": 34210 }, { "epoch": 16.013105546454483, "grad_norm": 0.6126115322113037, "learning_rate": 3.6975367678207773e-06, "loss": 0.064, "step": 34220 }, { "epoch": 16.017786098759654, "grad_norm": 0.9551007747650146, "learning_rate": 3.693471433369441e-06, "loss": 0.0729, "step": 34230 }, { "epoch": 16.022466651064825, "grad_norm": 0.5220890641212463, "learning_rate": 3.6894126244903044e-06, "loss": 0.0712, "step": 34240 }, { "epoch": 16.02714720337, "grad_norm": 0.6412789821624756, "learning_rate": 3.685360343817094e-06, "loss": 0.0813, "step": 34250 }, { "epoch": 16.03182775567517, "grad_norm": 0.6703452467918396, "learning_rate": 3.681314593979284e-06, "loss": 0.0804, "step": 34260 }, { "epoch": 16.03650830798034, "grad_norm": 0.4889564514160156, "learning_rate": 3.67727537760212e-06, "loss": 0.0758, "step": 34270 }, { "epoch": 16.041188860285512, "grad_norm": 0.4557527005672455, "learning_rate": 3.673242697306605e-06, "loss": 0.0742, "step": 34280 }, { "epoch": 16.045869412590687, "grad_norm": 0.4538358151912689, "learning_rate": 3.6692165557095044e-06, "loss": 0.0784, "step": 34290 }, { "epoch": 16.050549964895858, "grad_norm": 0.4543018043041229, "learning_rate": 3.665196955423333e-06, "loss": 0.0782, "step": 34300 }, { "epoch": 16.05523051720103, "grad_norm": 0.5041641592979431, "learning_rate": 3.661183899056369e-06, "loss": 0.0766, "step": 34310 }, { "epoch": 16.059911069506203, "grad_norm": 0.5117729902267456, "learning_rate": 3.657177389212641e-06, "loss": 0.0813, "step": 34320 }, { "epoch": 16.064591621811374, "grad_norm": 0.6446205973625183, "learning_rate": 3.6531774284919274e-06, "loss": 0.067, "step": 34330 }, { "epoch": 16.069272174116545, "grad_norm": 0.5075494647026062, "learning_rate": 3.649184019489761e-06, "loss": 0.0774, "step": 34340 }, { "epoch": 16.07395272642172, "grad_norm": 0.5262860059738159, "learning_rate": 3.6451971647974236e-06, "loss": 0.0774, "step": 34350 }, { "epoch": 16.07863327872689, "grad_norm": 0.6113573312759399, "learning_rate": 3.641216867001938e-06, "loss": 0.0806, "step": 34360 }, { "epoch": 16.08331383103206, "grad_norm": 0.5215945243835449, "learning_rate": 3.637243128686076e-06, "loss": 0.0872, "step": 34370 }, { "epoch": 16.087994383337232, "grad_norm": 0.49026045203208923, "learning_rate": 3.633275952428357e-06, "loss": 0.0725, "step": 34380 }, { "epoch": 16.092674935642407, "grad_norm": 0.5500040650367737, "learning_rate": 3.6293153408030384e-06, "loss": 0.0765, "step": 34390 }, { "epoch": 16.097355487947578, "grad_norm": 0.5862386226654053, "learning_rate": 3.6253612963801148e-06, "loss": 0.0866, "step": 34400 }, { "epoch": 16.10203604025275, "grad_norm": 0.5224329233169556, "learning_rate": 3.621413821725323e-06, "loss": 0.0821, "step": 34410 }, { "epoch": 16.106716592557923, "grad_norm": 0.7183420658111572, "learning_rate": 3.6174729194001424e-06, "loss": 0.072, "step": 34420 }, { "epoch": 16.111397144863094, "grad_norm": 0.487926721572876, "learning_rate": 3.6135385919617696e-06, "loss": 0.0814, "step": 34430 }, { "epoch": 16.116077697168265, "grad_norm": 0.4660218060016632, "learning_rate": 3.609610841963161e-06, "loss": 0.0736, "step": 34440 }, { "epoch": 16.12075824947344, "grad_norm": 0.5217283964157104, "learning_rate": 3.605689671952983e-06, "loss": 0.0703, "step": 34450 }, { "epoch": 16.12543880177861, "grad_norm": 0.6379431486129761, "learning_rate": 3.6017750844756393e-06, "loss": 0.0888, "step": 34460 }, { "epoch": 16.13011935408378, "grad_norm": 0.5131996870040894, "learning_rate": 3.5978670820712686e-06, "loss": 0.0778, "step": 34470 }, { "epoch": 16.134799906388952, "grad_norm": 0.6939849257469177, "learning_rate": 3.5939656672757256e-06, "loss": 0.0733, "step": 34480 }, { "epoch": 16.139480458694127, "grad_norm": 0.5224313735961914, "learning_rate": 3.5900708426206008e-06, "loss": 0.0737, "step": 34490 }, { "epoch": 16.144161010999298, "grad_norm": 0.49960023164749146, "learning_rate": 3.5861826106332015e-06, "loss": 0.0773, "step": 34500 }, { "epoch": 16.14884156330447, "grad_norm": 0.6553646326065063, "learning_rate": 3.582300973836558e-06, "loss": 0.0985, "step": 34510 }, { "epoch": 16.153522115609643, "grad_norm": 0.6018531918525696, "learning_rate": 3.5784259347494227e-06, "loss": 0.0753, "step": 34520 }, { "epoch": 16.158202667914814, "grad_norm": 0.43574854731559753, "learning_rate": 3.5745574958862672e-06, "loss": 0.0728, "step": 34530 }, { "epoch": 16.162883220219985, "grad_norm": 0.4498015344142914, "learning_rate": 3.5706956597572826e-06, "loss": 0.071, "step": 34540 }, { "epoch": 16.167563772525156, "grad_norm": 0.3873811960220337, "learning_rate": 3.5668404288683655e-06, "loss": 0.0626, "step": 34550 }, { "epoch": 16.17224432483033, "grad_norm": 0.688093900680542, "learning_rate": 3.562991805721138e-06, "loss": 0.0793, "step": 34560 }, { "epoch": 16.1769248771355, "grad_norm": 0.6054037809371948, "learning_rate": 3.5591497928129324e-06, "loss": 0.0722, "step": 34570 }, { "epoch": 16.181605429440673, "grad_norm": 0.5471345782279968, "learning_rate": 3.5553143926367792e-06, "loss": 0.0933, "step": 34580 }, { "epoch": 16.186285981745847, "grad_norm": 0.6969952583312988, "learning_rate": 3.5514856076814403e-06, "loss": 0.066, "step": 34590 }, { "epoch": 16.190966534051018, "grad_norm": 0.4369712769985199, "learning_rate": 3.5476634404313645e-06, "loss": 0.0624, "step": 34600 }, { "epoch": 16.19564708635619, "grad_norm": 0.7440729737281799, "learning_rate": 3.543847893366717e-06, "loss": 0.0946, "step": 34610 }, { "epoch": 16.200327638661363, "grad_norm": 0.586849570274353, "learning_rate": 3.540038968963366e-06, "loss": 0.0663, "step": 34620 }, { "epoch": 16.205008190966534, "grad_norm": 0.6049122214317322, "learning_rate": 3.5362366696928785e-06, "loss": 0.0801, "step": 34630 }, { "epoch": 16.209688743271705, "grad_norm": 0.8172857761383057, "learning_rate": 3.5324409980225274e-06, "loss": 0.0803, "step": 34640 }, { "epoch": 16.214369295576876, "grad_norm": 0.4510122537612915, "learning_rate": 3.528651956415282e-06, "loss": 0.0679, "step": 34650 }, { "epoch": 16.21904984788205, "grad_norm": 0.5774341225624084, "learning_rate": 3.5248695473298135e-06, "loss": 0.0781, "step": 34660 }, { "epoch": 16.223730400187222, "grad_norm": 0.5956670045852661, "learning_rate": 3.5210937732204795e-06, "loss": 0.0782, "step": 34670 }, { "epoch": 16.228410952492393, "grad_norm": 0.5462504625320435, "learning_rate": 3.5173246365373435e-06, "loss": 0.0757, "step": 34680 }, { "epoch": 16.233091504797567, "grad_norm": 0.6007950305938721, "learning_rate": 3.5135621397261576e-06, "loss": 0.0734, "step": 34690 }, { "epoch": 16.237772057102738, "grad_norm": 0.7683466076850891, "learning_rate": 3.5098062852283614e-06, "loss": 0.0813, "step": 34700 }, { "epoch": 16.24245260940791, "grad_norm": 0.5457022786140442, "learning_rate": 3.506057075481091e-06, "loss": 0.0888, "step": 34710 }, { "epoch": 16.247133161713084, "grad_norm": 0.5432125926017761, "learning_rate": 3.5023145129171703e-06, "loss": 0.0641, "step": 34720 }, { "epoch": 16.251813714018255, "grad_norm": 0.856789231300354, "learning_rate": 3.4985785999651007e-06, "loss": 0.07, "step": 34730 }, { "epoch": 16.256494266323426, "grad_norm": 0.6499139666557312, "learning_rate": 3.494849339049082e-06, "loss": 0.0697, "step": 34740 }, { "epoch": 16.261174818628596, "grad_norm": 0.881419837474823, "learning_rate": 3.491126732588987e-06, "loss": 0.0846, "step": 34750 }, { "epoch": 16.26585537093377, "grad_norm": 0.5666645765304565, "learning_rate": 3.487410783000378e-06, "loss": 0.0853, "step": 34760 }, { "epoch": 16.270535923238942, "grad_norm": 0.5191040635108948, "learning_rate": 3.483701492694489e-06, "loss": 0.0724, "step": 34770 }, { "epoch": 16.275216475544113, "grad_norm": 0.6638495326042175, "learning_rate": 3.4799988640782466e-06, "loss": 0.0697, "step": 34780 }, { "epoch": 16.279897027849287, "grad_norm": 0.6312577724456787, "learning_rate": 3.476302899554241e-06, "loss": 0.0825, "step": 34790 }, { "epoch": 16.28457758015446, "grad_norm": 0.5358073115348816, "learning_rate": 3.47261360152074e-06, "loss": 0.0819, "step": 34800 }, { "epoch": 16.28925813245963, "grad_norm": 0.7113581299781799, "learning_rate": 3.4689309723716963e-06, "loss": 0.0727, "step": 34810 }, { "epoch": 16.293938684764804, "grad_norm": 0.4947599470615387, "learning_rate": 3.465255014496723e-06, "loss": 0.0843, "step": 34820 }, { "epoch": 16.298619237069975, "grad_norm": 0.690470278263092, "learning_rate": 3.461585730281111e-06, "loss": 0.0936, "step": 34830 }, { "epoch": 16.303299789375146, "grad_norm": 0.5177793502807617, "learning_rate": 3.457923122105821e-06, "loss": 0.0948, "step": 34840 }, { "epoch": 16.307980341680317, "grad_norm": 0.5123550891876221, "learning_rate": 3.4542671923474755e-06, "loss": 0.0692, "step": 34850 }, { "epoch": 16.31266089398549, "grad_norm": 0.5257759690284729, "learning_rate": 3.4506179433783696e-06, "loss": 0.0887, "step": 34860 }, { "epoch": 16.317341446290662, "grad_norm": 0.5962173938751221, "learning_rate": 3.4469753775664623e-06, "loss": 0.0729, "step": 34870 }, { "epoch": 16.322021998595833, "grad_norm": 0.6460064649581909, "learning_rate": 3.4433394972753752e-06, "loss": 0.0713, "step": 34880 }, { "epoch": 16.326702550901008, "grad_norm": 0.5489939451217651, "learning_rate": 3.439710304864388e-06, "loss": 0.0684, "step": 34890 }, { "epoch": 16.33138310320618, "grad_norm": 0.490231454372406, "learning_rate": 3.436087802688448e-06, "loss": 0.0892, "step": 34900 }, { "epoch": 16.33606365551135, "grad_norm": 0.6559504866600037, "learning_rate": 3.4324719930981577e-06, "loss": 0.0797, "step": 34910 }, { "epoch": 16.340744207816524, "grad_norm": 0.6812690496444702, "learning_rate": 3.4288628784397737e-06, "loss": 0.0798, "step": 34920 }, { "epoch": 16.345424760121695, "grad_norm": 0.41538119316101074, "learning_rate": 3.4252604610552122e-06, "loss": 0.0786, "step": 34930 }, { "epoch": 16.350105312426866, "grad_norm": 0.6007869243621826, "learning_rate": 3.4216647432820458e-06, "loss": 0.0735, "step": 34940 }, { "epoch": 16.354785864732037, "grad_norm": 0.59017014503479, "learning_rate": 3.418075727453493e-06, "loss": 0.081, "step": 34950 }, { "epoch": 16.35946641703721, "grad_norm": 0.6183105707168579, "learning_rate": 3.4144934158984275e-06, "loss": 0.063, "step": 34960 }, { "epoch": 16.364146969342382, "grad_norm": 0.6218346357345581, "learning_rate": 3.410917810941375e-06, "loss": 0.0857, "step": 34970 }, { "epoch": 16.368827521647553, "grad_norm": 0.7962931394577026, "learning_rate": 3.407348914902505e-06, "loss": 0.0796, "step": 34980 }, { "epoch": 16.373508073952728, "grad_norm": 0.5081734657287598, "learning_rate": 3.4037867300976356e-06, "loss": 0.0651, "step": 34990 }, { "epoch": 16.3781886262579, "grad_norm": 0.5473510026931763, "learning_rate": 3.400231258838228e-06, "loss": 0.0801, "step": 35000 }, { "epoch": 16.38286917856307, "grad_norm": 0.7410277128219604, "learning_rate": 3.396682503431394e-06, "loss": 0.0683, "step": 35010 }, { "epoch": 16.387549730868244, "grad_norm": 0.5897364616394043, "learning_rate": 3.393140466179874e-06, "loss": 0.0849, "step": 35020 }, { "epoch": 16.392230283173415, "grad_norm": 0.7324356436729431, "learning_rate": 3.389605149382069e-06, "loss": 0.0729, "step": 35030 }, { "epoch": 16.396910835478586, "grad_norm": 0.5234730839729309, "learning_rate": 3.386076555331997e-06, "loss": 0.0876, "step": 35040 }, { "epoch": 16.401591387783757, "grad_norm": 0.4750821888446808, "learning_rate": 3.3825546863193295e-06, "loss": 0.0691, "step": 35050 }, { "epoch": 16.40627194008893, "grad_norm": 0.6068017482757568, "learning_rate": 3.3790395446293705e-06, "loss": 0.0763, "step": 35060 }, { "epoch": 16.410952492394102, "grad_norm": 0.6275627613067627, "learning_rate": 3.3755311325430527e-06, "loss": 0.0751, "step": 35070 }, { "epoch": 16.415633044699273, "grad_norm": 0.477891743183136, "learning_rate": 3.3720294523369495e-06, "loss": 0.0676, "step": 35080 }, { "epoch": 16.420313597004448, "grad_norm": 0.6708658933639526, "learning_rate": 3.3685345062832627e-06, "loss": 0.0651, "step": 35090 }, { "epoch": 16.42499414930962, "grad_norm": 0.5231038331985474, "learning_rate": 3.3650462966498265e-06, "loss": 0.0752, "step": 35100 }, { "epoch": 16.42967470161479, "grad_norm": 0.5170145630836487, "learning_rate": 3.3615648257000976e-06, "loss": 0.0753, "step": 35110 }, { "epoch": 16.434355253919964, "grad_norm": 0.6355857253074646, "learning_rate": 3.3580900956931692e-06, "loss": 0.0707, "step": 35120 }, { "epoch": 16.439035806225135, "grad_norm": 0.5220704078674316, "learning_rate": 3.354622108883756e-06, "loss": 0.0783, "step": 35130 }, { "epoch": 16.443716358530306, "grad_norm": 0.6097375154495239, "learning_rate": 3.3511608675221935e-06, "loss": 0.0836, "step": 35140 }, { "epoch": 16.448396910835477, "grad_norm": 0.6472396850585938, "learning_rate": 3.347706373854445e-06, "loss": 0.074, "step": 35150 }, { "epoch": 16.45307746314065, "grad_norm": 0.6756008267402649, "learning_rate": 3.3442586301220964e-06, "loss": 0.0614, "step": 35160 }, { "epoch": 16.457758015445823, "grad_norm": 0.6138745546340942, "learning_rate": 3.340817638562348e-06, "loss": 0.0643, "step": 35170 }, { "epoch": 16.462438567750993, "grad_norm": 0.5622860193252563, "learning_rate": 3.3373834014080224e-06, "loss": 0.0672, "step": 35180 }, { "epoch": 16.467119120056168, "grad_norm": 0.6626020073890686, "learning_rate": 3.3339559208875596e-06, "loss": 0.0822, "step": 35190 }, { "epoch": 16.47179967236134, "grad_norm": 0.5109877586364746, "learning_rate": 3.3305351992250164e-06, "loss": 0.0843, "step": 35200 }, { "epoch": 16.47648022466651, "grad_norm": 0.5682936906814575, "learning_rate": 3.3271212386400575e-06, "loss": 0.0733, "step": 35210 }, { "epoch": 16.481160776971684, "grad_norm": 0.5735112428665161, "learning_rate": 3.323714041347968e-06, "loss": 0.0696, "step": 35220 }, { "epoch": 16.485841329276855, "grad_norm": 0.7104275822639465, "learning_rate": 3.3203136095596414e-06, "loss": 0.0704, "step": 35230 }, { "epoch": 16.490521881582026, "grad_norm": 0.581605076789856, "learning_rate": 3.316919945481578e-06, "loss": 0.0737, "step": 35240 }, { "epoch": 16.495202433887197, "grad_norm": 0.6420699954032898, "learning_rate": 3.313533051315893e-06, "loss": 0.0742, "step": 35250 }, { "epoch": 16.49988298619237, "grad_norm": 0.47065338492393494, "learning_rate": 3.3101529292603037e-06, "loss": 0.0678, "step": 35260 }, { "epoch": 16.504563538497543, "grad_norm": 0.4340178072452545, "learning_rate": 3.3067795815081357e-06, "loss": 0.076, "step": 35270 }, { "epoch": 16.509244090802714, "grad_norm": 0.6417252421379089, "learning_rate": 3.3034130102483166e-06, "loss": 0.081, "step": 35280 }, { "epoch": 16.513924643107888, "grad_norm": 0.687799870967865, "learning_rate": 3.3000532176653774e-06, "loss": 0.0783, "step": 35290 }, { "epoch": 16.51860519541306, "grad_norm": 0.8120457530021667, "learning_rate": 3.296700205939452e-06, "loss": 0.0825, "step": 35300 }, { "epoch": 16.52328574771823, "grad_norm": 0.5613884329795837, "learning_rate": 3.2933539772462718e-06, "loss": 0.069, "step": 35310 }, { "epoch": 16.5279663000234, "grad_norm": 0.5538510680198669, "learning_rate": 3.290014533757173e-06, "loss": 0.0661, "step": 35320 }, { "epoch": 16.532646852328575, "grad_norm": 0.6763821840286255, "learning_rate": 3.286681877639078e-06, "loss": 0.0749, "step": 35330 }, { "epoch": 16.537327404633746, "grad_norm": 0.5020249485969543, "learning_rate": 3.283356011054514e-06, "loss": 0.0827, "step": 35340 }, { "epoch": 16.542007956938917, "grad_norm": 0.588002622127533, "learning_rate": 3.2800369361616025e-06, "loss": 0.0759, "step": 35350 }, { "epoch": 16.546688509244092, "grad_norm": 0.4765065610408783, "learning_rate": 3.2767246551140497e-06, "loss": 0.0667, "step": 35360 }, { "epoch": 16.551369061549263, "grad_norm": 0.7561731934547424, "learning_rate": 3.2734191700611623e-06, "loss": 0.0714, "step": 35370 }, { "epoch": 16.556049613854434, "grad_norm": 0.5583257079124451, "learning_rate": 3.2701204831478336e-06, "loss": 0.0678, "step": 35380 }, { "epoch": 16.56073016615961, "grad_norm": 0.5681905150413513, "learning_rate": 3.2668285965145443e-06, "loss": 0.0779, "step": 35390 }, { "epoch": 16.56541071846478, "grad_norm": 0.6936033368110657, "learning_rate": 3.2635435122973637e-06, "loss": 0.0772, "step": 35400 }, { "epoch": 16.57009127076995, "grad_norm": 0.48411646485328674, "learning_rate": 3.2602652326279475e-06, "loss": 0.0774, "step": 35410 }, { "epoch": 16.574771823075125, "grad_norm": 0.5268313884735107, "learning_rate": 3.25699375963354e-06, "loss": 0.0664, "step": 35420 }, { "epoch": 16.579452375380296, "grad_norm": 0.5355003476142883, "learning_rate": 3.253729095436958e-06, "loss": 0.0792, "step": 35430 }, { "epoch": 16.584132927685467, "grad_norm": 0.6485666632652283, "learning_rate": 3.2504712421566093e-06, "loss": 0.0781, "step": 35440 }, { "epoch": 16.588813479990637, "grad_norm": 0.5983465313911438, "learning_rate": 3.247220201906482e-06, "loss": 0.0788, "step": 35450 }, { "epoch": 16.593494032295812, "grad_norm": 0.5692631602287292, "learning_rate": 3.2439759767961363e-06, "loss": 0.0724, "step": 35460 }, { "epoch": 16.598174584600983, "grad_norm": 0.5504335165023804, "learning_rate": 3.2407385689307184e-06, "loss": 0.0836, "step": 35470 }, { "epoch": 16.602855136906154, "grad_norm": 0.6451199650764465, "learning_rate": 3.237507980410946e-06, "loss": 0.0707, "step": 35480 }, { "epoch": 16.60753568921133, "grad_norm": 0.5062490105628967, "learning_rate": 3.234284213333111e-06, "loss": 0.073, "step": 35490 }, { "epoch": 16.6122162415165, "grad_norm": 0.8510181903839111, "learning_rate": 3.231067269789085e-06, "loss": 0.0703, "step": 35500 }, { "epoch": 16.61689679382167, "grad_norm": 0.5415949821472168, "learning_rate": 3.227857151866303e-06, "loss": 0.0688, "step": 35510 }, { "epoch": 16.62157734612684, "grad_norm": 0.6963567137718201, "learning_rate": 3.224653861647778e-06, "loss": 0.071, "step": 35520 }, { "epoch": 16.626257898432016, "grad_norm": 0.4475063979625702, "learning_rate": 3.22145740121209e-06, "loss": 0.0729, "step": 35530 }, { "epoch": 16.630938450737187, "grad_norm": 0.632883608341217, "learning_rate": 3.21826777263339e-06, "loss": 0.0829, "step": 35540 }, { "epoch": 16.635619003042358, "grad_norm": 0.5370528697967529, "learning_rate": 3.2150849779813884e-06, "loss": 0.0737, "step": 35550 }, { "epoch": 16.640299555347532, "grad_norm": 0.8704867959022522, "learning_rate": 3.21190901932137e-06, "loss": 0.079, "step": 35560 }, { "epoch": 16.644980107652703, "grad_norm": 0.6453092694282532, "learning_rate": 3.2087398987141796e-06, "loss": 0.0751, "step": 35570 }, { "epoch": 16.649660659957874, "grad_norm": 0.4791719317436218, "learning_rate": 3.205577618216221e-06, "loss": 0.0715, "step": 35580 }, { "epoch": 16.65434121226305, "grad_norm": 0.6329532861709595, "learning_rate": 3.2024221798794685e-06, "loss": 0.0871, "step": 35590 }, { "epoch": 16.65902176456822, "grad_norm": 0.7504395246505737, "learning_rate": 3.199273585751452e-06, "loss": 0.0768, "step": 35600 }, { "epoch": 16.66370231687339, "grad_norm": 0.5295385122299194, "learning_rate": 3.1961318378752556e-06, "loss": 0.0685, "step": 35610 }, { "epoch": 16.66838286917856, "grad_norm": 0.4993860125541687, "learning_rate": 3.1929969382895276e-06, "loss": 0.0876, "step": 35620 }, { "epoch": 16.673063421483736, "grad_norm": 0.7525779008865356, "learning_rate": 3.189868889028471e-06, "loss": 0.0789, "step": 35630 }, { "epoch": 16.677743973788907, "grad_norm": 0.4965711534023285, "learning_rate": 3.186747692121844e-06, "loss": 0.066, "step": 35640 }, { "epoch": 16.682424526094078, "grad_norm": 0.5417676568031311, "learning_rate": 3.1836333495949543e-06, "loss": 0.068, "step": 35650 }, { "epoch": 16.687105078399252, "grad_norm": 0.7152099013328552, "learning_rate": 3.1805258634686678e-06, "loss": 0.094, "step": 35660 }, { "epoch": 16.691785630704423, "grad_norm": 0.5146581530570984, "learning_rate": 3.1774252357593972e-06, "loss": 0.0691, "step": 35670 }, { "epoch": 16.696466183009594, "grad_norm": 0.6035513281822205, "learning_rate": 3.174331468479104e-06, "loss": 0.0808, "step": 35680 }, { "epoch": 16.70114673531477, "grad_norm": 0.5196797251701355, "learning_rate": 3.1712445636353067e-06, "loss": 0.0729, "step": 35690 }, { "epoch": 16.70582728761994, "grad_norm": 0.5126620531082153, "learning_rate": 3.1681645232310576e-06, "loss": 0.0708, "step": 35700 }, { "epoch": 16.71050783992511, "grad_norm": 0.6478649377822876, "learning_rate": 3.165091349264964e-06, "loss": 0.0666, "step": 35710 }, { "epoch": 16.71518839223028, "grad_norm": 0.4938817620277405, "learning_rate": 3.162025043731176e-06, "loss": 0.0699, "step": 35720 }, { "epoch": 16.719868944535456, "grad_norm": 0.758160412311554, "learning_rate": 3.158965608619385e-06, "loss": 0.082, "step": 35730 }, { "epoch": 16.724549496840627, "grad_norm": 0.5288947820663452, "learning_rate": 3.155913045914824e-06, "loss": 0.0783, "step": 35740 }, { "epoch": 16.729230049145798, "grad_norm": 0.6480432748794556, "learning_rate": 3.152867357598271e-06, "loss": 0.0887, "step": 35750 }, { "epoch": 16.733910601450972, "grad_norm": 0.4835122525691986, "learning_rate": 3.149828545646038e-06, "loss": 0.0737, "step": 35760 }, { "epoch": 16.738591153756143, "grad_norm": 1.0510447025299072, "learning_rate": 3.1467966120299766e-06, "loss": 0.0716, "step": 35770 }, { "epoch": 16.743271706061314, "grad_norm": 0.6913727521896362, "learning_rate": 3.143771558717476e-06, "loss": 0.0799, "step": 35780 }, { "epoch": 16.74795225836649, "grad_norm": 0.4967568814754486, "learning_rate": 3.140753387671462e-06, "loss": 0.0708, "step": 35790 }, { "epoch": 16.75263281067166, "grad_norm": 0.673855185508728, "learning_rate": 3.1377421008503896e-06, "loss": 0.0801, "step": 35800 }, { "epoch": 16.75731336297683, "grad_norm": 0.4626782238483429, "learning_rate": 3.1347377002082547e-06, "loss": 0.0671, "step": 35810 }, { "epoch": 16.761993915282, "grad_norm": 0.6024160385131836, "learning_rate": 3.131740187694578e-06, "loss": 0.0656, "step": 35820 }, { "epoch": 16.766674467587176, "grad_norm": 0.6089556217193604, "learning_rate": 3.128749565254411e-06, "loss": 0.0815, "step": 35830 }, { "epoch": 16.771355019892347, "grad_norm": 0.5525732636451721, "learning_rate": 3.125765834828339e-06, "loss": 0.061, "step": 35840 }, { "epoch": 16.776035572197518, "grad_norm": 0.6542173027992249, "learning_rate": 3.1227889983524704e-06, "loss": 0.0754, "step": 35850 }, { "epoch": 16.780716124502693, "grad_norm": 0.5246800184249878, "learning_rate": 3.119819057758443e-06, "loss": 0.0819, "step": 35860 }, { "epoch": 16.785396676807864, "grad_norm": 0.6351134777069092, "learning_rate": 3.1168560149734205e-06, "loss": 0.0657, "step": 35870 }, { "epoch": 16.790077229113034, "grad_norm": 0.4932760000228882, "learning_rate": 3.113899871920085e-06, "loss": 0.0766, "step": 35880 }, { "epoch": 16.79475778141821, "grad_norm": 0.5607795119285583, "learning_rate": 3.1109506305166484e-06, "loss": 0.0756, "step": 35890 }, { "epoch": 16.79943833372338, "grad_norm": 0.5417966842651367, "learning_rate": 3.1080082926768417e-06, "loss": 0.0793, "step": 35900 }, { "epoch": 16.80411888602855, "grad_norm": 0.8662285804748535, "learning_rate": 3.105072860309916e-06, "loss": 0.0732, "step": 35910 }, { "epoch": 16.808799438333722, "grad_norm": 0.8992531895637512, "learning_rate": 3.1021443353206404e-06, "loss": 0.073, "step": 35920 }, { "epoch": 16.813479990638896, "grad_norm": 0.7373815178871155, "learning_rate": 3.099222719609303e-06, "loss": 0.0792, "step": 35930 }, { "epoch": 16.818160542944067, "grad_norm": 0.5701608657836914, "learning_rate": 3.096308015071711e-06, "loss": 0.0767, "step": 35940 }, { "epoch": 16.822841095249238, "grad_norm": 0.6606470346450806, "learning_rate": 3.09340022359918e-06, "loss": 0.0584, "step": 35950 }, { "epoch": 16.827521647554413, "grad_norm": 0.6170228123664856, "learning_rate": 3.090499347078549e-06, "loss": 0.0849, "step": 35960 }, { "epoch": 16.832202199859584, "grad_norm": 0.5383720397949219, "learning_rate": 3.0876053873921627e-06, "loss": 0.0711, "step": 35970 }, { "epoch": 16.836882752164755, "grad_norm": 0.6720550656318665, "learning_rate": 3.0847183464178788e-06, "loss": 0.0783, "step": 35980 }, { "epoch": 16.84156330446993, "grad_norm": 0.5656610727310181, "learning_rate": 3.0818382260290696e-06, "loss": 0.0762, "step": 35990 }, { "epoch": 16.8462438567751, "grad_norm": 0.5986707210540771, "learning_rate": 3.078965028094614e-06, "loss": 0.0859, "step": 36000 }, { "epoch": 16.85092440908027, "grad_norm": 0.511722207069397, "learning_rate": 3.0760987544788976e-06, "loss": 0.0638, "step": 36010 }, { "epoch": 16.855604961385442, "grad_norm": 0.5037940144538879, "learning_rate": 3.073239407041812e-06, "loss": 0.0734, "step": 36020 }, { "epoch": 16.860285513690616, "grad_norm": 0.6618945598602295, "learning_rate": 3.0703869876387613e-06, "loss": 0.0754, "step": 36030 }, { "epoch": 16.864966065995787, "grad_norm": 0.768492579460144, "learning_rate": 3.06754149812065e-06, "loss": 0.0778, "step": 36040 }, { "epoch": 16.86964661830096, "grad_norm": 0.5012738108634949, "learning_rate": 3.064702940333878e-06, "loss": 0.061, "step": 36050 }, { "epoch": 16.874327170606133, "grad_norm": 0.5704278945922852, "learning_rate": 3.0618713161203625e-06, "loss": 0.0709, "step": 36060 }, { "epoch": 16.879007722911304, "grad_norm": 0.47967007756233215, "learning_rate": 3.0590466273175083e-06, "loss": 0.0735, "step": 36070 }, { "epoch": 16.883688275216475, "grad_norm": 0.7131226062774658, "learning_rate": 3.056228875758229e-06, "loss": 0.0831, "step": 36080 }, { "epoch": 16.88836882752165, "grad_norm": 0.5426537990570068, "learning_rate": 3.0534180632709326e-06, "loss": 0.0634, "step": 36090 }, { "epoch": 16.89304937982682, "grad_norm": 0.5588693022727966, "learning_rate": 3.0506141916795233e-06, "loss": 0.0809, "step": 36100 }, { "epoch": 16.89772993213199, "grad_norm": 0.3958870470523834, "learning_rate": 3.0478172628034023e-06, "loss": 0.0727, "step": 36110 }, { "epoch": 16.902410484437162, "grad_norm": 0.6032480001449585, "learning_rate": 3.045027278457468e-06, "loss": 0.0809, "step": 36120 }, { "epoch": 16.907091036742337, "grad_norm": 0.5784974098205566, "learning_rate": 3.042244240452112e-06, "loss": 0.0691, "step": 36130 }, { "epoch": 16.911771589047508, "grad_norm": 0.5555382370948792, "learning_rate": 3.0394681505932146e-06, "loss": 0.081, "step": 36140 }, { "epoch": 16.91645214135268, "grad_norm": 0.6074702739715576, "learning_rate": 3.03669901068215e-06, "loss": 0.0726, "step": 36150 }, { "epoch": 16.921132693657853, "grad_norm": 0.9187952280044556, "learning_rate": 3.0339368225157887e-06, "loss": 0.0795, "step": 36160 }, { "epoch": 16.925813245963024, "grad_norm": 0.5046342015266418, "learning_rate": 3.031181587886478e-06, "loss": 0.0598, "step": 36170 }, { "epoch": 16.930493798268195, "grad_norm": 0.4255123436450958, "learning_rate": 3.028433308582062e-06, "loss": 0.0721, "step": 36180 }, { "epoch": 16.93517435057337, "grad_norm": 0.5786041617393494, "learning_rate": 3.0256919863858723e-06, "loss": 0.0748, "step": 36190 }, { "epoch": 16.93985490287854, "grad_norm": 0.6090528964996338, "learning_rate": 3.0229576230767177e-06, "loss": 0.0778, "step": 36200 }, { "epoch": 16.94453545518371, "grad_norm": 0.551059365272522, "learning_rate": 3.0202302204289005e-06, "loss": 0.0718, "step": 36210 }, { "epoch": 16.949216007488882, "grad_norm": 0.6087028980255127, "learning_rate": 3.0175097802122006e-06, "loss": 0.087, "step": 36220 }, { "epoch": 16.953896559794057, "grad_norm": 0.5782570838928223, "learning_rate": 3.014796304191886e-06, "loss": 0.0874, "step": 36230 }, { "epoch": 16.958577112099228, "grad_norm": 0.5563032627105713, "learning_rate": 3.012089794128696e-06, "loss": 0.0699, "step": 36240 }, { "epoch": 16.9632576644044, "grad_norm": 0.6283013224601746, "learning_rate": 3.0093902517788623e-06, "loss": 0.0686, "step": 36250 }, { "epoch": 16.967938216709573, "grad_norm": 0.6210262775421143, "learning_rate": 3.006697678894085e-06, "loss": 0.0815, "step": 36260 }, { "epoch": 16.972618769014744, "grad_norm": 0.7785419225692749, "learning_rate": 3.0040120772215415e-06, "loss": 0.067, "step": 36270 }, { "epoch": 16.977299321319915, "grad_norm": 0.7924624085426331, "learning_rate": 3.0013334485038975e-06, "loss": 0.0914, "step": 36280 }, { "epoch": 16.981979873625086, "grad_norm": 0.5829755067825317, "learning_rate": 2.9986617944792803e-06, "loss": 0.0928, "step": 36290 }, { "epoch": 16.98666042593026, "grad_norm": 0.630313515663147, "learning_rate": 2.9959971168812983e-06, "loss": 0.0728, "step": 36300 }, { "epoch": 16.99134097823543, "grad_norm": 0.4998546540737152, "learning_rate": 2.993339417439035e-06, "loss": 0.0734, "step": 36310 }, { "epoch": 16.996021530540602, "grad_norm": 0.6127678751945496, "learning_rate": 2.9906886978770387e-06, "loss": 0.0876, "step": 36320 }, { "epoch": 17.000468055230517, "grad_norm": 0.4651862382888794, "learning_rate": 2.9880449599153345e-06, "loss": 0.0669, "step": 36330 }, { "epoch": 17.005148607535688, "grad_norm": 0.4602208137512207, "learning_rate": 2.9854082052694143e-06, "loss": 0.0665, "step": 36340 }, { "epoch": 17.009829159840862, "grad_norm": 0.5665925145149231, "learning_rate": 2.982778435650242e-06, "loss": 0.0731, "step": 36350 }, { "epoch": 17.014509712146033, "grad_norm": 0.41594135761260986, "learning_rate": 2.980155652764244e-06, "loss": 0.068, "step": 36360 }, { "epoch": 17.019190264451204, "grad_norm": 0.6028298735618591, "learning_rate": 2.977539858313316e-06, "loss": 0.062, "step": 36370 }, { "epoch": 17.02387081675638, "grad_norm": 0.6440853476524353, "learning_rate": 2.9749310539948194e-06, "loss": 0.0665, "step": 36380 }, { "epoch": 17.02855136906155, "grad_norm": 0.5835644602775574, "learning_rate": 2.972329241501578e-06, "loss": 0.0689, "step": 36390 }, { "epoch": 17.03323192136672, "grad_norm": 0.6058840751647949, "learning_rate": 2.9697344225218806e-06, "loss": 0.0774, "step": 36400 }, { "epoch": 17.037912473671895, "grad_norm": 0.5590217113494873, "learning_rate": 2.9671465987394773e-06, "loss": 0.0681, "step": 36410 }, { "epoch": 17.042593025977066, "grad_norm": 0.5623674988746643, "learning_rate": 2.964565771833578e-06, "loss": 0.0731, "step": 36420 }, { "epoch": 17.047273578282237, "grad_norm": 0.8112977147102356, "learning_rate": 2.9619919434788526e-06, "loss": 0.0634, "step": 36430 }, { "epoch": 17.051954130587408, "grad_norm": 0.4669482111930847, "learning_rate": 2.959425115345432e-06, "loss": 0.0758, "step": 36440 }, { "epoch": 17.056634682892582, "grad_norm": 1.1982847452163696, "learning_rate": 2.956865289098904e-06, "loss": 0.0699, "step": 36450 }, { "epoch": 17.061315235197753, "grad_norm": 0.7937114834785461, "learning_rate": 2.9543124664003086e-06, "loss": 0.0668, "step": 36460 }, { "epoch": 17.065995787502924, "grad_norm": 0.743135392665863, "learning_rate": 2.951766648906147e-06, "loss": 0.0701, "step": 36470 }, { "epoch": 17.0706763398081, "grad_norm": 0.6033019423484802, "learning_rate": 2.9492278382683735e-06, "loss": 0.078, "step": 36480 }, { "epoch": 17.07535689211327, "grad_norm": 0.6794872879981995, "learning_rate": 2.9466960361343908e-06, "loss": 0.0691, "step": 36490 }, { "epoch": 17.08003744441844, "grad_norm": 0.4842578172683716, "learning_rate": 2.9441712441470627e-06, "loss": 0.0528, "step": 36500 }, { "epoch": 17.084717996723615, "grad_norm": 0.5299753546714783, "learning_rate": 2.9416534639446954e-06, "loss": 0.0786, "step": 36510 }, { "epoch": 17.089398549028786, "grad_norm": 0.46484774351119995, "learning_rate": 2.939142697161052e-06, "loss": 0.0593, "step": 36520 }, { "epoch": 17.094079101333957, "grad_norm": 0.5757821202278137, "learning_rate": 2.9366389454253415e-06, "loss": 0.0664, "step": 36530 }, { "epoch": 17.098759653639128, "grad_norm": 0.4901011884212494, "learning_rate": 2.934142210362218e-06, "loss": 0.0707, "step": 36540 }, { "epoch": 17.103440205944302, "grad_norm": 0.5088853240013123, "learning_rate": 2.9316524935917895e-06, "loss": 0.0604, "step": 36550 }, { "epoch": 17.108120758249473, "grad_norm": 0.6796884536743164, "learning_rate": 2.9291697967296045e-06, "loss": 0.0678, "step": 36560 }, { "epoch": 17.112801310554644, "grad_norm": 0.6007181406021118, "learning_rate": 2.9266941213866596e-06, "loss": 0.0597, "step": 36570 }, { "epoch": 17.11748186285982, "grad_norm": 0.5678294897079468, "learning_rate": 2.92422546916939e-06, "loss": 0.0617, "step": 36580 }, { "epoch": 17.12216241516499, "grad_norm": 0.5583005547523499, "learning_rate": 2.921763841679682e-06, "loss": 0.0657, "step": 36590 }, { "epoch": 17.12684296747016, "grad_norm": 0.7293022871017456, "learning_rate": 2.9193092405148552e-06, "loss": 0.0799, "step": 36600 }, { "epoch": 17.131523519775335, "grad_norm": 0.6330510973930359, "learning_rate": 2.9168616672676755e-06, "loss": 0.0742, "step": 36610 }, { "epoch": 17.136204072080506, "grad_norm": 0.5865249633789062, "learning_rate": 2.9144211235263468e-06, "loss": 0.0691, "step": 36620 }, { "epoch": 17.140884624385677, "grad_norm": 0.50995272397995, "learning_rate": 2.911987610874511e-06, "loss": 0.0487, "step": 36630 }, { "epoch": 17.145565176690848, "grad_norm": 0.4374971091747284, "learning_rate": 2.9095611308912467e-06, "loss": 0.0745, "step": 36640 }, { "epoch": 17.150245728996023, "grad_norm": 0.673105776309967, "learning_rate": 2.9071416851510717e-06, "loss": 0.0654, "step": 36650 }, { "epoch": 17.154926281301194, "grad_norm": 0.6433870792388916, "learning_rate": 2.9047292752239376e-06, "loss": 0.0689, "step": 36660 }, { "epoch": 17.159606833606365, "grad_norm": 0.49489620327949524, "learning_rate": 2.9023239026752333e-06, "loss": 0.0725, "step": 36670 }, { "epoch": 17.16428738591154, "grad_norm": 0.5947558879852295, "learning_rate": 2.8999255690657758e-06, "loss": 0.0757, "step": 36680 }, { "epoch": 17.16896793821671, "grad_norm": 0.5857840776443481, "learning_rate": 2.8975342759518183e-06, "loss": 0.0713, "step": 36690 }, { "epoch": 17.17364849052188, "grad_norm": 0.5473451614379883, "learning_rate": 2.8951500248850476e-06, "loss": 0.0743, "step": 36700 }, { "epoch": 17.178329042827052, "grad_norm": 0.6205880641937256, "learning_rate": 2.8927728174125737e-06, "loss": 0.0555, "step": 36710 }, { "epoch": 17.183009595132226, "grad_norm": 0.682894766330719, "learning_rate": 2.890402655076946e-06, "loss": 0.0821, "step": 36720 }, { "epoch": 17.187690147437397, "grad_norm": 0.6666727066040039, "learning_rate": 2.888039539416133e-06, "loss": 0.0705, "step": 36730 }, { "epoch": 17.19237069974257, "grad_norm": 0.658267080783844, "learning_rate": 2.8856834719635356e-06, "loss": 0.0717, "step": 36740 }, { "epoch": 17.197051252047743, "grad_norm": 0.438813716173172, "learning_rate": 2.883334454247983e-06, "loss": 0.0703, "step": 36750 }, { "epoch": 17.201731804352914, "grad_norm": 0.40220367908477783, "learning_rate": 2.880992487793724e-06, "loss": 0.0523, "step": 36760 }, { "epoch": 17.206412356658085, "grad_norm": 0.6542040705680847, "learning_rate": 2.8786575741204363e-06, "loss": 0.0793, "step": 36770 }, { "epoch": 17.21109290896326, "grad_norm": 0.5924660563468933, "learning_rate": 2.876329714743221e-06, "loss": 0.0689, "step": 36780 }, { "epoch": 17.21577346126843, "grad_norm": 0.6742114424705505, "learning_rate": 2.8740089111725997e-06, "loss": 0.0751, "step": 36790 }, { "epoch": 17.2204540135736, "grad_norm": 0.6200509071350098, "learning_rate": 2.8716951649145175e-06, "loss": 0.0641, "step": 36800 }, { "epoch": 17.225134565878772, "grad_norm": 0.4528730809688568, "learning_rate": 2.8693884774703384e-06, "loss": 0.0687, "step": 36810 }, { "epoch": 17.229815118183947, "grad_norm": 0.6418483257293701, "learning_rate": 2.867088850336848e-06, "loss": 0.0741, "step": 36820 }, { "epoch": 17.234495670489117, "grad_norm": 0.46885496377944946, "learning_rate": 2.864796285006247e-06, "loss": 0.0652, "step": 36830 }, { "epoch": 17.23917622279429, "grad_norm": 0.5795496106147766, "learning_rate": 2.862510782966158e-06, "loss": 0.057, "step": 36840 }, { "epoch": 17.243856775099463, "grad_norm": 0.6560617685317993, "learning_rate": 2.8602323456996196e-06, "loss": 0.0574, "step": 36850 }, { "epoch": 17.248537327404634, "grad_norm": 0.5780478715896606, "learning_rate": 2.8579609746850816e-06, "loss": 0.056, "step": 36860 }, { "epoch": 17.253217879709805, "grad_norm": 0.6253287196159363, "learning_rate": 2.8556966713964147e-06, "loss": 0.0655, "step": 36870 }, { "epoch": 17.25789843201498, "grad_norm": 0.4910690188407898, "learning_rate": 2.8534394373029006e-06, "loss": 0.0739, "step": 36880 }, { "epoch": 17.26257898432015, "grad_norm": 0.6813194155693054, "learning_rate": 2.851189273869233e-06, "loss": 0.0632, "step": 36890 }, { "epoch": 17.26725953662532, "grad_norm": 0.5386050939559937, "learning_rate": 2.848946182555519e-06, "loss": 0.061, "step": 36900 }, { "epoch": 17.271940088930492, "grad_norm": 0.5475113391876221, "learning_rate": 2.8467101648172752e-06, "loss": 0.06, "step": 36910 }, { "epoch": 17.276620641235667, "grad_norm": 0.5303221940994263, "learning_rate": 2.844481222105432e-06, "loss": 0.0673, "step": 36920 }, { "epoch": 17.281301193540838, "grad_norm": 0.8639215230941772, "learning_rate": 2.842259355866322e-06, "loss": 0.0641, "step": 36930 }, { "epoch": 17.28598174584601, "grad_norm": 0.7511585354804993, "learning_rate": 2.8400445675416944e-06, "loss": 0.0628, "step": 36940 }, { "epoch": 17.290662298151183, "grad_norm": 0.45909497141838074, "learning_rate": 2.8378368585686973e-06, "loss": 0.0684, "step": 36950 }, { "epoch": 17.295342850456354, "grad_norm": 0.5194634795188904, "learning_rate": 2.8356362303798917e-06, "loss": 0.0823, "step": 36960 }, { "epoch": 17.300023402761525, "grad_norm": 0.4453554153442383, "learning_rate": 2.8334426844032416e-06, "loss": 0.0741, "step": 36970 }, { "epoch": 17.3047039550667, "grad_norm": 0.4571787714958191, "learning_rate": 2.831256222062114e-06, "loss": 0.0762, "step": 36980 }, { "epoch": 17.30938450737187, "grad_norm": 0.7056688070297241, "learning_rate": 2.8290768447752804e-06, "loss": 0.0659, "step": 36990 }, { "epoch": 17.31406505967704, "grad_norm": 0.670795202255249, "learning_rate": 2.826904553956915e-06, "loss": 0.0735, "step": 37000 }, { "epoch": 17.318745611982212, "grad_norm": 0.48069655895233154, "learning_rate": 2.8247393510165954e-06, "loss": 0.069, "step": 37010 }, { "epoch": 17.323426164287387, "grad_norm": 0.6590391397476196, "learning_rate": 2.8225812373592956e-06, "loss": 0.0691, "step": 37020 }, { "epoch": 17.328106716592558, "grad_norm": 0.543427050113678, "learning_rate": 2.8204302143853943e-06, "loss": 0.0744, "step": 37030 }, { "epoch": 17.33278726889773, "grad_norm": 0.7989992499351501, "learning_rate": 2.8182862834906676e-06, "loss": 0.0618, "step": 37040 }, { "epoch": 17.337467821202903, "grad_norm": 0.6526657938957214, "learning_rate": 2.816149446066284e-06, "loss": 0.0786, "step": 37050 }, { "epoch": 17.342148373508074, "grad_norm": 0.540520429611206, "learning_rate": 2.814019703498821e-06, "loss": 0.0694, "step": 37060 }, { "epoch": 17.346828925813245, "grad_norm": 0.5566214323043823, "learning_rate": 2.811897057170243e-06, "loss": 0.0594, "step": 37070 }, { "epoch": 17.35150947811842, "grad_norm": 0.7721787095069885, "learning_rate": 2.8097815084579077e-06, "loss": 0.0826, "step": 37080 }, { "epoch": 17.35619003042359, "grad_norm": 0.531122088432312, "learning_rate": 2.807673058734578e-06, "loss": 0.0643, "step": 37090 }, { "epoch": 17.36087058272876, "grad_norm": 0.7363479137420654, "learning_rate": 2.8055717093684005e-06, "loss": 0.0685, "step": 37100 }, { "epoch": 17.365551135033932, "grad_norm": 0.5215363502502441, "learning_rate": 2.8034774617229183e-06, "loss": 0.0723, "step": 37110 }, { "epoch": 17.370231687339107, "grad_norm": 0.8858129978179932, "learning_rate": 2.8013903171570673e-06, "loss": 0.076, "step": 37120 }, { "epoch": 17.374912239644278, "grad_norm": 0.6882590055465698, "learning_rate": 2.7993102770251725e-06, "loss": 0.0665, "step": 37130 }, { "epoch": 17.37959279194945, "grad_norm": 0.5559398531913757, "learning_rate": 2.797237342676947e-06, "loss": 0.0738, "step": 37140 }, { "epoch": 17.384273344254623, "grad_norm": 0.5877988338470459, "learning_rate": 2.7951715154574986e-06, "loss": 0.0605, "step": 37150 }, { "epoch": 17.388953896559794, "grad_norm": 0.4393477737903595, "learning_rate": 2.7931127967073193e-06, "loss": 0.0638, "step": 37160 }, { "epoch": 17.393634448864965, "grad_norm": 0.489238440990448, "learning_rate": 2.7910611877622888e-06, "loss": 0.0728, "step": 37170 }, { "epoch": 17.39831500117014, "grad_norm": 0.5528594851493835, "learning_rate": 2.789016689953674e-06, "loss": 0.0698, "step": 37180 }, { "epoch": 17.40299555347531, "grad_norm": 0.5005813241004944, "learning_rate": 2.7869793046081295e-06, "loss": 0.0596, "step": 37190 }, { "epoch": 17.40767610578048, "grad_norm": 0.5968608856201172, "learning_rate": 2.7849490330476876e-06, "loss": 0.0631, "step": 37200 }, { "epoch": 17.412356658085653, "grad_norm": 0.5466770529747009, "learning_rate": 2.7829258765897724e-06, "loss": 0.054, "step": 37210 }, { "epoch": 17.417037210390827, "grad_norm": 0.61137455701828, "learning_rate": 2.780909836547189e-06, "loss": 0.0842, "step": 37220 }, { "epoch": 17.421717762695998, "grad_norm": 0.6821499466896057, "learning_rate": 2.7789009142281232e-06, "loss": 0.0631, "step": 37230 }, { "epoch": 17.42639831500117, "grad_norm": 0.7695760726928711, "learning_rate": 2.776899110936142e-06, "loss": 0.0588, "step": 37240 }, { "epoch": 17.431078867306343, "grad_norm": 0.43964117765426636, "learning_rate": 2.7749044279701945e-06, "loss": 0.0669, "step": 37250 }, { "epoch": 17.435759419611514, "grad_norm": 0.7690484523773193, "learning_rate": 2.772916866624609e-06, "loss": 0.0591, "step": 37260 }, { "epoch": 17.440439971916685, "grad_norm": 0.5139245986938477, "learning_rate": 2.7709364281890917e-06, "loss": 0.0594, "step": 37270 }, { "epoch": 17.44512052422186, "grad_norm": 0.5875255465507507, "learning_rate": 2.7689631139487294e-06, "loss": 0.0843, "step": 37280 }, { "epoch": 17.44980107652703, "grad_norm": 0.6904497742652893, "learning_rate": 2.766996925183983e-06, "loss": 0.071, "step": 37290 }, { "epoch": 17.454481628832202, "grad_norm": 0.7082509994506836, "learning_rate": 2.765037863170689e-06, "loss": 0.0674, "step": 37300 }, { "epoch": 17.459162181137373, "grad_norm": 0.5739540457725525, "learning_rate": 2.763085929180066e-06, "loss": 0.067, "step": 37310 }, { "epoch": 17.463842733442547, "grad_norm": 0.8478131294250488, "learning_rate": 2.7611411244786984e-06, "loss": 0.07, "step": 37320 }, { "epoch": 17.468523285747718, "grad_norm": 0.674866259098053, "learning_rate": 2.759203450328553e-06, "loss": 0.0741, "step": 37330 }, { "epoch": 17.47320383805289, "grad_norm": 0.6708934307098389, "learning_rate": 2.7572729079869614e-06, "loss": 0.0618, "step": 37340 }, { "epoch": 17.477884390358064, "grad_norm": 0.6035485863685608, "learning_rate": 2.7553494987066337e-06, "loss": 0.076, "step": 37350 }, { "epoch": 17.482564942663235, "grad_norm": 0.47988471388816833, "learning_rate": 2.753433223735649e-06, "loss": 0.0632, "step": 37360 }, { "epoch": 17.487245494968406, "grad_norm": 0.5785692930221558, "learning_rate": 2.751524084317458e-06, "loss": 0.0728, "step": 37370 }, { "epoch": 17.49192604727358, "grad_norm": 0.6357448697090149, "learning_rate": 2.7496220816908814e-06, "loss": 0.0679, "step": 37380 }, { "epoch": 17.49660659957875, "grad_norm": 0.49190637469291687, "learning_rate": 2.7477272170901047e-06, "loss": 0.0584, "step": 37390 }, { "epoch": 17.501287151883922, "grad_norm": 0.481143593788147, "learning_rate": 2.745839491744686e-06, "loss": 0.069, "step": 37400 }, { "epoch": 17.505967704189093, "grad_norm": 0.6899678111076355, "learning_rate": 2.7439589068795536e-06, "loss": 0.0764, "step": 37410 }, { "epoch": 17.510648256494267, "grad_norm": 0.5039841532707214, "learning_rate": 2.742085463714993e-06, "loss": 0.0723, "step": 37420 }, { "epoch": 17.51532880879944, "grad_norm": 0.5780612826347351, "learning_rate": 2.740219163466665e-06, "loss": 0.0638, "step": 37430 }, { "epoch": 17.52000936110461, "grad_norm": 0.531646728515625, "learning_rate": 2.7383600073455905e-06, "loss": 0.0696, "step": 37440 }, { "epoch": 17.524689913409784, "grad_norm": 0.5138718485832214, "learning_rate": 2.7365079965581555e-06, "loss": 0.0664, "step": 37450 }, { "epoch": 17.529370465714955, "grad_norm": 0.5090799331665039, "learning_rate": 2.7346631323061087e-06, "loss": 0.0643, "step": 37460 }, { "epoch": 17.534051018020126, "grad_norm": 0.4375087022781372, "learning_rate": 2.7328254157865644e-06, "loss": 0.0714, "step": 37470 }, { "epoch": 17.538731570325297, "grad_norm": 0.493470162153244, "learning_rate": 2.730994848191996e-06, "loss": 0.0576, "step": 37480 }, { "epoch": 17.54341212263047, "grad_norm": 0.5472561120986938, "learning_rate": 2.729171430710238e-06, "loss": 0.0733, "step": 37490 }, { "epoch": 17.548092674935642, "grad_norm": 0.6153830885887146, "learning_rate": 2.727355164524489e-06, "loss": 0.0775, "step": 37500 }, { "epoch": 17.552773227240813, "grad_norm": 0.7687682509422302, "learning_rate": 2.7255460508133024e-06, "loss": 0.0668, "step": 37510 }, { "epoch": 17.557453779545988, "grad_norm": 0.37536385655403137, "learning_rate": 2.72374409075059e-06, "loss": 0.0701, "step": 37520 }, { "epoch": 17.56213433185116, "grad_norm": 0.4751923084259033, "learning_rate": 2.7219492855056305e-06, "loss": 0.0638, "step": 37530 }, { "epoch": 17.56681488415633, "grad_norm": 0.47298774123191833, "learning_rate": 2.7201616362430476e-06, "loss": 0.0627, "step": 37540 }, { "epoch": 17.571495436461504, "grad_norm": 0.593508243560791, "learning_rate": 2.7183811441228302e-06, "loss": 0.0741, "step": 37550 }, { "epoch": 17.576175988766675, "grad_norm": 0.5963104367256165, "learning_rate": 2.7166078103003195e-06, "loss": 0.0655, "step": 37560 }, { "epoch": 17.580856541071846, "grad_norm": 0.5192373394966125, "learning_rate": 2.714841635926213e-06, "loss": 0.0739, "step": 37570 }, { "epoch": 17.58553709337702, "grad_norm": 0.5129625201225281, "learning_rate": 2.7130826221465627e-06, "loss": 0.0712, "step": 37580 }, { "epoch": 17.59021764568219, "grad_norm": 0.5210607647895813, "learning_rate": 2.711330770102771e-06, "loss": 0.0779, "step": 37590 }, { "epoch": 17.594898197987362, "grad_norm": 0.7261485457420349, "learning_rate": 2.7095860809315988e-06, "loss": 0.0656, "step": 37600 }, { "epoch": 17.599578750292533, "grad_norm": 0.719581663608551, "learning_rate": 2.707848555765155e-06, "loss": 0.0765, "step": 37610 }, { "epoch": 17.604259302597708, "grad_norm": 0.44822773337364197, "learning_rate": 2.7061181957309e-06, "loss": 0.0606, "step": 37620 }, { "epoch": 17.60893985490288, "grad_norm": 0.518076479434967, "learning_rate": 2.7043950019516465e-06, "loss": 0.0691, "step": 37630 }, { "epoch": 17.61362040720805, "grad_norm": 0.6222781538963318, "learning_rate": 2.702678975545554e-06, "loss": 0.0707, "step": 37640 }, { "epoch": 17.618300959513224, "grad_norm": 0.6489050984382629, "learning_rate": 2.7009701176261365e-06, "loss": 0.0677, "step": 37650 }, { "epoch": 17.622981511818395, "grad_norm": 0.6142975687980652, "learning_rate": 2.6992684293022534e-06, "loss": 0.0679, "step": 37660 }, { "epoch": 17.627662064123566, "grad_norm": 1.6073589324951172, "learning_rate": 2.6975739116781093e-06, "loss": 0.084, "step": 37670 }, { "epoch": 17.632342616428737, "grad_norm": 0.45584771037101746, "learning_rate": 2.6958865658532584e-06, "loss": 0.0755, "step": 37680 }, { "epoch": 17.63702316873391, "grad_norm": 0.47317826747894287, "learning_rate": 2.694206392922604e-06, "loss": 0.0655, "step": 37690 }, { "epoch": 17.641703721039082, "grad_norm": 0.7505092620849609, "learning_rate": 2.692533393976389e-06, "loss": 0.0643, "step": 37700 }, { "epoch": 17.646384273344253, "grad_norm": 0.6115980744361877, "learning_rate": 2.690867570100205e-06, "loss": 0.0529, "step": 37710 }, { "epoch": 17.651064825649428, "grad_norm": 0.4653246998786926, "learning_rate": 2.6892089223749882e-06, "loss": 0.0667, "step": 37720 }, { "epoch": 17.6557453779546, "grad_norm": 0.686353862285614, "learning_rate": 2.687557451877018e-06, "loss": 0.0602, "step": 37730 }, { "epoch": 17.66042593025977, "grad_norm": 0.8060922026634216, "learning_rate": 2.68591315967791e-06, "loss": 0.0722, "step": 37740 }, { "epoch": 17.665106482564944, "grad_norm": 0.5326098203659058, "learning_rate": 2.6842760468446354e-06, "loss": 0.0624, "step": 37750 }, { "epoch": 17.669787034870115, "grad_norm": 0.47716841101646423, "learning_rate": 2.682646114439493e-06, "loss": 0.0634, "step": 37760 }, { "epoch": 17.674467587175286, "grad_norm": 0.6771265268325806, "learning_rate": 2.681023363520132e-06, "loss": 0.0789, "step": 37770 }, { "epoch": 17.679148139480457, "grad_norm": 0.7516891956329346, "learning_rate": 2.6794077951395353e-06, "loss": 0.0842, "step": 37780 }, { "epoch": 17.68382869178563, "grad_norm": 0.6023266911506653, "learning_rate": 2.677799410346029e-06, "loss": 0.0678, "step": 37790 }, { "epoch": 17.688509244090802, "grad_norm": 0.5501587390899658, "learning_rate": 2.6761982101832754e-06, "loss": 0.0562, "step": 37800 }, { "epoch": 17.693189796395973, "grad_norm": 0.7318989634513855, "learning_rate": 2.674604195690278e-06, "loss": 0.0633, "step": 37810 }, { "epoch": 17.697870348701148, "grad_norm": 0.6055097579956055, "learning_rate": 2.6730173679013727e-06, "loss": 0.0548, "step": 37820 }, { "epoch": 17.70255090100632, "grad_norm": 0.46907132863998413, "learning_rate": 2.6714377278462373e-06, "loss": 0.0736, "step": 37830 }, { "epoch": 17.70723145331149, "grad_norm": 0.6143739819526672, "learning_rate": 2.6698652765498814e-06, "loss": 0.0631, "step": 37840 }, { "epoch": 17.711912005616664, "grad_norm": 0.49876585602760315, "learning_rate": 2.668300015032654e-06, "loss": 0.068, "step": 37850 }, { "epoch": 17.716592557921835, "grad_norm": 0.5112124085426331, "learning_rate": 2.666741944310232e-06, "loss": 0.0573, "step": 37860 }, { "epoch": 17.721273110227006, "grad_norm": 0.6054392457008362, "learning_rate": 2.665191065393635e-06, "loss": 0.0739, "step": 37870 }, { "epoch": 17.725953662532177, "grad_norm": 0.7105073928833008, "learning_rate": 2.6636473792892086e-06, "loss": 0.0708, "step": 37880 }, { "epoch": 17.73063421483735, "grad_norm": 0.5325417518615723, "learning_rate": 2.6621108869986355e-06, "loss": 0.0653, "step": 37890 }, { "epoch": 17.735314767142523, "grad_norm": 0.6816115975379944, "learning_rate": 2.660581589518929e-06, "loss": 0.0699, "step": 37900 }, { "epoch": 17.739995319447694, "grad_norm": 0.8352646827697754, "learning_rate": 2.6590594878424308e-06, "loss": 0.0615, "step": 37910 }, { "epoch": 17.744675871752868, "grad_norm": 0.5632380247116089, "learning_rate": 2.6575445829568204e-06, "loss": 0.0589, "step": 37920 }, { "epoch": 17.74935642405804, "grad_norm": 0.4607888460159302, "learning_rate": 2.6560368758450993e-06, "loss": 0.0722, "step": 37930 }, { "epoch": 17.75403697636321, "grad_norm": 0.537964403629303, "learning_rate": 2.654536367485606e-06, "loss": 0.0588, "step": 37940 }, { "epoch": 17.758717528668384, "grad_norm": 0.7177562713623047, "learning_rate": 2.653043058852001e-06, "loss": 0.0659, "step": 37950 }, { "epoch": 17.763398080973555, "grad_norm": 0.5640354156494141, "learning_rate": 2.6515569509132765e-06, "loss": 0.0695, "step": 37960 }, { "epoch": 17.768078633278726, "grad_norm": 0.5990029573440552, "learning_rate": 2.650078044633754e-06, "loss": 0.0655, "step": 37970 }, { "epoch": 17.772759185583897, "grad_norm": 0.5039685964584351, "learning_rate": 2.6486063409730774e-06, "loss": 0.061, "step": 37980 }, { "epoch": 17.777439737889072, "grad_norm": 0.63773113489151, "learning_rate": 2.6471418408862207e-06, "loss": 0.0615, "step": 37990 }, { "epoch": 17.782120290194243, "grad_norm": 0.5878995656967163, "learning_rate": 2.6456845453234807e-06, "loss": 0.0754, "step": 38000 }, { "epoch": 17.786800842499414, "grad_norm": 0.7231564521789551, "learning_rate": 2.644234455230482e-06, "loss": 0.0789, "step": 38010 }, { "epoch": 17.79148139480459, "grad_norm": 0.6119908094406128, "learning_rate": 2.6427915715481713e-06, "loss": 0.0548, "step": 38020 }, { "epoch": 17.79616194710976, "grad_norm": 0.5473850965499878, "learning_rate": 2.64135589521282e-06, "loss": 0.0666, "step": 38030 }, { "epoch": 17.80084249941493, "grad_norm": 0.5902993679046631, "learning_rate": 2.639927427156025e-06, "loss": 0.0715, "step": 38040 }, { "epoch": 17.805523051720105, "grad_norm": 0.7183619737625122, "learning_rate": 2.6385061683047016e-06, "loss": 0.0797, "step": 38050 }, { "epoch": 17.810203604025276, "grad_norm": 0.5911155939102173, "learning_rate": 2.637092119581089e-06, "loss": 0.0769, "step": 38060 }, { "epoch": 17.814884156330447, "grad_norm": 0.44933587312698364, "learning_rate": 2.6356852819027502e-06, "loss": 0.0677, "step": 38070 }, { "epoch": 17.819564708635617, "grad_norm": 0.6811946630477905, "learning_rate": 2.6342856561825644e-06, "loss": 0.0632, "step": 38080 }, { "epoch": 17.824245260940792, "grad_norm": 0.4470617175102234, "learning_rate": 2.6328932433287356e-06, "loss": 0.0731, "step": 38090 }, { "epoch": 17.828925813245963, "grad_norm": 0.5879184603691101, "learning_rate": 2.631508044244784e-06, "loss": 0.0721, "step": 38100 }, { "epoch": 17.833606365551134, "grad_norm": 0.6390225291252136, "learning_rate": 2.630130059829551e-06, "loss": 0.0786, "step": 38110 }, { "epoch": 17.83828691785631, "grad_norm": 0.4503346383571625, "learning_rate": 2.6287592909771944e-06, "loss": 0.0696, "step": 38120 }, { "epoch": 17.84296747016148, "grad_norm": 0.5469617247581482, "learning_rate": 2.6273957385771933e-06, "loss": 0.061, "step": 38130 }, { "epoch": 17.84764802246665, "grad_norm": 0.6247259378433228, "learning_rate": 2.6260394035143415e-06, "loss": 0.0669, "step": 38140 }, { "epoch": 17.852328574771825, "grad_norm": 0.5438675284385681, "learning_rate": 2.6246902866687488e-06, "loss": 0.066, "step": 38150 }, { "epoch": 17.857009127076996, "grad_norm": 0.48090994358062744, "learning_rate": 2.623348388915844e-06, "loss": 0.0578, "step": 38160 }, { "epoch": 17.861689679382167, "grad_norm": 0.5987251996994019, "learning_rate": 2.622013711126368e-06, "loss": 0.0661, "step": 38170 }, { "epoch": 17.866370231687338, "grad_norm": 0.49413469433784485, "learning_rate": 2.6206862541663815e-06, "loss": 0.075, "step": 38180 }, { "epoch": 17.871050783992512, "grad_norm": 0.5273921489715576, "learning_rate": 2.6193660188972553e-06, "loss": 0.0642, "step": 38190 }, { "epoch": 17.875731336297683, "grad_norm": 0.7004026174545288, "learning_rate": 2.618053006175675e-06, "loss": 0.0803, "step": 38200 }, { "epoch": 17.880411888602854, "grad_norm": 0.4490702748298645, "learning_rate": 2.6167472168536424e-06, "loss": 0.0691, "step": 38210 }, { "epoch": 17.88509244090803, "grad_norm": 0.5722501873970032, "learning_rate": 2.6154486517784677e-06, "loss": 0.0622, "step": 38220 }, { "epoch": 17.8897729932132, "grad_norm": 0.5411579012870789, "learning_rate": 2.6141573117927772e-06, "loss": 0.0612, "step": 38230 }, { "epoch": 17.89445354551837, "grad_norm": 0.4817347228527069, "learning_rate": 2.6128731977345072e-06, "loss": 0.0693, "step": 38240 }, { "epoch": 17.89913409782354, "grad_norm": 0.8645876049995422, "learning_rate": 2.6115963104369043e-06, "loss": 0.0666, "step": 38250 }, { "epoch": 17.903814650128716, "grad_norm": 0.6896898150444031, "learning_rate": 2.610326650728529e-06, "loss": 0.0758, "step": 38260 }, { "epoch": 17.908495202433887, "grad_norm": 0.5838624238967896, "learning_rate": 2.6090642194332472e-06, "loss": 0.0584, "step": 38270 }, { "epoch": 17.913175754739058, "grad_norm": 0.6147257089614868, "learning_rate": 2.6078090173702388e-06, "loss": 0.0661, "step": 38280 }, { "epoch": 17.917856307044232, "grad_norm": 0.4948195517063141, "learning_rate": 2.6065610453539905e-06, "loss": 0.0706, "step": 38290 }, { "epoch": 17.922536859349403, "grad_norm": 0.6359730958938599, "learning_rate": 2.6053203041942956e-06, "loss": 0.0726, "step": 38300 }, { "epoch": 17.927217411654574, "grad_norm": 0.9492740035057068, "learning_rate": 2.6040867946962597e-06, "loss": 0.0711, "step": 38310 }, { "epoch": 17.93189796395975, "grad_norm": 0.47357645630836487, "learning_rate": 2.6028605176602947e-06, "loss": 0.064, "step": 38320 }, { "epoch": 17.93657851626492, "grad_norm": 0.8935444951057434, "learning_rate": 2.601641473882115e-06, "loss": 0.0596, "step": 38330 }, { "epoch": 17.94125906857009, "grad_norm": 0.9165276288986206, "learning_rate": 2.6004296641527477e-06, "loss": 0.0671, "step": 38340 }, { "epoch": 17.945939620875265, "grad_norm": 1.1857231855392456, "learning_rate": 2.599225089258523e-06, "loss": 0.0647, "step": 38350 }, { "epoch": 17.950620173180436, "grad_norm": 0.3902508020401001, "learning_rate": 2.5980277499810748e-06, "loss": 0.0632, "step": 38360 }, { "epoch": 17.955300725485607, "grad_norm": 0.8160390257835388, "learning_rate": 2.596837647097344e-06, "loss": 0.0616, "step": 38370 }, { "epoch": 17.959981277790778, "grad_norm": 0.6585879921913147, "learning_rate": 2.5956547813795772e-06, "loss": 0.0688, "step": 38380 }, { "epoch": 17.964661830095952, "grad_norm": 0.7373951077461243, "learning_rate": 2.5944791535953197e-06, "loss": 0.0729, "step": 38390 }, { "epoch": 17.969342382401123, "grad_norm": 0.5812138319015503, "learning_rate": 2.5933107645074273e-06, "loss": 0.0645, "step": 38400 }, { "epoch": 17.974022934706294, "grad_norm": 0.5121139883995056, "learning_rate": 2.592149614874053e-06, "loss": 0.0606, "step": 38410 }, { "epoch": 17.97870348701147, "grad_norm": 0.5002479553222656, "learning_rate": 2.590995705448655e-06, "loss": 0.0549, "step": 38420 }, { "epoch": 17.98338403931664, "grad_norm": 0.6233993768692017, "learning_rate": 2.589849036979992e-06, "loss": 0.0735, "step": 38430 }, { "epoch": 17.98806459162181, "grad_norm": 0.6399626731872559, "learning_rate": 2.5887096102121256e-06, "loss": 0.0608, "step": 38440 }, { "epoch": 17.99274514392698, "grad_norm": 0.5739417672157288, "learning_rate": 2.5875774258844152e-06, "loss": 0.0753, "step": 38450 }, { "epoch": 17.997425696232156, "grad_norm": 0.7268523573875427, "learning_rate": 2.586452484731525e-06, "loss": 0.0803, "step": 38460 }, { "epoch": 18.00187222092207, "grad_norm": 0.46922507882118225, "learning_rate": 2.585334787483416e-06, "loss": 0.0629, "step": 38470 }, { "epoch": 18.00655277322724, "grad_norm": 0.6508978009223938, "learning_rate": 2.5842243348653515e-06, "loss": 0.072, "step": 38480 }, { "epoch": 18.011233325532412, "grad_norm": 0.46061745285987854, "learning_rate": 2.5831211275978913e-06, "loss": 0.0557, "step": 38490 }, { "epoch": 18.015913877837583, "grad_norm": 0.8250024318695068, "learning_rate": 2.5820251663968934e-06, "loss": 0.0665, "step": 38500 }, { "epoch": 18.020594430142758, "grad_norm": 0.5298056602478027, "learning_rate": 2.580936451973517e-06, "loss": 0.0575, "step": 38510 }, { "epoch": 18.02527498244793, "grad_norm": 0.5025271773338318, "learning_rate": 2.5798549850342143e-06, "loss": 0.0593, "step": 38520 }, { "epoch": 18.0299555347531, "grad_norm": 0.3993377387523651, "learning_rate": 2.578780766280741e-06, "loss": 0.0611, "step": 38530 }, { "epoch": 18.034636087058274, "grad_norm": 0.7428732514381409, "learning_rate": 2.5777137964101426e-06, "loss": 0.0696, "step": 38540 }, { "epoch": 18.039316639363445, "grad_norm": 0.46239882707595825, "learning_rate": 2.576654076114767e-06, "loss": 0.0564, "step": 38550 }, { "epoch": 18.043997191668616, "grad_norm": 0.8671801686286926, "learning_rate": 2.575601606082254e-06, "loss": 0.0598, "step": 38560 }, { "epoch": 18.04867774397379, "grad_norm": 0.6974246501922607, "learning_rate": 2.57455638699554e-06, "loss": 0.0615, "step": 38570 }, { "epoch": 18.05335829627896, "grad_norm": 0.4380614459514618, "learning_rate": 2.5735184195328574e-06, "loss": 0.0659, "step": 38580 }, { "epoch": 18.058038848584133, "grad_norm": 0.6388529539108276, "learning_rate": 2.5724877043677307e-06, "loss": 0.0697, "step": 38590 }, { "epoch": 18.062719400889304, "grad_norm": 0.5624552369117737, "learning_rate": 2.5714642421689792e-06, "loss": 0.0618, "step": 38600 }, { "epoch": 18.067399953194478, "grad_norm": 0.5966877937316895, "learning_rate": 2.5704480336007175e-06, "loss": 0.054, "step": 38610 }, { "epoch": 18.07208050549965, "grad_norm": 0.674892008304596, "learning_rate": 2.569439079322352e-06, "loss": 0.0642, "step": 38620 }, { "epoch": 18.07676105780482, "grad_norm": 0.4793814718723297, "learning_rate": 2.5684373799885844e-06, "loss": 0.058, "step": 38630 }, { "epoch": 18.081441610109994, "grad_norm": 0.5261518359184265, "learning_rate": 2.5674429362494017e-06, "loss": 0.0575, "step": 38640 }, { "epoch": 18.086122162415165, "grad_norm": 0.692387044429779, "learning_rate": 2.566455748750092e-06, "loss": 0.083, "step": 38650 }, { "epoch": 18.090802714720336, "grad_norm": 0.5693621635437012, "learning_rate": 2.5654758181312287e-06, "loss": 0.078, "step": 38660 }, { "epoch": 18.095483267025507, "grad_norm": 0.350993812084198, "learning_rate": 2.564503145028678e-06, "loss": 0.0557, "step": 38670 }, { "epoch": 18.10016381933068, "grad_norm": 0.9756559729576111, "learning_rate": 2.563537730073597e-06, "loss": 0.0689, "step": 38680 }, { "epoch": 18.104844371635853, "grad_norm": 0.6673873662948608, "learning_rate": 2.562579573892434e-06, "loss": 0.0726, "step": 38690 }, { "epoch": 18.109524923941024, "grad_norm": 0.5676450729370117, "learning_rate": 2.5616286771069255e-06, "loss": 0.0477, "step": 38700 }, { "epoch": 18.114205476246198, "grad_norm": 0.48391202092170715, "learning_rate": 2.560685040334097e-06, "loss": 0.0561, "step": 38710 }, { "epoch": 18.11888602855137, "grad_norm": 0.5967254638671875, "learning_rate": 2.559748664186266e-06, "loss": 0.0615, "step": 38720 }, { "epoch": 18.12356658085654, "grad_norm": 0.8670050501823425, "learning_rate": 2.5588195492710366e-06, "loss": 0.0629, "step": 38730 }, { "epoch": 18.128247133161715, "grad_norm": 0.713543713092804, "learning_rate": 2.5578976961912996e-06, "loss": 0.062, "step": 38740 }, { "epoch": 18.132927685466885, "grad_norm": 0.6661431789398193, "learning_rate": 2.5569831055452375e-06, "loss": 0.0698, "step": 38750 }, { "epoch": 18.137608237772056, "grad_norm": 0.6312611699104309, "learning_rate": 2.5560757779263174e-06, "loss": 0.0619, "step": 38760 }, { "epoch": 18.142288790077227, "grad_norm": 0.623812735080719, "learning_rate": 2.5551757139232926e-06, "loss": 0.0675, "step": 38770 }, { "epoch": 18.146969342382402, "grad_norm": 0.9951727390289307, "learning_rate": 2.554282914120208e-06, "loss": 0.0645, "step": 38780 }, { "epoch": 18.151649894687573, "grad_norm": 0.5210376381874084, "learning_rate": 2.55339737909639e-06, "loss": 0.0606, "step": 38790 }, { "epoch": 18.156330446992744, "grad_norm": 0.5331382155418396, "learning_rate": 2.552519109426453e-06, "loss": 0.0767, "step": 38800 }, { "epoch": 18.16101099929792, "grad_norm": 0.44117018580436707, "learning_rate": 2.551648105680297e-06, "loss": 0.0589, "step": 38810 }, { "epoch": 18.16569155160309, "grad_norm": 0.4273826479911804, "learning_rate": 2.5507843684231055e-06, "loss": 0.059, "step": 38820 }, { "epoch": 18.17037210390826, "grad_norm": 0.509330153465271, "learning_rate": 2.54992789821535e-06, "loss": 0.0573, "step": 38830 }, { "epoch": 18.175052656213435, "grad_norm": 0.7325332164764404, "learning_rate": 2.549078695612784e-06, "loss": 0.0762, "step": 38840 }, { "epoch": 18.179733208518606, "grad_norm": 0.6021181344985962, "learning_rate": 2.548236761166446e-06, "loss": 0.0564, "step": 38850 }, { "epoch": 18.184413760823777, "grad_norm": 0.5719072222709656, "learning_rate": 2.5474020954226563e-06, "loss": 0.0593, "step": 38860 }, { "epoch": 18.189094313128948, "grad_norm": 0.5744526982307434, "learning_rate": 2.5465746989230235e-06, "loss": 0.0584, "step": 38870 }, { "epoch": 18.193774865434122, "grad_norm": 0.7939673662185669, "learning_rate": 2.545754572204434e-06, "loss": 0.0745, "step": 38880 }, { "epoch": 18.198455417739293, "grad_norm": 1.0219402313232422, "learning_rate": 2.5449417157990598e-06, "loss": 0.0644, "step": 38890 }, { "epoch": 18.203135970044464, "grad_norm": 0.43726402521133423, "learning_rate": 2.544136130234355e-06, "loss": 0.0679, "step": 38900 }, { "epoch": 18.20781652234964, "grad_norm": 0.4891991913318634, "learning_rate": 2.5433378160330557e-06, "loss": 0.067, "step": 38910 }, { "epoch": 18.21249707465481, "grad_norm": 0.6848595142364502, "learning_rate": 2.5425467737131776e-06, "loss": 0.0529, "step": 38920 }, { "epoch": 18.21717762695998, "grad_norm": 0.6289695501327515, "learning_rate": 2.5417630037880203e-06, "loss": 0.056, "step": 38930 }, { "epoch": 18.221858179265155, "grad_norm": 0.5402114987373352, "learning_rate": 2.5409865067661636e-06, "loss": 0.0589, "step": 38940 }, { "epoch": 18.226538731570326, "grad_norm": 0.6767951250076294, "learning_rate": 2.5402172831514698e-06, "loss": 0.0708, "step": 38950 }, { "epoch": 18.231219283875497, "grad_norm": 0.5736719369888306, "learning_rate": 2.5394553334430762e-06, "loss": 0.0781, "step": 38960 }, { "epoch": 18.235899836180668, "grad_norm": 0.4809859097003937, "learning_rate": 2.5387006581354063e-06, "loss": 0.055, "step": 38970 }, { "epoch": 18.240580388485842, "grad_norm": 0.5667986869812012, "learning_rate": 2.5379532577181605e-06, "loss": 0.0641, "step": 38980 }, { "epoch": 18.245260940791013, "grad_norm": 0.4077320098876953, "learning_rate": 2.537213132676318e-06, "loss": 0.0525, "step": 38990 }, { "epoch": 18.249941493096184, "grad_norm": 0.4857611060142517, "learning_rate": 2.5364802834901385e-06, "loss": 0.061, "step": 39000 }, { "epoch": 18.25462204540136, "grad_norm": 0.5794081091880798, "learning_rate": 2.535754710635159e-06, "loss": 0.0603, "step": 39010 }, { "epoch": 18.25930259770653, "grad_norm": 0.8453078866004944, "learning_rate": 2.5350364145821967e-06, "loss": 0.0528, "step": 39020 }, { "epoch": 18.2639831500117, "grad_norm": 0.7203932404518127, "learning_rate": 2.534325395797345e-06, "loss": 0.0685, "step": 39030 }, { "epoch": 18.268663702316875, "grad_norm": 0.589933454990387, "learning_rate": 2.533621654741979e-06, "loss": 0.0606, "step": 39040 }, { "epoch": 18.273344254622046, "grad_norm": 0.542699933052063, "learning_rate": 2.532925191872745e-06, "loss": 0.0654, "step": 39050 }, { "epoch": 18.278024806927217, "grad_norm": 0.371748149394989, "learning_rate": 2.5322360076415723e-06, "loss": 0.068, "step": 39060 }, { "epoch": 18.282705359232388, "grad_norm": 0.7876960039138794, "learning_rate": 2.5315541024956665e-06, "loss": 0.0597, "step": 39070 }, { "epoch": 18.287385911537562, "grad_norm": 0.5031705498695374, "learning_rate": 2.5308794768775052e-06, "loss": 0.0548, "step": 39080 }, { "epoch": 18.292066463842733, "grad_norm": 0.7347437143325806, "learning_rate": 2.530212131224847e-06, "loss": 0.064, "step": 39090 }, { "epoch": 18.296747016147904, "grad_norm": 0.6050007939338684, "learning_rate": 2.529552065970726e-06, "loss": 0.0761, "step": 39100 }, { "epoch": 18.30142756845308, "grad_norm": 0.5099340677261353, "learning_rate": 2.528899281543449e-06, "loss": 0.062, "step": 39110 }, { "epoch": 18.30610812075825, "grad_norm": 0.4755766689777374, "learning_rate": 2.5282537783666037e-06, "loss": 0.0575, "step": 39120 }, { "epoch": 18.31078867306342, "grad_norm": 0.5028561353683472, "learning_rate": 2.5276155568590496e-06, "loss": 0.0638, "step": 39130 }, { "epoch": 18.315469225368595, "grad_norm": 0.705191969871521, "learning_rate": 2.5269846174349206e-06, "loss": 0.0774, "step": 39140 }, { "epoch": 18.320149777673766, "grad_norm": 0.6492907404899597, "learning_rate": 2.5263609605036266e-06, "loss": 0.0534, "step": 39150 }, { "epoch": 18.324830329978937, "grad_norm": 0.5241549015045166, "learning_rate": 2.5257445864698513e-06, "loss": 0.0594, "step": 39160 }, { "epoch": 18.329510882284108, "grad_norm": 0.6831234097480774, "learning_rate": 2.525135495733555e-06, "loss": 0.0807, "step": 39170 }, { "epoch": 18.334191434589282, "grad_norm": 0.47275084257125854, "learning_rate": 2.524533688689969e-06, "loss": 0.0568, "step": 39180 }, { "epoch": 18.338871986894453, "grad_norm": 0.5388441681861877, "learning_rate": 2.5239391657295975e-06, "loss": 0.0568, "step": 39190 }, { "epoch": 18.343552539199624, "grad_norm": 0.6353873610496521, "learning_rate": 2.5233519272382234e-06, "loss": 0.0639, "step": 39200 }, { "epoch": 18.3482330915048, "grad_norm": 0.7843367457389832, "learning_rate": 2.522771973596898e-06, "loss": 0.0622, "step": 39210 }, { "epoch": 18.35291364380997, "grad_norm": 0.7519840598106384, "learning_rate": 2.522199305181945e-06, "loss": 0.0666, "step": 39220 }, { "epoch": 18.35759419611514, "grad_norm": 0.6575687527656555, "learning_rate": 2.5216339223649655e-06, "loss": 0.0688, "step": 39230 }, { "epoch": 18.362274748420315, "grad_norm": 0.6831254959106445, "learning_rate": 2.521075825512829e-06, "loss": 0.0564, "step": 39240 }, { "epoch": 18.366955300725486, "grad_norm": 0.520615816116333, "learning_rate": 2.5205250149876783e-06, "loss": 0.053, "step": 39250 }, { "epoch": 18.371635853030657, "grad_norm": 0.6050226092338562, "learning_rate": 2.5199814911469296e-06, "loss": 0.0548, "step": 39260 }, { "epoch": 18.376316405335828, "grad_norm": 0.5308438539505005, "learning_rate": 2.5194452543432677e-06, "loss": 0.0578, "step": 39270 }, { "epoch": 18.380996957641003, "grad_norm": 0.5921254754066467, "learning_rate": 2.518916304924652e-06, "loss": 0.0508, "step": 39280 }, { "epoch": 18.385677509946174, "grad_norm": 0.5310086011886597, "learning_rate": 2.5183946432343135e-06, "loss": 0.0518, "step": 39290 }, { "epoch": 18.390358062251345, "grad_norm": 0.6571161150932312, "learning_rate": 2.51788026961075e-06, "loss": 0.0647, "step": 39300 }, { "epoch": 18.39503861455652, "grad_norm": 0.626827597618103, "learning_rate": 2.5173731843877335e-06, "loss": 0.0603, "step": 39310 }, { "epoch": 18.39971916686169, "grad_norm": 0.5558822751045227, "learning_rate": 2.516873387894308e-06, "loss": 0.0636, "step": 39320 }, { "epoch": 18.40439971916686, "grad_norm": 0.9145227074623108, "learning_rate": 2.5163808804547845e-06, "loss": 0.0671, "step": 39330 }, { "epoch": 18.409080271472035, "grad_norm": 0.7167326211929321, "learning_rate": 2.515895662388746e-06, "loss": 0.0501, "step": 39340 }, { "epoch": 18.413760823777206, "grad_norm": 0.686824381351471, "learning_rate": 2.5154177340110447e-06, "loss": 0.0572, "step": 39350 }, { "epoch": 18.418441376082377, "grad_norm": 0.5644968152046204, "learning_rate": 2.5149470956318037e-06, "loss": 0.0509, "step": 39360 }, { "epoch": 18.42312192838755, "grad_norm": 0.46930572390556335, "learning_rate": 2.5144837475564153e-06, "loss": 0.0536, "step": 39370 }, { "epoch": 18.427802480692723, "grad_norm": 0.5313988924026489, "learning_rate": 2.5140276900855417e-06, "loss": 0.0526, "step": 39380 }, { "epoch": 18.432483032997894, "grad_norm": 0.48669540882110596, "learning_rate": 2.5135789235151106e-06, "loss": 0.0677, "step": 39390 }, { "epoch": 18.437163585303065, "grad_norm": 0.6605704426765442, "learning_rate": 2.5131374481363253e-06, "loss": 0.0702, "step": 39400 }, { "epoch": 18.44184413760824, "grad_norm": 0.42405423521995544, "learning_rate": 2.512703264235654e-06, "loss": 0.0542, "step": 39410 }, { "epoch": 18.44652468991341, "grad_norm": 0.460878849029541, "learning_rate": 2.5122763720948324e-06, "loss": 0.0616, "step": 39420 }, { "epoch": 18.45120524221858, "grad_norm": 0.6710219383239746, "learning_rate": 2.511856771990866e-06, "loss": 0.0575, "step": 39430 }, { "epoch": 18.455885794523756, "grad_norm": 0.6008034944534302, "learning_rate": 2.5114444641960303e-06, "loss": 0.0722, "step": 39440 }, { "epoch": 18.460566346828926, "grad_norm": 0.5325629115104675, "learning_rate": 2.5110394489778666e-06, "loss": 0.064, "step": 39450 }, { "epoch": 18.465246899134097, "grad_norm": 0.6807911992073059, "learning_rate": 2.5106417265991844e-06, "loss": 0.0609, "step": 39460 }, { "epoch": 18.46992745143927, "grad_norm": 0.4781230688095093, "learning_rate": 2.510251297318062e-06, "loss": 0.0695, "step": 39470 }, { "epoch": 18.474608003744443, "grad_norm": 0.9009580612182617, "learning_rate": 2.509868161387843e-06, "loss": 0.0746, "step": 39480 }, { "epoch": 18.479288556049614, "grad_norm": 0.5627644062042236, "learning_rate": 2.5094923190571447e-06, "loss": 0.0817, "step": 39490 }, { "epoch": 18.483969108354785, "grad_norm": 0.6321631073951721, "learning_rate": 2.509123770569842e-06, "loss": 0.0618, "step": 39500 }, { "epoch": 18.48864966065996, "grad_norm": 0.5579924583435059, "learning_rate": 2.5087625161650846e-06, "loss": 0.0779, "step": 39510 }, { "epoch": 18.49333021296513, "grad_norm": 0.6316049695014954, "learning_rate": 2.508408556077286e-06, "loss": 0.0684, "step": 39520 }, { "epoch": 18.4980107652703, "grad_norm": 0.6310096979141235, "learning_rate": 2.5080618905361276e-06, "loss": 0.0608, "step": 39530 }, { "epoch": 18.502691317575476, "grad_norm": 0.5379772186279297, "learning_rate": 2.507722519766556e-06, "loss": 0.0619, "step": 39540 }, { "epoch": 18.507371869880647, "grad_norm": 0.7716780304908752, "learning_rate": 2.5073904439887865e-06, "loss": 0.0631, "step": 39550 }, { "epoch": 18.512052422185818, "grad_norm": 0.4184824526309967, "learning_rate": 2.5070656634182976e-06, "loss": 0.0604, "step": 39560 }, { "epoch": 18.51673297449099, "grad_norm": 0.6619906425476074, "learning_rate": 2.506748178265837e-06, "loss": 0.0584, "step": 39570 }, { "epoch": 18.521413526796163, "grad_norm": 0.5681060552597046, "learning_rate": 2.506437988737418e-06, "loss": 0.0708, "step": 39580 }, { "epoch": 18.526094079101334, "grad_norm": 0.47933879494667053, "learning_rate": 2.5061350950343175e-06, "loss": 0.0709, "step": 39590 }, { "epoch": 18.530774631406505, "grad_norm": 0.5349695682525635, "learning_rate": 2.505839497353083e-06, "loss": 0.0488, "step": 39600 }, { "epoch": 18.53545518371168, "grad_norm": 0.6723774671554565, "learning_rate": 2.50555119588552e-06, "loss": 0.0524, "step": 39610 }, { "epoch": 18.54013573601685, "grad_norm": 0.9605408906936646, "learning_rate": 2.5052701908187078e-06, "loss": 0.0674, "step": 39620 }, { "epoch": 18.54481628832202, "grad_norm": 0.4423544108867645, "learning_rate": 2.5049964823349874e-06, "loss": 0.0643, "step": 39630 }, { "epoch": 18.549496840627192, "grad_norm": 0.5601761341094971, "learning_rate": 2.5047300706119626e-06, "loss": 0.0612, "step": 39640 }, { "epoch": 18.554177392932367, "grad_norm": 0.4787710905075073, "learning_rate": 2.504470955822508e-06, "loss": 0.0554, "step": 39650 }, { "epoch": 18.558857945237538, "grad_norm": 0.5120471119880676, "learning_rate": 2.50421913813476e-06, "loss": 0.0578, "step": 39660 }, { "epoch": 18.56353849754271, "grad_norm": 0.5496343970298767, "learning_rate": 2.5039746177121173e-06, "loss": 0.0692, "step": 39670 }, { "epoch": 18.568219049847883, "grad_norm": 0.7498557567596436, "learning_rate": 2.50373739471325e-06, "loss": 0.0774, "step": 39680 }, { "epoch": 18.572899602153054, "grad_norm": 0.7692570686340332, "learning_rate": 2.5035074692920893e-06, "loss": 0.0692, "step": 39690 }, { "epoch": 18.577580154458225, "grad_norm": 0.48233574628829956, "learning_rate": 2.5032848415978297e-06, "loss": 0.0688, "step": 39700 }, { "epoch": 18.5822607067634, "grad_norm": 0.5667234063148499, "learning_rate": 2.5030695117749325e-06, "loss": 0.0543, "step": 39710 }, { "epoch": 18.58694125906857, "grad_norm": 0.48945364356040955, "learning_rate": 2.502861479963123e-06, "loss": 0.0595, "step": 39720 }, { "epoch": 18.59162181137374, "grad_norm": 0.5241784453392029, "learning_rate": 2.5026607462973898e-06, "loss": 0.0667, "step": 39730 }, { "epoch": 18.596302363678912, "grad_norm": 0.7610545754432678, "learning_rate": 2.5024673109079884e-06, "loss": 0.0613, "step": 39740 }, { "epoch": 18.600982915984087, "grad_norm": 0.4841451346874237, "learning_rate": 2.5022811739204363e-06, "loss": 0.0691, "step": 39750 }, { "epoch": 18.605663468289258, "grad_norm": 0.6627439856529236, "learning_rate": 2.5021023354555155e-06, "loss": 0.0628, "step": 39760 }, { "epoch": 18.61034402059443, "grad_norm": 0.579265296459198, "learning_rate": 2.501930795629273e-06, "loss": 0.0612, "step": 39770 }, { "epoch": 18.615024572899603, "grad_norm": 0.7993896007537842, "learning_rate": 2.501766554553019e-06, "loss": 0.0668, "step": 39780 }, { "epoch": 18.619705125204774, "grad_norm": 0.5533232688903809, "learning_rate": 2.501609612333326e-06, "loss": 0.0741, "step": 39790 }, { "epoch": 18.624385677509945, "grad_norm": 0.5675827264785767, "learning_rate": 2.501459969072035e-06, "loss": 0.0573, "step": 39800 }, { "epoch": 18.62906622981512, "grad_norm": 0.652480959892273, "learning_rate": 2.501317624866245e-06, "loss": 0.0655, "step": 39810 }, { "epoch": 18.63374678212029, "grad_norm": 0.5515062212944031, "learning_rate": 2.5011825798083243e-06, "loss": 0.0548, "step": 39820 }, { "epoch": 18.63842733442546, "grad_norm": 0.4012795388698578, "learning_rate": 2.501054833985901e-06, "loss": 0.0591, "step": 39830 }, { "epoch": 18.643107886730633, "grad_norm": 0.5283348560333252, "learning_rate": 2.500934387481867e-06, "loss": 0.0621, "step": 39840 }, { "epoch": 18.647788439035807, "grad_norm": 0.4855096638202667, "learning_rate": 2.50082124037438e-06, "loss": 0.0646, "step": 39850 }, { "epoch": 18.652468991340978, "grad_norm": 0.6461547017097473, "learning_rate": 2.50071539273686e-06, "loss": 0.0675, "step": 39860 }, { "epoch": 18.65714954364615, "grad_norm": 0.6635136604309082, "learning_rate": 2.500616844637989e-06, "loss": 0.0625, "step": 39870 }, { "epoch": 18.661830095951323, "grad_norm": 0.5391615629196167, "learning_rate": 2.5005255961417147e-06, "loss": 0.0626, "step": 39880 }, { "epoch": 18.666510648256494, "grad_norm": 0.5532911419868469, "learning_rate": 2.5004416473072483e-06, "loss": 0.062, "step": 39890 }, { "epoch": 18.671191200561665, "grad_norm": 0.7019877433776855, "learning_rate": 2.500364998189061e-06, "loss": 0.0601, "step": 39900 }, { "epoch": 18.67587175286684, "grad_norm": 0.6771016120910645, "learning_rate": 2.5002956488368913e-06, "loss": 0.0672, "step": 39910 }, { "epoch": 18.68055230517201, "grad_norm": 0.5325042009353638, "learning_rate": 2.5002335992957397e-06, "loss": 0.0585, "step": 39920 }, { "epoch": 18.68523285747718, "grad_norm": 0.6539416313171387, "learning_rate": 2.500178849605868e-06, "loss": 0.0501, "step": 39930 }, { "epoch": 18.689913409782353, "grad_norm": 0.697388768196106, "learning_rate": 2.5001313998028027e-06, "loss": 0.055, "step": 39940 }, { "epoch": 18.694593962087527, "grad_norm": 0.6056908965110779, "learning_rate": 2.5000912499173336e-06, "loss": 0.0561, "step": 39950 }, { "epoch": 18.699274514392698, "grad_norm": 0.4871160089969635, "learning_rate": 2.500058399975516e-06, "loss": 0.0546, "step": 39960 }, { "epoch": 18.70395506669787, "grad_norm": 0.6874995231628418, "learning_rate": 2.5000328499986614e-06, "loss": 0.0647, "step": 39970 }, { "epoch": 18.708635619003044, "grad_norm": 0.4527466297149658, "learning_rate": 2.5000146000033528e-06, "loss": 0.0664, "step": 39980 }, { "epoch": 18.713316171308215, "grad_norm": 0.5639970302581787, "learning_rate": 2.5000036500014303e-06, "loss": 0.0647, "step": 39990 }, { "epoch": 18.717996723613386, "grad_norm": 0.5674973726272583, "learning_rate": 2.5e-06, "loss": 0.0598, "step": 40000 } ], "logging_steps": 10, "max_steps": 40000, "num_input_tokens_seen": 0, "num_train_epochs": 19, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.2480377555569345e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }