diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,44650 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3429694259236142, + "eval_steps": 3000, + "global_step": 312000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0002152194592826305, + "grad_norm": 26.8603458404541, + "learning_rate": 4.9000000000000005e-06, + "loss": 17.3227, + "step": 50 + }, + { + "epoch": 0.000430438918565261, + "grad_norm": 19.329322814941406, + "learning_rate": 9.900000000000002e-06, + "loss": 12.0508, + "step": 100 + }, + { + "epoch": 0.0006456583778478914, + "grad_norm": 115.69083404541016, + "learning_rate": 1.49e-05, + "loss": 10.803, + "step": 150 + }, + { + "epoch": 0.000860877837130522, + "grad_norm": 7.340383052825928, + "learning_rate": 1.9900000000000003e-05, + "loss": 8.7776, + "step": 200 + }, + { + "epoch": 0.0010760972964131525, + "grad_norm": 13.906534194946289, + "learning_rate": 2.4900000000000002e-05, + "loss": 7.9176, + "step": 250 + }, + { + "epoch": 0.001291316755695783, + "grad_norm": 12.21853256225586, + "learning_rate": 2.9900000000000002e-05, + "loss": 7.795, + "step": 300 + }, + { + "epoch": 0.0015065362149784135, + "grad_norm": 6.993707180023193, + "learning_rate": 3.49e-05, + "loss": 8.0983, + "step": 350 + }, + { + "epoch": 0.001721755674261044, + "grad_norm": 6.319125175476074, + "learning_rate": 3.99e-05, + "loss": 7.7024, + "step": 400 + }, + { + "epoch": 0.0019369751335436745, + "grad_norm": 6.399354457855225, + "learning_rate": 4.49e-05, + "loss": 7.1821, + "step": 450 + }, + { + "epoch": 0.002152194592826305, + "grad_norm": 7.419716835021973, + "learning_rate": 4.99e-05, + "loss": 7.0798, + "step": 500 + }, + { + "epoch": 0.0023674140521089356, + "grad_norm": 15.403363227844238, + "learning_rate": 5.4900000000000006e-05, + "loss": 6.8454, + "step": 550 + }, + { + "epoch": 0.002582633511391566, + "grad_norm": 5.574752330780029, + "learning_rate": 5.99e-05, + "loss": 7.4043, + "step": 600 + }, + { + "epoch": 0.0027978529706741964, + "grad_norm": 2.660090923309326, + "learning_rate": 6.49e-05, + "loss": 7.2324, + "step": 650 + }, + { + "epoch": 0.003013072429956827, + "grad_norm": 7.386739730834961, + "learning_rate": 6.99e-05, + "loss": 6.6835, + "step": 700 + }, + { + "epoch": 0.0032282918892394576, + "grad_norm": 3.3042895793914795, + "learning_rate": 7.49e-05, + "loss": 7.2021, + "step": 750 + }, + { + "epoch": 0.003443511348522088, + "grad_norm": 3.473345994949341, + "learning_rate": 7.99e-05, + "loss": 7.2807, + "step": 800 + }, + { + "epoch": 0.0036587308078047183, + "grad_norm": 2.8941078186035156, + "learning_rate": 8.49e-05, + "loss": 6.8374, + "step": 850 + }, + { + "epoch": 0.003873950267087349, + "grad_norm": 1.0632754564285278, + "learning_rate": 8.99e-05, + "loss": 7.032, + "step": 900 + }, + { + "epoch": 0.004089169726369979, + "grad_norm": 4.419502258300781, + "learning_rate": 9.49e-05, + "loss": 6.3488, + "step": 950 + }, + { + "epoch": 0.00430438918565261, + "grad_norm": 2.358043909072876, + "learning_rate": 9.99e-05, + "loss": 7.1425, + "step": 1000 + }, + { + "epoch": 0.00451960864493524, + "grad_norm": 3.326087236404419, + "learning_rate": 9.999999956019209e-05, + "loss": 6.9698, + "step": 1050 + }, + { + "epoch": 0.004734828104217871, + "grad_norm": 7.676785945892334, + "learning_rate": 9.99999982046825e-05, + "loss": 7.4023, + "step": 1100 + }, + { + "epoch": 0.004950047563500501, + "grad_norm": 4.259026050567627, + "learning_rate": 9.999999593328806e-05, + "loss": 6.8372, + "step": 1150 + }, + { + "epoch": 0.005165267022783132, + "grad_norm": 3.340075731277466, + "learning_rate": 9.999999274600884e-05, + "loss": 6.7022, + "step": 1200 + }, + { + "epoch": 0.005380486482065763, + "grad_norm": 2.2752718925476074, + "learning_rate": 9.999998864284489e-05, + "loss": 6.8091, + "step": 1250 + }, + { + "epoch": 0.005595705941348393, + "grad_norm": 3.7426183223724365, + "learning_rate": 9.999998362379628e-05, + "loss": 6.8313, + "step": 1300 + }, + { + "epoch": 0.005810925400631024, + "grad_norm": 1.9600551128387451, + "learning_rate": 9.999997768886309e-05, + "loss": 6.2167, + "step": 1350 + }, + { + "epoch": 0.006026144859913654, + "grad_norm": 1.950754165649414, + "learning_rate": 9.999997083804546e-05, + "loss": 6.5001, + "step": 1400 + }, + { + "epoch": 0.006241364319196284, + "grad_norm": 10.429224967956543, + "learning_rate": 9.999996307134347e-05, + "loss": 7.819, + "step": 1450 + }, + { + "epoch": 0.006456583778478915, + "grad_norm": 3.7420217990875244, + "learning_rate": 9.999995438875732e-05, + "loss": 7.0269, + "step": 1500 + }, + { + "epoch": 0.006671803237761545, + "grad_norm": 4.063221454620361, + "learning_rate": 9.999994479028712e-05, + "loss": 6.9488, + "step": 1550 + }, + { + "epoch": 0.006887022697044176, + "grad_norm": 3.9936013221740723, + "learning_rate": 9.999993427593307e-05, + "loss": 6.2174, + "step": 1600 + }, + { + "epoch": 0.0071022421563268065, + "grad_norm": 2.0001220703125, + "learning_rate": 9.999992284569536e-05, + "loss": 7.1857, + "step": 1650 + }, + { + "epoch": 0.007317461615609437, + "grad_norm": 0.7174533605575562, + "learning_rate": 9.999991049957419e-05, + "loss": 6.2459, + "step": 1700 + }, + { + "epoch": 0.007532681074892068, + "grad_norm": 3.2691726684570312, + "learning_rate": 9.99998972375698e-05, + "loss": 6.9517, + "step": 1750 + }, + { + "epoch": 0.007747900534174698, + "grad_norm": 3.9060966968536377, + "learning_rate": 9.999988305968241e-05, + "loss": 6.4712, + "step": 1800 + }, + { + "epoch": 0.007963119993457328, + "grad_norm": 1.9494738578796387, + "learning_rate": 9.999986796591232e-05, + "loss": 6.9059, + "step": 1850 + }, + { + "epoch": 0.008178339452739958, + "grad_norm": 3.455657720565796, + "learning_rate": 9.999985195625976e-05, + "loss": 6.8742, + "step": 1900 + }, + { + "epoch": 0.00839355891202259, + "grad_norm": 2.422761917114258, + "learning_rate": 9.999983503072506e-05, + "loss": 6.76, + "step": 1950 + }, + { + "epoch": 0.00860877837130522, + "grad_norm": 1.5549824237823486, + "learning_rate": 9.99998171893085e-05, + "loss": 7.2892, + "step": 2000 + }, + { + "epoch": 0.00882399783058785, + "grad_norm": 2.941375494003296, + "learning_rate": 9.999979843201042e-05, + "loss": 6.9165, + "step": 2050 + }, + { + "epoch": 0.00903921728987048, + "grad_norm": 2.5329244136810303, + "learning_rate": 9.999977875883117e-05, + "loss": 7.0663, + "step": 2100 + }, + { + "epoch": 0.00925443674915311, + "grad_norm": 4.413207530975342, + "learning_rate": 9.999975816977112e-05, + "loss": 6.9124, + "step": 2150 + }, + { + "epoch": 0.009469656208435743, + "grad_norm": 2.361320734024048, + "learning_rate": 9.999973666483062e-05, + "loss": 7.1641, + "step": 2200 + }, + { + "epoch": 0.009684875667718373, + "grad_norm": 6.783429145812988, + "learning_rate": 9.999971424401008e-05, + "loss": 6.9556, + "step": 2250 + }, + { + "epoch": 0.009900095127001003, + "grad_norm": 2.9611141681671143, + "learning_rate": 9.999969090730991e-05, + "loss": 6.7163, + "step": 2300 + }, + { + "epoch": 0.010115314586283633, + "grad_norm": 5.0506157875061035, + "learning_rate": 9.999966665473053e-05, + "loss": 6.472, + "step": 2350 + }, + { + "epoch": 0.010330534045566263, + "grad_norm": 0.7159901857376099, + "learning_rate": 9.999964148627239e-05, + "loss": 7.2062, + "step": 2400 + }, + { + "epoch": 0.010545753504848895, + "grad_norm": 1.947230339050293, + "learning_rate": 9.999961540193594e-05, + "loss": 6.9465, + "step": 2450 + }, + { + "epoch": 0.010760972964131525, + "grad_norm": 3.5723071098327637, + "learning_rate": 9.999958840172169e-05, + "loss": 6.8594, + "step": 2500 + }, + { + "epoch": 0.010976192423414155, + "grad_norm": 3.7859625816345215, + "learning_rate": 9.99995604856301e-05, + "loss": 6.757, + "step": 2550 + }, + { + "epoch": 0.011191411882696786, + "grad_norm": 6.068451404571533, + "learning_rate": 9.999953165366169e-05, + "loss": 6.8339, + "step": 2600 + }, + { + "epoch": 0.011406631341979416, + "grad_norm": 2.0270204544067383, + "learning_rate": 9.9999501905817e-05, + "loss": 6.4042, + "step": 2650 + }, + { + "epoch": 0.011621850801262048, + "grad_norm": 0.8194941282272339, + "learning_rate": 9.999947124209656e-05, + "loss": 6.6766, + "step": 2700 + }, + { + "epoch": 0.011837070260544678, + "grad_norm": 2.037416458129883, + "learning_rate": 9.999943966250095e-05, + "loss": 6.2684, + "step": 2750 + }, + { + "epoch": 0.012052289719827308, + "grad_norm": 2.8946526050567627, + "learning_rate": 9.999940716703074e-05, + "loss": 7.0975, + "step": 2800 + }, + { + "epoch": 0.012267509179109938, + "grad_norm": 0.834996223449707, + "learning_rate": 9.99993737556865e-05, + "loss": 5.9214, + "step": 2850 + }, + { + "epoch": 0.012482728638392568, + "grad_norm": 2.337341547012329, + "learning_rate": 9.999933942846887e-05, + "loss": 6.7977, + "step": 2900 + }, + { + "epoch": 0.0126979480976752, + "grad_norm": 2.514374017715454, + "learning_rate": 9.999930418537848e-05, + "loss": 7.3097, + "step": 2950 + }, + { + "epoch": 0.01291316755695783, + "grad_norm": 1.768357276916504, + "learning_rate": 9.999926802641596e-05, + "loss": 6.7894, + "step": 3000 + }, + { + "epoch": 0.01291316755695783, + "eval_loss": 7.275515556335449, + "eval_runtime": 35.0351, + "eval_samples_per_second": 18.267, + "eval_steps_per_second": 9.134, + "eval_tts_loss": 6.135569774283903, + "step": 3000 + }, + { + "epoch": 0.01312838701624046, + "grad_norm": 3.094282627105713, + "learning_rate": 9.9999230951582e-05, + "loss": 7.1422, + "step": 3050 + }, + { + "epoch": 0.01334360647552309, + "grad_norm": 0.6786778569221497, + "learning_rate": 9.999919296087725e-05, + "loss": 6.0048, + "step": 3100 + }, + { + "epoch": 0.01355882593480572, + "grad_norm": 1.9495091438293457, + "learning_rate": 9.999915405430243e-05, + "loss": 7.0515, + "step": 3150 + }, + { + "epoch": 0.013774045394088353, + "grad_norm": 2.578829050064087, + "learning_rate": 9.999911423185821e-05, + "loss": 6.3446, + "step": 3200 + }, + { + "epoch": 0.013989264853370983, + "grad_norm": 1.1182180643081665, + "learning_rate": 9.999907349354536e-05, + "loss": 6.3711, + "step": 3250 + }, + { + "epoch": 0.014204484312653613, + "grad_norm": 4.450316905975342, + "learning_rate": 9.999903183936462e-05, + "loss": 6.9344, + "step": 3300 + }, + { + "epoch": 0.014419703771936243, + "grad_norm": 1.6623255014419556, + "learning_rate": 9.999898926931675e-05, + "loss": 6.3902, + "step": 3350 + }, + { + "epoch": 0.014634923231218873, + "grad_norm": 1.7871100902557373, + "learning_rate": 9.999894578340252e-05, + "loss": 6.5327, + "step": 3400 + }, + { + "epoch": 0.014850142690501505, + "grad_norm": 4.765092849731445, + "learning_rate": 9.999890138162273e-05, + "loss": 6.6671, + "step": 3450 + }, + { + "epoch": 0.015065362149784135, + "grad_norm": 5.045365810394287, + "learning_rate": 9.999885606397821e-05, + "loss": 6.6274, + "step": 3500 + }, + { + "epoch": 0.015280581609066765, + "grad_norm": 0.7197855114936829, + "learning_rate": 9.999880983046976e-05, + "loss": 6.3428, + "step": 3550 + }, + { + "epoch": 0.015495801068349396, + "grad_norm": 2.3140616416931152, + "learning_rate": 9.999876268109826e-05, + "loss": 6.5606, + "step": 3600 + }, + { + "epoch": 0.015711020527632027, + "grad_norm": 1.8878636360168457, + "learning_rate": 9.999871461586456e-05, + "loss": 6.9379, + "step": 3650 + }, + { + "epoch": 0.015926239986914656, + "grad_norm": 3.4104018211364746, + "learning_rate": 9.999866563476951e-05, + "loss": 6.496, + "step": 3700 + }, + { + "epoch": 0.016141459446197288, + "grad_norm": 0.8844468593597412, + "learning_rate": 9.999861573781408e-05, + "loss": 6.4851, + "step": 3750 + }, + { + "epoch": 0.016356678905479916, + "grad_norm": 2.5082895755767822, + "learning_rate": 9.99985649249991e-05, + "loss": 6.8788, + "step": 3800 + }, + { + "epoch": 0.016571898364762548, + "grad_norm": 2.9652111530303955, + "learning_rate": 9.999851319632556e-05, + "loss": 6.249, + "step": 3850 + }, + { + "epoch": 0.01678711782404518, + "grad_norm": 2.324798107147217, + "learning_rate": 9.999846055179439e-05, + "loss": 6.4349, + "step": 3900 + }, + { + "epoch": 0.01700233728332781, + "grad_norm": 2.669116497039795, + "learning_rate": 9.999840699140655e-05, + "loss": 6.6811, + "step": 3950 + }, + { + "epoch": 0.01721755674261044, + "grad_norm": 3.599411964416504, + "learning_rate": 9.9998352515163e-05, + "loss": 6.6702, + "step": 4000 + }, + { + "epoch": 0.01743277620189307, + "grad_norm": 3.272143840789795, + "learning_rate": 9.999829712306479e-05, + "loss": 7.0483, + "step": 4050 + }, + { + "epoch": 0.0176479956611757, + "grad_norm": 2.94260311126709, + "learning_rate": 9.99982408151129e-05, + "loss": 7.3339, + "step": 4100 + }, + { + "epoch": 0.017863215120458333, + "grad_norm": 1.9961050748825073, + "learning_rate": 9.999818359130836e-05, + "loss": 6.3311, + "step": 4150 + }, + { + "epoch": 0.01807843457974096, + "grad_norm": 1.1790952682495117, + "learning_rate": 9.999812545165224e-05, + "loss": 6.4102, + "step": 4200 + }, + { + "epoch": 0.018293654039023593, + "grad_norm": 1.72084641456604, + "learning_rate": 9.999806639614557e-05, + "loss": 6.5153, + "step": 4250 + }, + { + "epoch": 0.01850887349830622, + "grad_norm": 2.550615072250366, + "learning_rate": 9.999800642478945e-05, + "loss": 6.5701, + "step": 4300 + }, + { + "epoch": 0.018724092957588853, + "grad_norm": 1.3754812479019165, + "learning_rate": 9.999794553758501e-05, + "loss": 6.6434, + "step": 4350 + }, + { + "epoch": 0.018939312416871485, + "grad_norm": 2.917146921157837, + "learning_rate": 9.999788373453332e-05, + "loss": 6.1996, + "step": 4400 + }, + { + "epoch": 0.019154531876154113, + "grad_norm": 1.454862117767334, + "learning_rate": 9.999782101563552e-05, + "loss": 6.4451, + "step": 4450 + }, + { + "epoch": 0.019369751335436745, + "grad_norm": 3.654081344604492, + "learning_rate": 9.999775738089277e-05, + "loss": 6.3259, + "step": 4500 + }, + { + "epoch": 0.019584970794719374, + "grad_norm": 3.3001208305358887, + "learning_rate": 9.999769283030624e-05, + "loss": 7.0563, + "step": 4550 + }, + { + "epoch": 0.019800190254002006, + "grad_norm": 2.0983545780181885, + "learning_rate": 9.99976273638771e-05, + "loss": 6.8418, + "step": 4600 + }, + { + "epoch": 0.020015409713284638, + "grad_norm": 2.6828572750091553, + "learning_rate": 9.999756098160656e-05, + "loss": 6.0864, + "step": 4650 + }, + { + "epoch": 0.020230629172567266, + "grad_norm": 0.7964070439338684, + "learning_rate": 9.999749368349583e-05, + "loss": 6.6171, + "step": 4700 + }, + { + "epoch": 0.020445848631849898, + "grad_norm": 2.583233118057251, + "learning_rate": 9.999742546954612e-05, + "loss": 6.8445, + "step": 4750 + }, + { + "epoch": 0.020661068091132526, + "grad_norm": 2.9055895805358887, + "learning_rate": 9.999735633975872e-05, + "loss": 6.5711, + "step": 4800 + }, + { + "epoch": 0.020876287550415158, + "grad_norm": 1.8964048624038696, + "learning_rate": 9.999728629413488e-05, + "loss": 6.3122, + "step": 4850 + }, + { + "epoch": 0.02109150700969779, + "grad_norm": 2.1846516132354736, + "learning_rate": 9.999721533267588e-05, + "loss": 6.5677, + "step": 4900 + }, + { + "epoch": 0.02130672646898042, + "grad_norm": 1.014978051185608, + "learning_rate": 9.999714345538303e-05, + "loss": 6.6308, + "step": 4950 + }, + { + "epoch": 0.02152194592826305, + "grad_norm": 4.362516403198242, + "learning_rate": 9.999707066225763e-05, + "loss": 6.6754, + "step": 5000 + }, + { + "epoch": 0.02173716538754568, + "grad_norm": 2.6892008781433105, + "learning_rate": 9.999699695330102e-05, + "loss": 6.1407, + "step": 5050 + }, + { + "epoch": 0.02195238484682831, + "grad_norm": 2.0193397998809814, + "learning_rate": 9.999692232851456e-05, + "loss": 6.5683, + "step": 5100 + }, + { + "epoch": 0.022167604306110943, + "grad_norm": 2.0567421913146973, + "learning_rate": 9.99968467878996e-05, + "loss": 6.0512, + "step": 5150 + }, + { + "epoch": 0.02238282376539357, + "grad_norm": 3.451784610748291, + "learning_rate": 9.999677033145754e-05, + "loss": 6.3591, + "step": 5200 + }, + { + "epoch": 0.022598043224676203, + "grad_norm": 1.3246066570281982, + "learning_rate": 9.999669295918977e-05, + "loss": 6.7152, + "step": 5250 + }, + { + "epoch": 0.02281326268395883, + "grad_norm": 2.4277822971343994, + "learning_rate": 9.999661467109771e-05, + "loss": 6.2632, + "step": 5300 + }, + { + "epoch": 0.023028482143241463, + "grad_norm": 2.5939643383026123, + "learning_rate": 9.99965354671828e-05, + "loss": 7.0792, + "step": 5350 + }, + { + "epoch": 0.023243701602524095, + "grad_norm": 1.9005554914474487, + "learning_rate": 9.999645534744649e-05, + "loss": 6.3339, + "step": 5400 + }, + { + "epoch": 0.023458921061806724, + "grad_norm": 3.7331430912017822, + "learning_rate": 9.999637431189022e-05, + "loss": 6.505, + "step": 5450 + }, + { + "epoch": 0.023674140521089355, + "grad_norm": 3.3390331268310547, + "learning_rate": 9.999629236051551e-05, + "loss": 6.4006, + "step": 5500 + }, + { + "epoch": 0.023889359980371984, + "grad_norm": 2.780344247817993, + "learning_rate": 9.999620949332385e-05, + "loss": 6.5983, + "step": 5550 + }, + { + "epoch": 0.024104579439654616, + "grad_norm": 2.368583917617798, + "learning_rate": 9.999612571031676e-05, + "loss": 6.4749, + "step": 5600 + }, + { + "epoch": 0.024319798898937248, + "grad_norm": 1.8673356771469116, + "learning_rate": 9.999604101149575e-05, + "loss": 6.0905, + "step": 5650 + }, + { + "epoch": 0.024535018358219876, + "grad_norm": 1.9339599609375, + "learning_rate": 9.999595539686242e-05, + "loss": 6.5353, + "step": 5700 + }, + { + "epoch": 0.024750237817502508, + "grad_norm": 1.1314785480499268, + "learning_rate": 9.99958688664183e-05, + "loss": 6.229, + "step": 5750 + }, + { + "epoch": 0.024965457276785136, + "grad_norm": 1.9622793197631836, + "learning_rate": 9.999578142016498e-05, + "loss": 6.3641, + "step": 5800 + }, + { + "epoch": 0.02518067673606777, + "grad_norm": 3.035353660583496, + "learning_rate": 9.999569305810408e-05, + "loss": 5.8934, + "step": 5850 + }, + { + "epoch": 0.0253958961953504, + "grad_norm": 2.4103734493255615, + "learning_rate": 9.999560378023719e-05, + "loss": 5.8673, + "step": 5900 + }, + { + "epoch": 0.02561111565463303, + "grad_norm": 2.6695399284362793, + "learning_rate": 9.999551358656598e-05, + "loss": 6.3541, + "step": 5950 + }, + { + "epoch": 0.02582633511391566, + "grad_norm": 2.1022303104400635, + "learning_rate": 9.999542247709205e-05, + "loss": 6.2268, + "step": 6000 + }, + { + "epoch": 0.02582633511391566, + "eval_loss": 6.915530204772949, + "eval_runtime": 35.1031, + "eval_samples_per_second": 18.232, + "eval_steps_per_second": 9.116, + "eval_tts_loss": 5.8806745554786595, + "step": 6000 + }, + { + "epoch": 0.02604155457319829, + "grad_norm": 4.164190769195557, + "learning_rate": 9.999533045181715e-05, + "loss": 6.7108, + "step": 6050 + }, + { + "epoch": 0.02625677403248092, + "grad_norm": 2.8115005493164062, + "learning_rate": 9.999523751074289e-05, + "loss": 6.2837, + "step": 6100 + }, + { + "epoch": 0.026471993491763553, + "grad_norm": 0.8663503527641296, + "learning_rate": 9.9995143653871e-05, + "loss": 6.6174, + "step": 6150 + }, + { + "epoch": 0.02668721295104618, + "grad_norm": 2.407132148742676, + "learning_rate": 9.999504888120321e-05, + "loss": 6.0997, + "step": 6200 + }, + { + "epoch": 0.026902432410328813, + "grad_norm": 0.35421791672706604, + "learning_rate": 9.999495319274124e-05, + "loss": 6.0708, + "step": 6250 + }, + { + "epoch": 0.02711765186961144, + "grad_norm": 3.39981746673584, + "learning_rate": 9.999485658848686e-05, + "loss": 6.6188, + "step": 6300 + }, + { + "epoch": 0.027332871328894073, + "grad_norm": 0.8876989483833313, + "learning_rate": 9.999475906844183e-05, + "loss": 5.9043, + "step": 6350 + }, + { + "epoch": 0.027548090788176705, + "grad_norm": 1.6306861639022827, + "learning_rate": 9.999466063260792e-05, + "loss": 6.2692, + "step": 6400 + }, + { + "epoch": 0.027763310247459334, + "grad_norm": 2.470092296600342, + "learning_rate": 9.999456128098694e-05, + "loss": 6.3113, + "step": 6450 + }, + { + "epoch": 0.027978529706741966, + "grad_norm": 1.6965795755386353, + "learning_rate": 9.999446101358074e-05, + "loss": 6.1343, + "step": 6500 + }, + { + "epoch": 0.028193749166024594, + "grad_norm": 5.296630382537842, + "learning_rate": 9.999435983039112e-05, + "loss": 5.8717, + "step": 6550 + }, + { + "epoch": 0.028408968625307226, + "grad_norm": 2.880582809448242, + "learning_rate": 9.999425773141995e-05, + "loss": 6.5886, + "step": 6600 + }, + { + "epoch": 0.028624188084589858, + "grad_norm": 1.231257438659668, + "learning_rate": 9.999415471666911e-05, + "loss": 5.5722, + "step": 6650 + }, + { + "epoch": 0.028839407543872486, + "grad_norm": 1.3361687660217285, + "learning_rate": 9.999405078614046e-05, + "loss": 6.0135, + "step": 6700 + }, + { + "epoch": 0.029054627003155118, + "grad_norm": 2.092428684234619, + "learning_rate": 9.999394593983593e-05, + "loss": 5.7688, + "step": 6750 + }, + { + "epoch": 0.029269846462437746, + "grad_norm": 3.2430238723754883, + "learning_rate": 9.999384017775742e-05, + "loss": 5.6645, + "step": 6800 + }, + { + "epoch": 0.02948506592172038, + "grad_norm": 3.6371824741363525, + "learning_rate": 9.999373349990688e-05, + "loss": 6.8558, + "step": 6850 + }, + { + "epoch": 0.02970028538100301, + "grad_norm": 1.9467822313308716, + "learning_rate": 9.999362590628628e-05, + "loss": 5.983, + "step": 6900 + }, + { + "epoch": 0.02991550484028564, + "grad_norm": 2.434537649154663, + "learning_rate": 9.999351739689755e-05, + "loss": 6.3175, + "step": 6950 + }, + { + "epoch": 0.03013072429956827, + "grad_norm": 1.664536714553833, + "learning_rate": 9.99934079717427e-05, + "loss": 6.8836, + "step": 7000 + }, + { + "epoch": 0.0303459437588509, + "grad_norm": 2.70991587638855, + "learning_rate": 9.999329763082372e-05, + "loss": 6.6681, + "step": 7050 + }, + { + "epoch": 0.03056116321813353, + "grad_norm": 2.0078442096710205, + "learning_rate": 9.999318637414268e-05, + "loss": 6.6222, + "step": 7100 + }, + { + "epoch": 0.030776382677416163, + "grad_norm": 2.9179351329803467, + "learning_rate": 9.999307420170156e-05, + "loss": 6.2226, + "step": 7150 + }, + { + "epoch": 0.03099160213669879, + "grad_norm": 4.912502765655518, + "learning_rate": 9.999296111350244e-05, + "loss": 5.9425, + "step": 7200 + }, + { + "epoch": 0.031206821595981423, + "grad_norm": 2.946540355682373, + "learning_rate": 9.99928471095474e-05, + "loss": 5.5484, + "step": 7250 + }, + { + "epoch": 0.031422041055264055, + "grad_norm": 3.060032606124878, + "learning_rate": 9.99927321898385e-05, + "loss": 6.4153, + "step": 7300 + }, + { + "epoch": 0.03163726051454668, + "grad_norm": 3.826935052871704, + "learning_rate": 9.999261635437787e-05, + "loss": 6.3109, + "step": 7350 + }, + { + "epoch": 0.03185247997382931, + "grad_norm": 3.113369941711426, + "learning_rate": 9.999249960316763e-05, + "loss": 6.571, + "step": 7400 + }, + { + "epoch": 0.03206769943311195, + "grad_norm": 2.408398151397705, + "learning_rate": 9.99923819362099e-05, + "loss": 6.3607, + "step": 7450 + }, + { + "epoch": 0.032282918892394576, + "grad_norm": 3.5740175247192383, + "learning_rate": 9.999226335350686e-05, + "loss": 6.6424, + "step": 7500 + }, + { + "epoch": 0.032498138351677204, + "grad_norm": 4.697231769561768, + "learning_rate": 9.999214385506068e-05, + "loss": 6.0001, + "step": 7550 + }, + { + "epoch": 0.03271335781095983, + "grad_norm": 2.618335008621216, + "learning_rate": 9.999202344087352e-05, + "loss": 6.0276, + "step": 7600 + }, + { + "epoch": 0.03292857727024247, + "grad_norm": 5.428880214691162, + "learning_rate": 9.999190211094763e-05, + "loss": 6.5664, + "step": 7650 + }, + { + "epoch": 0.033143796729525096, + "grad_norm": 4.682481288909912, + "learning_rate": 9.999177986528518e-05, + "loss": 6.6342, + "step": 7700 + }, + { + "epoch": 0.033359016188807725, + "grad_norm": 3.562303304672241, + "learning_rate": 9.999165670388844e-05, + "loss": 6.1028, + "step": 7750 + }, + { + "epoch": 0.03357423564809036, + "grad_norm": 2.4583373069763184, + "learning_rate": 9.999153262675967e-05, + "loss": 6.7496, + "step": 7800 + }, + { + "epoch": 0.03378945510737299, + "grad_norm": 4.960536003112793, + "learning_rate": 9.999140763390113e-05, + "loss": 5.6814, + "step": 7850 + }, + { + "epoch": 0.03400467456665562, + "grad_norm": 2.3540923595428467, + "learning_rate": 9.999128172531514e-05, + "loss": 6.6005, + "step": 7900 + }, + { + "epoch": 0.03421989402593825, + "grad_norm": 2.7912862300872803, + "learning_rate": 9.999115490100396e-05, + "loss": 6.3217, + "step": 7950 + }, + { + "epoch": 0.03443511348522088, + "grad_norm": 2.257110357284546, + "learning_rate": 9.999102716096992e-05, + "loss": 5.7987, + "step": 8000 + }, + { + "epoch": 0.03465033294450351, + "grad_norm": 0.7829526662826538, + "learning_rate": 9.999089850521542e-05, + "loss": 6.0739, + "step": 8050 + }, + { + "epoch": 0.03486555240378614, + "grad_norm": 2.7528398036956787, + "learning_rate": 9.999076893374272e-05, + "loss": 6.5598, + "step": 8100 + }, + { + "epoch": 0.03508077186306877, + "grad_norm": 3.9428257942199707, + "learning_rate": 9.999063844655429e-05, + "loss": 6.3206, + "step": 8150 + }, + { + "epoch": 0.0352959913223514, + "grad_norm": 2.629434108734131, + "learning_rate": 9.999050704365245e-05, + "loss": 6.6594, + "step": 8200 + }, + { + "epoch": 0.03551121078163403, + "grad_norm": 2.623521566390991, + "learning_rate": 9.999037472503964e-05, + "loss": 6.4933, + "step": 8250 + }, + { + "epoch": 0.035726430240916665, + "grad_norm": 0.8232753276824951, + "learning_rate": 9.999024149071827e-05, + "loss": 6.0177, + "step": 8300 + }, + { + "epoch": 0.035941649700199294, + "grad_norm": 2.0317294597625732, + "learning_rate": 9.999010734069079e-05, + "loss": 5.9525, + "step": 8350 + }, + { + "epoch": 0.03615686915948192, + "grad_norm": 4.573376178741455, + "learning_rate": 9.998997227495965e-05, + "loss": 5.9647, + "step": 8400 + }, + { + "epoch": 0.03637208861876456, + "grad_norm": 1.242768406867981, + "learning_rate": 9.998983629352734e-05, + "loss": 6.2669, + "step": 8450 + }, + { + "epoch": 0.036587308078047186, + "grad_norm": 1.8926278352737427, + "learning_rate": 9.998969939639631e-05, + "loss": 5.6523, + "step": 8500 + }, + { + "epoch": 0.036802527537329814, + "grad_norm": 3.1702353954315186, + "learning_rate": 9.998956158356912e-05, + "loss": 5.6008, + "step": 8550 + }, + { + "epoch": 0.03701774699661244, + "grad_norm": 1.6955516338348389, + "learning_rate": 9.998942285504827e-05, + "loss": 6.1786, + "step": 8600 + }, + { + "epoch": 0.03723296645589508, + "grad_norm": 2.589771032333374, + "learning_rate": 9.99892832108363e-05, + "loss": 5.5695, + "step": 8650 + }, + { + "epoch": 0.037448185915177706, + "grad_norm": 3.171199083328247, + "learning_rate": 9.998914265093578e-05, + "loss": 5.6731, + "step": 8700 + }, + { + "epoch": 0.037663405374460335, + "grad_norm": 4.114261150360107, + "learning_rate": 9.998900117534925e-05, + "loss": 5.619, + "step": 8750 + }, + { + "epoch": 0.03787862483374297, + "grad_norm": 2.323951005935669, + "learning_rate": 9.998885878407934e-05, + "loss": 6.117, + "step": 8800 + }, + { + "epoch": 0.0380938442930256, + "grad_norm": 3.221102237701416, + "learning_rate": 9.998871547712865e-05, + "loss": 5.5118, + "step": 8850 + }, + { + "epoch": 0.03830906375230823, + "grad_norm": 0.9197509288787842, + "learning_rate": 9.998857125449979e-05, + "loss": 5.6062, + "step": 8900 + }, + { + "epoch": 0.03852428321159086, + "grad_norm": 1.8731451034545898, + "learning_rate": 9.998842611619541e-05, + "loss": 6.2452, + "step": 8950 + }, + { + "epoch": 0.03873950267087349, + "grad_norm": 3.426931142807007, + "learning_rate": 9.998828006221817e-05, + "loss": 6.1218, + "step": 9000 + }, + { + "epoch": 0.03873950267087349, + "eval_loss": 6.622605800628662, + "eval_runtime": 34.8855, + "eval_samples_per_second": 18.346, + "eval_steps_per_second": 9.173, + "eval_tts_loss": 5.8468548438691865, + "step": 9000 + }, + { + "epoch": 0.03895472213015612, + "grad_norm": 3.452681541442871, + "learning_rate": 9.998813309257076e-05, + "loss": 6.3933, + "step": 9050 + }, + { + "epoch": 0.03916994158943875, + "grad_norm": 2.5486531257629395, + "learning_rate": 9.998798520725585e-05, + "loss": 5.9724, + "step": 9100 + }, + { + "epoch": 0.03938516104872138, + "grad_norm": 1.8915916681289673, + "learning_rate": 9.998783640627615e-05, + "loss": 5.9263, + "step": 9150 + }, + { + "epoch": 0.03960038050800401, + "grad_norm": 0.9119992852210999, + "learning_rate": 9.99876866896344e-05, + "loss": 5.8815, + "step": 9200 + }, + { + "epoch": 0.03981559996728664, + "grad_norm": 2.9021859169006348, + "learning_rate": 9.998753605733333e-05, + "loss": 5.9296, + "step": 9250 + }, + { + "epoch": 0.040030819426569275, + "grad_norm": 1.8049207925796509, + "learning_rate": 9.99873845093757e-05, + "loss": 5.645, + "step": 9300 + }, + { + "epoch": 0.040246038885851904, + "grad_norm": 2.2217743396759033, + "learning_rate": 9.99872320457643e-05, + "loss": 6.1254, + "step": 9350 + }, + { + "epoch": 0.04046125834513453, + "grad_norm": 0.4537351131439209, + "learning_rate": 9.99870786665019e-05, + "loss": 5.7958, + "step": 9400 + }, + { + "epoch": 0.04067647780441717, + "grad_norm": 1.9800406694412231, + "learning_rate": 9.998692437159133e-05, + "loss": 6.6597, + "step": 9450 + }, + { + "epoch": 0.040891697263699796, + "grad_norm": 2.1366400718688965, + "learning_rate": 9.99867691610354e-05, + "loss": 6.4338, + "step": 9500 + }, + { + "epoch": 0.041106916722982424, + "grad_norm": 2.132777690887451, + "learning_rate": 9.998661303483698e-05, + "loss": 6.1516, + "step": 9550 + }, + { + "epoch": 0.04132213618226505, + "grad_norm": 3.3035709857940674, + "learning_rate": 9.99864559929989e-05, + "loss": 5.5567, + "step": 9600 + }, + { + "epoch": 0.04153735564154769, + "grad_norm": 1.224552869796753, + "learning_rate": 9.998629803552404e-05, + "loss": 5.5185, + "step": 9650 + }, + { + "epoch": 0.041752575100830316, + "grad_norm": 2.2208364009857178, + "learning_rate": 9.99861391624153e-05, + "loss": 5.9014, + "step": 9700 + }, + { + "epoch": 0.041967794560112945, + "grad_norm": 1.7423096895217896, + "learning_rate": 9.998597937367561e-05, + "loss": 5.7276, + "step": 9750 + }, + { + "epoch": 0.04218301401939558, + "grad_norm": 1.8318346738815308, + "learning_rate": 9.998581866930787e-05, + "loss": 5.7392, + "step": 9800 + }, + { + "epoch": 0.04239823347867821, + "grad_norm": 5.738893985748291, + "learning_rate": 9.998565704931503e-05, + "loss": 5.8078, + "step": 9850 + }, + { + "epoch": 0.04261345293796084, + "grad_norm": 2.1150856018066406, + "learning_rate": 9.998549451370005e-05, + "loss": 5.7726, + "step": 9900 + }, + { + "epoch": 0.04282867239724347, + "grad_norm": 3.820524215698242, + "learning_rate": 9.998533106246592e-05, + "loss": 6.1336, + "step": 9950 + }, + { + "epoch": 0.0430438918565261, + "grad_norm": 3.667034387588501, + "learning_rate": 9.998516669561562e-05, + "loss": 5.5954, + "step": 10000 + }, + { + "epoch": 0.04325911131580873, + "grad_norm": 2.317172050476074, + "learning_rate": 9.998500141315216e-05, + "loss": 5.7046, + "step": 10050 + }, + { + "epoch": 0.04347433077509136, + "grad_norm": 1.6634409427642822, + "learning_rate": 9.998483521507857e-05, + "loss": 6.3074, + "step": 10100 + }, + { + "epoch": 0.04368955023437399, + "grad_norm": 3.321549415588379, + "learning_rate": 9.99846681013979e-05, + "loss": 5.7519, + "step": 10150 + }, + { + "epoch": 0.04390476969365662, + "grad_norm": 2.86082124710083, + "learning_rate": 9.998450007211322e-05, + "loss": 6.0753, + "step": 10200 + }, + { + "epoch": 0.04411998915293925, + "grad_norm": 4.267400741577148, + "learning_rate": 9.998433112722758e-05, + "loss": 5.8526, + "step": 10250 + }, + { + "epoch": 0.044335208612221885, + "grad_norm": 2.909208297729492, + "learning_rate": 9.998416126674411e-05, + "loss": 5.8076, + "step": 10300 + }, + { + "epoch": 0.044550428071504514, + "grad_norm": 3.683120012283325, + "learning_rate": 9.99839904906659e-05, + "loss": 6.081, + "step": 10350 + }, + { + "epoch": 0.04476564753078714, + "grad_norm": 1.8743500709533691, + "learning_rate": 9.998381879899605e-05, + "loss": 5.7538, + "step": 10400 + }, + { + "epoch": 0.04498086699006978, + "grad_norm": 2.0059988498687744, + "learning_rate": 9.998364619173777e-05, + "loss": 6.1639, + "step": 10450 + }, + { + "epoch": 0.045196086449352406, + "grad_norm": 5.152688980102539, + "learning_rate": 9.998347266889416e-05, + "loss": 6.0919, + "step": 10500 + }, + { + "epoch": 0.045411305908635034, + "grad_norm": 2.642868995666504, + "learning_rate": 9.998329823046843e-05, + "loss": 6.1627, + "step": 10550 + }, + { + "epoch": 0.04562652536791766, + "grad_norm": 3.267284870147705, + "learning_rate": 9.998312287646379e-05, + "loss": 5.7052, + "step": 10600 + }, + { + "epoch": 0.0458417448272003, + "grad_norm": 0.7364803552627563, + "learning_rate": 9.998294660688343e-05, + "loss": 5.702, + "step": 10650 + }, + { + "epoch": 0.046056964286482927, + "grad_norm": 1.794511079788208, + "learning_rate": 9.998276942173057e-05, + "loss": 6.1383, + "step": 10700 + }, + { + "epoch": 0.046272183745765555, + "grad_norm": 1.5316740274429321, + "learning_rate": 9.998259132100847e-05, + "loss": 6.353, + "step": 10750 + }, + { + "epoch": 0.04648740320504819, + "grad_norm": 1.2301288843154907, + "learning_rate": 9.99824123047204e-05, + "loss": 5.8512, + "step": 10800 + }, + { + "epoch": 0.04670262266433082, + "grad_norm": 2.319761276245117, + "learning_rate": 9.998223237286961e-05, + "loss": 6.239, + "step": 10850 + }, + { + "epoch": 0.04691784212361345, + "grad_norm": 2.231160879135132, + "learning_rate": 9.998205152545943e-05, + "loss": 5.9399, + "step": 10900 + }, + { + "epoch": 0.04713306158289608, + "grad_norm": 4.840134143829346, + "learning_rate": 9.998186976249315e-05, + "loss": 5.8044, + "step": 10950 + }, + { + "epoch": 0.04734828104217871, + "grad_norm": 5.200071811676025, + "learning_rate": 9.998168708397412e-05, + "loss": 5.8326, + "step": 11000 + }, + { + "epoch": 0.04756350050146134, + "grad_norm": 1.7022992372512817, + "learning_rate": 9.998150348990567e-05, + "loss": 5.8544, + "step": 11050 + }, + { + "epoch": 0.04777871996074397, + "grad_norm": 3.27194881439209, + "learning_rate": 9.998131898029115e-05, + "loss": 5.6729, + "step": 11100 + }, + { + "epoch": 0.0479939394200266, + "grad_norm": 3.980231523513794, + "learning_rate": 9.998113355513397e-05, + "loss": 5.6979, + "step": 11150 + }, + { + "epoch": 0.04820915887930923, + "grad_norm": 1.9998146295547485, + "learning_rate": 9.998094721443752e-05, + "loss": 6.176, + "step": 11200 + }, + { + "epoch": 0.04842437833859186, + "grad_norm": 3.3739256858825684, + "learning_rate": 9.99807599582052e-05, + "loss": 5.5272, + "step": 11250 + }, + { + "epoch": 0.048639597797874495, + "grad_norm": 1.4126222133636475, + "learning_rate": 9.998057178644042e-05, + "loss": 5.6789, + "step": 11300 + }, + { + "epoch": 0.048854817257157124, + "grad_norm": 3.7744109630584717, + "learning_rate": 9.99803826991467e-05, + "loss": 6.1386, + "step": 11350 + }, + { + "epoch": 0.04907003671643975, + "grad_norm": 3.964522123336792, + "learning_rate": 9.998019269632742e-05, + "loss": 6.1285, + "step": 11400 + }, + { + "epoch": 0.04928525617572239, + "grad_norm": 3.206300973892212, + "learning_rate": 9.998000177798611e-05, + "loss": 5.7273, + "step": 11450 + }, + { + "epoch": 0.049500475635005016, + "grad_norm": 1.6873538494110107, + "learning_rate": 9.997980994412625e-05, + "loss": 6.0362, + "step": 11500 + }, + { + "epoch": 0.049715695094287644, + "grad_norm": 2.978600263595581, + "learning_rate": 9.997961719475136e-05, + "loss": 5.8036, + "step": 11550 + }, + { + "epoch": 0.04993091455357027, + "grad_norm": 2.696183204650879, + "learning_rate": 9.997942352986497e-05, + "loss": 5.5853, + "step": 11600 + }, + { + "epoch": 0.05014613401285291, + "grad_norm": 3.4552910327911377, + "learning_rate": 9.997922894947063e-05, + "loss": 6.3501, + "step": 11650 + }, + { + "epoch": 0.05036135347213554, + "grad_norm": 3.2899463176727295, + "learning_rate": 9.99790334535719e-05, + "loss": 6.2418, + "step": 11700 + }, + { + "epoch": 0.050576572931418165, + "grad_norm": 0.4747687578201294, + "learning_rate": 9.997883704217237e-05, + "loss": 6.0017, + "step": 11750 + }, + { + "epoch": 0.0507917923907008, + "grad_norm": 1.9513529539108276, + "learning_rate": 9.997863971527561e-05, + "loss": 5.7911, + "step": 11800 + }, + { + "epoch": 0.05100701184998343, + "grad_norm": 2.9755373001098633, + "learning_rate": 9.997844147288526e-05, + "loss": 5.9174, + "step": 11850 + }, + { + "epoch": 0.05122223130926606, + "grad_norm": 3.2691125869750977, + "learning_rate": 9.997824231500497e-05, + "loss": 6.0606, + "step": 11900 + }, + { + "epoch": 0.051437450768548686, + "grad_norm": 1.8715627193450928, + "learning_rate": 9.997804224163834e-05, + "loss": 6.3076, + "step": 11950 + }, + { + "epoch": 0.05165267022783132, + "grad_norm": 0.7529690861701965, + "learning_rate": 9.997784125278906e-05, + "loss": 5.3057, + "step": 12000 + }, + { + "epoch": 0.05165267022783132, + "eval_loss": 6.4022393226623535, + "eval_runtime": 35.08, + "eval_samples_per_second": 18.244, + "eval_steps_per_second": 9.122, + "eval_tts_loss": 5.8680110494110895, + "step": 12000 + }, + { + "epoch": 0.05186788968711395, + "grad_norm": 3.1013505458831787, + "learning_rate": 9.99776393484608e-05, + "loss": 6.2185, + "step": 12050 + }, + { + "epoch": 0.05208310914639658, + "grad_norm": 4.53584623336792, + "learning_rate": 9.997743652865729e-05, + "loss": 6.3508, + "step": 12100 + }, + { + "epoch": 0.05229832860567921, + "grad_norm": 3.323902130126953, + "learning_rate": 9.997723279338222e-05, + "loss": 6.0004, + "step": 12150 + }, + { + "epoch": 0.05251354806496184, + "grad_norm": 1.664719581604004, + "learning_rate": 9.997702814263932e-05, + "loss": 5.9057, + "step": 12200 + }, + { + "epoch": 0.05272876752424447, + "grad_norm": 0.8217716217041016, + "learning_rate": 9.997682257643235e-05, + "loss": 5.4288, + "step": 12250 + }, + { + "epoch": 0.052943986983527105, + "grad_norm": 2.4197773933410645, + "learning_rate": 9.997661609476505e-05, + "loss": 5.0906, + "step": 12300 + }, + { + "epoch": 0.053159206442809734, + "grad_norm": 1.614018440246582, + "learning_rate": 9.997640869764125e-05, + "loss": 5.5784, + "step": 12350 + }, + { + "epoch": 0.05337442590209236, + "grad_norm": 3.0839896202087402, + "learning_rate": 9.99762003850647e-05, + "loss": 5.8983, + "step": 12400 + }, + { + "epoch": 0.05358964536137499, + "grad_norm": 2.4956319332122803, + "learning_rate": 9.997599115703926e-05, + "loss": 6.1043, + "step": 12450 + }, + { + "epoch": 0.053804864820657626, + "grad_norm": 3.0736868381500244, + "learning_rate": 9.997578101356872e-05, + "loss": 5.3886, + "step": 12500 + }, + { + "epoch": 0.054020084279940254, + "grad_norm": 1.8058282136917114, + "learning_rate": 9.997556995465696e-05, + "loss": 5.8122, + "step": 12550 + }, + { + "epoch": 0.05423530373922288, + "grad_norm": 1.7088472843170166, + "learning_rate": 9.997535798030782e-05, + "loss": 5.6335, + "step": 12600 + }, + { + "epoch": 0.05445052319850552, + "grad_norm": 2.217586040496826, + "learning_rate": 9.997514509052521e-05, + "loss": 5.7709, + "step": 12650 + }, + { + "epoch": 0.05466574265778815, + "grad_norm": 1.4640854597091675, + "learning_rate": 9.997493128531303e-05, + "loss": 6.2386, + "step": 12700 + }, + { + "epoch": 0.054880962117070775, + "grad_norm": 2.3812925815582275, + "learning_rate": 9.997471656467517e-05, + "loss": 5.9818, + "step": 12750 + }, + { + "epoch": 0.05509618157635341, + "grad_norm": 4.363724708557129, + "learning_rate": 9.997450092861558e-05, + "loss": 5.9508, + "step": 12800 + }, + { + "epoch": 0.05531140103563604, + "grad_norm": 1.6495294570922852, + "learning_rate": 9.99742843771382e-05, + "loss": 6.017, + "step": 12850 + }, + { + "epoch": 0.05552662049491867, + "grad_norm": 1.3933653831481934, + "learning_rate": 9.9974066910247e-05, + "loss": 6.5748, + "step": 12900 + }, + { + "epoch": 0.055741839954201296, + "grad_norm": 1.7844700813293457, + "learning_rate": 9.997384852794597e-05, + "loss": 5.7878, + "step": 12950 + }, + { + "epoch": 0.05595705941348393, + "grad_norm": 3.0626132488250732, + "learning_rate": 9.997362923023912e-05, + "loss": 6.1042, + "step": 13000 + }, + { + "epoch": 0.05617227887276656, + "grad_norm": 2.7775356769561768, + "learning_rate": 9.997340901713045e-05, + "loss": 5.6726, + "step": 13050 + }, + { + "epoch": 0.05638749833204919, + "grad_norm": 2.6739094257354736, + "learning_rate": 9.9973187888624e-05, + "loss": 5.876, + "step": 13100 + }, + { + "epoch": 0.05660271779133182, + "grad_norm": 3.243889808654785, + "learning_rate": 9.997296584472382e-05, + "loss": 6.3083, + "step": 13150 + }, + { + "epoch": 0.05681793725061445, + "grad_norm": 2.084683656692505, + "learning_rate": 9.997274288543397e-05, + "loss": 5.832, + "step": 13200 + }, + { + "epoch": 0.05703315670989708, + "grad_norm": 2.9229583740234375, + "learning_rate": 9.997251901075855e-05, + "loss": 6.3251, + "step": 13250 + }, + { + "epoch": 0.057248376169179716, + "grad_norm": 1.7777220010757446, + "learning_rate": 9.997229422070165e-05, + "loss": 5.609, + "step": 13300 + }, + { + "epoch": 0.057463595628462344, + "grad_norm": 1.6633163690567017, + "learning_rate": 9.997206851526736e-05, + "loss": 6.1616, + "step": 13350 + }, + { + "epoch": 0.05767881508774497, + "grad_norm": 0.8919707536697388, + "learning_rate": 9.997184189445989e-05, + "loss": 5.8023, + "step": 13400 + }, + { + "epoch": 0.0578940345470276, + "grad_norm": 2.8204293251037598, + "learning_rate": 9.997161435828332e-05, + "loss": 5.971, + "step": 13450 + }, + { + "epoch": 0.058109254006310236, + "grad_norm": 3.5405609607696533, + "learning_rate": 9.997138590674185e-05, + "loss": 5.9767, + "step": 13500 + }, + { + "epoch": 0.058324473465592865, + "grad_norm": 3.5715010166168213, + "learning_rate": 9.997115653983964e-05, + "loss": 5.9382, + "step": 13550 + }, + { + "epoch": 0.05853969292487549, + "grad_norm": 2.87684965133667, + "learning_rate": 9.997092625758094e-05, + "loss": 5.639, + "step": 13600 + }, + { + "epoch": 0.05875491238415813, + "grad_norm": 1.5056331157684326, + "learning_rate": 9.99706950599699e-05, + "loss": 5.8704, + "step": 13650 + }, + { + "epoch": 0.05897013184344076, + "grad_norm": 2.5838606357574463, + "learning_rate": 9.997046294701082e-05, + "loss": 5.7148, + "step": 13700 + }, + { + "epoch": 0.059185351302723385, + "grad_norm": 1.9607698917388916, + "learning_rate": 9.997022991870791e-05, + "loss": 5.4453, + "step": 13750 + }, + { + "epoch": 0.05940057076200602, + "grad_norm": 2.642509698867798, + "learning_rate": 9.996999597506546e-05, + "loss": 6.3816, + "step": 13800 + }, + { + "epoch": 0.05961579022128865, + "grad_norm": 1.4551447629928589, + "learning_rate": 9.996976111608774e-05, + "loss": 6.0388, + "step": 13850 + }, + { + "epoch": 0.05983100968057128, + "grad_norm": 2.2457828521728516, + "learning_rate": 9.996952534177908e-05, + "loss": 5.2416, + "step": 13900 + }, + { + "epoch": 0.060046229139853906, + "grad_norm": 0.530971109867096, + "learning_rate": 9.996928865214375e-05, + "loss": 5.1777, + "step": 13950 + }, + { + "epoch": 0.06026144859913654, + "grad_norm": 3.6856906414031982, + "learning_rate": 9.996905104718612e-05, + "loss": 5.2598, + "step": 14000 + }, + { + "epoch": 0.06047666805841917, + "grad_norm": 0.7726712226867676, + "learning_rate": 9.996881252691054e-05, + "loss": 5.4107, + "step": 14050 + }, + { + "epoch": 0.0606918875177018, + "grad_norm": 2.752558708190918, + "learning_rate": 9.996857309132139e-05, + "loss": 5.9589, + "step": 14100 + }, + { + "epoch": 0.06090710697698443, + "grad_norm": 2.040907859802246, + "learning_rate": 9.996833274042303e-05, + "loss": 5.7849, + "step": 14150 + }, + { + "epoch": 0.06112232643626706, + "grad_norm": 3.6041293144226074, + "learning_rate": 9.996809147421988e-05, + "loss": 5.7466, + "step": 14200 + }, + { + "epoch": 0.06133754589554969, + "grad_norm": 2.4578394889831543, + "learning_rate": 9.996784929271634e-05, + "loss": 5.298, + "step": 14250 + }, + { + "epoch": 0.061552765354832326, + "grad_norm": 2.1691932678222656, + "learning_rate": 9.996760619591687e-05, + "loss": 6.0262, + "step": 14300 + }, + { + "epoch": 0.061767984814114954, + "grad_norm": 2.074327230453491, + "learning_rate": 9.996736218382591e-05, + "loss": 6.1005, + "step": 14350 + }, + { + "epoch": 0.06198320427339758, + "grad_norm": 0.6185678243637085, + "learning_rate": 9.996711725644795e-05, + "loss": 5.8239, + "step": 14400 + }, + { + "epoch": 0.06219842373268021, + "grad_norm": 2.4976465702056885, + "learning_rate": 9.996687141378743e-05, + "loss": 5.7116, + "step": 14450 + }, + { + "epoch": 0.062413643191962846, + "grad_norm": 3.0181162357330322, + "learning_rate": 9.99666246558489e-05, + "loss": 6.0595, + "step": 14500 + }, + { + "epoch": 0.06262886265124548, + "grad_norm": 2.700378656387329, + "learning_rate": 9.996637698263685e-05, + "loss": 5.1164, + "step": 14550 + }, + { + "epoch": 0.06284408211052811, + "grad_norm": 2.0219006538391113, + "learning_rate": 9.996612839415585e-05, + "loss": 5.6068, + "step": 14600 + }, + { + "epoch": 0.06305930156981074, + "grad_norm": 2.824551582336426, + "learning_rate": 9.996587889041041e-05, + "loss": 5.5994, + "step": 14650 + }, + { + "epoch": 0.06327452102909337, + "grad_norm": 1.927943468093872, + "learning_rate": 9.996562847140514e-05, + "loss": 5.8084, + "step": 14700 + }, + { + "epoch": 0.063489740488376, + "grad_norm": 1.6298176050186157, + "learning_rate": 9.99653771371446e-05, + "loss": 5.5269, + "step": 14750 + }, + { + "epoch": 0.06370495994765862, + "grad_norm": 4.99830961227417, + "learning_rate": 9.99651248876334e-05, + "loss": 5.9489, + "step": 14800 + }, + { + "epoch": 0.06392017940694125, + "grad_norm": 1.591029167175293, + "learning_rate": 9.996487172287618e-05, + "loss": 5.8622, + "step": 14850 + }, + { + "epoch": 0.0641353988662239, + "grad_norm": 1.9866632223129272, + "learning_rate": 9.996461764287755e-05, + "loss": 5.1996, + "step": 14900 + }, + { + "epoch": 0.06435061832550652, + "grad_norm": 3.0030713081359863, + "learning_rate": 9.99643626476422e-05, + "loss": 5.9097, + "step": 14950 + }, + { + "epoch": 0.06456583778478915, + "grad_norm": 0.9997313618659973, + "learning_rate": 9.996410673717474e-05, + "loss": 5.4108, + "step": 15000 + }, + { + "epoch": 0.06456583778478915, + "eval_loss": 6.283059120178223, + "eval_runtime": 34.9621, + "eval_samples_per_second": 18.306, + "eval_steps_per_second": 9.153, + "eval_tts_loss": 5.890074991354934, + "step": 15000 + }, + { + "epoch": 0.06478105724407178, + "grad_norm": 1.1626211404800415, + "learning_rate": 9.996384991147992e-05, + "loss": 6.0025, + "step": 15050 + }, + { + "epoch": 0.06499627670335441, + "grad_norm": 2.173999071121216, + "learning_rate": 9.996359217056241e-05, + "loss": 5.1684, + "step": 15100 + }, + { + "epoch": 0.06521149616263704, + "grad_norm": 3.2823996543884277, + "learning_rate": 9.996333351442695e-05, + "loss": 5.9689, + "step": 15150 + }, + { + "epoch": 0.06542671562191966, + "grad_norm": 4.85387659072876, + "learning_rate": 9.996307394307827e-05, + "loss": 5.2338, + "step": 15200 + }, + { + "epoch": 0.06564193508120231, + "grad_norm": 2.2830216884613037, + "learning_rate": 9.99628134565211e-05, + "loss": 6.0923, + "step": 15250 + }, + { + "epoch": 0.06585715454048494, + "grad_norm": 3.5465762615203857, + "learning_rate": 9.996255205476027e-05, + "loss": 5.5179, + "step": 15300 + }, + { + "epoch": 0.06607237399976756, + "grad_norm": 1.3678102493286133, + "learning_rate": 9.99622897378005e-05, + "loss": 5.8667, + "step": 15350 + }, + { + "epoch": 0.06628759345905019, + "grad_norm": 4.226385116577148, + "learning_rate": 9.996202650564664e-05, + "loss": 5.786, + "step": 15400 + }, + { + "epoch": 0.06650281291833282, + "grad_norm": 3.308994770050049, + "learning_rate": 9.996176235830351e-05, + "loss": 5.6067, + "step": 15450 + }, + { + "epoch": 0.06671803237761545, + "grad_norm": 2.1735260486602783, + "learning_rate": 9.996149729577593e-05, + "loss": 5.9669, + "step": 15500 + }, + { + "epoch": 0.06693325183689809, + "grad_norm": 2.1894519329071045, + "learning_rate": 9.996123131806876e-05, + "loss": 5.1559, + "step": 15550 + }, + { + "epoch": 0.06714847129618072, + "grad_norm": 0.3986465334892273, + "learning_rate": 9.996096442518687e-05, + "loss": 5.454, + "step": 15600 + }, + { + "epoch": 0.06736369075546335, + "grad_norm": 0.811331570148468, + "learning_rate": 9.996069661713516e-05, + "loss": 5.328, + "step": 15650 + }, + { + "epoch": 0.06757891021474598, + "grad_norm": 2.8045287132263184, + "learning_rate": 9.996042789391855e-05, + "loss": 5.6671, + "step": 15700 + }, + { + "epoch": 0.0677941296740286, + "grad_norm": 1.9684051275253296, + "learning_rate": 9.996015825554193e-05, + "loss": 5.6186, + "step": 15750 + }, + { + "epoch": 0.06800934913331123, + "grad_norm": 1.785804271697998, + "learning_rate": 9.995988770201024e-05, + "loss": 6.3208, + "step": 15800 + }, + { + "epoch": 0.06822456859259386, + "grad_norm": 0.785915195941925, + "learning_rate": 9.995961623332845e-05, + "loss": 5.7703, + "step": 15850 + }, + { + "epoch": 0.0684397880518765, + "grad_norm": 2.4413414001464844, + "learning_rate": 9.995934384950155e-05, + "loss": 6.0865, + "step": 15900 + }, + { + "epoch": 0.06865500751115913, + "grad_norm": 3.873962879180908, + "learning_rate": 9.99590705505345e-05, + "loss": 5.7463, + "step": 15950 + }, + { + "epoch": 0.06887022697044176, + "grad_norm": 5.7327880859375, + "learning_rate": 9.995879633643231e-05, + "loss": 5.7538, + "step": 16000 + }, + { + "epoch": 0.06908544642972439, + "grad_norm": 2.0119428634643555, + "learning_rate": 9.995852120720002e-05, + "loss": 6.4433, + "step": 16050 + }, + { + "epoch": 0.06930066588900702, + "grad_norm": 3.0220303535461426, + "learning_rate": 9.995824516284266e-05, + "loss": 5.3502, + "step": 16100 + }, + { + "epoch": 0.06951588534828965, + "grad_norm": 4.893411159515381, + "learning_rate": 9.995796820336527e-05, + "loss": 5.9053, + "step": 16150 + }, + { + "epoch": 0.06973110480757228, + "grad_norm": 3.2052807807922363, + "learning_rate": 9.995769032877295e-05, + "loss": 5.606, + "step": 16200 + }, + { + "epoch": 0.06994632426685492, + "grad_norm": 4.188901901245117, + "learning_rate": 9.995741153907077e-05, + "loss": 5.5842, + "step": 16250 + }, + { + "epoch": 0.07016154372613755, + "grad_norm": 2.238579273223877, + "learning_rate": 9.995713183426386e-05, + "loss": 5.3988, + "step": 16300 + }, + { + "epoch": 0.07037676318542017, + "grad_norm": 1.6902905702590942, + "learning_rate": 9.995685121435732e-05, + "loss": 5.3483, + "step": 16350 + }, + { + "epoch": 0.0705919826447028, + "grad_norm": 1.878673791885376, + "learning_rate": 9.995656967935629e-05, + "loss": 5.7685, + "step": 16400 + }, + { + "epoch": 0.07080720210398543, + "grad_norm": 2.008070945739746, + "learning_rate": 9.995628722926596e-05, + "loss": 5.3091, + "step": 16450 + }, + { + "epoch": 0.07102242156326806, + "grad_norm": 1.9279426336288452, + "learning_rate": 9.995600386409146e-05, + "loss": 5.3509, + "step": 16500 + }, + { + "epoch": 0.0712376410225507, + "grad_norm": 3.8198723793029785, + "learning_rate": 9.995571958383802e-05, + "loss": 6.3, + "step": 16550 + }, + { + "epoch": 0.07145286048183333, + "grad_norm": 2.2203524112701416, + "learning_rate": 9.99554343885108e-05, + "loss": 5.3633, + "step": 16600 + }, + { + "epoch": 0.07166807994111596, + "grad_norm": 2.261531352996826, + "learning_rate": 9.995514827811507e-05, + "loss": 6.0075, + "step": 16650 + }, + { + "epoch": 0.07188329940039859, + "grad_norm": 1.4834709167480469, + "learning_rate": 9.995486125265606e-05, + "loss": 5.4911, + "step": 16700 + }, + { + "epoch": 0.07209851885968122, + "grad_norm": 1.8843462467193604, + "learning_rate": 9.995457331213903e-05, + "loss": 6.1968, + "step": 16750 + }, + { + "epoch": 0.07231373831896384, + "grad_norm": 3.8323771953582764, + "learning_rate": 9.995428445656922e-05, + "loss": 6.1647, + "step": 16800 + }, + { + "epoch": 0.07252895777824647, + "grad_norm": 4.457127094268799, + "learning_rate": 9.995399468595194e-05, + "loss": 5.621, + "step": 16850 + }, + { + "epoch": 0.07274417723752911, + "grad_norm": 2.2657434940338135, + "learning_rate": 9.995370400029252e-05, + "loss": 5.7298, + "step": 16900 + }, + { + "epoch": 0.07295939669681174, + "grad_norm": 2.0507192611694336, + "learning_rate": 9.995341239959628e-05, + "loss": 5.9999, + "step": 16950 + }, + { + "epoch": 0.07317461615609437, + "grad_norm": 1.2310922145843506, + "learning_rate": 9.995311988386854e-05, + "loss": 5.4771, + "step": 17000 + }, + { + "epoch": 0.073389835615377, + "grad_norm": 4.153654098510742, + "learning_rate": 9.995282645311465e-05, + "loss": 5.7198, + "step": 17050 + }, + { + "epoch": 0.07360505507465963, + "grad_norm": 1.7386804819107056, + "learning_rate": 9.995253210734004e-05, + "loss": 6.0636, + "step": 17100 + }, + { + "epoch": 0.07382027453394226, + "grad_norm": 2.547597885131836, + "learning_rate": 9.995223684655004e-05, + "loss": 5.8946, + "step": 17150 + }, + { + "epoch": 0.07403549399322489, + "grad_norm": 0.6837219595909119, + "learning_rate": 9.99519406707501e-05, + "loss": 5.7123, + "step": 17200 + }, + { + "epoch": 0.07425071345250753, + "grad_norm": 1.3913053274154663, + "learning_rate": 9.995164357994562e-05, + "loss": 5.6911, + "step": 17250 + }, + { + "epoch": 0.07446593291179016, + "grad_norm": 2.452699661254883, + "learning_rate": 9.995134557414205e-05, + "loss": 5.5449, + "step": 17300 + }, + { + "epoch": 0.07468115237107278, + "grad_norm": 2.8005967140197754, + "learning_rate": 9.995104665334485e-05, + "loss": 5.7126, + "step": 17350 + }, + { + "epoch": 0.07489637183035541, + "grad_norm": 1.947097659111023, + "learning_rate": 9.99507468175595e-05, + "loss": 5.4575, + "step": 17400 + }, + { + "epoch": 0.07511159128963804, + "grad_norm": 0.8892021775245667, + "learning_rate": 9.995044606679149e-05, + "loss": 5.4656, + "step": 17450 + }, + { + "epoch": 0.07532681074892067, + "grad_norm": 2.80574631690979, + "learning_rate": 9.995014440104631e-05, + "loss": 6.0767, + "step": 17500 + }, + { + "epoch": 0.07554203020820331, + "grad_norm": 1.610390543937683, + "learning_rate": 9.994984182032951e-05, + "loss": 5.3219, + "step": 17550 + }, + { + "epoch": 0.07575724966748594, + "grad_norm": 2.84792423248291, + "learning_rate": 9.994953832464665e-05, + "loss": 5.9615, + "step": 17600 + }, + { + "epoch": 0.07597246912676857, + "grad_norm": 2.072312593460083, + "learning_rate": 9.994923391400324e-05, + "loss": 6.0122, + "step": 17650 + }, + { + "epoch": 0.0761876885860512, + "grad_norm": 2.0159435272216797, + "learning_rate": 9.994892858840487e-05, + "loss": 5.2556, + "step": 17700 + }, + { + "epoch": 0.07640290804533383, + "grad_norm": 2.0125532150268555, + "learning_rate": 9.994862234785717e-05, + "loss": 5.7333, + "step": 17750 + }, + { + "epoch": 0.07661812750461645, + "grad_norm": 1.475946068763733, + "learning_rate": 9.994831519236571e-05, + "loss": 5.7599, + "step": 17800 + }, + { + "epoch": 0.07683334696389908, + "grad_norm": 2.418642997741699, + "learning_rate": 9.994800712193612e-05, + "loss": 5.4131, + "step": 17850 + }, + { + "epoch": 0.07704856642318172, + "grad_norm": 1.8710205554962158, + "learning_rate": 9.994769813657407e-05, + "loss": 5.5828, + "step": 17900 + }, + { + "epoch": 0.07726378588246435, + "grad_norm": 4.8582868576049805, + "learning_rate": 9.99473882362852e-05, + "loss": 5.8408, + "step": 17950 + }, + { + "epoch": 0.07747900534174698, + "grad_norm": 1.9501605033874512, + "learning_rate": 9.99470774210752e-05, + "loss": 5.303, + "step": 18000 + }, + { + "epoch": 0.07747900534174698, + "eval_loss": 6.211604118347168, + "eval_runtime": 35.0352, + "eval_samples_per_second": 18.267, + "eval_steps_per_second": 9.134, + "eval_tts_loss": 5.953131515526134, + "step": 18000 + }, + { + "epoch": 0.07769422480102961, + "grad_norm": 2.7112791538238525, + "learning_rate": 9.994676569094974e-05, + "loss": 5.6897, + "step": 18050 + }, + { + "epoch": 0.07790944426031224, + "grad_norm": 5.096025466918945, + "learning_rate": 9.994645304591454e-05, + "loss": 5.8427, + "step": 18100 + }, + { + "epoch": 0.07812466371959487, + "grad_norm": 1.710616111755371, + "learning_rate": 9.994613948597534e-05, + "loss": 6.1506, + "step": 18150 + }, + { + "epoch": 0.0783398831788775, + "grad_norm": 2.219977617263794, + "learning_rate": 9.994582501113785e-05, + "loss": 5.4612, + "step": 18200 + }, + { + "epoch": 0.07855510263816014, + "grad_norm": 3.029083490371704, + "learning_rate": 9.994550962140788e-05, + "loss": 5.3527, + "step": 18250 + }, + { + "epoch": 0.07877032209744277, + "grad_norm": 2.2938177585601807, + "learning_rate": 9.994519331679117e-05, + "loss": 5.6825, + "step": 18300 + }, + { + "epoch": 0.0789855415567254, + "grad_norm": 3.9177558422088623, + "learning_rate": 9.994487609729352e-05, + "loss": 5.659, + "step": 18350 + }, + { + "epoch": 0.07920076101600802, + "grad_norm": 5.502213954925537, + "learning_rate": 9.994455796292076e-05, + "loss": 5.1632, + "step": 18400 + }, + { + "epoch": 0.07941598047529065, + "grad_norm": 1.0359677076339722, + "learning_rate": 9.99442389136787e-05, + "loss": 5.9026, + "step": 18450 + }, + { + "epoch": 0.07963119993457328, + "grad_norm": 2.5854058265686035, + "learning_rate": 9.994391894957318e-05, + "loss": 5.4129, + "step": 18500 + }, + { + "epoch": 0.07984641939385591, + "grad_norm": 2.386911392211914, + "learning_rate": 9.994359807061007e-05, + "loss": 5.716, + "step": 18550 + }, + { + "epoch": 0.08006163885313855, + "grad_norm": 1.0967060327529907, + "learning_rate": 9.994327627679525e-05, + "loss": 5.1382, + "step": 18600 + }, + { + "epoch": 0.08027685831242118, + "grad_norm": 2.006098508834839, + "learning_rate": 9.994295356813461e-05, + "loss": 5.9263, + "step": 18650 + }, + { + "epoch": 0.08049207777170381, + "grad_norm": 1.702653408050537, + "learning_rate": 9.994262994463407e-05, + "loss": 5.3092, + "step": 18700 + }, + { + "epoch": 0.08070729723098644, + "grad_norm": 1.5611636638641357, + "learning_rate": 9.994230540629955e-05, + "loss": 5.2208, + "step": 18750 + }, + { + "epoch": 0.08092251669026906, + "grad_norm": 1.5459251403808594, + "learning_rate": 9.994197995313699e-05, + "loss": 5.7667, + "step": 18800 + }, + { + "epoch": 0.08113773614955169, + "grad_norm": 2.4298837184906006, + "learning_rate": 9.994165358515235e-05, + "loss": 5.6655, + "step": 18850 + }, + { + "epoch": 0.08135295560883433, + "grad_norm": 1.7953922748565674, + "learning_rate": 9.994132630235162e-05, + "loss": 5.5188, + "step": 18900 + }, + { + "epoch": 0.08156817506811696, + "grad_norm": 0.7084425687789917, + "learning_rate": 9.994099810474078e-05, + "loss": 5.4348, + "step": 18950 + }, + { + "epoch": 0.08178339452739959, + "grad_norm": 3.0085363388061523, + "learning_rate": 9.994066899232587e-05, + "loss": 5.7936, + "step": 19000 + }, + { + "epoch": 0.08199861398668222, + "grad_norm": 2.9376282691955566, + "learning_rate": 9.994033896511289e-05, + "loss": 5.6467, + "step": 19050 + }, + { + "epoch": 0.08221383344596485, + "grad_norm": 0.9916555285453796, + "learning_rate": 9.99400080231079e-05, + "loss": 5.8205, + "step": 19100 + }, + { + "epoch": 0.08242905290524748, + "grad_norm": 2.3526439666748047, + "learning_rate": 9.993967616631695e-05, + "loss": 5.6243, + "step": 19150 + }, + { + "epoch": 0.0826442723645301, + "grad_norm": 1.9576694965362549, + "learning_rate": 9.993934339474613e-05, + "loss": 5.7917, + "step": 19200 + }, + { + "epoch": 0.08285949182381275, + "grad_norm": 2.034571409225464, + "learning_rate": 9.993900970840154e-05, + "loss": 5.6037, + "step": 19250 + }, + { + "epoch": 0.08307471128309538, + "grad_norm": 2.198061227798462, + "learning_rate": 9.993867510728928e-05, + "loss": 5.7302, + "step": 19300 + }, + { + "epoch": 0.083289930742378, + "grad_norm": 3.86657452583313, + "learning_rate": 9.993833959141549e-05, + "loss": 5.6247, + "step": 19350 + }, + { + "epoch": 0.08350515020166063, + "grad_norm": 3.477975606918335, + "learning_rate": 9.99380031607863e-05, + "loss": 5.9634, + "step": 19400 + }, + { + "epoch": 0.08372036966094326, + "grad_norm": 1.95694899559021, + "learning_rate": 9.993766581540788e-05, + "loss": 5.7867, + "step": 19450 + }, + { + "epoch": 0.08393558912022589, + "grad_norm": 1.624955177307129, + "learning_rate": 9.993732755528642e-05, + "loss": 5.2972, + "step": 19500 + }, + { + "epoch": 0.08415080857950852, + "grad_norm": 1.6742191314697266, + "learning_rate": 9.993698838042811e-05, + "loss": 6.0457, + "step": 19550 + }, + { + "epoch": 0.08436602803879116, + "grad_norm": 2.6992990970611572, + "learning_rate": 9.993664829083916e-05, + "loss": 5.9228, + "step": 19600 + }, + { + "epoch": 0.08458124749807379, + "grad_norm": 1.5121800899505615, + "learning_rate": 9.993630728652579e-05, + "loss": 5.6269, + "step": 19650 + }, + { + "epoch": 0.08479646695735642, + "grad_norm": 2.1630330085754395, + "learning_rate": 9.993596536749426e-05, + "loss": 5.105, + "step": 19700 + }, + { + "epoch": 0.08501168641663905, + "grad_norm": 2.2271671295166016, + "learning_rate": 9.993562253375084e-05, + "loss": 5.6632, + "step": 19750 + }, + { + "epoch": 0.08522690587592167, + "grad_norm": 0.4413912296295166, + "learning_rate": 9.99352787853018e-05, + "loss": 5.544, + "step": 19800 + }, + { + "epoch": 0.0854421253352043, + "grad_norm": 2.1797685623168945, + "learning_rate": 9.993493412215342e-05, + "loss": 5.8664, + "step": 19850 + }, + { + "epoch": 0.08565734479448694, + "grad_norm": 1.9431596994400024, + "learning_rate": 9.993458854431205e-05, + "loss": 5.5971, + "step": 19900 + }, + { + "epoch": 0.08587256425376957, + "grad_norm": 4.063226699829102, + "learning_rate": 9.9934242051784e-05, + "loss": 5.6564, + "step": 19950 + }, + { + "epoch": 0.0860877837130522, + "grad_norm": 2.522859573364258, + "learning_rate": 9.993389464457559e-05, + "loss": 5.8276, + "step": 20000 + }, + { + "epoch": 0.08630300317233483, + "grad_norm": 2.62758469581604, + "learning_rate": 9.993354632269323e-05, + "loss": 5.7523, + "step": 20050 + }, + { + "epoch": 0.08651822263161746, + "grad_norm": 3.3180532455444336, + "learning_rate": 9.993319708614328e-05, + "loss": 5.4692, + "step": 20100 + }, + { + "epoch": 0.08673344209090009, + "grad_norm": 2.749356746673584, + "learning_rate": 9.993284693493214e-05, + "loss": 5.0978, + "step": 20150 + }, + { + "epoch": 0.08694866155018272, + "grad_norm": 2.546121835708618, + "learning_rate": 9.993249586906622e-05, + "loss": 6.0128, + "step": 20200 + }, + { + "epoch": 0.08716388100946536, + "grad_norm": 2.2504751682281494, + "learning_rate": 9.993214388855196e-05, + "loss": 5.51, + "step": 20250 + }, + { + "epoch": 0.08737910046874799, + "grad_norm": 1.6568955183029175, + "learning_rate": 9.993179099339579e-05, + "loss": 5.8227, + "step": 20300 + }, + { + "epoch": 0.08759431992803061, + "grad_norm": 2.0389914512634277, + "learning_rate": 9.99314371836042e-05, + "loss": 5.5947, + "step": 20350 + }, + { + "epoch": 0.08780953938731324, + "grad_norm": 1.5150511264801025, + "learning_rate": 9.993108245918364e-05, + "loss": 5.5727, + "step": 20400 + }, + { + "epoch": 0.08802475884659587, + "grad_norm": 5.016895294189453, + "learning_rate": 9.993072682014061e-05, + "loss": 5.7186, + "step": 20450 + }, + { + "epoch": 0.0882399783058785, + "grad_norm": 2.4236650466918945, + "learning_rate": 9.993037026648167e-05, + "loss": 5.7055, + "step": 20500 + }, + { + "epoch": 0.08845519776516113, + "grad_norm": 2.5137975215911865, + "learning_rate": 9.993001279821331e-05, + "loss": 5.7467, + "step": 20550 + }, + { + "epoch": 0.08867041722444377, + "grad_norm": 2.5234992504119873, + "learning_rate": 9.992965441534209e-05, + "loss": 5.6135, + "step": 20600 + }, + { + "epoch": 0.0888856366837264, + "grad_norm": 1.5910043716430664, + "learning_rate": 9.992929511787457e-05, + "loss": 5.6351, + "step": 20650 + }, + { + "epoch": 0.08910085614300903, + "grad_norm": 0.20015203952789307, + "learning_rate": 9.992893490581732e-05, + "loss": 5.3147, + "step": 20700 + }, + { + "epoch": 0.08931607560229166, + "grad_norm": 4.25960636138916, + "learning_rate": 9.992857377917697e-05, + "loss": 5.7337, + "step": 20750 + }, + { + "epoch": 0.08953129506157428, + "grad_norm": 2.9144580364227295, + "learning_rate": 9.99282117379601e-05, + "loss": 5.6196, + "step": 20800 + }, + { + "epoch": 0.08974651452085691, + "grad_norm": 6.718020439147949, + "learning_rate": 9.992784878217338e-05, + "loss": 5.3115, + "step": 20850 + }, + { + "epoch": 0.08996173398013955, + "grad_norm": 3.80033802986145, + "learning_rate": 9.992748491182342e-05, + "loss": 5.7198, + "step": 20900 + }, + { + "epoch": 0.09017695343942218, + "grad_norm": 3.3106350898742676, + "learning_rate": 9.992712012691691e-05, + "loss": 5.3388, + "step": 20950 + }, + { + "epoch": 0.09039217289870481, + "grad_norm": 1.4637995958328247, + "learning_rate": 9.992675442746052e-05, + "loss": 5.8138, + "step": 21000 + }, + { + "epoch": 0.09039217289870481, + "eval_loss": 6.120312690734863, + "eval_runtime": 35.0593, + "eval_samples_per_second": 18.255, + "eval_steps_per_second": 9.127, + "eval_tts_loss": 5.989071580309767, + "step": 21000 + }, + { + "epoch": 0.09060739235798744, + "grad_norm": 2.7966606616973877, + "learning_rate": 9.992638781346096e-05, + "loss": 5.2825, + "step": 21050 + }, + { + "epoch": 0.09082261181727007, + "grad_norm": 1.386186122894287, + "learning_rate": 9.992602028492492e-05, + "loss": 5.9164, + "step": 21100 + }, + { + "epoch": 0.0910378312765527, + "grad_norm": 2.192592144012451, + "learning_rate": 9.992565184185918e-05, + "loss": 5.939, + "step": 21150 + }, + { + "epoch": 0.09125305073583533, + "grad_norm": 2.114048480987549, + "learning_rate": 9.992528248427045e-05, + "loss": 5.7321, + "step": 21200 + }, + { + "epoch": 0.09146827019511797, + "grad_norm": 2.5562314987182617, + "learning_rate": 9.992491221216549e-05, + "loss": 5.7839, + "step": 21250 + }, + { + "epoch": 0.0916834896544006, + "grad_norm": 2.040024757385254, + "learning_rate": 9.992454102555112e-05, + "loss": 5.4575, + "step": 21300 + }, + { + "epoch": 0.09189870911368322, + "grad_norm": 1.7115858793258667, + "learning_rate": 9.992416892443411e-05, + "loss": 5.011, + "step": 21350 + }, + { + "epoch": 0.09211392857296585, + "grad_norm": 1.5356016159057617, + "learning_rate": 9.992379590882126e-05, + "loss": 5.6164, + "step": 21400 + }, + { + "epoch": 0.09232914803224848, + "grad_norm": 0.6225360035896301, + "learning_rate": 9.992342197871945e-05, + "loss": 5.2713, + "step": 21450 + }, + { + "epoch": 0.09254436749153111, + "grad_norm": 1.8149142265319824, + "learning_rate": 9.99230471341355e-05, + "loss": 5.5159, + "step": 21500 + }, + { + "epoch": 0.09275958695081374, + "grad_norm": 2.482386589050293, + "learning_rate": 9.99226713750763e-05, + "loss": 5.1729, + "step": 21550 + }, + { + "epoch": 0.09297480641009638, + "grad_norm": 1.2547438144683838, + "learning_rate": 9.99222947015487e-05, + "loss": 5.5702, + "step": 21600 + }, + { + "epoch": 0.09319002586937901, + "grad_norm": 2.2927472591400146, + "learning_rate": 9.992191711355961e-05, + "loss": 5.7192, + "step": 21650 + }, + { + "epoch": 0.09340524532866164, + "grad_norm": 0.5083953738212585, + "learning_rate": 9.992153861111595e-05, + "loss": 5.7377, + "step": 21700 + }, + { + "epoch": 0.09362046478794427, + "grad_norm": 1.6559268236160278, + "learning_rate": 9.992115919422466e-05, + "loss": 5.4341, + "step": 21750 + }, + { + "epoch": 0.0938356842472269, + "grad_norm": 1.5492008924484253, + "learning_rate": 9.992077886289271e-05, + "loss": 6.1752, + "step": 21800 + }, + { + "epoch": 0.09405090370650952, + "grad_norm": 2.3303942680358887, + "learning_rate": 9.992039761712701e-05, + "loss": 5.4488, + "step": 21850 + }, + { + "epoch": 0.09426612316579216, + "grad_norm": 2.5752131938934326, + "learning_rate": 9.992001545693458e-05, + "loss": 5.4674, + "step": 21900 + }, + { + "epoch": 0.0944813426250748, + "grad_norm": 3.2236640453338623, + "learning_rate": 9.991963238232243e-05, + "loss": 5.2782, + "step": 21950 + }, + { + "epoch": 0.09469656208435742, + "grad_norm": 3.502394914627075, + "learning_rate": 9.991924839329755e-05, + "loss": 5.6292, + "step": 22000 + }, + { + "epoch": 0.09491178154364005, + "grad_norm": 2.5439321994781494, + "learning_rate": 9.991886348986699e-05, + "loss": 6.0257, + "step": 22050 + }, + { + "epoch": 0.09512700100292268, + "grad_norm": 3.3802876472473145, + "learning_rate": 9.991847767203779e-05, + "loss": 5.6352, + "step": 22100 + }, + { + "epoch": 0.09534222046220531, + "grad_norm": 3.3134350776672363, + "learning_rate": 9.991809093981704e-05, + "loss": 5.5393, + "step": 22150 + }, + { + "epoch": 0.09555743992148794, + "grad_norm": 2.8163442611694336, + "learning_rate": 9.99177032932118e-05, + "loss": 5.8707, + "step": 22200 + }, + { + "epoch": 0.09577265938077058, + "grad_norm": 3.161935329437256, + "learning_rate": 9.991731473222917e-05, + "loss": 5.6604, + "step": 22250 + }, + { + "epoch": 0.0959878788400532, + "grad_norm": 2.913591146469116, + "learning_rate": 9.991692525687628e-05, + "loss": 5.6775, + "step": 22300 + }, + { + "epoch": 0.09620309829933583, + "grad_norm": 1.6422141790390015, + "learning_rate": 9.991653486716028e-05, + "loss": 5.7927, + "step": 22350 + }, + { + "epoch": 0.09641831775861846, + "grad_norm": 2.1937448978424072, + "learning_rate": 9.991614356308828e-05, + "loss": 5.76, + "step": 22400 + }, + { + "epoch": 0.09663353721790109, + "grad_norm": 0.8868394494056702, + "learning_rate": 9.991575134466748e-05, + "loss": 5.5599, + "step": 22450 + }, + { + "epoch": 0.09684875667718372, + "grad_norm": 1.9727965593338013, + "learning_rate": 9.991535821190505e-05, + "loss": 5.686, + "step": 22500 + }, + { + "epoch": 0.09706397613646635, + "grad_norm": 1.336888074874878, + "learning_rate": 9.99149641648082e-05, + "loss": 5.2863, + "step": 22550 + }, + { + "epoch": 0.09727919559574899, + "grad_norm": 2.4324193000793457, + "learning_rate": 9.991456920338414e-05, + "loss": 5.9909, + "step": 22600 + }, + { + "epoch": 0.09749441505503162, + "grad_norm": 2.584671974182129, + "learning_rate": 9.991417332764012e-05, + "loss": 5.0569, + "step": 22650 + }, + { + "epoch": 0.09770963451431425, + "grad_norm": 1.7729185819625854, + "learning_rate": 9.991377653758336e-05, + "loss": 5.5733, + "step": 22700 + }, + { + "epoch": 0.09792485397359688, + "grad_norm": 1.3918360471725464, + "learning_rate": 9.991337883322116e-05, + "loss": 5.7868, + "step": 22750 + }, + { + "epoch": 0.0981400734328795, + "grad_norm": 1.7086223363876343, + "learning_rate": 9.99129802145608e-05, + "loss": 5.1525, + "step": 22800 + }, + { + "epoch": 0.09835529289216213, + "grad_norm": 3.210325241088867, + "learning_rate": 9.991258068160957e-05, + "loss": 5.6047, + "step": 22850 + }, + { + "epoch": 0.09857051235144478, + "grad_norm": 3.2450008392333984, + "learning_rate": 9.991218023437479e-05, + "loss": 5.8373, + "step": 22900 + }, + { + "epoch": 0.0987857318107274, + "grad_norm": 0.26299312710762024, + "learning_rate": 9.99117788728638e-05, + "loss": 5.7097, + "step": 22950 + }, + { + "epoch": 0.09900095127001003, + "grad_norm": 1.0478720664978027, + "learning_rate": 9.991137659708395e-05, + "loss": 5.5777, + "step": 23000 + }, + { + "epoch": 0.09921617072929266, + "grad_norm": 1.534126877784729, + "learning_rate": 9.99109734070426e-05, + "loss": 5.9176, + "step": 23050 + }, + { + "epoch": 0.09943139018857529, + "grad_norm": 2.496798038482666, + "learning_rate": 9.991056930274715e-05, + "loss": 5.3678, + "step": 23100 + }, + { + "epoch": 0.09964660964785792, + "grad_norm": 3.525470733642578, + "learning_rate": 9.991016428420501e-05, + "loss": 6.1621, + "step": 23150 + }, + { + "epoch": 0.09986182910714055, + "grad_norm": 1.5937589406967163, + "learning_rate": 9.990975835142359e-05, + "loss": 5.746, + "step": 23200 + }, + { + "epoch": 0.10007704856642319, + "grad_norm": 1.8206266164779663, + "learning_rate": 9.99093515044103e-05, + "loss": 5.3184, + "step": 23250 + }, + { + "epoch": 0.10029226802570582, + "grad_norm": 3.335700273513794, + "learning_rate": 9.990894374317263e-05, + "loss": 5.4627, + "step": 23300 + }, + { + "epoch": 0.10050748748498844, + "grad_norm": 0.22533506155014038, + "learning_rate": 9.990853506771804e-05, + "loss": 5.2765, + "step": 23350 + }, + { + "epoch": 0.10072270694427107, + "grad_norm": 2.7000696659088135, + "learning_rate": 9.990812547805399e-05, + "loss": 5.7232, + "step": 23400 + }, + { + "epoch": 0.1009379264035537, + "grad_norm": 5.4985504150390625, + "learning_rate": 9.990771497418803e-05, + "loss": 5.961, + "step": 23450 + }, + { + "epoch": 0.10115314586283633, + "grad_norm": 2.7308192253112793, + "learning_rate": 9.990730355612763e-05, + "loss": 5.6998, + "step": 23500 + }, + { + "epoch": 0.10136836532211896, + "grad_norm": 2.120668888092041, + "learning_rate": 9.990689122388035e-05, + "loss": 5.7961, + "step": 23550 + }, + { + "epoch": 0.1015835847814016, + "grad_norm": 2.378713369369507, + "learning_rate": 9.990647797745373e-05, + "loss": 5.6945, + "step": 23600 + }, + { + "epoch": 0.10179880424068423, + "grad_norm": 2.3826904296875, + "learning_rate": 9.99060638168554e-05, + "loss": 5.3905, + "step": 23650 + }, + { + "epoch": 0.10201402369996686, + "grad_norm": 3.502322196960449, + "learning_rate": 9.990564874209286e-05, + "loss": 4.9769, + "step": 23700 + }, + { + "epoch": 0.10222924315924949, + "grad_norm": 1.8315318822860718, + "learning_rate": 9.990523275317374e-05, + "loss": 5.683, + "step": 23750 + }, + { + "epoch": 0.10244446261853211, + "grad_norm": 3.910715103149414, + "learning_rate": 9.990481585010569e-05, + "loss": 5.9168, + "step": 23800 + }, + { + "epoch": 0.10265968207781474, + "grad_norm": 1.771957278251648, + "learning_rate": 9.990439803289631e-05, + "loss": 5.6204, + "step": 23850 + }, + { + "epoch": 0.10287490153709737, + "grad_norm": 3.0744545459747314, + "learning_rate": 9.990397930155328e-05, + "loss": 5.2489, + "step": 23900 + }, + { + "epoch": 0.10309012099638001, + "grad_norm": 2.2850568294525146, + "learning_rate": 9.990355965608427e-05, + "loss": 5.53, + "step": 23950 + }, + { + "epoch": 0.10330534045566264, + "grad_norm": 1.6761142015457153, + "learning_rate": 9.990313909649696e-05, + "loss": 5.7021, + "step": 24000 + }, + { + "epoch": 0.10330534045566264, + "eval_loss": 6.075467586517334, + "eval_runtime": 34.9692, + "eval_samples_per_second": 18.302, + "eval_steps_per_second": 9.151, + "eval_tts_loss": 6.074243620426464, + "step": 24000 + }, + { + "epoch": 0.10352055991494527, + "grad_norm": 1.0519886016845703, + "learning_rate": 9.990271762279904e-05, + "loss": 5.2414, + "step": 24050 + }, + { + "epoch": 0.1037357793742279, + "grad_norm": 1.4776898622512817, + "learning_rate": 9.990229523499825e-05, + "loss": 5.6575, + "step": 24100 + }, + { + "epoch": 0.10395099883351053, + "grad_norm": 2.172640800476074, + "learning_rate": 9.990187193310232e-05, + "loss": 5.4945, + "step": 24150 + }, + { + "epoch": 0.10416621829279316, + "grad_norm": 4.70146369934082, + "learning_rate": 9.9901447717119e-05, + "loss": 5.8684, + "step": 24200 + }, + { + "epoch": 0.1043814377520758, + "grad_norm": 3.53867244720459, + "learning_rate": 9.990102258705606e-05, + "loss": 5.2688, + "step": 24250 + }, + { + "epoch": 0.10459665721135843, + "grad_norm": 1.899337887763977, + "learning_rate": 9.99005965429213e-05, + "loss": 5.7164, + "step": 24300 + }, + { + "epoch": 0.10481187667064105, + "grad_norm": 3.4093098640441895, + "learning_rate": 9.99001695847225e-05, + "loss": 5.5363, + "step": 24350 + }, + { + "epoch": 0.10502709612992368, + "grad_norm": 2.4906914234161377, + "learning_rate": 9.989974171246753e-05, + "loss": 6.0748, + "step": 24400 + }, + { + "epoch": 0.10524231558920631, + "grad_norm": 2.1009206771850586, + "learning_rate": 9.989931292616416e-05, + "loss": 5.5356, + "step": 24450 + }, + { + "epoch": 0.10545753504848894, + "grad_norm": 2.2778830528259277, + "learning_rate": 9.989888322582028e-05, + "loss": 5.2965, + "step": 24500 + }, + { + "epoch": 0.10567275450777157, + "grad_norm": 2.0149364471435547, + "learning_rate": 9.989845261144376e-05, + "loss": 5.2788, + "step": 24550 + }, + { + "epoch": 0.10588797396705421, + "grad_norm": 2.501782178878784, + "learning_rate": 9.989802108304251e-05, + "loss": 5.3389, + "step": 24600 + }, + { + "epoch": 0.10610319342633684, + "grad_norm": 1.4465404748916626, + "learning_rate": 9.98975886406244e-05, + "loss": 5.4771, + "step": 24650 + }, + { + "epoch": 0.10631841288561947, + "grad_norm": 4.242234230041504, + "learning_rate": 9.989715528419736e-05, + "loss": 5.9209, + "step": 24700 + }, + { + "epoch": 0.1065336323449021, + "grad_norm": 3.2570533752441406, + "learning_rate": 9.989672101376933e-05, + "loss": 5.1908, + "step": 24750 + }, + { + "epoch": 0.10674885180418472, + "grad_norm": 1.3415498733520508, + "learning_rate": 9.989628582934827e-05, + "loss": 4.9377, + "step": 24800 + }, + { + "epoch": 0.10696407126346735, + "grad_norm": 2.7268407344818115, + "learning_rate": 9.989584973094215e-05, + "loss": 5.5421, + "step": 24850 + }, + { + "epoch": 0.10717929072274998, + "grad_norm": 2.801910638809204, + "learning_rate": 9.989541271855895e-05, + "loss": 5.7429, + "step": 24900 + }, + { + "epoch": 0.10739451018203262, + "grad_norm": 2.580946445465088, + "learning_rate": 9.989497479220669e-05, + "loss": 5.2451, + "step": 24950 + }, + { + "epoch": 0.10760972964131525, + "grad_norm": 2.577997922897339, + "learning_rate": 9.989453595189337e-05, + "loss": 5.7432, + "step": 25000 + }, + { + "epoch": 0.10782494910059788, + "grad_norm": 2.7716283798217773, + "learning_rate": 9.989409619762704e-05, + "loss": 5.1859, + "step": 25050 + }, + { + "epoch": 0.10804016855988051, + "grad_norm": 2.0244128704071045, + "learning_rate": 9.989365552941576e-05, + "loss": 5.8532, + "step": 25100 + }, + { + "epoch": 0.10825538801916314, + "grad_norm": 4.227988243103027, + "learning_rate": 9.989321394726762e-05, + "loss": 5.5699, + "step": 25150 + }, + { + "epoch": 0.10847060747844577, + "grad_norm": 2.5811126232147217, + "learning_rate": 9.989277145119067e-05, + "loss": 5.2247, + "step": 25200 + }, + { + "epoch": 0.10868582693772841, + "grad_norm": 1.5612678527832031, + "learning_rate": 9.989232804119303e-05, + "loss": 5.2196, + "step": 25250 + }, + { + "epoch": 0.10890104639701104, + "grad_norm": 2.0854220390319824, + "learning_rate": 9.989188371728283e-05, + "loss": 5.7522, + "step": 25300 + }, + { + "epoch": 0.10911626585629366, + "grad_norm": 2.1087236404418945, + "learning_rate": 9.98914384794682e-05, + "loss": 5.4463, + "step": 25350 + }, + { + "epoch": 0.1093314853155763, + "grad_norm": 3.4781177043914795, + "learning_rate": 9.989099232775731e-05, + "loss": 5.3947, + "step": 25400 + }, + { + "epoch": 0.10954670477485892, + "grad_norm": 1.7244888544082642, + "learning_rate": 9.989054526215831e-05, + "loss": 5.7542, + "step": 25450 + }, + { + "epoch": 0.10976192423414155, + "grad_norm": 3.2116589546203613, + "learning_rate": 9.989009728267942e-05, + "loss": 5.2863, + "step": 25500 + }, + { + "epoch": 0.10997714369342418, + "grad_norm": 0.5673274993896484, + "learning_rate": 9.988964838932882e-05, + "loss": 5.1009, + "step": 25550 + }, + { + "epoch": 0.11019236315270682, + "grad_norm": 1.0651121139526367, + "learning_rate": 9.988919858211473e-05, + "loss": 5.0074, + "step": 25600 + }, + { + "epoch": 0.11040758261198945, + "grad_norm": 2.651923894882202, + "learning_rate": 9.98887478610454e-05, + "loss": 5.6355, + "step": 25650 + }, + { + "epoch": 0.11062280207127208, + "grad_norm": 1.4448643922805786, + "learning_rate": 9.988829622612909e-05, + "loss": 5.4515, + "step": 25700 + }, + { + "epoch": 0.1108380215305547, + "grad_norm": 2.3571434020996094, + "learning_rate": 9.988784367737407e-05, + "loss": 5.5517, + "step": 25750 + }, + { + "epoch": 0.11105324098983733, + "grad_norm": 2.3411264419555664, + "learning_rate": 9.988739021478864e-05, + "loss": 5.625, + "step": 25800 + }, + { + "epoch": 0.11126846044911996, + "grad_norm": 2.2248804569244385, + "learning_rate": 9.988693583838108e-05, + "loss": 4.8392, + "step": 25850 + }, + { + "epoch": 0.11148367990840259, + "grad_norm": 1.4723479747772217, + "learning_rate": 9.988648054815975e-05, + "loss": 5.6298, + "step": 25900 + }, + { + "epoch": 0.11169889936768523, + "grad_norm": 1.6894710063934326, + "learning_rate": 9.988602434413295e-05, + "loss": 5.3824, + "step": 25950 + }, + { + "epoch": 0.11191411882696786, + "grad_norm": 3.0961272716522217, + "learning_rate": 9.988556722630904e-05, + "loss": 5.5773, + "step": 26000 + }, + { + "epoch": 0.11212933828625049, + "grad_norm": 2.9665780067443848, + "learning_rate": 9.988510919469643e-05, + "loss": 5.3521, + "step": 26050 + }, + { + "epoch": 0.11234455774553312, + "grad_norm": 1.2362552881240845, + "learning_rate": 9.988465024930348e-05, + "loss": 5.3812, + "step": 26100 + }, + { + "epoch": 0.11255977720481575, + "grad_norm": 1.274009346961975, + "learning_rate": 9.988419039013861e-05, + "loss": 5.4168, + "step": 26150 + }, + { + "epoch": 0.11277499666409838, + "grad_norm": 2.7908737659454346, + "learning_rate": 9.988372961721023e-05, + "loss": 5.4106, + "step": 26200 + }, + { + "epoch": 0.11299021612338102, + "grad_norm": 1.8592579364776611, + "learning_rate": 9.98832679305268e-05, + "loss": 5.1929, + "step": 26250 + }, + { + "epoch": 0.11320543558266365, + "grad_norm": 1.5714116096496582, + "learning_rate": 9.988280533009675e-05, + "loss": 5.4644, + "step": 26300 + }, + { + "epoch": 0.11342065504194628, + "grad_norm": 2.6589457988739014, + "learning_rate": 9.988234181592859e-05, + "loss": 5.7564, + "step": 26350 + }, + { + "epoch": 0.1136358745012289, + "grad_norm": 2.239645004272461, + "learning_rate": 9.988187738803077e-05, + "loss": 5.4467, + "step": 26400 + }, + { + "epoch": 0.11385109396051153, + "grad_norm": 1.60032320022583, + "learning_rate": 9.988141204641184e-05, + "loss": 5.9954, + "step": 26450 + }, + { + "epoch": 0.11406631341979416, + "grad_norm": 1.6965994834899902, + "learning_rate": 9.988094579108027e-05, + "loss": 5.9109, + "step": 26500 + }, + { + "epoch": 0.11428153287907679, + "grad_norm": 2.6602962017059326, + "learning_rate": 9.988047862204467e-05, + "loss": 5.8182, + "step": 26550 + }, + { + "epoch": 0.11449675233835943, + "grad_norm": 2.668867349624634, + "learning_rate": 9.988001053931353e-05, + "loss": 5.502, + "step": 26600 + }, + { + "epoch": 0.11471197179764206, + "grad_norm": 3.1338186264038086, + "learning_rate": 9.987954154289546e-05, + "loss": 5.1055, + "step": 26650 + }, + { + "epoch": 0.11492719125692469, + "grad_norm": 2.404160737991333, + "learning_rate": 9.987907163279905e-05, + "loss": 5.8189, + "step": 26700 + }, + { + "epoch": 0.11514241071620732, + "grad_norm": 0.6987460851669312, + "learning_rate": 9.987860080903289e-05, + "loss": 5.6674, + "step": 26750 + }, + { + "epoch": 0.11535763017548994, + "grad_norm": 2.199315071105957, + "learning_rate": 9.987812907160563e-05, + "loss": 5.8566, + "step": 26800 + }, + { + "epoch": 0.11557284963477257, + "grad_norm": 2.871995687484741, + "learning_rate": 9.987765642052589e-05, + "loss": 5.697, + "step": 26850 + }, + { + "epoch": 0.1157880690940552, + "grad_norm": 2.1545634269714355, + "learning_rate": 9.987718285580234e-05, + "loss": 5.2173, + "step": 26900 + }, + { + "epoch": 0.11600328855333784, + "grad_norm": 1.8766212463378906, + "learning_rate": 9.987670837744365e-05, + "loss": 5.6564, + "step": 26950 + }, + { + "epoch": 0.11621850801262047, + "grad_norm": 0.7649845480918884, + "learning_rate": 9.987623298545851e-05, + "loss": 5.0251, + "step": 27000 + }, + { + "epoch": 0.11621850801262047, + "eval_loss": 6.025885105133057, + "eval_runtime": 35.1463, + "eval_samples_per_second": 18.21, + "eval_steps_per_second": 9.105, + "eval_tts_loss": 6.0371585044848075, + "step": 27000 + }, + { + "epoch": 0.1164337274719031, + "grad_norm": 2.2062411308288574, + "learning_rate": 9.987575667985564e-05, + "loss": 5.5571, + "step": 27050 + }, + { + "epoch": 0.11664894693118573, + "grad_norm": 2.8346869945526123, + "learning_rate": 9.987527946064374e-05, + "loss": 5.529, + "step": 27100 + }, + { + "epoch": 0.11686416639046836, + "grad_norm": 2.1873340606689453, + "learning_rate": 9.987480132783158e-05, + "loss": 5.9728, + "step": 27150 + }, + { + "epoch": 0.11707938584975099, + "grad_norm": 1.9982357025146484, + "learning_rate": 9.987432228142789e-05, + "loss": 5.0546, + "step": 27200 + }, + { + "epoch": 0.11729460530903363, + "grad_norm": 1.6674128770828247, + "learning_rate": 9.987384232144146e-05, + "loss": 5.5633, + "step": 27250 + }, + { + "epoch": 0.11750982476831626, + "grad_norm": 3.05452036857605, + "learning_rate": 9.987336144788109e-05, + "loss": 5.6598, + "step": 27300 + }, + { + "epoch": 0.11772504422759889, + "grad_norm": 1.5572335720062256, + "learning_rate": 9.987287966075558e-05, + "loss": 5.4243, + "step": 27350 + }, + { + "epoch": 0.11794026368688151, + "grad_norm": 2.8993947505950928, + "learning_rate": 9.987239696007376e-05, + "loss": 5.3932, + "step": 27400 + }, + { + "epoch": 0.11815548314616414, + "grad_norm": 0.8210366368293762, + "learning_rate": 9.987191334584445e-05, + "loss": 5.3612, + "step": 27450 + }, + { + "epoch": 0.11837070260544677, + "grad_norm": 2.9093949794769287, + "learning_rate": 9.987142881807654e-05, + "loss": 5.5898, + "step": 27500 + }, + { + "epoch": 0.1185859220647294, + "grad_norm": 1.915183424949646, + "learning_rate": 9.987094337677888e-05, + "loss": 5.1732, + "step": 27550 + }, + { + "epoch": 0.11880114152401204, + "grad_norm": 1.4846135377883911, + "learning_rate": 9.987045702196039e-05, + "loss": 5.3181, + "step": 27600 + }, + { + "epoch": 0.11901636098329467, + "grad_norm": 1.4232251644134521, + "learning_rate": 9.986996975362994e-05, + "loss": 5.514, + "step": 27650 + }, + { + "epoch": 0.1192315804425773, + "grad_norm": 1.5308873653411865, + "learning_rate": 9.986948157179648e-05, + "loss": 5.5734, + "step": 27700 + }, + { + "epoch": 0.11944679990185993, + "grad_norm": 3.62681245803833, + "learning_rate": 9.986899247646897e-05, + "loss": 5.7271, + "step": 27750 + }, + { + "epoch": 0.11966201936114255, + "grad_norm": 1.4417561292648315, + "learning_rate": 9.986850246765634e-05, + "loss": 5.2856, + "step": 27800 + }, + { + "epoch": 0.11987723882042518, + "grad_norm": 1.7948182821273804, + "learning_rate": 9.986801154536755e-05, + "loss": 5.5233, + "step": 27850 + }, + { + "epoch": 0.12009245827970781, + "grad_norm": 1.6459102630615234, + "learning_rate": 9.986751970961166e-05, + "loss": 5.3729, + "step": 27900 + }, + { + "epoch": 0.12030767773899045, + "grad_norm": 2.9628498554229736, + "learning_rate": 9.98670269603976e-05, + "loss": 5.4728, + "step": 27950 + }, + { + "epoch": 0.12052289719827308, + "grad_norm": 0.5627707839012146, + "learning_rate": 9.986653329773446e-05, + "loss": 5.6092, + "step": 28000 + }, + { + "epoch": 0.12073811665755571, + "grad_norm": 1.7410998344421387, + "learning_rate": 9.986603872163124e-05, + "loss": 5.1737, + "step": 28050 + }, + { + "epoch": 0.12095333611683834, + "grad_norm": 2.4717376232147217, + "learning_rate": 9.986554323209702e-05, + "loss": 5.483, + "step": 28100 + }, + { + "epoch": 0.12116855557612097, + "grad_norm": 2.147458791732788, + "learning_rate": 9.986504682914089e-05, + "loss": 5.2685, + "step": 28150 + }, + { + "epoch": 0.1213837750354036, + "grad_norm": 2.1300036907196045, + "learning_rate": 9.986454951277189e-05, + "loss": 5.3851, + "step": 28200 + }, + { + "epoch": 0.12159899449468624, + "grad_norm": 2.2413763999938965, + "learning_rate": 9.986405128299919e-05, + "loss": 5.6036, + "step": 28250 + }, + { + "epoch": 0.12181421395396887, + "grad_norm": 3.2498347759246826, + "learning_rate": 9.986355213983188e-05, + "loss": 5.5892, + "step": 28300 + }, + { + "epoch": 0.1220294334132515, + "grad_norm": 2.18815016746521, + "learning_rate": 9.986305208327911e-05, + "loss": 5.7305, + "step": 28350 + }, + { + "epoch": 0.12224465287253412, + "grad_norm": 2.1016979217529297, + "learning_rate": 9.986255111335004e-05, + "loss": 5.301, + "step": 28400 + }, + { + "epoch": 0.12245987233181675, + "grad_norm": 2.5464978218078613, + "learning_rate": 9.986204923005387e-05, + "loss": 5.3999, + "step": 28450 + }, + { + "epoch": 0.12267509179109938, + "grad_norm": 1.9036459922790527, + "learning_rate": 9.986154643339976e-05, + "loss": 5.068, + "step": 28500 + }, + { + "epoch": 0.12289031125038201, + "grad_norm": 2.368666887283325, + "learning_rate": 9.986104272339693e-05, + "loss": 4.6744, + "step": 28550 + }, + { + "epoch": 0.12310553070966465, + "grad_norm": 0.8506936430931091, + "learning_rate": 9.986053810005461e-05, + "loss": 5.5365, + "step": 28600 + }, + { + "epoch": 0.12332075016894728, + "grad_norm": 2.039323091506958, + "learning_rate": 9.986003256338205e-05, + "loss": 5.1222, + "step": 28650 + }, + { + "epoch": 0.12353596962822991, + "grad_norm": 2.6029202938079834, + "learning_rate": 9.985952611338851e-05, + "loss": 5.2029, + "step": 28700 + }, + { + "epoch": 0.12375118908751254, + "grad_norm": 2.4960179328918457, + "learning_rate": 9.985901875008325e-05, + "loss": 5.5548, + "step": 28750 + }, + { + "epoch": 0.12396640854679516, + "grad_norm": 2.147050142288208, + "learning_rate": 9.985851047347559e-05, + "loss": 4.7654, + "step": 28800 + }, + { + "epoch": 0.1241816280060778, + "grad_norm": 2.5024757385253906, + "learning_rate": 9.985800128357481e-05, + "loss": 5.3153, + "step": 28850 + }, + { + "epoch": 0.12439684746536042, + "grad_norm": 0.26511257886886597, + "learning_rate": 9.985749118039025e-05, + "loss": 5.4374, + "step": 28900 + }, + { + "epoch": 0.12461206692464306, + "grad_norm": 2.3424553871154785, + "learning_rate": 9.985698016393128e-05, + "loss": 5.5801, + "step": 28950 + }, + { + "epoch": 0.12482728638392569, + "grad_norm": 2.643354654312134, + "learning_rate": 9.98564682342072e-05, + "loss": 5.5198, + "step": 29000 + }, + { + "epoch": 0.1250425058432083, + "grad_norm": 3.4021048545837402, + "learning_rate": 9.985595539122745e-05, + "loss": 5.5845, + "step": 29050 + }, + { + "epoch": 0.12525772530249096, + "grad_norm": 1.239449143409729, + "learning_rate": 9.98554416350014e-05, + "loss": 5.4986, + "step": 29100 + }, + { + "epoch": 0.1254729447617736, + "grad_norm": 1.8107184171676636, + "learning_rate": 9.985492696553844e-05, + "loss": 5.3732, + "step": 29150 + }, + { + "epoch": 0.12568816422105622, + "grad_norm": 1.9401241540908813, + "learning_rate": 9.985441138284803e-05, + "loss": 5.3811, + "step": 29200 + }, + { + "epoch": 0.12590338368033885, + "grad_norm": 1.6494169235229492, + "learning_rate": 9.985389488693961e-05, + "loss": 5.6759, + "step": 29250 + }, + { + "epoch": 0.12611860313962148, + "grad_norm": 2.3470914363861084, + "learning_rate": 9.985337747782262e-05, + "loss": 5.3519, + "step": 29300 + }, + { + "epoch": 0.1263338225989041, + "grad_norm": 2.154808521270752, + "learning_rate": 9.985285915550655e-05, + "loss": 5.4727, + "step": 29350 + }, + { + "epoch": 0.12654904205818673, + "grad_norm": 1.1886603832244873, + "learning_rate": 9.98523399200009e-05, + "loss": 5.4659, + "step": 29400 + }, + { + "epoch": 0.12676426151746936, + "grad_norm": 0.4835765063762665, + "learning_rate": 9.985181977131518e-05, + "loss": 5.4184, + "step": 29450 + }, + { + "epoch": 0.126979480976752, + "grad_norm": 2.3627161979675293, + "learning_rate": 9.98512987094589e-05, + "loss": 5.6021, + "step": 29500 + }, + { + "epoch": 0.12719470043603462, + "grad_norm": 2.5391452312469482, + "learning_rate": 9.985077673444163e-05, + "loss": 5.6602, + "step": 29550 + }, + { + "epoch": 0.12740991989531725, + "grad_norm": 3.06418514251709, + "learning_rate": 9.985025384627291e-05, + "loss": 5.1579, + "step": 29600 + }, + { + "epoch": 0.12762513935459988, + "grad_norm": 1.4721542596817017, + "learning_rate": 9.984973004496235e-05, + "loss": 5.4792, + "step": 29650 + }, + { + "epoch": 0.1278403588138825, + "grad_norm": 2.503422260284424, + "learning_rate": 9.984920533051949e-05, + "loss": 5.3315, + "step": 29700 + }, + { + "epoch": 0.12805557827316513, + "grad_norm": 2.0734918117523193, + "learning_rate": 9.984867970295399e-05, + "loss": 5.3065, + "step": 29750 + }, + { + "epoch": 0.1282707977324478, + "grad_norm": 1.437111735343933, + "learning_rate": 9.984815316227548e-05, + "loss": 5.8317, + "step": 29800 + }, + { + "epoch": 0.12848601719173042, + "grad_norm": 1.291813611984253, + "learning_rate": 9.984762570849356e-05, + "loss": 5.1701, + "step": 29850 + }, + { + "epoch": 0.12870123665101305, + "grad_norm": 2.494788885116577, + "learning_rate": 9.984709734161793e-05, + "loss": 5.2604, + "step": 29900 + }, + { + "epoch": 0.12891645611029567, + "grad_norm": 1.8712122440338135, + "learning_rate": 9.984656806165825e-05, + "loss": 5.4942, + "step": 29950 + }, + { + "epoch": 0.1291316755695783, + "grad_norm": 1.2522830963134766, + "learning_rate": 9.984603786862423e-05, + "loss": 5.3757, + "step": 30000 + }, + { + "epoch": 0.1291316755695783, + "eval_loss": 5.9812750816345215, + "eval_runtime": 34.8939, + "eval_samples_per_second": 18.341, + "eval_steps_per_second": 9.171, + "eval_tts_loss": 6.215888502622311, + "step": 30000 + }, + { + "epoch": 0.12934689502886093, + "grad_norm": 1.46137273311615, + "learning_rate": 9.984550676252557e-05, + "loss": 5.3207, + "step": 30050 + }, + { + "epoch": 0.12956211448814356, + "grad_norm": 1.4755154848098755, + "learning_rate": 9.984497474337201e-05, + "loss": 4.9415, + "step": 30100 + }, + { + "epoch": 0.1297773339474262, + "grad_norm": 4.713237762451172, + "learning_rate": 9.98444418111733e-05, + "loss": 6.03, + "step": 30150 + }, + { + "epoch": 0.12999255340670882, + "grad_norm": 2.6812822818756104, + "learning_rate": 9.984390796593917e-05, + "loss": 5.2702, + "step": 30200 + }, + { + "epoch": 0.13020777286599144, + "grad_norm": 2.0694351196289062, + "learning_rate": 9.984337320767943e-05, + "loss": 5.226, + "step": 30250 + }, + { + "epoch": 0.13042299232527407, + "grad_norm": 1.8143447637557983, + "learning_rate": 9.984283753640385e-05, + "loss": 5.3774, + "step": 30300 + }, + { + "epoch": 0.1306382117845567, + "grad_norm": 0.7100928425788879, + "learning_rate": 9.984230095212227e-05, + "loss": 5.6791, + "step": 30350 + }, + { + "epoch": 0.13085343124383933, + "grad_norm": 4.8559980392456055, + "learning_rate": 9.98417634548445e-05, + "loss": 5.3695, + "step": 30400 + }, + { + "epoch": 0.13106865070312199, + "grad_norm": 1.5761280059814453, + "learning_rate": 9.98412250445804e-05, + "loss": 5.3751, + "step": 30450 + }, + { + "epoch": 0.13128387016240461, + "grad_norm": 1.514512062072754, + "learning_rate": 9.984068572133982e-05, + "loss": 5.16, + "step": 30500 + }, + { + "epoch": 0.13149908962168724, + "grad_norm": 3.0511252880096436, + "learning_rate": 9.984014548513266e-05, + "loss": 5.2931, + "step": 30550 + }, + { + "epoch": 0.13171430908096987, + "grad_norm": 0.46110275387763977, + "learning_rate": 9.983960433596878e-05, + "loss": 5.5588, + "step": 30600 + }, + { + "epoch": 0.1319295285402525, + "grad_norm": 2.553818702697754, + "learning_rate": 9.983906227385811e-05, + "loss": 5.0803, + "step": 30650 + }, + { + "epoch": 0.13214474799953513, + "grad_norm": 1.1773079633712769, + "learning_rate": 9.983851929881059e-05, + "loss": 5.4283, + "step": 30700 + }, + { + "epoch": 0.13235996745881776, + "grad_norm": 1.9872972965240479, + "learning_rate": 9.983797541083615e-05, + "loss": 6.0116, + "step": 30750 + }, + { + "epoch": 0.13257518691810039, + "grad_norm": 0.6736693382263184, + "learning_rate": 9.983743060994478e-05, + "loss": 5.8161, + "step": 30800 + }, + { + "epoch": 0.132790406377383, + "grad_norm": 2.257844924926758, + "learning_rate": 9.983688489614643e-05, + "loss": 5.6275, + "step": 30850 + }, + { + "epoch": 0.13300562583666564, + "grad_norm": 2.536855936050415, + "learning_rate": 9.98363382694511e-05, + "loss": 5.4044, + "step": 30900 + }, + { + "epoch": 0.13322084529594827, + "grad_norm": 0.5928888916969299, + "learning_rate": 9.983579072986881e-05, + "loss": 5.5943, + "step": 30950 + }, + { + "epoch": 0.1334360647552309, + "grad_norm": 1.3909873962402344, + "learning_rate": 9.98352422774096e-05, + "loss": 5.2961, + "step": 31000 + }, + { + "epoch": 0.13365128421451353, + "grad_norm": 1.9300838708877563, + "learning_rate": 9.98346929120835e-05, + "loss": 5.7143, + "step": 31050 + }, + { + "epoch": 0.13386650367379618, + "grad_norm": 1.700229287147522, + "learning_rate": 9.983414263390056e-05, + "loss": 5.6343, + "step": 31100 + }, + { + "epoch": 0.1340817231330788, + "grad_norm": 1.397482991218567, + "learning_rate": 9.983359144287089e-05, + "loss": 5.6914, + "step": 31150 + }, + { + "epoch": 0.13429694259236144, + "grad_norm": 0.263315886259079, + "learning_rate": 9.983303933900459e-05, + "loss": 5.6216, + "step": 31200 + }, + { + "epoch": 0.13451216205164407, + "grad_norm": 1.4468061923980713, + "learning_rate": 9.983248632231174e-05, + "loss": 6.0275, + "step": 31250 + }, + { + "epoch": 0.1347273815109267, + "grad_norm": 3.4244213104248047, + "learning_rate": 9.983193239280248e-05, + "loss": 5.6754, + "step": 31300 + }, + { + "epoch": 0.13494260097020933, + "grad_norm": 3.2217063903808594, + "learning_rate": 9.9831377550487e-05, + "loss": 4.8805, + "step": 31350 + }, + { + "epoch": 0.13515782042949195, + "grad_norm": 1.6030455827713013, + "learning_rate": 9.98308217953754e-05, + "loss": 4.8489, + "step": 31400 + }, + { + "epoch": 0.13537303988877458, + "grad_norm": 2.1721973419189453, + "learning_rate": 9.983026512747789e-05, + "loss": 5.5673, + "step": 31450 + }, + { + "epoch": 0.1355882593480572, + "grad_norm": 2.56866717338562, + "learning_rate": 9.982970754680467e-05, + "loss": 5.5123, + "step": 31500 + }, + { + "epoch": 0.13580347880733984, + "grad_norm": 2.7944560050964355, + "learning_rate": 9.982914905336595e-05, + "loss": 5.6873, + "step": 31550 + }, + { + "epoch": 0.13601869826662247, + "grad_norm": 2.4799234867095947, + "learning_rate": 9.982858964717196e-05, + "loss": 4.9904, + "step": 31600 + }, + { + "epoch": 0.1362339177259051, + "grad_norm": 2.8318493366241455, + "learning_rate": 9.982802932823294e-05, + "loss": 5.7484, + "step": 31650 + }, + { + "epoch": 0.13644913718518772, + "grad_norm": 2.5355005264282227, + "learning_rate": 9.982746809655916e-05, + "loss": 5.0286, + "step": 31700 + }, + { + "epoch": 0.13666435664447035, + "grad_norm": 2.4060590267181396, + "learning_rate": 9.98269059521609e-05, + "loss": 5.5029, + "step": 31750 + }, + { + "epoch": 0.136879576103753, + "grad_norm": 1.8113024234771729, + "learning_rate": 9.982634289504846e-05, + "loss": 5.8076, + "step": 31800 + }, + { + "epoch": 0.13709479556303564, + "grad_norm": 1.5806828737258911, + "learning_rate": 9.982577892523215e-05, + "loss": 5.2911, + "step": 31850 + }, + { + "epoch": 0.13731001502231827, + "grad_norm": 2.3790087699890137, + "learning_rate": 9.982521404272231e-05, + "loss": 5.5066, + "step": 31900 + }, + { + "epoch": 0.1375252344816009, + "grad_norm": 2.193592071533203, + "learning_rate": 9.982464824752926e-05, + "loss": 5.6195, + "step": 31950 + }, + { + "epoch": 0.13774045394088352, + "grad_norm": 1.6138255596160889, + "learning_rate": 9.98240815396634e-05, + "loss": 5.7835, + "step": 32000 + }, + { + "epoch": 0.13795567340016615, + "grad_norm": 1.66805899143219, + "learning_rate": 9.98235139191351e-05, + "loss": 5.3861, + "step": 32050 + }, + { + "epoch": 0.13817089285944878, + "grad_norm": 1.4840621948242188, + "learning_rate": 9.982294538595474e-05, + "loss": 5.4733, + "step": 32100 + }, + { + "epoch": 0.1383861123187314, + "grad_norm": 1.447516679763794, + "learning_rate": 9.982237594013274e-05, + "loss": 5.4771, + "step": 32150 + }, + { + "epoch": 0.13860133177801404, + "grad_norm": 2.313643455505371, + "learning_rate": 9.982180558167953e-05, + "loss": 5.5887, + "step": 32200 + }, + { + "epoch": 0.13881655123729666, + "grad_norm": 2.5428316593170166, + "learning_rate": 9.982123431060559e-05, + "loss": 5.5504, + "step": 32250 + }, + { + "epoch": 0.1390317706965793, + "grad_norm": 2.1311452388763428, + "learning_rate": 9.982066212692136e-05, + "loss": 5.6135, + "step": 32300 + }, + { + "epoch": 0.13924699015586192, + "grad_norm": 0.9893867373466492, + "learning_rate": 9.982008903063729e-05, + "loss": 5.6237, + "step": 32350 + }, + { + "epoch": 0.13946220961514455, + "grad_norm": 1.552283763885498, + "learning_rate": 9.981951502176393e-05, + "loss": 5.5592, + "step": 32400 + }, + { + "epoch": 0.1396774290744272, + "grad_norm": 3.1434860229492188, + "learning_rate": 9.981894010031176e-05, + "loss": 5.5814, + "step": 32450 + }, + { + "epoch": 0.13989264853370983, + "grad_norm": 3.1417012214660645, + "learning_rate": 9.981836426629134e-05, + "loss": 5.3481, + "step": 32500 + }, + { + "epoch": 0.14010786799299246, + "grad_norm": 1.7028478384017944, + "learning_rate": 9.981778751971319e-05, + "loss": 5.2178, + "step": 32550 + }, + { + "epoch": 0.1403230874522751, + "grad_norm": 3.4213337898254395, + "learning_rate": 9.981720986058789e-05, + "loss": 5.1522, + "step": 32600 + }, + { + "epoch": 0.14053830691155772, + "grad_norm": 0.6536617279052734, + "learning_rate": 9.981663128892601e-05, + "loss": 5.0166, + "step": 32650 + }, + { + "epoch": 0.14075352637084035, + "grad_norm": 2.046649694442749, + "learning_rate": 9.981605180473816e-05, + "loss": 5.597, + "step": 32700 + }, + { + "epoch": 0.14096874583012298, + "grad_norm": 4.765674591064453, + "learning_rate": 9.981547140803496e-05, + "loss": 5.2152, + "step": 32750 + }, + { + "epoch": 0.1411839652894056, + "grad_norm": 2.2457516193389893, + "learning_rate": 9.981489009882702e-05, + "loss": 5.7226, + "step": 32800 + }, + { + "epoch": 0.14139918474868823, + "grad_norm": 1.6998268365859985, + "learning_rate": 9.9814307877125e-05, + "loss": 5.3634, + "step": 32850 + }, + { + "epoch": 0.14161440420797086, + "grad_norm": 2.59657621383667, + "learning_rate": 9.981372474293957e-05, + "loss": 5.3392, + "step": 32900 + }, + { + "epoch": 0.1418296236672535, + "grad_norm": 3.161486864089966, + "learning_rate": 9.981314069628141e-05, + "loss": 5.3462, + "step": 32950 + }, + { + "epoch": 0.14204484312653612, + "grad_norm": 3.8897647857666016, + "learning_rate": 9.98125557371612e-05, + "loss": 5.7316, + "step": 33000 + }, + { + "epoch": 0.14204484312653612, + "eval_loss": 5.929924011230469, + "eval_runtime": 35.1653, + "eval_samples_per_second": 18.2, + "eval_steps_per_second": 9.1, + "eval_tts_loss": 6.274720407382421, + "step": 33000 + }, + { + "epoch": 0.14226006258581875, + "grad_norm": 2.4234044551849365, + "learning_rate": 9.981196986558968e-05, + "loss": 5.6651, + "step": 33050 + }, + { + "epoch": 0.1424752820451014, + "grad_norm": 3.006497621536255, + "learning_rate": 9.981138308157758e-05, + "loss": 5.3141, + "step": 33100 + }, + { + "epoch": 0.14269050150438403, + "grad_norm": 2.4793994426727295, + "learning_rate": 9.981079538513562e-05, + "loss": 5.581, + "step": 33150 + }, + { + "epoch": 0.14290572096366666, + "grad_norm": 3.0280158519744873, + "learning_rate": 9.981020677627462e-05, + "loss": 5.6563, + "step": 33200 + }, + { + "epoch": 0.1431209404229493, + "grad_norm": 0.9516535997390747, + "learning_rate": 9.980961725500531e-05, + "loss": 5.2644, + "step": 33250 + }, + { + "epoch": 0.14333615988223192, + "grad_norm": 2.5305702686309814, + "learning_rate": 9.980902682133849e-05, + "loss": 5.7184, + "step": 33300 + }, + { + "epoch": 0.14355137934151455, + "grad_norm": 4.616020679473877, + "learning_rate": 9.9808435475285e-05, + "loss": 5.158, + "step": 33350 + }, + { + "epoch": 0.14376659880079717, + "grad_norm": 2.222309112548828, + "learning_rate": 9.980784321685568e-05, + "loss": 6.0574, + "step": 33400 + }, + { + "epoch": 0.1439818182600798, + "grad_norm": 1.5424367189407349, + "learning_rate": 9.980725004606135e-05, + "loss": 5.5933, + "step": 33450 + }, + { + "epoch": 0.14419703771936243, + "grad_norm": 0.8546040058135986, + "learning_rate": 9.98066559629129e-05, + "loss": 5.3472, + "step": 33500 + }, + { + "epoch": 0.14441225717864506, + "grad_norm": 0.9510634541511536, + "learning_rate": 9.980606096742117e-05, + "loss": 5.2325, + "step": 33550 + }, + { + "epoch": 0.1446274766379277, + "grad_norm": 2.7242398262023926, + "learning_rate": 9.980546505959711e-05, + "loss": 5.2621, + "step": 33600 + }, + { + "epoch": 0.14484269609721032, + "grad_norm": 0.7571353912353516, + "learning_rate": 9.980486823945163e-05, + "loss": 4.7892, + "step": 33650 + }, + { + "epoch": 0.14505791555649294, + "grad_norm": 1.459936499595642, + "learning_rate": 9.980427050699564e-05, + "loss": 5.5899, + "step": 33700 + }, + { + "epoch": 0.14527313501577557, + "grad_norm": 0.3951803743839264, + "learning_rate": 9.980367186224008e-05, + "loss": 5.3712, + "step": 33750 + }, + { + "epoch": 0.14548835447505823, + "grad_norm": 2.601038932800293, + "learning_rate": 9.980307230519595e-05, + "loss": 5.7103, + "step": 33800 + }, + { + "epoch": 0.14570357393434086, + "grad_norm": 2.958812952041626, + "learning_rate": 9.98024718358742e-05, + "loss": 5.0819, + "step": 33850 + }, + { + "epoch": 0.14591879339362349, + "grad_norm": 2.6120412349700928, + "learning_rate": 9.980187045428586e-05, + "loss": 5.3419, + "step": 33900 + }, + { + "epoch": 0.14613401285290611, + "grad_norm": 2.8739500045776367, + "learning_rate": 9.98012681604419e-05, + "loss": 5.2744, + "step": 33950 + }, + { + "epoch": 0.14634923231218874, + "grad_norm": 3.3018507957458496, + "learning_rate": 9.980066495435342e-05, + "loss": 5.7554, + "step": 34000 + }, + { + "epoch": 0.14656445177147137, + "grad_norm": 6.291999340057373, + "learning_rate": 9.980006083603142e-05, + "loss": 5.7123, + "step": 34050 + }, + { + "epoch": 0.146779671230754, + "grad_norm": 2.165609836578369, + "learning_rate": 9.979945580548698e-05, + "loss": 5.4368, + "step": 34100 + }, + { + "epoch": 0.14699489069003663, + "grad_norm": 2.3420979976654053, + "learning_rate": 9.979884986273116e-05, + "loss": 5.2801, + "step": 34150 + }, + { + "epoch": 0.14721011014931926, + "grad_norm": 1.4355032444000244, + "learning_rate": 9.97982430077751e-05, + "loss": 4.8028, + "step": 34200 + }, + { + "epoch": 0.14742532960860188, + "grad_norm": 1.4355089664459229, + "learning_rate": 9.97976352406299e-05, + "loss": 4.9769, + "step": 34250 + }, + { + "epoch": 0.1476405490678845, + "grad_norm": 1.3880969285964966, + "learning_rate": 9.979702656130667e-05, + "loss": 5.1394, + "step": 34300 + }, + { + "epoch": 0.14785576852716714, + "grad_norm": 3.255547523498535, + "learning_rate": 9.97964169698166e-05, + "loss": 5.7756, + "step": 34350 + }, + { + "epoch": 0.14807098798644977, + "grad_norm": 2.291508674621582, + "learning_rate": 9.979580646617081e-05, + "loss": 5.3929, + "step": 34400 + }, + { + "epoch": 0.14828620744573243, + "grad_norm": 2.3118536472320557, + "learning_rate": 9.979519505038053e-05, + "loss": 5.5905, + "step": 34450 + }, + { + "epoch": 0.14850142690501505, + "grad_norm": 3.168945074081421, + "learning_rate": 9.979458272245694e-05, + "loss": 5.3414, + "step": 34500 + }, + { + "epoch": 0.14871664636429768, + "grad_norm": 1.9594351053237915, + "learning_rate": 9.979396948241125e-05, + "loss": 5.6222, + "step": 34550 + }, + { + "epoch": 0.1489318658235803, + "grad_norm": 1.4581081867218018, + "learning_rate": 9.979335533025469e-05, + "loss": 5.3751, + "step": 34600 + }, + { + "epoch": 0.14914708528286294, + "grad_norm": 1.7096333503723145, + "learning_rate": 9.979274026599852e-05, + "loss": 5.2099, + "step": 34650 + }, + { + "epoch": 0.14936230474214557, + "grad_norm": 1.6439261436462402, + "learning_rate": 9.9792124289654e-05, + "loss": 4.928, + "step": 34700 + }, + { + "epoch": 0.1495775242014282, + "grad_norm": 3.5352048873901367, + "learning_rate": 9.979150740123245e-05, + "loss": 5.2443, + "step": 34750 + }, + { + "epoch": 0.14979274366071083, + "grad_norm": 1.423115849494934, + "learning_rate": 9.97908896007451e-05, + "loss": 5.443, + "step": 34800 + }, + { + "epoch": 0.15000796311999345, + "grad_norm": 1.502456545829773, + "learning_rate": 9.979027088820331e-05, + "loss": 5.7151, + "step": 34850 + }, + { + "epoch": 0.15022318257927608, + "grad_norm": 1.9039199352264404, + "learning_rate": 9.978965126361841e-05, + "loss": 5.57, + "step": 34900 + }, + { + "epoch": 0.1504384020385587, + "grad_norm": 3.0958895683288574, + "learning_rate": 9.978903072700176e-05, + "loss": 5.472, + "step": 34950 + }, + { + "epoch": 0.15065362149784134, + "grad_norm": 2.238967180252075, + "learning_rate": 9.97884092783647e-05, + "loss": 5.7927, + "step": 35000 + }, + { + "epoch": 0.15086884095712397, + "grad_norm": 3.374598264694214, + "learning_rate": 9.978778691771863e-05, + "loss": 5.1862, + "step": 35050 + }, + { + "epoch": 0.15108406041640662, + "grad_norm": 0.41951072216033936, + "learning_rate": 9.978716364507495e-05, + "loss": 5.6541, + "step": 35100 + }, + { + "epoch": 0.15129927987568925, + "grad_norm": 1.4936010837554932, + "learning_rate": 9.978653946044507e-05, + "loss": 5.8178, + "step": 35150 + }, + { + "epoch": 0.15151449933497188, + "grad_norm": 1.3657845258712769, + "learning_rate": 9.978591436384044e-05, + "loss": 5.5838, + "step": 35200 + }, + { + "epoch": 0.1517297187942545, + "grad_norm": 3.2133371829986572, + "learning_rate": 9.97852883552725e-05, + "loss": 5.6795, + "step": 35250 + }, + { + "epoch": 0.15194493825353714, + "grad_norm": 1.761927604675293, + "learning_rate": 9.97846614347527e-05, + "loss": 5.5125, + "step": 35300 + }, + { + "epoch": 0.15216015771281977, + "grad_norm": 1.6710435152053833, + "learning_rate": 9.978403360229254e-05, + "loss": 5.2356, + "step": 35350 + }, + { + "epoch": 0.1523753771721024, + "grad_norm": 2.6121397018432617, + "learning_rate": 9.978340485790354e-05, + "loss": 5.2994, + "step": 35400 + }, + { + "epoch": 0.15259059663138502, + "grad_norm": 2.96205997467041, + "learning_rate": 9.978277520159718e-05, + "loss": 5.692, + "step": 35450 + }, + { + "epoch": 0.15280581609066765, + "grad_norm": 1.3443952798843384, + "learning_rate": 9.978214463338502e-05, + "loss": 5.2018, + "step": 35500 + }, + { + "epoch": 0.15302103554995028, + "grad_norm": 2.102646827697754, + "learning_rate": 9.978151315327859e-05, + "loss": 5.67, + "step": 35550 + }, + { + "epoch": 0.1532362550092329, + "grad_norm": 0.8915745615959167, + "learning_rate": 9.978088076128949e-05, + "loss": 5.2608, + "step": 35600 + }, + { + "epoch": 0.15345147446851554, + "grad_norm": 2.6986873149871826, + "learning_rate": 9.978024745742926e-05, + "loss": 5.6509, + "step": 35650 + }, + { + "epoch": 0.15366669392779816, + "grad_norm": 4.246592998504639, + "learning_rate": 9.977961324170955e-05, + "loss": 5.6828, + "step": 35700 + }, + { + "epoch": 0.1538819133870808, + "grad_norm": 2.2997899055480957, + "learning_rate": 9.977897811414192e-05, + "loss": 5.7015, + "step": 35750 + }, + { + "epoch": 0.15409713284636345, + "grad_norm": 2.178666830062866, + "learning_rate": 9.977834207473805e-05, + "loss": 5.3134, + "step": 35800 + }, + { + "epoch": 0.15431235230564608, + "grad_norm": 4.692437171936035, + "learning_rate": 9.977770512350958e-05, + "loss": 5.0401, + "step": 35850 + }, + { + "epoch": 0.1545275717649287, + "grad_norm": 0.7968437671661377, + "learning_rate": 9.977706726046817e-05, + "loss": 4.956, + "step": 35900 + }, + { + "epoch": 0.15474279122421133, + "grad_norm": 1.5475752353668213, + "learning_rate": 9.977642848562551e-05, + "loss": 5.6032, + "step": 35950 + }, + { + "epoch": 0.15495801068349396, + "grad_norm": 1.921683430671692, + "learning_rate": 9.97757887989933e-05, + "loss": 5.6502, + "step": 36000 + }, + { + "epoch": 0.15495801068349396, + "eval_loss": 5.9112629890441895, + "eval_runtime": 35.0143, + "eval_samples_per_second": 18.278, + "eval_steps_per_second": 9.139, + "eval_tts_loss": 6.190609746281781, + "step": 36000 + }, + { + "epoch": 0.1551732301427766, + "grad_norm": 0.5593498945236206, + "learning_rate": 9.977514820058325e-05, + "loss": 5.5311, + "step": 36050 + }, + { + "epoch": 0.15538844960205922, + "grad_norm": 1.648179054260254, + "learning_rate": 9.977450669040709e-05, + "loss": 5.0727, + "step": 36100 + }, + { + "epoch": 0.15560366906134185, + "grad_norm": 1.225598931312561, + "learning_rate": 9.977386426847659e-05, + "loss": 5.4417, + "step": 36150 + }, + { + "epoch": 0.15581888852062448, + "grad_norm": 4.123205184936523, + "learning_rate": 9.977322093480351e-05, + "loss": 5.6039, + "step": 36200 + }, + { + "epoch": 0.1560341079799071, + "grad_norm": 5.625216007232666, + "learning_rate": 9.977257668939964e-05, + "loss": 5.3753, + "step": 36250 + }, + { + "epoch": 0.15624932743918973, + "grad_norm": 1.8246991634368896, + "learning_rate": 9.977193153227678e-05, + "loss": 5.7092, + "step": 36300 + }, + { + "epoch": 0.15646454689847236, + "grad_norm": 2.255547523498535, + "learning_rate": 9.977128546344672e-05, + "loss": 5.7265, + "step": 36350 + }, + { + "epoch": 0.156679766357755, + "grad_norm": 1.492177128791809, + "learning_rate": 9.977063848292132e-05, + "loss": 5.4008, + "step": 36400 + }, + { + "epoch": 0.15689498581703765, + "grad_norm": 2.5898587703704834, + "learning_rate": 9.976999059071243e-05, + "loss": 5.1462, + "step": 36450 + }, + { + "epoch": 0.15711020527632027, + "grad_norm": 2.1698334217071533, + "learning_rate": 9.976934178683192e-05, + "loss": 5.3856, + "step": 36500 + }, + { + "epoch": 0.1573254247356029, + "grad_norm": 1.5614228248596191, + "learning_rate": 9.97686920712917e-05, + "loss": 5.2898, + "step": 36550 + }, + { + "epoch": 0.15754064419488553, + "grad_norm": 3.1924962997436523, + "learning_rate": 9.97680414441036e-05, + "loss": 4.9386, + "step": 36600 + }, + { + "epoch": 0.15775586365416816, + "grad_norm": 2.240344762802124, + "learning_rate": 9.976738990527958e-05, + "loss": 5.5171, + "step": 36650 + }, + { + "epoch": 0.1579710831134508, + "grad_norm": 1.5970457792282104, + "learning_rate": 9.976673745483161e-05, + "loss": 4.759, + "step": 36700 + }, + { + "epoch": 0.15818630257273342, + "grad_norm": 1.3828089237213135, + "learning_rate": 9.976608409277157e-05, + "loss": 5.6552, + "step": 36750 + }, + { + "epoch": 0.15840152203201605, + "grad_norm": 2.226567029953003, + "learning_rate": 9.976542981911147e-05, + "loss": 5.4942, + "step": 36800 + }, + { + "epoch": 0.15861674149129867, + "grad_norm": 1.6933034658432007, + "learning_rate": 9.97647746338633e-05, + "loss": 5.3904, + "step": 36850 + }, + { + "epoch": 0.1588319609505813, + "grad_norm": 1.5187500715255737, + "learning_rate": 9.976411853703905e-05, + "loss": 5.5622, + "step": 36900 + }, + { + "epoch": 0.15904718040986393, + "grad_norm": 2.3954992294311523, + "learning_rate": 9.976346152865072e-05, + "loss": 5.5178, + "step": 36950 + }, + { + "epoch": 0.15926239986914656, + "grad_norm": 2.057668924331665, + "learning_rate": 9.976280360871039e-05, + "loss": 4.9503, + "step": 37000 + }, + { + "epoch": 0.1594776193284292, + "grad_norm": 1.8565423488616943, + "learning_rate": 9.976214477723007e-05, + "loss": 5.5459, + "step": 37050 + }, + { + "epoch": 0.15969283878771182, + "grad_norm": 0.8272342681884766, + "learning_rate": 9.976148503422185e-05, + "loss": 4.8183, + "step": 37100 + }, + { + "epoch": 0.15990805824699447, + "grad_norm": 3.098938226699829, + "learning_rate": 9.97608243796978e-05, + "loss": 5.4898, + "step": 37150 + }, + { + "epoch": 0.1601232777062771, + "grad_norm": 2.336383819580078, + "learning_rate": 9.976016281367002e-05, + "loss": 5.3087, + "step": 37200 + }, + { + "epoch": 0.16033849716555973, + "grad_norm": 0.3424004912376404, + "learning_rate": 9.975950033615066e-05, + "loss": 4.9581, + "step": 37250 + }, + { + "epoch": 0.16055371662484236, + "grad_norm": 2.729297637939453, + "learning_rate": 9.975883694715182e-05, + "loss": 5.5309, + "step": 37300 + }, + { + "epoch": 0.16076893608412499, + "grad_norm": 2.7886335849761963, + "learning_rate": 9.975817264668567e-05, + "loss": 5.3039, + "step": 37350 + }, + { + "epoch": 0.16098415554340761, + "grad_norm": 4.871828079223633, + "learning_rate": 9.975750743476437e-05, + "loss": 5.3582, + "step": 37400 + }, + { + "epoch": 0.16119937500269024, + "grad_norm": 1.5429518222808838, + "learning_rate": 9.97568413114001e-05, + "loss": 5.6095, + "step": 37450 + }, + { + "epoch": 0.16141459446197287, + "grad_norm": 0.6986168026924133, + "learning_rate": 9.975617427660508e-05, + "loss": 5.9651, + "step": 37500 + }, + { + "epoch": 0.1616298139212555, + "grad_norm": 1.876977801322937, + "learning_rate": 9.975550633039152e-05, + "loss": 5.4918, + "step": 37550 + }, + { + "epoch": 0.16184503338053813, + "grad_norm": 1.5410807132720947, + "learning_rate": 9.975483747277165e-05, + "loss": 5.504, + "step": 37600 + }, + { + "epoch": 0.16206025283982076, + "grad_norm": 1.4334187507629395, + "learning_rate": 9.975416770375774e-05, + "loss": 5.3349, + "step": 37650 + }, + { + "epoch": 0.16227547229910338, + "grad_norm": 1.606478214263916, + "learning_rate": 9.975349702336203e-05, + "loss": 5.6279, + "step": 37700 + }, + { + "epoch": 0.162490691758386, + "grad_norm": 2.4072859287261963, + "learning_rate": 9.975282543159683e-05, + "loss": 5.4816, + "step": 37750 + }, + { + "epoch": 0.16270591121766867, + "grad_norm": 2.2514188289642334, + "learning_rate": 9.975215292847441e-05, + "loss": 5.8864, + "step": 37800 + }, + { + "epoch": 0.1629211306769513, + "grad_norm": 0.2650400698184967, + "learning_rate": 9.975147951400714e-05, + "loss": 5.5612, + "step": 37850 + }, + { + "epoch": 0.16313635013623393, + "grad_norm": 1.2410880327224731, + "learning_rate": 9.975080518820731e-05, + "loss": 5.1474, + "step": 37900 + }, + { + "epoch": 0.16335156959551655, + "grad_norm": 1.8060237169265747, + "learning_rate": 9.975012995108728e-05, + "loss": 5.794, + "step": 37950 + }, + { + "epoch": 0.16356678905479918, + "grad_norm": 2.890559196472168, + "learning_rate": 9.974945380265946e-05, + "loss": 4.7735, + "step": 38000 + }, + { + "epoch": 0.1637820085140818, + "grad_norm": 1.184944987297058, + "learning_rate": 9.974877674293617e-05, + "loss": 5.352, + "step": 38050 + }, + { + "epoch": 0.16399722797336444, + "grad_norm": 1.8469971418380737, + "learning_rate": 9.974809877192985e-05, + "loss": 5.1287, + "step": 38100 + }, + { + "epoch": 0.16421244743264707, + "grad_norm": 1.6654694080352783, + "learning_rate": 9.974741988965292e-05, + "loss": 5.7288, + "step": 38150 + }, + { + "epoch": 0.1644276668919297, + "grad_norm": 0.8104584217071533, + "learning_rate": 9.97467400961178e-05, + "loss": 5.25, + "step": 38200 + }, + { + "epoch": 0.16464288635121233, + "grad_norm": 3.234036684036255, + "learning_rate": 9.974605939133697e-05, + "loss": 5.4808, + "step": 38250 + }, + { + "epoch": 0.16485810581049495, + "grad_norm": 0.8953971862792969, + "learning_rate": 9.974537777532287e-05, + "loss": 5.334, + "step": 38300 + }, + { + "epoch": 0.16507332526977758, + "grad_norm": 2.12491774559021, + "learning_rate": 9.974469524808798e-05, + "loss": 5.2119, + "step": 38350 + }, + { + "epoch": 0.1652885447290602, + "grad_norm": 1.3705114126205444, + "learning_rate": 9.974401180964483e-05, + "loss": 5.2791, + "step": 38400 + }, + { + "epoch": 0.16550376418834287, + "grad_norm": 1.495818853378296, + "learning_rate": 9.974332746000594e-05, + "loss": 5.4718, + "step": 38450 + }, + { + "epoch": 0.1657189836476255, + "grad_norm": 1.4970543384552002, + "learning_rate": 9.974264219918381e-05, + "loss": 5.5372, + "step": 38500 + }, + { + "epoch": 0.16593420310690812, + "grad_norm": 3.288795232772827, + "learning_rate": 9.974195602719103e-05, + "loss": 5.2175, + "step": 38550 + }, + { + "epoch": 0.16614942256619075, + "grad_norm": 1.8418917655944824, + "learning_rate": 9.974126894404015e-05, + "loss": 5.1551, + "step": 38600 + }, + { + "epoch": 0.16636464202547338, + "grad_norm": 2.634288787841797, + "learning_rate": 9.974058094974377e-05, + "loss": 5.2052, + "step": 38650 + }, + { + "epoch": 0.166579861484756, + "grad_norm": 2.195910930633545, + "learning_rate": 9.973989204431448e-05, + "loss": 5.5815, + "step": 38700 + }, + { + "epoch": 0.16679508094403864, + "grad_norm": 2.288658618927002, + "learning_rate": 9.973920222776489e-05, + "loss": 5.5911, + "step": 38750 + }, + { + "epoch": 0.16701030040332127, + "grad_norm": 1.7316340208053589, + "learning_rate": 9.973851150010765e-05, + "loss": 5.42, + "step": 38800 + }, + { + "epoch": 0.1672255198626039, + "grad_norm": 2.767078161239624, + "learning_rate": 9.973781986135543e-05, + "loss": 5.3884, + "step": 38850 + }, + { + "epoch": 0.16744073932188652, + "grad_norm": 0.4098517596721649, + "learning_rate": 9.973712731152085e-05, + "loss": 5.0994, + "step": 38900 + }, + { + "epoch": 0.16765595878116915, + "grad_norm": 0.8560373783111572, + "learning_rate": 9.973643385061664e-05, + "loss": 4.8645, + "step": 38950 + }, + { + "epoch": 0.16787117824045178, + "grad_norm": 2.0795884132385254, + "learning_rate": 9.97357394786555e-05, + "loss": 5.3843, + "step": 39000 + }, + { + "epoch": 0.16787117824045178, + "eval_loss": 5.870829105377197, + "eval_runtime": 34.7296, + "eval_samples_per_second": 18.428, + "eval_steps_per_second": 9.214, + "eval_tts_loss": 6.272877175742313, + "step": 39000 + }, + { + "epoch": 0.1680863976997344, + "grad_norm": 1.8641045093536377, + "learning_rate": 9.973504419565012e-05, + "loss": 5.0595, + "step": 39050 + }, + { + "epoch": 0.16830161715901704, + "grad_norm": 2.2022879123687744, + "learning_rate": 9.973434800161325e-05, + "loss": 5.0, + "step": 39100 + }, + { + "epoch": 0.1685168366182997, + "grad_norm": 2.1489479541778564, + "learning_rate": 9.973365089655766e-05, + "loss": 5.4008, + "step": 39150 + }, + { + "epoch": 0.16873205607758232, + "grad_norm": 1.7100859880447388, + "learning_rate": 9.973295288049609e-05, + "loss": 5.222, + "step": 39200 + }, + { + "epoch": 0.16894727553686495, + "grad_norm": 1.5498934984207153, + "learning_rate": 9.973225395344135e-05, + "loss": 5.2346, + "step": 39250 + }, + { + "epoch": 0.16916249499614758, + "grad_norm": 4.73895788192749, + "learning_rate": 9.973155411540624e-05, + "loss": 5.2512, + "step": 39300 + }, + { + "epoch": 0.1693777144554302, + "grad_norm": 2.2389514446258545, + "learning_rate": 9.973085336640358e-05, + "loss": 5.2602, + "step": 39350 + }, + { + "epoch": 0.16959293391471283, + "grad_norm": 1.3899948596954346, + "learning_rate": 9.973015170644617e-05, + "loss": 5.233, + "step": 39400 + }, + { + "epoch": 0.16980815337399546, + "grad_norm": 1.5831886529922485, + "learning_rate": 9.972944913554692e-05, + "loss": 5.0764, + "step": 39450 + }, + { + "epoch": 0.1700233728332781, + "grad_norm": 2.452754259109497, + "learning_rate": 9.972874565371865e-05, + "loss": 5.4594, + "step": 39500 + }, + { + "epoch": 0.17023859229256072, + "grad_norm": 3.4412734508514404, + "learning_rate": 9.972804126097428e-05, + "loss": 4.8977, + "step": 39550 + }, + { + "epoch": 0.17045381175184335, + "grad_norm": 1.4504958391189575, + "learning_rate": 9.972733595732671e-05, + "loss": 5.386, + "step": 39600 + }, + { + "epoch": 0.17066903121112598, + "grad_norm": 2.148068904876709, + "learning_rate": 9.972662974278884e-05, + "loss": 5.5172, + "step": 39650 + }, + { + "epoch": 0.1708842506704086, + "grad_norm": 1.2441363334655762, + "learning_rate": 9.972592261737361e-05, + "loss": 5.4052, + "step": 39700 + }, + { + "epoch": 0.17109947012969123, + "grad_norm": 1.8839558362960815, + "learning_rate": 9.9725214581094e-05, + "loss": 5.5614, + "step": 39750 + }, + { + "epoch": 0.1713146895889739, + "grad_norm": 2.6571266651153564, + "learning_rate": 9.972450563396295e-05, + "loss": 5.3385, + "step": 39800 + }, + { + "epoch": 0.17152990904825652, + "grad_norm": 2.1655266284942627, + "learning_rate": 9.972379577599345e-05, + "loss": 5.6496, + "step": 39850 + }, + { + "epoch": 0.17174512850753915, + "grad_norm": 0.5582059025764465, + "learning_rate": 9.97230850071985e-05, + "loss": 4.8615, + "step": 39900 + }, + { + "epoch": 0.17196034796682177, + "grad_norm": 3.6343626976013184, + "learning_rate": 9.972237332759114e-05, + "loss": 5.386, + "step": 39950 + }, + { + "epoch": 0.1721755674261044, + "grad_norm": 1.3018357753753662, + "learning_rate": 9.97216607371844e-05, + "loss": 5.7378, + "step": 40000 + }, + { + "epoch": 0.17239078688538703, + "grad_norm": 0.6158581972122192, + "learning_rate": 9.972094723599132e-05, + "loss": 5.1397, + "step": 40050 + }, + { + "epoch": 0.17260600634466966, + "grad_norm": 1.287165880203247, + "learning_rate": 9.972023282402497e-05, + "loss": 5.2401, + "step": 40100 + }, + { + "epoch": 0.1728212258039523, + "grad_norm": 1.441633939743042, + "learning_rate": 9.971951750129846e-05, + "loss": 5.4769, + "step": 40150 + }, + { + "epoch": 0.17303644526323492, + "grad_norm": 1.784557819366455, + "learning_rate": 9.971880126782486e-05, + "loss": 5.0819, + "step": 40200 + }, + { + "epoch": 0.17325166472251755, + "grad_norm": 1.6827545166015625, + "learning_rate": 9.971808412361732e-05, + "loss": 5.6166, + "step": 40250 + }, + { + "epoch": 0.17346688418180017, + "grad_norm": 3.3910458087921143, + "learning_rate": 9.971736606868896e-05, + "loss": 5.55, + "step": 40300 + }, + { + "epoch": 0.1736821036410828, + "grad_norm": 1.2950443029403687, + "learning_rate": 9.971664710305292e-05, + "loss": 5.1753, + "step": 40350 + }, + { + "epoch": 0.17389732310036543, + "grad_norm": 2.532304525375366, + "learning_rate": 9.971592722672241e-05, + "loss": 5.6387, + "step": 40400 + }, + { + "epoch": 0.1741125425596481, + "grad_norm": 1.2121481895446777, + "learning_rate": 9.971520643971057e-05, + "loss": 5.4671, + "step": 40450 + }, + { + "epoch": 0.17432776201893072, + "grad_norm": 3.362840175628662, + "learning_rate": 9.971448474203063e-05, + "loss": 5.0655, + "step": 40500 + }, + { + "epoch": 0.17454298147821334, + "grad_norm": 1.7366851568222046, + "learning_rate": 9.97137621336958e-05, + "loss": 5.3339, + "step": 40550 + }, + { + "epoch": 0.17475820093749597, + "grad_norm": 1.1280310153961182, + "learning_rate": 9.971303861471934e-05, + "loss": 5.1712, + "step": 40600 + }, + { + "epoch": 0.1749734203967786, + "grad_norm": 0.492414653301239, + "learning_rate": 9.971231418511449e-05, + "loss": 4.9485, + "step": 40650 + }, + { + "epoch": 0.17518863985606123, + "grad_norm": 0.2789967954158783, + "learning_rate": 9.971158884489448e-05, + "loss": 5.6404, + "step": 40700 + }, + { + "epoch": 0.17540385931534386, + "grad_norm": 2.443483352661133, + "learning_rate": 9.971086259407266e-05, + "loss": 5.8068, + "step": 40750 + }, + { + "epoch": 0.17561907877462649, + "grad_norm": 2.113656520843506, + "learning_rate": 9.97101354326623e-05, + "loss": 4.926, + "step": 40800 + }, + { + "epoch": 0.17583429823390911, + "grad_norm": 3.193448305130005, + "learning_rate": 9.970940736067672e-05, + "loss": 5.1317, + "step": 40850 + }, + { + "epoch": 0.17604951769319174, + "grad_norm": 1.3200008869171143, + "learning_rate": 9.970867837812927e-05, + "loss": 5.7377, + "step": 40900 + }, + { + "epoch": 0.17626473715247437, + "grad_norm": 0.31928595900535583, + "learning_rate": 9.970794848503328e-05, + "loss": 4.8161, + "step": 40950 + }, + { + "epoch": 0.176479956611757, + "grad_norm": 2.3104941844940186, + "learning_rate": 9.970721768140215e-05, + "loss": 5.2412, + "step": 41000 + }, + { + "epoch": 0.17669517607103963, + "grad_norm": 3.51460337638855, + "learning_rate": 9.970648596724923e-05, + "loss": 5.4543, + "step": 41050 + }, + { + "epoch": 0.17691039553032226, + "grad_norm": 3.950688123703003, + "learning_rate": 9.970575334258796e-05, + "loss": 5.319, + "step": 41100 + }, + { + "epoch": 0.1771256149896049, + "grad_norm": 1.7399026155471802, + "learning_rate": 9.970501980743174e-05, + "loss": 5.2697, + "step": 41150 + }, + { + "epoch": 0.17734083444888754, + "grad_norm": 2.093517541885376, + "learning_rate": 9.970428536179402e-05, + "loss": 5.163, + "step": 41200 + }, + { + "epoch": 0.17755605390817017, + "grad_norm": 1.5393140316009521, + "learning_rate": 9.970355000568823e-05, + "loss": 5.4786, + "step": 41250 + }, + { + "epoch": 0.1777712733674528, + "grad_norm": 0.8997963666915894, + "learning_rate": 9.970281373912787e-05, + "loss": 4.8615, + "step": 41300 + }, + { + "epoch": 0.17798649282673543, + "grad_norm": 3.836322546005249, + "learning_rate": 9.970207656212639e-05, + "loss": 5.3586, + "step": 41350 + }, + { + "epoch": 0.17820171228601805, + "grad_norm": 1.6137765645980835, + "learning_rate": 9.970133847469733e-05, + "loss": 5.3465, + "step": 41400 + }, + { + "epoch": 0.17841693174530068, + "grad_norm": 2.206439733505249, + "learning_rate": 9.970059947685418e-05, + "loss": 5.3968, + "step": 41450 + }, + { + "epoch": 0.1786321512045833, + "grad_norm": 1.4915977716445923, + "learning_rate": 9.96998595686105e-05, + "loss": 5.5864, + "step": 41500 + }, + { + "epoch": 0.17884737066386594, + "grad_norm": 2.243992805480957, + "learning_rate": 9.969911874997984e-05, + "loss": 5.3603, + "step": 41550 + }, + { + "epoch": 0.17906259012314857, + "grad_norm": 1.5681465864181519, + "learning_rate": 9.969837702097575e-05, + "loss": 5.4028, + "step": 41600 + }, + { + "epoch": 0.1792778095824312, + "grad_norm": 3.025139808654785, + "learning_rate": 9.969763438161185e-05, + "loss": 5.7374, + "step": 41650 + }, + { + "epoch": 0.17949302904171383, + "grad_norm": 3.2012557983398438, + "learning_rate": 9.969689083190172e-05, + "loss": 4.9231, + "step": 41700 + }, + { + "epoch": 0.17970824850099645, + "grad_norm": 1.292650818824768, + "learning_rate": 9.969614637185897e-05, + "loss": 4.8823, + "step": 41750 + }, + { + "epoch": 0.1799234679602791, + "grad_norm": 1.1735448837280273, + "learning_rate": 9.969540100149728e-05, + "loss": 5.5142, + "step": 41800 + }, + { + "epoch": 0.18013868741956174, + "grad_norm": 1.6322771310806274, + "learning_rate": 9.969465472083026e-05, + "loss": 5.1331, + "step": 41850 + }, + { + "epoch": 0.18035390687884437, + "grad_norm": 1.306857705116272, + "learning_rate": 9.969390752987157e-05, + "loss": 5.4072, + "step": 41900 + }, + { + "epoch": 0.180569126338127, + "grad_norm": 2.7299110889434814, + "learning_rate": 9.969315942863495e-05, + "loss": 5.5491, + "step": 41950 + }, + { + "epoch": 0.18078434579740962, + "grad_norm": 0.771268904209137, + "learning_rate": 9.969241041713408e-05, + "loss": 4.6358, + "step": 42000 + }, + { + "epoch": 0.18078434579740962, + "eval_loss": 5.84802770614624, + "eval_runtime": 34.9769, + "eval_samples_per_second": 18.298, + "eval_steps_per_second": 9.149, + "eval_tts_loss": 6.298460564465583, + "step": 42000 + }, + { + "epoch": 0.18099956525669225, + "grad_norm": 2.0695533752441406, + "learning_rate": 9.969166049538268e-05, + "loss": 5.3819, + "step": 42050 + }, + { + "epoch": 0.18121478471597488, + "grad_norm": 0.47863495349884033, + "learning_rate": 9.969090966339446e-05, + "loss": 5.0477, + "step": 42100 + }, + { + "epoch": 0.1814300041752575, + "grad_norm": 2.439303398132324, + "learning_rate": 9.969015792118321e-05, + "loss": 4.8656, + "step": 42150 + }, + { + "epoch": 0.18164522363454014, + "grad_norm": 2.8346097469329834, + "learning_rate": 9.968940526876268e-05, + "loss": 5.4279, + "step": 42200 + }, + { + "epoch": 0.18186044309382277, + "grad_norm": 0.49053213000297546, + "learning_rate": 9.968865170614667e-05, + "loss": 5.0488, + "step": 42250 + }, + { + "epoch": 0.1820756625531054, + "grad_norm": 2.295205593109131, + "learning_rate": 9.968789723334897e-05, + "loss": 5.8764, + "step": 42300 + }, + { + "epoch": 0.18229088201238802, + "grad_norm": 1.606118083000183, + "learning_rate": 9.968714185038342e-05, + "loss": 5.5817, + "step": 42350 + }, + { + "epoch": 0.18250610147167065, + "grad_norm": 0.7339264750480652, + "learning_rate": 9.968638555726382e-05, + "loss": 5.0508, + "step": 42400 + }, + { + "epoch": 0.18272132093095328, + "grad_norm": 3.127467632293701, + "learning_rate": 9.968562835400407e-05, + "loss": 4.945, + "step": 42450 + }, + { + "epoch": 0.18293654039023594, + "grad_norm": 2.3751089572906494, + "learning_rate": 9.968487024061801e-05, + "loss": 5.1792, + "step": 42500 + }, + { + "epoch": 0.18315175984951856, + "grad_norm": 1.3080412149429321, + "learning_rate": 9.968411121711954e-05, + "loss": 5.1487, + "step": 42550 + }, + { + "epoch": 0.1833669793088012, + "grad_norm": 2.272592782974243, + "learning_rate": 9.968335128352254e-05, + "loss": 5.0308, + "step": 42600 + }, + { + "epoch": 0.18358219876808382, + "grad_norm": 2.236130952835083, + "learning_rate": 9.968259043984097e-05, + "loss": 5.2578, + "step": 42650 + }, + { + "epoch": 0.18379741822736645, + "grad_norm": 1.417202115058899, + "learning_rate": 9.968182868608873e-05, + "loss": 5.4251, + "step": 42700 + }, + { + "epoch": 0.18401263768664908, + "grad_norm": 4.005433082580566, + "learning_rate": 9.96810660222798e-05, + "loss": 5.3892, + "step": 42750 + }, + { + "epoch": 0.1842278571459317, + "grad_norm": 2.3446834087371826, + "learning_rate": 9.968030244842813e-05, + "loss": 5.3053, + "step": 42800 + }, + { + "epoch": 0.18444307660521433, + "grad_norm": 1.9035673141479492, + "learning_rate": 9.967953796454772e-05, + "loss": 5.0459, + "step": 42850 + }, + { + "epoch": 0.18465829606449696, + "grad_norm": 1.5197334289550781, + "learning_rate": 9.967877257065257e-05, + "loss": 5.3672, + "step": 42900 + }, + { + "epoch": 0.1848735155237796, + "grad_norm": 1.5576106309890747, + "learning_rate": 9.96780062667567e-05, + "loss": 5.0752, + "step": 42950 + }, + { + "epoch": 0.18508873498306222, + "grad_norm": 0.6207577586174011, + "learning_rate": 9.967723905287414e-05, + "loss": 5.4144, + "step": 43000 + }, + { + "epoch": 0.18530395444234485, + "grad_norm": 0.9759745597839355, + "learning_rate": 9.967647092901895e-05, + "loss": 4.9771, + "step": 43050 + }, + { + "epoch": 0.18551917390162748, + "grad_norm": 1.4265793561935425, + "learning_rate": 9.96757018952052e-05, + "loss": 5.4655, + "step": 43100 + }, + { + "epoch": 0.18573439336091013, + "grad_norm": 3.3434743881225586, + "learning_rate": 9.9674931951447e-05, + "loss": 5.439, + "step": 43150 + }, + { + "epoch": 0.18594961282019276, + "grad_norm": 2.811783790588379, + "learning_rate": 9.96741610977584e-05, + "loss": 5.208, + "step": 43200 + }, + { + "epoch": 0.1861648322794754, + "grad_norm": 2.3039863109588623, + "learning_rate": 9.967338933415357e-05, + "loss": 5.1746, + "step": 43250 + }, + { + "epoch": 0.18638005173875802, + "grad_norm": 3.197349786758423, + "learning_rate": 9.967261666064664e-05, + "loss": 5.6639, + "step": 43300 + }, + { + "epoch": 0.18659527119804065, + "grad_norm": 0.584815263748169, + "learning_rate": 9.967184307725173e-05, + "loss": 5.0686, + "step": 43350 + }, + { + "epoch": 0.18681049065732327, + "grad_norm": 1.5519447326660156, + "learning_rate": 9.967106858398304e-05, + "loss": 4.9883, + "step": 43400 + }, + { + "epoch": 0.1870257101166059, + "grad_norm": 0.6102450489997864, + "learning_rate": 9.967029318085474e-05, + "loss": 5.5544, + "step": 43450 + }, + { + "epoch": 0.18724092957588853, + "grad_norm": 0.37067851424217224, + "learning_rate": 9.966951686788107e-05, + "loss": 5.0514, + "step": 43500 + }, + { + "epoch": 0.18745614903517116, + "grad_norm": 0.48290497064590454, + "learning_rate": 9.966873964507619e-05, + "loss": 4.8839, + "step": 43550 + }, + { + "epoch": 0.1876713684944538, + "grad_norm": 2.0602896213531494, + "learning_rate": 9.966796151245439e-05, + "loss": 5.2268, + "step": 43600 + }, + { + "epoch": 0.18788658795373642, + "grad_norm": 2.542405128479004, + "learning_rate": 9.966718247002991e-05, + "loss": 5.7062, + "step": 43650 + }, + { + "epoch": 0.18810180741301905, + "grad_norm": 2.792210578918457, + "learning_rate": 9.966640251781699e-05, + "loss": 4.7853, + "step": 43700 + }, + { + "epoch": 0.18831702687230167, + "grad_norm": 2.271001100540161, + "learning_rate": 9.966562165582995e-05, + "loss": 5.5939, + "step": 43750 + }, + { + "epoch": 0.18853224633158433, + "grad_norm": 2.084362506866455, + "learning_rate": 9.966483988408308e-05, + "loss": 5.0626, + "step": 43800 + }, + { + "epoch": 0.18874746579086696, + "grad_norm": 2.001537322998047, + "learning_rate": 9.966405720259073e-05, + "loss": 5.3687, + "step": 43850 + }, + { + "epoch": 0.1889626852501496, + "grad_norm": 4.805939674377441, + "learning_rate": 9.966327361136718e-05, + "loss": 5.3945, + "step": 43900 + }, + { + "epoch": 0.18917790470943222, + "grad_norm": 3.2100584506988525, + "learning_rate": 9.966248911042684e-05, + "loss": 5.6944, + "step": 43950 + }, + { + "epoch": 0.18939312416871484, + "grad_norm": 2.120170831680298, + "learning_rate": 9.966170369978404e-05, + "loss": 5.0147, + "step": 44000 + }, + { + "epoch": 0.18960834362799747, + "grad_norm": 2.4279775619506836, + "learning_rate": 9.966091737945318e-05, + "loss": 5.766, + "step": 44050 + }, + { + "epoch": 0.1898235630872801, + "grad_norm": 2.167266607284546, + "learning_rate": 9.966013014944868e-05, + "loss": 5.812, + "step": 44100 + }, + { + "epoch": 0.19003878254656273, + "grad_norm": 1.7482531070709229, + "learning_rate": 9.965934200978493e-05, + "loss": 5.482, + "step": 44150 + }, + { + "epoch": 0.19025400200584536, + "grad_norm": 1.7256593704223633, + "learning_rate": 9.96585529604764e-05, + "loss": 5.7897, + "step": 44200 + }, + { + "epoch": 0.19046922146512799, + "grad_norm": 2.61993670463562, + "learning_rate": 9.96577630015375e-05, + "loss": 5.8388, + "step": 44250 + }, + { + "epoch": 0.19068444092441061, + "grad_norm": 1.0628066062927246, + "learning_rate": 9.965697213298275e-05, + "loss": 5.3645, + "step": 44300 + }, + { + "epoch": 0.19089966038369324, + "grad_norm": 1.4612067937850952, + "learning_rate": 9.96561803548266e-05, + "loss": 5.0906, + "step": 44350 + }, + { + "epoch": 0.19111487984297587, + "grad_norm": 2.455042600631714, + "learning_rate": 9.965538766708358e-05, + "loss": 5.3611, + "step": 44400 + }, + { + "epoch": 0.1913300993022585, + "grad_norm": 1.5691808462142944, + "learning_rate": 9.96545940697682e-05, + "loss": 5.3352, + "step": 44450 + }, + { + "epoch": 0.19154531876154116, + "grad_norm": 1.350935459136963, + "learning_rate": 9.965379956289498e-05, + "loss": 5.8732, + "step": 44500 + }, + { + "epoch": 0.19176053822082378, + "grad_norm": 2.552147626876831, + "learning_rate": 9.965300414647849e-05, + "loss": 5.5418, + "step": 44550 + }, + { + "epoch": 0.1919757576801064, + "grad_norm": 1.9981579780578613, + "learning_rate": 9.965220782053331e-05, + "loss": 5.0203, + "step": 44600 + }, + { + "epoch": 0.19219097713938904, + "grad_norm": 2.1526386737823486, + "learning_rate": 9.9651410585074e-05, + "loss": 5.4002, + "step": 44650 + }, + { + "epoch": 0.19240619659867167, + "grad_norm": 1.5357455015182495, + "learning_rate": 9.965061244011519e-05, + "loss": 5.2834, + "step": 44700 + }, + { + "epoch": 0.1926214160579543, + "grad_norm": 0.8551031351089478, + "learning_rate": 9.964981338567147e-05, + "loss": 5.3254, + "step": 44750 + }, + { + "epoch": 0.19283663551723693, + "grad_norm": 2.5606887340545654, + "learning_rate": 9.964901342175751e-05, + "loss": 5.3899, + "step": 44800 + }, + { + "epoch": 0.19305185497651955, + "grad_norm": 2.810432195663452, + "learning_rate": 9.964821254838797e-05, + "loss": 5.4139, + "step": 44850 + }, + { + "epoch": 0.19326707443580218, + "grad_norm": 2.201428174972534, + "learning_rate": 9.964741076557747e-05, + "loss": 5.0836, + "step": 44900 + }, + { + "epoch": 0.1934822938950848, + "grad_norm": 1.4712821245193481, + "learning_rate": 9.964660807334073e-05, + "loss": 5.6582, + "step": 44950 + }, + { + "epoch": 0.19369751335436744, + "grad_norm": 3.130441904067993, + "learning_rate": 9.964580447169245e-05, + "loss": 5.2478, + "step": 45000 + }, + { + "epoch": 0.19369751335436744, + "eval_loss": 5.81725549697876, + "eval_runtime": 34.853, + "eval_samples_per_second": 18.363, + "eval_steps_per_second": 9.181, + "eval_tts_loss": 6.2411583849271, + "step": 45000 + }, + { + "epoch": 0.19391273281365007, + "grad_norm": 2.7762467861175537, + "learning_rate": 9.964499996064735e-05, + "loss": 5.3612, + "step": 45050 + }, + { + "epoch": 0.1941279522729327, + "grad_norm": 0.5360327959060669, + "learning_rate": 9.964419454022017e-05, + "loss": 5.5046, + "step": 45100 + }, + { + "epoch": 0.19434317173221535, + "grad_norm": 1.5461030006408691, + "learning_rate": 9.964338821042566e-05, + "loss": 5.534, + "step": 45150 + }, + { + "epoch": 0.19455839119149798, + "grad_norm": 1.5773454904556274, + "learning_rate": 9.964258097127859e-05, + "loss": 5.3477, + "step": 45200 + }, + { + "epoch": 0.1947736106507806, + "grad_norm": 2.6713690757751465, + "learning_rate": 9.964177282279375e-05, + "loss": 5.0109, + "step": 45250 + }, + { + "epoch": 0.19498883011006324, + "grad_norm": 2.2002274990081787, + "learning_rate": 9.964096376498592e-05, + "loss": 5.2345, + "step": 45300 + }, + { + "epoch": 0.19520404956934587, + "grad_norm": 1.4429247379302979, + "learning_rate": 9.964015379786996e-05, + "loss": 5.4244, + "step": 45350 + }, + { + "epoch": 0.1954192690286285, + "grad_norm": 1.4166209697723389, + "learning_rate": 9.963934292146069e-05, + "loss": 5.0845, + "step": 45400 + }, + { + "epoch": 0.19563448848791112, + "grad_norm": 2.5319085121154785, + "learning_rate": 9.963853113577295e-05, + "loss": 5.492, + "step": 45450 + }, + { + "epoch": 0.19584970794719375, + "grad_norm": 1.9529362916946411, + "learning_rate": 9.963771844082161e-05, + "loss": 5.9854, + "step": 45500 + }, + { + "epoch": 0.19606492740647638, + "grad_norm": 2.4052317142486572, + "learning_rate": 9.963690483662158e-05, + "loss": 5.0563, + "step": 45550 + }, + { + "epoch": 0.196280146865759, + "grad_norm": 1.7268341779708862, + "learning_rate": 9.963609032318776e-05, + "loss": 5.3297, + "step": 45600 + }, + { + "epoch": 0.19649536632504164, + "grad_norm": 2.54902982711792, + "learning_rate": 9.963527490053505e-05, + "loss": 5.245, + "step": 45650 + }, + { + "epoch": 0.19671058578432427, + "grad_norm": 1.2306404113769531, + "learning_rate": 9.963445856867839e-05, + "loss": 5.4674, + "step": 45700 + }, + { + "epoch": 0.1969258052436069, + "grad_norm": 1.9797799587249756, + "learning_rate": 9.963364132763275e-05, + "loss": 5.1671, + "step": 45750 + }, + { + "epoch": 0.19714102470288955, + "grad_norm": 1.4335042238235474, + "learning_rate": 9.96328231774131e-05, + "loss": 5.1364, + "step": 45800 + }, + { + "epoch": 0.19735624416217218, + "grad_norm": 2.77420973777771, + "learning_rate": 9.963200411803439e-05, + "loss": 5.1995, + "step": 45850 + }, + { + "epoch": 0.1975714636214548, + "grad_norm": 1.9059392213821411, + "learning_rate": 9.963118414951167e-05, + "loss": 4.8073, + "step": 45900 + }, + { + "epoch": 0.19778668308073744, + "grad_norm": 2.5538692474365234, + "learning_rate": 9.963036327185993e-05, + "loss": 5.0151, + "step": 45950 + }, + { + "epoch": 0.19800190254002006, + "grad_norm": 1.895698070526123, + "learning_rate": 9.962954148509422e-05, + "loss": 5.5077, + "step": 46000 + }, + { + "epoch": 0.1982171219993027, + "grad_norm": 2.1901729106903076, + "learning_rate": 9.96287187892296e-05, + "loss": 5.4184, + "step": 46050 + }, + { + "epoch": 0.19843234145858532, + "grad_norm": 1.3465992212295532, + "learning_rate": 9.96278951842811e-05, + "loss": 4.8947, + "step": 46100 + }, + { + "epoch": 0.19864756091786795, + "grad_norm": 2.0933282375335693, + "learning_rate": 9.962707067026387e-05, + "loss": 5.1007, + "step": 46150 + }, + { + "epoch": 0.19886278037715058, + "grad_norm": 1.311606526374817, + "learning_rate": 9.962624524719297e-05, + "loss": 5.2685, + "step": 46200 + }, + { + "epoch": 0.1990779998364332, + "grad_norm": 1.857384443283081, + "learning_rate": 9.962541891508353e-05, + "loss": 5.3565, + "step": 46250 + }, + { + "epoch": 0.19929321929571583, + "grad_norm": 0.7405107617378235, + "learning_rate": 9.962459167395069e-05, + "loss": 5.1753, + "step": 46300 + }, + { + "epoch": 0.19950843875499846, + "grad_norm": 3.8163087368011475, + "learning_rate": 9.962376352380957e-05, + "loss": 5.1759, + "step": 46350 + }, + { + "epoch": 0.1997236582142811, + "grad_norm": 1.4860745668411255, + "learning_rate": 9.96229344646754e-05, + "loss": 5.209, + "step": 46400 + }, + { + "epoch": 0.19993887767356372, + "grad_norm": 1.9401720762252808, + "learning_rate": 9.962210449656333e-05, + "loss": 5.2299, + "step": 46450 + }, + { + "epoch": 0.20015409713284638, + "grad_norm": 3.026341438293457, + "learning_rate": 9.962127361948856e-05, + "loss": 5.5727, + "step": 46500 + }, + { + "epoch": 0.200369316592129, + "grad_norm": 2.1455018520355225, + "learning_rate": 9.962044183346632e-05, + "loss": 5.7384, + "step": 46550 + }, + { + "epoch": 0.20058453605141163, + "grad_norm": 3.5870113372802734, + "learning_rate": 9.961960913851184e-05, + "loss": 5.2249, + "step": 46600 + }, + { + "epoch": 0.20079975551069426, + "grad_norm": 2.448197603225708, + "learning_rate": 9.96187755346404e-05, + "loss": 5.1269, + "step": 46650 + }, + { + "epoch": 0.2010149749699769, + "grad_norm": 1.8139272928237915, + "learning_rate": 9.961794102186723e-05, + "loss": 5.399, + "step": 46700 + }, + { + "epoch": 0.20123019442925952, + "grad_norm": 1.999450922012329, + "learning_rate": 9.961710560020763e-05, + "loss": 5.2369, + "step": 46750 + }, + { + "epoch": 0.20144541388854215, + "grad_norm": 4.3499436378479, + "learning_rate": 9.961626926967691e-05, + "loss": 5.1691, + "step": 46800 + }, + { + "epoch": 0.20166063334782477, + "grad_norm": 1.6662920713424683, + "learning_rate": 9.96154320302904e-05, + "loss": 5.3244, + "step": 46850 + }, + { + "epoch": 0.2018758528071074, + "grad_norm": 2.1195456981658936, + "learning_rate": 9.96145938820634e-05, + "loss": 5.1986, + "step": 46900 + }, + { + "epoch": 0.20209107226639003, + "grad_norm": 1.5051194429397583, + "learning_rate": 9.96137548250113e-05, + "loss": 5.6112, + "step": 46950 + }, + { + "epoch": 0.20230629172567266, + "grad_norm": 3.3609747886657715, + "learning_rate": 9.961291485914945e-05, + "loss": 5.3774, + "step": 47000 + }, + { + "epoch": 0.2025215111849553, + "grad_norm": 1.655246376991272, + "learning_rate": 9.961207398449326e-05, + "loss": 4.5358, + "step": 47050 + }, + { + "epoch": 0.20273673064423792, + "grad_norm": 1.440521001815796, + "learning_rate": 9.96112322010581e-05, + "loss": 5.2897, + "step": 47100 + }, + { + "epoch": 0.20295195010352057, + "grad_norm": 2.6921069622039795, + "learning_rate": 9.961038950885939e-05, + "loss": 5.1574, + "step": 47150 + }, + { + "epoch": 0.2031671695628032, + "grad_norm": 1.3845224380493164, + "learning_rate": 9.96095459079126e-05, + "loss": 5.3951, + "step": 47200 + }, + { + "epoch": 0.20338238902208583, + "grad_norm": 1.813748836517334, + "learning_rate": 9.960870139823315e-05, + "loss": 5.4612, + "step": 47250 + }, + { + "epoch": 0.20359760848136846, + "grad_norm": 1.4972589015960693, + "learning_rate": 9.960785597983653e-05, + "loss": 5.3061, + "step": 47300 + }, + { + "epoch": 0.2038128279406511, + "grad_norm": 1.489404320716858, + "learning_rate": 9.960700965273821e-05, + "loss": 5.5713, + "step": 47350 + }, + { + "epoch": 0.20402804739993372, + "grad_norm": 1.4493526220321655, + "learning_rate": 9.960616241695371e-05, + "loss": 5.854, + "step": 47400 + }, + { + "epoch": 0.20424326685921634, + "grad_norm": 1.326565146446228, + "learning_rate": 9.960531427249853e-05, + "loss": 5.0841, + "step": 47450 + }, + { + "epoch": 0.20445848631849897, + "grad_norm": 1.2407478094100952, + "learning_rate": 9.960446521938823e-05, + "loss": 5.3243, + "step": 47500 + }, + { + "epoch": 0.2046737057777816, + "grad_norm": 2.029686212539673, + "learning_rate": 9.960361525763835e-05, + "loss": 5.2789, + "step": 47550 + }, + { + "epoch": 0.20488892523706423, + "grad_norm": 2.4505600929260254, + "learning_rate": 9.960276438726446e-05, + "loss": 5.3149, + "step": 47600 + }, + { + "epoch": 0.20510414469634686, + "grad_norm": 2.4376471042633057, + "learning_rate": 9.960191260828213e-05, + "loss": 5.4129, + "step": 47650 + }, + { + "epoch": 0.20531936415562949, + "grad_norm": 0.8598138093948364, + "learning_rate": 9.960105992070698e-05, + "loss": 5.4211, + "step": 47700 + }, + { + "epoch": 0.20553458361491211, + "grad_norm": 1.7363723516464233, + "learning_rate": 9.960020632455463e-05, + "loss": 5.1757, + "step": 47750 + }, + { + "epoch": 0.20574980307419474, + "grad_norm": 2.0996665954589844, + "learning_rate": 9.959935181984072e-05, + "loss": 5.3625, + "step": 47800 + }, + { + "epoch": 0.2059650225334774, + "grad_norm": 4.319682598114014, + "learning_rate": 9.95984964065809e-05, + "loss": 5.2419, + "step": 47850 + }, + { + "epoch": 0.20618024199276003, + "grad_norm": 2.2911555767059326, + "learning_rate": 9.959764008479082e-05, + "loss": 5.6187, + "step": 47900 + }, + { + "epoch": 0.20639546145204266, + "grad_norm": 1.948038101196289, + "learning_rate": 9.959678285448618e-05, + "loss": 5.349, + "step": 47950 + }, + { + "epoch": 0.20661068091132528, + "grad_norm": 3.4478232860565186, + "learning_rate": 9.959592471568269e-05, + "loss": 5.5809, + "step": 48000 + }, + { + "epoch": 0.20661068091132528, + "eval_loss": 5.796329498291016, + "eval_runtime": 35.223, + "eval_samples_per_second": 18.17, + "eval_steps_per_second": 9.085, + "eval_tts_loss": 6.22501932611318, + "step": 48000 + }, + { + "epoch": 0.2068259003706079, + "grad_norm": 1.7736670970916748, + "learning_rate": 9.959506566839605e-05, + "loss": 4.9697, + "step": 48050 + }, + { + "epoch": 0.20704111982989054, + "grad_norm": 2.140063762664795, + "learning_rate": 9.959420571264203e-05, + "loss": 5.0653, + "step": 48100 + }, + { + "epoch": 0.20725633928917317, + "grad_norm": 0.4537443518638611, + "learning_rate": 9.959334484843634e-05, + "loss": 4.8765, + "step": 48150 + }, + { + "epoch": 0.2074715587484558, + "grad_norm": 0.41736841201782227, + "learning_rate": 9.959248307579476e-05, + "loss": 5.4592, + "step": 48200 + }, + { + "epoch": 0.20768677820773843, + "grad_norm": 0.36742767691612244, + "learning_rate": 9.95916203947331e-05, + "loss": 4.9888, + "step": 48250 + }, + { + "epoch": 0.20790199766702105, + "grad_norm": 2.1028084754943848, + "learning_rate": 9.959075680526715e-05, + "loss": 5.8642, + "step": 48300 + }, + { + "epoch": 0.20811721712630368, + "grad_norm": 2.4235270023345947, + "learning_rate": 9.958989230741273e-05, + "loss": 5.6147, + "step": 48350 + }, + { + "epoch": 0.2083324365855863, + "grad_norm": 2.302995443344116, + "learning_rate": 9.958902690118565e-05, + "loss": 5.3417, + "step": 48400 + }, + { + "epoch": 0.20854765604486894, + "grad_norm": 2.163205862045288, + "learning_rate": 9.95881605866018e-05, + "loss": 5.5863, + "step": 48450 + }, + { + "epoch": 0.2087628755041516, + "grad_norm": 0.9933422207832336, + "learning_rate": 9.958729336367705e-05, + "loss": 5.2107, + "step": 48500 + }, + { + "epoch": 0.20897809496343422, + "grad_norm": 0.7456415891647339, + "learning_rate": 9.958642523242725e-05, + "loss": 5.0198, + "step": 48550 + }, + { + "epoch": 0.20919331442271685, + "grad_norm": 1.5509700775146484, + "learning_rate": 9.958555619286831e-05, + "loss": 5.2176, + "step": 48600 + }, + { + "epoch": 0.20940853388199948, + "grad_norm": 1.6162989139556885, + "learning_rate": 9.958468624501616e-05, + "loss": 5.5661, + "step": 48650 + }, + { + "epoch": 0.2096237533412821, + "grad_norm": 0.5913316607475281, + "learning_rate": 9.958381538888675e-05, + "loss": 5.3394, + "step": 48700 + }, + { + "epoch": 0.20983897280056474, + "grad_norm": 2.321537494659424, + "learning_rate": 9.9582943624496e-05, + "loss": 5.3182, + "step": 48750 + }, + { + "epoch": 0.21005419225984737, + "grad_norm": 5.531227111816406, + "learning_rate": 9.958207095185992e-05, + "loss": 5.2225, + "step": 48800 + }, + { + "epoch": 0.21026941171913, + "grad_norm": 1.753251552581787, + "learning_rate": 9.958119737099445e-05, + "loss": 5.6308, + "step": 48850 + }, + { + "epoch": 0.21048463117841262, + "grad_norm": 1.5117651224136353, + "learning_rate": 9.958032288191563e-05, + "loss": 5.1385, + "step": 48900 + }, + { + "epoch": 0.21069985063769525, + "grad_norm": 1.5715397596359253, + "learning_rate": 9.957944748463945e-05, + "loss": 5.197, + "step": 48950 + }, + { + "epoch": 0.21091507009697788, + "grad_norm": 2.747018814086914, + "learning_rate": 9.957857117918194e-05, + "loss": 5.1599, + "step": 49000 + }, + { + "epoch": 0.2111302895562605, + "grad_norm": 1.5428632497787476, + "learning_rate": 9.957769396555921e-05, + "loss": 4.7813, + "step": 49050 + }, + { + "epoch": 0.21134550901554314, + "grad_norm": 1.5541731119155884, + "learning_rate": 9.957681584378726e-05, + "loss": 5.641, + "step": 49100 + }, + { + "epoch": 0.2115607284748258, + "grad_norm": 1.915408968925476, + "learning_rate": 9.95759368138822e-05, + "loss": 5.613, + "step": 49150 + }, + { + "epoch": 0.21177594793410842, + "grad_norm": 1.8603709936141968, + "learning_rate": 9.957505687586014e-05, + "loss": 5.4323, + "step": 49200 + }, + { + "epoch": 0.21199116739339105, + "grad_norm": 1.537431001663208, + "learning_rate": 9.957417602973719e-05, + "loss": 5.2639, + "step": 49250 + }, + { + "epoch": 0.21220638685267368, + "grad_norm": 2.047757863998413, + "learning_rate": 9.957329427552949e-05, + "loss": 5.4564, + "step": 49300 + }, + { + "epoch": 0.2124216063119563, + "grad_norm": 1.3951332569122314, + "learning_rate": 9.95724116132532e-05, + "loss": 5.4653, + "step": 49350 + }, + { + "epoch": 0.21263682577123894, + "grad_norm": 1.7220056056976318, + "learning_rate": 9.957152804292446e-05, + "loss": 4.9709, + "step": 49400 + }, + { + "epoch": 0.21285204523052156, + "grad_norm": 2.4672255516052246, + "learning_rate": 9.957064356455947e-05, + "loss": 5.3871, + "step": 49450 + }, + { + "epoch": 0.2130672646898042, + "grad_norm": 2.7639989852905273, + "learning_rate": 9.956975817817444e-05, + "loss": 5.1945, + "step": 49500 + }, + { + "epoch": 0.21328248414908682, + "grad_norm": 1.3475544452667236, + "learning_rate": 9.956887188378558e-05, + "loss": 5.6068, + "step": 49550 + }, + { + "epoch": 0.21349770360836945, + "grad_norm": 2.501035690307617, + "learning_rate": 9.956798468140916e-05, + "loss": 5.1736, + "step": 49600 + }, + { + "epoch": 0.21371292306765208, + "grad_norm": 1.5941158533096313, + "learning_rate": 9.956709657106136e-05, + "loss": 5.4683, + "step": 49650 + }, + { + "epoch": 0.2139281425269347, + "grad_norm": 1.7121700048446655, + "learning_rate": 9.95662075527585e-05, + "loss": 5.1543, + "step": 49700 + }, + { + "epoch": 0.21414336198621733, + "grad_norm": 0.35852164030075073, + "learning_rate": 9.956531762651686e-05, + "loss": 5.3213, + "step": 49750 + }, + { + "epoch": 0.21435858144549996, + "grad_norm": 2.651510715484619, + "learning_rate": 9.956442679235272e-05, + "loss": 5.462, + "step": 49800 + }, + { + "epoch": 0.21457380090478262, + "grad_norm": 1.2296634912490845, + "learning_rate": 9.956353505028241e-05, + "loss": 5.0484, + "step": 49850 + }, + { + "epoch": 0.21478902036406525, + "grad_norm": 1.7858680486679077, + "learning_rate": 9.956264240032227e-05, + "loss": 4.8874, + "step": 49900 + }, + { + "epoch": 0.21500423982334788, + "grad_norm": 1.5296380519866943, + "learning_rate": 9.956174884248866e-05, + "loss": 5.4199, + "step": 49950 + }, + { + "epoch": 0.2152194592826305, + "grad_norm": 2.181480884552002, + "learning_rate": 9.956085437679792e-05, + "loss": 5.1259, + "step": 50000 + }, + { + "epoch": 0.21543467874191313, + "grad_norm": 1.9449048042297363, + "learning_rate": 9.955995900326645e-05, + "loss": 4.9967, + "step": 50050 + }, + { + "epoch": 0.21564989820119576, + "grad_norm": 1.448880672454834, + "learning_rate": 9.955906272191065e-05, + "loss": 5.5826, + "step": 50100 + }, + { + "epoch": 0.2158651176604784, + "grad_norm": 1.0305227041244507, + "learning_rate": 9.955816553274694e-05, + "loss": 5.0103, + "step": 50150 + }, + { + "epoch": 0.21608033711976102, + "grad_norm": 0.9720062613487244, + "learning_rate": 9.955726743579176e-05, + "loss": 5.2033, + "step": 50200 + }, + { + "epoch": 0.21629555657904365, + "grad_norm": 2.3680825233459473, + "learning_rate": 9.955636843106156e-05, + "loss": 5.2889, + "step": 50250 + }, + { + "epoch": 0.21651077603832627, + "grad_norm": 1.2048760652542114, + "learning_rate": 9.955546851857278e-05, + "loss": 5.0201, + "step": 50300 + }, + { + "epoch": 0.2167259954976089, + "grad_norm": 3.1003036499023438, + "learning_rate": 9.955456769834194e-05, + "loss": 5.0554, + "step": 50350 + }, + { + "epoch": 0.21694121495689153, + "grad_norm": 2.9815256595611572, + "learning_rate": 9.955366597038553e-05, + "loss": 5.3669, + "step": 50400 + }, + { + "epoch": 0.21715643441617416, + "grad_norm": 2.360203504562378, + "learning_rate": 9.955276333472006e-05, + "loss": 4.9064, + "step": 50450 + }, + { + "epoch": 0.21737165387545682, + "grad_norm": 2.104553699493408, + "learning_rate": 9.955185979136206e-05, + "loss": 5.7652, + "step": 50500 + }, + { + "epoch": 0.21758687333473944, + "grad_norm": 2.606586456298828, + "learning_rate": 9.95509553403281e-05, + "loss": 5.2533, + "step": 50550 + }, + { + "epoch": 0.21780209279402207, + "grad_norm": 1.7071987390518188, + "learning_rate": 9.955004998163472e-05, + "loss": 5.5881, + "step": 50600 + }, + { + "epoch": 0.2180173122533047, + "grad_norm": 1.0531247854232788, + "learning_rate": 9.954914371529855e-05, + "loss": 4.9696, + "step": 50650 + }, + { + "epoch": 0.21823253171258733, + "grad_norm": 1.9945915937423706, + "learning_rate": 9.954823654133615e-05, + "loss": 5.589, + "step": 50700 + }, + { + "epoch": 0.21844775117186996, + "grad_norm": 1.5203957557678223, + "learning_rate": 9.954732845976415e-05, + "loss": 5.4531, + "step": 50750 + }, + { + "epoch": 0.2186629706311526, + "grad_norm": 1.6130144596099854, + "learning_rate": 9.954641947059918e-05, + "loss": 5.3491, + "step": 50800 + }, + { + "epoch": 0.21887819009043522, + "grad_norm": 1.057516098022461, + "learning_rate": 9.954550957385789e-05, + "loss": 5.215, + "step": 50850 + }, + { + "epoch": 0.21909340954971784, + "grad_norm": 1.3668022155761719, + "learning_rate": 9.954459876955696e-05, + "loss": 5.1415, + "step": 50900 + }, + { + "epoch": 0.21930862900900047, + "grad_norm": 2.0825867652893066, + "learning_rate": 9.954368705771307e-05, + "loss": 5.3143, + "step": 50950 + }, + { + "epoch": 0.2195238484682831, + "grad_norm": 2.488001823425293, + "learning_rate": 9.95427744383429e-05, + "loss": 5.3089, + "step": 51000 + }, + { + "epoch": 0.2195238484682831, + "eval_loss": 5.778656005859375, + "eval_runtime": 35.0444, + "eval_samples_per_second": 18.263, + "eval_steps_per_second": 9.131, + "eval_tts_loss": 6.272574217872726, + "step": 51000 + }, + { + "epoch": 0.21973906792756573, + "grad_norm": 2.461165189743042, + "learning_rate": 9.954186091146319e-05, + "loss": 4.9464, + "step": 51050 + }, + { + "epoch": 0.21995428738684836, + "grad_norm": 2.582106113433838, + "learning_rate": 9.954094647709067e-05, + "loss": 5.08, + "step": 51100 + }, + { + "epoch": 0.220169506846131, + "grad_norm": 0.4270996153354645, + "learning_rate": 9.954003113524208e-05, + "loss": 5.0233, + "step": 51150 + }, + { + "epoch": 0.22038472630541364, + "grad_norm": 1.5620601177215576, + "learning_rate": 9.953911488593419e-05, + "loss": 5.5764, + "step": 51200 + }, + { + "epoch": 0.22059994576469627, + "grad_norm": 1.5592663288116455, + "learning_rate": 9.95381977291838e-05, + "loss": 5.0214, + "step": 51250 + }, + { + "epoch": 0.2208151652239789, + "grad_norm": 1.748214840888977, + "learning_rate": 9.953727966500767e-05, + "loss": 5.0599, + "step": 51300 + }, + { + "epoch": 0.22103038468326153, + "grad_norm": 4.137348651885986, + "learning_rate": 9.953636069342267e-05, + "loss": 5.1298, + "step": 51350 + }, + { + "epoch": 0.22124560414254416, + "grad_norm": 2.026874303817749, + "learning_rate": 9.953544081444562e-05, + "loss": 5.5089, + "step": 51400 + }, + { + "epoch": 0.22146082360182678, + "grad_norm": 1.2134157419204712, + "learning_rate": 9.953452002809333e-05, + "loss": 5.351, + "step": 51450 + }, + { + "epoch": 0.2216760430611094, + "grad_norm": 2.0933847427368164, + "learning_rate": 9.953359833438269e-05, + "loss": 5.958, + "step": 51500 + }, + { + "epoch": 0.22189126252039204, + "grad_norm": 1.9918137788772583, + "learning_rate": 9.95326757333306e-05, + "loss": 5.1777, + "step": 51550 + }, + { + "epoch": 0.22210648197967467, + "grad_norm": 3.7212729454040527, + "learning_rate": 9.953175222495395e-05, + "loss": 4.951, + "step": 51600 + }, + { + "epoch": 0.2223217014389573, + "grad_norm": 3.5238146781921387, + "learning_rate": 9.953082780926965e-05, + "loss": 5.2853, + "step": 51650 + }, + { + "epoch": 0.22253692089823993, + "grad_norm": 1.2535676956176758, + "learning_rate": 9.952990248629464e-05, + "loss": 5.3428, + "step": 51700 + }, + { + "epoch": 0.22275214035752255, + "grad_norm": 4.145112991333008, + "learning_rate": 9.952897625604587e-05, + "loss": 5.1877, + "step": 51750 + }, + { + "epoch": 0.22296735981680518, + "grad_norm": 2.3402130603790283, + "learning_rate": 9.95280491185403e-05, + "loss": 4.8668, + "step": 51800 + }, + { + "epoch": 0.22318257927608784, + "grad_norm": 3.6970629692077637, + "learning_rate": 9.952712107379492e-05, + "loss": 5.4614, + "step": 51850 + }, + { + "epoch": 0.22339779873537047, + "grad_norm": 1.569783091545105, + "learning_rate": 9.952619212182672e-05, + "loss": 5.2226, + "step": 51900 + }, + { + "epoch": 0.2236130181946531, + "grad_norm": 0.8276736736297607, + "learning_rate": 9.952526226265272e-05, + "loss": 5.1484, + "step": 51950 + }, + { + "epoch": 0.22382823765393572, + "grad_norm": 0.45729681849479675, + "learning_rate": 9.952433149628996e-05, + "loss": 5.3708, + "step": 52000 + }, + { + "epoch": 0.22404345711321835, + "grad_norm": 3.0493037700653076, + "learning_rate": 9.952339982275549e-05, + "loss": 4.9436, + "step": 52050 + }, + { + "epoch": 0.22425867657250098, + "grad_norm": 1.8304576873779297, + "learning_rate": 9.952246724206637e-05, + "loss": 5.1659, + "step": 52100 + }, + { + "epoch": 0.2244738960317836, + "grad_norm": 1.6446971893310547, + "learning_rate": 9.952153375423967e-05, + "loss": 5.1965, + "step": 52150 + }, + { + "epoch": 0.22468911549106624, + "grad_norm": 1.5891069173812866, + "learning_rate": 9.952059935929251e-05, + "loss": 5.3408, + "step": 52200 + }, + { + "epoch": 0.22490433495034887, + "grad_norm": 1.3338673114776611, + "learning_rate": 9.9519664057242e-05, + "loss": 5.4763, + "step": 52250 + }, + { + "epoch": 0.2251195544096315, + "grad_norm": 1.7725980281829834, + "learning_rate": 9.951872784810528e-05, + "loss": 5.8133, + "step": 52300 + }, + { + "epoch": 0.22533477386891412, + "grad_norm": 1.7858965396881104, + "learning_rate": 9.951779073189948e-05, + "loss": 5.6091, + "step": 52350 + }, + { + "epoch": 0.22554999332819675, + "grad_norm": 1.6589215993881226, + "learning_rate": 9.951685270864177e-05, + "loss": 4.5695, + "step": 52400 + }, + { + "epoch": 0.22576521278747938, + "grad_norm": 2.358440399169922, + "learning_rate": 9.951591377834935e-05, + "loss": 5.1402, + "step": 52450 + }, + { + "epoch": 0.22598043224676204, + "grad_norm": 2.1982483863830566, + "learning_rate": 9.951497394103941e-05, + "loss": 5.2818, + "step": 52500 + }, + { + "epoch": 0.22619565170604466, + "grad_norm": 3.5349411964416504, + "learning_rate": 9.951403319672916e-05, + "loss": 4.7466, + "step": 52550 + }, + { + "epoch": 0.2264108711653273, + "grad_norm": 2.4478774070739746, + "learning_rate": 9.951309154543583e-05, + "loss": 5.6766, + "step": 52600 + }, + { + "epoch": 0.22662609062460992, + "grad_norm": 0.88774174451828, + "learning_rate": 9.951214898717669e-05, + "loss": 4.9294, + "step": 52650 + }, + { + "epoch": 0.22684131008389255, + "grad_norm": 1.0971379280090332, + "learning_rate": 9.951120552196898e-05, + "loss": 5.0739, + "step": 52700 + }, + { + "epoch": 0.22705652954317518, + "grad_norm": 3.6966500282287598, + "learning_rate": 9.951026114982999e-05, + "loss": 5.2595, + "step": 52750 + }, + { + "epoch": 0.2272717490024578, + "grad_norm": 2.4223926067352295, + "learning_rate": 9.950931587077702e-05, + "loss": 5.3861, + "step": 52800 + }, + { + "epoch": 0.22748696846174044, + "grad_norm": 1.356717586517334, + "learning_rate": 9.950836968482738e-05, + "loss": 5.4223, + "step": 52850 + }, + { + "epoch": 0.22770218792102306, + "grad_norm": 1.9854652881622314, + "learning_rate": 9.950742259199842e-05, + "loss": 4.9851, + "step": 52900 + }, + { + "epoch": 0.2279174073803057, + "grad_norm": 2.5869698524475098, + "learning_rate": 9.950647459230749e-05, + "loss": 5.1882, + "step": 52950 + }, + { + "epoch": 0.22813262683958832, + "grad_norm": 4.297923564910889, + "learning_rate": 9.950552568577192e-05, + "loss": 5.7327, + "step": 53000 + }, + { + "epoch": 0.22834784629887095, + "grad_norm": 1.5137077569961548, + "learning_rate": 9.950457587240911e-05, + "loss": 4.9542, + "step": 53050 + }, + { + "epoch": 0.22856306575815358, + "grad_norm": 2.350811719894409, + "learning_rate": 9.950362515223648e-05, + "loss": 5.0426, + "step": 53100 + }, + { + "epoch": 0.22877828521743623, + "grad_norm": 1.9890429973602295, + "learning_rate": 9.950267352527143e-05, + "loss": 5.2309, + "step": 53150 + }, + { + "epoch": 0.22899350467671886, + "grad_norm": 1.8076109886169434, + "learning_rate": 9.950172099153137e-05, + "loss": 5.4543, + "step": 53200 + }, + { + "epoch": 0.2292087241360015, + "grad_norm": 0.914721667766571, + "learning_rate": 9.950076755103376e-05, + "loss": 5.3142, + "step": 53250 + }, + { + "epoch": 0.22942394359528412, + "grad_norm": 1.5374040603637695, + "learning_rate": 9.949981320379611e-05, + "loss": 5.4784, + "step": 53300 + }, + { + "epoch": 0.22963916305456675, + "grad_norm": 3.259962558746338, + "learning_rate": 9.949885794983584e-05, + "loss": 5.0756, + "step": 53350 + }, + { + "epoch": 0.22985438251384938, + "grad_norm": 2.1574771404266357, + "learning_rate": 9.949790178917046e-05, + "loss": 5.4622, + "step": 53400 + }, + { + "epoch": 0.230069601973132, + "grad_norm": 1.8553476333618164, + "learning_rate": 9.949694472181752e-05, + "loss": 5.7068, + "step": 53450 + }, + { + "epoch": 0.23028482143241463, + "grad_norm": 1.483270287513733, + "learning_rate": 9.949598674779451e-05, + "loss": 5.4033, + "step": 53500 + }, + { + "epoch": 0.23050004089169726, + "grad_norm": 3.0344796180725098, + "learning_rate": 9.949502786711901e-05, + "loss": 5.0717, + "step": 53550 + }, + { + "epoch": 0.2307152603509799, + "grad_norm": 4.975027084350586, + "learning_rate": 9.949406807980856e-05, + "loss": 5.5745, + "step": 53600 + }, + { + "epoch": 0.23093047981026252, + "grad_norm": 1.039671778678894, + "learning_rate": 9.949310738588075e-05, + "loss": 5.6906, + "step": 53650 + }, + { + "epoch": 0.23114569926954515, + "grad_norm": 5.070066928863525, + "learning_rate": 9.949214578535318e-05, + "loss": 4.9257, + "step": 53700 + }, + { + "epoch": 0.23136091872882777, + "grad_norm": 0.5672413110733032, + "learning_rate": 9.949118327824347e-05, + "loss": 4.6967, + "step": 53750 + }, + { + "epoch": 0.2315761381881104, + "grad_norm": 2.615105628967285, + "learning_rate": 9.949021986456923e-05, + "loss": 4.9555, + "step": 53800 + }, + { + "epoch": 0.23179135764739306, + "grad_norm": 2.205103874206543, + "learning_rate": 9.948925554434813e-05, + "loss": 5.3738, + "step": 53850 + }, + { + "epoch": 0.2320065771066757, + "grad_norm": 1.4922044277191162, + "learning_rate": 9.948829031759782e-05, + "loss": 5.5516, + "step": 53900 + }, + { + "epoch": 0.23222179656595832, + "grad_norm": 1.8403173685073853, + "learning_rate": 9.9487324184336e-05, + "loss": 5.0703, + "step": 53950 + }, + { + "epoch": 0.23243701602524094, + "grad_norm": 1.8872774839401245, + "learning_rate": 9.948635714458034e-05, + "loss": 5.2538, + "step": 54000 + }, + { + "epoch": 0.23243701602524094, + "eval_loss": 5.75490140914917, + "eval_runtime": 35.0486, + "eval_samples_per_second": 18.26, + "eval_steps_per_second": 9.13, + "eval_tts_loss": 6.34618488260096, + "step": 54000 + }, + { + "epoch": 0.23265223548452357, + "grad_norm": 1.8977750539779663, + "learning_rate": 9.948538919834857e-05, + "loss": 5.375, + "step": 54050 + }, + { + "epoch": 0.2328674549438062, + "grad_norm": 3.526210308074951, + "learning_rate": 9.948442034565841e-05, + "loss": 5.4403, + "step": 54100 + }, + { + "epoch": 0.23308267440308883, + "grad_norm": 2.4231479167938232, + "learning_rate": 9.948345058652763e-05, + "loss": 5.6837, + "step": 54150 + }, + { + "epoch": 0.23329789386237146, + "grad_norm": 0.7686912417411804, + "learning_rate": 9.948247992097397e-05, + "loss": 5.5027, + "step": 54200 + }, + { + "epoch": 0.2335131133216541, + "grad_norm": 1.5065789222717285, + "learning_rate": 9.948150834901522e-05, + "loss": 4.9506, + "step": 54250 + }, + { + "epoch": 0.23372833278093672, + "grad_norm": 2.2319328784942627, + "learning_rate": 9.948053587066918e-05, + "loss": 4.9884, + "step": 54300 + }, + { + "epoch": 0.23394355224021934, + "grad_norm": 1.5924419164657593, + "learning_rate": 9.947956248595366e-05, + "loss": 5.3245, + "step": 54350 + }, + { + "epoch": 0.23415877169950197, + "grad_norm": 2.7995688915252686, + "learning_rate": 9.947858819488649e-05, + "loss": 5.8429, + "step": 54400 + }, + { + "epoch": 0.2343739911587846, + "grad_norm": 2.437288999557495, + "learning_rate": 9.94776129974855e-05, + "loss": 5.153, + "step": 54450 + }, + { + "epoch": 0.23458921061806726, + "grad_norm": 2.125924587249756, + "learning_rate": 9.94766368937686e-05, + "loss": 5.267, + "step": 54500 + }, + { + "epoch": 0.23480443007734988, + "grad_norm": 1.672737956047058, + "learning_rate": 9.947565988375362e-05, + "loss": 5.4251, + "step": 54550 + }, + { + "epoch": 0.2350196495366325, + "grad_norm": 1.26529061794281, + "learning_rate": 9.947468196745849e-05, + "loss": 5.2057, + "step": 54600 + }, + { + "epoch": 0.23523486899591514, + "grad_norm": 3.162121057510376, + "learning_rate": 9.947370314490108e-05, + "loss": 5.4549, + "step": 54650 + }, + { + "epoch": 0.23545008845519777, + "grad_norm": 2.607717752456665, + "learning_rate": 9.947272341609938e-05, + "loss": 5.3252, + "step": 54700 + }, + { + "epoch": 0.2356653079144804, + "grad_norm": 1.128678560256958, + "learning_rate": 9.947174278107129e-05, + "loss": 5.3561, + "step": 54750 + }, + { + "epoch": 0.23588052737376303, + "grad_norm": 2.4357430934906006, + "learning_rate": 9.94707612398348e-05, + "loss": 5.2771, + "step": 54800 + }, + { + "epoch": 0.23609574683304566, + "grad_norm": 1.5515512228012085, + "learning_rate": 9.946977879240786e-05, + "loss": 5.7654, + "step": 54850 + }, + { + "epoch": 0.23631096629232828, + "grad_norm": 0.7914877533912659, + "learning_rate": 9.946879543880852e-05, + "loss": 5.3366, + "step": 54900 + }, + { + "epoch": 0.2365261857516109, + "grad_norm": 1.5811545848846436, + "learning_rate": 9.946781117905473e-05, + "loss": 5.2657, + "step": 54950 + }, + { + "epoch": 0.23674140521089354, + "grad_norm": 3.4460389614105225, + "learning_rate": 9.946682601316455e-05, + "loss": 5.2544, + "step": 55000 + }, + { + "epoch": 0.23695662467017617, + "grad_norm": 1.7497141361236572, + "learning_rate": 9.946583994115601e-05, + "loss": 5.631, + "step": 55050 + }, + { + "epoch": 0.2371718441294588, + "grad_norm": 3.0559229850769043, + "learning_rate": 9.946485296304722e-05, + "loss": 5.2234, + "step": 55100 + }, + { + "epoch": 0.23738706358874143, + "grad_norm": 5.328866481781006, + "learning_rate": 9.946386507885618e-05, + "loss": 4.7718, + "step": 55150 + }, + { + "epoch": 0.23760228304802408, + "grad_norm": 1.973453164100647, + "learning_rate": 9.946287628860107e-05, + "loss": 4.9236, + "step": 55200 + }, + { + "epoch": 0.2378175025073067, + "grad_norm": 3.221027135848999, + "learning_rate": 9.946188659229994e-05, + "loss": 5.0484, + "step": 55250 + }, + { + "epoch": 0.23803272196658934, + "grad_norm": 2.2229065895080566, + "learning_rate": 9.946089598997095e-05, + "loss": 5.2718, + "step": 55300 + }, + { + "epoch": 0.23824794142587197, + "grad_norm": 2.102578639984131, + "learning_rate": 9.945990448163223e-05, + "loss": 4.9221, + "step": 55350 + }, + { + "epoch": 0.2384631608851546, + "grad_norm": 1.6233347654342651, + "learning_rate": 9.945891206730196e-05, + "loss": 5.4152, + "step": 55400 + }, + { + "epoch": 0.23867838034443722, + "grad_norm": 2.0331990718841553, + "learning_rate": 9.94579187469983e-05, + "loss": 5.4043, + "step": 55450 + }, + { + "epoch": 0.23889359980371985, + "grad_norm": 2.750063180923462, + "learning_rate": 9.945692452073947e-05, + "loss": 5.5098, + "step": 55500 + }, + { + "epoch": 0.23910881926300248, + "grad_norm": 1.4155449867248535, + "learning_rate": 9.945592938854364e-05, + "loss": 5.6684, + "step": 55550 + }, + { + "epoch": 0.2393240387222851, + "grad_norm": 1.368462324142456, + "learning_rate": 9.945493335042908e-05, + "loss": 5.2724, + "step": 55600 + }, + { + "epoch": 0.23953925818156774, + "grad_norm": 2.986207962036133, + "learning_rate": 9.945393640641402e-05, + "loss": 5.5655, + "step": 55650 + }, + { + "epoch": 0.23975447764085037, + "grad_norm": 1.4440394639968872, + "learning_rate": 9.945293855651672e-05, + "loss": 4.9547, + "step": 55700 + }, + { + "epoch": 0.239969697100133, + "grad_norm": 1.4259644746780396, + "learning_rate": 9.945193980075544e-05, + "loss": 5.0071, + "step": 55750 + }, + { + "epoch": 0.24018491655941562, + "grad_norm": 2.6879870891571045, + "learning_rate": 9.94509401391485e-05, + "loss": 5.5767, + "step": 55800 + }, + { + "epoch": 0.24040013601869828, + "grad_norm": 3.495375156402588, + "learning_rate": 9.944993957171423e-05, + "loss": 5.3094, + "step": 55850 + }, + { + "epoch": 0.2406153554779809, + "grad_norm": 3.3842592239379883, + "learning_rate": 9.944893809847091e-05, + "loss": 5.3613, + "step": 55900 + }, + { + "epoch": 0.24083057493726354, + "grad_norm": 4.214946746826172, + "learning_rate": 9.944793571943691e-05, + "loss": 5.2223, + "step": 55950 + }, + { + "epoch": 0.24104579439654616, + "grad_norm": 2.5203423500061035, + "learning_rate": 9.94469324346306e-05, + "loss": 5.205, + "step": 56000 + }, + { + "epoch": 0.2412610138558288, + "grad_norm": 2.954901695251465, + "learning_rate": 9.944592824407035e-05, + "loss": 5.4582, + "step": 56050 + }, + { + "epoch": 0.24147623331511142, + "grad_norm": 2.6618287563323975, + "learning_rate": 9.944492314777454e-05, + "loss": 5.1492, + "step": 56100 + }, + { + "epoch": 0.24169145277439405, + "grad_norm": 0.7004172801971436, + "learning_rate": 9.94439171457616e-05, + "loss": 5.4037, + "step": 56150 + }, + { + "epoch": 0.24190667223367668, + "grad_norm": 1.261536955833435, + "learning_rate": 9.944291023804995e-05, + "loss": 5.0977, + "step": 56200 + }, + { + "epoch": 0.2421218916929593, + "grad_norm": 1.9998053312301636, + "learning_rate": 9.944190242465802e-05, + "loss": 4.732, + "step": 56250 + }, + { + "epoch": 0.24233711115224194, + "grad_norm": 2.1535086631774902, + "learning_rate": 9.94408937056043e-05, + "loss": 4.9951, + "step": 56300 + }, + { + "epoch": 0.24255233061152456, + "grad_norm": 0.6176988482475281, + "learning_rate": 9.943988408090726e-05, + "loss": 4.9282, + "step": 56350 + }, + { + "epoch": 0.2427675500708072, + "grad_norm": 1.9532209634780884, + "learning_rate": 9.943887355058536e-05, + "loss": 4.9962, + "step": 56400 + }, + { + "epoch": 0.24298276953008982, + "grad_norm": 3.0574049949645996, + "learning_rate": 9.943786211465716e-05, + "loss": 4.8592, + "step": 56450 + }, + { + "epoch": 0.24319798898937248, + "grad_norm": 1.6149346828460693, + "learning_rate": 9.943684977314116e-05, + "loss": 5.3563, + "step": 56500 + }, + { + "epoch": 0.2434132084486551, + "grad_norm": 1.4772765636444092, + "learning_rate": 9.94358365260559e-05, + "loss": 5.1515, + "step": 56550 + }, + { + "epoch": 0.24362842790793773, + "grad_norm": 1.9287935495376587, + "learning_rate": 9.943482237341996e-05, + "loss": 5.3981, + "step": 56600 + }, + { + "epoch": 0.24384364736722036, + "grad_norm": 1.1024911403656006, + "learning_rate": 9.943380731525189e-05, + "loss": 5.3213, + "step": 56650 + }, + { + "epoch": 0.244058866826503, + "grad_norm": 1.4031673669815063, + "learning_rate": 9.943279135157033e-05, + "loss": 5.6008, + "step": 56700 + }, + { + "epoch": 0.24427408628578562, + "grad_norm": 2.175830125808716, + "learning_rate": 9.943177448239383e-05, + "loss": 4.9379, + "step": 56750 + }, + { + "epoch": 0.24448930574506825, + "grad_norm": 2.348788261413574, + "learning_rate": 9.943075670774105e-05, + "loss": 5.4513, + "step": 56800 + }, + { + "epoch": 0.24470452520435088, + "grad_norm": 2.708932638168335, + "learning_rate": 9.942973802763065e-05, + "loss": 4.9419, + "step": 56850 + }, + { + "epoch": 0.2449197446636335, + "grad_norm": 4.615316867828369, + "learning_rate": 9.942871844208124e-05, + "loss": 5.3054, + "step": 56900 + }, + { + "epoch": 0.24513496412291613, + "grad_norm": 1.204506754875183, + "learning_rate": 9.942769795111156e-05, + "loss": 5.003, + "step": 56950 + }, + { + "epoch": 0.24535018358219876, + "grad_norm": 2.089003801345825, + "learning_rate": 9.942667655474024e-05, + "loss": 4.9656, + "step": 57000 + }, + { + "epoch": 0.24535018358219876, + "eval_loss": 5.7350873947143555, + "eval_runtime": 34.9543, + "eval_samples_per_second": 18.31, + "eval_steps_per_second": 9.155, + "eval_tts_loss": 6.332701958575566, + "step": 57000 + }, + { + "epoch": 0.2455654030414814, + "grad_norm": 1.2268033027648926, + "learning_rate": 9.942565425298602e-05, + "loss": 5.123, + "step": 57050 + }, + { + "epoch": 0.24578062250076402, + "grad_norm": 1.8592723608016968, + "learning_rate": 9.942463104586764e-05, + "loss": 5.6979, + "step": 57100 + }, + { + "epoch": 0.24599584196004665, + "grad_norm": 0.5017115473747253, + "learning_rate": 9.942360693340382e-05, + "loss": 5.3483, + "step": 57150 + }, + { + "epoch": 0.2462110614193293, + "grad_norm": 1.3968095779418945, + "learning_rate": 9.942258191561332e-05, + "loss": 5.4849, + "step": 57200 + }, + { + "epoch": 0.24642628087861193, + "grad_norm": 0.5747193098068237, + "learning_rate": 9.942155599251492e-05, + "loss": 5.2929, + "step": 57250 + }, + { + "epoch": 0.24664150033789456, + "grad_norm": 2.3296024799346924, + "learning_rate": 9.942052916412743e-05, + "loss": 5.3351, + "step": 57300 + }, + { + "epoch": 0.2468567197971772, + "grad_norm": 5.077657699584961, + "learning_rate": 9.941950143046965e-05, + "loss": 4.8084, + "step": 57350 + }, + { + "epoch": 0.24707193925645982, + "grad_norm": 2.5830864906311035, + "learning_rate": 9.941847279156038e-05, + "loss": 5.0984, + "step": 57400 + }, + { + "epoch": 0.24728715871574244, + "grad_norm": 4.606936454772949, + "learning_rate": 9.941744324741849e-05, + "loss": 5.3343, + "step": 57450 + }, + { + "epoch": 0.24750237817502507, + "grad_norm": 0.628861665725708, + "learning_rate": 9.941641279806285e-05, + "loss": 5.4058, + "step": 57500 + }, + { + "epoch": 0.2477175976343077, + "grad_norm": 2.0351967811584473, + "learning_rate": 9.941538144351228e-05, + "loss": 4.9985, + "step": 57550 + }, + { + "epoch": 0.24793281709359033, + "grad_norm": 2.155299425125122, + "learning_rate": 9.941434918378573e-05, + "loss": 5.3191, + "step": 57600 + }, + { + "epoch": 0.24814803655287296, + "grad_norm": 2.5731594562530518, + "learning_rate": 9.941331601890209e-05, + "loss": 5.6035, + "step": 57650 + }, + { + "epoch": 0.2483632560121556, + "grad_norm": 3.6012821197509766, + "learning_rate": 9.941228194888028e-05, + "loss": 5.232, + "step": 57700 + }, + { + "epoch": 0.24857847547143821, + "grad_norm": 1.3827499151229858, + "learning_rate": 9.941124697373924e-05, + "loss": 5.3183, + "step": 57750 + }, + { + "epoch": 0.24879369493072084, + "grad_norm": 4.1097731590271, + "learning_rate": 9.941021109349793e-05, + "loss": 5.1746, + "step": 57800 + }, + { + "epoch": 0.2490089143900035, + "grad_norm": 2.089738607406616, + "learning_rate": 9.940917430817534e-05, + "loss": 5.2384, + "step": 57850 + }, + { + "epoch": 0.24922413384928613, + "grad_norm": 1.0719624757766724, + "learning_rate": 9.940813661779045e-05, + "loss": 4.8462, + "step": 57900 + }, + { + "epoch": 0.24943935330856876, + "grad_norm": 2.2489209175109863, + "learning_rate": 9.940709802236226e-05, + "loss": 5.4476, + "step": 57950 + }, + { + "epoch": 0.24965457276785138, + "grad_norm": 1.7692285776138306, + "learning_rate": 9.940605852190979e-05, + "loss": 5.2415, + "step": 58000 + }, + { + "epoch": 0.249869792227134, + "grad_norm": 2.677029609680176, + "learning_rate": 9.94050181164521e-05, + "loss": 5.476, + "step": 58050 + }, + { + "epoch": 0.2500850116864166, + "grad_norm": 0.8494945168495178, + "learning_rate": 9.940397680600825e-05, + "loss": 5.1492, + "step": 58100 + }, + { + "epoch": 0.25030023114569927, + "grad_norm": 1.2847857475280762, + "learning_rate": 9.94029345905973e-05, + "loss": 5.5469, + "step": 58150 + }, + { + "epoch": 0.2505154506049819, + "grad_norm": 2.8374667167663574, + "learning_rate": 9.940189147023835e-05, + "loss": 5.0331, + "step": 58200 + }, + { + "epoch": 0.2507306700642645, + "grad_norm": 2.4275097846984863, + "learning_rate": 9.940084744495051e-05, + "loss": 5.2968, + "step": 58250 + }, + { + "epoch": 0.2509458895235472, + "grad_norm": 1.427088737487793, + "learning_rate": 9.939980251475288e-05, + "loss": 5.0285, + "step": 58300 + }, + { + "epoch": 0.2511611089828298, + "grad_norm": 1.8256670236587524, + "learning_rate": 9.939875667966463e-05, + "loss": 5.1116, + "step": 58350 + }, + { + "epoch": 0.25137632844211244, + "grad_norm": 2.046734571456909, + "learning_rate": 9.939770993970491e-05, + "loss": 5.3619, + "step": 58400 + }, + { + "epoch": 0.25159154790139504, + "grad_norm": 1.3059499263763428, + "learning_rate": 9.939666229489289e-05, + "loss": 5.1999, + "step": 58450 + }, + { + "epoch": 0.2518067673606777, + "grad_norm": 2.30973744392395, + "learning_rate": 9.939561374524775e-05, + "loss": 5.6257, + "step": 58500 + }, + { + "epoch": 0.2520219868199603, + "grad_norm": 2.127963066101074, + "learning_rate": 9.93945642907887e-05, + "loss": 5.7624, + "step": 58550 + }, + { + "epoch": 0.25223720627924295, + "grad_norm": 1.5844368934631348, + "learning_rate": 9.9393513931535e-05, + "loss": 5.3548, + "step": 58600 + }, + { + "epoch": 0.25245242573852555, + "grad_norm": 1.6759587526321411, + "learning_rate": 9.939246266750584e-05, + "loss": 5.5066, + "step": 58650 + }, + { + "epoch": 0.2526676451978082, + "grad_norm": 2.0369479656219482, + "learning_rate": 9.939141049872049e-05, + "loss": 4.8633, + "step": 58700 + }, + { + "epoch": 0.2528828646570908, + "grad_norm": 3.320176124572754, + "learning_rate": 9.939035742519825e-05, + "loss": 5.0933, + "step": 58750 + }, + { + "epoch": 0.25309808411637347, + "grad_norm": 1.9914627075195312, + "learning_rate": 9.938930344695839e-05, + "loss": 5.0645, + "step": 58800 + }, + { + "epoch": 0.2533133035756561, + "grad_norm": 1.9133009910583496, + "learning_rate": 9.938824856402021e-05, + "loss": 5.5041, + "step": 58850 + }, + { + "epoch": 0.2535285230349387, + "grad_norm": 1.5282642841339111, + "learning_rate": 9.938719277640305e-05, + "loss": 5.3017, + "step": 58900 + }, + { + "epoch": 0.2537437424942214, + "grad_norm": 2.809129238128662, + "learning_rate": 9.938613608412624e-05, + "loss": 5.435, + "step": 58950 + }, + { + "epoch": 0.253958961953504, + "grad_norm": 1.3909580707550049, + "learning_rate": 9.938507848720912e-05, + "loss": 5.0936, + "step": 59000 + }, + { + "epoch": 0.25417418141278664, + "grad_norm": 1.622314214706421, + "learning_rate": 9.93840199856711e-05, + "loss": 5.6459, + "step": 59050 + }, + { + "epoch": 0.25438940087206924, + "grad_norm": 1.5051627159118652, + "learning_rate": 9.938296057953153e-05, + "loss": 5.8814, + "step": 59100 + }, + { + "epoch": 0.2546046203313519, + "grad_norm": 0.9158236384391785, + "learning_rate": 9.938190026880985e-05, + "loss": 4.7402, + "step": 59150 + }, + { + "epoch": 0.2548198397906345, + "grad_norm": 2.085770845413208, + "learning_rate": 9.938083905352547e-05, + "loss": 5.2057, + "step": 59200 + }, + { + "epoch": 0.25503505924991715, + "grad_norm": 0.3976602554321289, + "learning_rate": 9.93797769336978e-05, + "loss": 4.9201, + "step": 59250 + }, + { + "epoch": 0.25525027870919975, + "grad_norm": 1.5133053064346313, + "learning_rate": 9.937871390934635e-05, + "loss": 4.9312, + "step": 59300 + }, + { + "epoch": 0.2554654981684824, + "grad_norm": 4.599536418914795, + "learning_rate": 9.937764998049054e-05, + "loss": 5.1882, + "step": 59350 + }, + { + "epoch": 0.255680717627765, + "grad_norm": 1.679758071899414, + "learning_rate": 9.93765851471499e-05, + "loss": 5.0733, + "step": 59400 + }, + { + "epoch": 0.25589593708704766, + "grad_norm": 1.532802939414978, + "learning_rate": 9.937551940934391e-05, + "loss": 5.5591, + "step": 59450 + }, + { + "epoch": 0.25611115654633027, + "grad_norm": 1.4076752662658691, + "learning_rate": 9.93744527670921e-05, + "loss": 5.5561, + "step": 59500 + }, + { + "epoch": 0.2563263760056129, + "grad_norm": 1.4878861904144287, + "learning_rate": 9.9373385220414e-05, + "loss": 4.7665, + "step": 59550 + }, + { + "epoch": 0.2565415954648956, + "grad_norm": 1.4673134088516235, + "learning_rate": 9.937231676932919e-05, + "loss": 5.0129, + "step": 59600 + }, + { + "epoch": 0.2567568149241782, + "grad_norm": 3.34812068939209, + "learning_rate": 9.93712474138572e-05, + "loss": 5.4084, + "step": 59650 + }, + { + "epoch": 0.25697203438346083, + "grad_norm": 1.9143149852752686, + "learning_rate": 9.937017715401766e-05, + "loss": 4.865, + "step": 59700 + }, + { + "epoch": 0.25718725384274344, + "grad_norm": 2.1645452976226807, + "learning_rate": 9.936910598983016e-05, + "loss": 5.3338, + "step": 59750 + }, + { + "epoch": 0.2574024733020261, + "grad_norm": 1.8841525316238403, + "learning_rate": 9.936803392131431e-05, + "loss": 5.4878, + "step": 59800 + }, + { + "epoch": 0.2576176927613087, + "grad_norm": 4.178133487701416, + "learning_rate": 9.936696094848976e-05, + "loss": 4.9176, + "step": 59850 + }, + { + "epoch": 0.25783291222059135, + "grad_norm": 2.4886066913604736, + "learning_rate": 9.936588707137616e-05, + "loss": 5.1001, + "step": 59900 + }, + { + "epoch": 0.25804813167987395, + "grad_norm": 1.1488391160964966, + "learning_rate": 9.93648122899932e-05, + "loss": 4.6423, + "step": 59950 + }, + { + "epoch": 0.2582633511391566, + "grad_norm": 2.937607526779175, + "learning_rate": 9.936373660436052e-05, + "loss": 5.1468, + "step": 60000 + }, + { + "epoch": 0.2582633511391566, + "eval_loss": 5.714274883270264, + "eval_runtime": 34.8451, + "eval_samples_per_second": 18.367, + "eval_steps_per_second": 9.184, + "eval_tts_loss": 6.312542254878453, + "step": 60000 + }, + { + "epoch": 0.2584785705984392, + "grad_norm": 1.4372106790542603, + "learning_rate": 9.936266001449786e-05, + "loss": 5.0108, + "step": 60050 + }, + { + "epoch": 0.25869379005772186, + "grad_norm": 2.5032851696014404, + "learning_rate": 9.936158252042495e-05, + "loss": 5.2574, + "step": 60100 + }, + { + "epoch": 0.25890900951700446, + "grad_norm": 2.45723819732666, + "learning_rate": 9.93605041221615e-05, + "loss": 5.5347, + "step": 60150 + }, + { + "epoch": 0.2591242289762871, + "grad_norm": 2.042066812515259, + "learning_rate": 9.935942481972727e-05, + "loss": 5.1972, + "step": 60200 + }, + { + "epoch": 0.2593394484355698, + "grad_norm": 3.867236852645874, + "learning_rate": 9.935834461314204e-05, + "loss": 5.4191, + "step": 60250 + }, + { + "epoch": 0.2595546678948524, + "grad_norm": 3.326977014541626, + "learning_rate": 9.935726350242559e-05, + "loss": 5.0745, + "step": 60300 + }, + { + "epoch": 0.25976988735413503, + "grad_norm": 2.249227285385132, + "learning_rate": 9.935618148759774e-05, + "loss": 5.1524, + "step": 60350 + }, + { + "epoch": 0.25998510681341763, + "grad_norm": 3.1650679111480713, + "learning_rate": 9.935509856867827e-05, + "loss": 5.1183, + "step": 60400 + }, + { + "epoch": 0.2602003262727003, + "grad_norm": 1.5792036056518555, + "learning_rate": 9.935401474568706e-05, + "loss": 5.7804, + "step": 60450 + }, + { + "epoch": 0.2604155457319829, + "grad_norm": 2.578972101211548, + "learning_rate": 9.935293001864394e-05, + "loss": 4.9662, + "step": 60500 + }, + { + "epoch": 0.26063076519126555, + "grad_norm": 2.553814649581909, + "learning_rate": 9.93518443875688e-05, + "loss": 5.473, + "step": 60550 + }, + { + "epoch": 0.26084598465054815, + "grad_norm": 3.0505259037017822, + "learning_rate": 9.93507578524815e-05, + "loss": 5.8697, + "step": 60600 + }, + { + "epoch": 0.2610612041098308, + "grad_norm": 2.346491575241089, + "learning_rate": 9.934967041340193e-05, + "loss": 5.5243, + "step": 60650 + }, + { + "epoch": 0.2612764235691134, + "grad_norm": 2.130540370941162, + "learning_rate": 9.934858207035007e-05, + "loss": 5.1534, + "step": 60700 + }, + { + "epoch": 0.26149164302839606, + "grad_norm": 2.051178455352783, + "learning_rate": 9.93474928233458e-05, + "loss": 5.1202, + "step": 60750 + }, + { + "epoch": 0.26170686248767866, + "grad_norm": 1.9480808973312378, + "learning_rate": 9.934640267240908e-05, + "loss": 4.7985, + "step": 60800 + }, + { + "epoch": 0.2619220819469613, + "grad_norm": 1.59281325340271, + "learning_rate": 9.934531161755993e-05, + "loss": 4.9366, + "step": 60850 + }, + { + "epoch": 0.26213730140624397, + "grad_norm": 0.5018999576568604, + "learning_rate": 9.934421965881828e-05, + "loss": 5.5054, + "step": 60900 + }, + { + "epoch": 0.2623525208655266, + "grad_norm": 2.5710151195526123, + "learning_rate": 9.934312679620414e-05, + "loss": 5.7211, + "step": 60950 + }, + { + "epoch": 0.26256774032480923, + "grad_norm": 0.7192572355270386, + "learning_rate": 9.934203302973754e-05, + "loss": 5.4212, + "step": 61000 + }, + { + "epoch": 0.26278295978409183, + "grad_norm": 1.5791642665863037, + "learning_rate": 9.934093835943851e-05, + "loss": 5.1539, + "step": 61050 + }, + { + "epoch": 0.2629981792433745, + "grad_norm": 2.0854594707489014, + "learning_rate": 9.933984278532711e-05, + "loss": 5.6082, + "step": 61100 + }, + { + "epoch": 0.2632133987026571, + "grad_norm": 2.2452282905578613, + "learning_rate": 9.933874630742339e-05, + "loss": 5.2305, + "step": 61150 + }, + { + "epoch": 0.26342861816193974, + "grad_norm": 2.8415367603302, + "learning_rate": 9.933764892574746e-05, + "loss": 5.4377, + "step": 61200 + }, + { + "epoch": 0.26364383762122234, + "grad_norm": 1.5109225511550903, + "learning_rate": 9.933655064031942e-05, + "loss": 5.3821, + "step": 61250 + }, + { + "epoch": 0.263859057080505, + "grad_norm": 1.4373362064361572, + "learning_rate": 9.933545145115937e-05, + "loss": 5.1291, + "step": 61300 + }, + { + "epoch": 0.2640742765397876, + "grad_norm": 2.224710702896118, + "learning_rate": 9.933435135828743e-05, + "loss": 5.3489, + "step": 61350 + }, + { + "epoch": 0.26428949599907026, + "grad_norm": 2.713477373123169, + "learning_rate": 9.93332503617238e-05, + "loss": 4.9196, + "step": 61400 + }, + { + "epoch": 0.26450471545835286, + "grad_norm": 1.1444673538208008, + "learning_rate": 9.933214846148862e-05, + "loss": 4.8458, + "step": 61450 + }, + { + "epoch": 0.2647199349176355, + "grad_norm": 0.31838956475257874, + "learning_rate": 9.933104565760208e-05, + "loss": 5.0872, + "step": 61500 + }, + { + "epoch": 0.26493515437691817, + "grad_norm": 1.6787073612213135, + "learning_rate": 9.932994195008436e-05, + "loss": 4.6267, + "step": 61550 + }, + { + "epoch": 0.26515037383620077, + "grad_norm": 1.487918496131897, + "learning_rate": 9.93288373389557e-05, + "loss": 5.5152, + "step": 61600 + }, + { + "epoch": 0.2653655932954834, + "grad_norm": 0.6379329562187195, + "learning_rate": 9.932773182423631e-05, + "loss": 4.6375, + "step": 61650 + }, + { + "epoch": 0.265580812754766, + "grad_norm": 1.560736894607544, + "learning_rate": 9.932662540594648e-05, + "loss": 4.9455, + "step": 61700 + }, + { + "epoch": 0.2657960322140487, + "grad_norm": 2.8204092979431152, + "learning_rate": 9.932551808410645e-05, + "loss": 4.7428, + "step": 61750 + }, + { + "epoch": 0.2660112516733313, + "grad_norm": 2.4071149826049805, + "learning_rate": 9.932440985873651e-05, + "loss": 5.0994, + "step": 61800 + }, + { + "epoch": 0.26622647113261394, + "grad_norm": 1.6903042793273926, + "learning_rate": 9.932330072985696e-05, + "loss": 5.0547, + "step": 61850 + }, + { + "epoch": 0.26644169059189654, + "grad_norm": 1.5531368255615234, + "learning_rate": 9.932219069748811e-05, + "loss": 4.9196, + "step": 61900 + }, + { + "epoch": 0.2666569100511792, + "grad_norm": 4.349738597869873, + "learning_rate": 9.932107976165031e-05, + "loss": 5.5492, + "step": 61950 + }, + { + "epoch": 0.2668721295104618, + "grad_norm": 1.8932794332504272, + "learning_rate": 9.931996792236387e-05, + "loss": 4.8417, + "step": 62000 + }, + { + "epoch": 0.26708734896974445, + "grad_norm": 1.2642627954483032, + "learning_rate": 9.931885517964921e-05, + "loss": 5.3965, + "step": 62050 + }, + { + "epoch": 0.26730256842902705, + "grad_norm": 3.7117292881011963, + "learning_rate": 9.931774153352669e-05, + "loss": 5.4896, + "step": 62100 + }, + { + "epoch": 0.2675177878883097, + "grad_norm": 1.8055529594421387, + "learning_rate": 9.931662698401669e-05, + "loss": 5.4623, + "step": 62150 + }, + { + "epoch": 0.26773300734759237, + "grad_norm": 1.2677342891693115, + "learning_rate": 9.931551153113965e-05, + "loss": 5.2569, + "step": 62200 + }, + { + "epoch": 0.26794822680687497, + "grad_norm": 2.0716171264648438, + "learning_rate": 9.9314395174916e-05, + "loss": 5.0339, + "step": 62250 + }, + { + "epoch": 0.2681634462661576, + "grad_norm": 4.138665199279785, + "learning_rate": 9.931327791536619e-05, + "loss": 5.562, + "step": 62300 + }, + { + "epoch": 0.2683786657254402, + "grad_norm": 2.2694361209869385, + "learning_rate": 9.931215975251067e-05, + "loss": 4.9839, + "step": 62350 + }, + { + "epoch": 0.2685938851847229, + "grad_norm": 1.4361618757247925, + "learning_rate": 9.931104068636993e-05, + "loss": 5.586, + "step": 62400 + }, + { + "epoch": 0.2688091046440055, + "grad_norm": 1.952954649925232, + "learning_rate": 9.930992071696448e-05, + "loss": 5.2026, + "step": 62450 + }, + { + "epoch": 0.26902432410328814, + "grad_norm": 4.103116512298584, + "learning_rate": 9.930879984431482e-05, + "loss": 4.8111, + "step": 62500 + }, + { + "epoch": 0.26923954356257074, + "grad_norm": 1.9213756322860718, + "learning_rate": 9.93076780684415e-05, + "loss": 5.2023, + "step": 62550 + }, + { + "epoch": 0.2694547630218534, + "grad_norm": 1.1413936614990234, + "learning_rate": 9.930655538936505e-05, + "loss": 5.3314, + "step": 62600 + }, + { + "epoch": 0.269669982481136, + "grad_norm": 0.8136217594146729, + "learning_rate": 9.930543180710603e-05, + "loss": 5.046, + "step": 62650 + }, + { + "epoch": 0.26988520194041865, + "grad_norm": 1.8434231281280518, + "learning_rate": 9.930430732168504e-05, + "loss": 5.1236, + "step": 62700 + }, + { + "epoch": 0.27010042139970125, + "grad_norm": 1.980948567390442, + "learning_rate": 9.930318193312267e-05, + "loss": 5.2383, + "step": 62750 + }, + { + "epoch": 0.2703156408589839, + "grad_norm": 2.6044650077819824, + "learning_rate": 9.930205564143954e-05, + "loss": 4.9233, + "step": 62800 + }, + { + "epoch": 0.27053086031826656, + "grad_norm": 1.6596988439559937, + "learning_rate": 9.930092844665628e-05, + "loss": 5.2296, + "step": 62850 + }, + { + "epoch": 0.27074607977754916, + "grad_norm": 1.9009202718734741, + "learning_rate": 9.929980034879353e-05, + "loss": 5.3758, + "step": 62900 + }, + { + "epoch": 0.2709612992368318, + "grad_norm": 2.876044511795044, + "learning_rate": 9.929867134787196e-05, + "loss": 5.6572, + "step": 62950 + }, + { + "epoch": 0.2711765186961144, + "grad_norm": 1.5896497964859009, + "learning_rate": 9.929754144391223e-05, + "loss": 4.9539, + "step": 63000 + }, + { + "epoch": 0.2711765186961144, + "eval_loss": 5.693178653717041, + "eval_runtime": 35.0767, + "eval_samples_per_second": 18.246, + "eval_steps_per_second": 9.123, + "eval_tts_loss": 6.412921987635079, + "step": 63000 + }, + { + "epoch": 0.2713917381553971, + "grad_norm": 3.7784128189086914, + "learning_rate": 9.929641063693509e-05, + "loss": 5.3002, + "step": 63050 + }, + { + "epoch": 0.2716069576146797, + "grad_norm": 2.250643014907837, + "learning_rate": 9.92952789269612e-05, + "loss": 5.5092, + "step": 63100 + }, + { + "epoch": 0.27182217707396233, + "grad_norm": 2.394786834716797, + "learning_rate": 9.929414631401131e-05, + "loss": 5.025, + "step": 63150 + }, + { + "epoch": 0.27203739653324494, + "grad_norm": 2.526033878326416, + "learning_rate": 9.929301279810616e-05, + "loss": 5.2769, + "step": 63200 + }, + { + "epoch": 0.2722526159925276, + "grad_norm": 2.601335287094116, + "learning_rate": 9.929187837926655e-05, + "loss": 5.3217, + "step": 63250 + }, + { + "epoch": 0.2724678354518102, + "grad_norm": 3.790667772293091, + "learning_rate": 9.92907430575132e-05, + "loss": 5.031, + "step": 63300 + }, + { + "epoch": 0.27268305491109285, + "grad_norm": 1.754679799079895, + "learning_rate": 9.928960683286695e-05, + "loss": 5.0719, + "step": 63350 + }, + { + "epoch": 0.27289827437037545, + "grad_norm": 3.114264726638794, + "learning_rate": 9.928846970534859e-05, + "loss": 5.1811, + "step": 63400 + }, + { + "epoch": 0.2731134938296581, + "grad_norm": 1.6632746458053589, + "learning_rate": 9.928733167497897e-05, + "loss": 5.098, + "step": 63450 + }, + { + "epoch": 0.2733287132889407, + "grad_norm": 2.3255672454833984, + "learning_rate": 9.928619274177893e-05, + "loss": 5.2746, + "step": 63500 + }, + { + "epoch": 0.27354393274822336, + "grad_norm": 1.9656232595443726, + "learning_rate": 9.928505290576933e-05, + "loss": 5.4508, + "step": 63550 + }, + { + "epoch": 0.273759152207506, + "grad_norm": 2.095505714416504, + "learning_rate": 9.928391216697104e-05, + "loss": 5.3299, + "step": 63600 + }, + { + "epoch": 0.2739743716667886, + "grad_norm": 1.6140300035476685, + "learning_rate": 9.928277052540496e-05, + "loss": 5.3867, + "step": 63650 + }, + { + "epoch": 0.2741895911260713, + "grad_norm": 1.1519381999969482, + "learning_rate": 9.9281627981092e-05, + "loss": 4.9063, + "step": 63700 + }, + { + "epoch": 0.2744048105853539, + "grad_norm": 2.049403667449951, + "learning_rate": 9.92804845340531e-05, + "loss": 5.5286, + "step": 63750 + }, + { + "epoch": 0.27462003004463653, + "grad_norm": 0.8681288361549377, + "learning_rate": 9.92793401843092e-05, + "loss": 4.7444, + "step": 63800 + }, + { + "epoch": 0.27483524950391913, + "grad_norm": 2.5735480785369873, + "learning_rate": 9.927819493188126e-05, + "loss": 5.1348, + "step": 63850 + }, + { + "epoch": 0.2750504689632018, + "grad_norm": 0.8791064023971558, + "learning_rate": 9.927704877679027e-05, + "loss": 5.0984, + "step": 63900 + }, + { + "epoch": 0.2752656884224844, + "grad_norm": 2.847947597503662, + "learning_rate": 9.927590171905721e-05, + "loss": 5.2297, + "step": 63950 + }, + { + "epoch": 0.27548090788176705, + "grad_norm": 2.298250198364258, + "learning_rate": 9.92747537587031e-05, + "loss": 5.2002, + "step": 64000 + }, + { + "epoch": 0.27569612734104965, + "grad_norm": 1.6366026401519775, + "learning_rate": 9.927360489574895e-05, + "loss": 5.324, + "step": 64050 + }, + { + "epoch": 0.2759113468003323, + "grad_norm": 0.6945295333862305, + "learning_rate": 9.927245513021583e-05, + "loss": 4.7761, + "step": 64100 + }, + { + "epoch": 0.2761265662596149, + "grad_norm": 0.6995507478713989, + "learning_rate": 9.927130446212477e-05, + "loss": 5.3399, + "step": 64150 + }, + { + "epoch": 0.27634178571889756, + "grad_norm": 1.9427343606948853, + "learning_rate": 9.92701528914969e-05, + "loss": 5.1016, + "step": 64200 + }, + { + "epoch": 0.2765570051781802, + "grad_norm": 1.6938934326171875, + "learning_rate": 9.926900041835326e-05, + "loss": 5.1924, + "step": 64250 + }, + { + "epoch": 0.2767722246374628, + "grad_norm": 1.5238553285598755, + "learning_rate": 9.926784704271499e-05, + "loss": 5.6148, + "step": 64300 + }, + { + "epoch": 0.27698744409674547, + "grad_norm": 1.9351153373718262, + "learning_rate": 9.92666927646032e-05, + "loss": 4.6976, + "step": 64350 + }, + { + "epoch": 0.2772026635560281, + "grad_norm": 2.84324049949646, + "learning_rate": 9.926553758403906e-05, + "loss": 5.0453, + "step": 64400 + }, + { + "epoch": 0.27741788301531073, + "grad_norm": 2.5920162200927734, + "learning_rate": 9.92643815010437e-05, + "loss": 5.5748, + "step": 64450 + }, + { + "epoch": 0.27763310247459333, + "grad_norm": 3.726463556289673, + "learning_rate": 9.926322451563831e-05, + "loss": 5.4262, + "step": 64500 + }, + { + "epoch": 0.277848321933876, + "grad_norm": 2.2043912410736084, + "learning_rate": 9.92620666278441e-05, + "loss": 5.5953, + "step": 64550 + }, + { + "epoch": 0.2780635413931586, + "grad_norm": 1.6709778308868408, + "learning_rate": 9.926090783768225e-05, + "loss": 5.5626, + "step": 64600 + }, + { + "epoch": 0.27827876085244124, + "grad_norm": 6.487951278686523, + "learning_rate": 9.925974814517399e-05, + "loss": 5.3274, + "step": 64650 + }, + { + "epoch": 0.27849398031172384, + "grad_norm": 1.2043954133987427, + "learning_rate": 9.925858755034059e-05, + "loss": 4.8674, + "step": 64700 + }, + { + "epoch": 0.2787091997710065, + "grad_norm": 1.9522039890289307, + "learning_rate": 9.925742605320327e-05, + "loss": 5.1816, + "step": 64750 + }, + { + "epoch": 0.2789244192302891, + "grad_norm": 1.7387962341308594, + "learning_rate": 9.925626365378335e-05, + "loss": 5.2897, + "step": 64800 + }, + { + "epoch": 0.27913963868957176, + "grad_norm": 4.6367621421813965, + "learning_rate": 9.925510035210209e-05, + "loss": 5.3653, + "step": 64850 + }, + { + "epoch": 0.2793548581488544, + "grad_norm": 1.7106224298477173, + "learning_rate": 9.925393614818082e-05, + "loss": 4.6213, + "step": 64900 + }, + { + "epoch": 0.279570077608137, + "grad_norm": 1.5968356132507324, + "learning_rate": 9.925277104204083e-05, + "loss": 5.3896, + "step": 64950 + }, + { + "epoch": 0.27978529706741967, + "grad_norm": 1.6822242736816406, + "learning_rate": 9.92516050337035e-05, + "loss": 4.8993, + "step": 65000 + }, + { + "epoch": 0.28000051652670227, + "grad_norm": 2.80346941947937, + "learning_rate": 9.92504381231902e-05, + "loss": 5.2805, + "step": 65050 + }, + { + "epoch": 0.2802157359859849, + "grad_norm": 1.7603247165679932, + "learning_rate": 9.924927031052224e-05, + "loss": 4.887, + "step": 65100 + }, + { + "epoch": 0.2804309554452675, + "grad_norm": 1.5268793106079102, + "learning_rate": 9.924810159572108e-05, + "loss": 5.2927, + "step": 65150 + }, + { + "epoch": 0.2806461749045502, + "grad_norm": 1.4674557447433472, + "learning_rate": 9.924693197880808e-05, + "loss": 5.2671, + "step": 65200 + }, + { + "epoch": 0.2808613943638328, + "grad_norm": 1.1149851083755493, + "learning_rate": 9.92457614598047e-05, + "loss": 5.0251, + "step": 65250 + }, + { + "epoch": 0.28107661382311544, + "grad_norm": 1.5153614282608032, + "learning_rate": 9.924459003873236e-05, + "loss": 4.8532, + "step": 65300 + }, + { + "epoch": 0.28129183328239804, + "grad_norm": 1.47335946559906, + "learning_rate": 9.924341771561253e-05, + "loss": 5.2228, + "step": 65350 + }, + { + "epoch": 0.2815070527416807, + "grad_norm": 2.1090242862701416, + "learning_rate": 9.924224449046669e-05, + "loss": 5.5617, + "step": 65400 + }, + { + "epoch": 0.2817222722009633, + "grad_norm": 1.5761854648590088, + "learning_rate": 9.92410703633163e-05, + "loss": 4.8024, + "step": 65450 + }, + { + "epoch": 0.28193749166024595, + "grad_norm": 1.7882229089736938, + "learning_rate": 9.923989533418288e-05, + "loss": 5.2494, + "step": 65500 + }, + { + "epoch": 0.2821527111195286, + "grad_norm": 0.9368113279342651, + "learning_rate": 9.923871940308798e-05, + "loss": 5.5156, + "step": 65550 + }, + { + "epoch": 0.2823679305788112, + "grad_norm": 1.663637399673462, + "learning_rate": 9.923754257005311e-05, + "loss": 5.0296, + "step": 65600 + }, + { + "epoch": 0.28258315003809387, + "grad_norm": 2.0059878826141357, + "learning_rate": 9.923636483509983e-05, + "loss": 5.0729, + "step": 65650 + }, + { + "epoch": 0.28279836949737647, + "grad_norm": 0.5605185627937317, + "learning_rate": 9.923518619824974e-05, + "loss": 4.8456, + "step": 65700 + }, + { + "epoch": 0.2830135889566591, + "grad_norm": 1.6503628492355347, + "learning_rate": 9.923400665952439e-05, + "loss": 5.4043, + "step": 65750 + }, + { + "epoch": 0.2832288084159417, + "grad_norm": 2.516427755355835, + "learning_rate": 9.923282621894543e-05, + "loss": 5.1121, + "step": 65800 + }, + { + "epoch": 0.2834440278752244, + "grad_norm": 3.058464288711548, + "learning_rate": 9.923164487653444e-05, + "loss": 5.0299, + "step": 65850 + }, + { + "epoch": 0.283659247334507, + "grad_norm": 3.319711446762085, + "learning_rate": 9.923046263231308e-05, + "loss": 4.7587, + "step": 65900 + }, + { + "epoch": 0.28387446679378964, + "grad_norm": 1.9952852725982666, + "learning_rate": 9.922927948630303e-05, + "loss": 5.4155, + "step": 65950 + }, + { + "epoch": 0.28408968625307224, + "grad_norm": 1.9791783094406128, + "learning_rate": 9.922809543852592e-05, + "loss": 5.3987, + "step": 66000 + }, + { + "epoch": 0.28408968625307224, + "eval_loss": 5.686070919036865, + "eval_runtime": 35.036, + "eval_samples_per_second": 18.267, + "eval_steps_per_second": 9.133, + "eval_tts_loss": 6.2827784499254085, + "step": 66000 + }, + { + "epoch": 0.2843049057123549, + "grad_norm": 0.547368586063385, + "learning_rate": 9.922691048900346e-05, + "loss": 5.0529, + "step": 66050 + }, + { + "epoch": 0.2845201251716375, + "grad_norm": 2.4450178146362305, + "learning_rate": 9.922572463775735e-05, + "loss": 5.1592, + "step": 66100 + }, + { + "epoch": 0.28473534463092015, + "grad_norm": 1.4566258192062378, + "learning_rate": 9.922453788480931e-05, + "loss": 4.9229, + "step": 66150 + }, + { + "epoch": 0.2849505640902028, + "grad_norm": 2.1411094665527344, + "learning_rate": 9.922335023018109e-05, + "loss": 5.2469, + "step": 66200 + }, + { + "epoch": 0.2851657835494854, + "grad_norm": 2.2769110202789307, + "learning_rate": 9.922216167389443e-05, + "loss": 4.7848, + "step": 66250 + }, + { + "epoch": 0.28538100300876806, + "grad_norm": 4.330928325653076, + "learning_rate": 9.922097221597112e-05, + "loss": 5.0452, + "step": 66300 + }, + { + "epoch": 0.28559622246805066, + "grad_norm": 0.7433291673660278, + "learning_rate": 9.921978185643294e-05, + "loss": 5.1372, + "step": 66350 + }, + { + "epoch": 0.2858114419273333, + "grad_norm": 1.702668309211731, + "learning_rate": 9.921859059530169e-05, + "loss": 4.7853, + "step": 66400 + }, + { + "epoch": 0.2860266613866159, + "grad_norm": 2.2420530319213867, + "learning_rate": 9.921739843259918e-05, + "loss": 5.115, + "step": 66450 + }, + { + "epoch": 0.2862418808458986, + "grad_norm": 2.6457695960998535, + "learning_rate": 9.921620536834728e-05, + "loss": 5.6178, + "step": 66500 + }, + { + "epoch": 0.2864571003051812, + "grad_norm": 1.7178294658660889, + "learning_rate": 9.921501140256783e-05, + "loss": 4.9509, + "step": 66550 + }, + { + "epoch": 0.28667231976446383, + "grad_norm": 1.273775577545166, + "learning_rate": 9.921381653528268e-05, + "loss": 5.3758, + "step": 66600 + }, + { + "epoch": 0.28688753922374644, + "grad_norm": 2.0566086769104004, + "learning_rate": 9.921262076651374e-05, + "loss": 5.0995, + "step": 66650 + }, + { + "epoch": 0.2871027586830291, + "grad_norm": 1.5906682014465332, + "learning_rate": 9.92114240962829e-05, + "loss": 4.8806, + "step": 66700 + }, + { + "epoch": 0.2873179781423117, + "grad_norm": 2.5699715614318848, + "learning_rate": 9.92102265246121e-05, + "loss": 4.9329, + "step": 66750 + }, + { + "epoch": 0.28753319760159435, + "grad_norm": 2.989287853240967, + "learning_rate": 9.920902805152325e-05, + "loss": 5.0508, + "step": 66800 + }, + { + "epoch": 0.28774841706087695, + "grad_norm": 1.89162015914917, + "learning_rate": 9.920782867703835e-05, + "loss": 4.7865, + "step": 66850 + }, + { + "epoch": 0.2879636365201596, + "grad_norm": 1.489004135131836, + "learning_rate": 9.920662840117932e-05, + "loss": 5.1594, + "step": 66900 + }, + { + "epoch": 0.28817885597944226, + "grad_norm": 1.875589370727539, + "learning_rate": 9.920542722396815e-05, + "loss": 5.3558, + "step": 66950 + }, + { + "epoch": 0.28839407543872486, + "grad_norm": 2.5545406341552734, + "learning_rate": 9.920422514542686e-05, + "loss": 4.7427, + "step": 67000 + }, + { + "epoch": 0.2886092948980075, + "grad_norm": 2.772759437561035, + "learning_rate": 9.920302216557747e-05, + "loss": 5.5215, + "step": 67050 + }, + { + "epoch": 0.2888245143572901, + "grad_norm": 0.43812066316604614, + "learning_rate": 9.920181828444201e-05, + "loss": 5.2253, + "step": 67100 + }, + { + "epoch": 0.2890397338165728, + "grad_norm": 2.6810245513916016, + "learning_rate": 9.920061350204256e-05, + "loss": 4.9637, + "step": 67150 + }, + { + "epoch": 0.2892549532758554, + "grad_norm": 2.0608086585998535, + "learning_rate": 9.919940781840113e-05, + "loss": 5.2811, + "step": 67200 + }, + { + "epoch": 0.28947017273513803, + "grad_norm": 3.9700584411621094, + "learning_rate": 9.919820123353985e-05, + "loss": 5.9059, + "step": 67250 + }, + { + "epoch": 0.28968539219442063, + "grad_norm": 0.8409829139709473, + "learning_rate": 9.91969937474808e-05, + "loss": 4.7572, + "step": 67300 + }, + { + "epoch": 0.2899006116537033, + "grad_norm": 1.6320794820785522, + "learning_rate": 9.919578536024612e-05, + "loss": 4.6698, + "step": 67350 + }, + { + "epoch": 0.2901158311129859, + "grad_norm": 2.3892982006073, + "learning_rate": 9.919457607185794e-05, + "loss": 4.8165, + "step": 67400 + }, + { + "epoch": 0.29033105057226855, + "grad_norm": 4.1091742515563965, + "learning_rate": 9.919336588233838e-05, + "loss": 5.2031, + "step": 67450 + }, + { + "epoch": 0.29054627003155115, + "grad_norm": 1.5536625385284424, + "learning_rate": 9.919215479170965e-05, + "loss": 5.1228, + "step": 67500 + }, + { + "epoch": 0.2907614894908338, + "grad_norm": 0.770484209060669, + "learning_rate": 9.919094279999392e-05, + "loss": 4.9779, + "step": 67550 + }, + { + "epoch": 0.29097670895011646, + "grad_norm": 2.0326366424560547, + "learning_rate": 9.91897299072134e-05, + "loss": 5.0454, + "step": 67600 + }, + { + "epoch": 0.29119192840939906, + "grad_norm": 3.816662073135376, + "learning_rate": 9.918851611339027e-05, + "loss": 5.3212, + "step": 67650 + }, + { + "epoch": 0.2914071478686817, + "grad_norm": 1.3533514738082886, + "learning_rate": 9.918730141854679e-05, + "loss": 5.6193, + "step": 67700 + }, + { + "epoch": 0.2916223673279643, + "grad_norm": 2.3325023651123047, + "learning_rate": 9.918608582270522e-05, + "loss": 5.0451, + "step": 67750 + }, + { + "epoch": 0.29183758678724697, + "grad_norm": 1.1213713884353638, + "learning_rate": 9.918486932588782e-05, + "loss": 4.9399, + "step": 67800 + }, + { + "epoch": 0.2920528062465296, + "grad_norm": 2.216113567352295, + "learning_rate": 9.918365192811687e-05, + "loss": 5.5195, + "step": 67850 + }, + { + "epoch": 0.29226802570581223, + "grad_norm": 0.7381883859634399, + "learning_rate": 9.918243362941466e-05, + "loss": 5.3576, + "step": 67900 + }, + { + "epoch": 0.29248324516509483, + "grad_norm": 1.8865081071853638, + "learning_rate": 9.918121442980351e-05, + "loss": 5.437, + "step": 67950 + }, + { + "epoch": 0.2926984646243775, + "grad_norm": 2.1195924282073975, + "learning_rate": 9.917999432930578e-05, + "loss": 5.246, + "step": 68000 + }, + { + "epoch": 0.2929136840836601, + "grad_norm": 1.9039791822433472, + "learning_rate": 9.917877332794379e-05, + "loss": 5.1722, + "step": 68050 + }, + { + "epoch": 0.29312890354294274, + "grad_norm": 2.0538041591644287, + "learning_rate": 9.91775514257399e-05, + "loss": 5.2089, + "step": 68100 + }, + { + "epoch": 0.29334412300222534, + "grad_norm": 0.8743541240692139, + "learning_rate": 9.917632862271654e-05, + "loss": 5.6299, + "step": 68150 + }, + { + "epoch": 0.293559342461508, + "grad_norm": 1.6087875366210938, + "learning_rate": 9.917510491889604e-05, + "loss": 5.2572, + "step": 68200 + }, + { + "epoch": 0.29377456192079066, + "grad_norm": 1.6971402168273926, + "learning_rate": 9.917388031430087e-05, + "loss": 5.4386, + "step": 68250 + }, + { + "epoch": 0.29398978138007326, + "grad_norm": 1.8693087100982666, + "learning_rate": 9.917265480895343e-05, + "loss": 5.0145, + "step": 68300 + }, + { + "epoch": 0.2942050008393559, + "grad_norm": 1.6142679452896118, + "learning_rate": 9.91714284028762e-05, + "loss": 5.3264, + "step": 68350 + }, + { + "epoch": 0.2944202202986385, + "grad_norm": 0.6584939360618591, + "learning_rate": 9.917020109609163e-05, + "loss": 4.9074, + "step": 68400 + }, + { + "epoch": 0.29463543975792117, + "grad_norm": 1.6929501295089722, + "learning_rate": 9.916897288862219e-05, + "loss": 5.258, + "step": 68450 + }, + { + "epoch": 0.29485065921720377, + "grad_norm": 1.6995775699615479, + "learning_rate": 9.916774378049038e-05, + "loss": 5.0251, + "step": 68500 + }, + { + "epoch": 0.2950658786764864, + "grad_norm": 1.5695832967758179, + "learning_rate": 9.916651377171872e-05, + "loss": 5.2548, + "step": 68550 + }, + { + "epoch": 0.295281098135769, + "grad_norm": 1.5580003261566162, + "learning_rate": 9.916528286232975e-05, + "loss": 4.8965, + "step": 68600 + }, + { + "epoch": 0.2954963175950517, + "grad_norm": 1.0963990688323975, + "learning_rate": 9.916405105234601e-05, + "loss": 5.3914, + "step": 68650 + }, + { + "epoch": 0.2957115370543343, + "grad_norm": 2.6679320335388184, + "learning_rate": 9.916281834179004e-05, + "loss": 5.2227, + "step": 68700 + }, + { + "epoch": 0.29592675651361694, + "grad_norm": 2.1512210369110107, + "learning_rate": 9.916158473068447e-05, + "loss": 5.1483, + "step": 68750 + }, + { + "epoch": 0.29614197597289954, + "grad_norm": 0.9578506946563721, + "learning_rate": 9.916035021905186e-05, + "loss": 4.6811, + "step": 68800 + }, + { + "epoch": 0.2963571954321822, + "grad_norm": 4.077146530151367, + "learning_rate": 9.915911480691485e-05, + "loss": 5.528, + "step": 68850 + }, + { + "epoch": 0.29657241489146485, + "grad_norm": 2.4349143505096436, + "learning_rate": 9.915787849429603e-05, + "loss": 4.8697, + "step": 68900 + }, + { + "epoch": 0.29678763435074745, + "grad_norm": 1.6968454122543335, + "learning_rate": 9.915664128121808e-05, + "loss": 5.4427, + "step": 68950 + }, + { + "epoch": 0.2970028538100301, + "grad_norm": 1.7751846313476562, + "learning_rate": 9.915540316770364e-05, + "loss": 4.3629, + "step": 69000 + }, + { + "epoch": 0.2970028538100301, + "eval_loss": 5.6501054763793945, + "eval_runtime": 35.1378, + "eval_samples_per_second": 18.214, + "eval_steps_per_second": 9.107, + "eval_tts_loss": 6.423081480872479, + "step": 69000 + }, + { + "epoch": 0.2972180732693127, + "grad_norm": 2.4211487770080566, + "learning_rate": 9.915416415377542e-05, + "loss": 4.9908, + "step": 69050 + }, + { + "epoch": 0.29743329272859537, + "grad_norm": 0.5479389429092407, + "learning_rate": 9.915292423945609e-05, + "loss": 5.2685, + "step": 69100 + }, + { + "epoch": 0.29764851218787797, + "grad_norm": 0.7428943514823914, + "learning_rate": 9.915168342476837e-05, + "loss": 4.9685, + "step": 69150 + }, + { + "epoch": 0.2978637316471606, + "grad_norm": 1.6617367267608643, + "learning_rate": 9.9150441709735e-05, + "loss": 5.1601, + "step": 69200 + }, + { + "epoch": 0.2980789511064432, + "grad_norm": 2.7116451263427734, + "learning_rate": 9.91491990943787e-05, + "loss": 4.5658, + "step": 69250 + }, + { + "epoch": 0.2982941705657259, + "grad_norm": 4.545122146606445, + "learning_rate": 9.914795557872225e-05, + "loss": 5.3177, + "step": 69300 + }, + { + "epoch": 0.2985093900250085, + "grad_norm": 2.6082725524902344, + "learning_rate": 9.914671116278842e-05, + "loss": 5.4381, + "step": 69350 + }, + { + "epoch": 0.29872460948429114, + "grad_norm": 2.5334584712982178, + "learning_rate": 9.914546584660002e-05, + "loss": 5.0469, + "step": 69400 + }, + { + "epoch": 0.29893982894357374, + "grad_norm": 2.3457202911376953, + "learning_rate": 9.914421963017985e-05, + "loss": 4.9401, + "step": 69450 + }, + { + "epoch": 0.2991550484028564, + "grad_norm": 3.3694803714752197, + "learning_rate": 9.914297251355074e-05, + "loss": 5.2113, + "step": 69500 + }, + { + "epoch": 0.29937026786213905, + "grad_norm": 2.6591973304748535, + "learning_rate": 9.914172449673553e-05, + "loss": 5.0335, + "step": 69550 + }, + { + "epoch": 0.29958548732142165, + "grad_norm": 1.6091809272766113, + "learning_rate": 9.914047557975707e-05, + "loss": 4.8722, + "step": 69600 + }, + { + "epoch": 0.2998007067807043, + "grad_norm": 1.6482150554656982, + "learning_rate": 9.913922576263828e-05, + "loss": 5.1475, + "step": 69650 + }, + { + "epoch": 0.3000159262399869, + "grad_norm": 2.031062126159668, + "learning_rate": 9.913797504540201e-05, + "loss": 4.9345, + "step": 69700 + }, + { + "epoch": 0.30023114569926956, + "grad_norm": 2.2380430698394775, + "learning_rate": 9.913672342807117e-05, + "loss": 5.1237, + "step": 69750 + }, + { + "epoch": 0.30044636515855216, + "grad_norm": 4.613659381866455, + "learning_rate": 9.913547091066873e-05, + "loss": 4.7121, + "step": 69800 + }, + { + "epoch": 0.3006615846178348, + "grad_norm": 1.3442952632904053, + "learning_rate": 9.913421749321759e-05, + "loss": 5.2913, + "step": 69850 + }, + { + "epoch": 0.3008768040771174, + "grad_norm": 0.7723957300186157, + "learning_rate": 9.913296317574073e-05, + "loss": 5.2792, + "step": 69900 + }, + { + "epoch": 0.3010920235364001, + "grad_norm": 1.7333821058273315, + "learning_rate": 9.913170795826112e-05, + "loss": 5.3465, + "step": 69950 + }, + { + "epoch": 0.3013072429956827, + "grad_norm": 1.7351491451263428, + "learning_rate": 9.913045184080175e-05, + "loss": 5.1773, + "step": 70000 + }, + { + "epoch": 0.30152246245496533, + "grad_norm": 1.5553429126739502, + "learning_rate": 9.912919482338563e-05, + "loss": 5.6019, + "step": 70050 + }, + { + "epoch": 0.30173768191424793, + "grad_norm": 1.219978928565979, + "learning_rate": 9.912793690603579e-05, + "loss": 4.8683, + "step": 70100 + }, + { + "epoch": 0.3019529013735306, + "grad_norm": 3.435940980911255, + "learning_rate": 9.912667808877527e-05, + "loss": 5.6262, + "step": 70150 + }, + { + "epoch": 0.30216812083281325, + "grad_norm": 3.473267078399658, + "learning_rate": 9.912541837162715e-05, + "loss": 5.2596, + "step": 70200 + }, + { + "epoch": 0.30238334029209585, + "grad_norm": 2.3576252460479736, + "learning_rate": 9.912415775461446e-05, + "loss": 5.2474, + "step": 70250 + }, + { + "epoch": 0.3025985597513785, + "grad_norm": 1.5102282762527466, + "learning_rate": 9.912289623776032e-05, + "loss": 5.5551, + "step": 70300 + }, + { + "epoch": 0.3028137792106611, + "grad_norm": 2.783494472503662, + "learning_rate": 9.912163382108783e-05, + "loss": 4.8888, + "step": 70350 + }, + { + "epoch": 0.30302899866994376, + "grad_norm": 2.732480525970459, + "learning_rate": 9.912037050462013e-05, + "loss": 5.253, + "step": 70400 + }, + { + "epoch": 0.30324421812922636, + "grad_norm": 0.8126398324966431, + "learning_rate": 9.911910628838034e-05, + "loss": 5.1291, + "step": 70450 + }, + { + "epoch": 0.303459437588509, + "grad_norm": 1.863263487815857, + "learning_rate": 9.911784117239165e-05, + "loss": 5.4246, + "step": 70500 + }, + { + "epoch": 0.3036746570477916, + "grad_norm": 1.7170761823654175, + "learning_rate": 9.91165751566772e-05, + "loss": 5.6213, + "step": 70550 + }, + { + "epoch": 0.3038898765070743, + "grad_norm": 2.6633095741271973, + "learning_rate": 9.911530824126018e-05, + "loss": 4.95, + "step": 70600 + }, + { + "epoch": 0.3041050959663569, + "grad_norm": 1.6082711219787598, + "learning_rate": 9.911404042616383e-05, + "loss": 4.9299, + "step": 70650 + }, + { + "epoch": 0.30432031542563953, + "grad_norm": 1.6461269855499268, + "learning_rate": 9.911277171141134e-05, + "loss": 4.6679, + "step": 70700 + }, + { + "epoch": 0.30453553488492213, + "grad_norm": 2.431260108947754, + "learning_rate": 9.911150209702595e-05, + "loss": 5.1078, + "step": 70750 + }, + { + "epoch": 0.3047507543442048, + "grad_norm": 3.4586246013641357, + "learning_rate": 9.911023158303094e-05, + "loss": 4.8872, + "step": 70800 + }, + { + "epoch": 0.3049659738034874, + "grad_norm": 1.1381272077560425, + "learning_rate": 9.910896016944958e-05, + "loss": 5.1239, + "step": 70850 + }, + { + "epoch": 0.30518119326277005, + "grad_norm": 1.5317662954330444, + "learning_rate": 9.910768785630513e-05, + "loss": 5.2997, + "step": 70900 + }, + { + "epoch": 0.3053964127220527, + "grad_norm": 2.3332087993621826, + "learning_rate": 9.910641464362093e-05, + "loss": 5.1415, + "step": 70950 + }, + { + "epoch": 0.3056116321813353, + "grad_norm": 2.3158321380615234, + "learning_rate": 9.91051405314203e-05, + "loss": 5.5425, + "step": 71000 + }, + { + "epoch": 0.30582685164061796, + "grad_norm": 0.8816059231758118, + "learning_rate": 9.910386551972654e-05, + "loss": 5.2841, + "step": 71050 + }, + { + "epoch": 0.30604207109990056, + "grad_norm": 1.2553017139434814, + "learning_rate": 9.910258960856306e-05, + "loss": 5.0913, + "step": 71100 + }, + { + "epoch": 0.3062572905591832, + "grad_norm": 3.244391441345215, + "learning_rate": 9.91013127979532e-05, + "loss": 4.6757, + "step": 71150 + }, + { + "epoch": 0.3064725100184658, + "grad_norm": 4.0318708419799805, + "learning_rate": 9.910003508792035e-05, + "loss": 5.1855, + "step": 71200 + }, + { + "epoch": 0.30668772947774847, + "grad_norm": 1.6130050420761108, + "learning_rate": 9.909875647848792e-05, + "loss": 5.2966, + "step": 71250 + }, + { + "epoch": 0.3069029489370311, + "grad_norm": 3.2795987129211426, + "learning_rate": 9.909747696967933e-05, + "loss": 5.0023, + "step": 71300 + }, + { + "epoch": 0.30711816839631373, + "grad_norm": 2.1928446292877197, + "learning_rate": 9.909619656151801e-05, + "loss": 4.8493, + "step": 71350 + }, + { + "epoch": 0.30733338785559633, + "grad_norm": 2.7492294311523438, + "learning_rate": 9.90949152540274e-05, + "loss": 5.1577, + "step": 71400 + }, + { + "epoch": 0.307548607314879, + "grad_norm": 2.3589394092559814, + "learning_rate": 9.909363304723103e-05, + "loss": 4.8613, + "step": 71450 + }, + { + "epoch": 0.3077638267741616, + "grad_norm": 1.6609641313552856, + "learning_rate": 9.909234994115233e-05, + "loss": 5.205, + "step": 71500 + }, + { + "epoch": 0.30797904623344424, + "grad_norm": 0.7710240483283997, + "learning_rate": 9.909106593581484e-05, + "loss": 5.0354, + "step": 71550 + }, + { + "epoch": 0.3081942656927269, + "grad_norm": 0.3810049593448639, + "learning_rate": 9.908978103124203e-05, + "loss": 5.0808, + "step": 71600 + }, + { + "epoch": 0.3084094851520095, + "grad_norm": 1.5663766860961914, + "learning_rate": 9.90884952274575e-05, + "loss": 5.6008, + "step": 71650 + }, + { + "epoch": 0.30862470461129216, + "grad_norm": 1.4974271059036255, + "learning_rate": 9.908720852448474e-05, + "loss": 4.8699, + "step": 71700 + }, + { + "epoch": 0.30883992407057476, + "grad_norm": 1.9685567617416382, + "learning_rate": 9.908592092234736e-05, + "loss": 5.347, + "step": 71750 + }, + { + "epoch": 0.3090551435298574, + "grad_norm": 3.5605273246765137, + "learning_rate": 9.908463242106895e-05, + "loss": 5.1876, + "step": 71800 + }, + { + "epoch": 0.30927036298914, + "grad_norm": 2.907881021499634, + "learning_rate": 9.90833430206731e-05, + "loss": 5.3785, + "step": 71850 + }, + { + "epoch": 0.30948558244842267, + "grad_norm": 2.119683027267456, + "learning_rate": 9.908205272118343e-05, + "loss": 5.2738, + "step": 71900 + }, + { + "epoch": 0.30970080190770527, + "grad_norm": 1.758484959602356, + "learning_rate": 9.908076152262356e-05, + "loss": 5.1404, + "step": 71950 + }, + { + "epoch": 0.3099160213669879, + "grad_norm": 1.516297698020935, + "learning_rate": 9.907946942501716e-05, + "loss": 5.3415, + "step": 72000 + }, + { + "epoch": 0.3099160213669879, + "eval_loss": 5.634191989898682, + "eval_runtime": 34.8504, + "eval_samples_per_second": 18.364, + "eval_steps_per_second": 9.182, + "eval_tts_loss": 6.4459087036106775, + "step": 72000 + }, + { + "epoch": 0.3101312408262705, + "grad_norm": 0.9416413307189941, + "learning_rate": 9.90781764283879e-05, + "loss": 5.2729, + "step": 72050 + }, + { + "epoch": 0.3103464602855532, + "grad_norm": 2.211540937423706, + "learning_rate": 9.907688253275944e-05, + "loss": 4.989, + "step": 72100 + }, + { + "epoch": 0.3105616797448358, + "grad_norm": 1.5137875080108643, + "learning_rate": 9.907558773815552e-05, + "loss": 5.4198, + "step": 72150 + }, + { + "epoch": 0.31077689920411844, + "grad_norm": 0.8794851303100586, + "learning_rate": 9.907429204459984e-05, + "loss": 5.2378, + "step": 72200 + }, + { + "epoch": 0.3109921186634011, + "grad_norm": 2.8894755840301514, + "learning_rate": 9.90729954521161e-05, + "loss": 5.4136, + "step": 72250 + }, + { + "epoch": 0.3112073381226837, + "grad_norm": 2.9685022830963135, + "learning_rate": 9.90716979607281e-05, + "loss": 5.3683, + "step": 72300 + }, + { + "epoch": 0.31142255758196635, + "grad_norm": 1.6592434644699097, + "learning_rate": 9.90703995704596e-05, + "loss": 5.3201, + "step": 72350 + }, + { + "epoch": 0.31163777704124895, + "grad_norm": 1.5121089220046997, + "learning_rate": 9.906910028133435e-05, + "loss": 5.2714, + "step": 72400 + }, + { + "epoch": 0.3118529965005316, + "grad_norm": 0.4747486710548401, + "learning_rate": 9.90678000933762e-05, + "loss": 4.9999, + "step": 72450 + }, + { + "epoch": 0.3120682159598142, + "grad_norm": 2.2488884925842285, + "learning_rate": 9.906649900660893e-05, + "loss": 4.8699, + "step": 72500 + }, + { + "epoch": 0.31228343541909687, + "grad_norm": 2.2840054035186768, + "learning_rate": 9.906519702105637e-05, + "loss": 4.7623, + "step": 72550 + }, + { + "epoch": 0.31249865487837947, + "grad_norm": 2.4047768115997314, + "learning_rate": 9.906389413674239e-05, + "loss": 5.3738, + "step": 72600 + }, + { + "epoch": 0.3127138743376621, + "grad_norm": 2.6370558738708496, + "learning_rate": 9.906259035369084e-05, + "loss": 4.9398, + "step": 72650 + }, + { + "epoch": 0.3129290937969447, + "grad_norm": 1.5609073638916016, + "learning_rate": 9.90612856719256e-05, + "loss": 5.1239, + "step": 72700 + }, + { + "epoch": 0.3131443132562274, + "grad_norm": 2.3887228965759277, + "learning_rate": 9.90599800914706e-05, + "loss": 5.1628, + "step": 72750 + }, + { + "epoch": 0.31335953271551, + "grad_norm": 3.1505982875823975, + "learning_rate": 9.905867361234972e-05, + "loss": 4.9456, + "step": 72800 + }, + { + "epoch": 0.31357475217479264, + "grad_norm": 1.4852324724197388, + "learning_rate": 9.905736623458692e-05, + "loss": 5.0186, + "step": 72850 + }, + { + "epoch": 0.3137899716340753, + "grad_norm": 2.3196616172790527, + "learning_rate": 9.905605795820611e-05, + "loss": 5.1211, + "step": 72900 + }, + { + "epoch": 0.3140051910933579, + "grad_norm": 2.5520896911621094, + "learning_rate": 9.90547487832313e-05, + "loss": 5.4047, + "step": 72950 + }, + { + "epoch": 0.31422041055264055, + "grad_norm": 1.7832024097442627, + "learning_rate": 9.905343870968645e-05, + "loss": 4.9287, + "step": 73000 + }, + { + "epoch": 0.31443563001192315, + "grad_norm": 0.5077972412109375, + "learning_rate": 9.905212773759554e-05, + "loss": 5.1493, + "step": 73050 + }, + { + "epoch": 0.3146508494712058, + "grad_norm": 1.5833030939102173, + "learning_rate": 9.90508158669826e-05, + "loss": 5.1044, + "step": 73100 + }, + { + "epoch": 0.3148660689304884, + "grad_norm": 2.068507671356201, + "learning_rate": 9.904950309787166e-05, + "loss": 5.084, + "step": 73150 + }, + { + "epoch": 0.31508128838977106, + "grad_norm": 2.3936989307403564, + "learning_rate": 9.904818943028677e-05, + "loss": 5.1278, + "step": 73200 + }, + { + "epoch": 0.31529650784905366, + "grad_norm": 2.580096483230591, + "learning_rate": 9.9046874864252e-05, + "loss": 4.7914, + "step": 73250 + }, + { + "epoch": 0.3155117273083363, + "grad_norm": 4.941137790679932, + "learning_rate": 9.90455593997914e-05, + "loss": 5.1143, + "step": 73300 + }, + { + "epoch": 0.3157269467676189, + "grad_norm": 2.588543176651001, + "learning_rate": 9.904424303692912e-05, + "loss": 5.5596, + "step": 73350 + }, + { + "epoch": 0.3159421662269016, + "grad_norm": 1.9365124702453613, + "learning_rate": 9.904292577568923e-05, + "loss": 5.2589, + "step": 73400 + }, + { + "epoch": 0.3161573856861842, + "grad_norm": 4.358753681182861, + "learning_rate": 9.904160761609584e-05, + "loss": 5.5639, + "step": 73450 + }, + { + "epoch": 0.31637260514546683, + "grad_norm": 1.5134845972061157, + "learning_rate": 9.904028855817315e-05, + "loss": 5.1286, + "step": 73500 + }, + { + "epoch": 0.3165878246047495, + "grad_norm": 0.4156987965106964, + "learning_rate": 9.903896860194531e-05, + "loss": 5.0597, + "step": 73550 + }, + { + "epoch": 0.3168030440640321, + "grad_norm": 2.1995933055877686, + "learning_rate": 9.903764774743645e-05, + "loss": 5.1659, + "step": 73600 + }, + { + "epoch": 0.31701826352331475, + "grad_norm": 2.4311413764953613, + "learning_rate": 9.903632599467082e-05, + "loss": 4.7231, + "step": 73650 + }, + { + "epoch": 0.31723348298259735, + "grad_norm": 1.6016931533813477, + "learning_rate": 9.903500334367262e-05, + "loss": 4.9681, + "step": 73700 + }, + { + "epoch": 0.31744870244188, + "grad_norm": 0.59273761510849, + "learning_rate": 9.903367979446605e-05, + "loss": 5.4365, + "step": 73750 + }, + { + "epoch": 0.3176639219011626, + "grad_norm": 1.593131184577942, + "learning_rate": 9.903235534707538e-05, + "loss": 4.4823, + "step": 73800 + }, + { + "epoch": 0.31787914136044526, + "grad_norm": 1.6300718784332275, + "learning_rate": 9.903103000152487e-05, + "loss": 5.0559, + "step": 73850 + }, + { + "epoch": 0.31809436081972786, + "grad_norm": 1.7260617017745972, + "learning_rate": 9.902970375783879e-05, + "loss": 5.1287, + "step": 73900 + }, + { + "epoch": 0.3183095802790105, + "grad_norm": 2.6259076595306396, + "learning_rate": 9.902837661604144e-05, + "loss": 5.2586, + "step": 73950 + }, + { + "epoch": 0.3185247997382931, + "grad_norm": 1.2139583826065063, + "learning_rate": 9.902704857615712e-05, + "loss": 4.8866, + "step": 74000 + }, + { + "epoch": 0.3187400191975758, + "grad_norm": 2.091721534729004, + "learning_rate": 9.902571963821017e-05, + "loss": 5.3583, + "step": 74050 + }, + { + "epoch": 0.3189552386568584, + "grad_norm": 1.8077058792114258, + "learning_rate": 9.902438980222493e-05, + "loss": 4.8799, + "step": 74100 + }, + { + "epoch": 0.31917045811614103, + "grad_norm": 2.529721975326538, + "learning_rate": 9.902305906822574e-05, + "loss": 5.3411, + "step": 74150 + }, + { + "epoch": 0.31938567757542363, + "grad_norm": 3.496781349182129, + "learning_rate": 9.902172743623699e-05, + "loss": 4.6095, + "step": 74200 + }, + { + "epoch": 0.3196008970347063, + "grad_norm": 1.535830020904541, + "learning_rate": 9.902039490628309e-05, + "loss": 5.4399, + "step": 74250 + }, + { + "epoch": 0.31981611649398894, + "grad_norm": 1.7051324844360352, + "learning_rate": 9.901906147838842e-05, + "loss": 4.9622, + "step": 74300 + }, + { + "epoch": 0.32003133595327155, + "grad_norm": 0.5078291893005371, + "learning_rate": 9.901772715257743e-05, + "loss": 5.1364, + "step": 74350 + }, + { + "epoch": 0.3202465554125542, + "grad_norm": 2.1991875171661377, + "learning_rate": 9.901639192887453e-05, + "loss": 4.9491, + "step": 74400 + }, + { + "epoch": 0.3204617748718368, + "grad_norm": 2.60101056098938, + "learning_rate": 9.901505580730421e-05, + "loss": 4.6812, + "step": 74450 + }, + { + "epoch": 0.32067699433111946, + "grad_norm": 0.3974127769470215, + "learning_rate": 9.901371878789094e-05, + "loss": 5.6295, + "step": 74500 + }, + { + "epoch": 0.32089221379040206, + "grad_norm": 3.2609150409698486, + "learning_rate": 9.90123808706592e-05, + "loss": 5.7079, + "step": 74550 + }, + { + "epoch": 0.3211074332496847, + "grad_norm": 1.4608869552612305, + "learning_rate": 9.90110420556335e-05, + "loss": 5.0404, + "step": 74600 + }, + { + "epoch": 0.3213226527089673, + "grad_norm": 2.332105875015259, + "learning_rate": 9.900970234283835e-05, + "loss": 5.139, + "step": 74650 + }, + { + "epoch": 0.32153787216824997, + "grad_norm": 2.4275624752044678, + "learning_rate": 9.900836173229833e-05, + "loss": 5.4913, + "step": 74700 + }, + { + "epoch": 0.3217530916275326, + "grad_norm": 4.477487087249756, + "learning_rate": 9.900702022403795e-05, + "loss": 5.3937, + "step": 74750 + }, + { + "epoch": 0.32196831108681523, + "grad_norm": 2.3540914058685303, + "learning_rate": 9.90056778180818e-05, + "loss": 5.0629, + "step": 74800 + }, + { + "epoch": 0.32218353054609783, + "grad_norm": 2.7661845684051514, + "learning_rate": 9.900433451445451e-05, + "loss": 4.7945, + "step": 74850 + }, + { + "epoch": 0.3223987500053805, + "grad_norm": 2.1593122482299805, + "learning_rate": 9.900299031318063e-05, + "loss": 5.185, + "step": 74900 + }, + { + "epoch": 0.32261396946466314, + "grad_norm": 2.3984334468841553, + "learning_rate": 9.900164521428481e-05, + "loss": 5.036, + "step": 74950 + }, + { + "epoch": 0.32282918892394574, + "grad_norm": 0.48492223024368286, + "learning_rate": 9.900029921779168e-05, + "loss": 5.2054, + "step": 75000 + }, + { + "epoch": 0.32282918892394574, + "eval_loss": 5.61959171295166, + "eval_runtime": 35.2132, + "eval_samples_per_second": 18.175, + "eval_steps_per_second": 9.088, + "eval_tts_loss": 6.506716375731956, + "step": 75000 + }, + { + "epoch": 0.3230444083832284, + "grad_norm": 4.421298980712891, + "learning_rate": 9.899895232372592e-05, + "loss": 4.9556, + "step": 75050 + }, + { + "epoch": 0.323259627842511, + "grad_norm": 0.9317294955253601, + "learning_rate": 9.899760453211216e-05, + "loss": 4.8241, + "step": 75100 + }, + { + "epoch": 0.32347484730179366, + "grad_norm": 2.9604146480560303, + "learning_rate": 9.899625584297511e-05, + "loss": 5.5344, + "step": 75150 + }, + { + "epoch": 0.32369006676107626, + "grad_norm": 2.1945788860321045, + "learning_rate": 9.899490625633948e-05, + "loss": 5.3145, + "step": 75200 + }, + { + "epoch": 0.3239052862203589, + "grad_norm": 1.5703214406967163, + "learning_rate": 9.899355577222999e-05, + "loss": 5.4652, + "step": 75250 + }, + { + "epoch": 0.3241205056796415, + "grad_norm": 0.7673836350440979, + "learning_rate": 9.899220439067136e-05, + "loss": 4.652, + "step": 75300 + }, + { + "epoch": 0.32433572513892417, + "grad_norm": 2.0010156631469727, + "learning_rate": 9.899085211168836e-05, + "loss": 4.8031, + "step": 75350 + }, + { + "epoch": 0.32455094459820677, + "grad_norm": 1.5293958187103271, + "learning_rate": 9.898949893530577e-05, + "loss": 5.1378, + "step": 75400 + }, + { + "epoch": 0.3247661640574894, + "grad_norm": 0.8878456354141235, + "learning_rate": 9.898814486154837e-05, + "loss": 4.9946, + "step": 75450 + }, + { + "epoch": 0.324981383516772, + "grad_norm": 2.125000476837158, + "learning_rate": 9.898678989044094e-05, + "loss": 4.693, + "step": 75500 + }, + { + "epoch": 0.3251966029760547, + "grad_norm": 1.4406217336654663, + "learning_rate": 9.898543402200833e-05, + "loss": 5.0303, + "step": 75550 + }, + { + "epoch": 0.32541182243533734, + "grad_norm": 1.9397448301315308, + "learning_rate": 9.898407725627536e-05, + "loss": 4.6042, + "step": 75600 + }, + { + "epoch": 0.32562704189461994, + "grad_norm": 2.8085663318634033, + "learning_rate": 9.89827195932669e-05, + "loss": 5.3284, + "step": 75650 + }, + { + "epoch": 0.3258422613539026, + "grad_norm": 1.6764417886734009, + "learning_rate": 9.89813610330078e-05, + "loss": 4.9448, + "step": 75700 + }, + { + "epoch": 0.3260574808131852, + "grad_norm": 0.8276155591011047, + "learning_rate": 9.898000157552296e-05, + "loss": 5.3891, + "step": 75750 + }, + { + "epoch": 0.32627270027246785, + "grad_norm": 0.7133195996284485, + "learning_rate": 9.897864122083726e-05, + "loss": 4.8674, + "step": 75800 + }, + { + "epoch": 0.32648791973175045, + "grad_norm": 1.7746258974075317, + "learning_rate": 9.897727996897565e-05, + "loss": 5.2064, + "step": 75850 + }, + { + "epoch": 0.3267031391910331, + "grad_norm": 3.86490535736084, + "learning_rate": 9.897591781996304e-05, + "loss": 5.1787, + "step": 75900 + }, + { + "epoch": 0.3269183586503157, + "grad_norm": 3.3726563453674316, + "learning_rate": 9.897455477382439e-05, + "loss": 5.3747, + "step": 75950 + }, + { + "epoch": 0.32713357810959837, + "grad_norm": 2.0391337871551514, + "learning_rate": 9.897319083058467e-05, + "loss": 5.4051, + "step": 76000 + }, + { + "epoch": 0.32734879756888097, + "grad_norm": 2.944945812225342, + "learning_rate": 9.897182599026886e-05, + "loss": 5.2784, + "step": 76050 + }, + { + "epoch": 0.3275640170281636, + "grad_norm": 2.2497196197509766, + "learning_rate": 9.897046025290198e-05, + "loss": 5.1144, + "step": 76100 + }, + { + "epoch": 0.3277792364874462, + "grad_norm": 0.9628269076347351, + "learning_rate": 9.896909361850902e-05, + "loss": 5.4216, + "step": 76150 + }, + { + "epoch": 0.3279944559467289, + "grad_norm": 1.004314661026001, + "learning_rate": 9.896772608711502e-05, + "loss": 5.501, + "step": 76200 + }, + { + "epoch": 0.32820967540601154, + "grad_norm": 3.9486334323883057, + "learning_rate": 9.896635765874503e-05, + "loss": 4.6532, + "step": 76250 + }, + { + "epoch": 0.32842489486529414, + "grad_norm": 0.6149348616600037, + "learning_rate": 9.896498833342413e-05, + "loss": 5.2129, + "step": 76300 + }, + { + "epoch": 0.3286401143245768, + "grad_norm": 1.3119436502456665, + "learning_rate": 9.896361811117738e-05, + "loss": 5.3326, + "step": 76350 + }, + { + "epoch": 0.3288553337838594, + "grad_norm": 2.4753427505493164, + "learning_rate": 9.89622469920299e-05, + "loss": 5.5383, + "step": 76400 + }, + { + "epoch": 0.32907055324314205, + "grad_norm": 0.7027820348739624, + "learning_rate": 9.896087497600681e-05, + "loss": 5.3171, + "step": 76450 + }, + { + "epoch": 0.32928577270242465, + "grad_norm": 2.598940849304199, + "learning_rate": 9.895950206313323e-05, + "loss": 5.605, + "step": 76500 + }, + { + "epoch": 0.3295009921617073, + "grad_norm": 2.3114240169525146, + "learning_rate": 9.895812825343429e-05, + "loss": 5.6593, + "step": 76550 + }, + { + "epoch": 0.3297162116209899, + "grad_norm": 1.9094040393829346, + "learning_rate": 9.895675354693518e-05, + "loss": 4.6229, + "step": 76600 + }, + { + "epoch": 0.32993143108027256, + "grad_norm": 1.5954139232635498, + "learning_rate": 9.895537794366109e-05, + "loss": 5.5932, + "step": 76650 + }, + { + "epoch": 0.33014665053955516, + "grad_norm": 1.5245658159255981, + "learning_rate": 9.89540014436372e-05, + "loss": 5.3716, + "step": 76700 + }, + { + "epoch": 0.3303618699988378, + "grad_norm": 2.0292322635650635, + "learning_rate": 9.895262404688872e-05, + "loss": 4.8923, + "step": 76750 + }, + { + "epoch": 0.3305770894581204, + "grad_norm": 0.660561740398407, + "learning_rate": 9.895124575344088e-05, + "loss": 4.6169, + "step": 76800 + }, + { + "epoch": 0.3307923089174031, + "grad_norm": 1.7029401063919067, + "learning_rate": 9.894986656331895e-05, + "loss": 5.0832, + "step": 76850 + }, + { + "epoch": 0.33100752837668573, + "grad_norm": 1.4718998670578003, + "learning_rate": 9.894848647654817e-05, + "loss": 5.0618, + "step": 76900 + }, + { + "epoch": 0.33122274783596833, + "grad_norm": 0.412259578704834, + "learning_rate": 9.894710549315384e-05, + "loss": 4.6757, + "step": 76950 + }, + { + "epoch": 0.331437967295251, + "grad_norm": 0.3952701985836029, + "learning_rate": 9.894572361316126e-05, + "loss": 5.3411, + "step": 77000 + }, + { + "epoch": 0.3316531867545336, + "grad_norm": 2.1810872554779053, + "learning_rate": 9.894434083659569e-05, + "loss": 5.0481, + "step": 77050 + }, + { + "epoch": 0.33186840621381625, + "grad_norm": 2.332085371017456, + "learning_rate": 9.894295716348254e-05, + "loss": 5.4127, + "step": 77100 + }, + { + "epoch": 0.33208362567309885, + "grad_norm": 1.747309923171997, + "learning_rate": 9.894157259384709e-05, + "loss": 5.1965, + "step": 77150 + }, + { + "epoch": 0.3322988451323815, + "grad_norm": 1.4799559116363525, + "learning_rate": 9.894018712771473e-05, + "loss": 5.1703, + "step": 77200 + }, + { + "epoch": 0.3325140645916641, + "grad_norm": 1.879120945930481, + "learning_rate": 9.893880076511083e-05, + "loss": 4.9422, + "step": 77250 + }, + { + "epoch": 0.33272928405094676, + "grad_norm": 1.6397442817687988, + "learning_rate": 9.893741350606078e-05, + "loss": 5.0304, + "step": 77300 + }, + { + "epoch": 0.33294450351022936, + "grad_norm": 0.585242509841919, + "learning_rate": 9.893602535059002e-05, + "loss": 5.2464, + "step": 77350 + }, + { + "epoch": 0.333159722969512, + "grad_norm": 1.3208565711975098, + "learning_rate": 9.893463629872395e-05, + "loss": 4.7271, + "step": 77400 + }, + { + "epoch": 0.3333749424287946, + "grad_norm": 0.802437961101532, + "learning_rate": 9.893324635048803e-05, + "loss": 5.0919, + "step": 77450 + }, + { + "epoch": 0.3335901618880773, + "grad_norm": 2.063751220703125, + "learning_rate": 9.893185550590771e-05, + "loss": 5.0318, + "step": 77500 + }, + { + "epoch": 0.3338053813473599, + "grad_norm": 2.500229835510254, + "learning_rate": 9.893046376500847e-05, + "loss": 5.2631, + "step": 77550 + }, + { + "epoch": 0.33402060080664253, + "grad_norm": 1.4436476230621338, + "learning_rate": 9.89290711278158e-05, + "loss": 5.2527, + "step": 77600 + }, + { + "epoch": 0.3342358202659252, + "grad_norm": 1.9382665157318115, + "learning_rate": 9.892767759435521e-05, + "loss": 5.0231, + "step": 77650 + }, + { + "epoch": 0.3344510397252078, + "grad_norm": 1.8987842798233032, + "learning_rate": 9.892628316465224e-05, + "loss": 4.8547, + "step": 77700 + }, + { + "epoch": 0.33466625918449044, + "grad_norm": 2.3516054153442383, + "learning_rate": 9.892488783873243e-05, + "loss": 4.8414, + "step": 77750 + }, + { + "epoch": 0.33488147864377305, + "grad_norm": 4.843871593475342, + "learning_rate": 9.892349161662131e-05, + "loss": 5.3052, + "step": 77800 + }, + { + "epoch": 0.3350966981030557, + "grad_norm": 1.1992316246032715, + "learning_rate": 9.892209449834449e-05, + "loss": 4.9121, + "step": 77850 + }, + { + "epoch": 0.3353119175623383, + "grad_norm": 0.9081657528877258, + "learning_rate": 9.892069648392756e-05, + "loss": 5.1534, + "step": 77900 + }, + { + "epoch": 0.33552713702162096, + "grad_norm": 0.9907997250556946, + "learning_rate": 9.89192975733961e-05, + "loss": 5.1538, + "step": 77950 + }, + { + "epoch": 0.33574235648090356, + "grad_norm": 2.3949127197265625, + "learning_rate": 9.891789776677577e-05, + "loss": 4.5593, + "step": 78000 + }, + { + "epoch": 0.33574235648090356, + "eval_loss": 5.601741313934326, + "eval_runtime": 35.0362, + "eval_samples_per_second": 18.267, + "eval_steps_per_second": 9.133, + "eval_tts_loss": 6.4960716384946195, + "step": 78000 + }, + { + "epoch": 0.3359575759401862, + "grad_norm": 0.3258666694164276, + "learning_rate": 9.891649706409218e-05, + "loss": 5.3776, + "step": 78050 + }, + { + "epoch": 0.3361727953994688, + "grad_norm": 1.601615071296692, + "learning_rate": 9.891509546537101e-05, + "loss": 5.287, + "step": 78100 + }, + { + "epoch": 0.33638801485875147, + "grad_norm": 1.649818778038025, + "learning_rate": 9.891369297063792e-05, + "loss": 4.6082, + "step": 78150 + }, + { + "epoch": 0.3366032343180341, + "grad_norm": 2.5989162921905518, + "learning_rate": 9.891228957991862e-05, + "loss": 5.5881, + "step": 78200 + }, + { + "epoch": 0.33681845377731673, + "grad_norm": 1.2540209293365479, + "learning_rate": 9.891088529323878e-05, + "loss": 4.6504, + "step": 78250 + }, + { + "epoch": 0.3370336732365994, + "grad_norm": 3.1675641536712646, + "learning_rate": 9.890948011062416e-05, + "loss": 5.3531, + "step": 78300 + }, + { + "epoch": 0.337248892695882, + "grad_norm": 1.0780863761901855, + "learning_rate": 9.89080740321005e-05, + "loss": 5.7424, + "step": 78350 + }, + { + "epoch": 0.33746411215516464, + "grad_norm": 2.361037254333496, + "learning_rate": 9.890666705769351e-05, + "loss": 5.1698, + "step": 78400 + }, + { + "epoch": 0.33767933161444724, + "grad_norm": 2.822901487350464, + "learning_rate": 9.890525918742902e-05, + "loss": 5.2088, + "step": 78450 + }, + { + "epoch": 0.3378945510737299, + "grad_norm": 0.507655918598175, + "learning_rate": 9.890385042133279e-05, + "loss": 5.268, + "step": 78500 + }, + { + "epoch": 0.3381097705330125, + "grad_norm": 1.4558864831924438, + "learning_rate": 9.890244075943063e-05, + "loss": 5.1943, + "step": 78550 + }, + { + "epoch": 0.33832498999229516, + "grad_norm": 1.5781294107437134, + "learning_rate": 9.890103020174836e-05, + "loss": 5.0586, + "step": 78600 + }, + { + "epoch": 0.33854020945157776, + "grad_norm": 3.9625606536865234, + "learning_rate": 9.889961874831181e-05, + "loss": 4.9895, + "step": 78650 + }, + { + "epoch": 0.3387554289108604, + "grad_norm": 2.973985195159912, + "learning_rate": 9.889820639914685e-05, + "loss": 5.1331, + "step": 78700 + }, + { + "epoch": 0.338970648370143, + "grad_norm": 2.300353765487671, + "learning_rate": 9.889679315427934e-05, + "loss": 4.8792, + "step": 78750 + }, + { + "epoch": 0.33918586782942567, + "grad_norm": 1.355301856994629, + "learning_rate": 9.889537901373517e-05, + "loss": 4.9399, + "step": 78800 + }, + { + "epoch": 0.33940108728870827, + "grad_norm": 1.522600769996643, + "learning_rate": 9.889396397754028e-05, + "loss": 4.6055, + "step": 78850 + }, + { + "epoch": 0.3396163067479909, + "grad_norm": 2.5191261768341064, + "learning_rate": 9.889254804572051e-05, + "loss": 4.809, + "step": 78900 + }, + { + "epoch": 0.3398315262072736, + "grad_norm": 2.6403117179870605, + "learning_rate": 9.889113121830188e-05, + "loss": 5.045, + "step": 78950 + }, + { + "epoch": 0.3400467456665562, + "grad_norm": 2.4916510581970215, + "learning_rate": 9.888971349531027e-05, + "loss": 4.9712, + "step": 79000 + }, + { + "epoch": 0.34026196512583884, + "grad_norm": 1.8457448482513428, + "learning_rate": 9.888829487677172e-05, + "loss": 5.0917, + "step": 79050 + }, + { + "epoch": 0.34047718458512144, + "grad_norm": 1.6950429677963257, + "learning_rate": 9.888687536271215e-05, + "loss": 5.2473, + "step": 79100 + }, + { + "epoch": 0.3406924040444041, + "grad_norm": 1.726990818977356, + "learning_rate": 9.888545495315761e-05, + "loss": 5.2172, + "step": 79150 + }, + { + "epoch": 0.3409076235036867, + "grad_norm": 2.5483310222625732, + "learning_rate": 9.88840336481341e-05, + "loss": 5.2876, + "step": 79200 + }, + { + "epoch": 0.34112284296296935, + "grad_norm": 2.5093281269073486, + "learning_rate": 9.888261144766765e-05, + "loss": 5.1688, + "step": 79250 + }, + { + "epoch": 0.34133806242225195, + "grad_norm": 1.8097907304763794, + "learning_rate": 9.888118835178433e-05, + "loss": 4.6922, + "step": 79300 + }, + { + "epoch": 0.3415532818815346, + "grad_norm": 4.119552135467529, + "learning_rate": 9.887976436051019e-05, + "loss": 5.1439, + "step": 79350 + }, + { + "epoch": 0.3417685013408172, + "grad_norm": 2.76662540435791, + "learning_rate": 9.887833947387132e-05, + "loss": 5.3497, + "step": 79400 + }, + { + "epoch": 0.34198372080009987, + "grad_norm": 3.2286031246185303, + "learning_rate": 9.887691369189383e-05, + "loss": 5.2024, + "step": 79450 + }, + { + "epoch": 0.34219894025938247, + "grad_norm": 2.2700815200805664, + "learning_rate": 9.887548701460381e-05, + "loss": 4.8137, + "step": 79500 + }, + { + "epoch": 0.3424141597186651, + "grad_norm": 2.16735577583313, + "learning_rate": 9.88740594420274e-05, + "loss": 4.81, + "step": 79550 + }, + { + "epoch": 0.3426293791779478, + "grad_norm": 4.801692008972168, + "learning_rate": 9.88726309741908e-05, + "loss": 5.1334, + "step": 79600 + }, + { + "epoch": 0.3428445986372304, + "grad_norm": 1.7399427890777588, + "learning_rate": 9.887120161112013e-05, + "loss": 4.8734, + "step": 79650 + }, + { + "epoch": 0.34305981809651304, + "grad_norm": 1.6135228872299194, + "learning_rate": 9.886977135284156e-05, + "loss": 4.6463, + "step": 79700 + }, + { + "epoch": 0.34327503755579564, + "grad_norm": 2.127915859222412, + "learning_rate": 9.886834019938131e-05, + "loss": 5.1299, + "step": 79750 + }, + { + "epoch": 0.3434902570150783, + "grad_norm": 3.430244207382202, + "learning_rate": 9.886690815076562e-05, + "loss": 5.5258, + "step": 79800 + }, + { + "epoch": 0.3437054764743609, + "grad_norm": 2.627686023712158, + "learning_rate": 9.886547520702068e-05, + "loss": 4.992, + "step": 79850 + }, + { + "epoch": 0.34392069593364355, + "grad_norm": 1.2355430126190186, + "learning_rate": 9.886404136817276e-05, + "loss": 4.8294, + "step": 79900 + }, + { + "epoch": 0.34413591539292615, + "grad_norm": 2.221003532409668, + "learning_rate": 9.886260663424811e-05, + "loss": 5.4131, + "step": 79950 + }, + { + "epoch": 0.3443511348522088, + "grad_norm": 2.578634738922119, + "learning_rate": 9.886117100527303e-05, + "loss": 5.2438, + "step": 80000 + }, + { + "epoch": 0.3445663543114914, + "grad_norm": 1.708832859992981, + "learning_rate": 9.885973448127378e-05, + "loss": 4.7444, + "step": 80050 + }, + { + "epoch": 0.34478157377077406, + "grad_norm": 0.9426866173744202, + "learning_rate": 9.885829706227674e-05, + "loss": 4.9549, + "step": 80100 + }, + { + "epoch": 0.34499679323005666, + "grad_norm": 2.1775684356689453, + "learning_rate": 9.885685874830818e-05, + "loss": 5.1801, + "step": 80150 + }, + { + "epoch": 0.3452120126893393, + "grad_norm": 2.1697750091552734, + "learning_rate": 9.885541953939448e-05, + "loss": 5.7266, + "step": 80200 + }, + { + "epoch": 0.345427232148622, + "grad_norm": 2.599714994430542, + "learning_rate": 9.885397943556199e-05, + "loss": 4.7887, + "step": 80250 + }, + { + "epoch": 0.3456424516079046, + "grad_norm": 2.239100217819214, + "learning_rate": 9.885253843683709e-05, + "loss": 5.445, + "step": 80300 + }, + { + "epoch": 0.34585767106718723, + "grad_norm": 3.8728537559509277, + "learning_rate": 9.885109654324618e-05, + "loss": 5.2833, + "step": 80350 + }, + { + "epoch": 0.34607289052646983, + "grad_norm": 0.62574702501297, + "learning_rate": 9.884965375481565e-05, + "loss": 5.1289, + "step": 80400 + }, + { + "epoch": 0.3462881099857525, + "grad_norm": 2.441049337387085, + "learning_rate": 9.884821007157197e-05, + "loss": 5.2573, + "step": 80450 + }, + { + "epoch": 0.3465033294450351, + "grad_norm": 1.8476542234420776, + "learning_rate": 9.884676549354154e-05, + "loss": 5.2146, + "step": 80500 + }, + { + "epoch": 0.34671854890431775, + "grad_norm": 1.6117701530456543, + "learning_rate": 9.884532002075086e-05, + "loss": 4.9425, + "step": 80550 + }, + { + "epoch": 0.34693376836360035, + "grad_norm": 3.4121313095092773, + "learning_rate": 9.88438736532264e-05, + "loss": 4.9849, + "step": 80600 + }, + { + "epoch": 0.347148987822883, + "grad_norm": 2.5895187854766846, + "learning_rate": 9.884242639099464e-05, + "loss": 5.3334, + "step": 80650 + }, + { + "epoch": 0.3473642072821656, + "grad_norm": 2.5307772159576416, + "learning_rate": 9.884097823408209e-05, + "loss": 4.8371, + "step": 80700 + }, + { + "epoch": 0.34757942674144826, + "grad_norm": 5.312500953674316, + "learning_rate": 9.883952918251527e-05, + "loss": 5.131, + "step": 80750 + }, + { + "epoch": 0.34779464620073086, + "grad_norm": 3.3094773292541504, + "learning_rate": 9.883807923632076e-05, + "loss": 4.8832, + "step": 80800 + }, + { + "epoch": 0.3480098656600135, + "grad_norm": 1.05629563331604, + "learning_rate": 9.883662839552508e-05, + "loss": 4.8297, + "step": 80850 + }, + { + "epoch": 0.3482250851192962, + "grad_norm": 1.5375936031341553, + "learning_rate": 9.883517666015483e-05, + "loss": 4.9132, + "step": 80900 + }, + { + "epoch": 0.3484403045785788, + "grad_norm": 0.7305281758308411, + "learning_rate": 9.883372403023658e-05, + "loss": 4.936, + "step": 80950 + }, + { + "epoch": 0.34865552403786143, + "grad_norm": 1.7922626733779907, + "learning_rate": 9.883227050579698e-05, + "loss": 5.0557, + "step": 81000 + }, + { + "epoch": 0.34865552403786143, + "eval_loss": 5.584456443786621, + "eval_runtime": 35.1206, + "eval_samples_per_second": 18.223, + "eval_steps_per_second": 9.111, + "eval_tts_loss": 6.503783689685149, + "step": 81000 + }, + { + "epoch": 0.34887074349714403, + "grad_norm": 1.0082805156707764, + "learning_rate": 9.88308160868626e-05, + "loss": 4.8893, + "step": 81050 + }, + { + "epoch": 0.3490859629564267, + "grad_norm": 1.6563082933425903, + "learning_rate": 9.882936077346012e-05, + "loss": 4.5576, + "step": 81100 + }, + { + "epoch": 0.3493011824157093, + "grad_norm": 2.064661741256714, + "learning_rate": 9.882790456561619e-05, + "loss": 5.2986, + "step": 81150 + }, + { + "epoch": 0.34951640187499194, + "grad_norm": 2.609546661376953, + "learning_rate": 9.88264474633575e-05, + "loss": 5.2662, + "step": 81200 + }, + { + "epoch": 0.34973162133427455, + "grad_norm": 2.7821357250213623, + "learning_rate": 9.882498946671069e-05, + "loss": 5.3456, + "step": 81250 + }, + { + "epoch": 0.3499468407935572, + "grad_norm": 3.661167621612549, + "learning_rate": 9.882353057570253e-05, + "loss": 5.3006, + "step": 81300 + }, + { + "epoch": 0.3501620602528398, + "grad_norm": 2.063689947128296, + "learning_rate": 9.88220707903597e-05, + "loss": 4.9506, + "step": 81350 + }, + { + "epoch": 0.35037727971212246, + "grad_norm": 1.6568044424057007, + "learning_rate": 9.882061011070895e-05, + "loss": 5.5551, + "step": 81400 + }, + { + "epoch": 0.35059249917140506, + "grad_norm": 1.626676321029663, + "learning_rate": 9.881914853677705e-05, + "loss": 4.9107, + "step": 81450 + }, + { + "epoch": 0.3508077186306877, + "grad_norm": 1.5845904350280762, + "learning_rate": 9.881768606859075e-05, + "loss": 5.1929, + "step": 81500 + }, + { + "epoch": 0.3510229380899703, + "grad_norm": 1.6003241539001465, + "learning_rate": 9.881622270617685e-05, + "loss": 5.3811, + "step": 81550 + }, + { + "epoch": 0.35123815754925297, + "grad_norm": 1.568265438079834, + "learning_rate": 9.881475844956217e-05, + "loss": 5.3006, + "step": 81600 + }, + { + "epoch": 0.35145337700853563, + "grad_norm": 1.9312773942947388, + "learning_rate": 9.881329329877351e-05, + "loss": 5.1367, + "step": 81650 + }, + { + "epoch": 0.35166859646781823, + "grad_norm": 3.4555766582489014, + "learning_rate": 9.881182725383772e-05, + "loss": 5.4699, + "step": 81700 + }, + { + "epoch": 0.3518838159271009, + "grad_norm": 2.000542640686035, + "learning_rate": 9.881036031478167e-05, + "loss": 5.699, + "step": 81750 + }, + { + "epoch": 0.3520990353863835, + "grad_norm": 0.6394448280334473, + "learning_rate": 9.880889248163218e-05, + "loss": 5.3307, + "step": 81800 + }, + { + "epoch": 0.35231425484566614, + "grad_norm": 3.0880236625671387, + "learning_rate": 9.88074237544162e-05, + "loss": 5.2599, + "step": 81850 + }, + { + "epoch": 0.35252947430494874, + "grad_norm": 0.6260001063346863, + "learning_rate": 9.88059541331606e-05, + "loss": 4.7994, + "step": 81900 + }, + { + "epoch": 0.3527446937642314, + "grad_norm": 1.2126617431640625, + "learning_rate": 9.880448361789229e-05, + "loss": 4.8357, + "step": 81950 + }, + { + "epoch": 0.352959913223514, + "grad_norm": 1.0415130853652954, + "learning_rate": 9.880301220863823e-05, + "loss": 5.3927, + "step": 82000 + }, + { + "epoch": 0.35317513268279666, + "grad_norm": 0.8887786269187927, + "learning_rate": 9.880153990542536e-05, + "loss": 4.9943, + "step": 82050 + }, + { + "epoch": 0.35339035214207926, + "grad_norm": 4.7298102378845215, + "learning_rate": 9.880006670828064e-05, + "loss": 5.3309, + "step": 82100 + }, + { + "epoch": 0.3536055716013619, + "grad_norm": 3.244288921356201, + "learning_rate": 9.879859261723108e-05, + "loss": 4.8511, + "step": 82150 + }, + { + "epoch": 0.3538207910606445, + "grad_norm": 2.089033365249634, + "learning_rate": 9.879711763230366e-05, + "loss": 5.026, + "step": 82200 + }, + { + "epoch": 0.35403601051992717, + "grad_norm": 1.8765615224838257, + "learning_rate": 9.879564175352542e-05, + "loss": 4.7204, + "step": 82250 + }, + { + "epoch": 0.3542512299792098, + "grad_norm": 2.7211356163024902, + "learning_rate": 9.879416498092338e-05, + "loss": 4.9627, + "step": 82300 + }, + { + "epoch": 0.3544664494384924, + "grad_norm": 2.4639430046081543, + "learning_rate": 9.87926873145246e-05, + "loss": 5.0027, + "step": 82350 + }, + { + "epoch": 0.3546816688977751, + "grad_norm": 1.1868442296981812, + "learning_rate": 9.879120875435612e-05, + "loss": 5.267, + "step": 82400 + }, + { + "epoch": 0.3548968883570577, + "grad_norm": 2.3272945880889893, + "learning_rate": 9.878972930044507e-05, + "loss": 5.1488, + "step": 82450 + }, + { + "epoch": 0.35511210781634034, + "grad_norm": 1.4607622623443604, + "learning_rate": 9.87882489528185e-05, + "loss": 5.3924, + "step": 82500 + }, + { + "epoch": 0.35532732727562294, + "grad_norm": 3.082193374633789, + "learning_rate": 9.878676771150358e-05, + "loss": 4.9192, + "step": 82550 + }, + { + "epoch": 0.3555425467349056, + "grad_norm": 2.463804006576538, + "learning_rate": 9.87852855765274e-05, + "loss": 4.8661, + "step": 82600 + }, + { + "epoch": 0.3557577661941882, + "grad_norm": 2.3448259830474854, + "learning_rate": 9.878380254791713e-05, + "loss": 5.238, + "step": 82650 + }, + { + "epoch": 0.35597298565347085, + "grad_norm": 5.145789623260498, + "learning_rate": 9.878231862569992e-05, + "loss": 4.5764, + "step": 82700 + }, + { + "epoch": 0.35618820511275345, + "grad_norm": 2.205789804458618, + "learning_rate": 9.878083380990297e-05, + "loss": 5.0361, + "step": 82750 + }, + { + "epoch": 0.3564034245720361, + "grad_norm": 1.6599546670913696, + "learning_rate": 9.877934810055347e-05, + "loss": 4.9786, + "step": 82800 + }, + { + "epoch": 0.3566186440313187, + "grad_norm": 0.8987308144569397, + "learning_rate": 9.877786149767862e-05, + "loss": 4.9433, + "step": 82850 + }, + { + "epoch": 0.35683386349060137, + "grad_norm": 0.9997716546058655, + "learning_rate": 9.877637400130568e-05, + "loss": 5.019, + "step": 82900 + }, + { + "epoch": 0.357049082949884, + "grad_norm": 2.619978666305542, + "learning_rate": 9.877488561146189e-05, + "loss": 4.6711, + "step": 82950 + }, + { + "epoch": 0.3572643024091666, + "grad_norm": 2.990072727203369, + "learning_rate": 9.87733963281745e-05, + "loss": 4.9908, + "step": 83000 + }, + { + "epoch": 0.3574795218684493, + "grad_norm": 5.413049697875977, + "learning_rate": 9.87719061514708e-05, + "loss": 5.6182, + "step": 83050 + }, + { + "epoch": 0.3576947413277319, + "grad_norm": 1.275639533996582, + "learning_rate": 9.877041508137807e-05, + "loss": 4.5406, + "step": 83100 + }, + { + "epoch": 0.35790996078701454, + "grad_norm": 2.497114896774292, + "learning_rate": 9.876892311792367e-05, + "loss": 4.7341, + "step": 83150 + }, + { + "epoch": 0.35812518024629714, + "grad_norm": 0.3808848261833191, + "learning_rate": 9.876743026113486e-05, + "loss": 4.4671, + "step": 83200 + }, + { + "epoch": 0.3583403997055798, + "grad_norm": 4.46933650970459, + "learning_rate": 9.876593651103905e-05, + "loss": 4.8758, + "step": 83250 + }, + { + "epoch": 0.3585556191648624, + "grad_norm": 3.0652835369110107, + "learning_rate": 9.876444186766356e-05, + "loss": 4.9633, + "step": 83300 + }, + { + "epoch": 0.35877083862414505, + "grad_norm": 3.3186824321746826, + "learning_rate": 9.876294633103579e-05, + "loss": 5.2514, + "step": 83350 + }, + { + "epoch": 0.35898605808342765, + "grad_norm": 0.7417575716972351, + "learning_rate": 9.876144990118312e-05, + "loss": 4.7518, + "step": 83400 + }, + { + "epoch": 0.3592012775427103, + "grad_norm": 2.0812876224517822, + "learning_rate": 9.875995257813297e-05, + "loss": 4.6479, + "step": 83450 + }, + { + "epoch": 0.3594164970019929, + "grad_norm": 2.018723964691162, + "learning_rate": 9.875845436191278e-05, + "loss": 5.1877, + "step": 83500 + }, + { + "epoch": 0.35963171646127556, + "grad_norm": 1.7534682750701904, + "learning_rate": 9.875695525254997e-05, + "loss": 5.5451, + "step": 83550 + }, + { + "epoch": 0.3598469359205582, + "grad_norm": 1.7650901079177856, + "learning_rate": 9.875545525007201e-05, + "loss": 5.1638, + "step": 83600 + }, + { + "epoch": 0.3600621553798408, + "grad_norm": 3.7409517765045166, + "learning_rate": 9.875395435450637e-05, + "loss": 4.9258, + "step": 83650 + }, + { + "epoch": 0.3602773748391235, + "grad_norm": 0.9671549797058105, + "learning_rate": 9.875245256588056e-05, + "loss": 5.3067, + "step": 83700 + }, + { + "epoch": 0.3604925942984061, + "grad_norm": 4.366477966308594, + "learning_rate": 9.875094988422208e-05, + "loss": 5.2109, + "step": 83750 + }, + { + "epoch": 0.36070781375768873, + "grad_norm": 2.0864574909210205, + "learning_rate": 9.874944630955844e-05, + "loss": 4.9811, + "step": 83800 + }, + { + "epoch": 0.36092303321697133, + "grad_norm": 5.57163667678833, + "learning_rate": 9.874794184191722e-05, + "loss": 4.8866, + "step": 83850 + }, + { + "epoch": 0.361138252676254, + "grad_norm": 1.8251820802688599, + "learning_rate": 9.874643648132594e-05, + "loss": 5.0791, + "step": 83900 + }, + { + "epoch": 0.3613534721355366, + "grad_norm": 2.681342840194702, + "learning_rate": 9.87449302278122e-05, + "loss": 5.1079, + "step": 83950 + }, + { + "epoch": 0.36156869159481925, + "grad_norm": 1.3181792497634888, + "learning_rate": 9.874342308140357e-05, + "loss": 5.2396, + "step": 84000 + }, + { + "epoch": 0.36156869159481925, + "eval_loss": 5.57422399520874, + "eval_runtime": 34.9526, + "eval_samples_per_second": 18.31, + "eval_steps_per_second": 9.155, + "eval_tts_loss": 6.543465211732926, + "step": 84000 + }, + { + "epoch": 0.36178391105410185, + "grad_norm": 1.5136290788650513, + "learning_rate": 9.874191504212769e-05, + "loss": 4.751, + "step": 84050 + }, + { + "epoch": 0.3619991305133845, + "grad_norm": 1.9622468948364258, + "learning_rate": 9.874040611001213e-05, + "loss": 5.2153, + "step": 84100 + }, + { + "epoch": 0.3622143499726671, + "grad_norm": 2.651881456375122, + "learning_rate": 9.873889628508459e-05, + "loss": 5.1633, + "step": 84150 + }, + { + "epoch": 0.36242956943194976, + "grad_norm": 2.0679988861083984, + "learning_rate": 9.873738556737268e-05, + "loss": 5.0442, + "step": 84200 + }, + { + "epoch": 0.3626447888912324, + "grad_norm": 0.7162752151489258, + "learning_rate": 9.873587395690412e-05, + "loss": 4.9139, + "step": 84250 + }, + { + "epoch": 0.362860008350515, + "grad_norm": 3.194774866104126, + "learning_rate": 9.873436145370653e-05, + "loss": 5.4229, + "step": 84300 + }, + { + "epoch": 0.3630752278097977, + "grad_norm": 2.1217708587646484, + "learning_rate": 9.87328480578077e-05, + "loss": 5.0167, + "step": 84350 + }, + { + "epoch": 0.3632904472690803, + "grad_norm": 1.8084437847137451, + "learning_rate": 9.873133376923527e-05, + "loss": 4.8321, + "step": 84400 + }, + { + "epoch": 0.36350566672836293, + "grad_norm": 2.5178580284118652, + "learning_rate": 9.872981858801704e-05, + "loss": 5.2723, + "step": 84450 + }, + { + "epoch": 0.36372088618764553, + "grad_norm": 0.8991774916648865, + "learning_rate": 9.872830251418073e-05, + "loss": 4.7785, + "step": 84500 + }, + { + "epoch": 0.3639361056469282, + "grad_norm": 2.1585869789123535, + "learning_rate": 9.872678554775413e-05, + "loss": 5.3259, + "step": 84550 + }, + { + "epoch": 0.3641513251062108, + "grad_norm": 1.2307955026626587, + "learning_rate": 9.872526768876502e-05, + "loss": 5.4471, + "step": 84600 + }, + { + "epoch": 0.36436654456549344, + "grad_norm": 2.9752938747406006, + "learning_rate": 9.872374893724121e-05, + "loss": 5.0021, + "step": 84650 + }, + { + "epoch": 0.36458176402477604, + "grad_norm": 0.501471996307373, + "learning_rate": 9.87222292932105e-05, + "loss": 5.3476, + "step": 84700 + }, + { + "epoch": 0.3647969834840587, + "grad_norm": 2.666351079940796, + "learning_rate": 9.872070875670074e-05, + "loss": 5.6079, + "step": 84750 + }, + { + "epoch": 0.3650122029433413, + "grad_norm": 1.2570743560791016, + "learning_rate": 9.871918732773979e-05, + "loss": 4.9414, + "step": 84800 + }, + { + "epoch": 0.36522742240262396, + "grad_norm": 1.7429338693618774, + "learning_rate": 9.871766500635552e-05, + "loss": 4.9484, + "step": 84850 + }, + { + "epoch": 0.36544264186190656, + "grad_norm": 3.1215717792510986, + "learning_rate": 9.87161417925758e-05, + "loss": 5.0658, + "step": 84900 + }, + { + "epoch": 0.3656578613211892, + "grad_norm": 1.0716313123703003, + "learning_rate": 9.871461768642854e-05, + "loss": 5.0235, + "step": 84950 + }, + { + "epoch": 0.36587308078047187, + "grad_norm": 2.3495306968688965, + "learning_rate": 9.871309268794165e-05, + "loss": 5.1774, + "step": 85000 + }, + { + "epoch": 0.36608830023975447, + "grad_norm": 3.437885284423828, + "learning_rate": 9.871156679714307e-05, + "loss": 4.8815, + "step": 85050 + }, + { + "epoch": 0.36630351969903713, + "grad_norm": 2.4288949966430664, + "learning_rate": 9.871004001406077e-05, + "loss": 5.4779, + "step": 85100 + }, + { + "epoch": 0.36651873915831973, + "grad_norm": 3.2811269760131836, + "learning_rate": 9.870851233872268e-05, + "loss": 5.1239, + "step": 85150 + }, + { + "epoch": 0.3667339586176024, + "grad_norm": 5.041448593139648, + "learning_rate": 9.870698377115681e-05, + "loss": 4.5925, + "step": 85200 + }, + { + "epoch": 0.366949178076885, + "grad_norm": 1.9875543117523193, + "learning_rate": 9.870545431139116e-05, + "loss": 5.1358, + "step": 85250 + }, + { + "epoch": 0.36716439753616764, + "grad_norm": 2.401449680328369, + "learning_rate": 9.870392395945374e-05, + "loss": 4.8718, + "step": 85300 + }, + { + "epoch": 0.36737961699545024, + "grad_norm": 1.704612135887146, + "learning_rate": 9.870239271537256e-05, + "loss": 5.3043, + "step": 85350 + }, + { + "epoch": 0.3675948364547329, + "grad_norm": 3.215646505355835, + "learning_rate": 9.870086057917571e-05, + "loss": 5.0055, + "step": 85400 + }, + { + "epoch": 0.3678100559140155, + "grad_norm": 2.0224668979644775, + "learning_rate": 9.869932755089123e-05, + "loss": 5.4084, + "step": 85450 + }, + { + "epoch": 0.36802527537329816, + "grad_norm": 4.207870960235596, + "learning_rate": 9.86977936305472e-05, + "loss": 5.453, + "step": 85500 + }, + { + "epoch": 0.36824049483258076, + "grad_norm": 2.621746778488159, + "learning_rate": 9.869625881817175e-05, + "loss": 5.1332, + "step": 85550 + }, + { + "epoch": 0.3684557142918634, + "grad_norm": 1.785874843597412, + "learning_rate": 9.869472311379295e-05, + "loss": 5.2263, + "step": 85600 + }, + { + "epoch": 0.36867093375114607, + "grad_norm": 2.058417558670044, + "learning_rate": 9.869318651743896e-05, + "loss": 4.8282, + "step": 85650 + }, + { + "epoch": 0.36888615321042867, + "grad_norm": 2.47823429107666, + "learning_rate": 9.86916490291379e-05, + "loss": 5.3131, + "step": 85700 + }, + { + "epoch": 0.3691013726697113, + "grad_norm": 3.0080959796905518, + "learning_rate": 9.869011064891797e-05, + "loss": 4.8853, + "step": 85750 + }, + { + "epoch": 0.3693165921289939, + "grad_norm": 2.636772632598877, + "learning_rate": 9.868857137680732e-05, + "loss": 4.6618, + "step": 85800 + }, + { + "epoch": 0.3695318115882766, + "grad_norm": 1.9575176239013672, + "learning_rate": 9.868703121283416e-05, + "loss": 4.9889, + "step": 85850 + }, + { + "epoch": 0.3697470310475592, + "grad_norm": 2.5548593997955322, + "learning_rate": 9.868549015702668e-05, + "loss": 5.4828, + "step": 85900 + }, + { + "epoch": 0.36996225050684184, + "grad_norm": 1.7595722675323486, + "learning_rate": 9.868394820941314e-05, + "loss": 4.9142, + "step": 85950 + }, + { + "epoch": 0.37017746996612444, + "grad_norm": 0.6299664378166199, + "learning_rate": 9.868240537002179e-05, + "loss": 5.1754, + "step": 86000 + }, + { + "epoch": 0.3703926894254071, + "grad_norm": 2.0729360580444336, + "learning_rate": 9.868086163888083e-05, + "loss": 4.7891, + "step": 86050 + }, + { + "epoch": 0.3706079088846897, + "grad_norm": 3.3389272689819336, + "learning_rate": 9.867931701601861e-05, + "loss": 5.0078, + "step": 86100 + }, + { + "epoch": 0.37082312834397235, + "grad_norm": 3.186583995819092, + "learning_rate": 9.86777715014634e-05, + "loss": 5.1303, + "step": 86150 + }, + { + "epoch": 0.37103834780325495, + "grad_norm": 1.4598468542099, + "learning_rate": 9.867622509524349e-05, + "loss": 5.2036, + "step": 86200 + }, + { + "epoch": 0.3712535672625376, + "grad_norm": 1.67514967918396, + "learning_rate": 9.867467779738721e-05, + "loss": 4.6396, + "step": 86250 + }, + { + "epoch": 0.37146878672182027, + "grad_norm": 4.2529683113098145, + "learning_rate": 9.867312960792294e-05, + "loss": 5.2066, + "step": 86300 + }, + { + "epoch": 0.37168400618110287, + "grad_norm": 1.7731200456619263, + "learning_rate": 9.867158052687899e-05, + "loss": 5.2348, + "step": 86350 + }, + { + "epoch": 0.3718992256403855, + "grad_norm": 0.8710344433784485, + "learning_rate": 9.867003055428378e-05, + "loss": 5.016, + "step": 86400 + }, + { + "epoch": 0.3721144450996681, + "grad_norm": 3.152829170227051, + "learning_rate": 9.866847969016567e-05, + "loss": 4.68, + "step": 86450 + }, + { + "epoch": 0.3723296645589508, + "grad_norm": 1.125133752822876, + "learning_rate": 9.866692793455308e-05, + "loss": 4.7659, + "step": 86500 + }, + { + "epoch": 0.3725448840182334, + "grad_norm": 3.5178985595703125, + "learning_rate": 9.866537528747444e-05, + "loss": 5.1892, + "step": 86550 + }, + { + "epoch": 0.37276010347751604, + "grad_norm": 2.7990353107452393, + "learning_rate": 9.866382174895817e-05, + "loss": 5.4303, + "step": 86600 + }, + { + "epoch": 0.37297532293679864, + "grad_norm": 2.555786371231079, + "learning_rate": 9.866226731903275e-05, + "loss": 5.3229, + "step": 86650 + }, + { + "epoch": 0.3731905423960813, + "grad_norm": 2.3630778789520264, + "learning_rate": 9.866071199772664e-05, + "loss": 4.7296, + "step": 86700 + }, + { + "epoch": 0.3734057618553639, + "grad_norm": 2.2691643238067627, + "learning_rate": 9.865915578506835e-05, + "loss": 5.2631, + "step": 86750 + }, + { + "epoch": 0.37362098131464655, + "grad_norm": 0.9444677233695984, + "learning_rate": 9.865759868108636e-05, + "loss": 5.145, + "step": 86800 + }, + { + "epoch": 0.37383620077392915, + "grad_norm": 3.8003478050231934, + "learning_rate": 9.86560406858092e-05, + "loss": 4.7549, + "step": 86850 + }, + { + "epoch": 0.3740514202332118, + "grad_norm": 0.9284152984619141, + "learning_rate": 9.865448179926542e-05, + "loss": 5.3649, + "step": 86900 + }, + { + "epoch": 0.37426663969249446, + "grad_norm": 1.779629111289978, + "learning_rate": 9.865292202148358e-05, + "loss": 5.0929, + "step": 86950 + }, + { + "epoch": 0.37448185915177706, + "grad_norm": 2.418083429336548, + "learning_rate": 9.865136135249223e-05, + "loss": 5.1134, + "step": 87000 + }, + { + "epoch": 0.37448185915177706, + "eval_loss": 5.5465264320373535, + "eval_runtime": 35.1104, + "eval_samples_per_second": 18.228, + "eval_steps_per_second": 9.114, + "eval_tts_loss": 6.532261712702735, + "step": 87000 + }, + { + "epoch": 0.3746970786110597, + "grad_norm": 2.1852855682373047, + "learning_rate": 9.864979979231996e-05, + "loss": 4.824, + "step": 87050 + }, + { + "epoch": 0.3749122980703423, + "grad_norm": 1.570412516593933, + "learning_rate": 9.864823734099538e-05, + "loss": 4.703, + "step": 87100 + }, + { + "epoch": 0.375127517529625, + "grad_norm": 2.174656391143799, + "learning_rate": 9.864667399854712e-05, + "loss": 5.1402, + "step": 87150 + }, + { + "epoch": 0.3753427369889076, + "grad_norm": 2.768665075302124, + "learning_rate": 9.864510976500381e-05, + "loss": 4.997, + "step": 87200 + }, + { + "epoch": 0.37555795644819023, + "grad_norm": 2.1193199157714844, + "learning_rate": 9.864354464039409e-05, + "loss": 5.0486, + "step": 87250 + }, + { + "epoch": 0.37577317590747283, + "grad_norm": 2.0332205295562744, + "learning_rate": 9.864197862474666e-05, + "loss": 4.8912, + "step": 87300 + }, + { + "epoch": 0.3759883953667555, + "grad_norm": 2.055063247680664, + "learning_rate": 9.864041171809018e-05, + "loss": 4.7814, + "step": 87350 + }, + { + "epoch": 0.3762036148260381, + "grad_norm": 2.2019028663635254, + "learning_rate": 9.863884392045336e-05, + "loss": 4.9128, + "step": 87400 + }, + { + "epoch": 0.37641883428532075, + "grad_norm": 1.89211905002594, + "learning_rate": 9.863727523186493e-05, + "loss": 4.9279, + "step": 87450 + }, + { + "epoch": 0.37663405374460335, + "grad_norm": 1.5809367895126343, + "learning_rate": 9.863570565235359e-05, + "loss": 4.7779, + "step": 87500 + }, + { + "epoch": 0.376849273203886, + "grad_norm": 1.6993454694747925, + "learning_rate": 9.863413518194813e-05, + "loss": 5.3102, + "step": 87550 + }, + { + "epoch": 0.37706449266316866, + "grad_norm": 2.286719560623169, + "learning_rate": 9.86325638206773e-05, + "loss": 5.0939, + "step": 87600 + }, + { + "epoch": 0.37727971212245126, + "grad_norm": 1.94940185546875, + "learning_rate": 9.863099156856987e-05, + "loss": 4.6216, + "step": 87650 + }, + { + "epoch": 0.3774949315817339, + "grad_norm": 2.4847705364227295, + "learning_rate": 9.862941842565466e-05, + "loss": 4.8528, + "step": 87700 + }, + { + "epoch": 0.3777101510410165, + "grad_norm": 1.2659863233566284, + "learning_rate": 9.862784439196049e-05, + "loss": 5.254, + "step": 87750 + }, + { + "epoch": 0.3779253705002992, + "grad_norm": 1.5410501956939697, + "learning_rate": 9.862626946751619e-05, + "loss": 4.8621, + "step": 87800 + }, + { + "epoch": 0.3781405899595818, + "grad_norm": 2.4417333602905273, + "learning_rate": 9.862469365235059e-05, + "loss": 4.9572, + "step": 87850 + }, + { + "epoch": 0.37835580941886443, + "grad_norm": 2.7407665252685547, + "learning_rate": 9.862311694649256e-05, + "loss": 5.4726, + "step": 87900 + }, + { + "epoch": 0.37857102887814703, + "grad_norm": 2.742138147354126, + "learning_rate": 9.8621539349971e-05, + "loss": 5.3632, + "step": 87950 + }, + { + "epoch": 0.3787862483374297, + "grad_norm": 2.7188079357147217, + "learning_rate": 9.861996086281482e-05, + "loss": 5.2104, + "step": 88000 + }, + { + "epoch": 0.3790014677967123, + "grad_norm": 2.3733513355255127, + "learning_rate": 9.86183814850529e-05, + "loss": 4.706, + "step": 88050 + }, + { + "epoch": 0.37921668725599494, + "grad_norm": 0.21637287735939026, + "learning_rate": 9.861680121671416e-05, + "loss": 4.8686, + "step": 88100 + }, + { + "epoch": 0.37943190671527754, + "grad_norm": 1.5723663568496704, + "learning_rate": 9.861522005782758e-05, + "loss": 5.2899, + "step": 88150 + }, + { + "epoch": 0.3796471261745602, + "grad_norm": 2.0000224113464355, + "learning_rate": 9.861363800842212e-05, + "loss": 4.9472, + "step": 88200 + }, + { + "epoch": 0.37986234563384286, + "grad_norm": 2.1894655227661133, + "learning_rate": 9.861205506852675e-05, + "loss": 5.1755, + "step": 88250 + }, + { + "epoch": 0.38007756509312546, + "grad_norm": 3.22061824798584, + "learning_rate": 9.861047123817048e-05, + "loss": 5.1109, + "step": 88300 + }, + { + "epoch": 0.3802927845524081, + "grad_norm": 2.4320688247680664, + "learning_rate": 9.860888651738229e-05, + "loss": 5.2547, + "step": 88350 + }, + { + "epoch": 0.3805080040116907, + "grad_norm": 2.9908690452575684, + "learning_rate": 9.860730090619124e-05, + "loss": 5.4276, + "step": 88400 + }, + { + "epoch": 0.38072322347097337, + "grad_norm": 1.9990092515945435, + "learning_rate": 9.860571440462635e-05, + "loss": 5.2761, + "step": 88450 + }, + { + "epoch": 0.38093844293025597, + "grad_norm": 2.2380356788635254, + "learning_rate": 9.860412701271672e-05, + "loss": 5.2736, + "step": 88500 + }, + { + "epoch": 0.38115366238953863, + "grad_norm": 2.020083427429199, + "learning_rate": 9.860253873049139e-05, + "loss": 5.0899, + "step": 88550 + }, + { + "epoch": 0.38136888184882123, + "grad_norm": 2.127946138381958, + "learning_rate": 9.860094955797946e-05, + "loss": 5.1375, + "step": 88600 + }, + { + "epoch": 0.3815841013081039, + "grad_norm": 0.44063398241996765, + "learning_rate": 9.859935949521006e-05, + "loss": 5.0354, + "step": 88650 + }, + { + "epoch": 0.3817993207673865, + "grad_norm": 1.1802786588668823, + "learning_rate": 9.859776854221227e-05, + "loss": 5.1822, + "step": 88700 + }, + { + "epoch": 0.38201454022666914, + "grad_norm": 2.618706464767456, + "learning_rate": 9.85961766990153e-05, + "loss": 4.9966, + "step": 88750 + }, + { + "epoch": 0.38222975968595174, + "grad_norm": 3.2433018684387207, + "learning_rate": 9.859458396564826e-05, + "loss": 4.7433, + "step": 88800 + }, + { + "epoch": 0.3824449791452344, + "grad_norm": 2.418917179107666, + "learning_rate": 9.859299034214033e-05, + "loss": 5.3384, + "step": 88850 + }, + { + "epoch": 0.382660198604517, + "grad_norm": 3.5978734493255615, + "learning_rate": 9.859139582852072e-05, + "loss": 5.0691, + "step": 88900 + }, + { + "epoch": 0.38287541806379966, + "grad_norm": 2.61253023147583, + "learning_rate": 9.858980042481863e-05, + "loss": 4.7016, + "step": 88950 + }, + { + "epoch": 0.3830906375230823, + "grad_norm": 2.1786916255950928, + "learning_rate": 9.858820413106327e-05, + "loss": 4.8875, + "step": 89000 + }, + { + "epoch": 0.3833058569823649, + "grad_norm": 4.601500511169434, + "learning_rate": 9.85866069472839e-05, + "loss": 5.3947, + "step": 89050 + }, + { + "epoch": 0.38352107644164757, + "grad_norm": 0.5190704464912415, + "learning_rate": 9.858500887350977e-05, + "loss": 4.6871, + "step": 89100 + }, + { + "epoch": 0.38373629590093017, + "grad_norm": 4.027706623077393, + "learning_rate": 9.858340990977015e-05, + "loss": 5.472, + "step": 89150 + }, + { + "epoch": 0.3839515153602128, + "grad_norm": 2.399104118347168, + "learning_rate": 9.858181005609434e-05, + "loss": 4.8609, + "step": 89200 + }, + { + "epoch": 0.3841667348194954, + "grad_norm": 0.5637953281402588, + "learning_rate": 9.858020931251163e-05, + "loss": 4.5653, + "step": 89250 + }, + { + "epoch": 0.3843819542787781, + "grad_norm": 3.101699113845825, + "learning_rate": 9.857860767905134e-05, + "loss": 5.4332, + "step": 89300 + }, + { + "epoch": 0.3845971737380607, + "grad_norm": 2.8481950759887695, + "learning_rate": 9.857700515574285e-05, + "loss": 5.2032, + "step": 89350 + }, + { + "epoch": 0.38481239319734334, + "grad_norm": 4.203928470611572, + "learning_rate": 9.857540174261544e-05, + "loss": 5.1648, + "step": 89400 + }, + { + "epoch": 0.38502761265662594, + "grad_norm": 2.5223865509033203, + "learning_rate": 9.857379743969855e-05, + "loss": 4.5888, + "step": 89450 + }, + { + "epoch": 0.3852428321159086, + "grad_norm": 3.072415351867676, + "learning_rate": 9.857219224702152e-05, + "loss": 5.1484, + "step": 89500 + }, + { + "epoch": 0.3854580515751912, + "grad_norm": 1.0528713464736938, + "learning_rate": 9.857058616461378e-05, + "loss": 4.877, + "step": 89550 + }, + { + "epoch": 0.38567327103447385, + "grad_norm": 1.8131152391433716, + "learning_rate": 9.856897919250476e-05, + "loss": 5.0465, + "step": 89600 + }, + { + "epoch": 0.3858884904937565, + "grad_norm": 1.558307409286499, + "learning_rate": 9.856737133072387e-05, + "loss": 4.4244, + "step": 89650 + }, + { + "epoch": 0.3861037099530391, + "grad_norm": 2.1213862895965576, + "learning_rate": 9.856576257930057e-05, + "loss": 4.9559, + "step": 89700 + }, + { + "epoch": 0.38631892941232177, + "grad_norm": 1.1613202095031738, + "learning_rate": 9.856415293826432e-05, + "loss": 4.818, + "step": 89750 + }, + { + "epoch": 0.38653414887160437, + "grad_norm": 1.754496693611145, + "learning_rate": 9.856254240764462e-05, + "loss": 5.0454, + "step": 89800 + }, + { + "epoch": 0.386749368330887, + "grad_norm": 3.1636641025543213, + "learning_rate": 9.856093098747097e-05, + "loss": 5.5041, + "step": 89850 + }, + { + "epoch": 0.3869645877901696, + "grad_norm": 4.040655612945557, + "learning_rate": 9.855931867777288e-05, + "loss": 5.281, + "step": 89900 + }, + { + "epoch": 0.3871798072494523, + "grad_norm": 2.511167049407959, + "learning_rate": 9.85577054785799e-05, + "loss": 5.0465, + "step": 89950 + }, + { + "epoch": 0.3873950267087349, + "grad_norm": 0.8693875074386597, + "learning_rate": 9.855609138992155e-05, + "loss": 5.058, + "step": 90000 + }, + { + "epoch": 0.3873950267087349, + "eval_loss": 5.517374515533447, + "eval_runtime": 35.0241, + "eval_samples_per_second": 18.273, + "eval_steps_per_second": 9.137, + "eval_tts_loss": 6.546801874017835, + "step": 90000 + }, + { + "epoch": 0.38761024616801754, + "grad_norm": 1.3565654754638672, + "learning_rate": 9.855447641182742e-05, + "loss": 5.091, + "step": 90050 + }, + { + "epoch": 0.38782546562730014, + "grad_norm": 3.0673940181732178, + "learning_rate": 9.855286054432708e-05, + "loss": 5.2379, + "step": 90100 + }, + { + "epoch": 0.3880406850865828, + "grad_norm": 1.929660677909851, + "learning_rate": 9.855124378745016e-05, + "loss": 4.9912, + "step": 90150 + }, + { + "epoch": 0.3882559045458654, + "grad_norm": 1.5866032838821411, + "learning_rate": 9.854962614122622e-05, + "loss": 5.0546, + "step": 90200 + }, + { + "epoch": 0.38847112400514805, + "grad_norm": 0.5423416495323181, + "learning_rate": 9.854800760568494e-05, + "loss": 4.6871, + "step": 90250 + }, + { + "epoch": 0.3886863434644307, + "grad_norm": 2.9567508697509766, + "learning_rate": 9.854638818085595e-05, + "loss": 5.163, + "step": 90300 + }, + { + "epoch": 0.3889015629237133, + "grad_norm": 1.860320806503296, + "learning_rate": 9.854476786676892e-05, + "loss": 4.724, + "step": 90350 + }, + { + "epoch": 0.38911678238299596, + "grad_norm": 1.6422196626663208, + "learning_rate": 9.85431466634535e-05, + "loss": 5.0527, + "step": 90400 + }, + { + "epoch": 0.38933200184227856, + "grad_norm": 2.4775640964508057, + "learning_rate": 9.854152457093944e-05, + "loss": 4.8734, + "step": 90450 + }, + { + "epoch": 0.3895472213015612, + "grad_norm": 1.669533610343933, + "learning_rate": 9.853990158925642e-05, + "loss": 4.6713, + "step": 90500 + }, + { + "epoch": 0.3897624407608438, + "grad_norm": 1.7533091306686401, + "learning_rate": 9.853827771843416e-05, + "loss": 4.6256, + "step": 90550 + }, + { + "epoch": 0.3899776602201265, + "grad_norm": 1.7487502098083496, + "learning_rate": 9.853665295850242e-05, + "loss": 5.6339, + "step": 90600 + }, + { + "epoch": 0.3901928796794091, + "grad_norm": 2.295142412185669, + "learning_rate": 9.853502730949096e-05, + "loss": 4.9436, + "step": 90650 + }, + { + "epoch": 0.39040809913869173, + "grad_norm": 0.7481253743171692, + "learning_rate": 9.853340077142958e-05, + "loss": 5.1321, + "step": 90700 + }, + { + "epoch": 0.39062331859797433, + "grad_norm": 1.3383275270462036, + "learning_rate": 9.853177334434803e-05, + "loss": 4.9275, + "step": 90750 + }, + { + "epoch": 0.390838538057257, + "grad_norm": 1.7193812131881714, + "learning_rate": 9.853014502827615e-05, + "loss": 5.1338, + "step": 90800 + }, + { + "epoch": 0.3910537575165396, + "grad_norm": 1.210848331451416, + "learning_rate": 9.852851582324377e-05, + "loss": 5.1035, + "step": 90850 + }, + { + "epoch": 0.39126897697582225, + "grad_norm": 1.6387708187103271, + "learning_rate": 9.852688572928073e-05, + "loss": 5.4046, + "step": 90900 + }, + { + "epoch": 0.3914841964351049, + "grad_norm": 1.7970072031021118, + "learning_rate": 9.852525474641687e-05, + "loss": 4.374, + "step": 90950 + }, + { + "epoch": 0.3916994158943875, + "grad_norm": 4.133056640625, + "learning_rate": 9.852362287468208e-05, + "loss": 4.5321, + "step": 91000 + }, + { + "epoch": 0.39191463535367016, + "grad_norm": 2.3880555629730225, + "learning_rate": 9.852199011410625e-05, + "loss": 4.9097, + "step": 91050 + }, + { + "epoch": 0.39212985481295276, + "grad_norm": 2.9222450256347656, + "learning_rate": 9.852035646471931e-05, + "loss": 5.2708, + "step": 91100 + }, + { + "epoch": 0.3923450742722354, + "grad_norm": 1.6426830291748047, + "learning_rate": 9.851872192655115e-05, + "loss": 4.7694, + "step": 91150 + }, + { + "epoch": 0.392560293731518, + "grad_norm": 2.466383457183838, + "learning_rate": 9.851708649963174e-05, + "loss": 4.836, + "step": 91200 + }, + { + "epoch": 0.3927755131908007, + "grad_norm": 1.1893091201782227, + "learning_rate": 9.8515450183991e-05, + "loss": 5.3118, + "step": 91250 + }, + { + "epoch": 0.3929907326500833, + "grad_norm": 2.882634401321411, + "learning_rate": 9.851381297965895e-05, + "loss": 5.1108, + "step": 91300 + }, + { + "epoch": 0.39320595210936593, + "grad_norm": 1.8085002899169922, + "learning_rate": 9.851217488666554e-05, + "loss": 4.9389, + "step": 91350 + }, + { + "epoch": 0.39342117156864853, + "grad_norm": 3.4653775691986084, + "learning_rate": 9.85105359050408e-05, + "loss": 5.1794, + "step": 91400 + }, + { + "epoch": 0.3936363910279312, + "grad_norm": 2.403564453125, + "learning_rate": 9.850889603481475e-05, + "loss": 5.2385, + "step": 91450 + }, + { + "epoch": 0.3938516104872138, + "grad_norm": 1.6444262266159058, + "learning_rate": 9.850725527601743e-05, + "loss": 5.2763, + "step": 91500 + }, + { + "epoch": 0.39406682994649644, + "grad_norm": 2.808849811553955, + "learning_rate": 9.850561362867887e-05, + "loss": 5.2024, + "step": 91550 + }, + { + "epoch": 0.3942820494057791, + "grad_norm": 1.587497353553772, + "learning_rate": 9.850397109282917e-05, + "loss": 4.5432, + "step": 91600 + }, + { + "epoch": 0.3944972688650617, + "grad_norm": 1.9520387649536133, + "learning_rate": 9.85023276684984e-05, + "loss": 5.4927, + "step": 91650 + }, + { + "epoch": 0.39471248832434436, + "grad_norm": 1.8448737859725952, + "learning_rate": 9.85006833557167e-05, + "loss": 4.9559, + "step": 91700 + }, + { + "epoch": 0.39492770778362696, + "grad_norm": 0.8822876811027527, + "learning_rate": 9.849903815451413e-05, + "loss": 5.0114, + "step": 91750 + }, + { + "epoch": 0.3951429272429096, + "grad_norm": 2.2746846675872803, + "learning_rate": 9.849739206492087e-05, + "loss": 5.228, + "step": 91800 + }, + { + "epoch": 0.3953581467021922, + "grad_norm": 2.293971300125122, + "learning_rate": 9.849574508696706e-05, + "loss": 5.4089, + "step": 91850 + }, + { + "epoch": 0.39557336616147487, + "grad_norm": 2.3590118885040283, + "learning_rate": 9.849409722068286e-05, + "loss": 5.2588, + "step": 91900 + }, + { + "epoch": 0.39578858562075747, + "grad_norm": 2.7524807453155518, + "learning_rate": 9.849244846609849e-05, + "loss": 5.2064, + "step": 91950 + }, + { + "epoch": 0.3960038050800401, + "grad_norm": 1.752387285232544, + "learning_rate": 9.84907988232441e-05, + "loss": 5.137, + "step": 92000 + }, + { + "epoch": 0.39621902453932273, + "grad_norm": 3.0871236324310303, + "learning_rate": 9.848914829214995e-05, + "loss": 5.4263, + "step": 92050 + }, + { + "epoch": 0.3964342439986054, + "grad_norm": 1.6178125143051147, + "learning_rate": 9.848749687284625e-05, + "loss": 5.0311, + "step": 92100 + }, + { + "epoch": 0.396649463457888, + "grad_norm": 1.0070396661758423, + "learning_rate": 9.848584456536325e-05, + "loss": 4.3914, + "step": 92150 + }, + { + "epoch": 0.39686468291717064, + "grad_norm": 1.5877410173416138, + "learning_rate": 9.848419136973123e-05, + "loss": 4.9439, + "step": 92200 + }, + { + "epoch": 0.39707990237645324, + "grad_norm": 2.147219181060791, + "learning_rate": 9.848253728598047e-05, + "loss": 5.254, + "step": 92250 + }, + { + "epoch": 0.3972951218357359, + "grad_norm": 0.9695080518722534, + "learning_rate": 9.848088231414126e-05, + "loss": 5.317, + "step": 92300 + }, + { + "epoch": 0.39751034129501855, + "grad_norm": 0.9261944890022278, + "learning_rate": 9.847922645424392e-05, + "loss": 4.5015, + "step": 92350 + }, + { + "epoch": 0.39772556075430116, + "grad_norm": 2.5680551528930664, + "learning_rate": 9.847756970631878e-05, + "loss": 5.4079, + "step": 92400 + }, + { + "epoch": 0.3979407802135838, + "grad_norm": 1.7601152658462524, + "learning_rate": 9.84759120703962e-05, + "loss": 4.7971, + "step": 92450 + }, + { + "epoch": 0.3981559996728664, + "grad_norm": 2.4024336338043213, + "learning_rate": 9.847425354650655e-05, + "loss": 4.7692, + "step": 92500 + }, + { + "epoch": 0.39837121913214907, + "grad_norm": 2.880866765975952, + "learning_rate": 9.847259413468016e-05, + "loss": 5.4401, + "step": 92550 + }, + { + "epoch": 0.39858643859143167, + "grad_norm": 2.2878313064575195, + "learning_rate": 9.847093383494748e-05, + "loss": 5.2404, + "step": 92600 + }, + { + "epoch": 0.3988016580507143, + "grad_norm": 1.8065351247787476, + "learning_rate": 9.84692726473389e-05, + "loss": 5.0521, + "step": 92650 + }, + { + "epoch": 0.3990168775099969, + "grad_norm": 1.0057268142700195, + "learning_rate": 9.846761057188486e-05, + "loss": 5.0535, + "step": 92700 + }, + { + "epoch": 0.3992320969692796, + "grad_norm": 2.7567572593688965, + "learning_rate": 9.846594760861579e-05, + "loss": 5.0096, + "step": 92750 + }, + { + "epoch": 0.3994473164285622, + "grad_norm": 3.222585916519165, + "learning_rate": 9.846428375756216e-05, + "loss": 5.1937, + "step": 92800 + }, + { + "epoch": 0.39966253588784484, + "grad_norm": 2.0777244567871094, + "learning_rate": 9.846261901875446e-05, + "loss": 5.1912, + "step": 92850 + }, + { + "epoch": 0.39987775534712744, + "grad_norm": 1.0652265548706055, + "learning_rate": 9.846095339222316e-05, + "loss": 5.052, + "step": 92900 + }, + { + "epoch": 0.4000929748064101, + "grad_norm": 2.776710033416748, + "learning_rate": 9.845928687799879e-05, + "loss": 4.6549, + "step": 92950 + }, + { + "epoch": 0.40030819426569275, + "grad_norm": 3.127589225769043, + "learning_rate": 9.845761947611187e-05, + "loss": 4.7442, + "step": 93000 + }, + { + "epoch": 0.40030819426569275, + "eval_loss": 5.497459411621094, + "eval_runtime": 34.96, + "eval_samples_per_second": 18.307, + "eval_steps_per_second": 9.153, + "eval_tts_loss": 6.57450410887732, + "step": 93000 + }, + { + "epoch": 0.40052341372497535, + "grad_norm": 4.565796375274658, + "learning_rate": 9.845595118659294e-05, + "loss": 5.1072, + "step": 93050 + }, + { + "epoch": 0.400738633184258, + "grad_norm": 2.0911166667938232, + "learning_rate": 9.845428200947257e-05, + "loss": 5.0767, + "step": 93100 + }, + { + "epoch": 0.4009538526435406, + "grad_norm": 2.3617923259735107, + "learning_rate": 9.845261194478132e-05, + "loss": 5.0681, + "step": 93150 + }, + { + "epoch": 0.40116907210282327, + "grad_norm": 1.57003915309906, + "learning_rate": 9.845094099254978e-05, + "loss": 4.9043, + "step": 93200 + }, + { + "epoch": 0.40138429156210587, + "grad_norm": 1.905303716659546, + "learning_rate": 9.844926915280858e-05, + "loss": 4.8755, + "step": 93250 + }, + { + "epoch": 0.4015995110213885, + "grad_norm": 1.9363036155700684, + "learning_rate": 9.844759642558833e-05, + "loss": 4.7338, + "step": 93300 + }, + { + "epoch": 0.4018147304806711, + "grad_norm": 2.7584245204925537, + "learning_rate": 9.844592281091966e-05, + "loss": 4.7133, + "step": 93350 + }, + { + "epoch": 0.4020299499399538, + "grad_norm": 0.914656400680542, + "learning_rate": 9.844424830883325e-05, + "loss": 5.1626, + "step": 93400 + }, + { + "epoch": 0.4022451693992364, + "grad_norm": 2.9613099098205566, + "learning_rate": 9.844257291935977e-05, + "loss": 5.0774, + "step": 93450 + }, + { + "epoch": 0.40246038885851904, + "grad_norm": 1.5476586818695068, + "learning_rate": 9.844089664252989e-05, + "loss": 4.3066, + "step": 93500 + }, + { + "epoch": 0.40267560831780164, + "grad_norm": 1.090958595275879, + "learning_rate": 9.843921947837431e-05, + "loss": 5.0351, + "step": 93550 + }, + { + "epoch": 0.4028908277770843, + "grad_norm": 1.534397840499878, + "learning_rate": 9.84375414269238e-05, + "loss": 4.8722, + "step": 93600 + }, + { + "epoch": 0.40310604723636695, + "grad_norm": 1.7665855884552002, + "learning_rate": 9.843586248820904e-05, + "loss": 4.68, + "step": 93650 + }, + { + "epoch": 0.40332126669564955, + "grad_norm": 2.797039747238159, + "learning_rate": 9.843418266226083e-05, + "loss": 4.9356, + "step": 93700 + }, + { + "epoch": 0.4035364861549322, + "grad_norm": 2.285006284713745, + "learning_rate": 9.843250194910992e-05, + "loss": 5.0594, + "step": 93750 + }, + { + "epoch": 0.4037517056142148, + "grad_norm": 3.301145076751709, + "learning_rate": 9.843082034878707e-05, + "loss": 5.2162, + "step": 93800 + }, + { + "epoch": 0.40396692507349746, + "grad_norm": 2.6975786685943604, + "learning_rate": 9.842913786132314e-05, + "loss": 4.9172, + "step": 93850 + }, + { + "epoch": 0.40418214453278006, + "grad_norm": 3.4499709606170654, + "learning_rate": 9.842745448674892e-05, + "loss": 5.0881, + "step": 93900 + }, + { + "epoch": 0.4043973639920627, + "grad_norm": 1.884997844696045, + "learning_rate": 9.842577022509523e-05, + "loss": 5.1793, + "step": 93950 + }, + { + "epoch": 0.4046125834513453, + "grad_norm": 2.178675413131714, + "learning_rate": 9.842408507639294e-05, + "loss": 4.7296, + "step": 94000 + }, + { + "epoch": 0.404827802910628, + "grad_norm": 2.096127510070801, + "learning_rate": 9.842239904067291e-05, + "loss": 4.5126, + "step": 94050 + }, + { + "epoch": 0.4050430223699106, + "grad_norm": 1.0641096830368042, + "learning_rate": 9.842071211796606e-05, + "loss": 4.7315, + "step": 94100 + }, + { + "epoch": 0.40525824182919323, + "grad_norm": 4.1841139793396, + "learning_rate": 9.841902430830323e-05, + "loss": 5.0209, + "step": 94150 + }, + { + "epoch": 0.40547346128847583, + "grad_norm": 2.7262396812438965, + "learning_rate": 9.841733561171538e-05, + "loss": 4.5069, + "step": 94200 + }, + { + "epoch": 0.4056886807477585, + "grad_norm": 3.6551127433776855, + "learning_rate": 9.841564602823343e-05, + "loss": 4.9307, + "step": 94250 + }, + { + "epoch": 0.40590390020704115, + "grad_norm": 1.7723350524902344, + "learning_rate": 9.841395555788833e-05, + "loss": 5.1276, + "step": 94300 + }, + { + "epoch": 0.40611911966632375, + "grad_norm": 2.275730609893799, + "learning_rate": 9.841226420071105e-05, + "loss": 5.3453, + "step": 94350 + }, + { + "epoch": 0.4063343391256064, + "grad_norm": 2.272017240524292, + "learning_rate": 9.841057195673256e-05, + "loss": 4.6059, + "step": 94400 + }, + { + "epoch": 0.406549558584889, + "grad_norm": 2.593118190765381, + "learning_rate": 9.840887882598387e-05, + "loss": 4.8579, + "step": 94450 + }, + { + "epoch": 0.40676477804417166, + "grad_norm": 2.3674066066741943, + "learning_rate": 9.840718480849598e-05, + "loss": 4.6893, + "step": 94500 + }, + { + "epoch": 0.40697999750345426, + "grad_norm": 1.3122670650482178, + "learning_rate": 9.840548990429992e-05, + "loss": 4.8012, + "step": 94550 + }, + { + "epoch": 0.4071952169627369, + "grad_norm": 3.7641441822052, + "learning_rate": 9.840379411342678e-05, + "loss": 5.4147, + "step": 94600 + }, + { + "epoch": 0.4074104364220195, + "grad_norm": 2.9251091480255127, + "learning_rate": 9.840209743590755e-05, + "loss": 4.8114, + "step": 94650 + }, + { + "epoch": 0.4076256558813022, + "grad_norm": 1.7514009475708008, + "learning_rate": 9.840039987177338e-05, + "loss": 4.7025, + "step": 94700 + }, + { + "epoch": 0.4078408753405848, + "grad_norm": 0.6977071762084961, + "learning_rate": 9.839870142105531e-05, + "loss": 5.6137, + "step": 94750 + }, + { + "epoch": 0.40805609479986743, + "grad_norm": 1.9833108186721802, + "learning_rate": 9.83970020837845e-05, + "loss": 4.7518, + "step": 94800 + }, + { + "epoch": 0.40827131425915003, + "grad_norm": 1.8142520189285278, + "learning_rate": 9.839530185999204e-05, + "loss": 4.905, + "step": 94850 + }, + { + "epoch": 0.4084865337184327, + "grad_norm": 1.0312514305114746, + "learning_rate": 9.83936007497091e-05, + "loss": 4.8751, + "step": 94900 + }, + { + "epoch": 0.40870175317771534, + "grad_norm": 2.8556413650512695, + "learning_rate": 9.83918987529668e-05, + "loss": 5.4708, + "step": 94950 + }, + { + "epoch": 0.40891697263699794, + "grad_norm": 2.535505533218384, + "learning_rate": 9.839019586979636e-05, + "loss": 4.7104, + "step": 95000 + }, + { + "epoch": 0.4091321920962806, + "grad_norm": 1.6215523481369019, + "learning_rate": 9.838849210022897e-05, + "loss": 5.4617, + "step": 95050 + }, + { + "epoch": 0.4093474115555632, + "grad_norm": 3.9489247798919678, + "learning_rate": 9.838678744429581e-05, + "loss": 5.3932, + "step": 95100 + }, + { + "epoch": 0.40956263101484586, + "grad_norm": 1.2710689306259155, + "learning_rate": 9.838508190202813e-05, + "loss": 4.7224, + "step": 95150 + }, + { + "epoch": 0.40977785047412846, + "grad_norm": 3.835360288619995, + "learning_rate": 9.838337547345717e-05, + "loss": 4.9559, + "step": 95200 + }, + { + "epoch": 0.4099930699334111, + "grad_norm": 2.807403087615967, + "learning_rate": 9.838166815861418e-05, + "loss": 5.0695, + "step": 95250 + }, + { + "epoch": 0.4102082893926937, + "grad_norm": 3.4930193424224854, + "learning_rate": 9.837995995753042e-05, + "loss": 5.0059, + "step": 95300 + }, + { + "epoch": 0.41042350885197637, + "grad_norm": 1.6570701599121094, + "learning_rate": 9.837825087023722e-05, + "loss": 5.2183, + "step": 95350 + }, + { + "epoch": 0.41063872831125897, + "grad_norm": 2.1693122386932373, + "learning_rate": 9.837654089676585e-05, + "loss": 4.746, + "step": 95400 + }, + { + "epoch": 0.4108539477705416, + "grad_norm": 2.098546266555786, + "learning_rate": 9.837483003714765e-05, + "loss": 5.3225, + "step": 95450 + }, + { + "epoch": 0.41106916722982423, + "grad_norm": 0.6822731494903564, + "learning_rate": 9.837311829141394e-05, + "loss": 4.8506, + "step": 95500 + }, + { + "epoch": 0.4112843866891069, + "grad_norm": 1.6425849199295044, + "learning_rate": 9.837140565959611e-05, + "loss": 4.9616, + "step": 95550 + }, + { + "epoch": 0.4114996061483895, + "grad_norm": 1.7158772945404053, + "learning_rate": 9.836969214172549e-05, + "loss": 4.9711, + "step": 95600 + }, + { + "epoch": 0.41171482560767214, + "grad_norm": 0.3179967999458313, + "learning_rate": 9.83679777378335e-05, + "loss": 4.7569, + "step": 95650 + }, + { + "epoch": 0.4119300450669548, + "grad_norm": 1.8137924671173096, + "learning_rate": 9.836626244795153e-05, + "loss": 5.4063, + "step": 95700 + }, + { + "epoch": 0.4121452645262374, + "grad_norm": 1.4340026378631592, + "learning_rate": 9.836454627211101e-05, + "loss": 5.0172, + "step": 95750 + }, + { + "epoch": 0.41236048398552005, + "grad_norm": 2.8278579711914062, + "learning_rate": 9.836282921034337e-05, + "loss": 5.0598, + "step": 95800 + }, + { + "epoch": 0.41257570344480266, + "grad_norm": 0.9528377056121826, + "learning_rate": 9.836111126268006e-05, + "loss": 4.9894, + "step": 95850 + }, + { + "epoch": 0.4127909229040853, + "grad_norm": 2.1655685901641846, + "learning_rate": 9.835939242915254e-05, + "loss": 5.016, + "step": 95900 + }, + { + "epoch": 0.4130061423633679, + "grad_norm": 2.120915651321411, + "learning_rate": 9.835767270979231e-05, + "loss": 5.0279, + "step": 95950 + }, + { + "epoch": 0.41322136182265057, + "grad_norm": 3.4390125274658203, + "learning_rate": 9.835595210463088e-05, + "loss": 5.0955, + "step": 96000 + }, + { + "epoch": 0.41322136182265057, + "eval_loss": 5.488664150238037, + "eval_runtime": 34.9615, + "eval_samples_per_second": 18.306, + "eval_steps_per_second": 9.153, + "eval_tts_loss": 6.589798867600411, + "step": 96000 + }, + { + "epoch": 0.41343658128193317, + "grad_norm": 2.383037567138672, + "learning_rate": 9.835423061369975e-05, + "loss": 4.7761, + "step": 96050 + }, + { + "epoch": 0.4136518007412158, + "grad_norm": 2.9497435092926025, + "learning_rate": 9.835250823703045e-05, + "loss": 5.3335, + "step": 96100 + }, + { + "epoch": 0.4138670202004984, + "grad_norm": 2.384578227996826, + "learning_rate": 9.835078497465454e-05, + "loss": 4.833, + "step": 96150 + }, + { + "epoch": 0.4140822396597811, + "grad_norm": 2.165750503540039, + "learning_rate": 9.834906082660359e-05, + "loss": 5.1608, + "step": 96200 + }, + { + "epoch": 0.4142974591190637, + "grad_norm": 2.2659049034118652, + "learning_rate": 9.834733579290918e-05, + "loss": 5.334, + "step": 96250 + }, + { + "epoch": 0.41451267857834634, + "grad_norm": 1.8610354661941528, + "learning_rate": 9.83456098736029e-05, + "loss": 4.8135, + "step": 96300 + }, + { + "epoch": 0.414727898037629, + "grad_norm": 1.1025253534317017, + "learning_rate": 9.834388306871637e-05, + "loss": 4.8031, + "step": 96350 + }, + { + "epoch": 0.4149431174969116, + "grad_norm": 0.40176263451576233, + "learning_rate": 9.834215537828122e-05, + "loss": 5.2147, + "step": 96400 + }, + { + "epoch": 0.41515833695619425, + "grad_norm": 2.329707384109497, + "learning_rate": 9.834042680232911e-05, + "loss": 5.1472, + "step": 96450 + }, + { + "epoch": 0.41537355641547685, + "grad_norm": 1.7552810907363892, + "learning_rate": 9.833869734089168e-05, + "loss": 4.7359, + "step": 96500 + }, + { + "epoch": 0.4155887758747595, + "grad_norm": 0.9936187863349915, + "learning_rate": 9.833696699400064e-05, + "loss": 4.5705, + "step": 96550 + }, + { + "epoch": 0.4158039953340421, + "grad_norm": 1.991598129272461, + "learning_rate": 9.833523576168764e-05, + "loss": 5.1525, + "step": 96600 + }, + { + "epoch": 0.41601921479332477, + "grad_norm": 2.5870285034179688, + "learning_rate": 9.833350364398444e-05, + "loss": 4.6331, + "step": 96650 + }, + { + "epoch": 0.41623443425260737, + "grad_norm": 1.5752900838851929, + "learning_rate": 9.833177064092274e-05, + "loss": 4.6894, + "step": 96700 + }, + { + "epoch": 0.41644965371189, + "grad_norm": 2.8123462200164795, + "learning_rate": 9.83300367525343e-05, + "loss": 4.7346, + "step": 96750 + }, + { + "epoch": 0.4166648731711726, + "grad_norm": 2.050963878631592, + "learning_rate": 9.832830197885087e-05, + "loss": 5.2055, + "step": 96800 + }, + { + "epoch": 0.4168800926304553, + "grad_norm": 0.5777395963668823, + "learning_rate": 9.832656631990424e-05, + "loss": 4.8035, + "step": 96850 + }, + { + "epoch": 0.4170953120897379, + "grad_norm": 1.177029013633728, + "learning_rate": 9.832482977572619e-05, + "loss": 4.8294, + "step": 96900 + }, + { + "epoch": 0.41731053154902054, + "grad_norm": 3.075434446334839, + "learning_rate": 9.832309234634853e-05, + "loss": 4.825, + "step": 96950 + }, + { + "epoch": 0.4175257510083032, + "grad_norm": 2.057128429412842, + "learning_rate": 9.83213540318031e-05, + "loss": 4.7002, + "step": 97000 + }, + { + "epoch": 0.4177409704675858, + "grad_norm": 3.403470039367676, + "learning_rate": 9.831961483212172e-05, + "loss": 4.9918, + "step": 97050 + }, + { + "epoch": 0.41795618992686845, + "grad_norm": 2.3243556022644043, + "learning_rate": 9.831787474733627e-05, + "loss": 4.6211, + "step": 97100 + }, + { + "epoch": 0.41817140938615105, + "grad_norm": 3.2576467990875244, + "learning_rate": 9.831613377747861e-05, + "loss": 5.1587, + "step": 97150 + }, + { + "epoch": 0.4183866288454337, + "grad_norm": 4.342401027679443, + "learning_rate": 9.831439192258063e-05, + "loss": 5.2498, + "step": 97200 + }, + { + "epoch": 0.4186018483047163, + "grad_norm": 1.5030683279037476, + "learning_rate": 9.831264918267424e-05, + "loss": 5.2121, + "step": 97250 + }, + { + "epoch": 0.41881706776399896, + "grad_norm": 5.904765605926514, + "learning_rate": 9.831090555779136e-05, + "loss": 5.183, + "step": 97300 + }, + { + "epoch": 0.41903228722328156, + "grad_norm": 4.892551898956299, + "learning_rate": 9.830916104796394e-05, + "loss": 4.7415, + "step": 97350 + }, + { + "epoch": 0.4192475066825642, + "grad_norm": 1.0925787687301636, + "learning_rate": 9.830741565322395e-05, + "loss": 5.2189, + "step": 97400 + }, + { + "epoch": 0.4194627261418468, + "grad_norm": 1.1017227172851562, + "learning_rate": 9.830566937360333e-05, + "loss": 4.6353, + "step": 97450 + }, + { + "epoch": 0.4196779456011295, + "grad_norm": 0.37346354126930237, + "learning_rate": 9.830392220913407e-05, + "loss": 5.1358, + "step": 97500 + }, + { + "epoch": 0.4198931650604121, + "grad_norm": 3.63446044921875, + "learning_rate": 9.830217415984817e-05, + "loss": 4.7153, + "step": 97550 + }, + { + "epoch": 0.42010838451969473, + "grad_norm": 1.810828685760498, + "learning_rate": 9.830042522577769e-05, + "loss": 5.3797, + "step": 97600 + }, + { + "epoch": 0.4203236039789774, + "grad_norm": 2.5413718223571777, + "learning_rate": 9.829867540695462e-05, + "loss": 5.1487, + "step": 97650 + }, + { + "epoch": 0.42053882343826, + "grad_norm": 2.9675588607788086, + "learning_rate": 9.829692470341105e-05, + "loss": 5.0899, + "step": 97700 + }, + { + "epoch": 0.42075404289754265, + "grad_norm": 0.6092607975006104, + "learning_rate": 9.829517311517902e-05, + "loss": 5.1231, + "step": 97750 + }, + { + "epoch": 0.42096926235682525, + "grad_norm": 2.350558280944824, + "learning_rate": 9.829342064229061e-05, + "loss": 4.5502, + "step": 97800 + }, + { + "epoch": 0.4211844818161079, + "grad_norm": 1.635928988456726, + "learning_rate": 9.829166728477797e-05, + "loss": 5.5748, + "step": 97850 + }, + { + "epoch": 0.4213997012753905, + "grad_norm": 2.531296730041504, + "learning_rate": 9.828991304267316e-05, + "loss": 5.2311, + "step": 97900 + }, + { + "epoch": 0.42161492073467316, + "grad_norm": 3.390477180480957, + "learning_rate": 9.828815791600835e-05, + "loss": 4.7516, + "step": 97950 + }, + { + "epoch": 0.42183014019395576, + "grad_norm": 2.1715612411499023, + "learning_rate": 9.828640190481568e-05, + "loss": 4.749, + "step": 98000 + }, + { + "epoch": 0.4220453596532384, + "grad_norm": 2.0156450271606445, + "learning_rate": 9.82846450091273e-05, + "loss": 4.8054, + "step": 98050 + }, + { + "epoch": 0.422260579112521, + "grad_norm": 1.7142488956451416, + "learning_rate": 9.82828872289754e-05, + "loss": 4.9482, + "step": 98100 + }, + { + "epoch": 0.4224757985718037, + "grad_norm": 1.0744781494140625, + "learning_rate": 9.828112856439222e-05, + "loss": 5.2294, + "step": 98150 + }, + { + "epoch": 0.4226910180310863, + "grad_norm": 2.0289199352264404, + "learning_rate": 9.82793690154099e-05, + "loss": 5.1421, + "step": 98200 + }, + { + "epoch": 0.42290623749036893, + "grad_norm": 1.8578128814697266, + "learning_rate": 9.827760858206074e-05, + "loss": 5.0212, + "step": 98250 + }, + { + "epoch": 0.4231214569496516, + "grad_norm": 1.8661326169967651, + "learning_rate": 9.827584726437695e-05, + "loss": 5.4812, + "step": 98300 + }, + { + "epoch": 0.4233366764089342, + "grad_norm": 2.2616968154907227, + "learning_rate": 9.82740850623908e-05, + "loss": 4.7666, + "step": 98350 + }, + { + "epoch": 0.42355189586821684, + "grad_norm": 3.0323903560638428, + "learning_rate": 9.827232197613458e-05, + "loss": 5.0237, + "step": 98400 + }, + { + "epoch": 0.42376711532749944, + "grad_norm": 3.6268975734710693, + "learning_rate": 9.827055800564056e-05, + "loss": 4.5593, + "step": 98450 + }, + { + "epoch": 0.4239823347867821, + "grad_norm": 0.8930902481079102, + "learning_rate": 9.826879315094106e-05, + "loss": 5.3255, + "step": 98500 + }, + { + "epoch": 0.4241975542460647, + "grad_norm": 3.3744986057281494, + "learning_rate": 9.826702741206844e-05, + "loss": 5.0867, + "step": 98550 + }, + { + "epoch": 0.42441277370534736, + "grad_norm": 0.4764692485332489, + "learning_rate": 9.826526078905502e-05, + "loss": 4.8386, + "step": 98600 + }, + { + "epoch": 0.42462799316462996, + "grad_norm": 2.2218782901763916, + "learning_rate": 9.826349328193314e-05, + "loss": 5.4403, + "step": 98650 + }, + { + "epoch": 0.4248432126239126, + "grad_norm": 1.4911102056503296, + "learning_rate": 9.826172489073522e-05, + "loss": 4.7846, + "step": 98700 + }, + { + "epoch": 0.4250584320831952, + "grad_norm": 4.598142623901367, + "learning_rate": 9.825995561549362e-05, + "loss": 4.9401, + "step": 98750 + }, + { + "epoch": 0.42527365154247787, + "grad_norm": 0.6797985434532166, + "learning_rate": 9.825818545624074e-05, + "loss": 4.6298, + "step": 98800 + }, + { + "epoch": 0.42548887100176047, + "grad_norm": 3.6285688877105713, + "learning_rate": 9.825641441300905e-05, + "loss": 5.1647, + "step": 98850 + }, + { + "epoch": 0.4257040904610431, + "grad_norm": 1.4113253355026245, + "learning_rate": 9.825464248583096e-05, + "loss": 5.4223, + "step": 98900 + }, + { + "epoch": 0.4259193099203258, + "grad_norm": 3.438866138458252, + "learning_rate": 9.825286967473893e-05, + "loss": 4.8073, + "step": 98950 + }, + { + "epoch": 0.4261345293796084, + "grad_norm": 3.985208511352539, + "learning_rate": 9.825109597976545e-05, + "loss": 4.1927, + "step": 99000 + }, + { + "epoch": 0.4261345293796084, + "eval_loss": 5.46209192276001, + "eval_runtime": 34.9183, + "eval_samples_per_second": 18.329, + "eval_steps_per_second": 9.164, + "eval_tts_loss": 6.635383632162362, + "step": 99000 + }, + { + "epoch": 0.42634974883889104, + "grad_norm": 1.927975058555603, + "learning_rate": 9.824932140094298e-05, + "loss": 5.1543, + "step": 99050 + }, + { + "epoch": 0.42656496829817364, + "grad_norm": 1.8192040920257568, + "learning_rate": 9.824754593830405e-05, + "loss": 5.0512, + "step": 99100 + }, + { + "epoch": 0.4267801877574563, + "grad_norm": 2.472343921661377, + "learning_rate": 9.824576959188118e-05, + "loss": 4.662, + "step": 99150 + }, + { + "epoch": 0.4269954072167389, + "grad_norm": 1.499601125717163, + "learning_rate": 9.82439923617069e-05, + "loss": 5.1749, + "step": 99200 + }, + { + "epoch": 0.42721062667602155, + "grad_norm": 1.6917864084243774, + "learning_rate": 9.824221424781378e-05, + "loss": 5.1336, + "step": 99250 + }, + { + "epoch": 0.42742584613530415, + "grad_norm": 2.468897581100464, + "learning_rate": 9.824043525023437e-05, + "loss": 4.9779, + "step": 99300 + }, + { + "epoch": 0.4276410655945868, + "grad_norm": 1.7957125902175903, + "learning_rate": 9.823865536900127e-05, + "loss": 5.119, + "step": 99350 + }, + { + "epoch": 0.4278562850538694, + "grad_norm": 2.750459671020508, + "learning_rate": 9.823687460414708e-05, + "loss": 5.432, + "step": 99400 + }, + { + "epoch": 0.42807150451315207, + "grad_norm": 1.5533593893051147, + "learning_rate": 9.823509295570442e-05, + "loss": 5.3583, + "step": 99450 + }, + { + "epoch": 0.42828672397243467, + "grad_norm": 2.7905678749084473, + "learning_rate": 9.823331042370593e-05, + "loss": 4.9588, + "step": 99500 + }, + { + "epoch": 0.4285019434317173, + "grad_norm": 2.499770164489746, + "learning_rate": 9.823152700818426e-05, + "loss": 5.2599, + "step": 99550 + }, + { + "epoch": 0.4287171628909999, + "grad_norm": 2.9524567127227783, + "learning_rate": 9.822974270917207e-05, + "loss": 5.2023, + "step": 99600 + }, + { + "epoch": 0.4289323823502826, + "grad_norm": 1.985453486442566, + "learning_rate": 9.822795752670205e-05, + "loss": 4.781, + "step": 99650 + }, + { + "epoch": 0.42914760180956524, + "grad_norm": 0.8984674215316772, + "learning_rate": 9.82261714608069e-05, + "loss": 5.2502, + "step": 99700 + }, + { + "epoch": 0.42936282126884784, + "grad_norm": 3.6878294944763184, + "learning_rate": 9.822438451151935e-05, + "loss": 5.1323, + "step": 99750 + }, + { + "epoch": 0.4295780407281305, + "grad_norm": 2.810225009918213, + "learning_rate": 9.82225966788721e-05, + "loss": 5.3076, + "step": 99800 + }, + { + "epoch": 0.4297932601874131, + "grad_norm": 0.7105533480644226, + "learning_rate": 9.822080796289793e-05, + "loss": 5.1916, + "step": 99850 + }, + { + "epoch": 0.43000847964669575, + "grad_norm": 2.5738778114318848, + "learning_rate": 9.821901836362959e-05, + "loss": 5.0618, + "step": 99900 + }, + { + "epoch": 0.43022369910597835, + "grad_norm": 2.3180606365203857, + "learning_rate": 9.821722788109987e-05, + "loss": 4.6797, + "step": 99950 + }, + { + "epoch": 0.430438918565261, + "grad_norm": 1.8972946405410767, + "learning_rate": 9.821543651534157e-05, + "loss": 5.1786, + "step": 100000 + }, + { + "epoch": 0.4306541380245436, + "grad_norm": 4.466055393218994, + "learning_rate": 9.821364426638749e-05, + "loss": 5.36, + "step": 100050 + }, + { + "epoch": 0.43086935748382627, + "grad_norm": 1.9708633422851562, + "learning_rate": 9.821185113427044e-05, + "loss": 4.9698, + "step": 100100 + }, + { + "epoch": 0.43108457694310887, + "grad_norm": 2.5800321102142334, + "learning_rate": 9.821005711902333e-05, + "loss": 5.0111, + "step": 100150 + }, + { + "epoch": 0.4312997964023915, + "grad_norm": 1.7961292266845703, + "learning_rate": 9.820826222067899e-05, + "loss": 5.4653, + "step": 100200 + }, + { + "epoch": 0.4315150158616741, + "grad_norm": 2.070439338684082, + "learning_rate": 9.820646643927027e-05, + "loss": 5.1117, + "step": 100250 + }, + { + "epoch": 0.4317302353209568, + "grad_norm": 3.5072202682495117, + "learning_rate": 9.820466977483009e-05, + "loss": 5.1687, + "step": 100300 + }, + { + "epoch": 0.43194545478023944, + "grad_norm": 2.3685004711151123, + "learning_rate": 9.820287222739135e-05, + "loss": 4.8709, + "step": 100350 + }, + { + "epoch": 0.43216067423952204, + "grad_norm": 2.1142501831054688, + "learning_rate": 9.820107379698701e-05, + "loss": 5.2279, + "step": 100400 + }, + { + "epoch": 0.4323758936988047, + "grad_norm": 2.9170684814453125, + "learning_rate": 9.819927448364999e-05, + "loss": 5.0345, + "step": 100450 + }, + { + "epoch": 0.4325911131580873, + "grad_norm": 2.612673282623291, + "learning_rate": 9.819747428741321e-05, + "loss": 5.0724, + "step": 100500 + }, + { + "epoch": 0.43280633261736995, + "grad_norm": 1.6060080528259277, + "learning_rate": 9.819567320830971e-05, + "loss": 4.7463, + "step": 100550 + }, + { + "epoch": 0.43302155207665255, + "grad_norm": 2.8487024307250977, + "learning_rate": 9.819387124637246e-05, + "loss": 4.8495, + "step": 100600 + }, + { + "epoch": 0.4332367715359352, + "grad_norm": 2.477750301361084, + "learning_rate": 9.819206840163447e-05, + "loss": 4.891, + "step": 100650 + }, + { + "epoch": 0.4334519909952178, + "grad_norm": 3.8155646324157715, + "learning_rate": 9.819026467412873e-05, + "loss": 4.3894, + "step": 100700 + }, + { + "epoch": 0.43366721045450046, + "grad_norm": 0.6117767095565796, + "learning_rate": 9.818846006388833e-05, + "loss": 4.9096, + "step": 100750 + }, + { + "epoch": 0.43388242991378306, + "grad_norm": 1.511406421661377, + "learning_rate": 9.818665457094629e-05, + "loss": 5.3093, + "step": 100800 + }, + { + "epoch": 0.4340976493730657, + "grad_norm": 1.249752163887024, + "learning_rate": 9.81848481953357e-05, + "loss": 4.8553, + "step": 100850 + }, + { + "epoch": 0.4343128688323483, + "grad_norm": 1.0230920314788818, + "learning_rate": 9.818304093708965e-05, + "loss": 4.6546, + "step": 100900 + }, + { + "epoch": 0.434528088291631, + "grad_norm": 2.3194704055786133, + "learning_rate": 9.818123279624123e-05, + "loss": 4.8926, + "step": 100950 + }, + { + "epoch": 0.43474330775091363, + "grad_norm": 3.799790143966675, + "learning_rate": 9.817942377282356e-05, + "loss": 5.0985, + "step": 101000 + }, + { + "epoch": 0.43495852721019623, + "grad_norm": 2.3841280937194824, + "learning_rate": 9.817761386686982e-05, + "loss": 4.8741, + "step": 101050 + }, + { + "epoch": 0.4351737466694789, + "grad_norm": 1.3638763427734375, + "learning_rate": 9.81758030784131e-05, + "loss": 5.271, + "step": 101100 + }, + { + "epoch": 0.4353889661287615, + "grad_norm": 2.3046109676361084, + "learning_rate": 9.817399140748662e-05, + "loss": 5.083, + "step": 101150 + }, + { + "epoch": 0.43560418558804415, + "grad_norm": 2.345134735107422, + "learning_rate": 9.817217885412354e-05, + "loss": 4.9077, + "step": 101200 + }, + { + "epoch": 0.43581940504732675, + "grad_norm": 2.2554519176483154, + "learning_rate": 9.817036541835706e-05, + "loss": 5.1306, + "step": 101250 + }, + { + "epoch": 0.4360346245066094, + "grad_norm": 3.0156350135803223, + "learning_rate": 9.81685511002204e-05, + "loss": 5.1789, + "step": 101300 + }, + { + "epoch": 0.436249843965892, + "grad_norm": 2.961275339126587, + "learning_rate": 9.816673589974682e-05, + "loss": 4.568, + "step": 101350 + }, + { + "epoch": 0.43646506342517466, + "grad_norm": 1.97943913936615, + "learning_rate": 9.816491981696953e-05, + "loss": 4.7637, + "step": 101400 + }, + { + "epoch": 0.43668028288445726, + "grad_norm": 9.106917381286621, + "learning_rate": 9.816310285192183e-05, + "loss": 5.2201, + "step": 101450 + }, + { + "epoch": 0.4368955023437399, + "grad_norm": 2.787698984146118, + "learning_rate": 9.8161285004637e-05, + "loss": 5.1951, + "step": 101500 + }, + { + "epoch": 0.4371107218030225, + "grad_norm": 3.4959516525268555, + "learning_rate": 9.815946627514829e-05, + "loss": 4.7617, + "step": 101550 + }, + { + "epoch": 0.4373259412623052, + "grad_norm": 1.5832973718643188, + "learning_rate": 9.815764666348909e-05, + "loss": 4.6692, + "step": 101600 + }, + { + "epoch": 0.43754116072158783, + "grad_norm": 0.35332444310188293, + "learning_rate": 9.815582616969267e-05, + "loss": 4.9216, + "step": 101650 + }, + { + "epoch": 0.43775638018087043, + "grad_norm": 1.8680223226547241, + "learning_rate": 9.815400479379242e-05, + "loss": 5.0935, + "step": 101700 + }, + { + "epoch": 0.4379715996401531, + "grad_norm": 1.225132703781128, + "learning_rate": 9.815218253582168e-05, + "loss": 5.1564, + "step": 101750 + }, + { + "epoch": 0.4381868190994357, + "grad_norm": 1.67378830909729, + "learning_rate": 9.815035939581382e-05, + "loss": 4.9245, + "step": 101800 + }, + { + "epoch": 0.43840203855871834, + "grad_norm": 1.950993537902832, + "learning_rate": 9.814853537380227e-05, + "loss": 5.2729, + "step": 101850 + }, + { + "epoch": 0.43861725801800094, + "grad_norm": 0.3046077787876129, + "learning_rate": 9.81467104698204e-05, + "loss": 5.0851, + "step": 101900 + }, + { + "epoch": 0.4388324774772836, + "grad_norm": 2.4163105487823486, + "learning_rate": 9.814488468390167e-05, + "loss": 5.2228, + "step": 101950 + }, + { + "epoch": 0.4390476969365662, + "grad_norm": 2.708658456802368, + "learning_rate": 9.814305801607952e-05, + "loss": 5.4283, + "step": 102000 + }, + { + "epoch": 0.4390476969365662, + "eval_loss": 5.449886798858643, + "eval_runtime": 35.209, + "eval_samples_per_second": 18.177, + "eval_steps_per_second": 9.089, + "eval_tts_loss": 6.575715528237652, + "step": 102000 + }, + { + "epoch": 0.43926291639584886, + "grad_norm": 2.405566930770874, + "learning_rate": 9.81412304663874e-05, + "loss": 4.8584, + "step": 102050 + }, + { + "epoch": 0.43947813585513146, + "grad_norm": 1.9931913614273071, + "learning_rate": 9.813940203485879e-05, + "loss": 5.1161, + "step": 102100 + }, + { + "epoch": 0.4396933553144141, + "grad_norm": 2.250798463821411, + "learning_rate": 9.813757272152717e-05, + "loss": 5.129, + "step": 102150 + }, + { + "epoch": 0.4399085747736967, + "grad_norm": 1.0229653120040894, + "learning_rate": 9.813574252642608e-05, + "loss": 4.4066, + "step": 102200 + }, + { + "epoch": 0.44012379423297937, + "grad_norm": 2.156139373779297, + "learning_rate": 9.813391144958904e-05, + "loss": 4.9545, + "step": 102250 + }, + { + "epoch": 0.440339013692262, + "grad_norm": 3.233355760574341, + "learning_rate": 9.813207949104954e-05, + "loss": 5.1113, + "step": 102300 + }, + { + "epoch": 0.4405542331515446, + "grad_norm": 2.1709530353546143, + "learning_rate": 9.81302466508412e-05, + "loss": 5.3261, + "step": 102350 + }, + { + "epoch": 0.4407694526108273, + "grad_norm": 2.7577016353607178, + "learning_rate": 9.812841292899757e-05, + "loss": 5.1634, + "step": 102400 + }, + { + "epoch": 0.4409846720701099, + "grad_norm": 1.8268043994903564, + "learning_rate": 9.812657832555223e-05, + "loss": 5.053, + "step": 102450 + }, + { + "epoch": 0.44119989152939254, + "grad_norm": 3.3673927783966064, + "learning_rate": 9.81247428405388e-05, + "loss": 5.4374, + "step": 102500 + }, + { + "epoch": 0.44141511098867514, + "grad_norm": 2.8773279190063477, + "learning_rate": 9.81229064739909e-05, + "loss": 5.0942, + "step": 102550 + }, + { + "epoch": 0.4416303304479578, + "grad_norm": 2.7338671684265137, + "learning_rate": 9.812106922594215e-05, + "loss": 5.2754, + "step": 102600 + }, + { + "epoch": 0.4418455499072404, + "grad_norm": 2.774035930633545, + "learning_rate": 9.811923109642623e-05, + "loss": 4.7011, + "step": 102650 + }, + { + "epoch": 0.44206076936652305, + "grad_norm": 2.2408525943756104, + "learning_rate": 9.81173920854768e-05, + "loss": 4.9768, + "step": 102700 + }, + { + "epoch": 0.44227598882580565, + "grad_norm": 2.4884462356567383, + "learning_rate": 9.811555219312753e-05, + "loss": 4.9964, + "step": 102750 + }, + { + "epoch": 0.4424912082850883, + "grad_norm": 2.76836895942688, + "learning_rate": 9.811371141941215e-05, + "loss": 5.1862, + "step": 102800 + }, + { + "epoch": 0.4427064277443709, + "grad_norm": 2.4185547828674316, + "learning_rate": 9.811186976436437e-05, + "loss": 4.9726, + "step": 102850 + }, + { + "epoch": 0.44292164720365357, + "grad_norm": 2.7168500423431396, + "learning_rate": 9.81100272280179e-05, + "loss": 4.9645, + "step": 102900 + }, + { + "epoch": 0.44313686666293617, + "grad_norm": 3.709196090698242, + "learning_rate": 9.810818381040654e-05, + "loss": 5.42, + "step": 102950 + }, + { + "epoch": 0.4433520861222188, + "grad_norm": 2.2150182723999023, + "learning_rate": 9.8106339511564e-05, + "loss": 4.8197, + "step": 103000 + }, + { + "epoch": 0.4435673055815015, + "grad_norm": 2.463442802429199, + "learning_rate": 9.810449433152413e-05, + "loss": 5.1162, + "step": 103050 + }, + { + "epoch": 0.4437825250407841, + "grad_norm": 2.1524229049682617, + "learning_rate": 9.810264827032066e-05, + "loss": 4.6847, + "step": 103100 + }, + { + "epoch": 0.44399774450006674, + "grad_norm": 2.272141933441162, + "learning_rate": 9.810080132798744e-05, + "loss": 4.7025, + "step": 103150 + }, + { + "epoch": 0.44421296395934934, + "grad_norm": 2.300902843475342, + "learning_rate": 9.809895350455832e-05, + "loss": 5.072, + "step": 103200 + }, + { + "epoch": 0.444428183418632, + "grad_norm": 0.6872298121452332, + "learning_rate": 9.809710480006711e-05, + "loss": 5.0904, + "step": 103250 + }, + { + "epoch": 0.4446434028779146, + "grad_norm": 1.874204397201538, + "learning_rate": 9.80952552145477e-05, + "loss": 5.0266, + "step": 103300 + }, + { + "epoch": 0.44485862233719725, + "grad_norm": 2.1113967895507812, + "learning_rate": 9.809340474803397e-05, + "loss": 4.9928, + "step": 103350 + }, + { + "epoch": 0.44507384179647985, + "grad_norm": 1.6547836065292358, + "learning_rate": 9.809155340055979e-05, + "loss": 4.6932, + "step": 103400 + }, + { + "epoch": 0.4452890612557625, + "grad_norm": 2.8495075702667236, + "learning_rate": 9.80897011721591e-05, + "loss": 4.852, + "step": 103450 + }, + { + "epoch": 0.4455042807150451, + "grad_norm": 1.4892014265060425, + "learning_rate": 9.808784806286582e-05, + "loss": 4.9498, + "step": 103500 + }, + { + "epoch": 0.44571950017432777, + "grad_norm": 1.5925055742263794, + "learning_rate": 9.808599407271388e-05, + "loss": 4.7661, + "step": 103550 + }, + { + "epoch": 0.44593471963361037, + "grad_norm": 1.7197164297103882, + "learning_rate": 9.808413920173727e-05, + "loss": 4.7921, + "step": 103600 + }, + { + "epoch": 0.446149939092893, + "grad_norm": 2.6712424755096436, + "learning_rate": 9.808228344996993e-05, + "loss": 5.075, + "step": 103650 + }, + { + "epoch": 0.4463651585521757, + "grad_norm": 3.5875513553619385, + "learning_rate": 9.80804268174459e-05, + "loss": 4.3839, + "step": 103700 + }, + { + "epoch": 0.4465803780114583, + "grad_norm": 1.5879915952682495, + "learning_rate": 9.807856930419915e-05, + "loss": 4.7898, + "step": 103750 + }, + { + "epoch": 0.44679559747074094, + "grad_norm": 2.5947883129119873, + "learning_rate": 9.807671091026371e-05, + "loss": 4.6301, + "step": 103800 + }, + { + "epoch": 0.44701081693002354, + "grad_norm": 3.487008571624756, + "learning_rate": 9.807485163567365e-05, + "loss": 4.7253, + "step": 103850 + }, + { + "epoch": 0.4472260363893062, + "grad_norm": 5.137566566467285, + "learning_rate": 9.8072991480463e-05, + "loss": 4.7143, + "step": 103900 + }, + { + "epoch": 0.4474412558485888, + "grad_norm": 2.667692184448242, + "learning_rate": 9.807113044466586e-05, + "loss": 4.7059, + "step": 103950 + }, + { + "epoch": 0.44765647530787145, + "grad_norm": 1.1021875143051147, + "learning_rate": 9.806926852831627e-05, + "loss": 5.1764, + "step": 104000 + }, + { + "epoch": 0.44787169476715405, + "grad_norm": 0.6162276268005371, + "learning_rate": 9.806740573144838e-05, + "loss": 4.7789, + "step": 104050 + }, + { + "epoch": 0.4480869142264367, + "grad_norm": 2.188039541244507, + "learning_rate": 9.80655420540963e-05, + "loss": 4.9189, + "step": 104100 + }, + { + "epoch": 0.4483021336857193, + "grad_norm": 2.2997829914093018, + "learning_rate": 9.806367749629417e-05, + "loss": 4.778, + "step": 104150 + }, + { + "epoch": 0.44851735314500196, + "grad_norm": 1.7106226682662964, + "learning_rate": 9.806181205807615e-05, + "loss": 4.9625, + "step": 104200 + }, + { + "epoch": 0.44873257260428456, + "grad_norm": 1.5364097356796265, + "learning_rate": 9.805994573947639e-05, + "loss": 5.2496, + "step": 104250 + }, + { + "epoch": 0.4489477920635672, + "grad_norm": 1.500243067741394, + "learning_rate": 9.80580785405291e-05, + "loss": 4.8605, + "step": 104300 + }, + { + "epoch": 0.4491630115228499, + "grad_norm": 1.9490214586257935, + "learning_rate": 9.805621046126846e-05, + "loss": 5.1333, + "step": 104350 + }, + { + "epoch": 0.4493782309821325, + "grad_norm": 3.6763861179351807, + "learning_rate": 9.80543415017287e-05, + "loss": 5.2342, + "step": 104400 + }, + { + "epoch": 0.44959345044141513, + "grad_norm": 0.9905281066894531, + "learning_rate": 9.805247166194408e-05, + "loss": 4.8308, + "step": 104450 + }, + { + "epoch": 0.44980866990069773, + "grad_norm": 1.9247649908065796, + "learning_rate": 9.805060094194882e-05, + "loss": 5.1407, + "step": 104500 + }, + { + "epoch": 0.4500238893599804, + "grad_norm": 1.3250036239624023, + "learning_rate": 9.804872934177718e-05, + "loss": 5.2111, + "step": 104550 + }, + { + "epoch": 0.450239108819263, + "grad_norm": 3.0906357765197754, + "learning_rate": 9.804685686146346e-05, + "loss": 5.5765, + "step": 104600 + }, + { + "epoch": 0.45045432827854565, + "grad_norm": 2.4508326053619385, + "learning_rate": 9.804498350104197e-05, + "loss": 5.4208, + "step": 104650 + }, + { + "epoch": 0.45066954773782825, + "grad_norm": 1.7106306552886963, + "learning_rate": 9.8043109260547e-05, + "loss": 5.0902, + "step": 104700 + }, + { + "epoch": 0.4508847671971109, + "grad_norm": 1.8744380474090576, + "learning_rate": 9.804123414001291e-05, + "loss": 4.9924, + "step": 104750 + }, + { + "epoch": 0.4510999866563935, + "grad_norm": 4.179686069488525, + "learning_rate": 9.803935813947402e-05, + "loss": 5.0181, + "step": 104800 + }, + { + "epoch": 0.45131520611567616, + "grad_norm": 1.260206699371338, + "learning_rate": 9.803748125896472e-05, + "loss": 5.0322, + "step": 104850 + }, + { + "epoch": 0.45153042557495876, + "grad_norm": 2.2692677974700928, + "learning_rate": 9.803560349851937e-05, + "loss": 4.987, + "step": 104900 + }, + { + "epoch": 0.4517456450342414, + "grad_norm": 2.6680266857147217, + "learning_rate": 9.803372485817237e-05, + "loss": 4.6478, + "step": 104950 + }, + { + "epoch": 0.4519608644935241, + "grad_norm": 1.6980055570602417, + "learning_rate": 9.803184533795815e-05, + "loss": 5.0147, + "step": 105000 + }, + { + "epoch": 0.4519608644935241, + "eval_loss": 5.427267551422119, + "eval_runtime": 35.1754, + "eval_samples_per_second": 18.195, + "eval_steps_per_second": 9.097, + "eval_tts_loss": 6.602801948402629, + "step": 105000 + }, + { + "epoch": 0.4521760839528067, + "grad_norm": 1.662852168083191, + "learning_rate": 9.802996493791112e-05, + "loss": 4.7558, + "step": 105050 + }, + { + "epoch": 0.45239130341208933, + "grad_norm": 1.8274260759353638, + "learning_rate": 9.802808365806572e-05, + "loss": 5.2964, + "step": 105100 + }, + { + "epoch": 0.45260652287137193, + "grad_norm": 1.887987494468689, + "learning_rate": 9.802620149845643e-05, + "loss": 4.8719, + "step": 105150 + }, + { + "epoch": 0.4528217423306546, + "grad_norm": 1.860149621963501, + "learning_rate": 9.80243184591177e-05, + "loss": 4.9149, + "step": 105200 + }, + { + "epoch": 0.4530369617899372, + "grad_norm": 2.683406114578247, + "learning_rate": 9.802243454008406e-05, + "loss": 4.9236, + "step": 105250 + }, + { + "epoch": 0.45325218124921984, + "grad_norm": 2.2835843563079834, + "learning_rate": 9.802054974138998e-05, + "loss": 4.824, + "step": 105300 + }, + { + "epoch": 0.45346740070850244, + "grad_norm": 3.2986581325531006, + "learning_rate": 9.801866406307e-05, + "loss": 4.4871, + "step": 105350 + }, + { + "epoch": 0.4536826201677851, + "grad_norm": 1.20033597946167, + "learning_rate": 9.801677750515869e-05, + "loss": 4.5081, + "step": 105400 + }, + { + "epoch": 0.4538978396270677, + "grad_norm": 1.3738596439361572, + "learning_rate": 9.801489006769057e-05, + "loss": 5.0578, + "step": 105450 + }, + { + "epoch": 0.45411305908635036, + "grad_norm": 2.098691940307617, + "learning_rate": 9.801300175070022e-05, + "loss": 4.8646, + "step": 105500 + }, + { + "epoch": 0.45432827854563296, + "grad_norm": 2.5167553424835205, + "learning_rate": 9.801111255422224e-05, + "loss": 5.1228, + "step": 105550 + }, + { + "epoch": 0.4545434980049156, + "grad_norm": 2.40883207321167, + "learning_rate": 9.800922247829123e-05, + "loss": 5.2453, + "step": 105600 + }, + { + "epoch": 0.45475871746419827, + "grad_norm": 1.283543348312378, + "learning_rate": 9.800733152294182e-05, + "loss": 4.7288, + "step": 105650 + }, + { + "epoch": 0.45497393692348087, + "grad_norm": 0.42188867926597595, + "learning_rate": 9.800543968820863e-05, + "loss": 5.0244, + "step": 105700 + }, + { + "epoch": 0.4551891563827635, + "grad_norm": 3.0093963146209717, + "learning_rate": 9.800354697412634e-05, + "loss": 4.8447, + "step": 105750 + }, + { + "epoch": 0.4554043758420461, + "grad_norm": 1.5715078115463257, + "learning_rate": 9.800165338072959e-05, + "loss": 5.0959, + "step": 105800 + }, + { + "epoch": 0.4556195953013288, + "grad_norm": 2.358354330062866, + "learning_rate": 9.79997589080531e-05, + "loss": 5.4719, + "step": 105850 + }, + { + "epoch": 0.4558348147606114, + "grad_norm": 0.13123005628585815, + "learning_rate": 9.799786355613154e-05, + "loss": 4.7617, + "step": 105900 + }, + { + "epoch": 0.45605003421989404, + "grad_norm": 0.46421128511428833, + "learning_rate": 9.799596732499966e-05, + "loss": 4.9011, + "step": 105950 + }, + { + "epoch": 0.45626525367917664, + "grad_norm": 2.4603421688079834, + "learning_rate": 9.799407021469214e-05, + "loss": 4.9496, + "step": 106000 + }, + { + "epoch": 0.4564804731384593, + "grad_norm": 1.8149019479751587, + "learning_rate": 9.79921722252438e-05, + "loss": 5.3044, + "step": 106050 + }, + { + "epoch": 0.4566956925977419, + "grad_norm": 2.057365655899048, + "learning_rate": 9.799027335668935e-05, + "loss": 4.609, + "step": 106100 + }, + { + "epoch": 0.45691091205702455, + "grad_norm": 2.030459403991699, + "learning_rate": 9.798837360906362e-05, + "loss": 5.2899, + "step": 106150 + }, + { + "epoch": 0.45712613151630715, + "grad_norm": 2.442204475402832, + "learning_rate": 9.798647298240138e-05, + "loss": 4.7473, + "step": 106200 + }, + { + "epoch": 0.4573413509755898, + "grad_norm": 3.5632379055023193, + "learning_rate": 9.798457147673745e-05, + "loss": 5.3195, + "step": 106250 + }, + { + "epoch": 0.45755657043487247, + "grad_norm": 3.4968578815460205, + "learning_rate": 9.798266909210666e-05, + "loss": 4.7508, + "step": 106300 + }, + { + "epoch": 0.45777178989415507, + "grad_norm": 1.534586787223816, + "learning_rate": 9.798076582854386e-05, + "loss": 4.6484, + "step": 106350 + }, + { + "epoch": 0.4579870093534377, + "grad_norm": 1.3809458017349243, + "learning_rate": 9.797886168608393e-05, + "loss": 4.8572, + "step": 106400 + }, + { + "epoch": 0.4582022288127203, + "grad_norm": 2.7285237312316895, + "learning_rate": 9.797695666476172e-05, + "loss": 4.9629, + "step": 106450 + }, + { + "epoch": 0.458417448272003, + "grad_norm": 2.600465774536133, + "learning_rate": 9.797505076461213e-05, + "loss": 5.2722, + "step": 106500 + }, + { + "epoch": 0.4586326677312856, + "grad_norm": 1.6089060306549072, + "learning_rate": 9.797314398567009e-05, + "loss": 4.8094, + "step": 106550 + }, + { + "epoch": 0.45884788719056824, + "grad_norm": 2.5023016929626465, + "learning_rate": 9.79712363279705e-05, + "loss": 5.0266, + "step": 106600 + }, + { + "epoch": 0.45906310664985084, + "grad_norm": 1.9810258150100708, + "learning_rate": 9.796932779154835e-05, + "loss": 4.8309, + "step": 106650 + }, + { + "epoch": 0.4592783261091335, + "grad_norm": 2.747722864151001, + "learning_rate": 9.796741837643855e-05, + "loss": 5.2933, + "step": 106700 + }, + { + "epoch": 0.4594935455684161, + "grad_norm": 1.6246715784072876, + "learning_rate": 9.79655080826761e-05, + "loss": 4.714, + "step": 106750 + }, + { + "epoch": 0.45970876502769875, + "grad_norm": 2.237888813018799, + "learning_rate": 9.796359691029601e-05, + "loss": 5.2508, + "step": 106800 + }, + { + "epoch": 0.45992398448698135, + "grad_norm": 2.345644474029541, + "learning_rate": 9.796168485933326e-05, + "loss": 5.0279, + "step": 106850 + }, + { + "epoch": 0.460139203946264, + "grad_norm": 2.4095680713653564, + "learning_rate": 9.795977192982288e-05, + "loss": 4.5213, + "step": 106900 + }, + { + "epoch": 0.4603544234055466, + "grad_norm": 3.0461161136627197, + "learning_rate": 9.795785812179991e-05, + "loss": 4.7823, + "step": 106950 + }, + { + "epoch": 0.46056964286482927, + "grad_norm": 1.1952157020568848, + "learning_rate": 9.795594343529941e-05, + "loss": 4.7882, + "step": 107000 + }, + { + "epoch": 0.4607848623241119, + "grad_norm": 2.4469852447509766, + "learning_rate": 9.795402787035646e-05, + "loss": 4.8806, + "step": 107050 + }, + { + "epoch": 0.4610000817833945, + "grad_norm": 2.087371826171875, + "learning_rate": 9.795211142700613e-05, + "loss": 5.1314, + "step": 107100 + }, + { + "epoch": 0.4612153012426772, + "grad_norm": 3.0972111225128174, + "learning_rate": 9.795019410528355e-05, + "loss": 5.376, + "step": 107150 + }, + { + "epoch": 0.4614305207019598, + "grad_norm": 2.022634744644165, + "learning_rate": 9.79482759052238e-05, + "loss": 4.7702, + "step": 107200 + }, + { + "epoch": 0.46164574016124243, + "grad_norm": 2.9017980098724365, + "learning_rate": 9.794635682686207e-05, + "loss": 4.9203, + "step": 107250 + }, + { + "epoch": 0.46186095962052504, + "grad_norm": 0.8761736154556274, + "learning_rate": 9.794443687023348e-05, + "loss": 4.8867, + "step": 107300 + }, + { + "epoch": 0.4620761790798077, + "grad_norm": 2.4504597187042236, + "learning_rate": 9.794251603537321e-05, + "loss": 5.1777, + "step": 107350 + }, + { + "epoch": 0.4622913985390903, + "grad_norm": 7.264087200164795, + "learning_rate": 9.794059432231643e-05, + "loss": 4.6273, + "step": 107400 + }, + { + "epoch": 0.46250661799837295, + "grad_norm": 1.8326588869094849, + "learning_rate": 9.793867173109837e-05, + "loss": 5.3108, + "step": 107450 + }, + { + "epoch": 0.46272183745765555, + "grad_norm": 2.1767327785491943, + "learning_rate": 9.793674826175421e-05, + "loss": 4.4936, + "step": 107500 + }, + { + "epoch": 0.4629370569169382, + "grad_norm": 2.271338939666748, + "learning_rate": 9.793482391431923e-05, + "loss": 4.7958, + "step": 107550 + }, + { + "epoch": 0.4631522763762208, + "grad_norm": 2.8854825496673584, + "learning_rate": 9.793289868882862e-05, + "loss": 4.8682, + "step": 107600 + }, + { + "epoch": 0.46336749583550346, + "grad_norm": 1.33201003074646, + "learning_rate": 9.793097258531772e-05, + "loss": 4.664, + "step": 107650 + }, + { + "epoch": 0.4635827152947861, + "grad_norm": 1.966083288192749, + "learning_rate": 9.792904560382175e-05, + "loss": 4.5097, + "step": 107700 + }, + { + "epoch": 0.4637979347540687, + "grad_norm": 1.4725494384765625, + "learning_rate": 9.792711774437602e-05, + "loss": 5.0793, + "step": 107750 + }, + { + "epoch": 0.4640131542133514, + "grad_norm": 1.7039378881454468, + "learning_rate": 9.792518900701586e-05, + "loss": 4.6247, + "step": 107800 + }, + { + "epoch": 0.464228373672634, + "grad_norm": 2.823045015335083, + "learning_rate": 9.792325939177659e-05, + "loss": 5.0039, + "step": 107850 + }, + { + "epoch": 0.46444359313191663, + "grad_norm": 0.9179458022117615, + "learning_rate": 9.792132889869357e-05, + "loss": 5.148, + "step": 107900 + }, + { + "epoch": 0.46465881259119923, + "grad_norm": 2.9090540409088135, + "learning_rate": 9.791939752780212e-05, + "loss": 5.1645, + "step": 107950 + }, + { + "epoch": 0.4648740320504819, + "grad_norm": 4.288646697998047, + "learning_rate": 9.791746527913769e-05, + "loss": 5.0593, + "step": 108000 + }, + { + "epoch": 0.4648740320504819, + "eval_loss": 5.417452812194824, + "eval_runtime": 35.196, + "eval_samples_per_second": 18.184, + "eval_steps_per_second": 9.092, + "eval_tts_loss": 6.6709730898717385, + "step": 108000 + }, + { + "epoch": 0.4650892515097645, + "grad_norm": 2.8657386302948, + "learning_rate": 9.791553215273561e-05, + "loss": 5.0608, + "step": 108050 + }, + { + "epoch": 0.46530447096904715, + "grad_norm": 1.3798720836639404, + "learning_rate": 9.791359814863133e-05, + "loss": 4.9097, + "step": 108100 + }, + { + "epoch": 0.46551969042832975, + "grad_norm": 1.8591058254241943, + "learning_rate": 9.791166326686025e-05, + "loss": 4.7919, + "step": 108150 + }, + { + "epoch": 0.4657349098876124, + "grad_norm": 0.383480429649353, + "learning_rate": 9.790972750745781e-05, + "loss": 4.809, + "step": 108200 + }, + { + "epoch": 0.465950129346895, + "grad_norm": 2.1631577014923096, + "learning_rate": 9.79077908704595e-05, + "loss": 4.9126, + "step": 108250 + }, + { + "epoch": 0.46616534880617766, + "grad_norm": 2.373779773712158, + "learning_rate": 9.790585335590077e-05, + "loss": 5.1031, + "step": 108300 + }, + { + "epoch": 0.4663805682654603, + "grad_norm": 2.670522451400757, + "learning_rate": 9.790391496381713e-05, + "loss": 4.8072, + "step": 108350 + }, + { + "epoch": 0.4665957877247429, + "grad_norm": 2.3194613456726074, + "learning_rate": 9.790197569424406e-05, + "loss": 5.089, + "step": 108400 + }, + { + "epoch": 0.4668110071840256, + "grad_norm": 0.6755481362342834, + "learning_rate": 9.79000355472171e-05, + "loss": 4.9113, + "step": 108450 + }, + { + "epoch": 0.4670262266433082, + "grad_norm": 2.9145889282226562, + "learning_rate": 9.789809452277179e-05, + "loss": 4.9846, + "step": 108500 + }, + { + "epoch": 0.46724144610259083, + "grad_norm": 1.8787875175476074, + "learning_rate": 9.789615262094369e-05, + "loss": 4.8853, + "step": 108550 + }, + { + "epoch": 0.46745666556187343, + "grad_norm": 2.3472046852111816, + "learning_rate": 9.789420984176833e-05, + "loss": 4.9475, + "step": 108600 + }, + { + "epoch": 0.4676718850211561, + "grad_norm": 2.564995765686035, + "learning_rate": 9.789226618528135e-05, + "loss": 4.7877, + "step": 108650 + }, + { + "epoch": 0.4678871044804387, + "grad_norm": 0.8075962066650391, + "learning_rate": 9.789032165151834e-05, + "loss": 4.7737, + "step": 108700 + }, + { + "epoch": 0.46810232393972134, + "grad_norm": 1.9746900796890259, + "learning_rate": 9.788837624051492e-05, + "loss": 5.1553, + "step": 108750 + }, + { + "epoch": 0.46831754339900394, + "grad_norm": 3.8850109577178955, + "learning_rate": 9.78864299523067e-05, + "loss": 5.4191, + "step": 108800 + }, + { + "epoch": 0.4685327628582866, + "grad_norm": 2.782280683517456, + "learning_rate": 9.788448278692936e-05, + "loss": 4.5231, + "step": 108850 + }, + { + "epoch": 0.4687479823175692, + "grad_norm": 1.2259013652801514, + "learning_rate": 9.788253474441855e-05, + "loss": 4.3256, + "step": 108900 + }, + { + "epoch": 0.46896320177685186, + "grad_norm": 1.6840226650238037, + "learning_rate": 9.788058582480997e-05, + "loss": 4.8696, + "step": 108950 + }, + { + "epoch": 0.4691784212361345, + "grad_norm": 4.100315570831299, + "learning_rate": 9.78786360281393e-05, + "loss": 4.6479, + "step": 109000 + }, + { + "epoch": 0.4693936406954171, + "grad_norm": 0.9205912947654724, + "learning_rate": 9.787668535444226e-05, + "loss": 4.6935, + "step": 109050 + }, + { + "epoch": 0.46960886015469977, + "grad_norm": 1.7249467372894287, + "learning_rate": 9.787473380375461e-05, + "loss": 4.377, + "step": 109100 + }, + { + "epoch": 0.46982407961398237, + "grad_norm": 1.7901753187179565, + "learning_rate": 9.787278137611205e-05, + "loss": 4.6516, + "step": 109150 + }, + { + "epoch": 0.470039299073265, + "grad_norm": 3.026771068572998, + "learning_rate": 9.787082807155039e-05, + "loss": 4.9296, + "step": 109200 + }, + { + "epoch": 0.4702545185325476, + "grad_norm": 2.7552924156188965, + "learning_rate": 9.786887389010537e-05, + "loss": 5.0504, + "step": 109250 + }, + { + "epoch": 0.4704697379918303, + "grad_norm": 3.154627561569214, + "learning_rate": 9.786691883181283e-05, + "loss": 4.8664, + "step": 109300 + }, + { + "epoch": 0.4706849574511129, + "grad_norm": 2.6334855556488037, + "learning_rate": 9.786496289670854e-05, + "loss": 5.0546, + "step": 109350 + }, + { + "epoch": 0.47090017691039554, + "grad_norm": 0.8788037896156311, + "learning_rate": 9.786300608482837e-05, + "loss": 4.9779, + "step": 109400 + }, + { + "epoch": 0.47111539636967814, + "grad_norm": 2.270023822784424, + "learning_rate": 9.78610483962081e-05, + "loss": 4.9108, + "step": 109450 + }, + { + "epoch": 0.4713306158289608, + "grad_norm": 1.877614140510559, + "learning_rate": 9.785908983088367e-05, + "loss": 4.8638, + "step": 109500 + }, + { + "epoch": 0.4715458352882434, + "grad_norm": 1.6177771091461182, + "learning_rate": 9.785713038889091e-05, + "loss": 4.4156, + "step": 109550 + }, + { + "epoch": 0.47176105474752605, + "grad_norm": 2.0811679363250732, + "learning_rate": 9.78551700702657e-05, + "loss": 4.6514, + "step": 109600 + }, + { + "epoch": 0.4719762742068087, + "grad_norm": 6.606409549713135, + "learning_rate": 9.785320887504398e-05, + "loss": 5.231, + "step": 109650 + }, + { + "epoch": 0.4721914936660913, + "grad_norm": 3.02718186378479, + "learning_rate": 9.785124680326167e-05, + "loss": 4.9022, + "step": 109700 + }, + { + "epoch": 0.47240671312537397, + "grad_norm": 2.007220506668091, + "learning_rate": 9.784928385495471e-05, + "loss": 5.1506, + "step": 109750 + }, + { + "epoch": 0.47262193258465657, + "grad_norm": 1.6595892906188965, + "learning_rate": 9.784732003015904e-05, + "loss": 5.1939, + "step": 109800 + }, + { + "epoch": 0.4728371520439392, + "grad_norm": 1.7487945556640625, + "learning_rate": 9.784535532891063e-05, + "loss": 5.3276, + "step": 109850 + }, + { + "epoch": 0.4730523715032218, + "grad_norm": 0.7949261665344238, + "learning_rate": 9.78433897512455e-05, + "loss": 4.8761, + "step": 109900 + }, + { + "epoch": 0.4732675909625045, + "grad_norm": 2.1775283813476562, + "learning_rate": 9.784142329719963e-05, + "loss": 5.1346, + "step": 109950 + }, + { + "epoch": 0.4734828104217871, + "grad_norm": 1.6403969526290894, + "learning_rate": 9.783945596680905e-05, + "loss": 4.8597, + "step": 110000 + }, + { + "epoch": 0.47369802988106974, + "grad_norm": 0.9212824106216431, + "learning_rate": 9.783748776010979e-05, + "loss": 4.6636, + "step": 110050 + }, + { + "epoch": 0.47391324934035234, + "grad_norm": 3.2973568439483643, + "learning_rate": 9.783551867713792e-05, + "loss": 5.1508, + "step": 110100 + }, + { + "epoch": 0.474128468799635, + "grad_norm": 2.3524060249328613, + "learning_rate": 9.783354871792948e-05, + "loss": 4.4597, + "step": 110150 + }, + { + "epoch": 0.4743436882589176, + "grad_norm": 2.257934331893921, + "learning_rate": 9.783157788252057e-05, + "loss": 4.8128, + "step": 110200 + }, + { + "epoch": 0.47455890771820025, + "grad_norm": 4.591152667999268, + "learning_rate": 9.782960617094731e-05, + "loss": 5.1512, + "step": 110250 + }, + { + "epoch": 0.47477412717748285, + "grad_norm": 1.31900155544281, + "learning_rate": 9.78276335832458e-05, + "loss": 4.7981, + "step": 110300 + }, + { + "epoch": 0.4749893466367655, + "grad_norm": 0.869868814945221, + "learning_rate": 9.782566011945216e-05, + "loss": 5.0142, + "step": 110350 + }, + { + "epoch": 0.47520456609604816, + "grad_norm": 0.7561297416687012, + "learning_rate": 9.782368577960257e-05, + "loss": 4.4634, + "step": 110400 + }, + { + "epoch": 0.47541978555533077, + "grad_norm": 3.9145407676696777, + "learning_rate": 9.782171056373316e-05, + "loss": 4.853, + "step": 110450 + }, + { + "epoch": 0.4756350050146134, + "grad_norm": 2.450838327407837, + "learning_rate": 9.781973447188013e-05, + "loss": 4.925, + "step": 110500 + }, + { + "epoch": 0.475850224473896, + "grad_norm": 2.1092708110809326, + "learning_rate": 9.781775750407969e-05, + "loss": 5.1233, + "step": 110550 + }, + { + "epoch": 0.4760654439331787, + "grad_norm": 2.3649773597717285, + "learning_rate": 9.781577966036803e-05, + "loss": 5.0617, + "step": 110600 + }, + { + "epoch": 0.4762806633924613, + "grad_norm": 1.8446540832519531, + "learning_rate": 9.781380094078141e-05, + "loss": 4.8226, + "step": 110650 + }, + { + "epoch": 0.47649588285174393, + "grad_norm": 2.453632116317749, + "learning_rate": 9.781182134535604e-05, + "loss": 4.726, + "step": 110700 + }, + { + "epoch": 0.47671110231102654, + "grad_norm": 1.72090744972229, + "learning_rate": 9.780984087412819e-05, + "loss": 5.2217, + "step": 110750 + }, + { + "epoch": 0.4769263217703092, + "grad_norm": 3.0121781826019287, + "learning_rate": 9.780785952713417e-05, + "loss": 5.072, + "step": 110800 + }, + { + "epoch": 0.4771415412295918, + "grad_norm": 2.454059600830078, + "learning_rate": 9.780587730441022e-05, + "loss": 4.587, + "step": 110850 + }, + { + "epoch": 0.47735676068887445, + "grad_norm": 3.0630199909210205, + "learning_rate": 9.780389420599268e-05, + "loss": 4.9417, + "step": 110900 + }, + { + "epoch": 0.47757198014815705, + "grad_norm": 2.3443729877471924, + "learning_rate": 9.78019102319179e-05, + "loss": 4.8444, + "step": 110950 + }, + { + "epoch": 0.4777871996074397, + "grad_norm": 0.6958873867988586, + "learning_rate": 9.779992538222217e-05, + "loss": 4.5662, + "step": 111000 + }, + { + "epoch": 0.4777871996074397, + "eval_loss": 5.418324947357178, + "eval_runtime": 35.0853, + "eval_samples_per_second": 18.241, + "eval_steps_per_second": 9.121, + "eval_tts_loss": 6.602839937322053, + "step": 111000 + }, + { + "epoch": 0.47800241906672236, + "grad_norm": 1.8669241666793823, + "learning_rate": 9.779793965694188e-05, + "loss": 4.3981, + "step": 111050 + }, + { + "epoch": 0.47821763852600496, + "grad_norm": 2.271446943283081, + "learning_rate": 9.779595305611341e-05, + "loss": 5.0715, + "step": 111100 + }, + { + "epoch": 0.4784328579852876, + "grad_norm": 1.514321208000183, + "learning_rate": 9.779396557977314e-05, + "loss": 4.9657, + "step": 111150 + }, + { + "epoch": 0.4786480774445702, + "grad_norm": 0.9251741766929626, + "learning_rate": 9.779197722795745e-05, + "loss": 4.8331, + "step": 111200 + }, + { + "epoch": 0.4788632969038529, + "grad_norm": 2.9866724014282227, + "learning_rate": 9.77899880007028e-05, + "loss": 4.778, + "step": 111250 + }, + { + "epoch": 0.4790785163631355, + "grad_norm": 3.680928945541382, + "learning_rate": 9.778799789804563e-05, + "loss": 5.1706, + "step": 111300 + }, + { + "epoch": 0.47929373582241813, + "grad_norm": 3.140465259552002, + "learning_rate": 9.778600692002235e-05, + "loss": 5.1485, + "step": 111350 + }, + { + "epoch": 0.47950895528170073, + "grad_norm": 2.9192395210266113, + "learning_rate": 9.778401506666948e-05, + "loss": 4.9677, + "step": 111400 + }, + { + "epoch": 0.4797241747409834, + "grad_norm": 1.953035831451416, + "learning_rate": 9.778202233802347e-05, + "loss": 5.253, + "step": 111450 + }, + { + "epoch": 0.479939394200266, + "grad_norm": 3.583052635192871, + "learning_rate": 9.778002873412084e-05, + "loss": 4.7197, + "step": 111500 + }, + { + "epoch": 0.48015461365954865, + "grad_norm": 2.8364875316619873, + "learning_rate": 9.777803425499811e-05, + "loss": 4.8642, + "step": 111550 + }, + { + "epoch": 0.48036983311883125, + "grad_norm": 2.2803900241851807, + "learning_rate": 9.77760389006918e-05, + "loss": 4.4416, + "step": 111600 + }, + { + "epoch": 0.4805850525781139, + "grad_norm": 3.0113635063171387, + "learning_rate": 9.777404267123848e-05, + "loss": 5.107, + "step": 111650 + }, + { + "epoch": 0.48080027203739656, + "grad_norm": 1.6337335109710693, + "learning_rate": 9.77720455666747e-05, + "loss": 4.7807, + "step": 111700 + }, + { + "epoch": 0.48101549149667916, + "grad_norm": 1.0058674812316895, + "learning_rate": 9.777004758703705e-05, + "loss": 4.857, + "step": 111750 + }, + { + "epoch": 0.4812307109559618, + "grad_norm": 2.8310530185699463, + "learning_rate": 9.776804873236212e-05, + "loss": 4.667, + "step": 111800 + }, + { + "epoch": 0.4814459304152444, + "grad_norm": 2.184800863265991, + "learning_rate": 9.776604900268654e-05, + "loss": 5.1806, + "step": 111850 + }, + { + "epoch": 0.4816611498745271, + "grad_norm": 2.718696117401123, + "learning_rate": 9.776404839804692e-05, + "loss": 4.797, + "step": 111900 + }, + { + "epoch": 0.4818763693338097, + "grad_norm": 2.3085544109344482, + "learning_rate": 9.776204691847991e-05, + "loss": 5.1832, + "step": 111950 + }, + { + "epoch": 0.48209158879309233, + "grad_norm": 2.281425714492798, + "learning_rate": 9.776004456402222e-05, + "loss": 5.0813, + "step": 112000 + }, + { + "epoch": 0.48230680825237493, + "grad_norm": 4.384036064147949, + "learning_rate": 9.775804133471044e-05, + "loss": 4.9217, + "step": 112050 + }, + { + "epoch": 0.4825220277116576, + "grad_norm": 0.5839751958847046, + "learning_rate": 9.775603723058136e-05, + "loss": 4.672, + "step": 112100 + }, + { + "epoch": 0.4827372471709402, + "grad_norm": 2.6911461353302, + "learning_rate": 9.775403225167162e-05, + "loss": 4.4128, + "step": 112150 + }, + { + "epoch": 0.48295246663022284, + "grad_norm": 3.2363626956939697, + "learning_rate": 9.775202639801796e-05, + "loss": 4.3452, + "step": 112200 + }, + { + "epoch": 0.48316768608950544, + "grad_norm": 1.925606608390808, + "learning_rate": 9.775001966965714e-05, + "loss": 4.9658, + "step": 112250 + }, + { + "epoch": 0.4833829055487881, + "grad_norm": 3.260385751724243, + "learning_rate": 9.774801206662591e-05, + "loss": 5.0781, + "step": 112300 + }, + { + "epoch": 0.48359812500807076, + "grad_norm": 5.040075778961182, + "learning_rate": 9.774600358896106e-05, + "loss": 4.6143, + "step": 112350 + }, + { + "epoch": 0.48381334446735336, + "grad_norm": 0.8774921894073486, + "learning_rate": 9.774399423669936e-05, + "loss": 4.6963, + "step": 112400 + }, + { + "epoch": 0.484028563926636, + "grad_norm": 2.3033411502838135, + "learning_rate": 9.774198400987762e-05, + "loss": 5.2021, + "step": 112450 + }, + { + "epoch": 0.4842437833859186, + "grad_norm": 1.8960646390914917, + "learning_rate": 9.773997290853268e-05, + "loss": 5.2344, + "step": 112500 + }, + { + "epoch": 0.48445900284520127, + "grad_norm": 2.0605854988098145, + "learning_rate": 9.773796093270135e-05, + "loss": 4.731, + "step": 112550 + }, + { + "epoch": 0.48467422230448387, + "grad_norm": 0.4957844018936157, + "learning_rate": 9.77359480824205e-05, + "loss": 4.7064, + "step": 112600 + }, + { + "epoch": 0.4848894417637665, + "grad_norm": 1.3461626768112183, + "learning_rate": 9.773393435772701e-05, + "loss": 4.8432, + "step": 112650 + }, + { + "epoch": 0.4851046612230491, + "grad_norm": 1.805510401725769, + "learning_rate": 9.773191975865775e-05, + "loss": 5.1488, + "step": 112700 + }, + { + "epoch": 0.4853198806823318, + "grad_norm": 2.363908529281616, + "learning_rate": 9.772990428524964e-05, + "loss": 5.1498, + "step": 112750 + }, + { + "epoch": 0.4855351001416144, + "grad_norm": 3.050687551498413, + "learning_rate": 9.772788793753957e-05, + "loss": 4.9339, + "step": 112800 + }, + { + "epoch": 0.48575031960089704, + "grad_norm": 1.0839442014694214, + "learning_rate": 9.772587071556451e-05, + "loss": 4.7748, + "step": 112850 + }, + { + "epoch": 0.48596553906017964, + "grad_norm": 2.422084093093872, + "learning_rate": 9.772385261936138e-05, + "loss": 4.9238, + "step": 112900 + }, + { + "epoch": 0.4861807585194623, + "grad_norm": 5.330097675323486, + "learning_rate": 9.772183364896718e-05, + "loss": 5.1076, + "step": 112950 + }, + { + "epoch": 0.48639597797874495, + "grad_norm": 1.9913737773895264, + "learning_rate": 9.771981380441888e-05, + "loss": 5.0252, + "step": 113000 + }, + { + "epoch": 0.48661119743802755, + "grad_norm": 3.201813220977783, + "learning_rate": 9.771779308575346e-05, + "loss": 5.366, + "step": 113050 + }, + { + "epoch": 0.4868264168973102, + "grad_norm": 1.1240105628967285, + "learning_rate": 9.771577149300794e-05, + "loss": 5.2966, + "step": 113100 + }, + { + "epoch": 0.4870416363565928, + "grad_norm": 1.4304618835449219, + "learning_rate": 9.771374902621938e-05, + "loss": 4.3877, + "step": 113150 + }, + { + "epoch": 0.48725685581587547, + "grad_norm": 2.1799392700195312, + "learning_rate": 9.77117256854248e-05, + "loss": 4.4816, + "step": 113200 + }, + { + "epoch": 0.48747207527515807, + "grad_norm": 2.842520236968994, + "learning_rate": 9.770970147066126e-05, + "loss": 5.2744, + "step": 113250 + }, + { + "epoch": 0.4876872947344407, + "grad_norm": 2.2544913291931152, + "learning_rate": 9.770767638196585e-05, + "loss": 5.1181, + "step": 113300 + }, + { + "epoch": 0.4879025141937233, + "grad_norm": 2.72629714012146, + "learning_rate": 9.770565041937567e-05, + "loss": 4.9218, + "step": 113350 + }, + { + "epoch": 0.488117733653006, + "grad_norm": 1.7599674463272095, + "learning_rate": 9.770362358292782e-05, + "loss": 4.2867, + "step": 113400 + }, + { + "epoch": 0.4883329531122886, + "grad_norm": 5.569612503051758, + "learning_rate": 9.770159587265943e-05, + "loss": 5.3068, + "step": 113450 + }, + { + "epoch": 0.48854817257157124, + "grad_norm": 2.5586915016174316, + "learning_rate": 9.769956728860765e-05, + "loss": 4.5899, + "step": 113500 + }, + { + "epoch": 0.48876339203085384, + "grad_norm": 3.936237096786499, + "learning_rate": 9.769753783080964e-05, + "loss": 5.062, + "step": 113550 + }, + { + "epoch": 0.4889786114901365, + "grad_norm": 1.6353505849838257, + "learning_rate": 9.769550749930256e-05, + "loss": 5.2127, + "step": 113600 + }, + { + "epoch": 0.4891938309494191, + "grad_norm": 2.215658187866211, + "learning_rate": 9.76934762941236e-05, + "loss": 5.4328, + "step": 113650 + }, + { + "epoch": 0.48940905040870175, + "grad_norm": 1.6356232166290283, + "learning_rate": 9.769144421530997e-05, + "loss": 5.2813, + "step": 113700 + }, + { + "epoch": 0.4896242698679844, + "grad_norm": 1.3151355981826782, + "learning_rate": 9.768941126289892e-05, + "loss": 5.1446, + "step": 113750 + }, + { + "epoch": 0.489839489327267, + "grad_norm": 1.6871289014816284, + "learning_rate": 9.768737743692766e-05, + "loss": 5.0808, + "step": 113800 + }, + { + "epoch": 0.49005470878654966, + "grad_norm": 3.131892681121826, + "learning_rate": 9.768534273743346e-05, + "loss": 4.82, + "step": 113850 + }, + { + "epoch": 0.49026992824583226, + "grad_norm": 1.2804864645004272, + "learning_rate": 9.768330716445357e-05, + "loss": 5.4837, + "step": 113900 + }, + { + "epoch": 0.4904851477051149, + "grad_norm": 3.3712987899780273, + "learning_rate": 9.768127071802531e-05, + "loss": 5.2126, + "step": 113950 + }, + { + "epoch": 0.4907003671643975, + "grad_norm": 2.0219838619232178, + "learning_rate": 9.767923339818595e-05, + "loss": 4.9114, + "step": 114000 + }, + { + "epoch": 0.4907003671643975, + "eval_loss": 5.401873588562012, + "eval_runtime": 35.0785, + "eval_samples_per_second": 18.245, + "eval_steps_per_second": 9.122, + "eval_tts_loss": 6.684323388107381, + "step": 114000 + }, + { + "epoch": 0.4909155866236802, + "grad_norm": 3.98518443107605, + "learning_rate": 9.767719520497283e-05, + "loss": 5.0939, + "step": 114050 + }, + { + "epoch": 0.4911308060829628, + "grad_norm": 3.3885338306427, + "learning_rate": 9.767515613842328e-05, + "loss": 5.0447, + "step": 114100 + }, + { + "epoch": 0.49134602554224543, + "grad_norm": 1.4417778253555298, + "learning_rate": 9.767311619857465e-05, + "loss": 4.811, + "step": 114150 + }, + { + "epoch": 0.49156124500152804, + "grad_norm": 1.7138901948928833, + "learning_rate": 9.767107538546429e-05, + "loss": 4.7791, + "step": 114200 + }, + { + "epoch": 0.4917764644608107, + "grad_norm": 2.0129969120025635, + "learning_rate": 9.76690336991296e-05, + "loss": 5.1028, + "step": 114250 + }, + { + "epoch": 0.4919916839200933, + "grad_norm": 2.312920331954956, + "learning_rate": 9.7666991139608e-05, + "loss": 5.2933, + "step": 114300 + }, + { + "epoch": 0.49220690337937595, + "grad_norm": 5.714343070983887, + "learning_rate": 9.766494770693687e-05, + "loss": 4.9037, + "step": 114350 + }, + { + "epoch": 0.4924221228386586, + "grad_norm": 2.2385706901550293, + "learning_rate": 9.766290340115365e-05, + "loss": 5.0284, + "step": 114400 + }, + { + "epoch": 0.4926373422979412, + "grad_norm": 2.9048755168914795, + "learning_rate": 9.76608582222958e-05, + "loss": 5.1729, + "step": 114450 + }, + { + "epoch": 0.49285256175722386, + "grad_norm": 4.474659442901611, + "learning_rate": 9.765881217040078e-05, + "loss": 4.9186, + "step": 114500 + }, + { + "epoch": 0.49306778121650646, + "grad_norm": 3.096486806869507, + "learning_rate": 9.765676524550604e-05, + "loss": 4.7613, + "step": 114550 + }, + { + "epoch": 0.4932830006757891, + "grad_norm": 2.9399023056030273, + "learning_rate": 9.765471744764911e-05, + "loss": 4.7911, + "step": 114600 + }, + { + "epoch": 0.4934982201350717, + "grad_norm": 1.5110983848571777, + "learning_rate": 9.76526687768675e-05, + "loss": 4.5377, + "step": 114650 + }, + { + "epoch": 0.4937134395943544, + "grad_norm": 1.8147931098937988, + "learning_rate": 9.765061923319871e-05, + "loss": 4.8204, + "step": 114700 + }, + { + "epoch": 0.493928659053637, + "grad_norm": 1.9541486501693726, + "learning_rate": 9.76485688166803e-05, + "loss": 5.1607, + "step": 114750 + }, + { + "epoch": 0.49414387851291963, + "grad_norm": 2.1750595569610596, + "learning_rate": 9.764651752734984e-05, + "loss": 5.0992, + "step": 114800 + }, + { + "epoch": 0.49435909797220223, + "grad_norm": 2.8394038677215576, + "learning_rate": 9.764446536524489e-05, + "loss": 5.1705, + "step": 114850 + }, + { + "epoch": 0.4945743174314849, + "grad_norm": 2.9970011711120605, + "learning_rate": 9.764241233040303e-05, + "loss": 4.5284, + "step": 114900 + }, + { + "epoch": 0.4947895368907675, + "grad_norm": 2.0068955421447754, + "learning_rate": 9.76403584228619e-05, + "loss": 5.062, + "step": 114950 + }, + { + "epoch": 0.49500475635005015, + "grad_norm": 2.364488124847412, + "learning_rate": 9.763830364265909e-05, + "loss": 4.4478, + "step": 115000 + }, + { + "epoch": 0.4952199758093328, + "grad_norm": 1.1546635627746582, + "learning_rate": 9.763624798983227e-05, + "loss": 5.2216, + "step": 115050 + }, + { + "epoch": 0.4954351952686154, + "grad_norm": 0.26819318532943726, + "learning_rate": 9.763419146441906e-05, + "loss": 5.0785, + "step": 115100 + }, + { + "epoch": 0.49565041472789806, + "grad_norm": 3.629939556121826, + "learning_rate": 9.763213406645713e-05, + "loss": 4.8174, + "step": 115150 + }, + { + "epoch": 0.49586563418718066, + "grad_norm": 2.795199394226074, + "learning_rate": 9.763007579598422e-05, + "loss": 4.6841, + "step": 115200 + }, + { + "epoch": 0.4960808536464633, + "grad_norm": 2.8344578742980957, + "learning_rate": 9.762801665303798e-05, + "loss": 5.3245, + "step": 115250 + }, + { + "epoch": 0.4962960731057459, + "grad_norm": 5.558097839355469, + "learning_rate": 9.762595663765615e-05, + "loss": 5.4329, + "step": 115300 + }, + { + "epoch": 0.4965112925650286, + "grad_norm": 4.053457260131836, + "learning_rate": 9.762389574987645e-05, + "loss": 4.7508, + "step": 115350 + }, + { + "epoch": 0.4967265120243112, + "grad_norm": 3.217935800552368, + "learning_rate": 9.762183398973665e-05, + "loss": 5.1535, + "step": 115400 + }, + { + "epoch": 0.49694173148359383, + "grad_norm": 2.751406669616699, + "learning_rate": 9.761977135727451e-05, + "loss": 5.1235, + "step": 115450 + }, + { + "epoch": 0.49715695094287643, + "grad_norm": 2.5760374069213867, + "learning_rate": 9.761770785252782e-05, + "loss": 4.8535, + "step": 115500 + }, + { + "epoch": 0.4973721704021591, + "grad_norm": 4.4301300048828125, + "learning_rate": 9.761564347553435e-05, + "loss": 4.9949, + "step": 115550 + }, + { + "epoch": 0.4975873898614417, + "grad_norm": 2.3735015392303467, + "learning_rate": 9.761357822633194e-05, + "loss": 4.8782, + "step": 115600 + }, + { + "epoch": 0.49780260932072434, + "grad_norm": 0.7045377492904663, + "learning_rate": 9.761151210495843e-05, + "loss": 4.8938, + "step": 115650 + }, + { + "epoch": 0.498017828780007, + "grad_norm": 2.780378580093384, + "learning_rate": 9.760944511145162e-05, + "loss": 4.6826, + "step": 115700 + }, + { + "epoch": 0.4982330482392896, + "grad_norm": 2.9126205444335938, + "learning_rate": 9.760737724584943e-05, + "loss": 5.1934, + "step": 115750 + }, + { + "epoch": 0.49844826769857226, + "grad_norm": 2.470390558242798, + "learning_rate": 9.760530850818971e-05, + "loss": 4.6646, + "step": 115800 + }, + { + "epoch": 0.49866348715785486, + "grad_norm": 2.5425360202789307, + "learning_rate": 9.760323889851036e-05, + "loss": 4.5611, + "step": 115850 + }, + { + "epoch": 0.4988787066171375, + "grad_norm": 3.0800747871398926, + "learning_rate": 9.760116841684928e-05, + "loss": 5.3614, + "step": 115900 + }, + { + "epoch": 0.4990939260764201, + "grad_norm": 1.6835260391235352, + "learning_rate": 9.759909706324439e-05, + "loss": 4.447, + "step": 115950 + }, + { + "epoch": 0.49930914553570277, + "grad_norm": 2.2698984146118164, + "learning_rate": 9.759702483773367e-05, + "loss": 4.9095, + "step": 116000 + }, + { + "epoch": 0.49952436499498537, + "grad_norm": 2.113539934158325, + "learning_rate": 9.759495174035504e-05, + "loss": 4.8995, + "step": 116050 + }, + { + "epoch": 0.499739584454268, + "grad_norm": 2.7016212940216064, + "learning_rate": 9.75928777711465e-05, + "loss": 4.6533, + "step": 116100 + }, + { + "epoch": 0.4999548039135506, + "grad_norm": 2.9160850048065186, + "learning_rate": 9.759080293014602e-05, + "loss": 4.8814, + "step": 116150 + }, + { + "epoch": 0.5001700233728332, + "grad_norm": 1.0726102590560913, + "learning_rate": 9.758872721739161e-05, + "loss": 5.0675, + "step": 116200 + }, + { + "epoch": 0.5003852428321159, + "grad_norm": 2.361037492752075, + "learning_rate": 9.758665063292132e-05, + "loss": 4.7016, + "step": 116250 + }, + { + "epoch": 0.5006004622913985, + "grad_norm": 2.3903465270996094, + "learning_rate": 9.758457317677314e-05, + "loss": 4.6985, + "step": 116300 + }, + { + "epoch": 0.5008156817506811, + "grad_norm": 0.7180473208427429, + "learning_rate": 9.758249484898517e-05, + "loss": 4.9129, + "step": 116350 + }, + { + "epoch": 0.5010309012099639, + "grad_norm": 2.7031476497650146, + "learning_rate": 9.758041564959546e-05, + "loss": 5.0978, + "step": 116400 + }, + { + "epoch": 0.5012461206692465, + "grad_norm": 2.625885248184204, + "learning_rate": 9.757833557864208e-05, + "loss": 4.293, + "step": 116450 + }, + { + "epoch": 0.501461340128529, + "grad_norm": 1.800687551498413, + "learning_rate": 9.757625463616316e-05, + "loss": 4.575, + "step": 116500 + }, + { + "epoch": 0.5016765595878117, + "grad_norm": 2.0335988998413086, + "learning_rate": 9.757417282219682e-05, + "loss": 5.1789, + "step": 116550 + }, + { + "epoch": 0.5018917790470944, + "grad_norm": 2.680281639099121, + "learning_rate": 9.757209013678117e-05, + "loss": 5.2665, + "step": 116600 + }, + { + "epoch": 0.502106998506377, + "grad_norm": 2.598043441772461, + "learning_rate": 9.757000657995437e-05, + "loss": 5.2367, + "step": 116650 + }, + { + "epoch": 0.5023222179656596, + "grad_norm": 3.5090954303741455, + "learning_rate": 9.756792215175458e-05, + "loss": 5.1759, + "step": 116700 + }, + { + "epoch": 0.5025374374249422, + "grad_norm": 0.9201160073280334, + "learning_rate": 9.756583685222e-05, + "loss": 4.6134, + "step": 116750 + }, + { + "epoch": 0.5027526568842249, + "grad_norm": 1.9258708953857422, + "learning_rate": 9.756375068138881e-05, + "loss": 4.9346, + "step": 116800 + }, + { + "epoch": 0.5029678763435075, + "grad_norm": 2.117375135421753, + "learning_rate": 9.756166363929925e-05, + "loss": 4.7083, + "step": 116850 + }, + { + "epoch": 0.5031830958027901, + "grad_norm": 0.8093687891960144, + "learning_rate": 9.755957572598951e-05, + "loss": 5.1443, + "step": 116900 + }, + { + "epoch": 0.5033983152620727, + "grad_norm": 2.5095293521881104, + "learning_rate": 9.755748694149784e-05, + "loss": 4.5353, + "step": 116950 + }, + { + "epoch": 0.5036135347213554, + "grad_norm": 2.4617600440979004, + "learning_rate": 9.755539728586254e-05, + "loss": 5.1367, + "step": 117000 + }, + { + "epoch": 0.5036135347213554, + "eval_loss": 5.378237247467041, + "eval_runtime": 34.8814, + "eval_samples_per_second": 18.348, + "eval_steps_per_second": 9.174, + "eval_tts_loss": 6.602500068012613, + "step": 117000 + }, + { + "epoch": 0.503828754180638, + "grad_norm": 1.965749740600586, + "learning_rate": 9.755330675912187e-05, + "loss": 4.8805, + "step": 117050 + }, + { + "epoch": 0.5040439736399206, + "grad_norm": 2.0743064880371094, + "learning_rate": 9.755121536131411e-05, + "loss": 4.8372, + "step": 117100 + }, + { + "epoch": 0.5042591930992033, + "grad_norm": 2.355271100997925, + "learning_rate": 9.754912309247759e-05, + "loss": 5.2057, + "step": 117150 + }, + { + "epoch": 0.5044744125584859, + "grad_norm": 1.998244047164917, + "learning_rate": 9.754702995265061e-05, + "loss": 5.1125, + "step": 117200 + }, + { + "epoch": 0.5046896320177685, + "grad_norm": 2.4738030433654785, + "learning_rate": 9.754493594187153e-05, + "loss": 4.9978, + "step": 117250 + }, + { + "epoch": 0.5049048514770511, + "grad_norm": 1.4705935716629028, + "learning_rate": 9.75428410601787e-05, + "loss": 5.1477, + "step": 117300 + }, + { + "epoch": 0.5051200709363338, + "grad_norm": 2.050607919692993, + "learning_rate": 9.75407453076105e-05, + "loss": 4.9712, + "step": 117350 + }, + { + "epoch": 0.5053352903956164, + "grad_norm": 2.465409278869629, + "learning_rate": 9.753864868420532e-05, + "loss": 4.6319, + "step": 117400 + }, + { + "epoch": 0.505550509854899, + "grad_norm": 3.503427028656006, + "learning_rate": 9.753655119000157e-05, + "loss": 5.1988, + "step": 117450 + }, + { + "epoch": 0.5057657293141816, + "grad_norm": 2.6836159229278564, + "learning_rate": 9.753445282503764e-05, + "loss": 4.8246, + "step": 117500 + }, + { + "epoch": 0.5059809487734643, + "grad_norm": 2.584301471710205, + "learning_rate": 9.753235358935201e-05, + "loss": 5.3329, + "step": 117550 + }, + { + "epoch": 0.5061961682327469, + "grad_norm": 2.490903854370117, + "learning_rate": 9.753025348298311e-05, + "loss": 4.9133, + "step": 117600 + }, + { + "epoch": 0.5064113876920295, + "grad_norm": 0.39302125573158264, + "learning_rate": 9.75281525059694e-05, + "loss": 4.7418, + "step": 117650 + }, + { + "epoch": 0.5066266071513122, + "grad_norm": 2.0212390422821045, + "learning_rate": 9.752605065834938e-05, + "loss": 4.8478, + "step": 117700 + }, + { + "epoch": 0.5068418266105948, + "grad_norm": 1.6751596927642822, + "learning_rate": 9.752394794016157e-05, + "loss": 4.8644, + "step": 117750 + }, + { + "epoch": 0.5070570460698774, + "grad_norm": 2.7252938747406006, + "learning_rate": 9.752184435144445e-05, + "loss": 4.9687, + "step": 117800 + }, + { + "epoch": 0.50727226552916, + "grad_norm": 2.1096818447113037, + "learning_rate": 9.751973989223656e-05, + "loss": 4.8352, + "step": 117850 + }, + { + "epoch": 0.5074874849884428, + "grad_norm": 2.2554686069488525, + "learning_rate": 9.751763456257647e-05, + "loss": 5.4, + "step": 117900 + }, + { + "epoch": 0.5077027044477254, + "grad_norm": 0.48765578866004944, + "learning_rate": 9.751552836250273e-05, + "loss": 4.4229, + "step": 117950 + }, + { + "epoch": 0.507917923907008, + "grad_norm": 0.9581682682037354, + "learning_rate": 9.751342129205391e-05, + "loss": 4.7085, + "step": 118000 + }, + { + "epoch": 0.5081331433662906, + "grad_norm": 2.1941518783569336, + "learning_rate": 9.751131335126862e-05, + "loss": 4.6009, + "step": 118050 + }, + { + "epoch": 0.5083483628255733, + "grad_norm": 1.740017294883728, + "learning_rate": 9.750920454018548e-05, + "loss": 5.0052, + "step": 118100 + }, + { + "epoch": 0.5085635822848559, + "grad_norm": 3.6457414627075195, + "learning_rate": 9.750709485884311e-05, + "loss": 4.8433, + "step": 118150 + }, + { + "epoch": 0.5087788017441385, + "grad_norm": 2.6969821453094482, + "learning_rate": 9.750498430728015e-05, + "loss": 4.8516, + "step": 118200 + }, + { + "epoch": 0.5089940212034211, + "grad_norm": 2.374300718307495, + "learning_rate": 9.750287288553529e-05, + "loss": 4.9479, + "step": 118250 + }, + { + "epoch": 0.5092092406627038, + "grad_norm": 2.2294113636016846, + "learning_rate": 9.750076059364714e-05, + "loss": 4.9711, + "step": 118300 + }, + { + "epoch": 0.5094244601219864, + "grad_norm": 1.7815632820129395, + "learning_rate": 9.749864743165447e-05, + "loss": 4.3005, + "step": 118350 + }, + { + "epoch": 0.509639679581269, + "grad_norm": 2.443844795227051, + "learning_rate": 9.749653339959593e-05, + "loss": 4.7694, + "step": 118400 + }, + { + "epoch": 0.5098548990405517, + "grad_norm": 3.5639920234680176, + "learning_rate": 9.749441849751027e-05, + "loss": 4.7254, + "step": 118450 + }, + { + "epoch": 0.5100701184998343, + "grad_norm": 2.833979606628418, + "learning_rate": 9.749230272543624e-05, + "loss": 4.9334, + "step": 118500 + }, + { + "epoch": 0.5102853379591169, + "grad_norm": 2.5641558170318604, + "learning_rate": 9.749018608341257e-05, + "loss": 5.1099, + "step": 118550 + }, + { + "epoch": 0.5105005574183995, + "grad_norm": 3.2041501998901367, + "learning_rate": 9.748806857147806e-05, + "loss": 4.8986, + "step": 118600 + }, + { + "epoch": 0.5107157768776822, + "grad_norm": 3.616576671600342, + "learning_rate": 9.748595018967146e-05, + "loss": 5.0019, + "step": 118650 + }, + { + "epoch": 0.5109309963369648, + "grad_norm": 1.9490107297897339, + "learning_rate": 9.74838309380316e-05, + "loss": 4.1615, + "step": 118700 + }, + { + "epoch": 0.5111462157962474, + "grad_norm": 0.5641140341758728, + "learning_rate": 9.748171081659731e-05, + "loss": 4.6894, + "step": 118750 + }, + { + "epoch": 0.51136143525553, + "grad_norm": 3.27207350730896, + "learning_rate": 9.747958982540741e-05, + "loss": 4.7879, + "step": 118800 + }, + { + "epoch": 0.5115766547148127, + "grad_norm": 1.9410041570663452, + "learning_rate": 9.747746796450074e-05, + "loss": 4.9334, + "step": 118850 + }, + { + "epoch": 0.5117918741740953, + "grad_norm": 1.9689726829528809, + "learning_rate": 9.747534523391619e-05, + "loss": 5.0575, + "step": 118900 + }, + { + "epoch": 0.5120070936333779, + "grad_norm": 3.8332056999206543, + "learning_rate": 9.747322163369265e-05, + "loss": 4.7147, + "step": 118950 + }, + { + "epoch": 0.5122223130926605, + "grad_norm": 2.4796667098999023, + "learning_rate": 9.747109716386898e-05, + "loss": 4.6686, + "step": 119000 + }, + { + "epoch": 0.5124375325519432, + "grad_norm": 1.4290963411331177, + "learning_rate": 9.746897182448414e-05, + "loss": 4.9777, + "step": 119050 + }, + { + "epoch": 0.5126527520112258, + "grad_norm": 0.8685661554336548, + "learning_rate": 9.746684561557702e-05, + "loss": 4.9772, + "step": 119100 + }, + { + "epoch": 0.5128679714705084, + "grad_norm": 1.0366863012313843, + "learning_rate": 9.74647185371866e-05, + "loss": 5.0479, + "step": 119150 + }, + { + "epoch": 0.5130831909297912, + "grad_norm": 2.023739814758301, + "learning_rate": 9.746259058935183e-05, + "loss": 5.3337, + "step": 119200 + }, + { + "epoch": 0.5132984103890738, + "grad_norm": 3.374821186065674, + "learning_rate": 9.74604617721117e-05, + "loss": 5.074, + "step": 119250 + }, + { + "epoch": 0.5135136298483564, + "grad_norm": 1.2162377834320068, + "learning_rate": 9.745833208550519e-05, + "loss": 4.8414, + "step": 119300 + }, + { + "epoch": 0.513728849307639, + "grad_norm": 0.7842527031898499, + "learning_rate": 9.745620152957131e-05, + "loss": 5.1477, + "step": 119350 + }, + { + "epoch": 0.5139440687669217, + "grad_norm": 2.4622910022735596, + "learning_rate": 9.74540701043491e-05, + "loss": 5.136, + "step": 119400 + }, + { + "epoch": 0.5141592882262043, + "grad_norm": 2.9381728172302246, + "learning_rate": 9.745193780987759e-05, + "loss": 4.8653, + "step": 119450 + }, + { + "epoch": 0.5143745076854869, + "grad_norm": 4.566466808319092, + "learning_rate": 9.744980464619585e-05, + "loss": 5.5592, + "step": 119500 + }, + { + "epoch": 0.5145897271447695, + "grad_norm": 2.2731590270996094, + "learning_rate": 9.744767061334295e-05, + "loss": 4.6288, + "step": 119550 + }, + { + "epoch": 0.5148049466040522, + "grad_norm": 3.0954577922821045, + "learning_rate": 9.744553571135798e-05, + "loss": 4.6058, + "step": 119600 + }, + { + "epoch": 0.5150201660633348, + "grad_norm": 2.0709095001220703, + "learning_rate": 9.744339994028004e-05, + "loss": 5.0284, + "step": 119650 + }, + { + "epoch": 0.5152353855226174, + "grad_norm": 1.6199077367782593, + "learning_rate": 9.744126330014826e-05, + "loss": 4.9903, + "step": 119700 + }, + { + "epoch": 0.5154506049819001, + "grad_norm": 2.6253106594085693, + "learning_rate": 9.743912579100179e-05, + "loss": 5.1988, + "step": 119750 + }, + { + "epoch": 0.5156658244411827, + "grad_norm": 2.805166482925415, + "learning_rate": 9.743698741287977e-05, + "loss": 4.8126, + "step": 119800 + }, + { + "epoch": 0.5158810439004653, + "grad_norm": 1.9894675016403198, + "learning_rate": 9.743484816582135e-05, + "loss": 4.4933, + "step": 119850 + }, + { + "epoch": 0.5160962633597479, + "grad_norm": 2.3717634677886963, + "learning_rate": 9.743270804986577e-05, + "loss": 4.8967, + "step": 119900 + }, + { + "epoch": 0.5163114828190306, + "grad_norm": 1.35196852684021, + "learning_rate": 9.743056706505218e-05, + "loss": 5.0236, + "step": 119950 + }, + { + "epoch": 0.5165267022783132, + "grad_norm": 2.6227035522460938, + "learning_rate": 9.742842521141982e-05, + "loss": 4.7181, + "step": 120000 + }, + { + "epoch": 0.5165267022783132, + "eval_loss": 5.369743347167969, + "eval_runtime": 34.8706, + "eval_samples_per_second": 18.354, + "eval_steps_per_second": 9.177, + "eval_tts_loss": 6.652991364198576, + "step": 120000 + }, + { + "epoch": 0.5167419217375958, + "grad_norm": 5.844998836517334, + "learning_rate": 9.742628248900792e-05, + "loss": 4.9268, + "step": 120050 + }, + { + "epoch": 0.5169571411968784, + "grad_norm": 2.3358314037323, + "learning_rate": 9.742413889785574e-05, + "loss": 4.7991, + "step": 120100 + }, + { + "epoch": 0.5171723606561611, + "grad_norm": 2.460012674331665, + "learning_rate": 9.742199443800253e-05, + "loss": 5.4036, + "step": 120150 + }, + { + "epoch": 0.5173875801154437, + "grad_norm": 2.8824820518493652, + "learning_rate": 9.741984910948758e-05, + "loss": 5.024, + "step": 120200 + }, + { + "epoch": 0.5176027995747263, + "grad_norm": 1.6432433128356934, + "learning_rate": 9.741770291235018e-05, + "loss": 4.9757, + "step": 120250 + }, + { + "epoch": 0.5178180190340089, + "grad_norm": 2.0175228118896484, + "learning_rate": 9.741555584662966e-05, + "loss": 4.8434, + "step": 120300 + }, + { + "epoch": 0.5180332384932916, + "grad_norm": 3.6207315921783447, + "learning_rate": 9.741340791236533e-05, + "loss": 4.4804, + "step": 120350 + }, + { + "epoch": 0.5182484579525742, + "grad_norm": 1.962744116783142, + "learning_rate": 9.741125910959655e-05, + "loss": 4.5257, + "step": 120400 + }, + { + "epoch": 0.5184636774118568, + "grad_norm": 2.867837905883789, + "learning_rate": 9.740910943836268e-05, + "loss": 4.5918, + "step": 120450 + }, + { + "epoch": 0.5186788968711395, + "grad_norm": 4.802084445953369, + "learning_rate": 9.740695889870308e-05, + "loss": 5.4082, + "step": 120500 + }, + { + "epoch": 0.5188941163304222, + "grad_norm": 2.4279139041900635, + "learning_rate": 9.740480749065716e-05, + "loss": 5.3153, + "step": 120550 + }, + { + "epoch": 0.5191093357897048, + "grad_norm": 2.430676221847534, + "learning_rate": 9.740265521426433e-05, + "loss": 5.1231, + "step": 120600 + }, + { + "epoch": 0.5193245552489874, + "grad_norm": 1.3881430625915527, + "learning_rate": 9.740050206956399e-05, + "loss": 5.2169, + "step": 120650 + }, + { + "epoch": 0.5195397747082701, + "grad_norm": 4.894848823547363, + "learning_rate": 9.739834805659562e-05, + "loss": 4.8904, + "step": 120700 + }, + { + "epoch": 0.5197549941675527, + "grad_norm": 2.7352755069732666, + "learning_rate": 9.739619317539865e-05, + "loss": 4.83, + "step": 120750 + }, + { + "epoch": 0.5199702136268353, + "grad_norm": 0.5433706045150757, + "learning_rate": 9.739403742601256e-05, + "loss": 4.7594, + "step": 120800 + }, + { + "epoch": 0.5201854330861179, + "grad_norm": 1.9404829740524292, + "learning_rate": 9.739188080847683e-05, + "loss": 5.1038, + "step": 120850 + }, + { + "epoch": 0.5204006525454006, + "grad_norm": 0.6687833666801453, + "learning_rate": 9.738972332283098e-05, + "loss": 5.3069, + "step": 120900 + }, + { + "epoch": 0.5206158720046832, + "grad_norm": 2.076805830001831, + "learning_rate": 9.738756496911452e-05, + "loss": 5.1299, + "step": 120950 + }, + { + "epoch": 0.5208310914639658, + "grad_norm": 1.8047771453857422, + "learning_rate": 9.738540574736697e-05, + "loss": 5.3171, + "step": 121000 + }, + { + "epoch": 0.5210463109232485, + "grad_norm": 1.4069586992263794, + "learning_rate": 9.738324565762792e-05, + "loss": 4.7583, + "step": 121050 + }, + { + "epoch": 0.5212615303825311, + "grad_norm": 0.753667950630188, + "learning_rate": 9.73810846999369e-05, + "loss": 4.9725, + "step": 121100 + }, + { + "epoch": 0.5214767498418137, + "grad_norm": 0.49219995737075806, + "learning_rate": 9.737892287433353e-05, + "loss": 4.7048, + "step": 121150 + }, + { + "epoch": 0.5216919693010963, + "grad_norm": 2.9225451946258545, + "learning_rate": 9.737676018085737e-05, + "loss": 4.1791, + "step": 121200 + }, + { + "epoch": 0.521907188760379, + "grad_norm": 2.8763861656188965, + "learning_rate": 9.737459661954807e-05, + "loss": 4.8637, + "step": 121250 + }, + { + "epoch": 0.5221224082196616, + "grad_norm": 2.8578853607177734, + "learning_rate": 9.737243219044525e-05, + "loss": 5.0027, + "step": 121300 + }, + { + "epoch": 0.5223376276789442, + "grad_norm": 1.9182426929473877, + "learning_rate": 9.737026689358854e-05, + "loss": 4.5825, + "step": 121350 + }, + { + "epoch": 0.5225528471382268, + "grad_norm": 3.1647000312805176, + "learning_rate": 9.736810072901764e-05, + "loss": 4.7455, + "step": 121400 + }, + { + "epoch": 0.5227680665975095, + "grad_norm": 2.4591758251190186, + "learning_rate": 9.736593369677219e-05, + "loss": 5.3553, + "step": 121450 + }, + { + "epoch": 0.5229832860567921, + "grad_norm": 5.212037086486816, + "learning_rate": 9.73637657968919e-05, + "loss": 4.7448, + "step": 121500 + }, + { + "epoch": 0.5231985055160747, + "grad_norm": 0.5842111706733704, + "learning_rate": 9.73615970294165e-05, + "loss": 5.2353, + "step": 121550 + }, + { + "epoch": 0.5234137249753573, + "grad_norm": 2.6291136741638184, + "learning_rate": 9.73594273943857e-05, + "loss": 5.1153, + "step": 121600 + }, + { + "epoch": 0.52362894443464, + "grad_norm": 0.4623349606990814, + "learning_rate": 9.735725689183923e-05, + "loss": 4.9115, + "step": 121650 + }, + { + "epoch": 0.5238441638939226, + "grad_norm": 2.6065869331359863, + "learning_rate": 9.735508552181686e-05, + "loss": 4.9665, + "step": 121700 + }, + { + "epoch": 0.5240593833532052, + "grad_norm": 2.051321029663086, + "learning_rate": 9.735291328435838e-05, + "loss": 4.8333, + "step": 121750 + }, + { + "epoch": 0.5242746028124879, + "grad_norm": 1.970245361328125, + "learning_rate": 9.735074017950354e-05, + "loss": 5.0963, + "step": 121800 + }, + { + "epoch": 0.5244898222717705, + "grad_norm": 2.499281644821167, + "learning_rate": 9.73485662072922e-05, + "loss": 4.9214, + "step": 121850 + }, + { + "epoch": 0.5247050417310531, + "grad_norm": 1.3328520059585571, + "learning_rate": 9.734639136776414e-05, + "loss": 4.9775, + "step": 121900 + }, + { + "epoch": 0.5249202611903357, + "grad_norm": 1.1158465147018433, + "learning_rate": 9.734421566095922e-05, + "loss": 4.9487, + "step": 121950 + }, + { + "epoch": 0.5251354806496185, + "grad_norm": 1.5102944374084473, + "learning_rate": 9.734203908691729e-05, + "loss": 4.6551, + "step": 122000 + }, + { + "epoch": 0.5253507001089011, + "grad_norm": 2.655609130859375, + "learning_rate": 9.73398616456782e-05, + "loss": 5.1076, + "step": 122050 + }, + { + "epoch": 0.5255659195681837, + "grad_norm": 1.730638861656189, + "learning_rate": 9.733768333728185e-05, + "loss": 5.0192, + "step": 122100 + }, + { + "epoch": 0.5257811390274663, + "grad_norm": 1.3290019035339355, + "learning_rate": 9.733550416176818e-05, + "loss": 4.6015, + "step": 122150 + }, + { + "epoch": 0.525996358486749, + "grad_norm": 1.8898437023162842, + "learning_rate": 9.733332411917704e-05, + "loss": 5.0843, + "step": 122200 + }, + { + "epoch": 0.5262115779460316, + "grad_norm": 1.6767356395721436, + "learning_rate": 9.733114320954841e-05, + "loss": 4.8521, + "step": 122250 + }, + { + "epoch": 0.5264267974053142, + "grad_norm": 1.820876955986023, + "learning_rate": 9.732896143292221e-05, + "loss": 5.5017, + "step": 122300 + }, + { + "epoch": 0.5266420168645969, + "grad_norm": 2.5992636680603027, + "learning_rate": 9.732677878933842e-05, + "loss": 4.4783, + "step": 122350 + }, + { + "epoch": 0.5268572363238795, + "grad_norm": 2.2159602642059326, + "learning_rate": 9.732459527883703e-05, + "loss": 4.4736, + "step": 122400 + }, + { + "epoch": 0.5270724557831621, + "grad_norm": 2.581139326095581, + "learning_rate": 9.732241090145802e-05, + "loss": 4.74, + "step": 122450 + }, + { + "epoch": 0.5272876752424447, + "grad_norm": 1.882226586341858, + "learning_rate": 9.732022565724141e-05, + "loss": 4.781, + "step": 122500 + }, + { + "epoch": 0.5275028947017274, + "grad_norm": 2.2536988258361816, + "learning_rate": 9.731803954622723e-05, + "loss": 4.9296, + "step": 122550 + }, + { + "epoch": 0.52771811416101, + "grad_norm": 3.24753737449646, + "learning_rate": 9.731585256845552e-05, + "loss": 5.0109, + "step": 122600 + }, + { + "epoch": 0.5279333336202926, + "grad_norm": 0.7132563591003418, + "learning_rate": 9.731366472396635e-05, + "loss": 4.5566, + "step": 122650 + }, + { + "epoch": 0.5281485530795752, + "grad_norm": 2.469709873199463, + "learning_rate": 9.731147601279979e-05, + "loss": 5.0279, + "step": 122700 + }, + { + "epoch": 0.5283637725388579, + "grad_norm": 1.6441599130630493, + "learning_rate": 9.730928643499593e-05, + "loss": 5.3179, + "step": 122750 + }, + { + "epoch": 0.5285789919981405, + "grad_norm": 1.4474691152572632, + "learning_rate": 9.730709599059488e-05, + "loss": 5.1883, + "step": 122800 + }, + { + "epoch": 0.5287942114574231, + "grad_norm": 3.3174502849578857, + "learning_rate": 9.730490467963675e-05, + "loss": 4.5322, + "step": 122850 + }, + { + "epoch": 0.5290094309167057, + "grad_norm": 0.323354572057724, + "learning_rate": 9.730271250216172e-05, + "loss": 4.5342, + "step": 122900 + }, + { + "epoch": 0.5292246503759884, + "grad_norm": 2.781018018722534, + "learning_rate": 9.73005194582099e-05, + "loss": 4.6179, + "step": 122950 + }, + { + "epoch": 0.529439869835271, + "grad_norm": 2.3235349655151367, + "learning_rate": 9.729832554782148e-05, + "loss": 4.746, + "step": 123000 + }, + { + "epoch": 0.529439869835271, + "eval_loss": 5.36830997467041, + "eval_runtime": 34.7331, + "eval_samples_per_second": 18.426, + "eval_steps_per_second": 9.213, + "eval_tts_loss": 6.663744048165308, + "step": 123000 + }, + { + "epoch": 0.5296550892945536, + "grad_norm": 1.85641610622406, + "learning_rate": 9.729613077103666e-05, + "loss": 5.2744, + "step": 123050 + }, + { + "epoch": 0.5298703087538363, + "grad_norm": 2.916466474533081, + "learning_rate": 9.729393512789561e-05, + "loss": 4.9271, + "step": 123100 + }, + { + "epoch": 0.5300855282131189, + "grad_norm": 3.1504263877868652, + "learning_rate": 9.729173861843859e-05, + "loss": 4.8656, + "step": 123150 + }, + { + "epoch": 0.5303007476724015, + "grad_norm": 2.1747829914093018, + "learning_rate": 9.728954124270582e-05, + "loss": 4.1357, + "step": 123200 + }, + { + "epoch": 0.5305159671316841, + "grad_norm": 3.545717716217041, + "learning_rate": 9.728734300073752e-05, + "loss": 5.14, + "step": 123250 + }, + { + "epoch": 0.5307311865909669, + "grad_norm": 5.794431686401367, + "learning_rate": 9.7285143892574e-05, + "loss": 4.5742, + "step": 123300 + }, + { + "epoch": 0.5309464060502495, + "grad_norm": 1.7299740314483643, + "learning_rate": 9.728294391825552e-05, + "loss": 4.9672, + "step": 123350 + }, + { + "epoch": 0.531161625509532, + "grad_norm": 1.4219121932983398, + "learning_rate": 9.72807430778224e-05, + "loss": 5.0834, + "step": 123400 + }, + { + "epoch": 0.5313768449688147, + "grad_norm": 2.349823474884033, + "learning_rate": 9.727854137131491e-05, + "loss": 4.7099, + "step": 123450 + }, + { + "epoch": 0.5315920644280974, + "grad_norm": 2.256392240524292, + "learning_rate": 9.727633879877344e-05, + "loss": 4.548, + "step": 123500 + }, + { + "epoch": 0.53180728388738, + "grad_norm": 3.261540174484253, + "learning_rate": 9.727413536023828e-05, + "loss": 5.0619, + "step": 123550 + }, + { + "epoch": 0.5320225033466626, + "grad_norm": 1.7902581691741943, + "learning_rate": 9.727193105574983e-05, + "loss": 5.0503, + "step": 123600 + }, + { + "epoch": 0.5322377228059452, + "grad_norm": 1.8719955682754517, + "learning_rate": 9.726972588534845e-05, + "loss": 5.0986, + "step": 123650 + }, + { + "epoch": 0.5324529422652279, + "grad_norm": 2.567262887954712, + "learning_rate": 9.726751984907454e-05, + "loss": 5.0159, + "step": 123700 + }, + { + "epoch": 0.5326681617245105, + "grad_norm": 2.5816195011138916, + "learning_rate": 9.72653129469685e-05, + "loss": 4.7832, + "step": 123750 + }, + { + "epoch": 0.5328833811837931, + "grad_norm": 1.9633246660232544, + "learning_rate": 9.726310517907077e-05, + "loss": 4.9778, + "step": 123800 + }, + { + "epoch": 0.5330986006430758, + "grad_norm": 4.04349422454834, + "learning_rate": 9.726089654542179e-05, + "loss": 4.7423, + "step": 123850 + }, + { + "epoch": 0.5333138201023584, + "grad_norm": 1.6754952669143677, + "learning_rate": 9.7258687046062e-05, + "loss": 4.7544, + "step": 123900 + }, + { + "epoch": 0.533529039561641, + "grad_norm": 2.926621198654175, + "learning_rate": 9.725647668103188e-05, + "loss": 4.7117, + "step": 123950 + }, + { + "epoch": 0.5337442590209236, + "grad_norm": 1.7496135234832764, + "learning_rate": 9.725426545037193e-05, + "loss": 4.8428, + "step": 124000 + }, + { + "epoch": 0.5339594784802063, + "grad_norm": 1.1248406171798706, + "learning_rate": 9.725205335412264e-05, + "loss": 4.8595, + "step": 124050 + }, + { + "epoch": 0.5341746979394889, + "grad_norm": 0.8906363844871521, + "learning_rate": 9.724984039232455e-05, + "loss": 5.1021, + "step": 124100 + }, + { + "epoch": 0.5343899173987715, + "grad_norm": 2.106297492980957, + "learning_rate": 9.724762656501818e-05, + "loss": 4.6384, + "step": 124150 + }, + { + "epoch": 0.5346051368580541, + "grad_norm": 1.7026995420455933, + "learning_rate": 9.72454118722441e-05, + "loss": 4.9259, + "step": 124200 + }, + { + "epoch": 0.5348203563173368, + "grad_norm": 3.221714735031128, + "learning_rate": 9.724319631404284e-05, + "loss": 5.0437, + "step": 124250 + }, + { + "epoch": 0.5350355757766194, + "grad_norm": 2.510545253753662, + "learning_rate": 9.724097989045502e-05, + "loss": 4.9178, + "step": 124300 + }, + { + "epoch": 0.535250795235902, + "grad_norm": 1.9899482727050781, + "learning_rate": 9.723876260152123e-05, + "loss": 5.1921, + "step": 124350 + }, + { + "epoch": 0.5354660146951847, + "grad_norm": 2.213951349258423, + "learning_rate": 9.723654444728207e-05, + "loss": 5.135, + "step": 124400 + }, + { + "epoch": 0.5356812341544673, + "grad_norm": 2.8186042308807373, + "learning_rate": 9.72343254277782e-05, + "loss": 5.0366, + "step": 124450 + }, + { + "epoch": 0.5358964536137499, + "grad_norm": 1.6994200944900513, + "learning_rate": 9.723210554305026e-05, + "loss": 4.9077, + "step": 124500 + }, + { + "epoch": 0.5361116730730325, + "grad_norm": 0.763529360294342, + "learning_rate": 9.722988479313889e-05, + "loss": 5.02, + "step": 124550 + }, + { + "epoch": 0.5363268925323152, + "grad_norm": 1.9500163793563843, + "learning_rate": 9.722766317808479e-05, + "loss": 4.9657, + "step": 124600 + }, + { + "epoch": 0.5365421119915978, + "grad_norm": 1.7281832695007324, + "learning_rate": 9.722544069792865e-05, + "loss": 4.8953, + "step": 124650 + }, + { + "epoch": 0.5367573314508804, + "grad_norm": 2.1841158866882324, + "learning_rate": 9.722321735271119e-05, + "loss": 4.9337, + "step": 124700 + }, + { + "epoch": 0.536972550910163, + "grad_norm": 3.0073513984680176, + "learning_rate": 9.722099314247311e-05, + "loss": 4.9917, + "step": 124750 + }, + { + "epoch": 0.5371877703694458, + "grad_norm": 2.779168128967285, + "learning_rate": 9.721876806725519e-05, + "loss": 5.0755, + "step": 124800 + }, + { + "epoch": 0.5374029898287284, + "grad_norm": 3.1692166328430176, + "learning_rate": 9.721654212709815e-05, + "loss": 4.5103, + "step": 124850 + }, + { + "epoch": 0.537618209288011, + "grad_norm": 3.06379771232605, + "learning_rate": 9.721431532204278e-05, + "loss": 4.7066, + "step": 124900 + }, + { + "epoch": 0.5378334287472936, + "grad_norm": 3.504709243774414, + "learning_rate": 9.721208765212987e-05, + "loss": 5.1579, + "step": 124950 + }, + { + "epoch": 0.5380486482065763, + "grad_norm": 2.8213260173797607, + "learning_rate": 9.720985911740025e-05, + "loss": 5.2899, + "step": 125000 + }, + { + "epoch": 0.5382638676658589, + "grad_norm": 1.025315284729004, + "learning_rate": 9.72076297178947e-05, + "loss": 4.4643, + "step": 125050 + }, + { + "epoch": 0.5384790871251415, + "grad_norm": 3.0031774044036865, + "learning_rate": 9.720539945365409e-05, + "loss": 5.5037, + "step": 125100 + }, + { + "epoch": 0.5386943065844242, + "grad_norm": 4.446894645690918, + "learning_rate": 9.720316832471925e-05, + "loss": 5.3472, + "step": 125150 + }, + { + "epoch": 0.5389095260437068, + "grad_norm": 1.4484431743621826, + "learning_rate": 9.720093633113107e-05, + "loss": 4.8477, + "step": 125200 + }, + { + "epoch": 0.5391247455029894, + "grad_norm": 0.6812838315963745, + "learning_rate": 9.719870347293041e-05, + "loss": 4.8429, + "step": 125250 + }, + { + "epoch": 0.539339964962272, + "grad_norm": 3.8786561489105225, + "learning_rate": 9.719646975015819e-05, + "loss": 5.1617, + "step": 125300 + }, + { + "epoch": 0.5395551844215547, + "grad_norm": 1.7482633590698242, + "learning_rate": 9.719423516285533e-05, + "loss": 5.3927, + "step": 125350 + }, + { + "epoch": 0.5397704038808373, + "grad_norm": 3.9857053756713867, + "learning_rate": 9.719199971106274e-05, + "loss": 4.4537, + "step": 125400 + }, + { + "epoch": 0.5399856233401199, + "grad_norm": 0.5762657523155212, + "learning_rate": 9.71897633948214e-05, + "loss": 5.0572, + "step": 125450 + }, + { + "epoch": 0.5402008427994025, + "grad_norm": 1.8341072797775269, + "learning_rate": 9.718752621417224e-05, + "loss": 5.0947, + "step": 125500 + }, + { + "epoch": 0.5404160622586852, + "grad_norm": 3.109468460083008, + "learning_rate": 9.718528816915626e-05, + "loss": 4.8243, + "step": 125550 + }, + { + "epoch": 0.5406312817179678, + "grad_norm": 2.856895923614502, + "learning_rate": 9.718304925981446e-05, + "loss": 4.5723, + "step": 125600 + }, + { + "epoch": 0.5408465011772504, + "grad_norm": 2.129800796508789, + "learning_rate": 9.718080948618784e-05, + "loss": 4.9891, + "step": 125650 + }, + { + "epoch": 0.5410617206365331, + "grad_norm": 3.594459295272827, + "learning_rate": 9.717856884831742e-05, + "loss": 5.044, + "step": 125700 + }, + { + "epoch": 0.5412769400958157, + "grad_norm": 2.3100948333740234, + "learning_rate": 9.717632734624426e-05, + "loss": 4.6695, + "step": 125750 + }, + { + "epoch": 0.5414921595550983, + "grad_norm": 0.8006340265274048, + "learning_rate": 9.717408498000943e-05, + "loss": 4.7454, + "step": 125800 + }, + { + "epoch": 0.5417073790143809, + "grad_norm": 3.175206422805786, + "learning_rate": 9.717184174965397e-05, + "loss": 4.8804, + "step": 125850 + }, + { + "epoch": 0.5419225984736636, + "grad_norm": 1.1306846141815186, + "learning_rate": 9.7169597655219e-05, + "loss": 5.0379, + "step": 125900 + }, + { + "epoch": 0.5421378179329462, + "grad_norm": 2.9317028522491455, + "learning_rate": 9.716735269674562e-05, + "loss": 4.7029, + "step": 125950 + }, + { + "epoch": 0.5423530373922288, + "grad_norm": 1.8526374101638794, + "learning_rate": 9.716510687427494e-05, + "loss": 5.1984, + "step": 126000 + }, + { + "epoch": 0.5423530373922288, + "eval_loss": 5.355620384216309, + "eval_runtime": 34.7687, + "eval_samples_per_second": 18.407, + "eval_steps_per_second": 9.204, + "eval_tts_loss": 6.705765888810959, + "step": 126000 + }, + { + "epoch": 0.5425682568515114, + "grad_norm": 2.7676072120666504, + "learning_rate": 9.716286018784812e-05, + "loss": 4.8712, + "step": 126050 + }, + { + "epoch": 0.5427834763107942, + "grad_norm": 2.780696392059326, + "learning_rate": 9.716061263750629e-05, + "loss": 4.709, + "step": 126100 + }, + { + "epoch": 0.5429986957700768, + "grad_norm": 3.3426220417022705, + "learning_rate": 9.715836422329063e-05, + "loss": 4.7528, + "step": 126150 + }, + { + "epoch": 0.5432139152293594, + "grad_norm": 2.7046868801116943, + "learning_rate": 9.715611494524233e-05, + "loss": 5.2901, + "step": 126200 + }, + { + "epoch": 0.543429134688642, + "grad_norm": 0.8238584399223328, + "learning_rate": 9.715386480340259e-05, + "loss": 5.1331, + "step": 126250 + }, + { + "epoch": 0.5436443541479247, + "grad_norm": 1.9602450132369995, + "learning_rate": 9.715161379781263e-05, + "loss": 4.9205, + "step": 126300 + }, + { + "epoch": 0.5438595736072073, + "grad_norm": 1.8051942586898804, + "learning_rate": 9.714936192851368e-05, + "loss": 5.0735, + "step": 126350 + }, + { + "epoch": 0.5440747930664899, + "grad_norm": 3.2136600017547607, + "learning_rate": 9.714710919554697e-05, + "loss": 4.9347, + "step": 126400 + }, + { + "epoch": 0.5442900125257726, + "grad_norm": 1.6839247941970825, + "learning_rate": 9.71448555989538e-05, + "loss": 4.8837, + "step": 126450 + }, + { + "epoch": 0.5445052319850552, + "grad_norm": 1.3821263313293457, + "learning_rate": 9.714260113877544e-05, + "loss": 4.3427, + "step": 126500 + }, + { + "epoch": 0.5447204514443378, + "grad_norm": 4.668375492095947, + "learning_rate": 9.714034581505317e-05, + "loss": 5.0729, + "step": 126550 + }, + { + "epoch": 0.5449356709036204, + "grad_norm": 0.7709909677505493, + "learning_rate": 9.713808962782832e-05, + "loss": 4.6443, + "step": 126600 + }, + { + "epoch": 0.5451508903629031, + "grad_norm": 1.1823052167892456, + "learning_rate": 9.713583257714221e-05, + "loss": 5.2336, + "step": 126650 + }, + { + "epoch": 0.5453661098221857, + "grad_norm": 0.6696082353591919, + "learning_rate": 9.713357466303616e-05, + "loss": 4.3182, + "step": 126700 + }, + { + "epoch": 0.5455813292814683, + "grad_norm": 2.305453062057495, + "learning_rate": 9.713131588555158e-05, + "loss": 4.9178, + "step": 126750 + }, + { + "epoch": 0.5457965487407509, + "grad_norm": 2.612248182296753, + "learning_rate": 9.712905624472982e-05, + "loss": 4.873, + "step": 126800 + }, + { + "epoch": 0.5460117682000336, + "grad_norm": 2.537482976913452, + "learning_rate": 9.712679574061227e-05, + "loss": 4.3481, + "step": 126850 + }, + { + "epoch": 0.5462269876593162, + "grad_norm": 0.9703443646430969, + "learning_rate": 9.712453437324033e-05, + "loss": 4.9934, + "step": 126900 + }, + { + "epoch": 0.5464422071185988, + "grad_norm": 1.7809431552886963, + "learning_rate": 9.712227214265544e-05, + "loss": 4.6735, + "step": 126950 + }, + { + "epoch": 0.5466574265778814, + "grad_norm": 2.0490775108337402, + "learning_rate": 9.712000904889903e-05, + "loss": 5.3208, + "step": 127000 + }, + { + "epoch": 0.5468726460371641, + "grad_norm": 3.84274959564209, + "learning_rate": 9.711774509201256e-05, + "loss": 5.0183, + "step": 127050 + }, + { + "epoch": 0.5470878654964467, + "grad_norm": 1.6673637628555298, + "learning_rate": 9.711548027203749e-05, + "loss": 4.7054, + "step": 127100 + }, + { + "epoch": 0.5473030849557293, + "grad_norm": 0.21418127417564392, + "learning_rate": 9.711321458901532e-05, + "loss": 4.9607, + "step": 127150 + }, + { + "epoch": 0.547518304415012, + "grad_norm": 2.599499225616455, + "learning_rate": 9.711094804298752e-05, + "loss": 5.1979, + "step": 127200 + }, + { + "epoch": 0.5477335238742946, + "grad_norm": 3.4633469581604004, + "learning_rate": 9.710868063399565e-05, + "loss": 5.0209, + "step": 127250 + }, + { + "epoch": 0.5479487433335772, + "grad_norm": 2.688302516937256, + "learning_rate": 9.710641236208125e-05, + "loss": 4.9241, + "step": 127300 + }, + { + "epoch": 0.5481639627928598, + "grad_norm": 1.9390418529510498, + "learning_rate": 9.710414322728581e-05, + "loss": 4.5426, + "step": 127350 + }, + { + "epoch": 0.5483791822521425, + "grad_norm": 0.5529974699020386, + "learning_rate": 9.710187322965095e-05, + "loss": 4.9869, + "step": 127400 + }, + { + "epoch": 0.5485944017114252, + "grad_norm": 3.348822593688965, + "learning_rate": 9.709960236921823e-05, + "loss": 4.6483, + "step": 127450 + }, + { + "epoch": 0.5488096211707078, + "grad_norm": 2.7613565921783447, + "learning_rate": 9.709733064602924e-05, + "loss": 4.7787, + "step": 127500 + }, + { + "epoch": 0.5490248406299904, + "grad_norm": 3.641036033630371, + "learning_rate": 9.70950580601256e-05, + "loss": 5.4307, + "step": 127550 + }, + { + "epoch": 0.5492400600892731, + "grad_norm": 2.8566131591796875, + "learning_rate": 9.709278461154893e-05, + "loss": 4.5604, + "step": 127600 + }, + { + "epoch": 0.5494552795485557, + "grad_norm": 2.8845746517181396, + "learning_rate": 9.70905103003409e-05, + "loss": 5.283, + "step": 127650 + }, + { + "epoch": 0.5496704990078383, + "grad_norm": 1.589932918548584, + "learning_rate": 9.708823512654317e-05, + "loss": 4.7719, + "step": 127700 + }, + { + "epoch": 0.549885718467121, + "grad_norm": 1.2555547952651978, + "learning_rate": 9.708595909019738e-05, + "loss": 4.9188, + "step": 127750 + }, + { + "epoch": 0.5501009379264036, + "grad_norm": 2.6427364349365234, + "learning_rate": 9.708368219134525e-05, + "loss": 5.2191, + "step": 127800 + }, + { + "epoch": 0.5503161573856862, + "grad_norm": 4.361861228942871, + "learning_rate": 9.708140443002849e-05, + "loss": 5.0029, + "step": 127850 + }, + { + "epoch": 0.5505313768449688, + "grad_norm": 2.0424675941467285, + "learning_rate": 9.707912580628879e-05, + "loss": 5.1969, + "step": 127900 + }, + { + "epoch": 0.5507465963042515, + "grad_norm": 2.1595547199249268, + "learning_rate": 9.707684632016794e-05, + "loss": 5.0232, + "step": 127950 + }, + { + "epoch": 0.5509618157635341, + "grad_norm": 3.3656911849975586, + "learning_rate": 9.707456597170764e-05, + "loss": 4.7466, + "step": 128000 + }, + { + "epoch": 0.5511770352228167, + "grad_norm": 1.8441309928894043, + "learning_rate": 9.707228476094969e-05, + "loss": 4.8526, + "step": 128050 + }, + { + "epoch": 0.5513922546820993, + "grad_norm": 2.339371681213379, + "learning_rate": 9.70700026879359e-05, + "loss": 4.6514, + "step": 128100 + }, + { + "epoch": 0.551607474141382, + "grad_norm": 1.0086708068847656, + "learning_rate": 9.706771975270803e-05, + "loss": 5.0069, + "step": 128150 + }, + { + "epoch": 0.5518226936006646, + "grad_norm": 3.573784828186035, + "learning_rate": 9.70654359553079e-05, + "loss": 5.0904, + "step": 128200 + }, + { + "epoch": 0.5520379130599472, + "grad_norm": 3.2334749698638916, + "learning_rate": 9.706315129577738e-05, + "loss": 4.7328, + "step": 128250 + }, + { + "epoch": 0.5522531325192298, + "grad_norm": 1.6294689178466797, + "learning_rate": 9.70608657741583e-05, + "loss": 4.6294, + "step": 128300 + }, + { + "epoch": 0.5524683519785125, + "grad_norm": 2.135917901992798, + "learning_rate": 9.705857939049252e-05, + "loss": 5.3512, + "step": 128350 + }, + { + "epoch": 0.5526835714377951, + "grad_norm": 2.2830910682678223, + "learning_rate": 9.705629214482192e-05, + "loss": 4.7524, + "step": 128400 + }, + { + "epoch": 0.5528987908970777, + "grad_norm": 3.1648857593536377, + "learning_rate": 9.705400403718841e-05, + "loss": 5.0094, + "step": 128450 + }, + { + "epoch": 0.5531140103563604, + "grad_norm": 1.829702615737915, + "learning_rate": 9.705171506763389e-05, + "loss": 5.2741, + "step": 128500 + }, + { + "epoch": 0.553329229815643, + "grad_norm": 1.0462944507598877, + "learning_rate": 9.70494252362003e-05, + "loss": 4.2138, + "step": 128550 + }, + { + "epoch": 0.5535444492749256, + "grad_norm": 2.5871403217315674, + "learning_rate": 9.704713454292958e-05, + "loss": 5.2878, + "step": 128600 + }, + { + "epoch": 0.5537596687342082, + "grad_norm": 2.6285006999969482, + "learning_rate": 9.704484298786367e-05, + "loss": 4.6624, + "step": 128650 + }, + { + "epoch": 0.5539748881934909, + "grad_norm": 1.7678900957107544, + "learning_rate": 9.704255057104459e-05, + "loss": 5.3471, + "step": 128700 + }, + { + "epoch": 0.5541901076527735, + "grad_norm": 2.200467109680176, + "learning_rate": 9.704025729251429e-05, + "loss": 4.678, + "step": 128750 + }, + { + "epoch": 0.5544053271120561, + "grad_norm": 2.127563238143921, + "learning_rate": 9.703796315231479e-05, + "loss": 5.0611, + "step": 128800 + }, + { + "epoch": 0.5546205465713387, + "grad_norm": 2.621483325958252, + "learning_rate": 9.703566815048814e-05, + "loss": 4.6554, + "step": 128850 + }, + { + "epoch": 0.5548357660306215, + "grad_norm": 2.312119722366333, + "learning_rate": 9.703337228707635e-05, + "loss": 5.306, + "step": 128900 + }, + { + "epoch": 0.5550509854899041, + "grad_norm": 2.7283852100372314, + "learning_rate": 9.703107556212146e-05, + "loss": 4.9579, + "step": 128950 + }, + { + "epoch": 0.5552662049491867, + "grad_norm": 3.2012126445770264, + "learning_rate": 9.702877797566557e-05, + "loss": 4.2113, + "step": 129000 + }, + { + "epoch": 0.5552662049491867, + "eval_loss": 5.347733974456787, + "eval_runtime": 34.7963, + "eval_samples_per_second": 18.393, + "eval_steps_per_second": 9.196, + "eval_tts_loss": 6.687486809834342, + "step": 129000 + }, + { + "epoch": 0.5554814244084694, + "grad_norm": 1.505682349205017, + "learning_rate": 9.702647952775078e-05, + "loss": 5.0241, + "step": 129050 + }, + { + "epoch": 0.555696643867752, + "grad_norm": 1.3901264667510986, + "learning_rate": 9.702418021841915e-05, + "loss": 4.7343, + "step": 129100 + }, + { + "epoch": 0.5559118633270346, + "grad_norm": 2.1173675060272217, + "learning_rate": 9.702188004771282e-05, + "loss": 5.0733, + "step": 129150 + }, + { + "epoch": 0.5561270827863172, + "grad_norm": 0.9307823181152344, + "learning_rate": 9.701957901567394e-05, + "loss": 4.8139, + "step": 129200 + }, + { + "epoch": 0.5563423022455999, + "grad_norm": 2.4076812267303467, + "learning_rate": 9.701727712234463e-05, + "loss": 5.0126, + "step": 129250 + }, + { + "epoch": 0.5565575217048825, + "grad_norm": 0.7499299645423889, + "learning_rate": 9.701497436776707e-05, + "loss": 5.0731, + "step": 129300 + }, + { + "epoch": 0.5567727411641651, + "grad_norm": 2.304159641265869, + "learning_rate": 9.701267075198344e-05, + "loss": 5.201, + "step": 129350 + }, + { + "epoch": 0.5569879606234477, + "grad_norm": 1.6597014665603638, + "learning_rate": 9.701036627503593e-05, + "loss": 4.5575, + "step": 129400 + }, + { + "epoch": 0.5572031800827304, + "grad_norm": 0.6023521423339844, + "learning_rate": 9.700806093696677e-05, + "loss": 5.3592, + "step": 129450 + }, + { + "epoch": 0.557418399542013, + "grad_norm": 2.524160385131836, + "learning_rate": 9.700575473781817e-05, + "loss": 4.4991, + "step": 129500 + }, + { + "epoch": 0.5576336190012956, + "grad_norm": 3.091538429260254, + "learning_rate": 9.700344767763237e-05, + "loss": 4.6629, + "step": 129550 + }, + { + "epoch": 0.5578488384605782, + "grad_norm": 1.3581780195236206, + "learning_rate": 9.700113975645165e-05, + "loss": 4.8274, + "step": 129600 + }, + { + "epoch": 0.5580640579198609, + "grad_norm": 2.4774131774902344, + "learning_rate": 9.699883097431828e-05, + "loss": 5.216, + "step": 129650 + }, + { + "epoch": 0.5582792773791435, + "grad_norm": 2.468528985977173, + "learning_rate": 9.699652133127455e-05, + "loss": 5.2229, + "step": 129700 + }, + { + "epoch": 0.5584944968384261, + "grad_norm": 2.3694021701812744, + "learning_rate": 9.699421082736277e-05, + "loss": 4.7588, + "step": 129750 + }, + { + "epoch": 0.5587097162977088, + "grad_norm": 1.942644715309143, + "learning_rate": 9.699189946262525e-05, + "loss": 4.7319, + "step": 129800 + }, + { + "epoch": 0.5589249357569914, + "grad_norm": 3.8598368167877197, + "learning_rate": 9.698958723710434e-05, + "loss": 4.9824, + "step": 129850 + }, + { + "epoch": 0.559140155216274, + "grad_norm": 4.044145584106445, + "learning_rate": 9.698727415084239e-05, + "loss": 5.2786, + "step": 129900 + }, + { + "epoch": 0.5593553746755566, + "grad_norm": 2.473392963409424, + "learning_rate": 9.69849602038818e-05, + "loss": 5.0776, + "step": 129950 + }, + { + "epoch": 0.5595705941348393, + "grad_norm": 1.8372315168380737, + "learning_rate": 9.698264539626489e-05, + "loss": 4.6827, + "step": 130000 + }, + { + "epoch": 0.5597858135941219, + "grad_norm": 0.6409121751785278, + "learning_rate": 9.698032972803413e-05, + "loss": 4.0037, + "step": 130050 + }, + { + "epoch": 0.5600010330534045, + "grad_norm": 3.2523889541625977, + "learning_rate": 9.69780131992319e-05, + "loss": 4.9642, + "step": 130100 + }, + { + "epoch": 0.5602162525126871, + "grad_norm": 2.0752956867218018, + "learning_rate": 9.697569580990065e-05, + "loss": 5.6078, + "step": 130150 + }, + { + "epoch": 0.5604314719719699, + "grad_norm": 2.0073561668395996, + "learning_rate": 9.69733775600828e-05, + "loss": 4.8251, + "step": 130200 + }, + { + "epoch": 0.5606466914312525, + "grad_norm": 2.7323553562164307, + "learning_rate": 9.697105844982086e-05, + "loss": 4.6503, + "step": 130250 + }, + { + "epoch": 0.560861910890535, + "grad_norm": 0.9506784081459045, + "learning_rate": 9.696873847915727e-05, + "loss": 4.4855, + "step": 130300 + }, + { + "epoch": 0.5610771303498177, + "grad_norm": 2.4860222339630127, + "learning_rate": 9.696641764813456e-05, + "loss": 5.0279, + "step": 130350 + }, + { + "epoch": 0.5612923498091004, + "grad_norm": 1.9698433876037598, + "learning_rate": 9.69640959567952e-05, + "loss": 4.5695, + "step": 130400 + }, + { + "epoch": 0.561507569268383, + "grad_norm": 1.9023594856262207, + "learning_rate": 9.696177340518177e-05, + "loss": 5.0678, + "step": 130450 + }, + { + "epoch": 0.5617227887276656, + "grad_norm": 0.6357467770576477, + "learning_rate": 9.695944999333679e-05, + "loss": 4.8916, + "step": 130500 + }, + { + "epoch": 0.5619380081869483, + "grad_norm": 1.9951235055923462, + "learning_rate": 9.695712572130281e-05, + "loss": 4.8219, + "step": 130550 + }, + { + "epoch": 0.5621532276462309, + "grad_norm": 0.6118124723434448, + "learning_rate": 9.69548005891224e-05, + "loss": 5.037, + "step": 130600 + }, + { + "epoch": 0.5623684471055135, + "grad_norm": 0.7312654256820679, + "learning_rate": 9.695247459683818e-05, + "loss": 4.4842, + "step": 130650 + }, + { + "epoch": 0.5625836665647961, + "grad_norm": 2.004077434539795, + "learning_rate": 9.695014774449275e-05, + "loss": 4.9906, + "step": 130700 + }, + { + "epoch": 0.5627988860240788, + "grad_norm": 2.19923734664917, + "learning_rate": 9.69478200321287e-05, + "loss": 5.1667, + "step": 130750 + }, + { + "epoch": 0.5630141054833614, + "grad_norm": 1.4821745157241821, + "learning_rate": 9.69454914597887e-05, + "loss": 4.6597, + "step": 130800 + }, + { + "epoch": 0.563229324942644, + "grad_norm": 1.1734143495559692, + "learning_rate": 9.694316202751541e-05, + "loss": 4.7393, + "step": 130850 + }, + { + "epoch": 0.5634445444019266, + "grad_norm": 3.0615878105163574, + "learning_rate": 9.694083173535147e-05, + "loss": 4.8377, + "step": 130900 + }, + { + "epoch": 0.5636597638612093, + "grad_norm": 1.9319593906402588, + "learning_rate": 9.693850058333959e-05, + "loss": 4.6083, + "step": 130950 + }, + { + "epoch": 0.5638749833204919, + "grad_norm": 2.2542989253997803, + "learning_rate": 9.693616857152246e-05, + "loss": 5.1667, + "step": 131000 + }, + { + "epoch": 0.5640902027797745, + "grad_norm": 1.270866870880127, + "learning_rate": 9.693383569994279e-05, + "loss": 4.5674, + "step": 131050 + }, + { + "epoch": 0.5643054222390572, + "grad_norm": 3.8727915287017822, + "learning_rate": 9.693150196864334e-05, + "loss": 5.0311, + "step": 131100 + }, + { + "epoch": 0.5645206416983398, + "grad_norm": 0.9435884356498718, + "learning_rate": 9.692916737766683e-05, + "loss": 4.234, + "step": 131150 + }, + { + "epoch": 0.5647358611576224, + "grad_norm": 1.333231806755066, + "learning_rate": 9.692683192705604e-05, + "loss": 5.0891, + "step": 131200 + }, + { + "epoch": 0.564951080616905, + "grad_norm": 2.709131956100464, + "learning_rate": 9.692449561685373e-05, + "loss": 4.6821, + "step": 131250 + }, + { + "epoch": 0.5651663000761877, + "grad_norm": 2.496527671813965, + "learning_rate": 9.692215844710273e-05, + "loss": 4.6593, + "step": 131300 + }, + { + "epoch": 0.5653815195354703, + "grad_norm": 2.4022700786590576, + "learning_rate": 9.691982041784584e-05, + "loss": 4.9699, + "step": 131350 + }, + { + "epoch": 0.5655967389947529, + "grad_norm": 1.739414930343628, + "learning_rate": 9.691748152912587e-05, + "loss": 4.8365, + "step": 131400 + }, + { + "epoch": 0.5658119584540355, + "grad_norm": 4.637904167175293, + "learning_rate": 9.691514178098566e-05, + "loss": 4.8018, + "step": 131450 + }, + { + "epoch": 0.5660271779133182, + "grad_norm": 1.8399211168289185, + "learning_rate": 9.69128011734681e-05, + "loss": 4.3457, + "step": 131500 + }, + { + "epoch": 0.5662423973726008, + "grad_norm": 0.7757171988487244, + "learning_rate": 9.691045970661604e-05, + "loss": 4.823, + "step": 131550 + }, + { + "epoch": 0.5664576168318834, + "grad_norm": 0.9457610845565796, + "learning_rate": 9.690811738047238e-05, + "loss": 4.7168, + "step": 131600 + }, + { + "epoch": 0.566672836291166, + "grad_norm": 5.5345282554626465, + "learning_rate": 9.690577419508003e-05, + "loss": 4.4689, + "step": 131650 + }, + { + "epoch": 0.5668880557504488, + "grad_norm": 0.6747812032699585, + "learning_rate": 9.690343015048189e-05, + "loss": 4.8158, + "step": 131700 + }, + { + "epoch": 0.5671032752097314, + "grad_norm": 1.4698585271835327, + "learning_rate": 9.690108524672091e-05, + "loss": 5.2259, + "step": 131750 + }, + { + "epoch": 0.567318494669014, + "grad_norm": 1.4821953773498535, + "learning_rate": 9.689873948384005e-05, + "loss": 4.9037, + "step": 131800 + }, + { + "epoch": 0.5675337141282967, + "grad_norm": 2.988558292388916, + "learning_rate": 9.689639286188228e-05, + "loss": 4.8887, + "step": 131850 + }, + { + "epoch": 0.5677489335875793, + "grad_norm": 2.5244381427764893, + "learning_rate": 9.689404538089057e-05, + "loss": 4.5167, + "step": 131900 + }, + { + "epoch": 0.5679641530468619, + "grad_norm": 2.0022971630096436, + "learning_rate": 9.689169704090795e-05, + "loss": 4.9465, + "step": 131950 + }, + { + "epoch": 0.5681793725061445, + "grad_norm": 2.6848387718200684, + "learning_rate": 9.68893478419774e-05, + "loss": 4.9368, + "step": 132000 + }, + { + "epoch": 0.5681793725061445, + "eval_loss": 5.337172031402588, + "eval_runtime": 34.8255, + "eval_samples_per_second": 18.377, + "eval_steps_per_second": 9.189, + "eval_tts_loss": 6.648752665402546, + "step": 132000 + }, + { + "epoch": 0.5683945919654272, + "grad_norm": 1.4832277297973633, + "learning_rate": 9.688699778414196e-05, + "loss": 4.9561, + "step": 132050 + }, + { + "epoch": 0.5686098114247098, + "grad_norm": 4.884256839752197, + "learning_rate": 9.68846468674447e-05, + "loss": 5.52, + "step": 132100 + }, + { + "epoch": 0.5688250308839924, + "grad_norm": 0.6973890662193298, + "learning_rate": 9.688229509192867e-05, + "loss": 5.0109, + "step": 132150 + }, + { + "epoch": 0.569040250343275, + "grad_norm": 2.1739461421966553, + "learning_rate": 9.687994245763693e-05, + "loss": 5.2209, + "step": 132200 + }, + { + "epoch": 0.5692554698025577, + "grad_norm": 2.6692097187042236, + "learning_rate": 9.687758896461262e-05, + "loss": 4.8467, + "step": 132250 + }, + { + "epoch": 0.5694706892618403, + "grad_norm": 1.8820005655288696, + "learning_rate": 9.687523461289881e-05, + "loss": 4.831, + "step": 132300 + }, + { + "epoch": 0.5696859087211229, + "grad_norm": 1.6660008430480957, + "learning_rate": 9.687287940253864e-05, + "loss": 4.7005, + "step": 132350 + }, + { + "epoch": 0.5699011281804056, + "grad_norm": 1.7978891134262085, + "learning_rate": 9.687052333357525e-05, + "loss": 4.9738, + "step": 132400 + }, + { + "epoch": 0.5701163476396882, + "grad_norm": 1.5957428216934204, + "learning_rate": 9.68681664060518e-05, + "loss": 4.8994, + "step": 132450 + }, + { + "epoch": 0.5703315670989708, + "grad_norm": 2.8161098957061768, + "learning_rate": 9.686580862001146e-05, + "loss": 5.1348, + "step": 132500 + }, + { + "epoch": 0.5705467865582534, + "grad_norm": 1.7098777294158936, + "learning_rate": 9.686344997549744e-05, + "loss": 4.8465, + "step": 132550 + }, + { + "epoch": 0.5707620060175361, + "grad_norm": 3.6457414627075195, + "learning_rate": 9.686109047255289e-05, + "loss": 4.8926, + "step": 132600 + }, + { + "epoch": 0.5709772254768187, + "grad_norm": 1.981307864189148, + "learning_rate": 9.68587301112211e-05, + "loss": 4.9458, + "step": 132650 + }, + { + "epoch": 0.5711924449361013, + "grad_norm": 2.527451276779175, + "learning_rate": 9.685636889154526e-05, + "loss": 4.8782, + "step": 132700 + }, + { + "epoch": 0.5714076643953839, + "grad_norm": 4.155847549438477, + "learning_rate": 9.685400681356865e-05, + "loss": 5.032, + "step": 132750 + }, + { + "epoch": 0.5716228838546666, + "grad_norm": 1.8260301351547241, + "learning_rate": 9.685164387733449e-05, + "loss": 4.6125, + "step": 132800 + }, + { + "epoch": 0.5718381033139492, + "grad_norm": 2.8804163932800293, + "learning_rate": 9.684928008288614e-05, + "loss": 4.7869, + "step": 132850 + }, + { + "epoch": 0.5720533227732318, + "grad_norm": 2.2473936080932617, + "learning_rate": 9.684691543026683e-05, + "loss": 4.7683, + "step": 132900 + }, + { + "epoch": 0.5722685422325144, + "grad_norm": 1.379428744316101, + "learning_rate": 9.684454991951991e-05, + "loss": 4.7862, + "step": 132950 + }, + { + "epoch": 0.5724837616917972, + "grad_norm": 3.112919807434082, + "learning_rate": 9.68421835506887e-05, + "loss": 4.5669, + "step": 133000 + }, + { + "epoch": 0.5726989811510798, + "grad_norm": 2.9040777683258057, + "learning_rate": 9.683981632381655e-05, + "loss": 4.8791, + "step": 133050 + }, + { + "epoch": 0.5729142006103624, + "grad_norm": 1.9083874225616455, + "learning_rate": 9.683744823894682e-05, + "loss": 5.0776, + "step": 133100 + }, + { + "epoch": 0.5731294200696451, + "grad_norm": 1.2252275943756104, + "learning_rate": 9.68350792961229e-05, + "loss": 4.3765, + "step": 133150 + }, + { + "epoch": 0.5733446395289277, + "grad_norm": 2.46844744682312, + "learning_rate": 9.683270949538816e-05, + "loss": 5.078, + "step": 133200 + }, + { + "epoch": 0.5735598589882103, + "grad_norm": 2.050983190536499, + "learning_rate": 9.683033883678603e-05, + "loss": 5.0218, + "step": 133250 + }, + { + "epoch": 0.5737750784474929, + "grad_norm": 2.518181562423706, + "learning_rate": 9.682796732035992e-05, + "loss": 4.8085, + "step": 133300 + }, + { + "epoch": 0.5739902979067756, + "grad_norm": 2.371340036392212, + "learning_rate": 9.682559494615329e-05, + "loss": 5.2122, + "step": 133350 + }, + { + "epoch": 0.5742055173660582, + "grad_norm": 2.552292823791504, + "learning_rate": 9.682322171420956e-05, + "loss": 4.1977, + "step": 133400 + }, + { + "epoch": 0.5744207368253408, + "grad_norm": 1.9413158893585205, + "learning_rate": 9.682084762457225e-05, + "loss": 4.9206, + "step": 133450 + }, + { + "epoch": 0.5746359562846234, + "grad_norm": 1.9089080095291138, + "learning_rate": 9.681847267728481e-05, + "loss": 4.7687, + "step": 133500 + }, + { + "epoch": 0.5748511757439061, + "grad_norm": 2.6296920776367188, + "learning_rate": 9.681609687239077e-05, + "loss": 5.1362, + "step": 133550 + }, + { + "epoch": 0.5750663952031887, + "grad_norm": 2.9168508052825928, + "learning_rate": 9.681372020993363e-05, + "loss": 4.7509, + "step": 133600 + }, + { + "epoch": 0.5752816146624713, + "grad_norm": 2.12385892868042, + "learning_rate": 9.681134268995693e-05, + "loss": 5.1895, + "step": 133650 + }, + { + "epoch": 0.5754968341217539, + "grad_norm": 1.9237511157989502, + "learning_rate": 9.680896431250422e-05, + "loss": 4.398, + "step": 133700 + }, + { + "epoch": 0.5757120535810366, + "grad_norm": 2.8768208026885986, + "learning_rate": 9.680658507761908e-05, + "loss": 4.7627, + "step": 133750 + }, + { + "epoch": 0.5759272730403192, + "grad_norm": 1.7778829336166382, + "learning_rate": 9.680420498534507e-05, + "loss": 5.0987, + "step": 133800 + }, + { + "epoch": 0.5761424924996018, + "grad_norm": 2.467918634414673, + "learning_rate": 9.68018240357258e-05, + "loss": 4.2527, + "step": 133850 + }, + { + "epoch": 0.5763577119588845, + "grad_norm": 1.820198893547058, + "learning_rate": 9.679944222880488e-05, + "loss": 5.1013, + "step": 133900 + }, + { + "epoch": 0.5765729314181671, + "grad_norm": 0.9117358326911926, + "learning_rate": 9.679705956462595e-05, + "loss": 5.1966, + "step": 133950 + }, + { + "epoch": 0.5767881508774497, + "grad_norm": 1.0781848430633545, + "learning_rate": 9.679467604323265e-05, + "loss": 5.2188, + "step": 134000 + }, + { + "epoch": 0.5770033703367323, + "grad_norm": 2.4207024574279785, + "learning_rate": 9.679229166466863e-05, + "loss": 5.2495, + "step": 134050 + }, + { + "epoch": 0.577218589796015, + "grad_norm": 5.5188093185424805, + "learning_rate": 9.678990642897756e-05, + "loss": 5.0443, + "step": 134100 + }, + { + "epoch": 0.5774338092552976, + "grad_norm": 1.8237333297729492, + "learning_rate": 9.678752033620317e-05, + "loss": 4.8023, + "step": 134150 + }, + { + "epoch": 0.5776490287145802, + "grad_norm": 1.0535930395126343, + "learning_rate": 9.678513338638912e-05, + "loss": 4.7449, + "step": 134200 + }, + { + "epoch": 0.5778642481738628, + "grad_norm": 1.675207257270813, + "learning_rate": 9.678274557957918e-05, + "loss": 4.8492, + "step": 134250 + }, + { + "epoch": 0.5780794676331455, + "grad_norm": 3.7565526962280273, + "learning_rate": 9.678035691581706e-05, + "loss": 5.1926, + "step": 134300 + }, + { + "epoch": 0.5782946870924282, + "grad_norm": 1.8570371866226196, + "learning_rate": 9.67779673951465e-05, + "loss": 4.9322, + "step": 134350 + }, + { + "epoch": 0.5785099065517108, + "grad_norm": 4.945214748382568, + "learning_rate": 9.677557701761131e-05, + "loss": 4.8507, + "step": 134400 + }, + { + "epoch": 0.5787251260109935, + "grad_norm": 3.440633535385132, + "learning_rate": 9.677318578325527e-05, + "loss": 4.795, + "step": 134450 + }, + { + "epoch": 0.5789403454702761, + "grad_norm": 3.2276227474212646, + "learning_rate": 9.677079369212214e-05, + "loss": 5.4868, + "step": 134500 + }, + { + "epoch": 0.5791555649295587, + "grad_norm": 1.3862528800964355, + "learning_rate": 9.676840074425578e-05, + "loss": 5.3843, + "step": 134550 + }, + { + "epoch": 0.5793707843888413, + "grad_norm": 2.2343294620513916, + "learning_rate": 9.676600693970001e-05, + "loss": 4.9999, + "step": 134600 + }, + { + "epoch": 0.579586003848124, + "grad_norm": 2.5799143314361572, + "learning_rate": 9.676361227849869e-05, + "loss": 5.1839, + "step": 134650 + }, + { + "epoch": 0.5798012233074066, + "grad_norm": 1.4713902473449707, + "learning_rate": 9.676121676069565e-05, + "loss": 4.6635, + "step": 134700 + }, + { + "epoch": 0.5800164427666892, + "grad_norm": 2.4586997032165527, + "learning_rate": 9.675882038633482e-05, + "loss": 5.1601, + "step": 134750 + }, + { + "epoch": 0.5802316622259718, + "grad_norm": 5.70206356048584, + "learning_rate": 9.675642315546005e-05, + "loss": 4.9282, + "step": 134800 + }, + { + "epoch": 0.5804468816852545, + "grad_norm": 3.547096014022827, + "learning_rate": 9.675402506811527e-05, + "loss": 4.8976, + "step": 134850 + }, + { + "epoch": 0.5806621011445371, + "grad_norm": 2.2211647033691406, + "learning_rate": 9.675162612434442e-05, + "loss": 4.7103, + "step": 134900 + }, + { + "epoch": 0.5808773206038197, + "grad_norm": 1.0617612600326538, + "learning_rate": 9.674922632419144e-05, + "loss": 4.5706, + "step": 134950 + }, + { + "epoch": 0.5810925400631023, + "grad_norm": 1.9316880702972412, + "learning_rate": 9.674682566770025e-05, + "loss": 5.1191, + "step": 135000 + }, + { + "epoch": 0.5810925400631023, + "eval_loss": 5.326813220977783, + "eval_runtime": 34.7991, + "eval_samples_per_second": 18.391, + "eval_steps_per_second": 9.196, + "eval_tts_loss": 6.64434679194797, + "step": 135000 + }, + { + "epoch": 0.581307759522385, + "grad_norm": 1.7800546884536743, + "learning_rate": 9.674442415491489e-05, + "loss": 4.9685, + "step": 135050 + }, + { + "epoch": 0.5815229789816676, + "grad_norm": 1.9603791236877441, + "learning_rate": 9.67420217858793e-05, + "loss": 4.8148, + "step": 135100 + }, + { + "epoch": 0.5817381984409502, + "grad_norm": 2.6234686374664307, + "learning_rate": 9.673961856063751e-05, + "loss": 5.0715, + "step": 135150 + }, + { + "epoch": 0.5819534179002329, + "grad_norm": 3.6094229221343994, + "learning_rate": 9.673721447923353e-05, + "loss": 5.0512, + "step": 135200 + }, + { + "epoch": 0.5821686373595155, + "grad_norm": 2.14821720123291, + "learning_rate": 9.67348095417114e-05, + "loss": 4.887, + "step": 135250 + }, + { + "epoch": 0.5823838568187981, + "grad_norm": 2.804936408996582, + "learning_rate": 9.673240374811518e-05, + "loss": 5.2062, + "step": 135300 + }, + { + "epoch": 0.5825990762780807, + "grad_norm": 2.1928858757019043, + "learning_rate": 9.672999709848893e-05, + "loss": 4.5363, + "step": 135350 + }, + { + "epoch": 0.5828142957373634, + "grad_norm": 2.012925624847412, + "learning_rate": 9.672758959287672e-05, + "loss": 4.763, + "step": 135400 + }, + { + "epoch": 0.583029515196646, + "grad_norm": 4.5754194259643555, + "learning_rate": 9.672518123132268e-05, + "loss": 4.7825, + "step": 135450 + }, + { + "epoch": 0.5832447346559286, + "grad_norm": 0.35088592767715454, + "learning_rate": 9.672277201387091e-05, + "loss": 4.8551, + "step": 135500 + }, + { + "epoch": 0.5834599541152112, + "grad_norm": 2.080634355545044, + "learning_rate": 9.672036194056555e-05, + "loss": 5.0328, + "step": 135550 + }, + { + "epoch": 0.5836751735744939, + "grad_norm": 1.9886418581008911, + "learning_rate": 9.671795101145073e-05, + "loss": 4.6979, + "step": 135600 + }, + { + "epoch": 0.5838903930337765, + "grad_norm": 1.9827425479888916, + "learning_rate": 9.671553922657063e-05, + "loss": 4.5758, + "step": 135650 + }, + { + "epoch": 0.5841056124930591, + "grad_norm": 2.665067434310913, + "learning_rate": 9.671312658596942e-05, + "loss": 4.5501, + "step": 135700 + }, + { + "epoch": 0.5843208319523419, + "grad_norm": 1.9999055862426758, + "learning_rate": 9.671071308969129e-05, + "loss": 5.1329, + "step": 135750 + }, + { + "epoch": 0.5845360514116245, + "grad_norm": 3.332183599472046, + "learning_rate": 9.670829873778047e-05, + "loss": 4.5375, + "step": 135800 + }, + { + "epoch": 0.5847512708709071, + "grad_norm": 2.154738426208496, + "learning_rate": 9.670588353028115e-05, + "loss": 4.6552, + "step": 135850 + }, + { + "epoch": 0.5849664903301897, + "grad_norm": 4.545044898986816, + "learning_rate": 9.67034674672376e-05, + "loss": 4.7414, + "step": 135900 + }, + { + "epoch": 0.5851817097894724, + "grad_norm": 2.004547119140625, + "learning_rate": 9.670105054869408e-05, + "loss": 4.8039, + "step": 135950 + }, + { + "epoch": 0.585396929248755, + "grad_norm": 2.5746090412139893, + "learning_rate": 9.669863277469483e-05, + "loss": 4.778, + "step": 136000 + }, + { + "epoch": 0.5856121487080376, + "grad_norm": 1.1107285022735596, + "learning_rate": 9.669621414528417e-05, + "loss": 5.6772, + "step": 136050 + }, + { + "epoch": 0.5858273681673202, + "grad_norm": 0.7853596210479736, + "learning_rate": 9.669379466050639e-05, + "loss": 4.7746, + "step": 136100 + }, + { + "epoch": 0.5860425876266029, + "grad_norm": 2.5652525424957275, + "learning_rate": 9.669137432040582e-05, + "loss": 5.0669, + "step": 136150 + }, + { + "epoch": 0.5862578070858855, + "grad_norm": 1.1772984266281128, + "learning_rate": 9.668895312502677e-05, + "loss": 4.5853, + "step": 136200 + }, + { + "epoch": 0.5864730265451681, + "grad_norm": 2.7772958278656006, + "learning_rate": 9.668653107441363e-05, + "loss": 4.2713, + "step": 136250 + }, + { + "epoch": 0.5866882460044507, + "grad_norm": 1.9948155879974365, + "learning_rate": 9.668410816861072e-05, + "loss": 4.8628, + "step": 136300 + }, + { + "epoch": 0.5869034654637334, + "grad_norm": 2.76033353805542, + "learning_rate": 9.668168440766245e-05, + "loss": 4.9611, + "step": 136350 + }, + { + "epoch": 0.587118684923016, + "grad_norm": 2.6899595260620117, + "learning_rate": 9.667925979161323e-05, + "loss": 4.8912, + "step": 136400 + }, + { + "epoch": 0.5873339043822986, + "grad_norm": 2.8074560165405273, + "learning_rate": 9.667683432050745e-05, + "loss": 4.7824, + "step": 136450 + }, + { + "epoch": 0.5875491238415813, + "grad_norm": 3.4509177207946777, + "learning_rate": 9.667440799438954e-05, + "loss": 5.0418, + "step": 136500 + }, + { + "epoch": 0.5877643433008639, + "grad_norm": 3.330620050430298, + "learning_rate": 9.667198081330396e-05, + "loss": 4.941, + "step": 136550 + }, + { + "epoch": 0.5879795627601465, + "grad_norm": 2.590696096420288, + "learning_rate": 9.666955277729515e-05, + "loss": 5.2647, + "step": 136600 + }, + { + "epoch": 0.5881947822194291, + "grad_norm": 2.7659459114074707, + "learning_rate": 9.66671238864076e-05, + "loss": 4.7141, + "step": 136650 + }, + { + "epoch": 0.5884100016787118, + "grad_norm": 0.5298041701316833, + "learning_rate": 9.66646941406858e-05, + "loss": 4.8323, + "step": 136700 + }, + { + "epoch": 0.5886252211379944, + "grad_norm": 1.9726741313934326, + "learning_rate": 9.666226354017427e-05, + "loss": 4.7889, + "step": 136750 + }, + { + "epoch": 0.588840440597277, + "grad_norm": 1.0062187910079956, + "learning_rate": 9.665983208491751e-05, + "loss": 4.9392, + "step": 136800 + }, + { + "epoch": 0.5890556600565596, + "grad_norm": 2.5035738945007324, + "learning_rate": 9.665739977496006e-05, + "loss": 4.9633, + "step": 136850 + }, + { + "epoch": 0.5892708795158423, + "grad_norm": 1.2964457273483276, + "learning_rate": 9.665496661034649e-05, + "loss": 4.4878, + "step": 136900 + }, + { + "epoch": 0.5894860989751249, + "grad_norm": 2.3645267486572266, + "learning_rate": 9.665253259112136e-05, + "loss": 5.1251, + "step": 136950 + }, + { + "epoch": 0.5897013184344075, + "grad_norm": 3.3298747539520264, + "learning_rate": 9.665009771732926e-05, + "loss": 4.8054, + "step": 137000 + }, + { + "epoch": 0.5899165378936901, + "grad_norm": 5.815441608428955, + "learning_rate": 9.664766198901478e-05, + "loss": 4.8248, + "step": 137050 + }, + { + "epoch": 0.5901317573529729, + "grad_norm": 2.404167413711548, + "learning_rate": 9.664522540622256e-05, + "loss": 5.3029, + "step": 137100 + }, + { + "epoch": 0.5903469768122555, + "grad_norm": 3.627758264541626, + "learning_rate": 9.664278796899722e-05, + "loss": 5.0027, + "step": 137150 + }, + { + "epoch": 0.590562196271538, + "grad_norm": 2.5434727668762207, + "learning_rate": 9.664034967738339e-05, + "loss": 4.8172, + "step": 137200 + }, + { + "epoch": 0.5907774157308208, + "grad_norm": 2.6302223205566406, + "learning_rate": 9.663791053142576e-05, + "loss": 4.8208, + "step": 137250 + }, + { + "epoch": 0.5909926351901034, + "grad_norm": 2.0764784812927246, + "learning_rate": 9.663547053116899e-05, + "loss": 4.7574, + "step": 137300 + }, + { + "epoch": 0.591207854649386, + "grad_norm": 2.764317750930786, + "learning_rate": 9.66330296766578e-05, + "loss": 4.7722, + "step": 137350 + }, + { + "epoch": 0.5914230741086686, + "grad_norm": 5.755559921264648, + "learning_rate": 9.663058796793689e-05, + "loss": 4.5889, + "step": 137400 + }, + { + "epoch": 0.5916382935679513, + "grad_norm": 2.6737353801727295, + "learning_rate": 9.662814540505098e-05, + "loss": 4.9599, + "step": 137450 + }, + { + "epoch": 0.5918535130272339, + "grad_norm": 2.97489595413208, + "learning_rate": 9.66257019880448e-05, + "loss": 4.8259, + "step": 137500 + }, + { + "epoch": 0.5920687324865165, + "grad_norm": 0.7553098797798157, + "learning_rate": 9.662325771696314e-05, + "loss": 4.497, + "step": 137550 + }, + { + "epoch": 0.5922839519457991, + "grad_norm": 1.6053775548934937, + "learning_rate": 9.662081259185073e-05, + "loss": 4.7921, + "step": 137600 + }, + { + "epoch": 0.5924991714050818, + "grad_norm": 2.3841230869293213, + "learning_rate": 9.66183666127524e-05, + "loss": 4.7366, + "step": 137650 + }, + { + "epoch": 0.5927143908643644, + "grad_norm": 1.188173770904541, + "learning_rate": 9.661591977971296e-05, + "loss": 5.304, + "step": 137700 + }, + { + "epoch": 0.592929610323647, + "grad_norm": 1.1048862934112549, + "learning_rate": 9.66134720927772e-05, + "loss": 4.7847, + "step": 137750 + }, + { + "epoch": 0.5931448297829297, + "grad_norm": 0.7667586803436279, + "learning_rate": 9.661102355198995e-05, + "loss": 4.7721, + "step": 137800 + }, + { + "epoch": 0.5933600492422123, + "grad_norm": 3.6138086318969727, + "learning_rate": 9.660857415739609e-05, + "loss": 4.8689, + "step": 137850 + }, + { + "epoch": 0.5935752687014949, + "grad_norm": 1.9714311361312866, + "learning_rate": 9.660612390904047e-05, + "loss": 4.533, + "step": 137900 + }, + { + "epoch": 0.5937904881607775, + "grad_norm": 0.5424425601959229, + "learning_rate": 9.660367280696799e-05, + "loss": 4.5084, + "step": 137950 + }, + { + "epoch": 0.5940057076200602, + "grad_norm": 3.37300705909729, + "learning_rate": 9.660122085122353e-05, + "loss": 5.1, + "step": 138000 + }, + { + "epoch": 0.5940057076200602, + "eval_loss": 5.311753273010254, + "eval_runtime": 34.7334, + "eval_samples_per_second": 18.426, + "eval_steps_per_second": 9.213, + "eval_tts_loss": 6.659056199023701, + "step": 138000 + }, + { + "epoch": 0.5942209270793428, + "grad_norm": 1.709944725036621, + "learning_rate": 9.6598768041852e-05, + "loss": 4.9421, + "step": 138050 + }, + { + "epoch": 0.5944361465386254, + "grad_norm": 3.1865804195404053, + "learning_rate": 9.659631437889836e-05, + "loss": 4.686, + "step": 138100 + }, + { + "epoch": 0.594651365997908, + "grad_norm": 2.3442795276641846, + "learning_rate": 9.659385986240751e-05, + "loss": 4.7421, + "step": 138150 + }, + { + "epoch": 0.5948665854571907, + "grad_norm": 2.7487270832061768, + "learning_rate": 9.659140449242446e-05, + "loss": 5.1277, + "step": 138200 + }, + { + "epoch": 0.5950818049164733, + "grad_norm": 3.687743663787842, + "learning_rate": 9.658894826899416e-05, + "loss": 5.3694, + "step": 138250 + }, + { + "epoch": 0.5952970243757559, + "grad_norm": 2.1889641284942627, + "learning_rate": 9.65864911921616e-05, + "loss": 4.7819, + "step": 138300 + }, + { + "epoch": 0.5955122438350385, + "grad_norm": 2.5050244331359863, + "learning_rate": 9.65840332619718e-05, + "loss": 4.736, + "step": 138350 + }, + { + "epoch": 0.5957274632943212, + "grad_norm": 1.9939156770706177, + "learning_rate": 9.658157447846977e-05, + "loss": 4.8862, + "step": 138400 + }, + { + "epoch": 0.5959426827536038, + "grad_norm": 1.4959609508514404, + "learning_rate": 9.657911484170057e-05, + "loss": 5.0135, + "step": 138450 + }, + { + "epoch": 0.5961579022128864, + "grad_norm": 0.8701924681663513, + "learning_rate": 9.657665435170923e-05, + "loss": 4.8021, + "step": 138500 + }, + { + "epoch": 0.5963731216721692, + "grad_norm": 5.805509567260742, + "learning_rate": 9.657419300854083e-05, + "loss": 4.56, + "step": 138550 + }, + { + "epoch": 0.5965883411314518, + "grad_norm": 0.7888686656951904, + "learning_rate": 9.657173081224047e-05, + "loss": 4.6637, + "step": 138600 + }, + { + "epoch": 0.5968035605907344, + "grad_norm": 1.7826011180877686, + "learning_rate": 9.656926776285323e-05, + "loss": 4.8954, + "step": 138650 + }, + { + "epoch": 0.597018780050017, + "grad_norm": 2.2107036113739014, + "learning_rate": 9.656680386042425e-05, + "loss": 4.6618, + "step": 138700 + }, + { + "epoch": 0.5972339995092997, + "grad_norm": 2.471970558166504, + "learning_rate": 9.656433910499864e-05, + "loss": 5.3665, + "step": 138750 + }, + { + "epoch": 0.5974492189685823, + "grad_norm": 2.432603597640991, + "learning_rate": 9.656187349662156e-05, + "loss": 4.6119, + "step": 138800 + }, + { + "epoch": 0.5976644384278649, + "grad_norm": 2.9456114768981934, + "learning_rate": 9.65594070353382e-05, + "loss": 4.7814, + "step": 138850 + }, + { + "epoch": 0.5978796578871475, + "grad_norm": 2.5431740283966064, + "learning_rate": 9.655693972119367e-05, + "loss": 4.5934, + "step": 138900 + }, + { + "epoch": 0.5980948773464302, + "grad_norm": 3.2089462280273438, + "learning_rate": 9.655447155423323e-05, + "loss": 5.0803, + "step": 138950 + }, + { + "epoch": 0.5983100968057128, + "grad_norm": 1.8377301692962646, + "learning_rate": 9.655200253450207e-05, + "loss": 5.181, + "step": 139000 + }, + { + "epoch": 0.5985253162649954, + "grad_norm": 2.5329840183258057, + "learning_rate": 9.654953266204543e-05, + "loss": 4.9736, + "step": 139050 + }, + { + "epoch": 0.5987405357242781, + "grad_norm": 2.120412588119507, + "learning_rate": 9.654706193690852e-05, + "loss": 5.1399, + "step": 139100 + }, + { + "epoch": 0.5989557551835607, + "grad_norm": 1.4971307516098022, + "learning_rate": 9.654459035913662e-05, + "loss": 4.675, + "step": 139150 + }, + { + "epoch": 0.5991709746428433, + "grad_norm": 2.1383888721466064, + "learning_rate": 9.6542117928775e-05, + "loss": 5.2492, + "step": 139200 + }, + { + "epoch": 0.5993861941021259, + "grad_norm": 2.29113507270813, + "learning_rate": 9.653964464586896e-05, + "loss": 5.0491, + "step": 139250 + }, + { + "epoch": 0.5996014135614086, + "grad_norm": 2.957007884979248, + "learning_rate": 9.65371705104638e-05, + "loss": 4.9019, + "step": 139300 + }, + { + "epoch": 0.5998166330206912, + "grad_norm": 2.5269787311553955, + "learning_rate": 9.653469552260481e-05, + "loss": 4.8282, + "step": 139350 + }, + { + "epoch": 0.6000318524799738, + "grad_norm": 3.5073578357696533, + "learning_rate": 9.653221968233736e-05, + "loss": 4.9913, + "step": 139400 + }, + { + "epoch": 0.6002470719392564, + "grad_norm": 2.415330171585083, + "learning_rate": 9.652974298970681e-05, + "loss": 4.9528, + "step": 139450 + }, + { + "epoch": 0.6004622913985391, + "grad_norm": 4.05001163482666, + "learning_rate": 9.652726544475849e-05, + "loss": 5.1551, + "step": 139500 + }, + { + "epoch": 0.6006775108578217, + "grad_norm": 1.4295368194580078, + "learning_rate": 9.652478704753782e-05, + "loss": 5.1823, + "step": 139550 + }, + { + "epoch": 0.6008927303171043, + "grad_norm": 0.9624719619750977, + "learning_rate": 9.652230779809019e-05, + "loss": 4.451, + "step": 139600 + }, + { + "epoch": 0.6011079497763869, + "grad_norm": 2.810183525085449, + "learning_rate": 9.6519827696461e-05, + "loss": 4.5572, + "step": 139650 + }, + { + "epoch": 0.6013231692356696, + "grad_norm": 2.593533992767334, + "learning_rate": 9.651734674269567e-05, + "loss": 4.4397, + "step": 139700 + }, + { + "epoch": 0.6015383886949522, + "grad_norm": 2.77506685256958, + "learning_rate": 9.651486493683967e-05, + "loss": 5.0239, + "step": 139750 + }, + { + "epoch": 0.6017536081542348, + "grad_norm": 1.9925401210784912, + "learning_rate": 9.651238227893846e-05, + "loss": 5.0598, + "step": 139800 + }, + { + "epoch": 0.6019688276135176, + "grad_norm": 2.1826765537261963, + "learning_rate": 9.650989876903749e-05, + "loss": 4.91, + "step": 139850 + }, + { + "epoch": 0.6021840470728002, + "grad_norm": 2.6906373500823975, + "learning_rate": 9.650741440718229e-05, + "loss": 4.4743, + "step": 139900 + }, + { + "epoch": 0.6023992665320828, + "grad_norm": 3.135125160217285, + "learning_rate": 9.650492919341833e-05, + "loss": 4.7259, + "step": 139950 + }, + { + "epoch": 0.6026144859913654, + "grad_norm": 0.8599072694778442, + "learning_rate": 9.650244312779118e-05, + "loss": 4.3551, + "step": 140000 + }, + { + "epoch": 0.6028297054506481, + "grad_norm": 2.98295259475708, + "learning_rate": 9.649995621034634e-05, + "loss": 5.0341, + "step": 140050 + }, + { + "epoch": 0.6030449249099307, + "grad_norm": 2.0064053535461426, + "learning_rate": 9.649746844112936e-05, + "loss": 5.47, + "step": 140100 + }, + { + "epoch": 0.6032601443692133, + "grad_norm": 1.6083699464797974, + "learning_rate": 9.649497982018583e-05, + "loss": 4.4531, + "step": 140150 + }, + { + "epoch": 0.6034753638284959, + "grad_norm": 1.8711199760437012, + "learning_rate": 9.649249034756134e-05, + "loss": 4.5694, + "step": 140200 + }, + { + "epoch": 0.6036905832877786, + "grad_norm": 0.5344048738479614, + "learning_rate": 9.649000002330149e-05, + "loss": 5.0424, + "step": 140250 + }, + { + "epoch": 0.6039058027470612, + "grad_norm": 1.2907356023788452, + "learning_rate": 9.648750884745189e-05, + "loss": 5.1245, + "step": 140300 + }, + { + "epoch": 0.6041210222063438, + "grad_norm": 0.8841058611869812, + "learning_rate": 9.648501682005815e-05, + "loss": 4.8178, + "step": 140350 + }, + { + "epoch": 0.6043362416656265, + "grad_norm": 2.8156378269195557, + "learning_rate": 9.648252394116595e-05, + "loss": 4.4328, + "step": 140400 + }, + { + "epoch": 0.6045514611249091, + "grad_norm": 1.9138541221618652, + "learning_rate": 9.648003021082096e-05, + "loss": 5.0134, + "step": 140450 + }, + { + "epoch": 0.6047666805841917, + "grad_norm": 1.054513931274414, + "learning_rate": 9.647753562906883e-05, + "loss": 4.8713, + "step": 140500 + }, + { + "epoch": 0.6049819000434743, + "grad_norm": 2.0459070205688477, + "learning_rate": 9.647504019595528e-05, + "loss": 5.1227, + "step": 140550 + }, + { + "epoch": 0.605197119502757, + "grad_norm": 1.7907350063323975, + "learning_rate": 9.6472543911526e-05, + "loss": 4.9649, + "step": 140600 + }, + { + "epoch": 0.6054123389620396, + "grad_norm": 4.151786804199219, + "learning_rate": 9.647004677582675e-05, + "loss": 4.6877, + "step": 140650 + }, + { + "epoch": 0.6056275584213222, + "grad_norm": 4.919275760650635, + "learning_rate": 9.64675487889032e-05, + "loss": 5.074, + "step": 140700 + }, + { + "epoch": 0.6058427778806048, + "grad_norm": 2.73040509223938, + "learning_rate": 9.646504995080119e-05, + "loss": 4.8499, + "step": 140750 + }, + { + "epoch": 0.6060579973398875, + "grad_norm": 2.2600619792938232, + "learning_rate": 9.646255026156645e-05, + "loss": 5.0721, + "step": 140800 + }, + { + "epoch": 0.6062732167991701, + "grad_norm": 2.5283355712890625, + "learning_rate": 9.646004972124479e-05, + "loss": 4.6263, + "step": 140850 + }, + { + "epoch": 0.6064884362584527, + "grad_norm": 0.902189314365387, + "learning_rate": 9.645754832988199e-05, + "loss": 4.7605, + "step": 140900 + }, + { + "epoch": 0.6067036557177353, + "grad_norm": 3.564846992492676, + "learning_rate": 9.645504608752388e-05, + "loss": 4.8464, + "step": 140950 + }, + { + "epoch": 0.606918875177018, + "grad_norm": 1.8800263404846191, + "learning_rate": 9.645254299421629e-05, + "loss": 4.7427, + "step": 141000 + }, + { + "epoch": 0.606918875177018, + "eval_loss": 5.313602924346924, + "eval_runtime": 34.8565, + "eval_samples_per_second": 18.361, + "eval_steps_per_second": 9.18, + "eval_tts_loss": 6.695272867147776, + "step": 141000 + }, + { + "epoch": 0.6071340946363006, + "grad_norm": 1.8911025524139404, + "learning_rate": 9.645003905000506e-05, + "loss": 4.8081, + "step": 141050 + }, + { + "epoch": 0.6073493140955832, + "grad_norm": 4.121145725250244, + "learning_rate": 9.644753425493611e-05, + "loss": 4.8363, + "step": 141100 + }, + { + "epoch": 0.607564533554866, + "grad_norm": 1.9904556274414062, + "learning_rate": 9.644502860905527e-05, + "loss": 4.8569, + "step": 141150 + }, + { + "epoch": 0.6077797530141485, + "grad_norm": 2.5717499256134033, + "learning_rate": 9.644252211240845e-05, + "loss": 4.6545, + "step": 141200 + }, + { + "epoch": 0.6079949724734312, + "grad_norm": 1.9176746606826782, + "learning_rate": 9.644001476504157e-05, + "loss": 5.0358, + "step": 141250 + }, + { + "epoch": 0.6082101919327138, + "grad_norm": 0.34041735529899597, + "learning_rate": 9.643750656700057e-05, + "loss": 4.9654, + "step": 141300 + }, + { + "epoch": 0.6084254113919965, + "grad_norm": 0.687423050403595, + "learning_rate": 9.643499751833136e-05, + "loss": 4.414, + "step": 141350 + }, + { + "epoch": 0.6086406308512791, + "grad_norm": 2.224834442138672, + "learning_rate": 9.643248761907992e-05, + "loss": 4.5631, + "step": 141400 + }, + { + "epoch": 0.6088558503105617, + "grad_norm": 3.1855695247650146, + "learning_rate": 9.642997686929223e-05, + "loss": 4.6361, + "step": 141450 + }, + { + "epoch": 0.6090710697698443, + "grad_norm": 6.733048915863037, + "learning_rate": 9.642746526901428e-05, + "loss": 4.6668, + "step": 141500 + }, + { + "epoch": 0.609286289229127, + "grad_norm": 3.2327425479888916, + "learning_rate": 9.642495281829206e-05, + "loss": 4.8281, + "step": 141550 + }, + { + "epoch": 0.6095015086884096, + "grad_norm": 1.4435335397720337, + "learning_rate": 9.642243951717162e-05, + "loss": 4.752, + "step": 141600 + }, + { + "epoch": 0.6097167281476922, + "grad_norm": 3.1595141887664795, + "learning_rate": 9.641992536569897e-05, + "loss": 4.9211, + "step": 141650 + }, + { + "epoch": 0.6099319476069748, + "grad_norm": 2.159522294998169, + "learning_rate": 9.641741036392018e-05, + "loss": 5.1271, + "step": 141700 + }, + { + "epoch": 0.6101471670662575, + "grad_norm": 2.6914000511169434, + "learning_rate": 9.641489451188133e-05, + "loss": 4.9304, + "step": 141750 + }, + { + "epoch": 0.6103623865255401, + "grad_norm": 1.7843252420425415, + "learning_rate": 9.641237780962847e-05, + "loss": 4.9248, + "step": 141800 + }, + { + "epoch": 0.6105776059848227, + "grad_norm": 0.6680654287338257, + "learning_rate": 9.640986025720773e-05, + "loss": 4.815, + "step": 141850 + }, + { + "epoch": 0.6107928254441054, + "grad_norm": 3.2864537239074707, + "learning_rate": 9.640734185466522e-05, + "loss": 4.5785, + "step": 141900 + }, + { + "epoch": 0.611008044903388, + "grad_norm": 2.6002063751220703, + "learning_rate": 9.640482260204706e-05, + "loss": 4.2449, + "step": 141950 + }, + { + "epoch": 0.6112232643626706, + "grad_norm": 2.0287108421325684, + "learning_rate": 9.64023024993994e-05, + "loss": 4.7079, + "step": 142000 + }, + { + "epoch": 0.6114384838219532, + "grad_norm": 0.5127223134040833, + "learning_rate": 9.639978154676843e-05, + "loss": 4.7446, + "step": 142050 + }, + { + "epoch": 0.6116537032812359, + "grad_norm": 2.0893704891204834, + "learning_rate": 9.639725974420029e-05, + "loss": 4.8481, + "step": 142100 + }, + { + "epoch": 0.6118689227405185, + "grad_norm": 2.554455041885376, + "learning_rate": 9.639473709174118e-05, + "loss": 4.7366, + "step": 142150 + }, + { + "epoch": 0.6120841421998011, + "grad_norm": 1.1450814008712769, + "learning_rate": 9.639221358943734e-05, + "loss": 4.686, + "step": 142200 + }, + { + "epoch": 0.6122993616590837, + "grad_norm": 1.73625910282135, + "learning_rate": 9.638968923733495e-05, + "loss": 4.9671, + "step": 142250 + }, + { + "epoch": 0.6125145811183664, + "grad_norm": 2.1706979274749756, + "learning_rate": 9.638716403548028e-05, + "loss": 4.8772, + "step": 142300 + }, + { + "epoch": 0.612729800577649, + "grad_norm": 1.7658251523971558, + "learning_rate": 9.638463798391958e-05, + "loss": 5.1916, + "step": 142350 + }, + { + "epoch": 0.6129450200369316, + "grad_norm": 2.281869411468506, + "learning_rate": 9.638211108269912e-05, + "loss": 4.5738, + "step": 142400 + }, + { + "epoch": 0.6131602394962143, + "grad_norm": 2.4167487621307373, + "learning_rate": 9.637958333186519e-05, + "loss": 5.2284, + "step": 142450 + }, + { + "epoch": 0.6133754589554969, + "grad_norm": 1.9489940404891968, + "learning_rate": 9.637705473146407e-05, + "loss": 5.1965, + "step": 142500 + }, + { + "epoch": 0.6135906784147795, + "grad_norm": 0.8669845461845398, + "learning_rate": 9.637452528154214e-05, + "loss": 4.7131, + "step": 142550 + }, + { + "epoch": 0.6138058978740621, + "grad_norm": 1.3851827383041382, + "learning_rate": 9.637199498214567e-05, + "loss": 4.4338, + "step": 142600 + }, + { + "epoch": 0.6140211173333449, + "grad_norm": 5.071634292602539, + "learning_rate": 9.636946383332102e-05, + "loss": 5.1717, + "step": 142650 + }, + { + "epoch": 0.6142363367926275, + "grad_norm": 2.0694093704223633, + "learning_rate": 9.636693183511457e-05, + "loss": 5.2609, + "step": 142700 + }, + { + "epoch": 0.6144515562519101, + "grad_norm": 1.705145001411438, + "learning_rate": 9.63643989875727e-05, + "loss": 4.2293, + "step": 142750 + }, + { + "epoch": 0.6146667757111927, + "grad_norm": 1.1042041778564453, + "learning_rate": 9.63618652907418e-05, + "loss": 4.7299, + "step": 142800 + }, + { + "epoch": 0.6148819951704754, + "grad_norm": 2.219320774078369, + "learning_rate": 9.635933074466828e-05, + "loss": 4.7975, + "step": 142850 + }, + { + "epoch": 0.615097214629758, + "grad_norm": 1.526316523551941, + "learning_rate": 9.635679534939859e-05, + "loss": 5.005, + "step": 142900 + }, + { + "epoch": 0.6153124340890406, + "grad_norm": 2.378319263458252, + "learning_rate": 9.635425910497912e-05, + "loss": 5.0119, + "step": 142950 + }, + { + "epoch": 0.6155276535483232, + "grad_norm": 1.9882842302322388, + "learning_rate": 9.635172201145639e-05, + "loss": 4.7336, + "step": 143000 + }, + { + "epoch": 0.6157428730076059, + "grad_norm": 1.910014271736145, + "learning_rate": 9.634918406887684e-05, + "loss": 4.141, + "step": 143050 + }, + { + "epoch": 0.6159580924668885, + "grad_norm": 2.543832778930664, + "learning_rate": 9.634664527728697e-05, + "loss": 5.1116, + "step": 143100 + }, + { + "epoch": 0.6161733119261711, + "grad_norm": 2.1053378582000732, + "learning_rate": 9.634410563673326e-05, + "loss": 4.9159, + "step": 143150 + }, + { + "epoch": 0.6163885313854538, + "grad_norm": 2.243131637573242, + "learning_rate": 9.634156514726224e-05, + "loss": 4.7277, + "step": 143200 + }, + { + "epoch": 0.6166037508447364, + "grad_norm": 1.9470096826553345, + "learning_rate": 9.633902380892048e-05, + "loss": 5.1604, + "step": 143250 + }, + { + "epoch": 0.616818970304019, + "grad_norm": 1.9676024913787842, + "learning_rate": 9.63364816217545e-05, + "loss": 5.1491, + "step": 143300 + }, + { + "epoch": 0.6170341897633016, + "grad_norm": 4.157006740570068, + "learning_rate": 9.633393858581088e-05, + "loss": 4.6211, + "step": 143350 + }, + { + "epoch": 0.6172494092225843, + "grad_norm": 2.040973424911499, + "learning_rate": 9.633139470113619e-05, + "loss": 4.3325, + "step": 143400 + }, + { + "epoch": 0.6174646286818669, + "grad_norm": 2.029280185699463, + "learning_rate": 9.632884996777703e-05, + "loss": 4.9634, + "step": 143450 + }, + { + "epoch": 0.6176798481411495, + "grad_norm": 0.4245627224445343, + "learning_rate": 9.632630438578003e-05, + "loss": 4.8496, + "step": 143500 + }, + { + "epoch": 0.6178950676004321, + "grad_norm": 2.328066825866699, + "learning_rate": 9.63237579551918e-05, + "loss": 4.8456, + "step": 143550 + }, + { + "epoch": 0.6181102870597148, + "grad_norm": 1.980931282043457, + "learning_rate": 9.6321210676059e-05, + "loss": 4.8839, + "step": 143600 + }, + { + "epoch": 0.6183255065189974, + "grad_norm": 2.612452507019043, + "learning_rate": 9.631866254842828e-05, + "loss": 4.5882, + "step": 143650 + }, + { + "epoch": 0.61854072597828, + "grad_norm": 2.1011650562286377, + "learning_rate": 9.63161135723463e-05, + "loss": 4.8856, + "step": 143700 + }, + { + "epoch": 0.6187559454375627, + "grad_norm": 1.9154530763626099, + "learning_rate": 9.631356374785978e-05, + "loss": 4.9047, + "step": 143750 + }, + { + "epoch": 0.6189711648968453, + "grad_norm": 0.9289756417274475, + "learning_rate": 9.631101307501542e-05, + "loss": 4.522, + "step": 143800 + }, + { + "epoch": 0.6191863843561279, + "grad_norm": 3.1780459880828857, + "learning_rate": 9.630846155385994e-05, + "loss": 4.7308, + "step": 143850 + }, + { + "epoch": 0.6194016038154105, + "grad_norm": 1.8549811840057373, + "learning_rate": 9.630590918444008e-05, + "loss": 4.7549, + "step": 143900 + }, + { + "epoch": 0.6196168232746933, + "grad_norm": 2.3384687900543213, + "learning_rate": 9.630335596680259e-05, + "loss": 5.0979, + "step": 143950 + }, + { + "epoch": 0.6198320427339759, + "grad_norm": 2.4868524074554443, + "learning_rate": 9.630080190099423e-05, + "loss": 4.879, + "step": 144000 + }, + { + "epoch": 0.6198320427339759, + "eval_loss": 5.304580211639404, + "eval_runtime": 34.8921, + "eval_samples_per_second": 18.342, + "eval_steps_per_second": 9.171, + "eval_tts_loss": 6.672987589159308, + "step": 144000 + }, + { + "epoch": 0.6200472621932585, + "grad_norm": 6.63310432434082, + "learning_rate": 9.62982469870618e-05, + "loss": 5.3367, + "step": 144050 + }, + { + "epoch": 0.620262481652541, + "grad_norm": 1.8005168437957764, + "learning_rate": 9.62956912250521e-05, + "loss": 4.9119, + "step": 144100 + }, + { + "epoch": 0.6204777011118238, + "grad_norm": 2.1717417240142822, + "learning_rate": 9.629313461501192e-05, + "loss": 4.6233, + "step": 144150 + }, + { + "epoch": 0.6206929205711064, + "grad_norm": 1.637709617614746, + "learning_rate": 9.629057715698814e-05, + "loss": 4.8286, + "step": 144200 + }, + { + "epoch": 0.620908140030389, + "grad_norm": 1.4864298105239868, + "learning_rate": 9.628801885102756e-05, + "loss": 4.6812, + "step": 144250 + }, + { + "epoch": 0.6211233594896716, + "grad_norm": 4.160035133361816, + "learning_rate": 9.628545969717706e-05, + "loss": 4.9448, + "step": 144300 + }, + { + "epoch": 0.6213385789489543, + "grad_norm": 0.6495093107223511, + "learning_rate": 9.628289969548352e-05, + "loss": 5.2005, + "step": 144350 + }, + { + "epoch": 0.6215537984082369, + "grad_norm": 0.6739805340766907, + "learning_rate": 9.628033884599383e-05, + "loss": 4.9377, + "step": 144400 + }, + { + "epoch": 0.6217690178675195, + "grad_norm": 0.7286418676376343, + "learning_rate": 9.627777714875491e-05, + "loss": 4.3316, + "step": 144450 + }, + { + "epoch": 0.6219842373268022, + "grad_norm": 1.0741769075393677, + "learning_rate": 9.627521460381366e-05, + "loss": 4.838, + "step": 144500 + }, + { + "epoch": 0.6221994567860848, + "grad_norm": 1.4692777395248413, + "learning_rate": 9.627265121121705e-05, + "loss": 5.1714, + "step": 144550 + }, + { + "epoch": 0.6224146762453674, + "grad_norm": 2.30108904838562, + "learning_rate": 9.627008697101201e-05, + "loss": 4.4915, + "step": 144600 + }, + { + "epoch": 0.62262989570465, + "grad_norm": 2.12243914604187, + "learning_rate": 9.626752188324553e-05, + "loss": 4.9739, + "step": 144650 + }, + { + "epoch": 0.6228451151639327, + "grad_norm": 2.1692826747894287, + "learning_rate": 9.626495594796458e-05, + "loss": 5.2737, + "step": 144700 + }, + { + "epoch": 0.6230603346232153, + "grad_norm": 1.936368703842163, + "learning_rate": 9.626238916521617e-05, + "loss": 5.324, + "step": 144750 + }, + { + "epoch": 0.6232755540824979, + "grad_norm": 2.526075839996338, + "learning_rate": 9.625982153504731e-05, + "loss": 4.7368, + "step": 144800 + }, + { + "epoch": 0.6234907735417805, + "grad_norm": 2.413363218307495, + "learning_rate": 9.625725305750506e-05, + "loss": 4.6393, + "step": 144850 + }, + { + "epoch": 0.6237059930010632, + "grad_norm": 3.426584005355835, + "learning_rate": 9.625468373263644e-05, + "loss": 4.5884, + "step": 144900 + }, + { + "epoch": 0.6239212124603458, + "grad_norm": 2.4162943363189697, + "learning_rate": 9.625211356048851e-05, + "loss": 4.2383, + "step": 144950 + }, + { + "epoch": 0.6241364319196284, + "grad_norm": 2.3249094486236572, + "learning_rate": 9.624954254110837e-05, + "loss": 5.2947, + "step": 145000 + }, + { + "epoch": 0.624351651378911, + "grad_norm": 1.5104682445526123, + "learning_rate": 9.624697067454312e-05, + "loss": 4.722, + "step": 145050 + }, + { + "epoch": 0.6245668708381937, + "grad_norm": 0.6632451415061951, + "learning_rate": 9.624439796083984e-05, + "loss": 4.8496, + "step": 145100 + }, + { + "epoch": 0.6247820902974763, + "grad_norm": 1.8325589895248413, + "learning_rate": 9.624182440004568e-05, + "loss": 4.5969, + "step": 145150 + }, + { + "epoch": 0.6249973097567589, + "grad_norm": 2.3417677879333496, + "learning_rate": 9.623924999220779e-05, + "loss": 4.9708, + "step": 145200 + }, + { + "epoch": 0.6252125292160416, + "grad_norm": 2.5992302894592285, + "learning_rate": 9.62366747373733e-05, + "loss": 4.7908, + "step": 145250 + }, + { + "epoch": 0.6254277486753242, + "grad_norm": 2.0382628440856934, + "learning_rate": 9.623409863558939e-05, + "loss": 4.6715, + "step": 145300 + }, + { + "epoch": 0.6256429681346068, + "grad_norm": 1.9317626953125, + "learning_rate": 9.623152168690327e-05, + "loss": 4.4983, + "step": 145350 + }, + { + "epoch": 0.6258581875938894, + "grad_norm": 2.8048384189605713, + "learning_rate": 9.622894389136213e-05, + "loss": 4.8868, + "step": 145400 + }, + { + "epoch": 0.6260734070531722, + "grad_norm": 2.787827968597412, + "learning_rate": 9.622636524901316e-05, + "loss": 4.7233, + "step": 145450 + }, + { + "epoch": 0.6262886265124548, + "grad_norm": 2.3023648262023926, + "learning_rate": 9.622378575990365e-05, + "loss": 5.0314, + "step": 145500 + }, + { + "epoch": 0.6265038459717374, + "grad_norm": 1.096458911895752, + "learning_rate": 9.622120542408081e-05, + "loss": 4.8052, + "step": 145550 + }, + { + "epoch": 0.62671906543102, + "grad_norm": 0.77128005027771, + "learning_rate": 9.621862424159191e-05, + "loss": 5.2443, + "step": 145600 + }, + { + "epoch": 0.6269342848903027, + "grad_norm": 2.2298927307128906, + "learning_rate": 9.621604221248426e-05, + "loss": 4.4739, + "step": 145650 + }, + { + "epoch": 0.6271495043495853, + "grad_norm": 2.522228479385376, + "learning_rate": 9.621345933680512e-05, + "loss": 4.823, + "step": 145700 + }, + { + "epoch": 0.6273647238088679, + "grad_norm": 3.499014377593994, + "learning_rate": 9.621087561460182e-05, + "loss": 4.7229, + "step": 145750 + }, + { + "epoch": 0.6275799432681506, + "grad_norm": 1.8243491649627686, + "learning_rate": 9.620829104592171e-05, + "loss": 4.6069, + "step": 145800 + }, + { + "epoch": 0.6277951627274332, + "grad_norm": 4.155776023864746, + "learning_rate": 9.620570563081209e-05, + "loss": 4.442, + "step": 145850 + }, + { + "epoch": 0.6280103821867158, + "grad_norm": 0.9483282566070557, + "learning_rate": 9.620311936932034e-05, + "loss": 4.9115, + "step": 145900 + }, + { + "epoch": 0.6282256016459984, + "grad_norm": 2.9037046432495117, + "learning_rate": 9.620053226149384e-05, + "loss": 4.9748, + "step": 145950 + }, + { + "epoch": 0.6284408211052811, + "grad_norm": 4.213788032531738, + "learning_rate": 9.619794430737996e-05, + "loss": 5.0103, + "step": 146000 + }, + { + "epoch": 0.6286560405645637, + "grad_norm": 2.098719358444214, + "learning_rate": 9.619535550702613e-05, + "loss": 4.5453, + "step": 146050 + }, + { + "epoch": 0.6288712600238463, + "grad_norm": 1.3325320482254028, + "learning_rate": 9.619276586047976e-05, + "loss": 4.7812, + "step": 146100 + }, + { + "epoch": 0.6290864794831289, + "grad_norm": 1.8620500564575195, + "learning_rate": 9.619017536778827e-05, + "loss": 5.1781, + "step": 146150 + }, + { + "epoch": 0.6293016989424116, + "grad_norm": 2.5153636932373047, + "learning_rate": 9.618758402899915e-05, + "loss": 5.0221, + "step": 146200 + }, + { + "epoch": 0.6295169184016942, + "grad_norm": 1.8497190475463867, + "learning_rate": 9.618499184415984e-05, + "loss": 4.9799, + "step": 146250 + }, + { + "epoch": 0.6297321378609768, + "grad_norm": 2.832843542098999, + "learning_rate": 9.618239881331781e-05, + "loss": 4.867, + "step": 146300 + }, + { + "epoch": 0.6299473573202594, + "grad_norm": 0.6990771889686584, + "learning_rate": 9.617980493652058e-05, + "loss": 4.9379, + "step": 146350 + }, + { + "epoch": 0.6301625767795421, + "grad_norm": 1.224868655204773, + "learning_rate": 9.617721021381569e-05, + "loss": 4.7432, + "step": 146400 + }, + { + "epoch": 0.6303777962388247, + "grad_norm": 2.4848392009735107, + "learning_rate": 9.617461464525061e-05, + "loss": 5.141, + "step": 146450 + }, + { + "epoch": 0.6305930156981073, + "grad_norm": 2.9702892303466797, + "learning_rate": 9.617201823087292e-05, + "loss": 4.8491, + "step": 146500 + }, + { + "epoch": 0.63080823515739, + "grad_norm": 2.03548264503479, + "learning_rate": 9.616942097073018e-05, + "loss": 5.0246, + "step": 146550 + }, + { + "epoch": 0.6310234546166726, + "grad_norm": 1.6829283237457275, + "learning_rate": 9.616682286486995e-05, + "loss": 4.9761, + "step": 146600 + }, + { + "epoch": 0.6312386740759552, + "grad_norm": 1.9973013401031494, + "learning_rate": 9.616422391333984e-05, + "loss": 4.9392, + "step": 146650 + }, + { + "epoch": 0.6314538935352378, + "grad_norm": 2.3607780933380127, + "learning_rate": 9.616162411618744e-05, + "loss": 4.8549, + "step": 146700 + }, + { + "epoch": 0.6316691129945206, + "grad_norm": 1.604590654373169, + "learning_rate": 9.61590234734604e-05, + "loss": 5.1178, + "step": 146750 + }, + { + "epoch": 0.6318843324538032, + "grad_norm": 1.8374361991882324, + "learning_rate": 9.615642198520632e-05, + "loss": 4.3751, + "step": 146800 + }, + { + "epoch": 0.6320995519130858, + "grad_norm": 3.95770263671875, + "learning_rate": 9.615381965147287e-05, + "loss": 4.3497, + "step": 146850 + }, + { + "epoch": 0.6323147713723684, + "grad_norm": 3.669022798538208, + "learning_rate": 9.615121647230774e-05, + "loss": 5.0024, + "step": 146900 + }, + { + "epoch": 0.6325299908316511, + "grad_norm": 2.6916158199310303, + "learning_rate": 9.614861244775857e-05, + "loss": 5.0205, + "step": 146950 + }, + { + "epoch": 0.6327452102909337, + "grad_norm": 0.5466786623001099, + "learning_rate": 9.614600757787311e-05, + "loss": 5.0292, + "step": 147000 + }, + { + "epoch": 0.6327452102909337, + "eval_loss": 5.29662561416626, + "eval_runtime": 35.1453, + "eval_samples_per_second": 18.21, + "eval_steps_per_second": 9.105, + "eval_tts_loss": 6.648943983487482, + "step": 147000 + }, + { + "epoch": 0.6329604297502163, + "grad_norm": 2.2534492015838623, + "learning_rate": 9.614340186269903e-05, + "loss": 4.8262, + "step": 147050 + }, + { + "epoch": 0.633175649209499, + "grad_norm": 2.2285990715026855, + "learning_rate": 9.614079530228408e-05, + "loss": 4.6868, + "step": 147100 + }, + { + "epoch": 0.6333908686687816, + "grad_norm": 3.227511167526245, + "learning_rate": 9.613818789667601e-05, + "loss": 4.8965, + "step": 147150 + }, + { + "epoch": 0.6336060881280642, + "grad_norm": 2.132155656814575, + "learning_rate": 9.61355796459226e-05, + "loss": 4.4857, + "step": 147200 + }, + { + "epoch": 0.6338213075873468, + "grad_norm": 3.385236978530884, + "learning_rate": 9.613297055007158e-05, + "loss": 4.8068, + "step": 147250 + }, + { + "epoch": 0.6340365270466295, + "grad_norm": 0.7659268975257874, + "learning_rate": 9.613036060917078e-05, + "loss": 5.2504, + "step": 147300 + }, + { + "epoch": 0.6342517465059121, + "grad_norm": 1.347859501838684, + "learning_rate": 9.612774982326799e-05, + "loss": 4.6278, + "step": 147350 + }, + { + "epoch": 0.6344669659651947, + "grad_norm": 2.2454211711883545, + "learning_rate": 9.612513819241105e-05, + "loss": 4.7203, + "step": 147400 + }, + { + "epoch": 0.6346821854244773, + "grad_norm": 2.110975980758667, + "learning_rate": 9.612252571664777e-05, + "loss": 4.4542, + "step": 147450 + }, + { + "epoch": 0.63489740488376, + "grad_norm": 2.351548671722412, + "learning_rate": 9.611991239602605e-05, + "loss": 4.6036, + "step": 147500 + }, + { + "epoch": 0.6351126243430426, + "grad_norm": 2.097212553024292, + "learning_rate": 9.61172982305937e-05, + "loss": 4.6513, + "step": 147550 + }, + { + "epoch": 0.6353278438023252, + "grad_norm": 2.364168167114258, + "learning_rate": 9.611468322039865e-05, + "loss": 5.0948, + "step": 147600 + }, + { + "epoch": 0.6355430632616078, + "grad_norm": 1.7647356986999512, + "learning_rate": 9.611206736548882e-05, + "loss": 4.8002, + "step": 147650 + }, + { + "epoch": 0.6357582827208905, + "grad_norm": 2.5805985927581787, + "learning_rate": 9.610945066591206e-05, + "loss": 4.8846, + "step": 147700 + }, + { + "epoch": 0.6359735021801731, + "grad_norm": 2.1220624446868896, + "learning_rate": 9.610683312171635e-05, + "loss": 4.9966, + "step": 147750 + }, + { + "epoch": 0.6361887216394557, + "grad_norm": 2.0982866287231445, + "learning_rate": 9.610421473294963e-05, + "loss": 4.703, + "step": 147800 + }, + { + "epoch": 0.6364039410987384, + "grad_norm": 4.675599098205566, + "learning_rate": 9.610159549965984e-05, + "loss": 5.089, + "step": 147850 + }, + { + "epoch": 0.636619160558021, + "grad_norm": 0.6748958230018616, + "learning_rate": 9.609897542189502e-05, + "loss": 4.9279, + "step": 147900 + }, + { + "epoch": 0.6368343800173036, + "grad_norm": 3.2859373092651367, + "learning_rate": 9.609635449970307e-05, + "loss": 4.5921, + "step": 147950 + }, + { + "epoch": 0.6370495994765862, + "grad_norm": 1.7579450607299805, + "learning_rate": 9.609373273313207e-05, + "loss": 4.5856, + "step": 148000 + }, + { + "epoch": 0.637264818935869, + "grad_norm": 2.5300469398498535, + "learning_rate": 9.609111012223003e-05, + "loss": 5.1425, + "step": 148050 + }, + { + "epoch": 0.6374800383951515, + "grad_norm": 1.979454517364502, + "learning_rate": 9.608848666704498e-05, + "loss": 4.8938, + "step": 148100 + }, + { + "epoch": 0.6376952578544341, + "grad_norm": 2.009977102279663, + "learning_rate": 9.608586236762498e-05, + "loss": 4.9217, + "step": 148150 + }, + { + "epoch": 0.6379104773137168, + "grad_norm": 1.3238004446029663, + "learning_rate": 9.60832372240181e-05, + "loss": 4.5144, + "step": 148200 + }, + { + "epoch": 0.6381256967729995, + "grad_norm": 1.0945689678192139, + "learning_rate": 9.608061123627243e-05, + "loss": 4.7011, + "step": 148250 + }, + { + "epoch": 0.6383409162322821, + "grad_norm": 2.2663474082946777, + "learning_rate": 9.607798440443608e-05, + "loss": 4.7416, + "step": 148300 + }, + { + "epoch": 0.6385561356915647, + "grad_norm": 3.120506763458252, + "learning_rate": 9.607535672855714e-05, + "loss": 4.7652, + "step": 148350 + }, + { + "epoch": 0.6387713551508473, + "grad_norm": 2.0181076526641846, + "learning_rate": 9.607272820868378e-05, + "loss": 5.154, + "step": 148400 + }, + { + "epoch": 0.63898657461013, + "grad_norm": 1.2501239776611328, + "learning_rate": 9.607009884486412e-05, + "loss": 4.8528, + "step": 148450 + }, + { + "epoch": 0.6392017940694126, + "grad_norm": 1.9461414813995361, + "learning_rate": 9.606746863714633e-05, + "loss": 5.157, + "step": 148500 + }, + { + "epoch": 0.6394170135286952, + "grad_norm": 2.530801773071289, + "learning_rate": 9.606483758557859e-05, + "loss": 5.0516, + "step": 148550 + }, + { + "epoch": 0.6396322329879779, + "grad_norm": 0.6310584545135498, + "learning_rate": 9.60622056902091e-05, + "loss": 4.5213, + "step": 148600 + }, + { + "epoch": 0.6398474524472605, + "grad_norm": 2.7120797634124756, + "learning_rate": 9.605957295108608e-05, + "loss": 5.1043, + "step": 148650 + }, + { + "epoch": 0.6400626719065431, + "grad_norm": 3.454878091812134, + "learning_rate": 9.605693936825773e-05, + "loss": 4.9193, + "step": 148700 + }, + { + "epoch": 0.6402778913658257, + "grad_norm": 1.3616071939468384, + "learning_rate": 9.60543049417723e-05, + "loss": 5.0231, + "step": 148750 + }, + { + "epoch": 0.6404931108251084, + "grad_norm": 3.7922608852386475, + "learning_rate": 9.605166967167806e-05, + "loss": 4.7457, + "step": 148800 + }, + { + "epoch": 0.640708330284391, + "grad_norm": 0.44138333201408386, + "learning_rate": 9.604903355802326e-05, + "loss": 4.855, + "step": 148850 + }, + { + "epoch": 0.6409235497436736, + "grad_norm": 2.8160955905914307, + "learning_rate": 9.604639660085621e-05, + "loss": 4.965, + "step": 148900 + }, + { + "epoch": 0.6411387692029562, + "grad_norm": 2.5846593379974365, + "learning_rate": 9.60437588002252e-05, + "loss": 4.6925, + "step": 148950 + }, + { + "epoch": 0.6413539886622389, + "grad_norm": 1.4381592273712158, + "learning_rate": 9.604112015617857e-05, + "loss": 4.6988, + "step": 149000 + }, + { + "epoch": 0.6415692081215215, + "grad_norm": 1.9534692764282227, + "learning_rate": 9.603848066876462e-05, + "loss": 4.6132, + "step": 149050 + }, + { + "epoch": 0.6417844275808041, + "grad_norm": 1.6546834707260132, + "learning_rate": 9.603584033803172e-05, + "loss": 5.0401, + "step": 149100 + }, + { + "epoch": 0.6419996470400868, + "grad_norm": 2.858306884765625, + "learning_rate": 9.603319916402823e-05, + "loss": 4.9131, + "step": 149150 + }, + { + "epoch": 0.6422148664993694, + "grad_norm": 4.952290058135986, + "learning_rate": 9.603055714680252e-05, + "loss": 4.6048, + "step": 149200 + }, + { + "epoch": 0.642430085958652, + "grad_norm": 1.8035417795181274, + "learning_rate": 9.6027914286403e-05, + "loss": 4.7434, + "step": 149250 + }, + { + "epoch": 0.6426453054179346, + "grad_norm": 2.437145948410034, + "learning_rate": 9.602527058287807e-05, + "loss": 4.8991, + "step": 149300 + }, + { + "epoch": 0.6428605248772173, + "grad_norm": 2.010551929473877, + "learning_rate": 9.602262603627619e-05, + "loss": 4.8124, + "step": 149350 + }, + { + "epoch": 0.6430757443364999, + "grad_norm": 3.3924267292022705, + "learning_rate": 9.601998064664575e-05, + "loss": 4.5706, + "step": 149400 + }, + { + "epoch": 0.6432909637957825, + "grad_norm": 2.0923357009887695, + "learning_rate": 9.601733441403524e-05, + "loss": 4.9696, + "step": 149450 + }, + { + "epoch": 0.6435061832550651, + "grad_norm": 1.8503038883209229, + "learning_rate": 9.601468733849312e-05, + "loss": 4.8727, + "step": 149500 + }, + { + "epoch": 0.6437214027143479, + "grad_norm": 1.9496405124664307, + "learning_rate": 9.601203942006789e-05, + "loss": 4.5903, + "step": 149550 + }, + { + "epoch": 0.6439366221736305, + "grad_norm": 1.6320509910583496, + "learning_rate": 9.600939065880805e-05, + "loss": 5.0317, + "step": 149600 + }, + { + "epoch": 0.6441518416329131, + "grad_norm": 1.8355395793914795, + "learning_rate": 9.600674105476213e-05, + "loss": 4.7896, + "step": 149650 + }, + { + "epoch": 0.6443670610921957, + "grad_norm": 2.1129214763641357, + "learning_rate": 9.600409060797864e-05, + "loss": 4.7311, + "step": 149700 + }, + { + "epoch": 0.6445822805514784, + "grad_norm": 2.4421374797821045, + "learning_rate": 9.600143931850614e-05, + "loss": 5.1234, + "step": 149750 + }, + { + "epoch": 0.644797500010761, + "grad_norm": 2.983194351196289, + "learning_rate": 9.599878718639319e-05, + "loss": 4.5924, + "step": 149800 + }, + { + "epoch": 0.6450127194700436, + "grad_norm": 1.1555876731872559, + "learning_rate": 9.599613421168839e-05, + "loss": 4.6444, + "step": 149850 + }, + { + "epoch": 0.6452279389293263, + "grad_norm": 2.303072929382324, + "learning_rate": 9.599348039444032e-05, + "loss": 4.8662, + "step": 149900 + }, + { + "epoch": 0.6454431583886089, + "grad_norm": 1.4115328788757324, + "learning_rate": 9.599082573469759e-05, + "loss": 4.993, + "step": 149950 + }, + { + "epoch": 0.6456583778478915, + "grad_norm": 3.1833982467651367, + "learning_rate": 9.598817023250884e-05, + "loss": 4.8128, + "step": 150000 + }, + { + "epoch": 0.6456583778478915, + "eval_loss": 5.2878618240356445, + "eval_runtime": 34.9932, + "eval_samples_per_second": 18.289, + "eval_steps_per_second": 9.145, + "eval_tts_loss": 6.771099524143738, + "step": 150000 + }, + { + "epoch": 0.6458735973071741, + "grad_norm": 1.7413872480392456, + "learning_rate": 9.59855138879227e-05, + "loss": 4.922, + "step": 150050 + }, + { + "epoch": 0.6460888167664568, + "grad_norm": 2.367825508117676, + "learning_rate": 9.598285670098785e-05, + "loss": 5.0334, + "step": 150100 + }, + { + "epoch": 0.6463040362257394, + "grad_norm": 2.0339515209198, + "learning_rate": 9.598019867175294e-05, + "loss": 4.8856, + "step": 150150 + }, + { + "epoch": 0.646519255685022, + "grad_norm": 1.8738131523132324, + "learning_rate": 9.597753980026667e-05, + "loss": 4.9227, + "step": 150200 + }, + { + "epoch": 0.6467344751443046, + "grad_norm": 3.372340202331543, + "learning_rate": 9.597488008657774e-05, + "loss": 4.7616, + "step": 150250 + }, + { + "epoch": 0.6469496946035873, + "grad_norm": 0.9084614515304565, + "learning_rate": 9.597221953073485e-05, + "loss": 5.0516, + "step": 150300 + }, + { + "epoch": 0.6471649140628699, + "grad_norm": 1.631717324256897, + "learning_rate": 9.596955813278679e-05, + "loss": 4.5171, + "step": 150350 + }, + { + "epoch": 0.6473801335221525, + "grad_norm": 1.7847416400909424, + "learning_rate": 9.596689589278226e-05, + "loss": 4.0214, + "step": 150400 + }, + { + "epoch": 0.6475953529814352, + "grad_norm": 2.514068365097046, + "learning_rate": 9.596423281077004e-05, + "loss": 4.6988, + "step": 150450 + }, + { + "epoch": 0.6478105724407178, + "grad_norm": 2.048532724380493, + "learning_rate": 9.596156888679893e-05, + "loss": 4.6888, + "step": 150500 + }, + { + "epoch": 0.6480257919000004, + "grad_norm": 3.2920594215393066, + "learning_rate": 9.59589041209177e-05, + "loss": 4.9597, + "step": 150550 + }, + { + "epoch": 0.648241011359283, + "grad_norm": 0.8841613531112671, + "learning_rate": 9.595623851317518e-05, + "loss": 5.4187, + "step": 150600 + }, + { + "epoch": 0.6484562308185657, + "grad_norm": 1.9168146848678589, + "learning_rate": 9.595357206362018e-05, + "loss": 5.4502, + "step": 150650 + }, + { + "epoch": 0.6486714502778483, + "grad_norm": 2.486406087875366, + "learning_rate": 9.595090477230157e-05, + "loss": 5.1344, + "step": 150700 + }, + { + "epoch": 0.6488866697371309, + "grad_norm": 2.0182580947875977, + "learning_rate": 9.59482366392682e-05, + "loss": 4.7315, + "step": 150750 + }, + { + "epoch": 0.6491018891964135, + "grad_norm": 3.5255303382873535, + "learning_rate": 9.594556766456892e-05, + "loss": 4.7486, + "step": 150800 + }, + { + "epoch": 0.6493171086556963, + "grad_norm": 2.344407081604004, + "learning_rate": 9.594289784825264e-05, + "loss": 4.6131, + "step": 150850 + }, + { + "epoch": 0.6495323281149789, + "grad_norm": 2.4837806224823, + "learning_rate": 9.594022719036828e-05, + "loss": 4.9392, + "step": 150900 + }, + { + "epoch": 0.6497475475742615, + "grad_norm": 1.6227117776870728, + "learning_rate": 9.593755569096472e-05, + "loss": 4.5643, + "step": 150950 + }, + { + "epoch": 0.649962767033544, + "grad_norm": 2.458714008331299, + "learning_rate": 9.593488335009093e-05, + "loss": 4.7945, + "step": 151000 + }, + { + "epoch": 0.6501779864928268, + "grad_norm": 3.0209288597106934, + "learning_rate": 9.593221016779586e-05, + "loss": 5.0872, + "step": 151050 + }, + { + "epoch": 0.6503932059521094, + "grad_norm": 1.9234949350357056, + "learning_rate": 9.592953614412844e-05, + "loss": 4.6015, + "step": 151100 + }, + { + "epoch": 0.650608425411392, + "grad_norm": 1.8337647914886475, + "learning_rate": 9.592686127913769e-05, + "loss": 4.5803, + "step": 151150 + }, + { + "epoch": 0.6508236448706747, + "grad_norm": 5.5093159675598145, + "learning_rate": 9.592418557287259e-05, + "loss": 4.894, + "step": 151200 + }, + { + "epoch": 0.6510388643299573, + "grad_norm": 1.3072996139526367, + "learning_rate": 9.592150902538217e-05, + "loss": 4.7387, + "step": 151250 + }, + { + "epoch": 0.6512540837892399, + "grad_norm": 1.9800814390182495, + "learning_rate": 9.591883163671545e-05, + "loss": 4.6369, + "step": 151300 + }, + { + "epoch": 0.6514693032485225, + "grad_norm": 1.821463942527771, + "learning_rate": 9.591615340692146e-05, + "loss": 5.2422, + "step": 151350 + }, + { + "epoch": 0.6516845227078052, + "grad_norm": 2.3861186504364014, + "learning_rate": 9.591347433604927e-05, + "loss": 4.9197, + "step": 151400 + }, + { + "epoch": 0.6518997421670878, + "grad_norm": 2.400552749633789, + "learning_rate": 9.591079442414795e-05, + "loss": 5.1481, + "step": 151450 + }, + { + "epoch": 0.6521149616263704, + "grad_norm": 2.1004109382629395, + "learning_rate": 9.59081136712666e-05, + "loss": 4.4101, + "step": 151500 + }, + { + "epoch": 0.652330181085653, + "grad_norm": 1.7326966524124146, + "learning_rate": 9.590543207745431e-05, + "loss": 4.5864, + "step": 151550 + }, + { + "epoch": 0.6525454005449357, + "grad_norm": 1.12568998336792, + "learning_rate": 9.59027496427602e-05, + "loss": 4.8792, + "step": 151600 + }, + { + "epoch": 0.6527606200042183, + "grad_norm": 2.4987425804138184, + "learning_rate": 9.590006636723343e-05, + "loss": 4.5959, + "step": 151650 + }, + { + "epoch": 0.6529758394635009, + "grad_norm": 2.2339439392089844, + "learning_rate": 9.589738225092314e-05, + "loss": 4.8876, + "step": 151700 + }, + { + "epoch": 0.6531910589227835, + "grad_norm": 2.5360424518585205, + "learning_rate": 9.58946972938785e-05, + "loss": 4.4105, + "step": 151750 + }, + { + "epoch": 0.6534062783820662, + "grad_norm": 2.316676616668701, + "learning_rate": 9.589201149614866e-05, + "loss": 4.8414, + "step": 151800 + }, + { + "epoch": 0.6536214978413488, + "grad_norm": 0.666368842124939, + "learning_rate": 9.588932485778286e-05, + "loss": 4.6336, + "step": 151850 + }, + { + "epoch": 0.6538367173006314, + "grad_norm": 2.004701614379883, + "learning_rate": 9.58866373788303e-05, + "loss": 4.5729, + "step": 151900 + }, + { + "epoch": 0.6540519367599141, + "grad_norm": 1.8496509790420532, + "learning_rate": 9.58839490593402e-05, + "loss": 4.9132, + "step": 151950 + }, + { + "epoch": 0.6542671562191967, + "grad_norm": 1.9056885242462158, + "learning_rate": 9.58812598993618e-05, + "loss": 4.6818, + "step": 152000 + }, + { + "epoch": 0.6544823756784793, + "grad_norm": 2.530623435974121, + "learning_rate": 9.587856989894437e-05, + "loss": 4.8103, + "step": 152050 + }, + { + "epoch": 0.6546975951377619, + "grad_norm": 4.059035778045654, + "learning_rate": 9.587587905813718e-05, + "loss": 4.3655, + "step": 152100 + }, + { + "epoch": 0.6549128145970446, + "grad_norm": 2.4149045944213867, + "learning_rate": 9.587318737698954e-05, + "loss": 4.8829, + "step": 152150 + }, + { + "epoch": 0.6551280340563272, + "grad_norm": 4.830288887023926, + "learning_rate": 9.587049485555072e-05, + "loss": 5.0648, + "step": 152200 + }, + { + "epoch": 0.6553432535156098, + "grad_norm": 2.330284357070923, + "learning_rate": 9.586780149387007e-05, + "loss": 4.6749, + "step": 152250 + }, + { + "epoch": 0.6555584729748924, + "grad_norm": 0.6579521894454956, + "learning_rate": 9.586510729199689e-05, + "loss": 4.2646, + "step": 152300 + }, + { + "epoch": 0.6557736924341752, + "grad_norm": 1.6457276344299316, + "learning_rate": 9.586241224998058e-05, + "loss": 4.8798, + "step": 152350 + }, + { + "epoch": 0.6559889118934578, + "grad_norm": 3.0289647579193115, + "learning_rate": 9.585971636787048e-05, + "loss": 4.8427, + "step": 152400 + }, + { + "epoch": 0.6562041313527404, + "grad_norm": 1.9251019954681396, + "learning_rate": 9.585701964571598e-05, + "loss": 4.6923, + "step": 152450 + }, + { + "epoch": 0.6564193508120231, + "grad_norm": 4.530583381652832, + "learning_rate": 9.585432208356646e-05, + "loss": 4.4563, + "step": 152500 + }, + { + "epoch": 0.6566345702713057, + "grad_norm": 0.864109218120575, + "learning_rate": 9.585162368147136e-05, + "loss": 4.8429, + "step": 152550 + }, + { + "epoch": 0.6568497897305883, + "grad_norm": 2.0649099349975586, + "learning_rate": 9.584892443948008e-05, + "loss": 4.9232, + "step": 152600 + }, + { + "epoch": 0.6570650091898709, + "grad_norm": 2.058701753616333, + "learning_rate": 9.584622435764207e-05, + "loss": 4.7702, + "step": 152650 + }, + { + "epoch": 0.6572802286491536, + "grad_norm": 0.6295965909957886, + "learning_rate": 9.584352343600683e-05, + "loss": 4.8046, + "step": 152700 + }, + { + "epoch": 0.6574954481084362, + "grad_norm": 2.5566980838775635, + "learning_rate": 9.584082167462378e-05, + "loss": 4.9468, + "step": 152750 + }, + { + "epoch": 0.6577106675677188, + "grad_norm": 1.6969618797302246, + "learning_rate": 9.583811907354245e-05, + "loss": 4.6525, + "step": 152800 + }, + { + "epoch": 0.6579258870270014, + "grad_norm": 1.7222943305969238, + "learning_rate": 9.583541563281231e-05, + "loss": 4.6468, + "step": 152850 + }, + { + "epoch": 0.6581411064862841, + "grad_norm": 1.8445067405700684, + "learning_rate": 9.583271135248291e-05, + "loss": 4.5376, + "step": 152900 + }, + { + "epoch": 0.6583563259455667, + "grad_norm": 3.560070037841797, + "learning_rate": 9.583000623260377e-05, + "loss": 4.5368, + "step": 152950 + }, + { + "epoch": 0.6585715454048493, + "grad_norm": 2.3003711700439453, + "learning_rate": 9.582730027322445e-05, + "loss": 4.9739, + "step": 153000 + }, + { + "epoch": 0.6585715454048493, + "eval_loss": 5.272153854370117, + "eval_runtime": 35.1218, + "eval_samples_per_second": 18.222, + "eval_steps_per_second": 9.111, + "eval_tts_loss": 6.694865113997357, + "step": 153000 + }, + { + "epoch": 0.6587867648641319, + "grad_norm": 2.2196459770202637, + "learning_rate": 9.582459347439452e-05, + "loss": 4.5081, + "step": 153050 + }, + { + "epoch": 0.6590019843234146, + "grad_norm": 2.8521106243133545, + "learning_rate": 9.582188583616354e-05, + "loss": 4.9548, + "step": 153100 + }, + { + "epoch": 0.6592172037826972, + "grad_norm": 2.583758592605591, + "learning_rate": 9.581917735858114e-05, + "loss": 4.754, + "step": 153150 + }, + { + "epoch": 0.6594324232419798, + "grad_norm": 2.706932306289673, + "learning_rate": 9.58164680416969e-05, + "loss": 4.5148, + "step": 153200 + }, + { + "epoch": 0.6596476427012625, + "grad_norm": 2.3889999389648438, + "learning_rate": 9.581375788556049e-05, + "loss": 5.0297, + "step": 153250 + }, + { + "epoch": 0.6598628621605451, + "grad_norm": 2.3039729595184326, + "learning_rate": 9.58110468902215e-05, + "loss": 4.9612, + "step": 153300 + }, + { + "epoch": 0.6600780816198277, + "grad_norm": 3.2756195068359375, + "learning_rate": 9.580833505572964e-05, + "loss": 4.8939, + "step": 153350 + }, + { + "epoch": 0.6602933010791103, + "grad_norm": 2.502011775970459, + "learning_rate": 9.580562238213455e-05, + "loss": 4.5933, + "step": 153400 + }, + { + "epoch": 0.660508520538393, + "grad_norm": 1.9105238914489746, + "learning_rate": 9.580290886948593e-05, + "loss": 4.8757, + "step": 153450 + }, + { + "epoch": 0.6607237399976756, + "grad_norm": 2.917357921600342, + "learning_rate": 9.58001945178335e-05, + "loss": 4.9255, + "step": 153500 + }, + { + "epoch": 0.6609389594569582, + "grad_norm": 2.551344394683838, + "learning_rate": 9.579747932722696e-05, + "loss": 4.8493, + "step": 153550 + }, + { + "epoch": 0.6611541789162408, + "grad_norm": 3.0208661556243896, + "learning_rate": 9.579476329771606e-05, + "loss": 5.0175, + "step": 153600 + }, + { + "epoch": 0.6613693983755236, + "grad_norm": 2.5403621196746826, + "learning_rate": 9.579204642935055e-05, + "loss": 4.6444, + "step": 153650 + }, + { + "epoch": 0.6615846178348062, + "grad_norm": 2.2425408363342285, + "learning_rate": 9.578932872218019e-05, + "loss": 4.8393, + "step": 153700 + }, + { + "epoch": 0.6617998372940888, + "grad_norm": 3.1248345375061035, + "learning_rate": 9.578661017625476e-05, + "loss": 4.6539, + "step": 153750 + }, + { + "epoch": 0.6620150567533715, + "grad_norm": 2.1445491313934326, + "learning_rate": 9.578389079162406e-05, + "loss": 5.1382, + "step": 153800 + }, + { + "epoch": 0.6622302762126541, + "grad_norm": 1.667829990386963, + "learning_rate": 9.578117056833792e-05, + "loss": 5.0801, + "step": 153850 + }, + { + "epoch": 0.6624454956719367, + "grad_norm": 2.3175783157348633, + "learning_rate": 9.577844950644615e-05, + "loss": 4.8094, + "step": 153900 + }, + { + "epoch": 0.6626607151312193, + "grad_norm": 2.3628766536712646, + "learning_rate": 9.577572760599857e-05, + "loss": 4.9215, + "step": 153950 + }, + { + "epoch": 0.662875934590502, + "grad_norm": 1.7879496812820435, + "learning_rate": 9.57730048670451e-05, + "loss": 4.5267, + "step": 154000 + }, + { + "epoch": 0.6630911540497846, + "grad_norm": 2.8337862491607666, + "learning_rate": 9.577028128963557e-05, + "loss": 4.6611, + "step": 154050 + }, + { + "epoch": 0.6633063735090672, + "grad_norm": 3.708286762237549, + "learning_rate": 9.576755687381989e-05, + "loss": 4.3863, + "step": 154100 + }, + { + "epoch": 0.6635215929683498, + "grad_norm": 2.160456657409668, + "learning_rate": 9.576483161964793e-05, + "loss": 4.8265, + "step": 154150 + }, + { + "epoch": 0.6637368124276325, + "grad_norm": 1.9885178804397583, + "learning_rate": 9.576210552716965e-05, + "loss": 4.6774, + "step": 154200 + }, + { + "epoch": 0.6639520318869151, + "grad_norm": 2.5419833660125732, + "learning_rate": 9.575937859643498e-05, + "loss": 5.2916, + "step": 154250 + }, + { + "epoch": 0.6641672513461977, + "grad_norm": 1.4203705787658691, + "learning_rate": 9.575665082749385e-05, + "loss": 4.6066, + "step": 154300 + }, + { + "epoch": 0.6643824708054803, + "grad_norm": 2.913315534591675, + "learning_rate": 9.575392222039623e-05, + "loss": 4.6251, + "step": 154350 + }, + { + "epoch": 0.664597690264763, + "grad_norm": 0.5716208815574646, + "learning_rate": 9.575119277519213e-05, + "loss": 4.4496, + "step": 154400 + }, + { + "epoch": 0.6648129097240456, + "grad_norm": 1.3070112466812134, + "learning_rate": 9.574846249193152e-05, + "loss": 4.5559, + "step": 154450 + }, + { + "epoch": 0.6650281291833282, + "grad_norm": 2.82600998878479, + "learning_rate": 9.574573137066443e-05, + "loss": 4.9279, + "step": 154500 + }, + { + "epoch": 0.6652433486426109, + "grad_norm": 1.2746036052703857, + "learning_rate": 9.574299941144086e-05, + "loss": 5.0061, + "step": 154550 + }, + { + "epoch": 0.6654585681018935, + "grad_norm": 1.4085019826889038, + "learning_rate": 9.574026661431088e-05, + "loss": 4.8083, + "step": 154600 + }, + { + "epoch": 0.6656737875611761, + "grad_norm": 2.000643253326416, + "learning_rate": 9.573753297932456e-05, + "loss": 4.8887, + "step": 154650 + }, + { + "epoch": 0.6658890070204587, + "grad_norm": 4.668451309204102, + "learning_rate": 9.573479850653192e-05, + "loss": 4.6735, + "step": 154700 + }, + { + "epoch": 0.6661042264797414, + "grad_norm": 1.3504831790924072, + "learning_rate": 9.573206319598308e-05, + "loss": 4.4908, + "step": 154750 + }, + { + "epoch": 0.666319445939024, + "grad_norm": 3.0221409797668457, + "learning_rate": 9.572932704772818e-05, + "loss": 5.1797, + "step": 154800 + }, + { + "epoch": 0.6665346653983066, + "grad_norm": 2.146824836730957, + "learning_rate": 9.57265900618173e-05, + "loss": 4.7862, + "step": 154850 + }, + { + "epoch": 0.6667498848575892, + "grad_norm": 1.6081867218017578, + "learning_rate": 9.572385223830057e-05, + "loss": 4.9362, + "step": 154900 + }, + { + "epoch": 0.666965104316872, + "grad_norm": 0.6270937919616699, + "learning_rate": 9.572111357722815e-05, + "loss": 4.6332, + "step": 154950 + }, + { + "epoch": 0.6671803237761545, + "grad_norm": 2.637610912322998, + "learning_rate": 9.571837407865022e-05, + "loss": 5.0299, + "step": 155000 + }, + { + "epoch": 0.6673955432354371, + "grad_norm": 3.1862165927886963, + "learning_rate": 9.571563374261697e-05, + "loss": 4.9406, + "step": 155050 + }, + { + "epoch": 0.6676107626947198, + "grad_norm": 2.8708019256591797, + "learning_rate": 9.571289256917854e-05, + "loss": 4.787, + "step": 155100 + }, + { + "epoch": 0.6678259821540025, + "grad_norm": 1.7833313941955566, + "learning_rate": 9.57101505583852e-05, + "loss": 5.0834, + "step": 155150 + }, + { + "epoch": 0.6680412016132851, + "grad_norm": 2.009474515914917, + "learning_rate": 9.570740771028715e-05, + "loss": 4.3708, + "step": 155200 + }, + { + "epoch": 0.6682564210725677, + "grad_norm": 2.4546263217926025, + "learning_rate": 9.570466402493464e-05, + "loss": 4.5493, + "step": 155250 + }, + { + "epoch": 0.6684716405318504, + "grad_norm": 2.4150097370147705, + "learning_rate": 9.570191950237792e-05, + "loss": 4.6964, + "step": 155300 + }, + { + "epoch": 0.668686859991133, + "grad_norm": 0.6917418837547302, + "learning_rate": 9.569917414266729e-05, + "loss": 4.8652, + "step": 155350 + }, + { + "epoch": 0.6689020794504156, + "grad_norm": 3.48405122756958, + "learning_rate": 9.5696427945853e-05, + "loss": 4.7592, + "step": 155400 + }, + { + "epoch": 0.6691172989096982, + "grad_norm": 2.649857521057129, + "learning_rate": 9.56936809119854e-05, + "loss": 4.6405, + "step": 155450 + }, + { + "epoch": 0.6693325183689809, + "grad_norm": 1.8692641258239746, + "learning_rate": 9.569093304111476e-05, + "loss": 4.879, + "step": 155500 + }, + { + "epoch": 0.6695477378282635, + "grad_norm": 1.9653756618499756, + "learning_rate": 9.568818433329144e-05, + "loss": 4.9864, + "step": 155550 + }, + { + "epoch": 0.6697629572875461, + "grad_norm": 1.6265102624893188, + "learning_rate": 9.56854347885658e-05, + "loss": 4.7988, + "step": 155600 + }, + { + "epoch": 0.6699781767468287, + "grad_norm": 2.3346219062805176, + "learning_rate": 9.568268440698818e-05, + "loss": 4.6711, + "step": 155650 + }, + { + "epoch": 0.6701933962061114, + "grad_norm": 2.8704001903533936, + "learning_rate": 9.567993318860898e-05, + "loss": 4.6221, + "step": 155700 + }, + { + "epoch": 0.670408615665394, + "grad_norm": 2.664832830429077, + "learning_rate": 9.567718113347858e-05, + "loss": 4.7157, + "step": 155750 + }, + { + "epoch": 0.6706238351246766, + "grad_norm": 2.540748119354248, + "learning_rate": 9.56744282416474e-05, + "loss": 4.9498, + "step": 155800 + }, + { + "epoch": 0.6708390545839593, + "grad_norm": 2.1693196296691895, + "learning_rate": 9.567167451316587e-05, + "loss": 4.9497, + "step": 155850 + }, + { + "epoch": 0.6710542740432419, + "grad_norm": 0.8333892226219177, + "learning_rate": 9.566891994808445e-05, + "loss": 4.7667, + "step": 155900 + }, + { + "epoch": 0.6712694935025245, + "grad_norm": 3.8814573287963867, + "learning_rate": 9.566616454645355e-05, + "loss": 4.8331, + "step": 155950 + }, + { + "epoch": 0.6714847129618071, + "grad_norm": 0.8848453760147095, + "learning_rate": 9.566340830832367e-05, + "loss": 4.7662, + "step": 156000 + }, + { + "epoch": 0.6714847129618071, + "eval_loss": 5.278883457183838, + "eval_runtime": 34.9809, + "eval_samples_per_second": 18.296, + "eval_steps_per_second": 9.148, + "eval_tts_loss": 6.7161242666201, + "step": 156000 + }, + { + "epoch": 0.6716999324210898, + "grad_norm": 2.2339110374450684, + "learning_rate": 9.566065123374532e-05, + "loss": 4.7874, + "step": 156050 + }, + { + "epoch": 0.6719151518803724, + "grad_norm": 2.6937265396118164, + "learning_rate": 9.565789332276896e-05, + "loss": 4.7203, + "step": 156100 + }, + { + "epoch": 0.672130371339655, + "grad_norm": 2.039909601211548, + "learning_rate": 9.565513457544514e-05, + "loss": 4.4855, + "step": 156150 + }, + { + "epoch": 0.6723455907989376, + "grad_norm": 4.409247875213623, + "learning_rate": 9.565237499182437e-05, + "loss": 4.939, + "step": 156200 + }, + { + "epoch": 0.6725608102582203, + "grad_norm": 3.3766236305236816, + "learning_rate": 9.564961457195723e-05, + "loss": 5.1472, + "step": 156250 + }, + { + "epoch": 0.6727760297175029, + "grad_norm": 2.638456106185913, + "learning_rate": 9.564685331589425e-05, + "loss": 4.7656, + "step": 156300 + }, + { + "epoch": 0.6729912491767855, + "grad_norm": 2.9064435958862305, + "learning_rate": 9.564409122368603e-05, + "loss": 5.0273, + "step": 156350 + }, + { + "epoch": 0.6732064686360681, + "grad_norm": 1.0630778074264526, + "learning_rate": 9.564132829538318e-05, + "loss": 4.4001, + "step": 156400 + }, + { + "epoch": 0.6734216880953509, + "grad_norm": 3.940795660018921, + "learning_rate": 9.563856453103627e-05, + "loss": 4.6476, + "step": 156450 + }, + { + "epoch": 0.6736369075546335, + "grad_norm": 1.6362303495407104, + "learning_rate": 9.563579993069596e-05, + "loss": 4.8702, + "step": 156500 + }, + { + "epoch": 0.6738521270139161, + "grad_norm": 1.6776889562606812, + "learning_rate": 9.563303449441287e-05, + "loss": 4.8697, + "step": 156550 + }, + { + "epoch": 0.6740673464731988, + "grad_norm": 2.7823495864868164, + "learning_rate": 9.563026822223766e-05, + "loss": 4.6157, + "step": 156600 + }, + { + "epoch": 0.6742825659324814, + "grad_norm": 1.7275969982147217, + "learning_rate": 9.562750111422101e-05, + "loss": 4.7095, + "step": 156650 + }, + { + "epoch": 0.674497785391764, + "grad_norm": 2.3900394439697266, + "learning_rate": 9.562473317041363e-05, + "loss": 4.7376, + "step": 156700 + }, + { + "epoch": 0.6747130048510466, + "grad_norm": 0.6373631954193115, + "learning_rate": 9.562196439086617e-05, + "loss": 4.7814, + "step": 156750 + }, + { + "epoch": 0.6749282243103293, + "grad_norm": 1.034432053565979, + "learning_rate": 9.561919477562938e-05, + "loss": 4.9494, + "step": 156800 + }, + { + "epoch": 0.6751434437696119, + "grad_norm": 3.057790994644165, + "learning_rate": 9.561642432475399e-05, + "loss": 4.2604, + "step": 156850 + }, + { + "epoch": 0.6753586632288945, + "grad_norm": 4.676861763000488, + "learning_rate": 9.561365303829074e-05, + "loss": 4.8963, + "step": 156900 + }, + { + "epoch": 0.6755738826881771, + "grad_norm": 0.48604294657707214, + "learning_rate": 9.561088091629041e-05, + "loss": 4.9133, + "step": 156950 + }, + { + "epoch": 0.6757891021474598, + "grad_norm": 5.246880531311035, + "learning_rate": 9.560810795880375e-05, + "loss": 4.4028, + "step": 157000 + }, + { + "epoch": 0.6760043216067424, + "grad_norm": 2.468273639678955, + "learning_rate": 9.560533416588159e-05, + "loss": 4.9799, + "step": 157050 + }, + { + "epoch": 0.676219541066025, + "grad_norm": 3.471484899520874, + "learning_rate": 9.56025595375747e-05, + "loss": 4.7573, + "step": 157100 + }, + { + "epoch": 0.6764347605253077, + "grad_norm": 2.8362720012664795, + "learning_rate": 9.559978407393396e-05, + "loss": 4.8944, + "step": 157150 + }, + { + "epoch": 0.6766499799845903, + "grad_norm": 2.390781879425049, + "learning_rate": 9.559700777501015e-05, + "loss": 4.2222, + "step": 157200 + }, + { + "epoch": 0.6768651994438729, + "grad_norm": 2.7690231800079346, + "learning_rate": 9.559423064085416e-05, + "loss": 5.0108, + "step": 157250 + }, + { + "epoch": 0.6770804189031555, + "grad_norm": 1.9207054376602173, + "learning_rate": 9.559145267151686e-05, + "loss": 4.5075, + "step": 157300 + }, + { + "epoch": 0.6772956383624382, + "grad_norm": 2.0313377380371094, + "learning_rate": 9.558867386704912e-05, + "loss": 5.2782, + "step": 157350 + }, + { + "epoch": 0.6775108578217208, + "grad_norm": 3.1666529178619385, + "learning_rate": 9.558589422750185e-05, + "loss": 4.5551, + "step": 157400 + }, + { + "epoch": 0.6777260772810034, + "grad_norm": 3.358729124069214, + "learning_rate": 9.558311375292597e-05, + "loss": 4.3422, + "step": 157450 + }, + { + "epoch": 0.677941296740286, + "grad_norm": 0.9722416400909424, + "learning_rate": 9.558033244337241e-05, + "loss": 4.7758, + "step": 157500 + }, + { + "epoch": 0.6781565161995687, + "grad_norm": 1.403948187828064, + "learning_rate": 9.55775502988921e-05, + "loss": 5.1122, + "step": 157550 + }, + { + "epoch": 0.6783717356588513, + "grad_norm": 1.938759446144104, + "learning_rate": 9.557476731953603e-05, + "loss": 5.0244, + "step": 157600 + }, + { + "epoch": 0.6785869551181339, + "grad_norm": 0.845973789691925, + "learning_rate": 9.557198350535517e-05, + "loss": 4.9599, + "step": 157650 + }, + { + "epoch": 0.6788021745774165, + "grad_norm": 2.838351249694824, + "learning_rate": 9.556919885640051e-05, + "loss": 4.6652, + "step": 157700 + }, + { + "epoch": 0.6790173940366993, + "grad_norm": 4.650486469268799, + "learning_rate": 9.556641337272305e-05, + "loss": 4.955, + "step": 157750 + }, + { + "epoch": 0.6792326134959819, + "grad_norm": 2.6118009090423584, + "learning_rate": 9.556362705437383e-05, + "loss": 4.3873, + "step": 157800 + }, + { + "epoch": 0.6794478329552645, + "grad_norm": 1.6742748022079468, + "learning_rate": 9.556083990140387e-05, + "loss": 4.8117, + "step": 157850 + }, + { + "epoch": 0.6796630524145472, + "grad_norm": 2.0197079181671143, + "learning_rate": 9.555805191386423e-05, + "loss": 4.7861, + "step": 157900 + }, + { + "epoch": 0.6798782718738298, + "grad_norm": 1.7193886041641235, + "learning_rate": 9.5555263091806e-05, + "loss": 4.823, + "step": 157950 + }, + { + "epoch": 0.6800934913331124, + "grad_norm": 2.7835936546325684, + "learning_rate": 9.555247343528025e-05, + "loss": 5.3709, + "step": 158000 + }, + { + "epoch": 0.680308710792395, + "grad_norm": 1.0675398111343384, + "learning_rate": 9.554968294433806e-05, + "loss": 5.0095, + "step": 158050 + }, + { + "epoch": 0.6805239302516777, + "grad_norm": 1.0224539041519165, + "learning_rate": 9.554689161903057e-05, + "loss": 4.8699, + "step": 158100 + }, + { + "epoch": 0.6807391497109603, + "grad_norm": 4.396988391876221, + "learning_rate": 9.55440994594089e-05, + "loss": 4.6961, + "step": 158150 + }, + { + "epoch": 0.6809543691702429, + "grad_norm": 3.2974281311035156, + "learning_rate": 9.554130646552422e-05, + "loss": 4.72, + "step": 158200 + }, + { + "epoch": 0.6811695886295255, + "grad_norm": 1.8034601211547852, + "learning_rate": 9.553851263742766e-05, + "loss": 4.8696, + "step": 158250 + }, + { + "epoch": 0.6813848080888082, + "grad_norm": 1.6334964036941528, + "learning_rate": 9.55357179751704e-05, + "loss": 5.0263, + "step": 158300 + }, + { + "epoch": 0.6816000275480908, + "grad_norm": 2.213195323944092, + "learning_rate": 9.553292247880364e-05, + "loss": 4.792, + "step": 158350 + }, + { + "epoch": 0.6818152470073734, + "grad_norm": 2.67893385887146, + "learning_rate": 9.55301261483786e-05, + "loss": 4.8719, + "step": 158400 + }, + { + "epoch": 0.6820304664666561, + "grad_norm": 3.172650098800659, + "learning_rate": 9.552732898394647e-05, + "loss": 4.6673, + "step": 158450 + }, + { + "epoch": 0.6822456859259387, + "grad_norm": 3.065744400024414, + "learning_rate": 9.552453098555853e-05, + "loss": 4.9899, + "step": 158500 + }, + { + "epoch": 0.6824609053852213, + "grad_norm": 0.5553640127182007, + "learning_rate": 9.5521732153266e-05, + "loss": 4.8354, + "step": 158550 + }, + { + "epoch": 0.6826761248445039, + "grad_norm": 2.641530990600586, + "learning_rate": 9.551893248712016e-05, + "loss": 5.1297, + "step": 158600 + }, + { + "epoch": 0.6828913443037866, + "grad_norm": 1.9952492713928223, + "learning_rate": 9.551613198717229e-05, + "loss": 5.0197, + "step": 158650 + }, + { + "epoch": 0.6831065637630692, + "grad_norm": 5.184808731079102, + "learning_rate": 9.551333065347368e-05, + "loss": 4.94, + "step": 158700 + }, + { + "epoch": 0.6833217832223518, + "grad_norm": 2.9462733268737793, + "learning_rate": 9.551052848607567e-05, + "loss": 5.0009, + "step": 158750 + }, + { + "epoch": 0.6835370026816344, + "grad_norm": 2.3588979244232178, + "learning_rate": 9.550772548502957e-05, + "loss": 4.8058, + "step": 158800 + }, + { + "epoch": 0.6837522221409171, + "grad_norm": 2.082214117050171, + "learning_rate": 9.550492165038672e-05, + "loss": 4.8264, + "step": 158850 + }, + { + "epoch": 0.6839674416001997, + "grad_norm": 2.406637191772461, + "learning_rate": 9.55021169821985e-05, + "loss": 4.6784, + "step": 158900 + }, + { + "epoch": 0.6841826610594823, + "grad_norm": 2.0293004512786865, + "learning_rate": 9.549931148051627e-05, + "loss": 4.465, + "step": 158950 + }, + { + "epoch": 0.6843978805187649, + "grad_norm": 4.027707576751709, + "learning_rate": 9.549650514539141e-05, + "loss": 5.019, + "step": 159000 + }, + { + "epoch": 0.6843978805187649, + "eval_loss": 5.260510444641113, + "eval_runtime": 35.079, + "eval_samples_per_second": 18.245, + "eval_steps_per_second": 9.122, + "eval_tts_loss": 6.714355394688211, + "step": 159000 + }, + { + "epoch": 0.6846130999780476, + "grad_norm": 1.51261568069458, + "learning_rate": 9.549369797687535e-05, + "loss": 4.9821, + "step": 159050 + }, + { + "epoch": 0.6848283194373302, + "grad_norm": 1.9397823810577393, + "learning_rate": 9.54908899750195e-05, + "loss": 4.6168, + "step": 159100 + }, + { + "epoch": 0.6850435388966128, + "grad_norm": 3.673186779022217, + "learning_rate": 9.54880811398753e-05, + "loss": 5.0454, + "step": 159150 + }, + { + "epoch": 0.6852587583558956, + "grad_norm": 1.8566477298736572, + "learning_rate": 9.548527147149418e-05, + "loss": 4.7255, + "step": 159200 + }, + { + "epoch": 0.6854739778151782, + "grad_norm": 1.8592307567596436, + "learning_rate": 9.548246096992764e-05, + "loss": 4.6219, + "step": 159250 + }, + { + "epoch": 0.6856891972744608, + "grad_norm": 1.697898507118225, + "learning_rate": 9.547964963522713e-05, + "loss": 4.9162, + "step": 159300 + }, + { + "epoch": 0.6859044167337434, + "grad_norm": 6.261716842651367, + "learning_rate": 9.547683746744418e-05, + "loss": 4.6507, + "step": 159350 + }, + { + "epoch": 0.6861196361930261, + "grad_norm": 4.1410393714904785, + "learning_rate": 9.547402446663027e-05, + "loss": 5.019, + "step": 159400 + }, + { + "epoch": 0.6863348556523087, + "grad_norm": 2.162022113800049, + "learning_rate": 9.547121063283696e-05, + "loss": 4.9875, + "step": 159450 + }, + { + "epoch": 0.6865500751115913, + "grad_norm": 2.90623140335083, + "learning_rate": 9.546839596611576e-05, + "loss": 5.1534, + "step": 159500 + }, + { + "epoch": 0.6867652945708739, + "grad_norm": 2.275810718536377, + "learning_rate": 9.546558046651826e-05, + "loss": 5.1112, + "step": 159550 + }, + { + "epoch": 0.6869805140301566, + "grad_norm": 3.74157977104187, + "learning_rate": 9.546276413409601e-05, + "loss": 4.9619, + "step": 159600 + }, + { + "epoch": 0.6871957334894392, + "grad_norm": 0.9785647988319397, + "learning_rate": 9.54599469689006e-05, + "loss": 4.385, + "step": 159650 + }, + { + "epoch": 0.6874109529487218, + "grad_norm": 1.9849896430969238, + "learning_rate": 9.545712897098365e-05, + "loss": 5.1754, + "step": 159700 + }, + { + "epoch": 0.6876261724080044, + "grad_norm": 4.778702735900879, + "learning_rate": 9.545431014039676e-05, + "loss": 4.3714, + "step": 159750 + }, + { + "epoch": 0.6878413918672871, + "grad_norm": 2.2906696796417236, + "learning_rate": 9.545149047719157e-05, + "loss": 4.6408, + "step": 159800 + }, + { + "epoch": 0.6880566113265697, + "grad_norm": 3.158996820449829, + "learning_rate": 9.544866998141975e-05, + "loss": 4.8877, + "step": 159850 + }, + { + "epoch": 0.6882718307858523, + "grad_norm": 2.9619832038879395, + "learning_rate": 9.544584865313295e-05, + "loss": 5.0654, + "step": 159900 + }, + { + "epoch": 0.688487050245135, + "grad_norm": 2.9292378425598145, + "learning_rate": 9.544302649238283e-05, + "loss": 4.8493, + "step": 159950 + }, + { + "epoch": 0.6887022697044176, + "grad_norm": 1.5913935899734497, + "learning_rate": 9.544020349922112e-05, + "loss": 4.8194, + "step": 160000 + }, + { + "epoch": 0.6889174891637002, + "grad_norm": 2.433671712875366, + "learning_rate": 9.543737967369952e-05, + "loss": 4.7018, + "step": 160050 + }, + { + "epoch": 0.6891327086229828, + "grad_norm": 2.2870354652404785, + "learning_rate": 9.543455501586972e-05, + "loss": 4.9895, + "step": 160100 + }, + { + "epoch": 0.6893479280822655, + "grad_norm": 3.609937906265259, + "learning_rate": 9.543172952578352e-05, + "loss": 4.5899, + "step": 160150 + }, + { + "epoch": 0.6895631475415481, + "grad_norm": 2.1792562007904053, + "learning_rate": 9.542890320349265e-05, + "loss": 4.8772, + "step": 160200 + }, + { + "epoch": 0.6897783670008307, + "grad_norm": 0.5431050062179565, + "learning_rate": 9.542607604904887e-05, + "loss": 5.2018, + "step": 160250 + }, + { + "epoch": 0.6899935864601133, + "grad_norm": 2.969134569168091, + "learning_rate": 9.542324806250398e-05, + "loss": 4.5582, + "step": 160300 + }, + { + "epoch": 0.690208805919396, + "grad_norm": 3.0785508155822754, + "learning_rate": 9.542041924390979e-05, + "loss": 4.7654, + "step": 160350 + }, + { + "epoch": 0.6904240253786786, + "grad_norm": 2.483086109161377, + "learning_rate": 9.541758959331811e-05, + "loss": 4.3514, + "step": 160400 + }, + { + "epoch": 0.6906392448379612, + "grad_norm": 2.8327715396881104, + "learning_rate": 9.541475911078075e-05, + "loss": 4.7034, + "step": 160450 + }, + { + "epoch": 0.690854464297244, + "grad_norm": 0.9109283089637756, + "learning_rate": 9.541192779634958e-05, + "loss": 4.7518, + "step": 160500 + }, + { + "epoch": 0.6910696837565266, + "grad_norm": 2.4828672409057617, + "learning_rate": 9.540909565007648e-05, + "loss": 4.8303, + "step": 160550 + }, + { + "epoch": 0.6912849032158092, + "grad_norm": 4.317615985870361, + "learning_rate": 9.540626267201328e-05, + "loss": 4.8139, + "step": 160600 + }, + { + "epoch": 0.6915001226750918, + "grad_norm": 2.0684759616851807, + "learning_rate": 9.540342886221192e-05, + "loss": 4.5373, + "step": 160650 + }, + { + "epoch": 0.6917153421343745, + "grad_norm": 2.9401750564575195, + "learning_rate": 9.540059422072429e-05, + "loss": 4.7932, + "step": 160700 + }, + { + "epoch": 0.6919305615936571, + "grad_norm": 1.6128252744674683, + "learning_rate": 9.539775874760233e-05, + "loss": 4.6996, + "step": 160750 + }, + { + "epoch": 0.6921457810529397, + "grad_norm": 1.8453497886657715, + "learning_rate": 9.539492244289793e-05, + "loss": 4.6413, + "step": 160800 + }, + { + "epoch": 0.6923610005122223, + "grad_norm": 5.51900577545166, + "learning_rate": 9.539208530666313e-05, + "loss": 4.6811, + "step": 160850 + }, + { + "epoch": 0.692576219971505, + "grad_norm": 2.461278200149536, + "learning_rate": 9.538924733894981e-05, + "loss": 4.7491, + "step": 160900 + }, + { + "epoch": 0.6927914394307876, + "grad_norm": 0.741333544254303, + "learning_rate": 9.538640853981e-05, + "loss": 4.815, + "step": 160950 + }, + { + "epoch": 0.6930066588900702, + "grad_norm": 2.993150234222412, + "learning_rate": 9.538356890929572e-05, + "loss": 4.7551, + "step": 161000 + }, + { + "epoch": 0.6932218783493528, + "grad_norm": 2.4920217990875244, + "learning_rate": 9.538072844745893e-05, + "loss": 4.7613, + "step": 161050 + }, + { + "epoch": 0.6934370978086355, + "grad_norm": 2.326223373413086, + "learning_rate": 9.537788715435171e-05, + "loss": 4.3174, + "step": 161100 + }, + { + "epoch": 0.6936523172679181, + "grad_norm": 2.928088903427124, + "learning_rate": 9.537504503002608e-05, + "loss": 4.9496, + "step": 161150 + }, + { + "epoch": 0.6938675367272007, + "grad_norm": 2.002707004547119, + "learning_rate": 9.537220207453412e-05, + "loss": 5.3118, + "step": 161200 + }, + { + "epoch": 0.6940827561864834, + "grad_norm": 2.829404354095459, + "learning_rate": 9.536935828792788e-05, + "loss": 4.9233, + "step": 161250 + }, + { + "epoch": 0.694297975645766, + "grad_norm": 2.2821083068847656, + "learning_rate": 9.536651367025947e-05, + "loss": 4.5413, + "step": 161300 + }, + { + "epoch": 0.6945131951050486, + "grad_norm": 1.7957453727722168, + "learning_rate": 9.536366822158098e-05, + "loss": 4.8888, + "step": 161350 + }, + { + "epoch": 0.6947284145643312, + "grad_norm": 4.236700534820557, + "learning_rate": 9.536082194194457e-05, + "loss": 4.84, + "step": 161400 + }, + { + "epoch": 0.6949436340236139, + "grad_norm": 5.083244323730469, + "learning_rate": 9.535797483140234e-05, + "loss": 4.679, + "step": 161450 + }, + { + "epoch": 0.6951588534828965, + "grad_norm": 3.4878830909729004, + "learning_rate": 9.535512689000645e-05, + "loss": 4.3704, + "step": 161500 + }, + { + "epoch": 0.6953740729421791, + "grad_norm": 2.413973569869995, + "learning_rate": 9.535227811780909e-05, + "loss": 4.8061, + "step": 161550 + }, + { + "epoch": 0.6955892924014617, + "grad_norm": 2.4817192554473877, + "learning_rate": 9.534942851486241e-05, + "loss": 4.7359, + "step": 161600 + }, + { + "epoch": 0.6958045118607444, + "grad_norm": 2.592068910598755, + "learning_rate": 9.534657808121863e-05, + "loss": 5.2111, + "step": 161650 + }, + { + "epoch": 0.696019731320027, + "grad_norm": 0.7227672934532166, + "learning_rate": 9.534372681692996e-05, + "loss": 4.4912, + "step": 161700 + }, + { + "epoch": 0.6962349507793096, + "grad_norm": 4.775062084197998, + "learning_rate": 9.534087472204862e-05, + "loss": 4.8446, + "step": 161750 + }, + { + "epoch": 0.6964501702385923, + "grad_norm": 2.261852502822876, + "learning_rate": 9.533802179662687e-05, + "loss": 5.0083, + "step": 161800 + }, + { + "epoch": 0.696665389697875, + "grad_norm": 2.7314951419830322, + "learning_rate": 9.533516804071694e-05, + "loss": 5.2758, + "step": 161850 + }, + { + "epoch": 0.6968806091571575, + "grad_norm": 4.601281642913818, + "learning_rate": 9.533231345437114e-05, + "loss": 4.5602, + "step": 161900 + }, + { + "epoch": 0.6970958286164401, + "grad_norm": 7.151758670806885, + "learning_rate": 9.532945803764175e-05, + "loss": 4.7032, + "step": 161950 + }, + { + "epoch": 0.6973110480757229, + "grad_norm": 1.8549736738204956, + "learning_rate": 9.532660179058103e-05, + "loss": 5.4468, + "step": 162000 + }, + { + "epoch": 0.6973110480757229, + "eval_loss": 5.2578864097595215, + "eval_runtime": 35.1433, + "eval_samples_per_second": 18.211, + "eval_steps_per_second": 9.106, + "eval_tts_loss": 6.683059308904011, + "step": 162000 + }, + { + "epoch": 0.6975262675350055, + "grad_norm": 1.7714359760284424, + "learning_rate": 9.532374471324137e-05, + "loss": 4.8299, + "step": 162050 + }, + { + "epoch": 0.6977414869942881, + "grad_norm": 3.2684900760650635, + "learning_rate": 9.532088680567505e-05, + "loss": 4.4895, + "step": 162100 + }, + { + "epoch": 0.6979567064535707, + "grad_norm": 2.858443260192871, + "learning_rate": 9.531802806793446e-05, + "loss": 4.8858, + "step": 162150 + }, + { + "epoch": 0.6981719259128534, + "grad_norm": 0.5888779163360596, + "learning_rate": 9.531516850007196e-05, + "loss": 5.0181, + "step": 162200 + }, + { + "epoch": 0.698387145372136, + "grad_norm": 0.8760647177696228, + "learning_rate": 9.53123081021399e-05, + "loss": 4.5687, + "step": 162250 + }, + { + "epoch": 0.6986023648314186, + "grad_norm": 2.984555244445801, + "learning_rate": 9.530944687419069e-05, + "loss": 5.1395, + "step": 162300 + }, + { + "epoch": 0.6988175842907012, + "grad_norm": 2.9227700233459473, + "learning_rate": 9.530658481627674e-05, + "loss": 4.6742, + "step": 162350 + }, + { + "epoch": 0.6990328037499839, + "grad_norm": 6.077144145965576, + "learning_rate": 9.53037219284505e-05, + "loss": 5.0118, + "step": 162400 + }, + { + "epoch": 0.6992480232092665, + "grad_norm": 3.8647806644439697, + "learning_rate": 9.53008582107644e-05, + "loss": 4.9235, + "step": 162450 + }, + { + "epoch": 0.6994632426685491, + "grad_norm": 2.9000492095947266, + "learning_rate": 9.529799366327087e-05, + "loss": 4.7205, + "step": 162500 + }, + { + "epoch": 0.6996784621278318, + "grad_norm": 3.853804111480713, + "learning_rate": 9.52951282860224e-05, + "loss": 4.9144, + "step": 162550 + }, + { + "epoch": 0.6998936815871144, + "grad_norm": 3.611832857131958, + "learning_rate": 9.529226207907149e-05, + "loss": 4.7319, + "step": 162600 + }, + { + "epoch": 0.700108901046397, + "grad_norm": 2.5794291496276855, + "learning_rate": 9.528939504247063e-05, + "loss": 5.1745, + "step": 162650 + }, + { + "epoch": 0.7003241205056796, + "grad_norm": 2.3908915519714355, + "learning_rate": 9.528652717627233e-05, + "loss": 4.7766, + "step": 162700 + }, + { + "epoch": 0.7005393399649623, + "grad_norm": 1.8995658159255981, + "learning_rate": 9.528365848052915e-05, + "loss": 4.5439, + "step": 162750 + }, + { + "epoch": 0.7007545594242449, + "grad_norm": 1.7970938682556152, + "learning_rate": 9.528078895529362e-05, + "loss": 4.6625, + "step": 162800 + }, + { + "epoch": 0.7009697788835275, + "grad_norm": 2.241791248321533, + "learning_rate": 9.527791860061828e-05, + "loss": 4.8764, + "step": 162850 + }, + { + "epoch": 0.7011849983428101, + "grad_norm": 2.0582191944122314, + "learning_rate": 9.527504741655575e-05, + "loss": 4.5504, + "step": 162900 + }, + { + "epoch": 0.7014002178020928, + "grad_norm": 3.2841413021087646, + "learning_rate": 9.52721754031586e-05, + "loss": 5.2117, + "step": 162950 + }, + { + "epoch": 0.7016154372613754, + "grad_norm": 3.006070613861084, + "learning_rate": 9.526930256047944e-05, + "loss": 4.5682, + "step": 163000 + }, + { + "epoch": 0.701830656720658, + "grad_norm": 3.1570489406585693, + "learning_rate": 9.526642888857089e-05, + "loss": 4.9777, + "step": 163050 + }, + { + "epoch": 0.7020458761799406, + "grad_norm": 3.4569666385650635, + "learning_rate": 9.52635543874856e-05, + "loss": 4.7028, + "step": 163100 + }, + { + "epoch": 0.7022610956392233, + "grad_norm": 2.04366135597229, + "learning_rate": 9.526067905727622e-05, + "loss": 5.1829, + "step": 163150 + }, + { + "epoch": 0.7024763150985059, + "grad_norm": 2.1680846214294434, + "learning_rate": 9.525780289799543e-05, + "loss": 5.1804, + "step": 163200 + }, + { + "epoch": 0.7026915345577885, + "grad_norm": 3.2189414501190186, + "learning_rate": 9.525492590969589e-05, + "loss": 4.8931, + "step": 163250 + }, + { + "epoch": 0.7029067540170713, + "grad_norm": 2.5682802200317383, + "learning_rate": 9.525204809243032e-05, + "loss": 4.7324, + "step": 163300 + }, + { + "epoch": 0.7031219734763539, + "grad_norm": 2.7376718521118164, + "learning_rate": 9.524916944625145e-05, + "loss": 4.5381, + "step": 163350 + }, + { + "epoch": 0.7033371929356365, + "grad_norm": 3.043344020843506, + "learning_rate": 9.524628997121196e-05, + "loss": 4.6139, + "step": 163400 + }, + { + "epoch": 0.7035524123949191, + "grad_norm": 1.8004236221313477, + "learning_rate": 9.524340966736463e-05, + "loss": 4.4967, + "step": 163450 + }, + { + "epoch": 0.7037676318542018, + "grad_norm": 3.1802303791046143, + "learning_rate": 9.524052853476222e-05, + "loss": 4.7721, + "step": 163500 + }, + { + "epoch": 0.7039828513134844, + "grad_norm": 2.1641175746917725, + "learning_rate": 9.52376465734575e-05, + "loss": 5.1055, + "step": 163550 + }, + { + "epoch": 0.704198070772767, + "grad_norm": 2.2088725566864014, + "learning_rate": 9.523476378350327e-05, + "loss": 4.7969, + "step": 163600 + }, + { + "epoch": 0.7044132902320496, + "grad_norm": 2.8075568675994873, + "learning_rate": 9.52318801649523e-05, + "loss": 5.0079, + "step": 163650 + }, + { + "epoch": 0.7046285096913323, + "grad_norm": 2.5364694595336914, + "learning_rate": 9.522899571785747e-05, + "loss": 4.8647, + "step": 163700 + }, + { + "epoch": 0.7048437291506149, + "grad_norm": 2.7772562503814697, + "learning_rate": 9.522611044227157e-05, + "loss": 4.4922, + "step": 163750 + }, + { + "epoch": 0.7050589486098975, + "grad_norm": 3.107919216156006, + "learning_rate": 9.522322433824748e-05, + "loss": 5.0672, + "step": 163800 + }, + { + "epoch": 0.7052741680691802, + "grad_norm": 2.9180619716644287, + "learning_rate": 9.522033740583805e-05, + "loss": 4.6774, + "step": 163850 + }, + { + "epoch": 0.7054893875284628, + "grad_norm": 3.4016270637512207, + "learning_rate": 9.521744964509615e-05, + "loss": 4.8389, + "step": 163900 + }, + { + "epoch": 0.7057046069877454, + "grad_norm": 1.9809845685958862, + "learning_rate": 9.52145610560747e-05, + "loss": 4.7471, + "step": 163950 + }, + { + "epoch": 0.705919826447028, + "grad_norm": 2.197559356689453, + "learning_rate": 9.52116716388266e-05, + "loss": 4.7746, + "step": 164000 + }, + { + "epoch": 0.7061350459063107, + "grad_norm": 2.874267101287842, + "learning_rate": 9.52087813934048e-05, + "loss": 4.5231, + "step": 164050 + }, + { + "epoch": 0.7063502653655933, + "grad_norm": 2.11076021194458, + "learning_rate": 9.520589031986222e-05, + "loss": 4.4911, + "step": 164100 + }, + { + "epoch": 0.7065654848248759, + "grad_norm": 3.3843462467193604, + "learning_rate": 9.520299841825182e-05, + "loss": 4.5183, + "step": 164150 + }, + { + "epoch": 0.7067807042841585, + "grad_norm": 1.8477073907852173, + "learning_rate": 9.520010568862658e-05, + "loss": 4.744, + "step": 164200 + }, + { + "epoch": 0.7069959237434412, + "grad_norm": 3.267091989517212, + "learning_rate": 9.519721213103948e-05, + "loss": 4.9663, + "step": 164250 + }, + { + "epoch": 0.7072111432027238, + "grad_norm": 3.314011335372925, + "learning_rate": 9.519431774554352e-05, + "loss": 4.8756, + "step": 164300 + }, + { + "epoch": 0.7074263626620064, + "grad_norm": 1.5094016790390015, + "learning_rate": 9.519142253219173e-05, + "loss": 4.3713, + "step": 164350 + }, + { + "epoch": 0.707641582121289, + "grad_norm": 0.415081650018692, + "learning_rate": 9.518852649103715e-05, + "loss": 4.6238, + "step": 164400 + }, + { + "epoch": 0.7078568015805717, + "grad_norm": 2.4782874584198, + "learning_rate": 9.518562962213282e-05, + "loss": 4.9095, + "step": 164450 + }, + { + "epoch": 0.7080720210398543, + "grad_norm": 0.49775704741477966, + "learning_rate": 9.518273192553179e-05, + "loss": 4.3577, + "step": 164500 + }, + { + "epoch": 0.7082872404991369, + "grad_norm": 0.47827357053756714, + "learning_rate": 9.517983340128715e-05, + "loss": 4.5777, + "step": 164550 + }, + { + "epoch": 0.7085024599584197, + "grad_norm": 3.1043388843536377, + "learning_rate": 9.5176934049452e-05, + "loss": 4.9406, + "step": 164600 + }, + { + "epoch": 0.7087176794177023, + "grad_norm": 1.4667540788650513, + "learning_rate": 9.517403387007946e-05, + "loss": 4.9688, + "step": 164650 + }, + { + "epoch": 0.7089328988769849, + "grad_norm": 1.9192180633544922, + "learning_rate": 9.517113286322262e-05, + "loss": 4.2414, + "step": 164700 + }, + { + "epoch": 0.7091481183362675, + "grad_norm": 3.2223873138427734, + "learning_rate": 9.516823102893464e-05, + "loss": 5.2858, + "step": 164750 + }, + { + "epoch": 0.7093633377955502, + "grad_norm": 2.881511688232422, + "learning_rate": 9.51653283672687e-05, + "loss": 4.7227, + "step": 164800 + }, + { + "epoch": 0.7095785572548328, + "grad_norm": 2.379365921020508, + "learning_rate": 9.516242487827791e-05, + "loss": 5.1696, + "step": 164850 + }, + { + "epoch": 0.7097937767141154, + "grad_norm": 3.398790121078491, + "learning_rate": 9.515952056201552e-05, + "loss": 4.253, + "step": 164900 + }, + { + "epoch": 0.710008996173398, + "grad_norm": 2.7239303588867188, + "learning_rate": 9.515661541853468e-05, + "loss": 4.7715, + "step": 164950 + }, + { + "epoch": 0.7102242156326807, + "grad_norm": 2.0463685989379883, + "learning_rate": 9.515370944788864e-05, + "loss": 4.3042, + "step": 165000 + }, + { + "epoch": 0.7102242156326807, + "eval_loss": 5.262209892272949, + "eval_runtime": 35.0541, + "eval_samples_per_second": 18.257, + "eval_steps_per_second": 9.129, + "eval_tts_loss": 6.780276179219636, + "step": 165000 + }, + { + "epoch": 0.7104394350919633, + "grad_norm": 3.291053533554077, + "learning_rate": 9.51508026501306e-05, + "loss": 4.7792, + "step": 165050 + }, + { + "epoch": 0.7106546545512459, + "grad_norm": 3.5379045009613037, + "learning_rate": 9.514789502531382e-05, + "loss": 4.894, + "step": 165100 + }, + { + "epoch": 0.7108698740105286, + "grad_norm": 4.889164447784424, + "learning_rate": 9.514498657349158e-05, + "loss": 5.2738, + "step": 165150 + }, + { + "epoch": 0.7110850934698112, + "grad_norm": 4.509217262268066, + "learning_rate": 9.514207729471714e-05, + "loss": 4.5976, + "step": 165200 + }, + { + "epoch": 0.7113003129290938, + "grad_norm": 2.601890802383423, + "learning_rate": 9.513916718904379e-05, + "loss": 5.2597, + "step": 165250 + }, + { + "epoch": 0.7115155323883764, + "grad_norm": 0.6298353672027588, + "learning_rate": 9.513625625652482e-05, + "loss": 4.7104, + "step": 165300 + }, + { + "epoch": 0.7117307518476591, + "grad_norm": 3.9793622493743896, + "learning_rate": 9.513334449721358e-05, + "loss": 5.1392, + "step": 165350 + }, + { + "epoch": 0.7119459713069417, + "grad_norm": 3.3591229915618896, + "learning_rate": 9.513043191116342e-05, + "loss": 4.7117, + "step": 165400 + }, + { + "epoch": 0.7121611907662243, + "grad_norm": 1.8814846277236938, + "learning_rate": 9.512751849842762e-05, + "loss": 5.2697, + "step": 165450 + }, + { + "epoch": 0.7123764102255069, + "grad_norm": 2.2339019775390625, + "learning_rate": 9.512460425905964e-05, + "loss": 4.7468, + "step": 165500 + }, + { + "epoch": 0.7125916296847896, + "grad_norm": 2.618014097213745, + "learning_rate": 9.512168919311279e-05, + "loss": 4.5348, + "step": 165550 + }, + { + "epoch": 0.7128068491440722, + "grad_norm": 1.042298436164856, + "learning_rate": 9.51187733006405e-05, + "loss": 4.6452, + "step": 165600 + }, + { + "epoch": 0.7130220686033548, + "grad_norm": 2.6839635372161865, + "learning_rate": 9.511585658169617e-05, + "loss": 5.1689, + "step": 165650 + }, + { + "epoch": 0.7132372880626374, + "grad_norm": 2.019730806350708, + "learning_rate": 9.511293903633325e-05, + "loss": 4.7706, + "step": 165700 + }, + { + "epoch": 0.7134525075219201, + "grad_norm": 1.8766263723373413, + "learning_rate": 9.511002066460515e-05, + "loss": 4.836, + "step": 165750 + }, + { + "epoch": 0.7136677269812027, + "grad_norm": 1.7911090850830078, + "learning_rate": 9.510710146656536e-05, + "loss": 4.8497, + "step": 165800 + }, + { + "epoch": 0.7138829464404853, + "grad_norm": 2.1640853881835938, + "learning_rate": 9.510418144226731e-05, + "loss": 4.6069, + "step": 165850 + }, + { + "epoch": 0.714098165899768, + "grad_norm": 7.557070732116699, + "learning_rate": 9.510126059176454e-05, + "loss": 5.1005, + "step": 165900 + }, + { + "epoch": 0.7143133853590506, + "grad_norm": 4.004714488983154, + "learning_rate": 9.509833891511052e-05, + "loss": 4.69, + "step": 165950 + }, + { + "epoch": 0.7145286048183332, + "grad_norm": 1.7188713550567627, + "learning_rate": 9.509541641235879e-05, + "loss": 4.7011, + "step": 166000 + }, + { + "epoch": 0.7147438242776158, + "grad_norm": 1.1840540170669556, + "learning_rate": 9.509249308356285e-05, + "loss": 4.1114, + "step": 166050 + }, + { + "epoch": 0.7149590437368986, + "grad_norm": 1.7746108770370483, + "learning_rate": 9.508956892877628e-05, + "loss": 4.7826, + "step": 166100 + }, + { + "epoch": 0.7151742631961812, + "grad_norm": 3.0163493156433105, + "learning_rate": 9.508664394805264e-05, + "loss": 4.5662, + "step": 166150 + }, + { + "epoch": 0.7153894826554638, + "grad_norm": 2.084078073501587, + "learning_rate": 9.50837181414455e-05, + "loss": 5.0528, + "step": 166200 + }, + { + "epoch": 0.7156047021147464, + "grad_norm": 4.385222911834717, + "learning_rate": 9.508079150900845e-05, + "loss": 5.0489, + "step": 166250 + }, + { + "epoch": 0.7158199215740291, + "grad_norm": 1.953593373298645, + "learning_rate": 9.50778640507951e-05, + "loss": 4.9155, + "step": 166300 + }, + { + "epoch": 0.7160351410333117, + "grad_norm": 1.819185733795166, + "learning_rate": 9.507493576685908e-05, + "loss": 4.5284, + "step": 166350 + }, + { + "epoch": 0.7162503604925943, + "grad_norm": 0.6881950497627258, + "learning_rate": 9.507200665725404e-05, + "loss": 4.6134, + "step": 166400 + }, + { + "epoch": 0.7164655799518769, + "grad_norm": 3.1959197521209717, + "learning_rate": 9.506907672203363e-05, + "loss": 4.9089, + "step": 166450 + }, + { + "epoch": 0.7166807994111596, + "grad_norm": 1.962356686592102, + "learning_rate": 9.50661459612515e-05, + "loss": 4.6446, + "step": 166500 + }, + { + "epoch": 0.7168960188704422, + "grad_norm": 2.737814426422119, + "learning_rate": 9.506321437496135e-05, + "loss": 5.0433, + "step": 166550 + }, + { + "epoch": 0.7171112383297248, + "grad_norm": 3.9460654258728027, + "learning_rate": 9.506028196321688e-05, + "loss": 5.0007, + "step": 166600 + }, + { + "epoch": 0.7173264577890075, + "grad_norm": 2.128997325897217, + "learning_rate": 9.505734872607179e-05, + "loss": 5.124, + "step": 166650 + }, + { + "epoch": 0.7175416772482901, + "grad_norm": 2.4889814853668213, + "learning_rate": 9.505441466357986e-05, + "loss": 4.3843, + "step": 166700 + }, + { + "epoch": 0.7177568967075727, + "grad_norm": 3.956205368041992, + "learning_rate": 9.505147977579476e-05, + "loss": 5.1049, + "step": 166750 + }, + { + "epoch": 0.7179721161668553, + "grad_norm": 2.113394021987915, + "learning_rate": 9.50485440627703e-05, + "loss": 4.2306, + "step": 166800 + }, + { + "epoch": 0.718187335626138, + "grad_norm": 1.9558604955673218, + "learning_rate": 9.504560752456025e-05, + "loss": 4.9792, + "step": 166850 + }, + { + "epoch": 0.7184025550854206, + "grad_norm": 1.9185892343521118, + "learning_rate": 9.504267016121838e-05, + "loss": 4.8673, + "step": 166900 + }, + { + "epoch": 0.7186177745447032, + "grad_norm": 0.9683041572570801, + "learning_rate": 9.503973197279852e-05, + "loss": 4.3656, + "step": 166950 + }, + { + "epoch": 0.7188329940039858, + "grad_norm": 2.786907196044922, + "learning_rate": 9.503679295935449e-05, + "loss": 5.1828, + "step": 167000 + }, + { + "epoch": 0.7190482134632685, + "grad_norm": 2.008927822113037, + "learning_rate": 9.503385312094011e-05, + "loss": 4.526, + "step": 167050 + }, + { + "epoch": 0.7192634329225511, + "grad_norm": 2.0136959552764893, + "learning_rate": 9.503091245760923e-05, + "loss": 5.0578, + "step": 167100 + }, + { + "epoch": 0.7194786523818337, + "grad_norm": 1.6682411432266235, + "learning_rate": 9.502797096941573e-05, + "loss": 4.4329, + "step": 167150 + }, + { + "epoch": 0.7196938718411164, + "grad_norm": 3.10388445854187, + "learning_rate": 9.50250286564135e-05, + "loss": 4.5144, + "step": 167200 + }, + { + "epoch": 0.719909091300399, + "grad_norm": 2.4328079223632812, + "learning_rate": 9.50220855186564e-05, + "loss": 4.7622, + "step": 167250 + }, + { + "epoch": 0.7201243107596816, + "grad_norm": 2.2987701892852783, + "learning_rate": 9.501914155619838e-05, + "loss": 4.4982, + "step": 167300 + }, + { + "epoch": 0.7203395302189642, + "grad_norm": 3.081772804260254, + "learning_rate": 9.501619676909334e-05, + "loss": 5.206, + "step": 167350 + }, + { + "epoch": 0.720554749678247, + "grad_norm": 2.007720708847046, + "learning_rate": 9.501325115739525e-05, + "loss": 5.0797, + "step": 167400 + }, + { + "epoch": 0.7207699691375296, + "grad_norm": 2.033362865447998, + "learning_rate": 9.501030472115804e-05, + "loss": 4.5194, + "step": 167450 + }, + { + "epoch": 0.7209851885968122, + "grad_norm": 2.5503101348876953, + "learning_rate": 9.500735746043568e-05, + "loss": 4.8566, + "step": 167500 + }, + { + "epoch": 0.7212004080560948, + "grad_norm": 3.4684946537017822, + "learning_rate": 9.500440937528218e-05, + "loss": 4.5903, + "step": 167550 + }, + { + "epoch": 0.7214156275153775, + "grad_norm": 5.96560001373291, + "learning_rate": 9.500146046575153e-05, + "loss": 4.6616, + "step": 167600 + }, + { + "epoch": 0.7216308469746601, + "grad_norm": 0.9140272736549377, + "learning_rate": 9.499851073189773e-05, + "loss": 4.7135, + "step": 167650 + }, + { + "epoch": 0.7218460664339427, + "grad_norm": 2.9270174503326416, + "learning_rate": 9.499556017377485e-05, + "loss": 4.8267, + "step": 167700 + }, + { + "epoch": 0.7220612858932253, + "grad_norm": 1.3002935647964478, + "learning_rate": 9.499260879143691e-05, + "loss": 4.8268, + "step": 167750 + }, + { + "epoch": 0.722276505352508, + "grad_norm": 1.5149694681167603, + "learning_rate": 9.498965658493798e-05, + "loss": 4.1996, + "step": 167800 + }, + { + "epoch": 0.7224917248117906, + "grad_norm": 2.0360970497131348, + "learning_rate": 9.498670355433213e-05, + "loss": 4.6804, + "step": 167850 + }, + { + "epoch": 0.7227069442710732, + "grad_norm": 2.6634061336517334, + "learning_rate": 9.498374969967347e-05, + "loss": 4.494, + "step": 167900 + }, + { + "epoch": 0.7229221637303559, + "grad_norm": 0.6916083097457886, + "learning_rate": 9.49807950210161e-05, + "loss": 5.1142, + "step": 167950 + }, + { + "epoch": 0.7231373831896385, + "grad_norm": 1.4380977153778076, + "learning_rate": 9.497783951841413e-05, + "loss": 5.1097, + "step": 168000 + }, + { + "epoch": 0.7231373831896385, + "eval_loss": 5.251073360443115, + "eval_runtime": 35.0034, + "eval_samples_per_second": 18.284, + "eval_steps_per_second": 9.142, + "eval_tts_loss": 6.740395789084839, + "step": 168000 + }, + { + "epoch": 0.7233526026489211, + "grad_norm": 2.0493290424346924, + "learning_rate": 9.497488319192171e-05, + "loss": 4.7093, + "step": 168050 + }, + { + "epoch": 0.7235678221082037, + "grad_norm": 0.9666145443916321, + "learning_rate": 9.497192604159301e-05, + "loss": 4.8638, + "step": 168100 + }, + { + "epoch": 0.7237830415674864, + "grad_norm": 1.5337632894515991, + "learning_rate": 9.496896806748216e-05, + "loss": 4.3808, + "step": 168150 + }, + { + "epoch": 0.723998261026769, + "grad_norm": 2.3795409202575684, + "learning_rate": 9.496600926964337e-05, + "loss": 4.8401, + "step": 168200 + }, + { + "epoch": 0.7242134804860516, + "grad_norm": 4.0036540031433105, + "learning_rate": 9.496304964813083e-05, + "loss": 4.6723, + "step": 168250 + }, + { + "epoch": 0.7244286999453342, + "grad_norm": 3.0135059356689453, + "learning_rate": 9.496008920299876e-05, + "loss": 4.6741, + "step": 168300 + }, + { + "epoch": 0.7246439194046169, + "grad_norm": 0.6635975241661072, + "learning_rate": 9.495712793430138e-05, + "loss": 4.5693, + "step": 168350 + }, + { + "epoch": 0.7248591388638995, + "grad_norm": 1.9244436025619507, + "learning_rate": 9.495416584209293e-05, + "loss": 5.1783, + "step": 168400 + }, + { + "epoch": 0.7250743583231821, + "grad_norm": 0.50797438621521, + "learning_rate": 9.49512029264277e-05, + "loss": 4.4988, + "step": 168450 + }, + { + "epoch": 0.7252895777824648, + "grad_norm": 4.677603244781494, + "learning_rate": 9.494823918735991e-05, + "loss": 4.5571, + "step": 168500 + }, + { + "epoch": 0.7255047972417474, + "grad_norm": 0.6684544682502747, + "learning_rate": 9.494527462494389e-05, + "loss": 4.7031, + "step": 168550 + }, + { + "epoch": 0.72572001670103, + "grad_norm": 2.1145856380462646, + "learning_rate": 9.494230923923392e-05, + "loss": 4.6654, + "step": 168600 + }, + { + "epoch": 0.7259352361603126, + "grad_norm": 2.2304320335388184, + "learning_rate": 9.493934303028436e-05, + "loss": 4.5439, + "step": 168650 + }, + { + "epoch": 0.7261504556195953, + "grad_norm": 2.836183547973633, + "learning_rate": 9.493637599814951e-05, + "loss": 4.957, + "step": 168700 + }, + { + "epoch": 0.726365675078878, + "grad_norm": 3.12644624710083, + "learning_rate": 9.49334081428837e-05, + "loss": 4.9755, + "step": 168750 + }, + { + "epoch": 0.7265808945381605, + "grad_norm": 2.033298969268799, + "learning_rate": 9.493043946454134e-05, + "loss": 4.6288, + "step": 168800 + }, + { + "epoch": 0.7267961139974431, + "grad_norm": 1.3273228406906128, + "learning_rate": 9.492746996317678e-05, + "loss": 4.2541, + "step": 168850 + }, + { + "epoch": 0.7270113334567259, + "grad_norm": 3.040282726287842, + "learning_rate": 9.492449963884443e-05, + "loss": 4.5974, + "step": 168900 + }, + { + "epoch": 0.7272265529160085, + "grad_norm": 4.738039493560791, + "learning_rate": 9.49215284915987e-05, + "loss": 4.8568, + "step": 168950 + }, + { + "epoch": 0.7274417723752911, + "grad_norm": 2.4944138526916504, + "learning_rate": 9.4918556521494e-05, + "loss": 5.0261, + "step": 169000 + }, + { + "epoch": 0.7276569918345737, + "grad_norm": 2.094395160675049, + "learning_rate": 9.491558372858475e-05, + "loss": 4.8288, + "step": 169050 + }, + { + "epoch": 0.7278722112938564, + "grad_norm": 2.8448307514190674, + "learning_rate": 9.491261011292547e-05, + "loss": 4.6151, + "step": 169100 + }, + { + "epoch": 0.728087430753139, + "grad_norm": 0.885973334312439, + "learning_rate": 9.490963567457058e-05, + "loss": 4.6417, + "step": 169150 + }, + { + "epoch": 0.7283026502124216, + "grad_norm": 2.16654896736145, + "learning_rate": 9.490666041357456e-05, + "loss": 4.9399, + "step": 169200 + }, + { + "epoch": 0.7285178696717043, + "grad_norm": 1.9702174663543701, + "learning_rate": 9.490368432999195e-05, + "loss": 4.5959, + "step": 169250 + }, + { + "epoch": 0.7287330891309869, + "grad_norm": 3.2032086849212646, + "learning_rate": 9.490070742387723e-05, + "loss": 4.9215, + "step": 169300 + }, + { + "epoch": 0.7289483085902695, + "grad_norm": 1.8533735275268555, + "learning_rate": 9.489772969528493e-05, + "loss": 4.8587, + "step": 169350 + }, + { + "epoch": 0.7291635280495521, + "grad_norm": 1.331933856010437, + "learning_rate": 9.489475114426962e-05, + "loss": 4.5721, + "step": 169400 + }, + { + "epoch": 0.7293787475088348, + "grad_norm": 2.3085246086120605, + "learning_rate": 9.489177177088584e-05, + "loss": 5.0685, + "step": 169450 + }, + { + "epoch": 0.7295939669681174, + "grad_norm": 2.292767286300659, + "learning_rate": 9.488879157518817e-05, + "loss": 4.8511, + "step": 169500 + }, + { + "epoch": 0.7298091864274, + "grad_norm": 2.7876644134521484, + "learning_rate": 9.488581055723121e-05, + "loss": 4.747, + "step": 169550 + }, + { + "epoch": 0.7300244058866826, + "grad_norm": 2.907675266265869, + "learning_rate": 9.488282871706955e-05, + "loss": 5.127, + "step": 169600 + }, + { + "epoch": 0.7302396253459653, + "grad_norm": 3.603684902191162, + "learning_rate": 9.487984605475783e-05, + "loss": 5.17, + "step": 169650 + }, + { + "epoch": 0.7304548448052479, + "grad_norm": 2.2233731746673584, + "learning_rate": 9.487686257035065e-05, + "loss": 4.8928, + "step": 169700 + }, + { + "epoch": 0.7306700642645305, + "grad_norm": 2.938422918319702, + "learning_rate": 9.487387826390271e-05, + "loss": 4.5059, + "step": 169750 + }, + { + "epoch": 0.7308852837238131, + "grad_norm": 1.7614070177078247, + "learning_rate": 9.487089313546864e-05, + "loss": 4.8044, + "step": 169800 + }, + { + "epoch": 0.7311005031830958, + "grad_norm": 3.3152689933776855, + "learning_rate": 9.486790718510312e-05, + "loss": 4.9208, + "step": 169850 + }, + { + "epoch": 0.7313157226423784, + "grad_norm": 2.54388165473938, + "learning_rate": 9.486492041286086e-05, + "loss": 4.471, + "step": 169900 + }, + { + "epoch": 0.731530942101661, + "grad_norm": 2.3638269901275635, + "learning_rate": 9.486193281879658e-05, + "loss": 4.5428, + "step": 169950 + }, + { + "epoch": 0.7317461615609437, + "grad_norm": 2.088045120239258, + "learning_rate": 9.485894440296498e-05, + "loss": 4.4987, + "step": 170000 + }, + { + "epoch": 0.7319613810202263, + "grad_norm": 2.6131746768951416, + "learning_rate": 9.485595516542082e-05, + "loss": 4.9068, + "step": 170050 + }, + { + "epoch": 0.7321766004795089, + "grad_norm": 1.1853605508804321, + "learning_rate": 9.485296510621884e-05, + "loss": 4.8096, + "step": 170100 + }, + { + "epoch": 0.7323918199387915, + "grad_norm": 2.3076462745666504, + "learning_rate": 9.484997422541383e-05, + "loss": 5.08, + "step": 170150 + }, + { + "epoch": 0.7326070393980743, + "grad_norm": 1.9434186220169067, + "learning_rate": 9.484698252306057e-05, + "loss": 5.2345, + "step": 170200 + }, + { + "epoch": 0.7328222588573569, + "grad_norm": 2.6506872177124023, + "learning_rate": 9.484398999921385e-05, + "loss": 4.7775, + "step": 170250 + }, + { + "epoch": 0.7330374783166395, + "grad_norm": 3.129879951477051, + "learning_rate": 9.484099665392849e-05, + "loss": 4.8716, + "step": 170300 + }, + { + "epoch": 0.7332526977759221, + "grad_norm": 1.8700907230377197, + "learning_rate": 9.483800248725933e-05, + "loss": 4.7911, + "step": 170350 + }, + { + "epoch": 0.7334679172352048, + "grad_norm": 1.7285605669021606, + "learning_rate": 9.48350074992612e-05, + "loss": 4.3561, + "step": 170400 + }, + { + "epoch": 0.7336831366944874, + "grad_norm": 0.9548271298408508, + "learning_rate": 9.483201168998899e-05, + "loss": 4.8163, + "step": 170450 + }, + { + "epoch": 0.73389835615377, + "grad_norm": 2.280202865600586, + "learning_rate": 9.482901505949755e-05, + "loss": 4.7552, + "step": 170500 + }, + { + "epoch": 0.7341135756130527, + "grad_norm": 1.8008184432983398, + "learning_rate": 9.482601760784178e-05, + "loss": 4.9602, + "step": 170550 + }, + { + "epoch": 0.7343287950723353, + "grad_norm": 2.07310152053833, + "learning_rate": 9.482301933507659e-05, + "loss": 4.7177, + "step": 170600 + }, + { + "epoch": 0.7345440145316179, + "grad_norm": 2.3239169120788574, + "learning_rate": 9.482002024125689e-05, + "loss": 4.5968, + "step": 170650 + }, + { + "epoch": 0.7347592339909005, + "grad_norm": 1.7853082418441772, + "learning_rate": 9.481702032643763e-05, + "loss": 4.5582, + "step": 170700 + }, + { + "epoch": 0.7349744534501832, + "grad_norm": 2.7287869453430176, + "learning_rate": 9.481401959067376e-05, + "loss": 4.9301, + "step": 170750 + }, + { + "epoch": 0.7351896729094658, + "grad_norm": 0.8740906715393066, + "learning_rate": 9.481101803402024e-05, + "loss": 4.8324, + "step": 170800 + }, + { + "epoch": 0.7354048923687484, + "grad_norm": 1.5767943859100342, + "learning_rate": 9.480801565653205e-05, + "loss": 4.5678, + "step": 170850 + }, + { + "epoch": 0.735620111828031, + "grad_norm": 2.8164587020874023, + "learning_rate": 9.48050124582642e-05, + "loss": 4.6403, + "step": 170900 + }, + { + "epoch": 0.7358353312873137, + "grad_norm": 3.265805244445801, + "learning_rate": 9.480200843927168e-05, + "loss": 4.8015, + "step": 170950 + }, + { + "epoch": 0.7360505507465963, + "grad_norm": 2.6549715995788574, + "learning_rate": 9.479900359960954e-05, + "loss": 4.6047, + "step": 171000 + }, + { + "epoch": 0.7360505507465963, + "eval_loss": 5.248293876647949, + "eval_runtime": 34.9337, + "eval_samples_per_second": 18.32, + "eval_steps_per_second": 9.16, + "eval_tts_loss": 6.705654384009889, + "step": 171000 + }, + { + "epoch": 0.7362657702058789, + "grad_norm": 3.5205750465393066, + "learning_rate": 9.47959979393328e-05, + "loss": 4.7864, + "step": 171050 + }, + { + "epoch": 0.7364809896651615, + "grad_norm": 2.0540976524353027, + "learning_rate": 9.479299145849654e-05, + "loss": 5.0161, + "step": 171100 + }, + { + "epoch": 0.7366962091244442, + "grad_norm": 1.1932066679000854, + "learning_rate": 9.478998415715581e-05, + "loss": 5.0511, + "step": 171150 + }, + { + "epoch": 0.7369114285837268, + "grad_norm": 2.325730085372925, + "learning_rate": 9.478697603536571e-05, + "loss": 4.4954, + "step": 171200 + }, + { + "epoch": 0.7371266480430094, + "grad_norm": 3.7106058597564697, + "learning_rate": 9.478396709318134e-05, + "loss": 5.1727, + "step": 171250 + }, + { + "epoch": 0.7373418675022921, + "grad_norm": 3.3711416721343994, + "learning_rate": 9.478095733065782e-05, + "loss": 4.9541, + "step": 171300 + }, + { + "epoch": 0.7375570869615747, + "grad_norm": 1.6810582876205444, + "learning_rate": 9.477794674785026e-05, + "loss": 4.644, + "step": 171350 + }, + { + "epoch": 0.7377723064208573, + "grad_norm": 2.1764676570892334, + "learning_rate": 9.477493534481383e-05, + "loss": 5.0369, + "step": 171400 + }, + { + "epoch": 0.7379875258801399, + "grad_norm": 2.637925863265991, + "learning_rate": 9.47719231216037e-05, + "loss": 4.6, + "step": 171450 + }, + { + "epoch": 0.7382027453394227, + "grad_norm": 0.7273229956626892, + "learning_rate": 9.476891007827501e-05, + "loss": 4.481, + "step": 171500 + }, + { + "epoch": 0.7384179647987053, + "grad_norm": 1.447170615196228, + "learning_rate": 9.4765896214883e-05, + "loss": 4.6214, + "step": 171550 + }, + { + "epoch": 0.7386331842579879, + "grad_norm": 2.2083821296691895, + "learning_rate": 9.476288153148285e-05, + "loss": 4.9357, + "step": 171600 + }, + { + "epoch": 0.7388484037172705, + "grad_norm": 1.9047503471374512, + "learning_rate": 9.475986602812976e-05, + "loss": 4.5798, + "step": 171650 + }, + { + "epoch": 0.7390636231765532, + "grad_norm": 3.1142382621765137, + "learning_rate": 9.475684970487901e-05, + "loss": 4.5864, + "step": 171700 + }, + { + "epoch": 0.7392788426358358, + "grad_norm": 2.150876522064209, + "learning_rate": 9.475383256178582e-05, + "loss": 4.604, + "step": 171750 + }, + { + "epoch": 0.7394940620951184, + "grad_norm": 1.776948094367981, + "learning_rate": 9.475081459890549e-05, + "loss": 4.9514, + "step": 171800 + }, + { + "epoch": 0.7397092815544011, + "grad_norm": 2.5387394428253174, + "learning_rate": 9.474779581629327e-05, + "loss": 4.6099, + "step": 171850 + }, + { + "epoch": 0.7399245010136837, + "grad_norm": 2.8373453617095947, + "learning_rate": 9.474477621400448e-05, + "loss": 4.7913, + "step": 171900 + }, + { + "epoch": 0.7401397204729663, + "grad_norm": 2.2868008613586426, + "learning_rate": 9.474175579209442e-05, + "loss": 4.1548, + "step": 171950 + }, + { + "epoch": 0.7403549399322489, + "grad_norm": 2.803581476211548, + "learning_rate": 9.47387345506184e-05, + "loss": 4.8907, + "step": 172000 + }, + { + "epoch": 0.7405701593915316, + "grad_norm": 1.3785403966903687, + "learning_rate": 9.47357124896318e-05, + "loss": 4.8705, + "step": 172050 + }, + { + "epoch": 0.7407853788508142, + "grad_norm": 0.5442289710044861, + "learning_rate": 9.473268960918994e-05, + "loss": 4.9322, + "step": 172100 + }, + { + "epoch": 0.7410005983100968, + "grad_norm": 2.9565887451171875, + "learning_rate": 9.472966590934822e-05, + "loss": 4.8724, + "step": 172150 + }, + { + "epoch": 0.7412158177693794, + "grad_norm": 3.3961431980133057, + "learning_rate": 9.472664139016202e-05, + "loss": 5.0384, + "step": 172200 + }, + { + "epoch": 0.7414310372286621, + "grad_norm": 1.6623259782791138, + "learning_rate": 9.472361605168674e-05, + "loss": 4.6743, + "step": 172250 + }, + { + "epoch": 0.7416462566879447, + "grad_norm": 2.7966320514678955, + "learning_rate": 9.472058989397781e-05, + "loss": 4.8017, + "step": 172300 + }, + { + "epoch": 0.7418614761472273, + "grad_norm": 2.2106664180755615, + "learning_rate": 9.471756291709062e-05, + "loss": 4.6776, + "step": 172350 + }, + { + "epoch": 0.7420766956065099, + "grad_norm": 2.7538602352142334, + "learning_rate": 9.471453512108065e-05, + "loss": 4.7585, + "step": 172400 + }, + { + "epoch": 0.7422919150657926, + "grad_norm": 2.549455404281616, + "learning_rate": 9.47115065060034e-05, + "loss": 5.0803, + "step": 172450 + }, + { + "epoch": 0.7425071345250752, + "grad_norm": 2.9620940685272217, + "learning_rate": 9.470847707191427e-05, + "loss": 4.974, + "step": 172500 + }, + { + "epoch": 0.7427223539843578, + "grad_norm": 1.8486123085021973, + "learning_rate": 9.470544681886879e-05, + "loss": 4.7384, + "step": 172550 + }, + { + "epoch": 0.7429375734436405, + "grad_norm": 2.082120180130005, + "learning_rate": 9.470241574692247e-05, + "loss": 4.8635, + "step": 172600 + }, + { + "epoch": 0.7431527929029231, + "grad_norm": 2.143564224243164, + "learning_rate": 9.469938385613084e-05, + "loss": 4.4578, + "step": 172650 + }, + { + "epoch": 0.7433680123622057, + "grad_norm": 2.609556198120117, + "learning_rate": 9.469635114654941e-05, + "loss": 5.2349, + "step": 172700 + }, + { + "epoch": 0.7435832318214883, + "grad_norm": 5.412603378295898, + "learning_rate": 9.469331761823375e-05, + "loss": 4.8806, + "step": 172750 + }, + { + "epoch": 0.743798451280771, + "grad_norm": 2.3500823974609375, + "learning_rate": 9.469028327123944e-05, + "loss": 5.1627, + "step": 172800 + }, + { + "epoch": 0.7440136707400536, + "grad_norm": 4.5299458503723145, + "learning_rate": 9.468724810562202e-05, + "loss": 4.9552, + "step": 172850 + }, + { + "epoch": 0.7442288901993362, + "grad_norm": 2.08247447013855, + "learning_rate": 9.468421212143715e-05, + "loss": 4.5414, + "step": 172900 + }, + { + "epoch": 0.7444441096586188, + "grad_norm": 2.1404597759246826, + "learning_rate": 9.468117531874037e-05, + "loss": 4.8068, + "step": 172950 + }, + { + "epoch": 0.7446593291179016, + "grad_norm": 0.653763473033905, + "learning_rate": 9.467813769758737e-05, + "loss": 4.4031, + "step": 173000 + }, + { + "epoch": 0.7448745485771842, + "grad_norm": 1.8194618225097656, + "learning_rate": 9.467509925803376e-05, + "loss": 4.4884, + "step": 173050 + }, + { + "epoch": 0.7450897680364668, + "grad_norm": 2.0662007331848145, + "learning_rate": 9.467206000013518e-05, + "loss": 4.5138, + "step": 173100 + }, + { + "epoch": 0.7453049874957494, + "grad_norm": 2.353506088256836, + "learning_rate": 9.466901992394735e-05, + "loss": 4.4992, + "step": 173150 + }, + { + "epoch": 0.7455202069550321, + "grad_norm": 2.0108373165130615, + "learning_rate": 9.466597902952594e-05, + "loss": 4.6711, + "step": 173200 + }, + { + "epoch": 0.7457354264143147, + "grad_norm": 3.427410125732422, + "learning_rate": 9.466293731692661e-05, + "loss": 4.8347, + "step": 173250 + }, + { + "epoch": 0.7459506458735973, + "grad_norm": 1.481244444847107, + "learning_rate": 9.465989478620513e-05, + "loss": 4.482, + "step": 173300 + }, + { + "epoch": 0.74616586533288, + "grad_norm": 2.8314406871795654, + "learning_rate": 9.465685143741723e-05, + "loss": 5.0332, + "step": 173350 + }, + { + "epoch": 0.7463810847921626, + "grad_norm": 3.1553120613098145, + "learning_rate": 9.465380727061863e-05, + "loss": 4.7454, + "step": 173400 + }, + { + "epoch": 0.7465963042514452, + "grad_norm": 2.747929811477661, + "learning_rate": 9.46507622858651e-05, + "loss": 5.0171, + "step": 173450 + }, + { + "epoch": 0.7468115237107278, + "grad_norm": 1.7678313255310059, + "learning_rate": 9.464771648321242e-05, + "loss": 4.7573, + "step": 173500 + }, + { + "epoch": 0.7470267431700105, + "grad_norm": 2.3142058849334717, + "learning_rate": 9.464466986271639e-05, + "loss": 4.614, + "step": 173550 + }, + { + "epoch": 0.7472419626292931, + "grad_norm": 1.0973875522613525, + "learning_rate": 9.46416224244328e-05, + "loss": 4.6451, + "step": 173600 + }, + { + "epoch": 0.7474571820885757, + "grad_norm": 1.324944257736206, + "learning_rate": 9.463857416841748e-05, + "loss": 3.991, + "step": 173650 + }, + { + "epoch": 0.7476724015478583, + "grad_norm": 3.7112653255462646, + "learning_rate": 9.463552509472629e-05, + "loss": 4.5077, + "step": 173700 + }, + { + "epoch": 0.747887621007141, + "grad_norm": 3.8976566791534424, + "learning_rate": 9.463247520341504e-05, + "loss": 4.5475, + "step": 173750 + }, + { + "epoch": 0.7481028404664236, + "grad_norm": 1.9264146089553833, + "learning_rate": 9.462942449453962e-05, + "loss": 4.9097, + "step": 173800 + }, + { + "epoch": 0.7483180599257062, + "grad_norm": 4.140900611877441, + "learning_rate": 9.462637296815591e-05, + "loss": 4.7833, + "step": 173850 + }, + { + "epoch": 0.7485332793849889, + "grad_norm": 4.184798717498779, + "learning_rate": 9.462332062431981e-05, + "loss": 4.588, + "step": 173900 + }, + { + "epoch": 0.7487484988442715, + "grad_norm": 1.9360474348068237, + "learning_rate": 9.462026746308723e-05, + "loss": 4.5283, + "step": 173950 + }, + { + "epoch": 0.7489637183035541, + "grad_norm": 3.600937843322754, + "learning_rate": 9.461721348451409e-05, + "loss": 4.8128, + "step": 174000 + }, + { + "epoch": 0.7489637183035541, + "eval_loss": 5.237414360046387, + "eval_runtime": 35.1119, + "eval_samples_per_second": 18.227, + "eval_steps_per_second": 9.114, + "eval_tts_loss": 6.725263701585723, + "step": 174000 + }, + { + "epoch": 0.7491789377628367, + "grad_norm": 1.8850865364074707, + "learning_rate": 9.461415868865636e-05, + "loss": 4.5292, + "step": 174050 + }, + { + "epoch": 0.7493941572221194, + "grad_norm": 1.1913362741470337, + "learning_rate": 9.461110307556994e-05, + "loss": 4.701, + "step": 174100 + }, + { + "epoch": 0.749609376681402, + "grad_norm": 1.2338004112243652, + "learning_rate": 9.460804664531085e-05, + "loss": 4.8928, + "step": 174150 + }, + { + "epoch": 0.7498245961406846, + "grad_norm": 3.213083505630493, + "learning_rate": 9.460498939793507e-05, + "loss": 5.1429, + "step": 174200 + }, + { + "epoch": 0.7500398155999672, + "grad_norm": 3.1626646518707275, + "learning_rate": 9.460193133349859e-05, + "loss": 4.3884, + "step": 174250 + }, + { + "epoch": 0.75025503505925, + "grad_norm": 2.7065768241882324, + "learning_rate": 9.459887245205744e-05, + "loss": 4.5238, + "step": 174300 + }, + { + "epoch": 0.7504702545185326, + "grad_norm": 1.7505899667739868, + "learning_rate": 9.459581275366762e-05, + "loss": 4.4742, + "step": 174350 + }, + { + "epoch": 0.7506854739778152, + "grad_norm": 1.2657495737075806, + "learning_rate": 9.45927522383852e-05, + "loss": 4.6572, + "step": 174400 + }, + { + "epoch": 0.7509006934370978, + "grad_norm": 2.290170907974243, + "learning_rate": 9.458969090626627e-05, + "loss": 4.824, + "step": 174450 + }, + { + "epoch": 0.7511159128963805, + "grad_norm": 4.245881080627441, + "learning_rate": 9.458662875736684e-05, + "loss": 4.783, + "step": 174500 + }, + { + "epoch": 0.7513311323556631, + "grad_norm": 1.3724415302276611, + "learning_rate": 9.458356579174306e-05, + "loss": 4.4142, + "step": 174550 + }, + { + "epoch": 0.7515463518149457, + "grad_norm": 2.5623419284820557, + "learning_rate": 9.458050200945101e-05, + "loss": 5.0646, + "step": 174600 + }, + { + "epoch": 0.7517615712742284, + "grad_norm": 1.7191712856292725, + "learning_rate": 9.457743741054683e-05, + "loss": 4.7233, + "step": 174650 + }, + { + "epoch": 0.751976790733511, + "grad_norm": 1.2372431755065918, + "learning_rate": 9.457437199508664e-05, + "loss": 4.2826, + "step": 174700 + }, + { + "epoch": 0.7521920101927936, + "grad_norm": 2.6033365726470947, + "learning_rate": 9.457130576312658e-05, + "loss": 4.756, + "step": 174750 + }, + { + "epoch": 0.7524072296520762, + "grad_norm": 4.6771039962768555, + "learning_rate": 9.456823871472284e-05, + "loss": 4.7721, + "step": 174800 + }, + { + "epoch": 0.7526224491113589, + "grad_norm": 2.376307964324951, + "learning_rate": 9.45651708499316e-05, + "loss": 5.0768, + "step": 174850 + }, + { + "epoch": 0.7528376685706415, + "grad_norm": 4.981451511383057, + "learning_rate": 9.456210216880902e-05, + "loss": 4.8776, + "step": 174900 + }, + { + "epoch": 0.7530528880299241, + "grad_norm": 1.7316652536392212, + "learning_rate": 9.455903267141138e-05, + "loss": 4.7081, + "step": 174950 + }, + { + "epoch": 0.7532681074892067, + "grad_norm": 3.5610265731811523, + "learning_rate": 9.455596235779483e-05, + "loss": 4.5479, + "step": 175000 + }, + { + "epoch": 0.7534833269484894, + "grad_norm": 3.4163172245025635, + "learning_rate": 9.455289122801567e-05, + "loss": 5.0834, + "step": 175050 + }, + { + "epoch": 0.753698546407772, + "grad_norm": 2.2002787590026855, + "learning_rate": 9.454981928213013e-05, + "loss": 4.3581, + "step": 175100 + }, + { + "epoch": 0.7539137658670546, + "grad_norm": 0.7090714573860168, + "learning_rate": 9.454674652019447e-05, + "loss": 4.2958, + "step": 175150 + }, + { + "epoch": 0.7541289853263373, + "grad_norm": 1.9822616577148438, + "learning_rate": 9.4543672942265e-05, + "loss": 4.5747, + "step": 175200 + }, + { + "epoch": 0.7543442047856199, + "grad_norm": 1.1731784343719482, + "learning_rate": 9.4540598548398e-05, + "loss": 4.4894, + "step": 175250 + }, + { + "epoch": 0.7545594242449025, + "grad_norm": 0.6473475694656372, + "learning_rate": 9.453752333864982e-05, + "loss": 4.6257, + "step": 175300 + }, + { + "epoch": 0.7547746437041851, + "grad_norm": 2.950418710708618, + "learning_rate": 9.453444731307676e-05, + "loss": 4.707, + "step": 175350 + }, + { + "epoch": 0.7549898631634678, + "grad_norm": 2.3484675884246826, + "learning_rate": 9.453137047173516e-05, + "loss": 4.7561, + "step": 175400 + }, + { + "epoch": 0.7552050826227504, + "grad_norm": 2.3283779621124268, + "learning_rate": 9.452829281468141e-05, + "loss": 4.9643, + "step": 175450 + }, + { + "epoch": 0.755420302082033, + "grad_norm": 3.1240406036376953, + "learning_rate": 9.452521434197186e-05, + "loss": 4.8177, + "step": 175500 + }, + { + "epoch": 0.7556355215413156, + "grad_norm": 2.433018207550049, + "learning_rate": 9.45221350536629e-05, + "loss": 4.8043, + "step": 175550 + }, + { + "epoch": 0.7558507410005983, + "grad_norm": 2.0088346004486084, + "learning_rate": 9.451905494981094e-05, + "loss": 4.7083, + "step": 175600 + }, + { + "epoch": 0.756065960459881, + "grad_norm": 3.186539649963379, + "learning_rate": 9.451597403047243e-05, + "loss": 4.5113, + "step": 175650 + }, + { + "epoch": 0.7562811799191635, + "grad_norm": 1.5048218965530396, + "learning_rate": 9.451289229570377e-05, + "loss": 4.9926, + "step": 175700 + }, + { + "epoch": 0.7564963993784461, + "grad_norm": 0.5613989233970642, + "learning_rate": 9.450980974556143e-05, + "loss": 4.954, + "step": 175750 + }, + { + "epoch": 0.7567116188377289, + "grad_norm": 2.896710157394409, + "learning_rate": 9.450672638010185e-05, + "loss": 4.8325, + "step": 175800 + }, + { + "epoch": 0.7569268382970115, + "grad_norm": 2.861781120300293, + "learning_rate": 9.450364219938154e-05, + "loss": 4.8169, + "step": 175850 + }, + { + "epoch": 0.7571420577562941, + "grad_norm": 0.2500028610229492, + "learning_rate": 9.450055720345698e-05, + "loss": 4.7284, + "step": 175900 + }, + { + "epoch": 0.7573572772155768, + "grad_norm": 2.3097877502441406, + "learning_rate": 9.449747139238467e-05, + "loss": 4.9083, + "step": 175950 + }, + { + "epoch": 0.7575724966748594, + "grad_norm": 1.0943201780319214, + "learning_rate": 9.449438476622116e-05, + "loss": 4.5453, + "step": 176000 + }, + { + "epoch": 0.757787716134142, + "grad_norm": 2.0373356342315674, + "learning_rate": 9.449129732502297e-05, + "loss": 4.2354, + "step": 176050 + }, + { + "epoch": 0.7580029355934246, + "grad_norm": 3.7276036739349365, + "learning_rate": 9.448820906884666e-05, + "loss": 5.0679, + "step": 176100 + }, + { + "epoch": 0.7582181550527073, + "grad_norm": 4.025689125061035, + "learning_rate": 9.44851199977488e-05, + "loss": 4.9396, + "step": 176150 + }, + { + "epoch": 0.7584333745119899, + "grad_norm": 3.202852964401245, + "learning_rate": 9.448203011178598e-05, + "loss": 5.1677, + "step": 176200 + }, + { + "epoch": 0.7586485939712725, + "grad_norm": 2.9532394409179688, + "learning_rate": 9.44789394110148e-05, + "loss": 4.9985, + "step": 176250 + }, + { + "epoch": 0.7588638134305551, + "grad_norm": 3.5950732231140137, + "learning_rate": 9.447584789549186e-05, + "loss": 4.5983, + "step": 176300 + }, + { + "epoch": 0.7590790328898378, + "grad_norm": 2.749112129211426, + "learning_rate": 9.447275556527381e-05, + "loss": 4.8564, + "step": 176350 + }, + { + "epoch": 0.7592942523491204, + "grad_norm": 3.6065826416015625, + "learning_rate": 9.446966242041729e-05, + "loss": 4.5788, + "step": 176400 + }, + { + "epoch": 0.759509471808403, + "grad_norm": 0.8486310839653015, + "learning_rate": 9.446656846097892e-05, + "loss": 4.2023, + "step": 176450 + }, + { + "epoch": 0.7597246912676857, + "grad_norm": 3.269174575805664, + "learning_rate": 9.446347368701544e-05, + "loss": 4.8914, + "step": 176500 + }, + { + "epoch": 0.7599399107269683, + "grad_norm": 3.1560678482055664, + "learning_rate": 9.446037809858349e-05, + "loss": 4.9867, + "step": 176550 + }, + { + "epoch": 0.7601551301862509, + "grad_norm": 1.743828535079956, + "learning_rate": 9.44572816957398e-05, + "loss": 4.8196, + "step": 176600 + }, + { + "epoch": 0.7603703496455335, + "grad_norm": 2.625025510787964, + "learning_rate": 9.445418447854106e-05, + "loss": 5.0897, + "step": 176650 + }, + { + "epoch": 0.7605855691048162, + "grad_norm": 2.5187628269195557, + "learning_rate": 9.445108644704404e-05, + "loss": 5.3843, + "step": 176700 + }, + { + "epoch": 0.7608007885640988, + "grad_norm": 2.872493028640747, + "learning_rate": 9.444798760130548e-05, + "loss": 4.5995, + "step": 176750 + }, + { + "epoch": 0.7610160080233814, + "grad_norm": 4.138384819030762, + "learning_rate": 9.444488794138212e-05, + "loss": 4.7974, + "step": 176800 + }, + { + "epoch": 0.761231227482664, + "grad_norm": 1.6806941032409668, + "learning_rate": 9.444178746733076e-05, + "loss": 5.1254, + "step": 176850 + }, + { + "epoch": 0.7614464469419467, + "grad_norm": 1.7933290004730225, + "learning_rate": 9.443868617920819e-05, + "loss": 4.9319, + "step": 176900 + }, + { + "epoch": 0.7616616664012293, + "grad_norm": 2.0298917293548584, + "learning_rate": 9.44355840770712e-05, + "loss": 4.9651, + "step": 176950 + }, + { + "epoch": 0.7618768858605119, + "grad_norm": 2.175424098968506, + "learning_rate": 9.443248116097666e-05, + "loss": 4.7369, + "step": 177000 + }, + { + "epoch": 0.7618768858605119, + "eval_loss": 5.231561183929443, + "eval_runtime": 35.0094, + "eval_samples_per_second": 18.281, + "eval_steps_per_second": 9.14, + "eval_tts_loss": 6.73691140936782, + "step": 177000 + }, + { + "epoch": 0.7620921053197945, + "grad_norm": 2.275515079498291, + "learning_rate": 9.442937743098135e-05, + "loss": 4.758, + "step": 177050 + }, + { + "epoch": 0.7623073247790773, + "grad_norm": 4.370133876800537, + "learning_rate": 9.442627288714217e-05, + "loss": 4.9505, + "step": 177100 + }, + { + "epoch": 0.7625225442383599, + "grad_norm": 2.64532208442688, + "learning_rate": 9.442316752951596e-05, + "loss": 4.9071, + "step": 177150 + }, + { + "epoch": 0.7627377636976425, + "grad_norm": 0.6991246938705444, + "learning_rate": 9.442006135815962e-05, + "loss": 4.9732, + "step": 177200 + }, + { + "epoch": 0.7629529831569252, + "grad_norm": 2.9217333793640137, + "learning_rate": 9.441695437313003e-05, + "loss": 4.5334, + "step": 177250 + }, + { + "epoch": 0.7631682026162078, + "grad_norm": 0.9961326718330383, + "learning_rate": 9.44138465744841e-05, + "loss": 4.4843, + "step": 177300 + }, + { + "epoch": 0.7633834220754904, + "grad_norm": 2.889305591583252, + "learning_rate": 9.441073796227881e-05, + "loss": 4.8682, + "step": 177350 + }, + { + "epoch": 0.763598641534773, + "grad_norm": 2.375128984451294, + "learning_rate": 9.440762853657104e-05, + "loss": 4.5887, + "step": 177400 + }, + { + "epoch": 0.7638138609940557, + "grad_norm": 0.9591078758239746, + "learning_rate": 9.440451829741776e-05, + "loss": 4.2595, + "step": 177450 + }, + { + "epoch": 0.7640290804533383, + "grad_norm": 0.693954348564148, + "learning_rate": 9.440140724487598e-05, + "loss": 4.3439, + "step": 177500 + }, + { + "epoch": 0.7642442999126209, + "grad_norm": 2.5575876235961914, + "learning_rate": 9.439829537900264e-05, + "loss": 4.399, + "step": 177550 + }, + { + "epoch": 0.7644595193719035, + "grad_norm": 0.6336132884025574, + "learning_rate": 9.439518269985476e-05, + "loss": 5.0331, + "step": 177600 + }, + { + "epoch": 0.7646747388311862, + "grad_norm": 2.0233304500579834, + "learning_rate": 9.439206920748939e-05, + "loss": 4.6664, + "step": 177650 + }, + { + "epoch": 0.7648899582904688, + "grad_norm": 3.119706630706787, + "learning_rate": 9.438895490196351e-05, + "loss": 4.866, + "step": 177700 + }, + { + "epoch": 0.7651051777497514, + "grad_norm": 1.5416491031646729, + "learning_rate": 9.438583978333418e-05, + "loss": 4.5692, + "step": 177750 + }, + { + "epoch": 0.765320397209034, + "grad_norm": 2.2814130783081055, + "learning_rate": 9.43827238516585e-05, + "loss": 4.6271, + "step": 177800 + }, + { + "epoch": 0.7655356166683167, + "grad_norm": 2.1556663513183594, + "learning_rate": 9.437960710699349e-05, + "loss": 4.9773, + "step": 177850 + }, + { + "epoch": 0.7657508361275993, + "grad_norm": 2.987297296524048, + "learning_rate": 9.437648954939628e-05, + "loss": 4.5444, + "step": 177900 + }, + { + "epoch": 0.7659660555868819, + "grad_norm": 1.8535926342010498, + "learning_rate": 9.437337117892395e-05, + "loss": 4.7619, + "step": 177950 + }, + { + "epoch": 0.7661812750461646, + "grad_norm": 2.799400568008423, + "learning_rate": 9.437025199563365e-05, + "loss": 4.6802, + "step": 178000 + }, + { + "epoch": 0.7663964945054472, + "grad_norm": 1.0847301483154297, + "learning_rate": 9.436713199958252e-05, + "loss": 4.4987, + "step": 178050 + }, + { + "epoch": 0.7666117139647298, + "grad_norm": 2.070974826812744, + "learning_rate": 9.436401119082768e-05, + "loss": 5.2055, + "step": 178100 + }, + { + "epoch": 0.7668269334240124, + "grad_norm": 3.505695104598999, + "learning_rate": 9.43608895694263e-05, + "loss": 4.8949, + "step": 178150 + }, + { + "epoch": 0.7670421528832951, + "grad_norm": 2.6085410118103027, + "learning_rate": 9.435776713543557e-05, + "loss": 4.7627, + "step": 178200 + }, + { + "epoch": 0.7672573723425777, + "grad_norm": 1.7210012674331665, + "learning_rate": 9.435464388891269e-05, + "loss": 4.574, + "step": 178250 + }, + { + "epoch": 0.7674725918018603, + "grad_norm": 3.111241579055786, + "learning_rate": 9.435151982991488e-05, + "loss": 4.5795, + "step": 178300 + }, + { + "epoch": 0.7676878112611429, + "grad_norm": 1.9401297569274902, + "learning_rate": 9.434839495849935e-05, + "loss": 4.3195, + "step": 178350 + }, + { + "epoch": 0.7679030307204257, + "grad_norm": 3.5041425228118896, + "learning_rate": 9.434526927472333e-05, + "loss": 5.0039, + "step": 178400 + }, + { + "epoch": 0.7681182501797083, + "grad_norm": 3.9648995399475098, + "learning_rate": 9.43421427786441e-05, + "loss": 4.5286, + "step": 178450 + }, + { + "epoch": 0.7683334696389909, + "grad_norm": 4.539130687713623, + "learning_rate": 9.433901547031892e-05, + "loss": 5.0371, + "step": 178500 + }, + { + "epoch": 0.7685486890982736, + "grad_norm": 2.5421700477600098, + "learning_rate": 9.433588734980507e-05, + "loss": 4.8886, + "step": 178550 + }, + { + "epoch": 0.7687639085575562, + "grad_norm": 0.866698682308197, + "learning_rate": 9.433275841715985e-05, + "loss": 4.895, + "step": 178600 + }, + { + "epoch": 0.7689791280168388, + "grad_norm": 2.3506195545196533, + "learning_rate": 9.43296286724406e-05, + "loss": 4.704, + "step": 178650 + }, + { + "epoch": 0.7691943474761214, + "grad_norm": 0.6726365685462952, + "learning_rate": 9.432649811570461e-05, + "loss": 4.8531, + "step": 178700 + }, + { + "epoch": 0.7694095669354041, + "grad_norm": 0.8619710206985474, + "learning_rate": 9.432336674700925e-05, + "loss": 4.6655, + "step": 178750 + }, + { + "epoch": 0.7696247863946867, + "grad_norm": 0.8872758746147156, + "learning_rate": 9.432023456641187e-05, + "loss": 4.4153, + "step": 178800 + }, + { + "epoch": 0.7698400058539693, + "grad_norm": 2.542766571044922, + "learning_rate": 9.431710157396984e-05, + "loss": 5.0589, + "step": 178850 + }, + { + "epoch": 0.7700552253132519, + "grad_norm": 3.03132700920105, + "learning_rate": 9.431396776974057e-05, + "loss": 4.7108, + "step": 178900 + }, + { + "epoch": 0.7702704447725346, + "grad_norm": 2.1209986209869385, + "learning_rate": 9.431083315378144e-05, + "loss": 4.3572, + "step": 178950 + }, + { + "epoch": 0.7704856642318172, + "grad_norm": 1.1983375549316406, + "learning_rate": 9.430769772614989e-05, + "loss": 5.0889, + "step": 179000 + }, + { + "epoch": 0.7707008836910998, + "grad_norm": 2.2669405937194824, + "learning_rate": 9.430456148690333e-05, + "loss": 4.821, + "step": 179050 + }, + { + "epoch": 0.7709161031503824, + "grad_norm": 1.8690121173858643, + "learning_rate": 9.430142443609924e-05, + "loss": 4.7157, + "step": 179100 + }, + { + "epoch": 0.7711313226096651, + "grad_norm": 6.672796249389648, + "learning_rate": 9.429828657379505e-05, + "loss": 4.889, + "step": 179150 + }, + { + "epoch": 0.7713465420689477, + "grad_norm": 2.0847949981689453, + "learning_rate": 9.429514790004825e-05, + "loss": 4.9609, + "step": 179200 + }, + { + "epoch": 0.7715617615282303, + "grad_norm": 1.871484398841858, + "learning_rate": 9.429200841491634e-05, + "loss": 4.5565, + "step": 179250 + }, + { + "epoch": 0.771776980987513, + "grad_norm": 2.2354896068573, + "learning_rate": 9.428886811845684e-05, + "loss": 5.2257, + "step": 179300 + }, + { + "epoch": 0.7719922004467956, + "grad_norm": 2.4401750564575195, + "learning_rate": 9.428572701072724e-05, + "loss": 4.5693, + "step": 179350 + }, + { + "epoch": 0.7722074199060782, + "grad_norm": 1.5792691707611084, + "learning_rate": 9.428258509178511e-05, + "loss": 5.0284, + "step": 179400 + }, + { + "epoch": 0.7724226393653608, + "grad_norm": 0.7564102411270142, + "learning_rate": 9.4279442361688e-05, + "loss": 4.2484, + "step": 179450 + }, + { + "epoch": 0.7726378588246435, + "grad_norm": 0.5406323671340942, + "learning_rate": 9.427629882049345e-05, + "loss": 4.833, + "step": 179500 + }, + { + "epoch": 0.7728530782839261, + "grad_norm": 0.7234613299369812, + "learning_rate": 9.427315446825905e-05, + "loss": 4.5154, + "step": 179550 + }, + { + "epoch": 0.7730682977432087, + "grad_norm": 0.7167948484420776, + "learning_rate": 9.427000930504242e-05, + "loss": 4.8937, + "step": 179600 + }, + { + "epoch": 0.7732835172024913, + "grad_norm": 4.674708366394043, + "learning_rate": 9.426686333090116e-05, + "loss": 4.5428, + "step": 179650 + }, + { + "epoch": 0.773498736661774, + "grad_norm": 0.8226141333580017, + "learning_rate": 9.426371654589288e-05, + "loss": 4.8923, + "step": 179700 + }, + { + "epoch": 0.7737139561210566, + "grad_norm": 2.5077080726623535, + "learning_rate": 9.426056895007524e-05, + "loss": 4.691, + "step": 179750 + }, + { + "epoch": 0.7739291755803392, + "grad_norm": 4.052765846252441, + "learning_rate": 9.42574205435059e-05, + "loss": 4.9935, + "step": 179800 + }, + { + "epoch": 0.774144395039622, + "grad_norm": 1.720339059829712, + "learning_rate": 9.425427132624254e-05, + "loss": 4.8388, + "step": 179850 + }, + { + "epoch": 0.7743596144989046, + "grad_norm": 2.8714425563812256, + "learning_rate": 9.425112129834282e-05, + "loss": 4.7716, + "step": 179900 + }, + { + "epoch": 0.7745748339581872, + "grad_norm": 1.689247488975525, + "learning_rate": 9.424797045986444e-05, + "loss": 4.7268, + "step": 179950 + }, + { + "epoch": 0.7747900534174698, + "grad_norm": 3.0910258293151855, + "learning_rate": 9.424481881086513e-05, + "loss": 4.7605, + "step": 180000 + }, + { + "epoch": 0.7747900534174698, + "eval_loss": 5.2312493324279785, + "eval_runtime": 35.1208, + "eval_samples_per_second": 18.223, + "eval_steps_per_second": 9.111, + "eval_tts_loss": 6.724886300389768, + "step": 180000 + }, + { + "epoch": 0.7750052728767525, + "grad_norm": 2.266731023788452, + "learning_rate": 9.424166635140264e-05, + "loss": 4.6758, + "step": 180050 + }, + { + "epoch": 0.7752204923360351, + "grad_norm": 2.786032199859619, + "learning_rate": 9.423851308153468e-05, + "loss": 4.5534, + "step": 180100 + }, + { + "epoch": 0.7754357117953177, + "grad_norm": 1.3109441995620728, + "learning_rate": 9.423535900131904e-05, + "loss": 4.5802, + "step": 180150 + }, + { + "epoch": 0.7756509312546003, + "grad_norm": 0.5433363914489746, + "learning_rate": 9.423220411081348e-05, + "loss": 4.6509, + "step": 180200 + }, + { + "epoch": 0.775866150713883, + "grad_norm": 2.1597352027893066, + "learning_rate": 9.422904841007579e-05, + "loss": 5.1107, + "step": 180250 + }, + { + "epoch": 0.7760813701731656, + "grad_norm": 2.57503342628479, + "learning_rate": 9.422589189916376e-05, + "loss": 4.2998, + "step": 180300 + }, + { + "epoch": 0.7762965896324482, + "grad_norm": 1.5627505779266357, + "learning_rate": 9.422273457813525e-05, + "loss": 4.512, + "step": 180350 + }, + { + "epoch": 0.7765118090917308, + "grad_norm": 2.117169141769409, + "learning_rate": 9.421957644704807e-05, + "loss": 5.1467, + "step": 180400 + }, + { + "epoch": 0.7767270285510135, + "grad_norm": 2.9874281883239746, + "learning_rate": 9.421641750596007e-05, + "loss": 4.9098, + "step": 180450 + }, + { + "epoch": 0.7769422480102961, + "grad_norm": 2.138292074203491, + "learning_rate": 9.421325775492911e-05, + "loss": 4.9589, + "step": 180500 + }, + { + "epoch": 0.7771574674695787, + "grad_norm": 2.650320053100586, + "learning_rate": 9.421009719401309e-05, + "loss": 4.4896, + "step": 180550 + }, + { + "epoch": 0.7773726869288614, + "grad_norm": 2.6690661907196045, + "learning_rate": 9.420693582326987e-05, + "loss": 5.037, + "step": 180600 + }, + { + "epoch": 0.777587906388144, + "grad_norm": 3.730393409729004, + "learning_rate": 9.420377364275739e-05, + "loss": 4.918, + "step": 180650 + }, + { + "epoch": 0.7778031258474266, + "grad_norm": 1.7289904356002808, + "learning_rate": 9.420061065253356e-05, + "loss": 4.8759, + "step": 180700 + }, + { + "epoch": 0.7780183453067092, + "grad_norm": 2.763723373413086, + "learning_rate": 9.419744685265632e-05, + "loss": 4.4854, + "step": 180750 + }, + { + "epoch": 0.7782335647659919, + "grad_norm": 0.7246621251106262, + "learning_rate": 9.419428224318363e-05, + "loss": 4.6949, + "step": 180800 + }, + { + "epoch": 0.7784487842252745, + "grad_norm": 5.601085662841797, + "learning_rate": 9.419111682417346e-05, + "loss": 4.5288, + "step": 180850 + }, + { + "epoch": 0.7786640036845571, + "grad_norm": 0.7194662094116211, + "learning_rate": 9.418795059568378e-05, + "loss": 4.5904, + "step": 180900 + }, + { + "epoch": 0.7788792231438397, + "grad_norm": 0.9236837029457092, + "learning_rate": 9.418478355777258e-05, + "loss": 4.5197, + "step": 180950 + }, + { + "epoch": 0.7790944426031224, + "grad_norm": 3.828740119934082, + "learning_rate": 9.418161571049791e-05, + "loss": 5.0409, + "step": 181000 + }, + { + "epoch": 0.779309662062405, + "grad_norm": 1.8483991622924805, + "learning_rate": 9.417844705391778e-05, + "loss": 4.9606, + "step": 181050 + }, + { + "epoch": 0.7795248815216876, + "grad_norm": 2.919088840484619, + "learning_rate": 9.417527758809021e-05, + "loss": 4.3193, + "step": 181100 + }, + { + "epoch": 0.7797401009809702, + "grad_norm": 2.381227970123291, + "learning_rate": 9.417210731307328e-05, + "loss": 4.8081, + "step": 181150 + }, + { + "epoch": 0.779955320440253, + "grad_norm": 1.9584165811538696, + "learning_rate": 9.416893622892506e-05, + "loss": 5.0029, + "step": 181200 + }, + { + "epoch": 0.7801705398995356, + "grad_norm": 2.7675938606262207, + "learning_rate": 9.416576433570363e-05, + "loss": 4.9354, + "step": 181250 + }, + { + "epoch": 0.7803857593588182, + "grad_norm": 3.3072919845581055, + "learning_rate": 9.41625916334671e-05, + "loss": 4.7699, + "step": 181300 + }, + { + "epoch": 0.7806009788181009, + "grad_norm": 0.9700275659561157, + "learning_rate": 9.415941812227358e-05, + "loss": 4.6066, + "step": 181350 + }, + { + "epoch": 0.7808161982773835, + "grad_norm": 1.280367136001587, + "learning_rate": 9.415624380218121e-05, + "loss": 4.5093, + "step": 181400 + }, + { + "epoch": 0.7810314177366661, + "grad_norm": 1.8365185260772705, + "learning_rate": 9.415306867324812e-05, + "loss": 5.0844, + "step": 181450 + }, + { + "epoch": 0.7812466371959487, + "grad_norm": 2.4391894340515137, + "learning_rate": 9.41498927355325e-05, + "loss": 5.2016, + "step": 181500 + }, + { + "epoch": 0.7814618566552314, + "grad_norm": 1.4426076412200928, + "learning_rate": 9.414671598909248e-05, + "loss": 4.6648, + "step": 181550 + }, + { + "epoch": 0.781677076114514, + "grad_norm": 1.9843686819076538, + "learning_rate": 9.414353843398632e-05, + "loss": 4.7363, + "step": 181600 + }, + { + "epoch": 0.7818922955737966, + "grad_norm": 2.807039260864258, + "learning_rate": 9.414036007027213e-05, + "loss": 4.8308, + "step": 181650 + }, + { + "epoch": 0.7821075150330792, + "grad_norm": 2.164083957672119, + "learning_rate": 9.413718089800823e-05, + "loss": 4.4073, + "step": 181700 + }, + { + "epoch": 0.7823227344923619, + "grad_norm": 1.9534205198287964, + "learning_rate": 9.413400091725277e-05, + "loss": 4.908, + "step": 181750 + }, + { + "epoch": 0.7825379539516445, + "grad_norm": 3.0719590187072754, + "learning_rate": 9.413082012806406e-05, + "loss": 4.8349, + "step": 181800 + }, + { + "epoch": 0.7827531734109271, + "grad_norm": 0.9804259538650513, + "learning_rate": 9.412763853050035e-05, + "loss": 4.5563, + "step": 181850 + }, + { + "epoch": 0.7829683928702098, + "grad_norm": 3.1104907989501953, + "learning_rate": 9.412445612461991e-05, + "loss": 4.4279, + "step": 181900 + }, + { + "epoch": 0.7831836123294924, + "grad_norm": 2.300685405731201, + "learning_rate": 9.412127291048103e-05, + "loss": 4.4337, + "step": 181950 + }, + { + "epoch": 0.783398831788775, + "grad_norm": 3.7139933109283447, + "learning_rate": 9.411808888814201e-05, + "loss": 4.116, + "step": 182000 + }, + { + "epoch": 0.7836140512480576, + "grad_norm": 2.3068947792053223, + "learning_rate": 9.411490405766123e-05, + "loss": 4.6394, + "step": 182050 + }, + { + "epoch": 0.7838292707073403, + "grad_norm": 2.780261278152466, + "learning_rate": 9.411171841909697e-05, + "loss": 4.4223, + "step": 182100 + }, + { + "epoch": 0.7840444901666229, + "grad_norm": 2.5353000164031982, + "learning_rate": 9.410853197250759e-05, + "loss": 5.1759, + "step": 182150 + }, + { + "epoch": 0.7842597096259055, + "grad_norm": 2.8179819583892822, + "learning_rate": 9.41053447179515e-05, + "loss": 4.6936, + "step": 182200 + }, + { + "epoch": 0.7844749290851881, + "grad_norm": 2.108032464981079, + "learning_rate": 9.410215665548703e-05, + "loss": 5.0307, + "step": 182250 + }, + { + "epoch": 0.7846901485444708, + "grad_norm": 2.1533315181732178, + "learning_rate": 9.40989677851726e-05, + "loss": 4.8848, + "step": 182300 + }, + { + "epoch": 0.7849053680037534, + "grad_norm": 2.8021814823150635, + "learning_rate": 9.409577810706664e-05, + "loss": 4.3461, + "step": 182350 + }, + { + "epoch": 0.785120587463036, + "grad_norm": 2.612481117248535, + "learning_rate": 9.409258762122756e-05, + "loss": 4.6458, + "step": 182400 + }, + { + "epoch": 0.7853358069223186, + "grad_norm": 5.804429531097412, + "learning_rate": 9.408939632771381e-05, + "loss": 4.2882, + "step": 182450 + }, + { + "epoch": 0.7855510263816013, + "grad_norm": 2.048861503601074, + "learning_rate": 9.408620422658383e-05, + "loss": 4.9885, + "step": 182500 + }, + { + "epoch": 0.785766245840884, + "grad_norm": 2.8961360454559326, + "learning_rate": 9.40830113178961e-05, + "loss": 5.1039, + "step": 182550 + }, + { + "epoch": 0.7859814653001665, + "grad_norm": 2.5409646034240723, + "learning_rate": 9.407981760170912e-05, + "loss": 4.9706, + "step": 182600 + }, + { + "epoch": 0.7861966847594493, + "grad_norm": 0.5485794544219971, + "learning_rate": 9.407662307808138e-05, + "loss": 4.9108, + "step": 182650 + }, + { + "epoch": 0.7864119042187319, + "grad_norm": 2.4829444885253906, + "learning_rate": 9.407342774707141e-05, + "loss": 4.7105, + "step": 182700 + }, + { + "epoch": 0.7866271236780145, + "grad_norm": 1.4634356498718262, + "learning_rate": 9.407023160873771e-05, + "loss": 4.81, + "step": 182750 + }, + { + "epoch": 0.7868423431372971, + "grad_norm": 2.5417027473449707, + "learning_rate": 9.406703466313886e-05, + "loss": 4.7888, + "step": 182800 + }, + { + "epoch": 0.7870575625965798, + "grad_norm": 2.121460199356079, + "learning_rate": 9.40638369103334e-05, + "loss": 4.5221, + "step": 182850 + }, + { + "epoch": 0.7872727820558624, + "grad_norm": 2.6695022583007812, + "learning_rate": 9.406063835037992e-05, + "loss": 4.3507, + "step": 182900 + }, + { + "epoch": 0.787488001515145, + "grad_norm": 2.361351251602173, + "learning_rate": 9.4057438983337e-05, + "loss": 4.4811, + "step": 182950 + }, + { + "epoch": 0.7877032209744276, + "grad_norm": 5.025791168212891, + "learning_rate": 9.405423880926324e-05, + "loss": 4.6245, + "step": 183000 + }, + { + "epoch": 0.7877032209744276, + "eval_loss": 5.228704929351807, + "eval_runtime": 35.0141, + "eval_samples_per_second": 18.278, + "eval_steps_per_second": 9.139, + "eval_tts_loss": 6.74354209298225, + "step": 183000 + }, + { + "epoch": 0.7879184404337103, + "grad_norm": 2.2965877056121826, + "learning_rate": 9.405103782821727e-05, + "loss": 5.0211, + "step": 183050 + }, + { + "epoch": 0.7881336598929929, + "grad_norm": 1.8105207681655884, + "learning_rate": 9.404783604025772e-05, + "loss": 4.7765, + "step": 183100 + }, + { + "epoch": 0.7883488793522755, + "grad_norm": 2.1758463382720947, + "learning_rate": 9.404463344544326e-05, + "loss": 4.7205, + "step": 183150 + }, + { + "epoch": 0.7885640988115582, + "grad_norm": 3.3909318447113037, + "learning_rate": 9.40414300438325e-05, + "loss": 4.5109, + "step": 183200 + }, + { + "epoch": 0.7887793182708408, + "grad_norm": 2.025642156600952, + "learning_rate": 9.403822583548417e-05, + "loss": 4.8275, + "step": 183250 + }, + { + "epoch": 0.7889945377301234, + "grad_norm": 3.1710994243621826, + "learning_rate": 9.403502082045695e-05, + "loss": 4.8854, + "step": 183300 + }, + { + "epoch": 0.789209757189406, + "grad_norm": 2.2191197872161865, + "learning_rate": 9.403181499880955e-05, + "loss": 5.0559, + "step": 183350 + }, + { + "epoch": 0.7894249766486887, + "grad_norm": 3.041351079940796, + "learning_rate": 9.402860837060068e-05, + "loss": 4.898, + "step": 183400 + }, + { + "epoch": 0.7896401961079713, + "grad_norm": 2.36780047416687, + "learning_rate": 9.40254009358891e-05, + "loss": 4.7093, + "step": 183450 + }, + { + "epoch": 0.7898554155672539, + "grad_norm": 1.90168297290802, + "learning_rate": 9.402219269473354e-05, + "loss": 4.9518, + "step": 183500 + }, + { + "epoch": 0.7900706350265365, + "grad_norm": 0.6865656971931458, + "learning_rate": 9.40189836471928e-05, + "loss": 4.8536, + "step": 183550 + }, + { + "epoch": 0.7902858544858192, + "grad_norm": 1.8951488733291626, + "learning_rate": 9.40157737933256e-05, + "loss": 4.5627, + "step": 183600 + }, + { + "epoch": 0.7905010739451018, + "grad_norm": 2.408231258392334, + "learning_rate": 9.401256313319082e-05, + "loss": 5.1633, + "step": 183650 + }, + { + "epoch": 0.7907162934043844, + "grad_norm": 3.1720314025878906, + "learning_rate": 9.40093516668472e-05, + "loss": 4.7859, + "step": 183700 + }, + { + "epoch": 0.790931512863667, + "grad_norm": 3.122274398803711, + "learning_rate": 9.400613939435361e-05, + "loss": 4.2895, + "step": 183750 + }, + { + "epoch": 0.7911467323229497, + "grad_norm": 1.6511573791503906, + "learning_rate": 9.400292631576885e-05, + "loss": 4.55, + "step": 183800 + }, + { + "epoch": 0.7913619517822323, + "grad_norm": 1.7661155462265015, + "learning_rate": 9.399971243115184e-05, + "loss": 4.7581, + "step": 183850 + }, + { + "epoch": 0.7915771712415149, + "grad_norm": 2.338334321975708, + "learning_rate": 9.399649774056141e-05, + "loss": 4.6847, + "step": 183900 + }, + { + "epoch": 0.7917923907007977, + "grad_norm": 1.4181978702545166, + "learning_rate": 9.399328224405643e-05, + "loss": 4.6668, + "step": 183950 + }, + { + "epoch": 0.7920076101600803, + "grad_norm": 3.201955556869507, + "learning_rate": 9.399006594169584e-05, + "loss": 4.8995, + "step": 184000 + }, + { + "epoch": 0.7922228296193629, + "grad_norm": 1.2524312734603882, + "learning_rate": 9.398684883353852e-05, + "loss": 4.5244, + "step": 184050 + }, + { + "epoch": 0.7924380490786455, + "grad_norm": 2.3951380252838135, + "learning_rate": 9.398363091964343e-05, + "loss": 5.2328, + "step": 184100 + }, + { + "epoch": 0.7926532685379282, + "grad_norm": 2.4287376403808594, + "learning_rate": 9.39804122000695e-05, + "loss": 4.5155, + "step": 184150 + }, + { + "epoch": 0.7928684879972108, + "grad_norm": 0.7837318181991577, + "learning_rate": 9.397719267487569e-05, + "loss": 4.4488, + "step": 184200 + }, + { + "epoch": 0.7930837074564934, + "grad_norm": 1.325567364692688, + "learning_rate": 9.397397234412095e-05, + "loss": 4.5658, + "step": 184250 + }, + { + "epoch": 0.793298926915776, + "grad_norm": 3.045278787612915, + "learning_rate": 9.397075120786432e-05, + "loss": 4.5658, + "step": 184300 + }, + { + "epoch": 0.7935141463750587, + "grad_norm": 2.787545680999756, + "learning_rate": 9.396752926616478e-05, + "loss": 4.6324, + "step": 184350 + }, + { + "epoch": 0.7937293658343413, + "grad_norm": 1.637205958366394, + "learning_rate": 9.396430651908134e-05, + "loss": 4.8265, + "step": 184400 + }, + { + "epoch": 0.7939445852936239, + "grad_norm": 3.189713478088379, + "learning_rate": 9.396108296667303e-05, + "loss": 4.6836, + "step": 184450 + }, + { + "epoch": 0.7941598047529065, + "grad_norm": 2.0942819118499756, + "learning_rate": 9.395785860899892e-05, + "loss": 4.9004, + "step": 184500 + }, + { + "epoch": 0.7943750242121892, + "grad_norm": 2.0170814990997314, + "learning_rate": 9.395463344611806e-05, + "loss": 4.64, + "step": 184550 + }, + { + "epoch": 0.7945902436714718, + "grad_norm": 3.134268045425415, + "learning_rate": 9.395140747808951e-05, + "loss": 4.7231, + "step": 184600 + }, + { + "epoch": 0.7948054631307544, + "grad_norm": 0.6823921799659729, + "learning_rate": 9.39481807049724e-05, + "loss": 4.6559, + "step": 184650 + }, + { + "epoch": 0.7950206825900371, + "grad_norm": 3.1626925468444824, + "learning_rate": 9.39449531268258e-05, + "loss": 4.7653, + "step": 184700 + }, + { + "epoch": 0.7952359020493197, + "grad_norm": 4.726264476776123, + "learning_rate": 9.394172474370885e-05, + "loss": 4.5384, + "step": 184750 + }, + { + "epoch": 0.7954511215086023, + "grad_norm": 2.0807137489318848, + "learning_rate": 9.39384955556807e-05, + "loss": 5.2027, + "step": 184800 + }, + { + "epoch": 0.7956663409678849, + "grad_norm": 0.7623159289360046, + "learning_rate": 9.39352655628005e-05, + "loss": 4.576, + "step": 184850 + }, + { + "epoch": 0.7958815604271676, + "grad_norm": 1.0977578163146973, + "learning_rate": 9.393203476512738e-05, + "loss": 4.7793, + "step": 184900 + }, + { + "epoch": 0.7960967798864502, + "grad_norm": 1.2451286315917969, + "learning_rate": 9.392880316272055e-05, + "loss": 5.1058, + "step": 184950 + }, + { + "epoch": 0.7963119993457328, + "grad_norm": 1.6111650466918945, + "learning_rate": 9.39255707556392e-05, + "loss": 4.722, + "step": 185000 + }, + { + "epoch": 0.7965272188050154, + "grad_norm": 2.7530367374420166, + "learning_rate": 9.392233754394253e-05, + "loss": 4.3697, + "step": 185050 + }, + { + "epoch": 0.7967424382642981, + "grad_norm": 1.1732368469238281, + "learning_rate": 9.391910352768978e-05, + "loss": 4.6401, + "step": 185100 + }, + { + "epoch": 0.7969576577235807, + "grad_norm": 1.120763897895813, + "learning_rate": 9.391586870694019e-05, + "loss": 5.046, + "step": 185150 + }, + { + "epoch": 0.7971728771828633, + "grad_norm": 3.3629331588745117, + "learning_rate": 9.3912633081753e-05, + "loss": 4.5094, + "step": 185200 + }, + { + "epoch": 0.797388096642146, + "grad_norm": 3.2171289920806885, + "learning_rate": 9.39093966521875e-05, + "loss": 4.5134, + "step": 185250 + }, + { + "epoch": 0.7976033161014286, + "grad_norm": 2.1875147819519043, + "learning_rate": 9.390615941830295e-05, + "loss": 4.6554, + "step": 185300 + }, + { + "epoch": 0.7978185355607113, + "grad_norm": 0.5096136927604675, + "learning_rate": 9.390292138015866e-05, + "loss": 4.5686, + "step": 185350 + }, + { + "epoch": 0.7980337550199939, + "grad_norm": 2.928222894668579, + "learning_rate": 9.389968253781395e-05, + "loss": 4.9628, + "step": 185400 + }, + { + "epoch": 0.7982489744792766, + "grad_norm": 1.8802573680877686, + "learning_rate": 9.389644289132814e-05, + "loss": 4.6255, + "step": 185450 + }, + { + "epoch": 0.7984641939385592, + "grad_norm": 2.841101884841919, + "learning_rate": 9.389320244076057e-05, + "loss": 4.6458, + "step": 185500 + }, + { + "epoch": 0.7986794133978418, + "grad_norm": 4.211334228515625, + "learning_rate": 9.388996118617062e-05, + "loss": 4.9039, + "step": 185550 + }, + { + "epoch": 0.7988946328571244, + "grad_norm": 2.6955807209014893, + "learning_rate": 9.388671912761762e-05, + "loss": 4.5231, + "step": 185600 + }, + { + "epoch": 0.7991098523164071, + "grad_norm": 2.592399835586548, + "learning_rate": 9.3883476265161e-05, + "loss": 4.5701, + "step": 185650 + }, + { + "epoch": 0.7993250717756897, + "grad_norm": 3.473428249359131, + "learning_rate": 9.388023259886014e-05, + "loss": 4.3279, + "step": 185700 + }, + { + "epoch": 0.7995402912349723, + "grad_norm": 1.1913548707962036, + "learning_rate": 9.387698812877446e-05, + "loss": 3.7534, + "step": 185750 + }, + { + "epoch": 0.7997555106942549, + "grad_norm": 2.3190958499908447, + "learning_rate": 9.387374285496337e-05, + "loss": 4.196, + "step": 185800 + }, + { + "epoch": 0.7999707301535376, + "grad_norm": 4.5819501876831055, + "learning_rate": 9.387049677748636e-05, + "loss": 4.4271, + "step": 185850 + }, + { + "epoch": 0.8001859496128202, + "grad_norm": 2.171680212020874, + "learning_rate": 9.386724989640287e-05, + "loss": 4.5474, + "step": 185900 + }, + { + "epoch": 0.8004011690721028, + "grad_norm": 2.553633451461792, + "learning_rate": 9.386400221177236e-05, + "loss": 4.9086, + "step": 185950 + }, + { + "epoch": 0.8006163885313855, + "grad_norm": 1.9111876487731934, + "learning_rate": 9.386075372365434e-05, + "loss": 5.0144, + "step": 186000 + }, + { + "epoch": 0.8006163885313855, + "eval_loss": 5.224416255950928, + "eval_runtime": 35.0821, + "eval_samples_per_second": 18.243, + "eval_steps_per_second": 9.121, + "eval_tts_loss": 6.690784172783986, + "step": 186000 + }, + { + "epoch": 0.8008316079906681, + "grad_norm": 2.340367317199707, + "learning_rate": 9.385750443210833e-05, + "loss": 4.866, + "step": 186050 + }, + { + "epoch": 0.8010468274499507, + "grad_norm": 1.5970919132232666, + "learning_rate": 9.385425433719379e-05, + "loss": 4.6275, + "step": 186100 + }, + { + "epoch": 0.8012620469092333, + "grad_norm": 1.3948001861572266, + "learning_rate": 9.38510034389703e-05, + "loss": 4.4163, + "step": 186150 + }, + { + "epoch": 0.801477266368516, + "grad_norm": 3.1360490322113037, + "learning_rate": 9.384775173749743e-05, + "loss": 4.714, + "step": 186200 + }, + { + "epoch": 0.8016924858277986, + "grad_norm": 3.1537868976593018, + "learning_rate": 9.384449923283468e-05, + "loss": 4.795, + "step": 186250 + }, + { + "epoch": 0.8019077052870812, + "grad_norm": 5.263357162475586, + "learning_rate": 9.384124592504168e-05, + "loss": 4.4258, + "step": 186300 + }, + { + "epoch": 0.8021229247463638, + "grad_norm": 1.642029047012329, + "learning_rate": 9.3837991814178e-05, + "loss": 5.0155, + "step": 186350 + }, + { + "epoch": 0.8023381442056465, + "grad_norm": 2.37801456451416, + "learning_rate": 9.383473690030327e-05, + "loss": 4.6962, + "step": 186400 + }, + { + "epoch": 0.8025533636649291, + "grad_norm": 1.0524505376815796, + "learning_rate": 9.383148118347706e-05, + "loss": 4.9396, + "step": 186450 + }, + { + "epoch": 0.8027685831242117, + "grad_norm": 1.9480938911437988, + "learning_rate": 9.382822466375908e-05, + "loss": 4.819, + "step": 186500 + }, + { + "epoch": 0.8029838025834944, + "grad_norm": 3.7513175010681152, + "learning_rate": 9.382496734120894e-05, + "loss": 5.1958, + "step": 186550 + }, + { + "epoch": 0.803199022042777, + "grad_norm": 2.979698419570923, + "learning_rate": 9.38217092158863e-05, + "loss": 4.8239, + "step": 186600 + }, + { + "epoch": 0.8034142415020596, + "grad_norm": 2.1691033840179443, + "learning_rate": 9.381845028785086e-05, + "loss": 4.5849, + "step": 186650 + }, + { + "epoch": 0.8036294609613422, + "grad_norm": 2.3529889583587646, + "learning_rate": 9.38151905571623e-05, + "loss": 4.6915, + "step": 186700 + }, + { + "epoch": 0.803844680420625, + "grad_norm": 2.598629951477051, + "learning_rate": 9.381193002388035e-05, + "loss": 4.7001, + "step": 186750 + }, + { + "epoch": 0.8040598998799076, + "grad_norm": 3.2998342514038086, + "learning_rate": 9.380866868806473e-05, + "loss": 4.975, + "step": 186800 + }, + { + "epoch": 0.8042751193391902, + "grad_norm": 3.121752977371216, + "learning_rate": 9.380540654977517e-05, + "loss": 4.7871, + "step": 186850 + }, + { + "epoch": 0.8044903387984728, + "grad_norm": 0.4186832904815674, + "learning_rate": 9.380214360907143e-05, + "loss": 4.5354, + "step": 186900 + }, + { + "epoch": 0.8047055582577555, + "grad_norm": 1.9494072198867798, + "learning_rate": 9.379887986601327e-05, + "loss": 4.7524, + "step": 186950 + }, + { + "epoch": 0.8049207777170381, + "grad_norm": 2.9033243656158447, + "learning_rate": 9.379561532066052e-05, + "loss": 4.7669, + "step": 187000 + }, + { + "epoch": 0.8051359971763207, + "grad_norm": 3.342738151550293, + "learning_rate": 9.379234997307291e-05, + "loss": 4.7997, + "step": 187050 + }, + { + "epoch": 0.8053512166356033, + "grad_norm": 2.352165460586548, + "learning_rate": 9.37890838233103e-05, + "loss": 4.8739, + "step": 187100 + }, + { + "epoch": 0.805566436094886, + "grad_norm": 2.0931475162506104, + "learning_rate": 9.378581687143251e-05, + "loss": 4.7506, + "step": 187150 + }, + { + "epoch": 0.8057816555541686, + "grad_norm": 0.7547945380210876, + "learning_rate": 9.378254911749937e-05, + "loss": 4.7572, + "step": 187200 + }, + { + "epoch": 0.8059968750134512, + "grad_norm": 2.220317840576172, + "learning_rate": 9.377928056157075e-05, + "loss": 5.21, + "step": 187250 + }, + { + "epoch": 0.8062120944727339, + "grad_norm": 2.047247886657715, + "learning_rate": 9.377601120370652e-05, + "loss": 4.6132, + "step": 187300 + }, + { + "epoch": 0.8064273139320165, + "grad_norm": 4.531040191650391, + "learning_rate": 9.377274104396656e-05, + "loss": 5.0571, + "step": 187350 + }, + { + "epoch": 0.8066425333912991, + "grad_norm": 1.9355950355529785, + "learning_rate": 9.37694700824108e-05, + "loss": 4.9246, + "step": 187400 + }, + { + "epoch": 0.8068577528505817, + "grad_norm": 2.3034894466400146, + "learning_rate": 9.376619831909911e-05, + "loss": 5.0452, + "step": 187450 + }, + { + "epoch": 0.8070729723098644, + "grad_norm": 1.4248008728027344, + "learning_rate": 9.376292575409144e-05, + "loss": 4.7408, + "step": 187500 + }, + { + "epoch": 0.807288191769147, + "grad_norm": 1.2338848114013672, + "learning_rate": 9.375965238744776e-05, + "loss": 4.9173, + "step": 187550 + }, + { + "epoch": 0.8075034112284296, + "grad_norm": 1.4876084327697754, + "learning_rate": 9.3756378219228e-05, + "loss": 5.0233, + "step": 187600 + }, + { + "epoch": 0.8077186306877122, + "grad_norm": 3.505035638809204, + "learning_rate": 9.375310324949216e-05, + "loss": 5.0934, + "step": 187650 + }, + { + "epoch": 0.8079338501469949, + "grad_norm": 3.329453706741333, + "learning_rate": 9.374982747830021e-05, + "loss": 4.9139, + "step": 187700 + }, + { + "epoch": 0.8081490696062775, + "grad_norm": 2.6589181423187256, + "learning_rate": 9.374655090571216e-05, + "loss": 4.1205, + "step": 187750 + }, + { + "epoch": 0.8083642890655601, + "grad_norm": 0.703048586845398, + "learning_rate": 9.374327353178803e-05, + "loss": 4.7005, + "step": 187800 + }, + { + "epoch": 0.8085795085248427, + "grad_norm": 2.0758988857269287, + "learning_rate": 9.373999535658786e-05, + "loss": 4.7813, + "step": 187850 + }, + { + "epoch": 0.8087947279841254, + "grad_norm": 2.306945323944092, + "learning_rate": 9.373671638017168e-05, + "loss": 4.9655, + "step": 187900 + }, + { + "epoch": 0.809009947443408, + "grad_norm": 1.9991326332092285, + "learning_rate": 9.373343660259959e-05, + "loss": 4.9369, + "step": 187950 + }, + { + "epoch": 0.8092251669026906, + "grad_norm": 3.5134224891662598, + "learning_rate": 9.373015602393162e-05, + "loss": 4.5155, + "step": 188000 + }, + { + "epoch": 0.8094403863619734, + "grad_norm": 2.6724693775177, + "learning_rate": 9.37268746442279e-05, + "loss": 4.7283, + "step": 188050 + }, + { + "epoch": 0.809655605821256, + "grad_norm": 5.14189338684082, + "learning_rate": 9.372359246354853e-05, + "loss": 5.0507, + "step": 188100 + }, + { + "epoch": 0.8098708252805386, + "grad_norm": 2.387340545654297, + "learning_rate": 9.372030948195362e-05, + "loss": 4.8356, + "step": 188150 + }, + { + "epoch": 0.8100860447398212, + "grad_norm": 2.702594757080078, + "learning_rate": 9.371702569950331e-05, + "loss": 4.7091, + "step": 188200 + }, + { + "epoch": 0.8103012641991039, + "grad_norm": 1.7402033805847168, + "learning_rate": 9.371374111625776e-05, + "loss": 4.8854, + "step": 188250 + }, + { + "epoch": 0.8105164836583865, + "grad_norm": 3.7855911254882812, + "learning_rate": 9.371045573227712e-05, + "loss": 4.5652, + "step": 188300 + }, + { + "epoch": 0.8107317031176691, + "grad_norm": 1.73478102684021, + "learning_rate": 9.37071695476216e-05, + "loss": 4.8377, + "step": 188350 + }, + { + "epoch": 0.8109469225769517, + "grad_norm": 1.1231098175048828, + "learning_rate": 9.370388256235136e-05, + "loss": 5.0445, + "step": 188400 + }, + { + "epoch": 0.8111621420362344, + "grad_norm": 1.2219997644424438, + "learning_rate": 9.370059477652664e-05, + "loss": 4.6097, + "step": 188450 + }, + { + "epoch": 0.811377361495517, + "grad_norm": 3.9354968070983887, + "learning_rate": 9.369730619020765e-05, + "loss": 4.5924, + "step": 188500 + }, + { + "epoch": 0.8115925809547996, + "grad_norm": 1.350845456123352, + "learning_rate": 9.369401680345462e-05, + "loss": 5.0859, + "step": 188550 + }, + { + "epoch": 0.8118078004140823, + "grad_norm": 1.9544392824172974, + "learning_rate": 9.369072661632782e-05, + "loss": 4.9968, + "step": 188600 + }, + { + "epoch": 0.8120230198733649, + "grad_norm": 2.7270331382751465, + "learning_rate": 9.368743562888751e-05, + "loss": 4.8743, + "step": 188650 + }, + { + "epoch": 0.8122382393326475, + "grad_norm": 2.7398324012756348, + "learning_rate": 9.368414384119398e-05, + "loss": 4.8799, + "step": 188700 + }, + { + "epoch": 0.8124534587919301, + "grad_norm": 0.8594245314598083, + "learning_rate": 9.368085125330752e-05, + "loss": 4.8656, + "step": 188750 + }, + { + "epoch": 0.8126686782512128, + "grad_norm": 5.254597187042236, + "learning_rate": 9.367755786528847e-05, + "loss": 4.6961, + "step": 188800 + }, + { + "epoch": 0.8128838977104954, + "grad_norm": 0.9343959093093872, + "learning_rate": 9.36742636771971e-05, + "loss": 4.9579, + "step": 188850 + }, + { + "epoch": 0.813099117169778, + "grad_norm": 2.713838815689087, + "learning_rate": 9.367096868909381e-05, + "loss": 5.0055, + "step": 188900 + }, + { + "epoch": 0.8133143366290606, + "grad_norm": 2.832930088043213, + "learning_rate": 9.366767290103892e-05, + "loss": 4.54, + "step": 188950 + }, + { + "epoch": 0.8135295560883433, + "grad_norm": 4.801424980163574, + "learning_rate": 9.366437631309282e-05, + "loss": 4.9456, + "step": 189000 + }, + { + "epoch": 0.8135295560883433, + "eval_loss": 5.20615291595459, + "eval_runtime": 35.0005, + "eval_samples_per_second": 18.285, + "eval_steps_per_second": 9.143, + "eval_tts_loss": 6.768362013128695, + "step": 189000 + }, + { + "epoch": 0.8137447755476259, + "grad_norm": 5.412750244140625, + "learning_rate": 9.366107892531589e-05, + "loss": 4.8041, + "step": 189050 + }, + { + "epoch": 0.8139599950069085, + "grad_norm": 1.9363280534744263, + "learning_rate": 9.365778073776854e-05, + "loss": 4.9387, + "step": 189100 + }, + { + "epoch": 0.8141752144661911, + "grad_norm": 2.350440263748169, + "learning_rate": 9.365448175051117e-05, + "loss": 4.7638, + "step": 189150 + }, + { + "epoch": 0.8143904339254738, + "grad_norm": 2.8119924068450928, + "learning_rate": 9.36511819636042e-05, + "loss": 4.518, + "step": 189200 + }, + { + "epoch": 0.8146056533847564, + "grad_norm": 3.0520548820495605, + "learning_rate": 9.36478813771081e-05, + "loss": 4.7801, + "step": 189250 + }, + { + "epoch": 0.814820872844039, + "grad_norm": 2.35870361328125, + "learning_rate": 9.364457999108333e-05, + "loss": 4.7009, + "step": 189300 + }, + { + "epoch": 0.8150360923033217, + "grad_norm": 3.8503732681274414, + "learning_rate": 9.364127780559035e-05, + "loss": 4.7832, + "step": 189350 + }, + { + "epoch": 0.8152513117626043, + "grad_norm": 4.059573650360107, + "learning_rate": 9.363797482068964e-05, + "loss": 4.4811, + "step": 189400 + }, + { + "epoch": 0.815466531221887, + "grad_norm": 2.5103812217712402, + "learning_rate": 9.363467103644172e-05, + "loss": 4.6708, + "step": 189450 + }, + { + "epoch": 0.8156817506811695, + "grad_norm": 0.7802695631980896, + "learning_rate": 9.363136645290711e-05, + "loss": 4.8225, + "step": 189500 + }, + { + "epoch": 0.8158969701404523, + "grad_norm": 1.9004136323928833, + "learning_rate": 9.362806107014633e-05, + "loss": 4.6159, + "step": 189550 + }, + { + "epoch": 0.8161121895997349, + "grad_norm": 1.218076467514038, + "learning_rate": 9.362475488821992e-05, + "loss": 5.2615, + "step": 189600 + }, + { + "epoch": 0.8163274090590175, + "grad_norm": 1.4515451192855835, + "learning_rate": 9.362144790718847e-05, + "loss": 5.4075, + "step": 189650 + }, + { + "epoch": 0.8165426285183001, + "grad_norm": 1.948971152305603, + "learning_rate": 9.361814012711255e-05, + "loss": 5.1082, + "step": 189700 + }, + { + "epoch": 0.8167578479775828, + "grad_norm": 3.245199203491211, + "learning_rate": 9.361483154805271e-05, + "loss": 4.9219, + "step": 189750 + }, + { + "epoch": 0.8169730674368654, + "grad_norm": 1.9209944009780884, + "learning_rate": 9.36115221700696e-05, + "loss": 4.6561, + "step": 189800 + }, + { + "epoch": 0.817188286896148, + "grad_norm": 2.9945619106292725, + "learning_rate": 9.360821199322383e-05, + "loss": 4.2463, + "step": 189850 + }, + { + "epoch": 0.8174035063554307, + "grad_norm": 0.8662261962890625, + "learning_rate": 9.360490101757601e-05, + "loss": 4.4626, + "step": 189900 + }, + { + "epoch": 0.8176187258147133, + "grad_norm": 2.765700340270996, + "learning_rate": 9.360158924318685e-05, + "loss": 4.989, + "step": 189950 + }, + { + "epoch": 0.8178339452739959, + "grad_norm": 3.3067147731781006, + "learning_rate": 9.359827667011696e-05, + "loss": 4.9527, + "step": 190000 + }, + { + "epoch": 0.8180491647332785, + "grad_norm": 2.738037347793579, + "learning_rate": 9.359496329842701e-05, + "loss": 4.568, + "step": 190050 + }, + { + "epoch": 0.8182643841925612, + "grad_norm": 2.291219472885132, + "learning_rate": 9.359164912817774e-05, + "loss": 4.7359, + "step": 190100 + }, + { + "epoch": 0.8184796036518438, + "grad_norm": 2.3538007736206055, + "learning_rate": 9.358833415942982e-05, + "loss": 4.7037, + "step": 190150 + }, + { + "epoch": 0.8186948231111264, + "grad_norm": 1.2394182682037354, + "learning_rate": 9.3585018392244e-05, + "loss": 5.0512, + "step": 190200 + }, + { + "epoch": 0.818910042570409, + "grad_norm": 2.541017770767212, + "learning_rate": 9.3581701826681e-05, + "loss": 4.8916, + "step": 190250 + }, + { + "epoch": 0.8191252620296917, + "grad_norm": 0.7030532956123352, + "learning_rate": 9.357838446280158e-05, + "loss": 4.6635, + "step": 190300 + }, + { + "epoch": 0.8193404814889743, + "grad_norm": 1.8733288049697876, + "learning_rate": 9.35750663006665e-05, + "loss": 4.5083, + "step": 190350 + }, + { + "epoch": 0.8195557009482569, + "grad_norm": 2.3789172172546387, + "learning_rate": 9.357174734033655e-05, + "loss": 5.0579, + "step": 190400 + }, + { + "epoch": 0.8197709204075395, + "grad_norm": 2.020404577255249, + "learning_rate": 9.35684275818725e-05, + "loss": 5.4097, + "step": 190450 + }, + { + "epoch": 0.8199861398668222, + "grad_norm": 2.215183973312378, + "learning_rate": 9.356510702533521e-05, + "loss": 4.4802, + "step": 190500 + }, + { + "epoch": 0.8202013593261048, + "grad_norm": 2.5320885181427, + "learning_rate": 9.356178567078545e-05, + "loss": 5.0789, + "step": 190550 + }, + { + "epoch": 0.8204165787853874, + "grad_norm": 4.416962623596191, + "learning_rate": 9.355846351828409e-05, + "loss": 4.9289, + "step": 190600 + }, + { + "epoch": 0.8206317982446701, + "grad_norm": 1.8745973110198975, + "learning_rate": 9.355514056789198e-05, + "loss": 4.9457, + "step": 190650 + }, + { + "epoch": 0.8208470177039527, + "grad_norm": 1.5943892002105713, + "learning_rate": 9.355181681966999e-05, + "loss": 4.5983, + "step": 190700 + }, + { + "epoch": 0.8210622371632353, + "grad_norm": 4.762444972991943, + "learning_rate": 9.354849227367901e-05, + "loss": 5.3596, + "step": 190750 + }, + { + "epoch": 0.8212774566225179, + "grad_norm": 3.077148914337158, + "learning_rate": 9.354516692997992e-05, + "loss": 4.7345, + "step": 190800 + }, + { + "epoch": 0.8214926760818007, + "grad_norm": 3.39192795753479, + "learning_rate": 9.354184078863363e-05, + "loss": 4.7196, + "step": 190850 + }, + { + "epoch": 0.8217078955410833, + "grad_norm": 1.5794721841812134, + "learning_rate": 9.35385138497011e-05, + "loss": 4.5685, + "step": 190900 + }, + { + "epoch": 0.8219231150003659, + "grad_norm": 0.7110084891319275, + "learning_rate": 9.353518611324323e-05, + "loss": 4.737, + "step": 190950 + }, + { + "epoch": 0.8221383344596485, + "grad_norm": 2.6674294471740723, + "learning_rate": 9.353185757932102e-05, + "loss": 4.844, + "step": 191000 + }, + { + "epoch": 0.8223535539189312, + "grad_norm": 1.1264660358428955, + "learning_rate": 9.35285282479954e-05, + "loss": 4.6256, + "step": 191050 + }, + { + "epoch": 0.8225687733782138, + "grad_norm": 0.7707536816596985, + "learning_rate": 9.352519811932738e-05, + "loss": 4.3919, + "step": 191100 + }, + { + "epoch": 0.8227839928374964, + "grad_norm": 1.4470241069793701, + "learning_rate": 9.352186719337794e-05, + "loss": 4.4849, + "step": 191150 + }, + { + "epoch": 0.822999212296779, + "grad_norm": 3.1630239486694336, + "learning_rate": 9.351853547020813e-05, + "loss": 4.6507, + "step": 191200 + }, + { + "epoch": 0.8232144317560617, + "grad_norm": 1.9456192255020142, + "learning_rate": 9.351520294987895e-05, + "loss": 4.7154, + "step": 191250 + }, + { + "epoch": 0.8234296512153443, + "grad_norm": 3.283751964569092, + "learning_rate": 9.351186963245144e-05, + "loss": 4.9583, + "step": 191300 + }, + { + "epoch": 0.8236448706746269, + "grad_norm": 1.1558512449264526, + "learning_rate": 9.35085355179867e-05, + "loss": 4.7135, + "step": 191350 + }, + { + "epoch": 0.8238600901339096, + "grad_norm": 2.6374261379241943, + "learning_rate": 9.350520060654575e-05, + "loss": 5.0599, + "step": 191400 + }, + { + "epoch": 0.8240753095931922, + "grad_norm": 0.9400759339332581, + "learning_rate": 9.350186489818971e-05, + "loss": 4.657, + "step": 191450 + }, + { + "epoch": 0.8242905290524748, + "grad_norm": 2.4507017135620117, + "learning_rate": 9.349852839297968e-05, + "loss": 5.183, + "step": 191500 + }, + { + "epoch": 0.8245057485117574, + "grad_norm": 2.490328311920166, + "learning_rate": 9.349519109097678e-05, + "loss": 4.3417, + "step": 191550 + }, + { + "epoch": 0.8247209679710401, + "grad_norm": 3.201686143875122, + "learning_rate": 9.349185299224212e-05, + "loss": 4.5054, + "step": 191600 + }, + { + "epoch": 0.8249361874303227, + "grad_norm": 1.9084980487823486, + "learning_rate": 9.348851409683685e-05, + "loss": 4.3908, + "step": 191650 + }, + { + "epoch": 0.8251514068896053, + "grad_norm": 1.7880585193634033, + "learning_rate": 9.348517440482216e-05, + "loss": 4.2968, + "step": 191700 + }, + { + "epoch": 0.8253666263488879, + "grad_norm": 3.096014976501465, + "learning_rate": 9.348183391625922e-05, + "loss": 4.6015, + "step": 191750 + }, + { + "epoch": 0.8255818458081706, + "grad_norm": 1.7921721935272217, + "learning_rate": 9.347849263120918e-05, + "loss": 4.9587, + "step": 191800 + }, + { + "epoch": 0.8257970652674532, + "grad_norm": 1.9552100896835327, + "learning_rate": 9.347515054973327e-05, + "loss": 4.6824, + "step": 191850 + }, + { + "epoch": 0.8260122847267358, + "grad_norm": 2.0226783752441406, + "learning_rate": 9.347180767189272e-05, + "loss": 4.8725, + "step": 191900 + }, + { + "epoch": 0.8262275041860185, + "grad_norm": 1.7754284143447876, + "learning_rate": 9.346846399774876e-05, + "loss": 5.0717, + "step": 191950 + }, + { + "epoch": 0.8264427236453011, + "grad_norm": 1.0366710424423218, + "learning_rate": 9.346511952736262e-05, + "loss": 5.1519, + "step": 192000 + }, + { + "epoch": 0.8264427236453011, + "eval_loss": 5.2000885009765625, + "eval_runtime": 34.8359, + "eval_samples_per_second": 18.372, + "eval_steps_per_second": 9.186, + "eval_tts_loss": 6.704178571470074, + "step": 192000 + }, + { + "epoch": 0.8266579431045837, + "grad_norm": 2.3397347927093506, + "learning_rate": 9.346177426079559e-05, + "loss": 5.0184, + "step": 192050 + }, + { + "epoch": 0.8268731625638663, + "grad_norm": 2.7445719242095947, + "learning_rate": 9.345842819810891e-05, + "loss": 4.8735, + "step": 192100 + }, + { + "epoch": 0.827088382023149, + "grad_norm": 0.6724907755851746, + "learning_rate": 9.345508133936392e-05, + "loss": 4.8104, + "step": 192150 + }, + { + "epoch": 0.8273036014824316, + "grad_norm": 0.39006567001342773, + "learning_rate": 9.345173368462188e-05, + "loss": 4.5413, + "step": 192200 + }, + { + "epoch": 0.8275188209417143, + "grad_norm": 1.635183572769165, + "learning_rate": 9.344838523394415e-05, + "loss": 4.8906, + "step": 192250 + }, + { + "epoch": 0.8277340404009969, + "grad_norm": 1.570319652557373, + "learning_rate": 9.344503598739205e-05, + "loss": 5.0244, + "step": 192300 + }, + { + "epoch": 0.8279492598602796, + "grad_norm": 3.648540735244751, + "learning_rate": 9.344168594502694e-05, + "loss": 5.0858, + "step": 192350 + }, + { + "epoch": 0.8281644793195622, + "grad_norm": 1.1136547327041626, + "learning_rate": 9.343833510691015e-05, + "loss": 5.2062, + "step": 192400 + }, + { + "epoch": 0.8283796987788448, + "grad_norm": 1.5582127571105957, + "learning_rate": 9.343498347310309e-05, + "loss": 4.3275, + "step": 192450 + }, + { + "epoch": 0.8285949182381274, + "grad_norm": 4.185760498046875, + "learning_rate": 9.343163104366716e-05, + "loss": 5.0167, + "step": 192500 + }, + { + "epoch": 0.8288101376974101, + "grad_norm": 2.6980350017547607, + "learning_rate": 9.342827781866375e-05, + "loss": 4.6298, + "step": 192550 + }, + { + "epoch": 0.8290253571566927, + "grad_norm": 1.475448727607727, + "learning_rate": 9.342492379815428e-05, + "loss": 4.2579, + "step": 192600 + }, + { + "epoch": 0.8292405766159753, + "grad_norm": 3.8274178504943848, + "learning_rate": 9.342156898220022e-05, + "loss": 4.5687, + "step": 192650 + }, + { + "epoch": 0.829455796075258, + "grad_norm": 2.4502005577087402, + "learning_rate": 9.341821337086299e-05, + "loss": 4.9246, + "step": 192700 + }, + { + "epoch": 0.8296710155345406, + "grad_norm": 4.119244575500488, + "learning_rate": 9.341485696420408e-05, + "loss": 4.4729, + "step": 192750 + }, + { + "epoch": 0.8298862349938232, + "grad_norm": 2.705709934234619, + "learning_rate": 9.341149976228495e-05, + "loss": 4.2129, + "step": 192800 + }, + { + "epoch": 0.8301014544531058, + "grad_norm": 1.814461588859558, + "learning_rate": 9.34081417651671e-05, + "loss": 4.7228, + "step": 192850 + }, + { + "epoch": 0.8303166739123885, + "grad_norm": 2.3888537883758545, + "learning_rate": 9.340478297291207e-05, + "loss": 4.9913, + "step": 192900 + }, + { + "epoch": 0.8305318933716711, + "grad_norm": 4.1537394523620605, + "learning_rate": 9.340142338558134e-05, + "loss": 4.4417, + "step": 192950 + }, + { + "epoch": 0.8307471128309537, + "grad_norm": 3.1590754985809326, + "learning_rate": 9.339806300323647e-05, + "loss": 4.7815, + "step": 193000 + }, + { + "epoch": 0.8309623322902363, + "grad_norm": 2.248365879058838, + "learning_rate": 9.339470182593903e-05, + "loss": 4.4336, + "step": 193050 + }, + { + "epoch": 0.831177551749519, + "grad_norm": 3.2490179538726807, + "learning_rate": 9.339133985375057e-05, + "loss": 4.5179, + "step": 193100 + }, + { + "epoch": 0.8313927712088016, + "grad_norm": 2.442485809326172, + "learning_rate": 9.338797708673268e-05, + "loss": 4.3176, + "step": 193150 + }, + { + "epoch": 0.8316079906680842, + "grad_norm": 0.4881429374217987, + "learning_rate": 9.338461352494696e-05, + "loss": 4.7472, + "step": 193200 + }, + { + "epoch": 0.8318232101273669, + "grad_norm": 0.3160957992076874, + "learning_rate": 9.338124916845503e-05, + "loss": 5.0414, + "step": 193250 + }, + { + "epoch": 0.8320384295866495, + "grad_norm": 0.9519697427749634, + "learning_rate": 9.337788401731848e-05, + "loss": 4.9005, + "step": 193300 + }, + { + "epoch": 0.8322536490459321, + "grad_norm": 2.2458128929138184, + "learning_rate": 9.3374518071599e-05, + "loss": 4.7484, + "step": 193350 + }, + { + "epoch": 0.8324688685052147, + "grad_norm": 2.4378137588500977, + "learning_rate": 9.337115133135822e-05, + "loss": 4.9301, + "step": 193400 + }, + { + "epoch": 0.8326840879644974, + "grad_norm": 2.275014877319336, + "learning_rate": 9.336778379665783e-05, + "loss": 4.3442, + "step": 193450 + }, + { + "epoch": 0.83289930742378, + "grad_norm": 2.782167673110962, + "learning_rate": 9.336441546755947e-05, + "loss": 4.8463, + "step": 193500 + }, + { + "epoch": 0.8331145268830626, + "grad_norm": 2.43204665184021, + "learning_rate": 9.33610463441249e-05, + "loss": 5.0207, + "step": 193550 + }, + { + "epoch": 0.8333297463423452, + "grad_norm": 2.969275712966919, + "learning_rate": 9.33576764264158e-05, + "loss": 4.3645, + "step": 193600 + }, + { + "epoch": 0.833544965801628, + "grad_norm": 2.3831746578216553, + "learning_rate": 9.335430571449391e-05, + "loss": 5.0419, + "step": 193650 + }, + { + "epoch": 0.8337601852609106, + "grad_norm": 3.2668802738189697, + "learning_rate": 9.335093420842095e-05, + "loss": 4.6479, + "step": 193700 + }, + { + "epoch": 0.8339754047201932, + "grad_norm": 3.3023581504821777, + "learning_rate": 9.334756190825872e-05, + "loss": 4.807, + "step": 193750 + }, + { + "epoch": 0.8341906241794758, + "grad_norm": 2.457369804382324, + "learning_rate": 9.334418881406897e-05, + "loss": 5.1144, + "step": 193800 + }, + { + "epoch": 0.8344058436387585, + "grad_norm": 4.176352024078369, + "learning_rate": 9.334081492591346e-05, + "loss": 4.2369, + "step": 193850 + }, + { + "epoch": 0.8346210630980411, + "grad_norm": 2.0094122886657715, + "learning_rate": 9.333744024385404e-05, + "loss": 4.7162, + "step": 193900 + }, + { + "epoch": 0.8348362825573237, + "grad_norm": 2.457852840423584, + "learning_rate": 9.333406476795253e-05, + "loss": 4.6896, + "step": 193950 + }, + { + "epoch": 0.8350515020166064, + "grad_norm": 2.1227715015411377, + "learning_rate": 9.333068849827071e-05, + "loss": 4.6087, + "step": 194000 + }, + { + "epoch": 0.835266721475889, + "grad_norm": 2.8398020267486572, + "learning_rate": 9.332731143487046e-05, + "loss": 4.6533, + "step": 194050 + }, + { + "epoch": 0.8354819409351716, + "grad_norm": 2.1009390354156494, + "learning_rate": 9.332393357781362e-05, + "loss": 4.7853, + "step": 194100 + }, + { + "epoch": 0.8356971603944542, + "grad_norm": 1.4742586612701416, + "learning_rate": 9.332055492716209e-05, + "loss": 4.2164, + "step": 194150 + }, + { + "epoch": 0.8359123798537369, + "grad_norm": 2.8620059490203857, + "learning_rate": 9.331717548297773e-05, + "loss": 4.7979, + "step": 194200 + }, + { + "epoch": 0.8361275993130195, + "grad_norm": 1.9704945087432861, + "learning_rate": 9.331379524532248e-05, + "loss": 4.7784, + "step": 194250 + }, + { + "epoch": 0.8363428187723021, + "grad_norm": 0.5467448830604553, + "learning_rate": 9.331041421425823e-05, + "loss": 4.5849, + "step": 194300 + }, + { + "epoch": 0.8365580382315847, + "grad_norm": 3.029217004776001, + "learning_rate": 9.330703238984693e-05, + "loss": 4.8219, + "step": 194350 + }, + { + "epoch": 0.8367732576908674, + "grad_norm": 2.8484835624694824, + "learning_rate": 9.33036497721505e-05, + "loss": 4.8487, + "step": 194400 + }, + { + "epoch": 0.83698847715015, + "grad_norm": 2.185595750808716, + "learning_rate": 9.330026636123093e-05, + "loss": 4.9402, + "step": 194450 + }, + { + "epoch": 0.8372036966094326, + "grad_norm": 1.582587480545044, + "learning_rate": 9.329688215715018e-05, + "loss": 4.5036, + "step": 194500 + }, + { + "epoch": 0.8374189160687153, + "grad_norm": 2.0894949436187744, + "learning_rate": 9.329349715997026e-05, + "loss": 4.6912, + "step": 194550 + }, + { + "epoch": 0.8376341355279979, + "grad_norm": 1.092671275138855, + "learning_rate": 9.329011136975314e-05, + "loss": 4.6174, + "step": 194600 + }, + { + "epoch": 0.8378493549872805, + "grad_norm": 2.570307731628418, + "learning_rate": 9.328672478656089e-05, + "loss": 4.6437, + "step": 194650 + }, + { + "epoch": 0.8380645744465631, + "grad_norm": 2.410719871520996, + "learning_rate": 9.32833374104555e-05, + "loss": 4.4967, + "step": 194700 + }, + { + "epoch": 0.8382797939058458, + "grad_norm": 2.1133346557617188, + "learning_rate": 9.327994924149903e-05, + "loss": 4.7183, + "step": 194750 + }, + { + "epoch": 0.8384950133651284, + "grad_norm": 5.827842712402344, + "learning_rate": 9.327656027975356e-05, + "loss": 4.6798, + "step": 194800 + }, + { + "epoch": 0.838710232824411, + "grad_norm": 2.1439902782440186, + "learning_rate": 9.327317052528115e-05, + "loss": 4.9374, + "step": 194850 + }, + { + "epoch": 0.8389254522836936, + "grad_norm": 1.7001876831054688, + "learning_rate": 9.32697799781439e-05, + "loss": 4.7425, + "step": 194900 + }, + { + "epoch": 0.8391406717429764, + "grad_norm": 3.2245774269104004, + "learning_rate": 9.326638863840393e-05, + "loss": 4.6096, + "step": 194950 + }, + { + "epoch": 0.839355891202259, + "grad_norm": 2.2066738605499268, + "learning_rate": 9.326299650612334e-05, + "loss": 4.9437, + "step": 195000 + }, + { + "epoch": 0.839355891202259, + "eval_loss": 5.20053768157959, + "eval_runtime": 35.0229, + "eval_samples_per_second": 18.274, + "eval_steps_per_second": 9.137, + "eval_tts_loss": 6.716869990709077, + "step": 195000 + }, + { + "epoch": 0.8395711106615416, + "grad_norm": 0.963911771774292, + "learning_rate": 9.325960358136426e-05, + "loss": 4.7375, + "step": 195050 + }, + { + "epoch": 0.8397863301208242, + "grad_norm": 2.5315258502960205, + "learning_rate": 9.325620986418887e-05, + "loss": 4.5325, + "step": 195100 + }, + { + "epoch": 0.8400015495801069, + "grad_norm": 2.337113618850708, + "learning_rate": 9.325281535465931e-05, + "loss": 4.9504, + "step": 195150 + }, + { + "epoch": 0.8402167690393895, + "grad_norm": 2.958540916442871, + "learning_rate": 9.324942005283778e-05, + "loss": 4.1445, + "step": 195200 + }, + { + "epoch": 0.8404319884986721, + "grad_norm": 0.8696990013122559, + "learning_rate": 9.324602395878646e-05, + "loss": 4.7469, + "step": 195250 + }, + { + "epoch": 0.8406472079579548, + "grad_norm": 2.152987241744995, + "learning_rate": 9.324262707256755e-05, + "loss": 4.5124, + "step": 195300 + }, + { + "epoch": 0.8408624274172374, + "grad_norm": 3.32906436920166, + "learning_rate": 9.32392293942433e-05, + "loss": 4.7345, + "step": 195350 + }, + { + "epoch": 0.84107764687652, + "grad_norm": 2.2424678802490234, + "learning_rate": 9.323583092387593e-05, + "loss": 4.3338, + "step": 195400 + }, + { + "epoch": 0.8412928663358026, + "grad_norm": 2.2158286571502686, + "learning_rate": 9.323243166152768e-05, + "loss": 4.816, + "step": 195450 + }, + { + "epoch": 0.8415080857950853, + "grad_norm": 2.250025987625122, + "learning_rate": 9.322903160726086e-05, + "loss": 4.6832, + "step": 195500 + }, + { + "epoch": 0.8417233052543679, + "grad_norm": 1.1617755889892578, + "learning_rate": 9.32256307611377e-05, + "loss": 4.9213, + "step": 195550 + }, + { + "epoch": 0.8419385247136505, + "grad_norm": 1.1693099737167358, + "learning_rate": 9.322222912322054e-05, + "loss": 4.3141, + "step": 195600 + }, + { + "epoch": 0.8421537441729331, + "grad_norm": 2.1910910606384277, + "learning_rate": 9.321882669357165e-05, + "loss": 5.1916, + "step": 195650 + }, + { + "epoch": 0.8423689636322158, + "grad_norm": 1.4643447399139404, + "learning_rate": 9.321542347225337e-05, + "loss": 4.499, + "step": 195700 + }, + { + "epoch": 0.8425841830914984, + "grad_norm": 2.0660698413848877, + "learning_rate": 9.321201945932804e-05, + "loss": 4.7319, + "step": 195750 + }, + { + "epoch": 0.842799402550781, + "grad_norm": 2.2526652812957764, + "learning_rate": 9.320861465485804e-05, + "loss": 4.5549, + "step": 195800 + }, + { + "epoch": 0.8430146220100636, + "grad_norm": 1.0957895517349243, + "learning_rate": 9.320520905890571e-05, + "loss": 4.9201, + "step": 195850 + }, + { + "epoch": 0.8432298414693463, + "grad_norm": 0.7491609454154968, + "learning_rate": 9.320180267153342e-05, + "loss": 4.6839, + "step": 195900 + }, + { + "epoch": 0.8434450609286289, + "grad_norm": 1.9114655256271362, + "learning_rate": 9.31983954928036e-05, + "loss": 4.735, + "step": 195950 + }, + { + "epoch": 0.8436602803879115, + "grad_norm": 0.8906866908073425, + "learning_rate": 9.319498752277863e-05, + "loss": 5.171, + "step": 196000 + }, + { + "epoch": 0.8438754998471942, + "grad_norm": 2.8748552799224854, + "learning_rate": 9.319157876152096e-05, + "loss": 4.2013, + "step": 196050 + }, + { + "epoch": 0.8440907193064768, + "grad_norm": 2.038266658782959, + "learning_rate": 9.318816920909302e-05, + "loss": 4.4259, + "step": 196100 + }, + { + "epoch": 0.8443059387657594, + "grad_norm": 2.5188586711883545, + "learning_rate": 9.318475886555726e-05, + "loss": 5.0416, + "step": 196150 + }, + { + "epoch": 0.844521158225042, + "grad_norm": 3.811406135559082, + "learning_rate": 9.318134773097618e-05, + "loss": 4.6718, + "step": 196200 + }, + { + "epoch": 0.8447363776843247, + "grad_norm": 3.110952615737915, + "learning_rate": 9.317793580541223e-05, + "loss": 4.7346, + "step": 196250 + }, + { + "epoch": 0.8449515971436073, + "grad_norm": 2.3165318965911865, + "learning_rate": 9.317452308892791e-05, + "loss": 4.5565, + "step": 196300 + }, + { + "epoch": 0.84516681660289, + "grad_norm": 1.3025885820388794, + "learning_rate": 9.317110958158574e-05, + "loss": 4.0645, + "step": 196350 + }, + { + "epoch": 0.8453820360621725, + "grad_norm": 1.8127973079681396, + "learning_rate": 9.316769528344827e-05, + "loss": 4.4862, + "step": 196400 + }, + { + "epoch": 0.8455972555214553, + "grad_norm": 0.6782581210136414, + "learning_rate": 9.316428019457801e-05, + "loss": 4.4288, + "step": 196450 + }, + { + "epoch": 0.8458124749807379, + "grad_norm": 2.4477741718292236, + "learning_rate": 9.316086431503755e-05, + "loss": 4.8842, + "step": 196500 + }, + { + "epoch": 0.8460276944400205, + "grad_norm": 2.0100061893463135, + "learning_rate": 9.315744764488943e-05, + "loss": 4.5742, + "step": 196550 + }, + { + "epoch": 0.8462429138993032, + "grad_norm": 2.1132924556732178, + "learning_rate": 9.315403018419624e-05, + "loss": 4.7626, + "step": 196600 + }, + { + "epoch": 0.8464581333585858, + "grad_norm": 2.1185457706451416, + "learning_rate": 9.315061193302059e-05, + "loss": 4.5169, + "step": 196650 + }, + { + "epoch": 0.8466733528178684, + "grad_norm": 2.912675619125366, + "learning_rate": 9.314719289142509e-05, + "loss": 4.6699, + "step": 196700 + }, + { + "epoch": 0.846888572277151, + "grad_norm": 2.0619893074035645, + "learning_rate": 9.314377305947235e-05, + "loss": 4.8077, + "step": 196750 + }, + { + "epoch": 0.8471037917364337, + "grad_norm": 3.0347635746002197, + "learning_rate": 9.314035243722505e-05, + "loss": 4.5107, + "step": 196800 + }, + { + "epoch": 0.8473190111957163, + "grad_norm": 2.079214096069336, + "learning_rate": 9.313693102474583e-05, + "loss": 4.7733, + "step": 196850 + }, + { + "epoch": 0.8475342306549989, + "grad_norm": 2.218876600265503, + "learning_rate": 9.313350882209738e-05, + "loss": 5.0199, + "step": 196900 + }, + { + "epoch": 0.8477494501142815, + "grad_norm": 2.3831324577331543, + "learning_rate": 9.313008582934236e-05, + "loss": 4.5038, + "step": 196950 + }, + { + "epoch": 0.8479646695735642, + "grad_norm": 2.8992154598236084, + "learning_rate": 9.312666204654347e-05, + "loss": 4.578, + "step": 197000 + }, + { + "epoch": 0.8481798890328468, + "grad_norm": 2.1718294620513916, + "learning_rate": 9.312323747376344e-05, + "loss": 4.7624, + "step": 197050 + }, + { + "epoch": 0.8483951084921294, + "grad_norm": 0.8251355886459351, + "learning_rate": 9.3119812111065e-05, + "loss": 4.5395, + "step": 197100 + }, + { + "epoch": 0.848610327951412, + "grad_norm": 1.2370967864990234, + "learning_rate": 9.31163859585109e-05, + "loss": 4.6601, + "step": 197150 + }, + { + "epoch": 0.8488255474106947, + "grad_norm": 3.7518296241760254, + "learning_rate": 9.311295901616388e-05, + "loss": 4.8283, + "step": 197200 + }, + { + "epoch": 0.8490407668699773, + "grad_norm": 2.8222544193267822, + "learning_rate": 9.310953128408673e-05, + "loss": 4.5144, + "step": 197250 + }, + { + "epoch": 0.8492559863292599, + "grad_norm": 2.2372093200683594, + "learning_rate": 9.310610276234224e-05, + "loss": 4.6387, + "step": 197300 + }, + { + "epoch": 0.8494712057885426, + "grad_norm": 2.595913887023926, + "learning_rate": 9.31026734509932e-05, + "loss": 5.0226, + "step": 197350 + }, + { + "epoch": 0.8496864252478252, + "grad_norm": 2.0845816135406494, + "learning_rate": 9.309924335010242e-05, + "loss": 5.0097, + "step": 197400 + }, + { + "epoch": 0.8499016447071078, + "grad_norm": 1.2078754901885986, + "learning_rate": 9.309581245973278e-05, + "loss": 4.6567, + "step": 197450 + }, + { + "epoch": 0.8501168641663904, + "grad_norm": 3.7026636600494385, + "learning_rate": 9.309238077994707e-05, + "loss": 4.7932, + "step": 197500 + }, + { + "epoch": 0.8503320836256731, + "grad_norm": 0.49443677067756653, + "learning_rate": 9.308894831080816e-05, + "loss": 4.6022, + "step": 197550 + }, + { + "epoch": 0.8505473030849557, + "grad_norm": 1.7709636688232422, + "learning_rate": 9.308551505237894e-05, + "loss": 4.631, + "step": 197600 + }, + { + "epoch": 0.8507625225442383, + "grad_norm": 2.2761728763580322, + "learning_rate": 9.308208100472231e-05, + "loss": 4.6694, + "step": 197650 + }, + { + "epoch": 0.8509777420035209, + "grad_norm": 1.4533755779266357, + "learning_rate": 9.307864616790116e-05, + "loss": 4.4232, + "step": 197700 + }, + { + "epoch": 0.8511929614628037, + "grad_norm": 2.154472827911377, + "learning_rate": 9.30752105419784e-05, + "loss": 4.8519, + "step": 197750 + }, + { + "epoch": 0.8514081809220863, + "grad_norm": 3.0509417057037354, + "learning_rate": 9.307177412701697e-05, + "loss": 4.371, + "step": 197800 + }, + { + "epoch": 0.8516234003813689, + "grad_norm": 2.837885856628418, + "learning_rate": 9.306833692307982e-05, + "loss": 4.2806, + "step": 197850 + }, + { + "epoch": 0.8518386198406516, + "grad_norm": 2.145371913909912, + "learning_rate": 9.30648989302299e-05, + "loss": 4.788, + "step": 197900 + }, + { + "epoch": 0.8520538392999342, + "grad_norm": 3.7012181282043457, + "learning_rate": 9.306146014853021e-05, + "loss": 5.0576, + "step": 197950 + }, + { + "epoch": 0.8522690587592168, + "grad_norm": 2.726574182510376, + "learning_rate": 9.305802057804372e-05, + "loss": 5.095, + "step": 198000 + }, + { + "epoch": 0.8522690587592168, + "eval_loss": 5.192026615142822, + "eval_runtime": 34.9308, + "eval_samples_per_second": 18.322, + "eval_steps_per_second": 9.161, + "eval_tts_loss": 6.793758395835495, + "step": 198000 + }, + { + "epoch": 0.8524842782184994, + "grad_norm": 2.9693241119384766, + "learning_rate": 9.305458021883344e-05, + "loss": 4.896, + "step": 198050 + }, + { + "epoch": 0.8526994976777821, + "grad_norm": 2.1582891941070557, + "learning_rate": 9.305113907096239e-05, + "loss": 5.0647, + "step": 198100 + }, + { + "epoch": 0.8529147171370647, + "grad_norm": 1.6024563312530518, + "learning_rate": 9.30476971344936e-05, + "loss": 4.7496, + "step": 198150 + }, + { + "epoch": 0.8531299365963473, + "grad_norm": 1.6572364568710327, + "learning_rate": 9.304425440949012e-05, + "loss": 4.6076, + "step": 198200 + }, + { + "epoch": 0.8533451560556299, + "grad_norm": 3.7007670402526855, + "learning_rate": 9.304081089601503e-05, + "loss": 4.7988, + "step": 198250 + }, + { + "epoch": 0.8535603755149126, + "grad_norm": 2.8727729320526123, + "learning_rate": 9.303736659413138e-05, + "loss": 5.0275, + "step": 198300 + }, + { + "epoch": 0.8537755949741952, + "grad_norm": 2.2000598907470703, + "learning_rate": 9.30339215039023e-05, + "loss": 4.5176, + "step": 198350 + }, + { + "epoch": 0.8539908144334778, + "grad_norm": 2.867845296859741, + "learning_rate": 9.303047562539083e-05, + "loss": 4.962, + "step": 198400 + }, + { + "epoch": 0.8542060338927604, + "grad_norm": 0.6815716028213501, + "learning_rate": 9.302702895866015e-05, + "loss": 4.1853, + "step": 198450 + }, + { + "epoch": 0.8544212533520431, + "grad_norm": 3.10215425491333, + "learning_rate": 9.302358150377338e-05, + "loss": 4.7318, + "step": 198500 + }, + { + "epoch": 0.8546364728113257, + "grad_norm": 3.756913185119629, + "learning_rate": 9.302013326079366e-05, + "loss": 4.6844, + "step": 198550 + }, + { + "epoch": 0.8548516922706083, + "grad_norm": 2.7644906044006348, + "learning_rate": 9.301668422978416e-05, + "loss": 4.585, + "step": 198600 + }, + { + "epoch": 0.855066911729891, + "grad_norm": 5.85402250289917, + "learning_rate": 9.301323441080806e-05, + "loss": 4.684, + "step": 198650 + }, + { + "epoch": 0.8552821311891736, + "grad_norm": 2.6259360313415527, + "learning_rate": 9.300978380392856e-05, + "loss": 4.7424, + "step": 198700 + }, + { + "epoch": 0.8554973506484562, + "grad_norm": 1.8610515594482422, + "learning_rate": 9.300633240920884e-05, + "loss": 4.7911, + "step": 198750 + }, + { + "epoch": 0.8557125701077388, + "grad_norm": 3.516650676727295, + "learning_rate": 9.300288022671216e-05, + "loss": 5.0842, + "step": 198800 + }, + { + "epoch": 0.8559277895670215, + "grad_norm": 2.0451695919036865, + "learning_rate": 9.299942725650173e-05, + "loss": 4.7974, + "step": 198850 + }, + { + "epoch": 0.8561430090263041, + "grad_norm": 3.4129843711853027, + "learning_rate": 9.299597349864079e-05, + "loss": 4.2549, + "step": 198900 + }, + { + "epoch": 0.8563582284855867, + "grad_norm": 2.2698230743408203, + "learning_rate": 9.299251895319263e-05, + "loss": 4.7475, + "step": 198950 + }, + { + "epoch": 0.8565734479448693, + "grad_norm": 0.5470705628395081, + "learning_rate": 9.29890636202205e-05, + "loss": 4.4876, + "step": 199000 + }, + { + "epoch": 0.856788667404152, + "grad_norm": 3.9762237071990967, + "learning_rate": 9.298560749978776e-05, + "loss": 4.5179, + "step": 199050 + }, + { + "epoch": 0.8570038868634346, + "grad_norm": 1.9023988246917725, + "learning_rate": 9.298215059195763e-05, + "loss": 4.626, + "step": 199100 + }, + { + "epoch": 0.8572191063227173, + "grad_norm": 2.5226027965545654, + "learning_rate": 9.297869289679348e-05, + "loss": 4.9901, + "step": 199150 + }, + { + "epoch": 0.8574343257819999, + "grad_norm": 1.6914149522781372, + "learning_rate": 9.297523441435866e-05, + "loss": 4.5824, + "step": 199200 + }, + { + "epoch": 0.8576495452412826, + "grad_norm": 6.459962368011475, + "learning_rate": 9.29717751447165e-05, + "loss": 4.4057, + "step": 199250 + }, + { + "epoch": 0.8578647647005652, + "grad_norm": 3.9001381397247314, + "learning_rate": 9.296831508793035e-05, + "loss": 4.697, + "step": 199300 + }, + { + "epoch": 0.8580799841598478, + "grad_norm": 1.5959885120391846, + "learning_rate": 9.296485424406363e-05, + "loss": 4.7939, + "step": 199350 + }, + { + "epoch": 0.8582952036191305, + "grad_norm": 1.3501895666122437, + "learning_rate": 9.296139261317971e-05, + "loss": 4.375, + "step": 199400 + }, + { + "epoch": 0.8585104230784131, + "grad_norm": 2.5733020305633545, + "learning_rate": 9.295793019534199e-05, + "loss": 4.7252, + "step": 199450 + }, + { + "epoch": 0.8587256425376957, + "grad_norm": 0.35850608348846436, + "learning_rate": 9.295446699061392e-05, + "loss": 4.8571, + "step": 199500 + }, + { + "epoch": 0.8589408619969783, + "grad_norm": 2.5726680755615234, + "learning_rate": 9.295100299905893e-05, + "loss": 4.6915, + "step": 199550 + }, + { + "epoch": 0.859156081456261, + "grad_norm": 2.0805892944335938, + "learning_rate": 9.294753822074044e-05, + "loss": 4.5512, + "step": 199600 + }, + { + "epoch": 0.8593713009155436, + "grad_norm": 1.4426698684692383, + "learning_rate": 9.294407265572196e-05, + "loss": 5.0448, + "step": 199650 + }, + { + "epoch": 0.8595865203748262, + "grad_norm": 2.497868537902832, + "learning_rate": 9.294060630406696e-05, + "loss": 4.8992, + "step": 199700 + }, + { + "epoch": 0.8598017398341088, + "grad_norm": 2.1559722423553467, + "learning_rate": 9.293713916583892e-05, + "loss": 4.5363, + "step": 199750 + }, + { + "epoch": 0.8600169592933915, + "grad_norm": 2.4993064403533936, + "learning_rate": 9.293367124110134e-05, + "loss": 4.9626, + "step": 199800 + }, + { + "epoch": 0.8602321787526741, + "grad_norm": 2.158669948577881, + "learning_rate": 9.29302025299178e-05, + "loss": 4.6522, + "step": 199850 + }, + { + "epoch": 0.8604473982119567, + "grad_norm": 1.2476764917373657, + "learning_rate": 9.29267330323518e-05, + "loss": 4.4903, + "step": 199900 + }, + { + "epoch": 0.8606626176712394, + "grad_norm": 2.830291986465454, + "learning_rate": 9.292326274846688e-05, + "loss": 4.9589, + "step": 199950 + }, + { + "epoch": 0.860877837130522, + "grad_norm": 3.309196949005127, + "learning_rate": 9.291979167832662e-05, + "loss": 4.7172, + "step": 200000 + }, + { + "epoch": 0.8610930565898046, + "grad_norm": 1.3698314428329468, + "learning_rate": 9.291631982199463e-05, + "loss": 5.1227, + "step": 200050 + }, + { + "epoch": 0.8613082760490872, + "grad_norm": 2.432173728942871, + "learning_rate": 9.291284717953449e-05, + "loss": 4.4524, + "step": 200100 + }, + { + "epoch": 0.8615234955083699, + "grad_norm": 2.881755828857422, + "learning_rate": 9.290937375100977e-05, + "loss": 4.7273, + "step": 200150 + }, + { + "epoch": 0.8617387149676525, + "grad_norm": 0.9234189987182617, + "learning_rate": 9.290589953648414e-05, + "loss": 4.698, + "step": 200200 + }, + { + "epoch": 0.8619539344269351, + "grad_norm": 5.168801784515381, + "learning_rate": 9.290242453602123e-05, + "loss": 4.6295, + "step": 200250 + }, + { + "epoch": 0.8621691538862177, + "grad_norm": 1.9342143535614014, + "learning_rate": 9.28989487496847e-05, + "loss": 4.8198, + "step": 200300 + }, + { + "epoch": 0.8623843733455004, + "grad_norm": 1.402234435081482, + "learning_rate": 9.28954721775382e-05, + "loss": 4.4152, + "step": 200350 + }, + { + "epoch": 0.862599592804783, + "grad_norm": 2.261967658996582, + "learning_rate": 9.289199481964542e-05, + "loss": 4.5017, + "step": 200400 + }, + { + "epoch": 0.8628148122640656, + "grad_norm": 2.039243698120117, + "learning_rate": 9.288851667607007e-05, + "loss": 4.9661, + "step": 200450 + }, + { + "epoch": 0.8630300317233482, + "grad_norm": 1.5866386890411377, + "learning_rate": 9.288503774687585e-05, + "loss": 4.5773, + "step": 200500 + }, + { + "epoch": 0.863245251182631, + "grad_norm": 2.48905086517334, + "learning_rate": 9.288155803212648e-05, + "loss": 4.711, + "step": 200550 + }, + { + "epoch": 0.8634604706419136, + "grad_norm": 3.960510015487671, + "learning_rate": 9.287807753188574e-05, + "loss": 4.8027, + "step": 200600 + }, + { + "epoch": 0.8636756901011962, + "grad_norm": 2.0482242107391357, + "learning_rate": 9.287459624621732e-05, + "loss": 4.9029, + "step": 200650 + }, + { + "epoch": 0.8638909095604789, + "grad_norm": 2.2517781257629395, + "learning_rate": 9.287111417518503e-05, + "loss": 4.8359, + "step": 200700 + }, + { + "epoch": 0.8641061290197615, + "grad_norm": 1.201898455619812, + "learning_rate": 9.286763131885264e-05, + "loss": 4.3252, + "step": 200750 + }, + { + "epoch": 0.8643213484790441, + "grad_norm": 2.044598340988159, + "learning_rate": 9.286414767728396e-05, + "loss": 4.6065, + "step": 200800 + }, + { + "epoch": 0.8645365679383267, + "grad_norm": 2.316635847091675, + "learning_rate": 9.28606632505428e-05, + "loss": 4.0503, + "step": 200850 + }, + { + "epoch": 0.8647517873976094, + "grad_norm": 2.148477792739868, + "learning_rate": 9.285717803869298e-05, + "loss": 4.7224, + "step": 200900 + }, + { + "epoch": 0.864967006856892, + "grad_norm": 2.7369046211242676, + "learning_rate": 9.285369204179835e-05, + "loss": 5.1177, + "step": 200950 + }, + { + "epoch": 0.8651822263161746, + "grad_norm": 3.7745089530944824, + "learning_rate": 9.285020525992275e-05, + "loss": 5.0988, + "step": 201000 + }, + { + "epoch": 0.8651822263161746, + "eval_loss": 5.1982293128967285, + "eval_runtime": 34.9408, + "eval_samples_per_second": 18.317, + "eval_steps_per_second": 9.158, + "eval_tts_loss": 6.735413883345011, + "step": 201000 + }, + { + "epoch": 0.8653974457754572, + "grad_norm": 2.930049419403076, + "learning_rate": 9.284671769313008e-05, + "loss": 5.037, + "step": 201050 + }, + { + "epoch": 0.8656126652347399, + "grad_norm": 2.08650279045105, + "learning_rate": 9.284322934148418e-05, + "loss": 4.8259, + "step": 201100 + }, + { + "epoch": 0.8658278846940225, + "grad_norm": 2.257094383239746, + "learning_rate": 9.283974020504897e-05, + "loss": 4.4594, + "step": 201150 + }, + { + "epoch": 0.8660431041533051, + "grad_norm": 2.8705856800079346, + "learning_rate": 9.283625028388837e-05, + "loss": 5.1732, + "step": 201200 + }, + { + "epoch": 0.8662583236125878, + "grad_norm": 1.801612377166748, + "learning_rate": 9.28327595780663e-05, + "loss": 5.0306, + "step": 201250 + }, + { + "epoch": 0.8664735430718704, + "grad_norm": 2.612332820892334, + "learning_rate": 9.282926808764672e-05, + "loss": 4.538, + "step": 201300 + }, + { + "epoch": 0.866688762531153, + "grad_norm": 2.852316379547119, + "learning_rate": 9.282577581269355e-05, + "loss": 4.5118, + "step": 201350 + }, + { + "epoch": 0.8669039819904356, + "grad_norm": 3.3637537956237793, + "learning_rate": 9.282228275327079e-05, + "loss": 4.1046, + "step": 201400 + }, + { + "epoch": 0.8671192014497183, + "grad_norm": 2.8758351802825928, + "learning_rate": 9.281878890944242e-05, + "loss": 4.9789, + "step": 201450 + }, + { + "epoch": 0.8673344209090009, + "grad_norm": 0.6947157979011536, + "learning_rate": 9.281529428127243e-05, + "loss": 4.8074, + "step": 201500 + }, + { + "epoch": 0.8675496403682835, + "grad_norm": 3.2521004676818848, + "learning_rate": 9.281179886882483e-05, + "loss": 5.1188, + "step": 201550 + }, + { + "epoch": 0.8677648598275661, + "grad_norm": 1.7700777053833008, + "learning_rate": 9.280830267216366e-05, + "loss": 4.8444, + "step": 201600 + }, + { + "epoch": 0.8679800792868488, + "grad_norm": 2.877481698989868, + "learning_rate": 9.280480569135296e-05, + "loss": 5.0347, + "step": 201650 + }, + { + "epoch": 0.8681952987461314, + "grad_norm": 2.694570541381836, + "learning_rate": 9.280130792645679e-05, + "loss": 4.5741, + "step": 201700 + }, + { + "epoch": 0.868410518205414, + "grad_norm": 5.81212854385376, + "learning_rate": 9.279780937753922e-05, + "loss": 4.7836, + "step": 201750 + }, + { + "epoch": 0.8686257376646966, + "grad_norm": 2.7158362865448, + "learning_rate": 9.27943100446643e-05, + "loss": 4.5273, + "step": 201800 + }, + { + "epoch": 0.8688409571239794, + "grad_norm": 2.1914732456207275, + "learning_rate": 9.279080992789619e-05, + "loss": 4.9303, + "step": 201850 + }, + { + "epoch": 0.869056176583262, + "grad_norm": 5.132272720336914, + "learning_rate": 9.278730902729898e-05, + "loss": 4.7256, + "step": 201900 + }, + { + "epoch": 0.8692713960425446, + "grad_norm": 3.99056077003479, + "learning_rate": 9.278380734293676e-05, + "loss": 4.6523, + "step": 201950 + }, + { + "epoch": 0.8694866155018273, + "grad_norm": 2.8409423828125, + "learning_rate": 9.278030487487374e-05, + "loss": 4.5727, + "step": 202000 + }, + { + "epoch": 0.8697018349611099, + "grad_norm": 1.8989899158477783, + "learning_rate": 9.277680162317403e-05, + "loss": 4.6105, + "step": 202050 + }, + { + "epoch": 0.8699170544203925, + "grad_norm": 0.8636428117752075, + "learning_rate": 9.27732975879018e-05, + "loss": 4.6401, + "step": 202100 + }, + { + "epoch": 0.8701322738796751, + "grad_norm": 2.3513731956481934, + "learning_rate": 9.276979276912128e-05, + "loss": 4.9645, + "step": 202150 + }, + { + "epoch": 0.8703474933389578, + "grad_norm": 1.2137891054153442, + "learning_rate": 9.276628716689661e-05, + "loss": 4.7776, + "step": 202200 + }, + { + "epoch": 0.8705627127982404, + "grad_norm": 0.8902551531791687, + "learning_rate": 9.276278078129205e-05, + "loss": 4.7824, + "step": 202250 + }, + { + "epoch": 0.870777932257523, + "grad_norm": 0.5497661828994751, + "learning_rate": 9.275927361237182e-05, + "loss": 4.7534, + "step": 202300 + }, + { + "epoch": 0.8709931517168056, + "grad_norm": 0.6600897312164307, + "learning_rate": 9.275576566020016e-05, + "loss": 4.8162, + "step": 202350 + }, + { + "epoch": 0.8712083711760883, + "grad_norm": 1.5953245162963867, + "learning_rate": 9.27522569248413e-05, + "loss": 4.8996, + "step": 202400 + }, + { + "epoch": 0.8714235906353709, + "grad_norm": 2.6980931758880615, + "learning_rate": 9.274874740635957e-05, + "loss": 4.7001, + "step": 202450 + }, + { + "epoch": 0.8716388100946535, + "grad_norm": 1.034971833229065, + "learning_rate": 9.274523710481921e-05, + "loss": 4.4784, + "step": 202500 + }, + { + "epoch": 0.8718540295539361, + "grad_norm": 3.3079779148101807, + "learning_rate": 9.274172602028453e-05, + "loss": 4.7929, + "step": 202550 + }, + { + "epoch": 0.8720692490132188, + "grad_norm": 2.793865919113159, + "learning_rate": 9.273821415281985e-05, + "loss": 4.8674, + "step": 202600 + }, + { + "epoch": 0.8722844684725014, + "grad_norm": 0.4107474684715271, + "learning_rate": 9.27347015024895e-05, + "loss": 4.815, + "step": 202650 + }, + { + "epoch": 0.872499687931784, + "grad_norm": 4.887115478515625, + "learning_rate": 9.273118806935782e-05, + "loss": 4.6728, + "step": 202700 + }, + { + "epoch": 0.8727149073910667, + "grad_norm": 1.6501470804214478, + "learning_rate": 9.272767385348917e-05, + "loss": 4.803, + "step": 202750 + }, + { + "epoch": 0.8729301268503493, + "grad_norm": 2.8097455501556396, + "learning_rate": 9.272415885494793e-05, + "loss": 4.7953, + "step": 202800 + }, + { + "epoch": 0.8731453463096319, + "grad_norm": 2.2020974159240723, + "learning_rate": 9.272064307379847e-05, + "loss": 4.9638, + "step": 202850 + }, + { + "epoch": 0.8733605657689145, + "grad_norm": 4.4548139572143555, + "learning_rate": 9.271712651010521e-05, + "loss": 4.8348, + "step": 202900 + }, + { + "epoch": 0.8735757852281972, + "grad_norm": 0.5806538462638855, + "learning_rate": 9.271360916393254e-05, + "loss": 4.686, + "step": 202950 + }, + { + "epoch": 0.8737910046874798, + "grad_norm": 0.6304968595504761, + "learning_rate": 9.271009103534491e-05, + "loss": 4.7263, + "step": 203000 + }, + { + "epoch": 0.8740062241467624, + "grad_norm": 2.8968377113342285, + "learning_rate": 9.270657212440676e-05, + "loss": 4.6299, + "step": 203050 + }, + { + "epoch": 0.874221443606045, + "grad_norm": 2.3599631786346436, + "learning_rate": 9.270305243118256e-05, + "loss": 4.9815, + "step": 203100 + }, + { + "epoch": 0.8744366630653277, + "grad_norm": 3.3930392265319824, + "learning_rate": 9.269953195573674e-05, + "loss": 4.4282, + "step": 203150 + }, + { + "epoch": 0.8746518825246103, + "grad_norm": 1.034971833229065, + "learning_rate": 9.269601069813385e-05, + "loss": 4.6429, + "step": 203200 + }, + { + "epoch": 0.874867101983893, + "grad_norm": 1.8786509037017822, + "learning_rate": 9.269248865843835e-05, + "loss": 4.7644, + "step": 203250 + }, + { + "epoch": 0.8750823214431757, + "grad_norm": 0.9609358906745911, + "learning_rate": 9.268896583671476e-05, + "loss": 4.5201, + "step": 203300 + }, + { + "epoch": 0.8752975409024583, + "grad_norm": 2.148052453994751, + "learning_rate": 9.268544223302761e-05, + "loss": 4.8392, + "step": 203350 + }, + { + "epoch": 0.8755127603617409, + "grad_norm": 3.586998701095581, + "learning_rate": 9.268191784744145e-05, + "loss": 4.6707, + "step": 203400 + }, + { + "epoch": 0.8757279798210235, + "grad_norm": 2.3689448833465576, + "learning_rate": 9.267839268002083e-05, + "loss": 4.9812, + "step": 203450 + }, + { + "epoch": 0.8759431992803062, + "grad_norm": 1.652591347694397, + "learning_rate": 9.267486673083035e-05, + "loss": 4.4844, + "step": 203500 + }, + { + "epoch": 0.8761584187395888, + "grad_norm": 2.7972710132598877, + "learning_rate": 9.267133999993456e-05, + "loss": 4.6996, + "step": 203550 + }, + { + "epoch": 0.8763736381988714, + "grad_norm": 2.246983528137207, + "learning_rate": 9.266781248739809e-05, + "loss": 4.56, + "step": 203600 + }, + { + "epoch": 0.876588857658154, + "grad_norm": 4.6717209815979, + "learning_rate": 9.266428419328554e-05, + "loss": 4.7216, + "step": 203650 + }, + { + "epoch": 0.8768040771174367, + "grad_norm": 1.9932622909545898, + "learning_rate": 9.266075511766155e-05, + "loss": 4.9868, + "step": 203700 + }, + { + "epoch": 0.8770192965767193, + "grad_norm": 1.7917523384094238, + "learning_rate": 9.265722526059076e-05, + "loss": 4.8223, + "step": 203750 + }, + { + "epoch": 0.8772345160360019, + "grad_norm": 2.4577653408050537, + "learning_rate": 9.26536946221378e-05, + "loss": 5.151, + "step": 203800 + }, + { + "epoch": 0.8774497354952845, + "grad_norm": 2.3294432163238525, + "learning_rate": 9.265016320236741e-05, + "loss": 4.5675, + "step": 203850 + }, + { + "epoch": 0.8776649549545672, + "grad_norm": 4.163478851318359, + "learning_rate": 9.264663100134421e-05, + "loss": 4.4972, + "step": 203900 + }, + { + "epoch": 0.8778801744138498, + "grad_norm": 2.6111583709716797, + "learning_rate": 9.264309801913294e-05, + "loss": 4.7702, + "step": 203950 + }, + { + "epoch": 0.8780953938731324, + "grad_norm": 3.709117889404297, + "learning_rate": 9.263956425579831e-05, + "loss": 4.7646, + "step": 204000 + }, + { + "epoch": 0.8780953938731324, + "eval_loss": 5.195934772491455, + "eval_runtime": 35.0667, + "eval_samples_per_second": 18.251, + "eval_steps_per_second": 9.125, + "eval_tts_loss": 6.795124907270818, + "step": 204000 + }, + { + "epoch": 0.8783106133324151, + "grad_norm": 2.3791000843048096, + "learning_rate": 9.263602971140506e-05, + "loss": 4.7817, + "step": 204050 + }, + { + "epoch": 0.8785258327916977, + "grad_norm": 2.168837308883667, + "learning_rate": 9.263249438601791e-05, + "loss": 4.9872, + "step": 204100 + }, + { + "epoch": 0.8787410522509803, + "grad_norm": 0.6066736578941345, + "learning_rate": 9.262895827970161e-05, + "loss": 4.351, + "step": 204150 + }, + { + "epoch": 0.8789562717102629, + "grad_norm": 2.26094913482666, + "learning_rate": 9.262542139252099e-05, + "loss": 5.3283, + "step": 204200 + }, + { + "epoch": 0.8791714911695456, + "grad_norm": 3.4181954860687256, + "learning_rate": 9.262188372454079e-05, + "loss": 5.0553, + "step": 204250 + }, + { + "epoch": 0.8793867106288282, + "grad_norm": 2.6167197227478027, + "learning_rate": 9.261834527582582e-05, + "loss": 4.5373, + "step": 204300 + }, + { + "epoch": 0.8796019300881108, + "grad_norm": 0.5724769830703735, + "learning_rate": 9.26148060464409e-05, + "loss": 4.4456, + "step": 204350 + }, + { + "epoch": 0.8798171495473934, + "grad_norm": 1.76767098903656, + "learning_rate": 9.261126603645086e-05, + "loss": 3.9962, + "step": 204400 + }, + { + "epoch": 0.8800323690066761, + "grad_norm": 2.1694962978363037, + "learning_rate": 9.260772524592055e-05, + "loss": 4.2541, + "step": 204450 + }, + { + "epoch": 0.8802475884659587, + "grad_norm": 3.023176908493042, + "learning_rate": 9.260418367491482e-05, + "loss": 4.7488, + "step": 204500 + }, + { + "epoch": 0.8804628079252413, + "grad_norm": 4.257347106933594, + "learning_rate": 9.260064132349855e-05, + "loss": 4.5253, + "step": 204550 + }, + { + "epoch": 0.880678027384524, + "grad_norm": 0.7647531032562256, + "learning_rate": 9.259709819173662e-05, + "loss": 4.9295, + "step": 204600 + }, + { + "epoch": 0.8808932468438067, + "grad_norm": 2.10725998878479, + "learning_rate": 9.259355427969393e-05, + "loss": 4.7406, + "step": 204650 + }, + { + "epoch": 0.8811084663030893, + "grad_norm": 2.246164321899414, + "learning_rate": 9.259000958743544e-05, + "loss": 4.0623, + "step": 204700 + }, + { + "epoch": 0.8813236857623719, + "grad_norm": 1.572350263595581, + "learning_rate": 9.2586464115026e-05, + "loss": 4.5851, + "step": 204750 + }, + { + "epoch": 0.8815389052216546, + "grad_norm": 1.9995160102844238, + "learning_rate": 9.258291786253064e-05, + "loss": 4.66, + "step": 204800 + }, + { + "epoch": 0.8817541246809372, + "grad_norm": 3.354360818862915, + "learning_rate": 9.257937083001426e-05, + "loss": 5.0428, + "step": 204850 + }, + { + "epoch": 0.8819693441402198, + "grad_norm": 1.9760305881500244, + "learning_rate": 9.257582301754184e-05, + "loss": 4.5896, + "step": 204900 + }, + { + "epoch": 0.8821845635995024, + "grad_norm": 6.4184346199035645, + "learning_rate": 9.257227442517839e-05, + "loss": 4.6888, + "step": 204950 + }, + { + "epoch": 0.8823997830587851, + "grad_norm": 0.4583141803741455, + "learning_rate": 9.25687250529889e-05, + "loss": 4.8354, + "step": 205000 + }, + { + "epoch": 0.8826150025180677, + "grad_norm": 2.1278998851776123, + "learning_rate": 9.25651749010384e-05, + "loss": 4.7515, + "step": 205050 + }, + { + "epoch": 0.8828302219773503, + "grad_norm": 4.506360054016113, + "learning_rate": 9.25616239693919e-05, + "loss": 4.5573, + "step": 205100 + }, + { + "epoch": 0.8830454414366329, + "grad_norm": 2.357844591140747, + "learning_rate": 9.255807225811445e-05, + "loss": 4.7797, + "step": 205150 + }, + { + "epoch": 0.8832606608959156, + "grad_norm": 2.349296808242798, + "learning_rate": 9.255451976727113e-05, + "loss": 4.5049, + "step": 205200 + }, + { + "epoch": 0.8834758803551982, + "grad_norm": 4.446805477142334, + "learning_rate": 9.255096649692698e-05, + "loss": 4.7916, + "step": 205250 + }, + { + "epoch": 0.8836910998144808, + "grad_norm": 1.4232932329177856, + "learning_rate": 9.254741244714711e-05, + "loss": 4.8241, + "step": 205300 + }, + { + "epoch": 0.8839063192737635, + "grad_norm": 3.514197587966919, + "learning_rate": 9.254385761799662e-05, + "loss": 4.7621, + "step": 205350 + }, + { + "epoch": 0.8841215387330461, + "grad_norm": 1.7485225200653076, + "learning_rate": 9.254030200954062e-05, + "loss": 4.5609, + "step": 205400 + }, + { + "epoch": 0.8843367581923287, + "grad_norm": 2.4854531288146973, + "learning_rate": 9.253674562184425e-05, + "loss": 4.8527, + "step": 205450 + }, + { + "epoch": 0.8845519776516113, + "grad_norm": 2.1011338233947754, + "learning_rate": 9.253318845497263e-05, + "loss": 4.8456, + "step": 205500 + }, + { + "epoch": 0.884767197110894, + "grad_norm": 2.935192346572876, + "learning_rate": 9.252963050899093e-05, + "loss": 4.8249, + "step": 205550 + }, + { + "epoch": 0.8849824165701766, + "grad_norm": 2.6556448936462402, + "learning_rate": 9.252607178396435e-05, + "loss": 4.5695, + "step": 205600 + }, + { + "epoch": 0.8851976360294592, + "grad_norm": 2.075166940689087, + "learning_rate": 9.252251227995806e-05, + "loss": 4.9221, + "step": 205650 + }, + { + "epoch": 0.8854128554887418, + "grad_norm": 1.3359026908874512, + "learning_rate": 9.251895199703726e-05, + "loss": 4.283, + "step": 205700 + }, + { + "epoch": 0.8856280749480245, + "grad_norm": 2.1305294036865234, + "learning_rate": 9.251539093526715e-05, + "loss": 4.4706, + "step": 205750 + }, + { + "epoch": 0.8858432944073071, + "grad_norm": 3.734119415283203, + "learning_rate": 9.251182909471299e-05, + "loss": 4.692, + "step": 205800 + }, + { + "epoch": 0.8860585138665897, + "grad_norm": 2.6703648567199707, + "learning_rate": 9.250826647544001e-05, + "loss": 5.132, + "step": 205850 + }, + { + "epoch": 0.8862737333258723, + "grad_norm": 1.1157636642456055, + "learning_rate": 9.250470307751347e-05, + "loss": 4.6518, + "step": 205900 + }, + { + "epoch": 0.886488952785155, + "grad_norm": 0.7675050497055054, + "learning_rate": 9.250113890099864e-05, + "loss": 4.7793, + "step": 205950 + }, + { + "epoch": 0.8867041722444376, + "grad_norm": 0.8967563509941101, + "learning_rate": 9.249757394596082e-05, + "loss": 4.737, + "step": 206000 + }, + { + "epoch": 0.8869193917037203, + "grad_norm": 2.3028533458709717, + "learning_rate": 9.24940082124653e-05, + "loss": 4.847, + "step": 206050 + }, + { + "epoch": 0.887134611163003, + "grad_norm": 2.450397491455078, + "learning_rate": 9.249044170057739e-05, + "loss": 4.3768, + "step": 206100 + }, + { + "epoch": 0.8873498306222856, + "grad_norm": 4.2966628074646, + "learning_rate": 9.248687441036243e-05, + "loss": 4.9909, + "step": 206150 + }, + { + "epoch": 0.8875650500815682, + "grad_norm": 3.2514216899871826, + "learning_rate": 9.248330634188575e-05, + "loss": 5.0263, + "step": 206200 + }, + { + "epoch": 0.8877802695408508, + "grad_norm": 3.67887806892395, + "learning_rate": 9.247973749521275e-05, + "loss": 4.836, + "step": 206250 + }, + { + "epoch": 0.8879954890001335, + "grad_norm": 2.3410441875457764, + "learning_rate": 9.247616787040876e-05, + "loss": 4.5548, + "step": 206300 + }, + { + "epoch": 0.8882107084594161, + "grad_norm": 2.345627784729004, + "learning_rate": 9.24725974675392e-05, + "loss": 4.9838, + "step": 206350 + }, + { + "epoch": 0.8884259279186987, + "grad_norm": 5.222900390625, + "learning_rate": 9.246902628666944e-05, + "loss": 4.3589, + "step": 206400 + }, + { + "epoch": 0.8886411473779813, + "grad_norm": 3.188312292098999, + "learning_rate": 9.246545432786491e-05, + "loss": 5.111, + "step": 206450 + }, + { + "epoch": 0.888856366837264, + "grad_norm": 3.333204746246338, + "learning_rate": 9.246188159119105e-05, + "loss": 4.5767, + "step": 206500 + }, + { + "epoch": 0.8890715862965466, + "grad_norm": 3.2893497943878174, + "learning_rate": 9.245830807671327e-05, + "loss": 4.387, + "step": 206550 + }, + { + "epoch": 0.8892868057558292, + "grad_norm": 2.977144718170166, + "learning_rate": 9.245473378449707e-05, + "loss": 4.7591, + "step": 206600 + }, + { + "epoch": 0.8895020252151119, + "grad_norm": 3.6960718631744385, + "learning_rate": 9.245115871460792e-05, + "loss": 4.8653, + "step": 206650 + }, + { + "epoch": 0.8897172446743945, + "grad_norm": 3.015930414199829, + "learning_rate": 9.244758286711128e-05, + "loss": 4.6298, + "step": 206700 + }, + { + "epoch": 0.8899324641336771, + "grad_norm": 3.0349740982055664, + "learning_rate": 9.244400624207265e-05, + "loss": 5.1963, + "step": 206750 + }, + { + "epoch": 0.8901476835929597, + "grad_norm": 0.9104273319244385, + "learning_rate": 9.244042883955759e-05, + "loss": 4.6027, + "step": 206800 + }, + { + "epoch": 0.8903629030522424, + "grad_norm": 3.670980930328369, + "learning_rate": 9.243685065963157e-05, + "loss": 4.5875, + "step": 206850 + }, + { + "epoch": 0.890578122511525, + "grad_norm": 0.21435751020908356, + "learning_rate": 9.243327170236018e-05, + "loss": 4.7714, + "step": 206900 + }, + { + "epoch": 0.8907933419708076, + "grad_norm": 1.7038263082504272, + "learning_rate": 9.242969196780896e-05, + "loss": 4.5443, + "step": 206950 + }, + { + "epoch": 0.8910085614300902, + "grad_norm": 4.108440399169922, + "learning_rate": 9.242611145604348e-05, + "loss": 4.9673, + "step": 207000 + }, + { + "epoch": 0.8910085614300902, + "eval_loss": 5.1794867515563965, + "eval_runtime": 34.9202, + "eval_samples_per_second": 18.327, + "eval_steps_per_second": 9.164, + "eval_tts_loss": 6.781871014070983, + "step": 207000 + }, + { + "epoch": 0.8912237808893729, + "grad_norm": 3.7088303565979004, + "learning_rate": 9.242253016712935e-05, + "loss": 4.8672, + "step": 207050 + }, + { + "epoch": 0.8914390003486555, + "grad_norm": 2.7777700424194336, + "learning_rate": 9.241894810113212e-05, + "loss": 4.5685, + "step": 207100 + }, + { + "epoch": 0.8916542198079381, + "grad_norm": 2.448117971420288, + "learning_rate": 9.241536525811744e-05, + "loss": 4.8978, + "step": 207150 + }, + { + "epoch": 0.8918694392672207, + "grad_norm": 1.180673599243164, + "learning_rate": 9.241178163815097e-05, + "loss": 4.7165, + "step": 207200 + }, + { + "epoch": 0.8920846587265034, + "grad_norm": 1.4421809911727905, + "learning_rate": 9.240819724129829e-05, + "loss": 4.5313, + "step": 207250 + }, + { + "epoch": 0.892299878185786, + "grad_norm": 1.1947451829910278, + "learning_rate": 9.240461206762509e-05, + "loss": 4.5548, + "step": 207300 + }, + { + "epoch": 0.8925150976450686, + "grad_norm": 2.9505772590637207, + "learning_rate": 9.240102611719705e-05, + "loss": 5.3727, + "step": 207350 + }, + { + "epoch": 0.8927303171043514, + "grad_norm": 2.253817081451416, + "learning_rate": 9.239743939007985e-05, + "loss": 4.891, + "step": 207400 + }, + { + "epoch": 0.892945536563634, + "grad_norm": 1.5782009363174438, + "learning_rate": 9.239385188633917e-05, + "loss": 4.7315, + "step": 207450 + }, + { + "epoch": 0.8931607560229166, + "grad_norm": 2.0634868144989014, + "learning_rate": 9.239026360604078e-05, + "loss": 4.941, + "step": 207500 + }, + { + "epoch": 0.8933759754821992, + "grad_norm": 2.5475728511810303, + "learning_rate": 9.238667454925033e-05, + "loss": 4.5581, + "step": 207550 + }, + { + "epoch": 0.8935911949414819, + "grad_norm": 1.0299389362335205, + "learning_rate": 9.238308471603361e-05, + "loss": 4.523, + "step": 207600 + }, + { + "epoch": 0.8938064144007645, + "grad_norm": 2.3287899494171143, + "learning_rate": 9.237949410645638e-05, + "loss": 4.5128, + "step": 207650 + }, + { + "epoch": 0.8940216338600471, + "grad_norm": 2.411491870880127, + "learning_rate": 9.237590272058442e-05, + "loss": 4.8075, + "step": 207700 + }, + { + "epoch": 0.8942368533193297, + "grad_norm": 2.432854175567627, + "learning_rate": 9.237231055848347e-05, + "loss": 4.7536, + "step": 207750 + }, + { + "epoch": 0.8944520727786124, + "grad_norm": 2.8550236225128174, + "learning_rate": 9.236871762021939e-05, + "loss": 4.4668, + "step": 207800 + }, + { + "epoch": 0.894667292237895, + "grad_norm": 4.307577133178711, + "learning_rate": 9.236512390585795e-05, + "loss": 4.8053, + "step": 207850 + }, + { + "epoch": 0.8948825116971776, + "grad_norm": 2.597517490386963, + "learning_rate": 9.236152941546499e-05, + "loss": 5.0199, + "step": 207900 + }, + { + "epoch": 0.8950977311564603, + "grad_norm": 2.496788740158081, + "learning_rate": 9.235793414910635e-05, + "loss": 4.3904, + "step": 207950 + }, + { + "epoch": 0.8953129506157429, + "grad_norm": 1.5423589944839478, + "learning_rate": 9.23543381068479e-05, + "loss": 4.6649, + "step": 208000 + }, + { + "epoch": 0.8955281700750255, + "grad_norm": 3.0095200538635254, + "learning_rate": 9.23507412887555e-05, + "loss": 4.6004, + "step": 208050 + }, + { + "epoch": 0.8957433895343081, + "grad_norm": 1.786163330078125, + "learning_rate": 9.234714369489504e-05, + "loss": 4.6289, + "step": 208100 + }, + { + "epoch": 0.8959586089935908, + "grad_norm": 2.2323646545410156, + "learning_rate": 9.234354532533242e-05, + "loss": 4.7532, + "step": 208150 + }, + { + "epoch": 0.8961738284528734, + "grad_norm": 3.1730756759643555, + "learning_rate": 9.233994618013355e-05, + "loss": 5.5411, + "step": 208200 + }, + { + "epoch": 0.896389047912156, + "grad_norm": 2.1238341331481934, + "learning_rate": 9.233634625936435e-05, + "loss": 4.7159, + "step": 208250 + }, + { + "epoch": 0.8966042673714386, + "grad_norm": 2.921027660369873, + "learning_rate": 9.23327455630908e-05, + "loss": 4.5831, + "step": 208300 + }, + { + "epoch": 0.8968194868307213, + "grad_norm": 1.2503083944320679, + "learning_rate": 9.232914409137882e-05, + "loss": 4.6275, + "step": 208350 + }, + { + "epoch": 0.8970347062900039, + "grad_norm": 2.3184237480163574, + "learning_rate": 9.232554184429439e-05, + "loss": 4.8851, + "step": 208400 + }, + { + "epoch": 0.8972499257492865, + "grad_norm": 2.040240526199341, + "learning_rate": 9.232193882190348e-05, + "loss": 4.7696, + "step": 208450 + }, + { + "epoch": 0.8974651452085691, + "grad_norm": 3.932421922683716, + "learning_rate": 9.231833502427212e-05, + "loss": 4.8352, + "step": 208500 + }, + { + "epoch": 0.8976803646678518, + "grad_norm": 1.5178905725479126, + "learning_rate": 9.231473045146629e-05, + "loss": 4.8701, + "step": 208550 + }, + { + "epoch": 0.8978955841271344, + "grad_norm": 4.321361541748047, + "learning_rate": 9.231112510355205e-05, + "loss": 4.6528, + "step": 208600 + }, + { + "epoch": 0.898110803586417, + "grad_norm": 2.079680919647217, + "learning_rate": 9.230751898059542e-05, + "loss": 4.857, + "step": 208650 + }, + { + "epoch": 0.8983260230456998, + "grad_norm": 1.1596564054489136, + "learning_rate": 9.230391208266246e-05, + "loss": 4.7373, + "step": 208700 + }, + { + "epoch": 0.8985412425049824, + "grad_norm": 3.5080032348632812, + "learning_rate": 9.230030440981924e-05, + "loss": 4.6275, + "step": 208750 + }, + { + "epoch": 0.898756461964265, + "grad_norm": 2.798943281173706, + "learning_rate": 9.229669596213185e-05, + "loss": 5.1301, + "step": 208800 + }, + { + "epoch": 0.8989716814235476, + "grad_norm": 1.9169389009475708, + "learning_rate": 9.229308673966639e-05, + "loss": 4.8466, + "step": 208850 + }, + { + "epoch": 0.8991869008828303, + "grad_norm": 0.9530812501907349, + "learning_rate": 9.228947674248896e-05, + "loss": 4.4952, + "step": 208900 + }, + { + "epoch": 0.8994021203421129, + "grad_norm": 2.353475332260132, + "learning_rate": 9.22858659706657e-05, + "loss": 4.5856, + "step": 208950 + }, + { + "epoch": 0.8996173398013955, + "grad_norm": 0.8391301035881042, + "learning_rate": 9.228225442426275e-05, + "loss": 4.4008, + "step": 209000 + }, + { + "epoch": 0.8998325592606781, + "grad_norm": 2.7852423191070557, + "learning_rate": 9.227864210334624e-05, + "loss": 4.875, + "step": 209050 + }, + { + "epoch": 0.9000477787199608, + "grad_norm": 2.643541097640991, + "learning_rate": 9.227502900798238e-05, + "loss": 4.647, + "step": 209100 + }, + { + "epoch": 0.9002629981792434, + "grad_norm": 1.3980766534805298, + "learning_rate": 9.227141513823734e-05, + "loss": 4.4594, + "step": 209150 + }, + { + "epoch": 0.900478217638526, + "grad_norm": 2.3279290199279785, + "learning_rate": 9.22678004941773e-05, + "loss": 4.6804, + "step": 209200 + }, + { + "epoch": 0.9006934370978086, + "grad_norm": 2.752833127975464, + "learning_rate": 9.226418507586847e-05, + "loss": 4.9632, + "step": 209250 + }, + { + "epoch": 0.9009086565570913, + "grad_norm": 2.5093815326690674, + "learning_rate": 9.22605688833771e-05, + "loss": 4.5217, + "step": 209300 + }, + { + "epoch": 0.9011238760163739, + "grad_norm": 1.0909534692764282, + "learning_rate": 9.225695191676941e-05, + "loss": 4.6973, + "step": 209350 + }, + { + "epoch": 0.9013390954756565, + "grad_norm": 2.1754446029663086, + "learning_rate": 9.22533341761117e-05, + "loss": 4.7787, + "step": 209400 + }, + { + "epoch": 0.9015543149349392, + "grad_norm": 2.8884809017181396, + "learning_rate": 9.224971566147017e-05, + "loss": 4.6789, + "step": 209450 + }, + { + "epoch": 0.9017695343942218, + "grad_norm": 1.4513942003250122, + "learning_rate": 9.224609637291115e-05, + "loss": 4.769, + "step": 209500 + }, + { + "epoch": 0.9019847538535044, + "grad_norm": 3.6995556354522705, + "learning_rate": 9.224247631050092e-05, + "loss": 4.4753, + "step": 209550 + }, + { + "epoch": 0.902199973312787, + "grad_norm": 2.4168500900268555, + "learning_rate": 9.223885547430578e-05, + "loss": 4.9681, + "step": 209600 + }, + { + "epoch": 0.9024151927720697, + "grad_norm": 1.6027942895889282, + "learning_rate": 9.223523386439207e-05, + "loss": 4.2975, + "step": 209650 + }, + { + "epoch": 0.9026304122313523, + "grad_norm": 2.3501508235931396, + "learning_rate": 9.223161148082616e-05, + "loss": 4.1091, + "step": 209700 + }, + { + "epoch": 0.9028456316906349, + "grad_norm": 2.6365551948547363, + "learning_rate": 9.222798832367435e-05, + "loss": 4.2988, + "step": 209750 + }, + { + "epoch": 0.9030608511499175, + "grad_norm": 2.0265719890594482, + "learning_rate": 9.222436439300304e-05, + "loss": 4.5674, + "step": 209800 + }, + { + "epoch": 0.9032760706092002, + "grad_norm": 2.272753953933716, + "learning_rate": 9.222073968887858e-05, + "loss": 4.5632, + "step": 209850 + }, + { + "epoch": 0.9034912900684828, + "grad_norm": 1.774990439414978, + "learning_rate": 9.221711421136742e-05, + "loss": 4.7364, + "step": 209900 + }, + { + "epoch": 0.9037065095277654, + "grad_norm": 4.061366558074951, + "learning_rate": 9.221348796053592e-05, + "loss": 4.5776, + "step": 209950 + }, + { + "epoch": 0.9039217289870481, + "grad_norm": 2.240368366241455, + "learning_rate": 9.220986093645052e-05, + "loss": 4.7869, + "step": 210000 + }, + { + "epoch": 0.9039217289870481, + "eval_loss": 5.184208393096924, + "eval_runtime": 34.9023, + "eval_samples_per_second": 18.337, + "eval_steps_per_second": 9.168, + "eval_tts_loss": 6.828632389191485, + "step": 210000 + }, + { + "epoch": 0.9041369484463307, + "grad_norm": 1.5794379711151123, + "learning_rate": 9.220623313917767e-05, + "loss": 4.93, + "step": 210050 + }, + { + "epoch": 0.9043521679056133, + "grad_norm": 1.995529055595398, + "learning_rate": 9.220260456878382e-05, + "loss": 4.7628, + "step": 210100 + }, + { + "epoch": 0.904567387364896, + "grad_norm": 2.972630262374878, + "learning_rate": 9.219897522533542e-05, + "loss": 4.8649, + "step": 210150 + }, + { + "epoch": 0.9047826068241787, + "grad_norm": 2.400735855102539, + "learning_rate": 9.219534510889897e-05, + "loss": 4.8153, + "step": 210200 + }, + { + "epoch": 0.9049978262834613, + "grad_norm": 3.0786752700805664, + "learning_rate": 9.219171421954098e-05, + "loss": 4.777, + "step": 210250 + }, + { + "epoch": 0.9052130457427439, + "grad_norm": 3.0593597888946533, + "learning_rate": 9.218808255732791e-05, + "loss": 4.8716, + "step": 210300 + }, + { + "epoch": 0.9054282652020265, + "grad_norm": 2.9483330249786377, + "learning_rate": 9.218445012232633e-05, + "loss": 5.0818, + "step": 210350 + }, + { + "epoch": 0.9056434846613092, + "grad_norm": 4.601602077484131, + "learning_rate": 9.218081691460276e-05, + "loss": 4.694, + "step": 210400 + }, + { + "epoch": 0.9058587041205918, + "grad_norm": 1.900004267692566, + "learning_rate": 9.217718293422375e-05, + "loss": 4.9734, + "step": 210450 + }, + { + "epoch": 0.9060739235798744, + "grad_norm": 2.7816436290740967, + "learning_rate": 9.217354818125585e-05, + "loss": 4.9875, + "step": 210500 + }, + { + "epoch": 0.906289143039157, + "grad_norm": 2.4685535430908203, + "learning_rate": 9.216991265576568e-05, + "loss": 4.6926, + "step": 210550 + }, + { + "epoch": 0.9065043624984397, + "grad_norm": 2.99664306640625, + "learning_rate": 9.21662763578198e-05, + "loss": 4.5879, + "step": 210600 + }, + { + "epoch": 0.9067195819577223, + "grad_norm": 3.0859663486480713, + "learning_rate": 9.216263928748484e-05, + "loss": 4.5881, + "step": 210650 + }, + { + "epoch": 0.9069348014170049, + "grad_norm": 2.0938971042633057, + "learning_rate": 9.215900144482741e-05, + "loss": 4.4812, + "step": 210700 + }, + { + "epoch": 0.9071500208762876, + "grad_norm": 2.4970543384552, + "learning_rate": 9.215536282991415e-05, + "loss": 4.7334, + "step": 210750 + }, + { + "epoch": 0.9073652403355702, + "grad_norm": 1.4285820722579956, + "learning_rate": 9.215172344281173e-05, + "loss": 4.7031, + "step": 210800 + }, + { + "epoch": 0.9075804597948528, + "grad_norm": 4.276361465454102, + "learning_rate": 9.214808328358677e-05, + "loss": 4.42, + "step": 210850 + }, + { + "epoch": 0.9077956792541354, + "grad_norm": 1.2662419080734253, + "learning_rate": 9.2144442352306e-05, + "loss": 4.2082, + "step": 210900 + }, + { + "epoch": 0.9080108987134181, + "grad_norm": 4.11671257019043, + "learning_rate": 9.214080064903608e-05, + "loss": 4.9794, + "step": 210950 + }, + { + "epoch": 0.9082261181727007, + "grad_norm": 3.273491144180298, + "learning_rate": 9.213715817384372e-05, + "loss": 4.4773, + "step": 211000 + }, + { + "epoch": 0.9084413376319833, + "grad_norm": 0.46317505836486816, + "learning_rate": 9.213351492679565e-05, + "loss": 4.8493, + "step": 211050 + }, + { + "epoch": 0.9086565570912659, + "grad_norm": 1.9621803760528564, + "learning_rate": 9.212987090795862e-05, + "loss": 4.7866, + "step": 211100 + }, + { + "epoch": 0.9088717765505486, + "grad_norm": 3.0186350345611572, + "learning_rate": 9.212622611739936e-05, + "loss": 4.8972, + "step": 211150 + }, + { + "epoch": 0.9090869960098312, + "grad_norm": 2.1414072513580322, + "learning_rate": 9.212258055518465e-05, + "loss": 4.6437, + "step": 211200 + }, + { + "epoch": 0.9093022154691138, + "grad_norm": 2.5010507106781006, + "learning_rate": 9.211893422138125e-05, + "loss": 4.8416, + "step": 211250 + }, + { + "epoch": 0.9095174349283965, + "grad_norm": 2.220092535018921, + "learning_rate": 9.211528711605595e-05, + "loss": 4.8531, + "step": 211300 + }, + { + "epoch": 0.9097326543876791, + "grad_norm": 1.9437509775161743, + "learning_rate": 9.211163923927557e-05, + "loss": 5.0127, + "step": 211350 + }, + { + "epoch": 0.9099478738469617, + "grad_norm": 2.770219564437866, + "learning_rate": 9.210799059110694e-05, + "loss": 4.6853, + "step": 211400 + }, + { + "epoch": 0.9101630933062443, + "grad_norm": 2.5142171382904053, + "learning_rate": 9.210434117161688e-05, + "loss": 4.8298, + "step": 211450 + }, + { + "epoch": 0.910378312765527, + "grad_norm": 4.031444549560547, + "learning_rate": 9.210069098087224e-05, + "loss": 5.2358, + "step": 211500 + }, + { + "epoch": 0.9105935322248097, + "grad_norm": 2.181636333465576, + "learning_rate": 9.209704001893989e-05, + "loss": 4.1611, + "step": 211550 + }, + { + "epoch": 0.9108087516840923, + "grad_norm": 2.144559621810913, + "learning_rate": 9.209338828588668e-05, + "loss": 4.8991, + "step": 211600 + }, + { + "epoch": 0.9110239711433749, + "grad_norm": 2.6905360221862793, + "learning_rate": 9.208973578177956e-05, + "loss": 5.0691, + "step": 211650 + }, + { + "epoch": 0.9112391906026576, + "grad_norm": 2.0830562114715576, + "learning_rate": 9.208608250668536e-05, + "loss": 4.5661, + "step": 211700 + }, + { + "epoch": 0.9114544100619402, + "grad_norm": 1.0527058839797974, + "learning_rate": 9.208242846067105e-05, + "loss": 4.1824, + "step": 211750 + }, + { + "epoch": 0.9116696295212228, + "grad_norm": 2.095397472381592, + "learning_rate": 9.207877364380357e-05, + "loss": 4.3293, + "step": 211800 + }, + { + "epoch": 0.9118848489805054, + "grad_norm": 4.248065948486328, + "learning_rate": 9.207511805614983e-05, + "loss": 4.7603, + "step": 211850 + }, + { + "epoch": 0.9121000684397881, + "grad_norm": 3.058973789215088, + "learning_rate": 9.207146169777683e-05, + "loss": 4.7862, + "step": 211900 + }, + { + "epoch": 0.9123152878990707, + "grad_norm": 3.116267204284668, + "learning_rate": 9.206780456875152e-05, + "loss": 4.717, + "step": 211950 + }, + { + "epoch": 0.9125305073583533, + "grad_norm": 2.606358289718628, + "learning_rate": 9.20641466691409e-05, + "loss": 4.6345, + "step": 212000 + }, + { + "epoch": 0.912745726817636, + "grad_norm": 2.2412073612213135, + "learning_rate": 9.206048799901195e-05, + "loss": 4.502, + "step": 212050 + }, + { + "epoch": 0.9129609462769186, + "grad_norm": 0.40843284130096436, + "learning_rate": 9.205682855843175e-05, + "loss": 4.7483, + "step": 212100 + }, + { + "epoch": 0.9131761657362012, + "grad_norm": 2.1941137313842773, + "learning_rate": 9.205316834746727e-05, + "loss": 4.533, + "step": 212150 + }, + { + "epoch": 0.9133913851954838, + "grad_norm": 2.9520578384399414, + "learning_rate": 9.204950736618558e-05, + "loss": 4.8484, + "step": 212200 + }, + { + "epoch": 0.9136066046547665, + "grad_norm": 3.4460365772247314, + "learning_rate": 9.204584561465374e-05, + "loss": 4.7851, + "step": 212250 + }, + { + "epoch": 0.9138218241140491, + "grad_norm": 3.2139840126037598, + "learning_rate": 9.204218309293882e-05, + "loss": 4.4047, + "step": 212300 + }, + { + "epoch": 0.9140370435733317, + "grad_norm": 3.2896246910095215, + "learning_rate": 9.203851980110793e-05, + "loss": 4.5406, + "step": 212350 + }, + { + "epoch": 0.9142522630326143, + "grad_norm": 2.1088006496429443, + "learning_rate": 9.203485573922814e-05, + "loss": 5.1953, + "step": 212400 + }, + { + "epoch": 0.914467482491897, + "grad_norm": 0.5804000496864319, + "learning_rate": 9.20311909073666e-05, + "loss": 4.6377, + "step": 212450 + }, + { + "epoch": 0.9146827019511796, + "grad_norm": 2.478039503097534, + "learning_rate": 9.202752530559043e-05, + "loss": 4.6424, + "step": 212500 + }, + { + "epoch": 0.9148979214104622, + "grad_norm": 1.838805913925171, + "learning_rate": 9.202385893396677e-05, + "loss": 4.3209, + "step": 212550 + }, + { + "epoch": 0.9151131408697449, + "grad_norm": 2.4096176624298096, + "learning_rate": 9.202019179256276e-05, + "loss": 4.4838, + "step": 212600 + }, + { + "epoch": 0.9153283603290275, + "grad_norm": 3.8941469192504883, + "learning_rate": 9.201652388144561e-05, + "loss": 4.6727, + "step": 212650 + }, + { + "epoch": 0.9155435797883101, + "grad_norm": 3.462390899658203, + "learning_rate": 9.20128552006825e-05, + "loss": 5.0731, + "step": 212700 + }, + { + "epoch": 0.9157587992475927, + "grad_norm": 1.132787823677063, + "learning_rate": 9.200918575034061e-05, + "loss": 4.415, + "step": 212750 + }, + { + "epoch": 0.9159740187068754, + "grad_norm": 0.894419252872467, + "learning_rate": 9.200551553048718e-05, + "loss": 4.2915, + "step": 212800 + }, + { + "epoch": 0.916189238166158, + "grad_norm": 1.4691413640975952, + "learning_rate": 9.200184454118943e-05, + "loss": 5.4543, + "step": 212850 + }, + { + "epoch": 0.9164044576254406, + "grad_norm": 3.8485777378082275, + "learning_rate": 9.19981727825146e-05, + "loss": 4.6289, + "step": 212900 + }, + { + "epoch": 0.9166196770847233, + "grad_norm": 2.6266250610351562, + "learning_rate": 9.199450025452995e-05, + "loss": 4.4756, + "step": 212950 + }, + { + "epoch": 0.916834896544006, + "grad_norm": 1.1112397909164429, + "learning_rate": 9.199082695730276e-05, + "loss": 4.5122, + "step": 213000 + }, + { + "epoch": 0.916834896544006, + "eval_loss": 5.171109676361084, + "eval_runtime": 35.0429, + "eval_samples_per_second": 18.263, + "eval_steps_per_second": 9.132, + "eval_tts_loss": 6.774447509281595, + "step": 213000 + }, + { + "epoch": 0.9170501160032886, + "grad_norm": 1.096110463142395, + "learning_rate": 9.19871528909003e-05, + "loss": 4.699, + "step": 213050 + }, + { + "epoch": 0.9172653354625712, + "grad_norm": 2.3370230197906494, + "learning_rate": 9.198347805538989e-05, + "loss": 4.1441, + "step": 213100 + }, + { + "epoch": 0.9174805549218538, + "grad_norm": 1.9358984231948853, + "learning_rate": 9.197980245083883e-05, + "loss": 4.8295, + "step": 213150 + }, + { + "epoch": 0.9176957743811365, + "grad_norm": 0.3320905864238739, + "learning_rate": 9.197612607731447e-05, + "loss": 4.8421, + "step": 213200 + }, + { + "epoch": 0.9179109938404191, + "grad_norm": 1.7038615942001343, + "learning_rate": 9.197244893488412e-05, + "loss": 4.458, + "step": 213250 + }, + { + "epoch": 0.9181262132997017, + "grad_norm": 1.7396684885025024, + "learning_rate": 9.196877102361516e-05, + "loss": 4.7038, + "step": 213300 + }, + { + "epoch": 0.9183414327589844, + "grad_norm": 2.9147207736968994, + "learning_rate": 9.196509234357496e-05, + "loss": 4.7911, + "step": 213350 + }, + { + "epoch": 0.918556652218267, + "grad_norm": 3.788060188293457, + "learning_rate": 9.196141289483088e-05, + "loss": 4.8906, + "step": 213400 + }, + { + "epoch": 0.9187718716775496, + "grad_norm": 3.05705189704895, + "learning_rate": 9.195773267745035e-05, + "loss": 5.0452, + "step": 213450 + }, + { + "epoch": 0.9189870911368322, + "grad_norm": 2.4937164783477783, + "learning_rate": 9.195405169150078e-05, + "loss": 4.6399, + "step": 213500 + }, + { + "epoch": 0.9192023105961149, + "grad_norm": 2.3273544311523438, + "learning_rate": 9.195036993704958e-05, + "loss": 4.3965, + "step": 213550 + }, + { + "epoch": 0.9194175300553975, + "grad_norm": 0.6431455612182617, + "learning_rate": 9.194668741416421e-05, + "loss": 4.2441, + "step": 213600 + }, + { + "epoch": 0.9196327495146801, + "grad_norm": 2.8459396362304688, + "learning_rate": 9.19430041229121e-05, + "loss": 4.7691, + "step": 213650 + }, + { + "epoch": 0.9198479689739627, + "grad_norm": 2.2122514247894287, + "learning_rate": 9.193932006336074e-05, + "loss": 5.1573, + "step": 213700 + }, + { + "epoch": 0.9200631884332454, + "grad_norm": 2.6618592739105225, + "learning_rate": 9.193563523557762e-05, + "loss": 4.9461, + "step": 213750 + }, + { + "epoch": 0.920278407892528, + "grad_norm": 2.6928610801696777, + "learning_rate": 9.193194963963021e-05, + "loss": 4.2754, + "step": 213800 + }, + { + "epoch": 0.9204936273518106, + "grad_norm": 0.5619308948516846, + "learning_rate": 9.192826327558606e-05, + "loss": 4.6218, + "step": 213850 + }, + { + "epoch": 0.9207088468110932, + "grad_norm": 3.5764145851135254, + "learning_rate": 9.192457614351266e-05, + "loss": 4.4492, + "step": 213900 + }, + { + "epoch": 0.9209240662703759, + "grad_norm": 6.4995808601379395, + "learning_rate": 9.192088824347755e-05, + "loss": 4.7557, + "step": 213950 + }, + { + "epoch": 0.9211392857296585, + "grad_norm": 2.358064889907837, + "learning_rate": 9.191719957554832e-05, + "loss": 5.1503, + "step": 214000 + }, + { + "epoch": 0.9213545051889411, + "grad_norm": 2.9496219158172607, + "learning_rate": 9.191351013979251e-05, + "loss": 4.3424, + "step": 214050 + }, + { + "epoch": 0.9215697246482238, + "grad_norm": 0.6291042566299438, + "learning_rate": 9.190981993627769e-05, + "loss": 4.5665, + "step": 214100 + }, + { + "epoch": 0.9217849441075064, + "grad_norm": 2.178621530532837, + "learning_rate": 9.190612896507149e-05, + "loss": 4.99, + "step": 214150 + }, + { + "epoch": 0.922000163566789, + "grad_norm": 1.9590697288513184, + "learning_rate": 9.19024372262415e-05, + "loss": 4.5051, + "step": 214200 + }, + { + "epoch": 0.9222153830260716, + "grad_norm": 3.022777795791626, + "learning_rate": 9.189874471985533e-05, + "loss": 4.5796, + "step": 214250 + }, + { + "epoch": 0.9224306024853544, + "grad_norm": 2.7473304271698, + "learning_rate": 9.189505144598065e-05, + "loss": 4.6559, + "step": 214300 + }, + { + "epoch": 0.922645821944637, + "grad_norm": 2.3373770713806152, + "learning_rate": 9.18913574046851e-05, + "loss": 4.3368, + "step": 214350 + }, + { + "epoch": 0.9228610414039196, + "grad_norm": 3.2489006519317627, + "learning_rate": 9.188766259603634e-05, + "loss": 5.0428, + "step": 214400 + }, + { + "epoch": 0.9230762608632022, + "grad_norm": 2.7824440002441406, + "learning_rate": 9.188396702010204e-05, + "loss": 4.9454, + "step": 214450 + }, + { + "epoch": 0.9232914803224849, + "grad_norm": 2.257598876953125, + "learning_rate": 9.188027067694994e-05, + "loss": 4.8431, + "step": 214500 + }, + { + "epoch": 0.9235066997817675, + "grad_norm": 0.48804542422294617, + "learning_rate": 9.18765735666477e-05, + "loss": 4.6332, + "step": 214550 + }, + { + "epoch": 0.9237219192410501, + "grad_norm": 2.3636903762817383, + "learning_rate": 9.187287568926306e-05, + "loss": 4.6245, + "step": 214600 + }, + { + "epoch": 0.9239371387003328, + "grad_norm": 4.08838415145874, + "learning_rate": 9.186917704486375e-05, + "loss": 5.07, + "step": 214650 + }, + { + "epoch": 0.9241523581596154, + "grad_norm": 2.4671874046325684, + "learning_rate": 9.186547763351754e-05, + "loss": 5.0338, + "step": 214700 + }, + { + "epoch": 0.924367577618898, + "grad_norm": 2.61061692237854, + "learning_rate": 9.186177745529218e-05, + "loss": 4.0324, + "step": 214750 + }, + { + "epoch": 0.9245827970781806, + "grad_norm": 1.1905573606491089, + "learning_rate": 9.185807651025545e-05, + "loss": 4.1213, + "step": 214800 + }, + { + "epoch": 0.9247980165374633, + "grad_norm": 0.7724317908287048, + "learning_rate": 9.185437479847512e-05, + "loss": 4.6979, + "step": 214850 + }, + { + "epoch": 0.9250132359967459, + "grad_norm": 4.8311262130737305, + "learning_rate": 9.185067232001904e-05, + "loss": 4.6785, + "step": 214900 + }, + { + "epoch": 0.9252284554560285, + "grad_norm": 2.518062114715576, + "learning_rate": 9.184696907495502e-05, + "loss": 4.5607, + "step": 214950 + }, + { + "epoch": 0.9254436749153111, + "grad_norm": 3.2859668731689453, + "learning_rate": 9.184326506335087e-05, + "loss": 4.9168, + "step": 215000 + }, + { + "epoch": 0.9256588943745938, + "grad_norm": 0.5349797010421753, + "learning_rate": 9.183956028527447e-05, + "loss": 4.5915, + "step": 215050 + }, + { + "epoch": 0.9258741138338764, + "grad_norm": 5.9576849937438965, + "learning_rate": 9.183585474079366e-05, + "loss": 4.9939, + "step": 215100 + }, + { + "epoch": 0.926089333293159, + "grad_norm": 2.3628227710723877, + "learning_rate": 9.183214842997632e-05, + "loss": 4.8129, + "step": 215150 + }, + { + "epoch": 0.9263045527524416, + "grad_norm": 1.5128554105758667, + "learning_rate": 9.182844135289035e-05, + "loss": 4.8233, + "step": 215200 + }, + { + "epoch": 0.9265197722117243, + "grad_norm": 1.279036521911621, + "learning_rate": 9.182473350960365e-05, + "loss": 4.7568, + "step": 215250 + }, + { + "epoch": 0.9267349916710069, + "grad_norm": 2.1423206329345703, + "learning_rate": 9.182102490018414e-05, + "loss": 4.8749, + "step": 215300 + }, + { + "epoch": 0.9269502111302895, + "grad_norm": 2.361527681350708, + "learning_rate": 9.181731552469976e-05, + "loss": 4.5408, + "step": 215350 + }, + { + "epoch": 0.9271654305895722, + "grad_norm": 1.5605239868164062, + "learning_rate": 9.181360538321843e-05, + "loss": 4.6553, + "step": 215400 + }, + { + "epoch": 0.9273806500488548, + "grad_norm": 2.627577066421509, + "learning_rate": 9.180989447580816e-05, + "loss": 5.0843, + "step": 215450 + }, + { + "epoch": 0.9275958695081374, + "grad_norm": 2.1100096702575684, + "learning_rate": 9.180618280253688e-05, + "loss": 4.7549, + "step": 215500 + }, + { + "epoch": 0.92781108896742, + "grad_norm": 2.302618980407715, + "learning_rate": 9.18024703634726e-05, + "loss": 4.6129, + "step": 215550 + }, + { + "epoch": 0.9280263084267028, + "grad_norm": 0.4425703287124634, + "learning_rate": 9.179875715868333e-05, + "loss": 4.7223, + "step": 215600 + }, + { + "epoch": 0.9282415278859854, + "grad_norm": 1.1341710090637207, + "learning_rate": 9.179504318823706e-05, + "loss": 4.3102, + "step": 215650 + }, + { + "epoch": 0.928456747345268, + "grad_norm": 4.283944129943848, + "learning_rate": 9.179132845220184e-05, + "loss": 4.4137, + "step": 215700 + }, + { + "epoch": 0.9286719668045506, + "grad_norm": 4.843629360198975, + "learning_rate": 9.178761295064573e-05, + "loss": 4.6771, + "step": 215750 + }, + { + "epoch": 0.9288871862638333, + "grad_norm": 1.7146480083465576, + "learning_rate": 9.178389668363676e-05, + "loss": 4.8551, + "step": 215800 + }, + { + "epoch": 0.9291024057231159, + "grad_norm": 2.6116135120391846, + "learning_rate": 9.178017965124301e-05, + "loss": 4.725, + "step": 215850 + }, + { + "epoch": 0.9293176251823985, + "grad_norm": 2.9063844680786133, + "learning_rate": 9.177646185353259e-05, + "loss": 4.8229, + "step": 215900 + }, + { + "epoch": 0.9295328446416812, + "grad_norm": 0.9033623337745667, + "learning_rate": 9.17727432905736e-05, + "loss": 4.4919, + "step": 215950 + }, + { + "epoch": 0.9297480641009638, + "grad_norm": 1.1661771535873413, + "learning_rate": 9.176902396243411e-05, + "loss": 4.7516, + "step": 216000 + }, + { + "epoch": 0.9297480641009638, + "eval_loss": 5.181820869445801, + "eval_runtime": 34.9624, + "eval_samples_per_second": 18.305, + "eval_steps_per_second": 9.153, + "eval_tts_loss": 6.753472063002649, + "step": 216000 + }, + { + "epoch": 0.9299632835602464, + "grad_norm": 2.6374380588531494, + "learning_rate": 9.17653038691823e-05, + "loss": 5.0099, + "step": 216050 + }, + { + "epoch": 0.930178503019529, + "grad_norm": 1.4853732585906982, + "learning_rate": 9.176158301088629e-05, + "loss": 4.5359, + "step": 216100 + }, + { + "epoch": 0.9303937224788117, + "grad_norm": 0.8401529788970947, + "learning_rate": 9.175786138761423e-05, + "loss": 4.897, + "step": 216150 + }, + { + "epoch": 0.9306089419380943, + "grad_norm": 2.164759397506714, + "learning_rate": 9.175413899943431e-05, + "loss": 4.494, + "step": 216200 + }, + { + "epoch": 0.9308241613973769, + "grad_norm": 2.557626962661743, + "learning_rate": 9.175041584641471e-05, + "loss": 5.0919, + "step": 216250 + }, + { + "epoch": 0.9310393808566595, + "grad_norm": 2.014174461364746, + "learning_rate": 9.174669192862364e-05, + "loss": 4.8237, + "step": 216300 + }, + { + "epoch": 0.9312546003159422, + "grad_norm": 1.944980502128601, + "learning_rate": 9.174296724612929e-05, + "loss": 4.7425, + "step": 216350 + }, + { + "epoch": 0.9314698197752248, + "grad_norm": 3.971315860748291, + "learning_rate": 9.173924179899991e-05, + "loss": 4.5884, + "step": 216400 + }, + { + "epoch": 0.9316850392345074, + "grad_norm": 2.7441742420196533, + "learning_rate": 9.173551558730373e-05, + "loss": 4.8684, + "step": 216450 + }, + { + "epoch": 0.93190025869379, + "grad_norm": 0.753288745880127, + "learning_rate": 9.1731788611109e-05, + "loss": 4.7047, + "step": 216500 + }, + { + "epoch": 0.9321154781530727, + "grad_norm": 2.2101492881774902, + "learning_rate": 9.172806087048399e-05, + "loss": 4.8968, + "step": 216550 + }, + { + "epoch": 0.9323306976123553, + "grad_norm": 1.0013676881790161, + "learning_rate": 9.1724332365497e-05, + "loss": 4.5713, + "step": 216600 + }, + { + "epoch": 0.9325459170716379, + "grad_norm": 0.9127768874168396, + "learning_rate": 9.172060309621631e-05, + "loss": 4.8337, + "step": 216650 + }, + { + "epoch": 0.9327611365309206, + "grad_norm": 1.8990319967269897, + "learning_rate": 9.171687306271026e-05, + "loss": 4.7991, + "step": 216700 + }, + { + "epoch": 0.9329763559902032, + "grad_norm": 3.4745662212371826, + "learning_rate": 9.171314226504714e-05, + "loss": 4.9196, + "step": 216750 + }, + { + "epoch": 0.9331915754494858, + "grad_norm": 2.11970591545105, + "learning_rate": 9.17094107032953e-05, + "loss": 4.5757, + "step": 216800 + }, + { + "epoch": 0.9334067949087684, + "grad_norm": 3.4960479736328125, + "learning_rate": 9.17056783775231e-05, + "loss": 4.6733, + "step": 216850 + }, + { + "epoch": 0.9336220143680511, + "grad_norm": 1.4563487768173218, + "learning_rate": 9.170194528779892e-05, + "loss": 5.0008, + "step": 216900 + }, + { + "epoch": 0.9338372338273337, + "grad_norm": 2.71846342086792, + "learning_rate": 9.169821143419113e-05, + "loss": 4.8237, + "step": 216950 + }, + { + "epoch": 0.9340524532866163, + "grad_norm": 1.740983247756958, + "learning_rate": 9.169447681676811e-05, + "loss": 4.4916, + "step": 217000 + }, + { + "epoch": 0.934267672745899, + "grad_norm": 2.4219977855682373, + "learning_rate": 9.169074143559829e-05, + "loss": 4.5694, + "step": 217050 + }, + { + "epoch": 0.9344828922051817, + "grad_norm": 2.4786744117736816, + "learning_rate": 9.168700529075007e-05, + "loss": 5.0888, + "step": 217100 + }, + { + "epoch": 0.9346981116644643, + "grad_norm": 2.24969482421875, + "learning_rate": 9.16832683822919e-05, + "loss": 4.4377, + "step": 217150 + }, + { + "epoch": 0.9349133311237469, + "grad_norm": 4.955018997192383, + "learning_rate": 9.167953071029227e-05, + "loss": 4.3669, + "step": 217200 + }, + { + "epoch": 0.9351285505830295, + "grad_norm": 2.849762201309204, + "learning_rate": 9.16757922748196e-05, + "loss": 4.5016, + "step": 217250 + }, + { + "epoch": 0.9353437700423122, + "grad_norm": 2.670767307281494, + "learning_rate": 9.167205307594236e-05, + "loss": 4.8488, + "step": 217300 + }, + { + "epoch": 0.9355589895015948, + "grad_norm": 1.477488398551941, + "learning_rate": 9.166831311372908e-05, + "loss": 4.4574, + "step": 217350 + }, + { + "epoch": 0.9357742089608774, + "grad_norm": 2.999546766281128, + "learning_rate": 9.166457238824824e-05, + "loss": 4.1601, + "step": 217400 + }, + { + "epoch": 0.9359894284201601, + "grad_norm": 2.1389389038085938, + "learning_rate": 9.16608308995684e-05, + "loss": 4.5245, + "step": 217450 + }, + { + "epoch": 0.9362046478794427, + "grad_norm": 2.7869815826416016, + "learning_rate": 9.165708864775803e-05, + "loss": 4.905, + "step": 217500 + }, + { + "epoch": 0.9364198673387253, + "grad_norm": 2.262612819671631, + "learning_rate": 9.165334563288575e-05, + "loss": 4.8335, + "step": 217550 + }, + { + "epoch": 0.9366350867980079, + "grad_norm": 0.8138198256492615, + "learning_rate": 9.164960185502009e-05, + "loss": 4.7711, + "step": 217600 + }, + { + "epoch": 0.9368503062572906, + "grad_norm": 1.7178915739059448, + "learning_rate": 9.164585731422961e-05, + "loss": 4.9453, + "step": 217650 + }, + { + "epoch": 0.9370655257165732, + "grad_norm": 3.5525050163269043, + "learning_rate": 9.164211201058294e-05, + "loss": 5.152, + "step": 217700 + }, + { + "epoch": 0.9372807451758558, + "grad_norm": 1.8736685514450073, + "learning_rate": 9.163836594414865e-05, + "loss": 4.4637, + "step": 217750 + }, + { + "epoch": 0.9374959646351384, + "grad_norm": 2.0679638385772705, + "learning_rate": 9.163461911499538e-05, + "loss": 4.8467, + "step": 217800 + }, + { + "epoch": 0.9377111840944211, + "grad_norm": 1.7733451128005981, + "learning_rate": 9.163087152319176e-05, + "loss": 5.1074, + "step": 217850 + }, + { + "epoch": 0.9379264035537037, + "grad_norm": 2.020991325378418, + "learning_rate": 9.162712316880643e-05, + "loss": 4.54, + "step": 217900 + }, + { + "epoch": 0.9381416230129863, + "grad_norm": 2.9154160022735596, + "learning_rate": 9.162337405190806e-05, + "loss": 4.9897, + "step": 217950 + }, + { + "epoch": 0.938356842472269, + "grad_norm": 0.4434339702129364, + "learning_rate": 9.161962417256534e-05, + "loss": 4.6777, + "step": 218000 + }, + { + "epoch": 0.9385720619315516, + "grad_norm": 2.1341159343719482, + "learning_rate": 9.161587353084692e-05, + "loss": 4.9788, + "step": 218050 + }, + { + "epoch": 0.9387872813908342, + "grad_norm": 2.9569103717803955, + "learning_rate": 9.161212212682153e-05, + "loss": 4.2757, + "step": 218100 + }, + { + "epoch": 0.9390025008501168, + "grad_norm": 3.149364471435547, + "learning_rate": 9.160836996055787e-05, + "loss": 4.5592, + "step": 218150 + }, + { + "epoch": 0.9392177203093995, + "grad_norm": 4.377221584320068, + "learning_rate": 9.16046170321247e-05, + "loss": 4.6144, + "step": 218200 + }, + { + "epoch": 0.9394329397686821, + "grad_norm": 2.106987953186035, + "learning_rate": 9.160086334159074e-05, + "loss": 4.8326, + "step": 218250 + }, + { + "epoch": 0.9396481592279647, + "grad_norm": 3.4716193675994873, + "learning_rate": 9.159710888902475e-05, + "loss": 4.5885, + "step": 218300 + }, + { + "epoch": 0.9398633786872473, + "grad_norm": 0.9223523736000061, + "learning_rate": 9.159335367449551e-05, + "loss": 4.3913, + "step": 218350 + }, + { + "epoch": 0.94007859814653, + "grad_norm": 2.654120445251465, + "learning_rate": 9.158959769807182e-05, + "loss": 5.0856, + "step": 218400 + }, + { + "epoch": 0.9402938176058127, + "grad_norm": 1.7512197494506836, + "learning_rate": 9.158584095982246e-05, + "loss": 4.4385, + "step": 218450 + }, + { + "epoch": 0.9405090370650953, + "grad_norm": 1.640625238418579, + "learning_rate": 9.158208345981625e-05, + "loss": 4.9936, + "step": 218500 + }, + { + "epoch": 0.9407242565243779, + "grad_norm": 0.6605635285377502, + "learning_rate": 9.157832519812202e-05, + "loss": 4.5022, + "step": 218550 + }, + { + "epoch": 0.9409394759836606, + "grad_norm": 3.9816722869873047, + "learning_rate": 9.15745661748086e-05, + "loss": 5.0975, + "step": 218600 + }, + { + "epoch": 0.9411546954429432, + "grad_norm": 3.3800270557403564, + "learning_rate": 9.157080638994487e-05, + "loss": 4.8879, + "step": 218650 + }, + { + "epoch": 0.9413699149022258, + "grad_norm": 2.4751977920532227, + "learning_rate": 9.15670458435997e-05, + "loss": 4.8781, + "step": 218700 + }, + { + "epoch": 0.9415851343615085, + "grad_norm": 0.6990488767623901, + "learning_rate": 9.156328453584195e-05, + "loss": 4.8543, + "step": 218750 + }, + { + "epoch": 0.9418003538207911, + "grad_norm": 3.491647481918335, + "learning_rate": 9.155952246674056e-05, + "loss": 4.7385, + "step": 218800 + }, + { + "epoch": 0.9420155732800737, + "grad_norm": 2.2876505851745605, + "learning_rate": 9.155575963636438e-05, + "loss": 4.784, + "step": 218850 + }, + { + "epoch": 0.9422307927393563, + "grad_norm": 3.915524482727051, + "learning_rate": 9.15519960447824e-05, + "loss": 4.6232, + "step": 218900 + }, + { + "epoch": 0.942446012198639, + "grad_norm": 4.080489635467529, + "learning_rate": 9.154823169206351e-05, + "loss": 4.9492, + "step": 218950 + }, + { + "epoch": 0.9426612316579216, + "grad_norm": 2.14570689201355, + "learning_rate": 9.15444665782767e-05, + "loss": 4.5403, + "step": 219000 + }, + { + "epoch": 0.9426612316579216, + "eval_loss": 5.17002534866333, + "eval_runtime": 34.9117, + "eval_samples_per_second": 18.332, + "eval_steps_per_second": 9.166, + "eval_tts_loss": 6.7866895022351335, + "step": 219000 + }, + { + "epoch": 0.9428764511172042, + "grad_norm": 2.3471531867980957, + "learning_rate": 9.154070070349092e-05, + "loss": 5.0573, + "step": 219050 + }, + { + "epoch": 0.9430916705764868, + "grad_norm": 1.987949252128601, + "learning_rate": 9.153693406777517e-05, + "loss": 5.0149, + "step": 219100 + }, + { + "epoch": 0.9433068900357695, + "grad_norm": 3.0373172760009766, + "learning_rate": 9.153316667119843e-05, + "loss": 4.9036, + "step": 219150 + }, + { + "epoch": 0.9435221094950521, + "grad_norm": 0.7948267459869385, + "learning_rate": 9.15293985138297e-05, + "loss": 4.2614, + "step": 219200 + }, + { + "epoch": 0.9437373289543347, + "grad_norm": 2.0668416023254395, + "learning_rate": 9.152562959573803e-05, + "loss": 4.6125, + "step": 219250 + }, + { + "epoch": 0.9439525484136174, + "grad_norm": 1.2969601154327393, + "learning_rate": 9.152185991699244e-05, + "loss": 4.7391, + "step": 219300 + }, + { + "epoch": 0.9441677678729, + "grad_norm": 2.921823501586914, + "learning_rate": 9.151808947766199e-05, + "loss": 4.4091, + "step": 219350 + }, + { + "epoch": 0.9443829873321826, + "grad_norm": 2.0639901161193848, + "learning_rate": 9.151431827781574e-05, + "loss": 4.4764, + "step": 219400 + }, + { + "epoch": 0.9445982067914652, + "grad_norm": 4.735141277313232, + "learning_rate": 9.151054631752277e-05, + "loss": 4.6149, + "step": 219450 + }, + { + "epoch": 0.9448134262507479, + "grad_norm": 0.8125091791152954, + "learning_rate": 9.150677359685217e-05, + "loss": 4.7268, + "step": 219500 + }, + { + "epoch": 0.9450286457100305, + "grad_norm": 0.6390477418899536, + "learning_rate": 9.150300011587307e-05, + "loss": 4.8407, + "step": 219550 + }, + { + "epoch": 0.9452438651693131, + "grad_norm": 2.4249744415283203, + "learning_rate": 9.149922587465456e-05, + "loss": 4.7542, + "step": 219600 + }, + { + "epoch": 0.9454590846285957, + "grad_norm": 3.3037729263305664, + "learning_rate": 9.14954508732658e-05, + "loss": 4.5262, + "step": 219650 + }, + { + "epoch": 0.9456743040878784, + "grad_norm": 3.563939094543457, + "learning_rate": 9.149167511177593e-05, + "loss": 4.8408, + "step": 219700 + }, + { + "epoch": 0.945889523547161, + "grad_norm": 2.4274580478668213, + "learning_rate": 9.148789859025411e-05, + "loss": 4.8734, + "step": 219750 + }, + { + "epoch": 0.9461047430064436, + "grad_norm": 2.4242115020751953, + "learning_rate": 9.148412130876953e-05, + "loss": 5.0456, + "step": 219800 + }, + { + "epoch": 0.9463199624657262, + "grad_norm": 2.83921217918396, + "learning_rate": 9.148034326739136e-05, + "loss": 4.5626, + "step": 219850 + }, + { + "epoch": 0.946535181925009, + "grad_norm": 2.262019157409668, + "learning_rate": 9.147656446618881e-05, + "loss": 4.6741, + "step": 219900 + }, + { + "epoch": 0.9467504013842916, + "grad_norm": 1.6101288795471191, + "learning_rate": 9.147278490523112e-05, + "loss": 4.8864, + "step": 219950 + }, + { + "epoch": 0.9469656208435742, + "grad_norm": 2.241091251373291, + "learning_rate": 9.14690045845875e-05, + "loss": 5.2555, + "step": 220000 + }, + { + "epoch": 0.9471808403028569, + "grad_norm": 4.409801006317139, + "learning_rate": 9.146522350432721e-05, + "loss": 4.7686, + "step": 220050 + }, + { + "epoch": 0.9473960597621395, + "grad_norm": 2.007331132888794, + "learning_rate": 9.146144166451952e-05, + "loss": 4.7054, + "step": 220100 + }, + { + "epoch": 0.9476112792214221, + "grad_norm": 3.1247661113739014, + "learning_rate": 9.145765906523366e-05, + "loss": 5.0068, + "step": 220150 + }, + { + "epoch": 0.9478264986807047, + "grad_norm": 2.900730848312378, + "learning_rate": 9.145387570653896e-05, + "loss": 4.4453, + "step": 220200 + }, + { + "epoch": 0.9480417181399874, + "grad_norm": 0.9321229457855225, + "learning_rate": 9.145009158850472e-05, + "loss": 4.7225, + "step": 220250 + }, + { + "epoch": 0.94825693759927, + "grad_norm": 1.3136084079742432, + "learning_rate": 9.144630671120025e-05, + "loss": 4.6564, + "step": 220300 + }, + { + "epoch": 0.9484721570585526, + "grad_norm": 1.5740565061569214, + "learning_rate": 9.144252107469486e-05, + "loss": 4.4032, + "step": 220350 + }, + { + "epoch": 0.9486873765178352, + "grad_norm": 1.6706678867340088, + "learning_rate": 9.143873467905792e-05, + "loss": 4.9681, + "step": 220400 + }, + { + "epoch": 0.9489025959771179, + "grad_norm": 2.6994752883911133, + "learning_rate": 9.143494752435878e-05, + "loss": 4.3441, + "step": 220450 + }, + { + "epoch": 0.9491178154364005, + "grad_norm": 2.842681407928467, + "learning_rate": 9.143115961066682e-05, + "loss": 4.6664, + "step": 220500 + }, + { + "epoch": 0.9493330348956831, + "grad_norm": 2.538983106613159, + "learning_rate": 9.142737093805141e-05, + "loss": 4.6475, + "step": 220550 + }, + { + "epoch": 0.9495482543549657, + "grad_norm": 4.219677448272705, + "learning_rate": 9.142358150658195e-05, + "loss": 4.0581, + "step": 220600 + }, + { + "epoch": 0.9497634738142484, + "grad_norm": 1.2723932266235352, + "learning_rate": 9.141979131632788e-05, + "loss": 4.3031, + "step": 220650 + }, + { + "epoch": 0.949978693273531, + "grad_norm": 1.9945741891860962, + "learning_rate": 9.14160003673586e-05, + "loss": 4.8803, + "step": 220700 + }, + { + "epoch": 0.9501939127328136, + "grad_norm": 2.9867846965789795, + "learning_rate": 9.141220865974356e-05, + "loss": 4.6771, + "step": 220750 + }, + { + "epoch": 0.9504091321920963, + "grad_norm": 2.784268379211426, + "learning_rate": 9.14084161935522e-05, + "loss": 4.5006, + "step": 220800 + }, + { + "epoch": 0.9506243516513789, + "grad_norm": 1.3642460107803345, + "learning_rate": 9.140462296885404e-05, + "loss": 4.4409, + "step": 220850 + }, + { + "epoch": 0.9508395711106615, + "grad_norm": 3.6864566802978516, + "learning_rate": 9.140082898571849e-05, + "loss": 4.5369, + "step": 220900 + }, + { + "epoch": 0.9510547905699441, + "grad_norm": 1.685895323753357, + "learning_rate": 9.139703424421511e-05, + "loss": 4.9515, + "step": 220950 + }, + { + "epoch": 0.9512700100292268, + "grad_norm": 0.8786101341247559, + "learning_rate": 9.139323874441338e-05, + "loss": 4.8069, + "step": 221000 + }, + { + "epoch": 0.9514852294885094, + "grad_norm": 0.7255077958106995, + "learning_rate": 9.138944248638282e-05, + "loss": 4.3497, + "step": 221050 + }, + { + "epoch": 0.951700448947792, + "grad_norm": 1.8996869325637817, + "learning_rate": 9.1385645470193e-05, + "loss": 4.8276, + "step": 221100 + }, + { + "epoch": 0.9519156684070746, + "grad_norm": 0.5643138289451599, + "learning_rate": 9.138184769591345e-05, + "loss": 4.8885, + "step": 221150 + }, + { + "epoch": 0.9521308878663574, + "grad_norm": 2.954457998275757, + "learning_rate": 9.137804916361372e-05, + "loss": 5.2135, + "step": 221200 + }, + { + "epoch": 0.95234610732564, + "grad_norm": 3.0054409503936768, + "learning_rate": 9.137424987336342e-05, + "loss": 4.4629, + "step": 221250 + }, + { + "epoch": 0.9525613267849226, + "grad_norm": 2.342942714691162, + "learning_rate": 9.137044982523213e-05, + "loss": 4.5392, + "step": 221300 + }, + { + "epoch": 0.9527765462442053, + "grad_norm": 1.7126119136810303, + "learning_rate": 9.136664901928946e-05, + "loss": 4.8286, + "step": 221350 + }, + { + "epoch": 0.9529917657034879, + "grad_norm": 1.9259474277496338, + "learning_rate": 9.136284745560504e-05, + "loss": 4.6157, + "step": 221400 + }, + { + "epoch": 0.9532069851627705, + "grad_norm": 0.856359601020813, + "learning_rate": 9.135904513424849e-05, + "loss": 4.832, + "step": 221450 + }, + { + "epoch": 0.9534222046220531, + "grad_norm": 2.378783941268921, + "learning_rate": 9.135524205528948e-05, + "loss": 4.3936, + "step": 221500 + }, + { + "epoch": 0.9536374240813358, + "grad_norm": 0.9407606720924377, + "learning_rate": 9.135143821879766e-05, + "loss": 4.707, + "step": 221550 + }, + { + "epoch": 0.9538526435406184, + "grad_norm": 1.129098892211914, + "learning_rate": 9.134763362484272e-05, + "loss": 4.5397, + "step": 221600 + }, + { + "epoch": 0.954067862999901, + "grad_norm": 3.4322848320007324, + "learning_rate": 9.134382827349432e-05, + "loss": 4.8594, + "step": 221650 + }, + { + "epoch": 0.9542830824591836, + "grad_norm": 2.5756781101226807, + "learning_rate": 9.134002216482219e-05, + "loss": 4.7143, + "step": 221700 + }, + { + "epoch": 0.9544983019184663, + "grad_norm": 1.9800719022750854, + "learning_rate": 9.133621529889604e-05, + "loss": 4.5116, + "step": 221750 + }, + { + "epoch": 0.9547135213777489, + "grad_norm": 2.0239362716674805, + "learning_rate": 9.133240767578564e-05, + "loss": 4.3294, + "step": 221800 + }, + { + "epoch": 0.9549287408370315, + "grad_norm": 2.730041980743408, + "learning_rate": 9.132859929556068e-05, + "loss": 4.9555, + "step": 221850 + }, + { + "epoch": 0.9551439602963141, + "grad_norm": 0.8805431723594666, + "learning_rate": 9.132479015829095e-05, + "loss": 4.9598, + "step": 221900 + }, + { + "epoch": 0.9553591797555968, + "grad_norm": 2.8389668464660645, + "learning_rate": 9.132098026404623e-05, + "loss": 4.9576, + "step": 221950 + }, + { + "epoch": 0.9555743992148794, + "grad_norm": 2.966064929962158, + "learning_rate": 9.131716961289629e-05, + "loss": 4.3618, + "step": 222000 + }, + { + "epoch": 0.9555743992148794, + "eval_loss": 5.159615993499756, + "eval_runtime": 34.7594, + "eval_samples_per_second": 18.412, + "eval_steps_per_second": 9.206, + "eval_tts_loss": 6.733908970972779, + "step": 222000 + }, + { + "epoch": 0.955789618674162, + "grad_norm": 0.876314103603363, + "learning_rate": 9.131335820491093e-05, + "loss": 5.0474, + "step": 222050 + }, + { + "epoch": 0.9560048381334447, + "grad_norm": 2.7565948963165283, + "learning_rate": 9.130954604016e-05, + "loss": 4.6738, + "step": 222100 + }, + { + "epoch": 0.9562200575927273, + "grad_norm": 2.510988712310791, + "learning_rate": 9.130573311871331e-05, + "loss": 4.8852, + "step": 222150 + }, + { + "epoch": 0.9564352770520099, + "grad_norm": 1.5714668035507202, + "learning_rate": 9.13019194406407e-05, + "loss": 4.8692, + "step": 222200 + }, + { + "epoch": 0.9566504965112925, + "grad_norm": 3.9121408462524414, + "learning_rate": 9.129810500601203e-05, + "loss": 4.7218, + "step": 222250 + }, + { + "epoch": 0.9568657159705752, + "grad_norm": 3.0269322395324707, + "learning_rate": 9.129428981489716e-05, + "loss": 4.8983, + "step": 222300 + }, + { + "epoch": 0.9570809354298578, + "grad_norm": 3.1303977966308594, + "learning_rate": 9.1290473867366e-05, + "loss": 4.9343, + "step": 222350 + }, + { + "epoch": 0.9572961548891404, + "grad_norm": 2.1964216232299805, + "learning_rate": 9.128665716348843e-05, + "loss": 4.581, + "step": 222400 + }, + { + "epoch": 0.957511374348423, + "grad_norm": 1.3624963760375977, + "learning_rate": 9.128283970333437e-05, + "loss": 4.9012, + "step": 222450 + }, + { + "epoch": 0.9577265938077058, + "grad_norm": 1.7301967144012451, + "learning_rate": 9.127902148697374e-05, + "loss": 4.8973, + "step": 222500 + }, + { + "epoch": 0.9579418132669884, + "grad_norm": 2.4813928604125977, + "learning_rate": 9.12752025144765e-05, + "loss": 4.3869, + "step": 222550 + }, + { + "epoch": 0.958157032726271, + "grad_norm": 1.139331340789795, + "learning_rate": 9.127138278591259e-05, + "loss": 4.7329, + "step": 222600 + }, + { + "epoch": 0.9583722521855537, + "grad_norm": 2.3065154552459717, + "learning_rate": 9.126756230135198e-05, + "loss": 4.4816, + "step": 222650 + }, + { + "epoch": 0.9585874716448363, + "grad_norm": 0.7430478930473328, + "learning_rate": 9.126374106086466e-05, + "loss": 4.8694, + "step": 222700 + }, + { + "epoch": 0.9588026911041189, + "grad_norm": 1.933390736579895, + "learning_rate": 9.12599190645206e-05, + "loss": 4.5705, + "step": 222750 + }, + { + "epoch": 0.9590179105634015, + "grad_norm": 2.6682534217834473, + "learning_rate": 9.125609631238985e-05, + "loss": 4.7754, + "step": 222800 + }, + { + "epoch": 0.9592331300226842, + "grad_norm": 2.1545448303222656, + "learning_rate": 9.125227280454241e-05, + "loss": 4.791, + "step": 222850 + }, + { + "epoch": 0.9594483494819668, + "grad_norm": 1.5944799184799194, + "learning_rate": 9.124844854104833e-05, + "loss": 4.8624, + "step": 222900 + }, + { + "epoch": 0.9596635689412494, + "grad_norm": 3.0188281536102295, + "learning_rate": 9.124462352197763e-05, + "loss": 4.7228, + "step": 222950 + }, + { + "epoch": 0.959878788400532, + "grad_norm": 1.8037546873092651, + "learning_rate": 9.124079774740042e-05, + "loss": 4.7887, + "step": 223000 + }, + { + "epoch": 0.9600940078598147, + "grad_norm": 0.4208369255065918, + "learning_rate": 9.123697121738675e-05, + "loss": 4.7898, + "step": 223050 + }, + { + "epoch": 0.9603092273190973, + "grad_norm": 2.062035322189331, + "learning_rate": 9.123314393200672e-05, + "loss": 4.8014, + "step": 223100 + }, + { + "epoch": 0.9605244467783799, + "grad_norm": 5.0695085525512695, + "learning_rate": 9.122931589133046e-05, + "loss": 4.5829, + "step": 223150 + }, + { + "epoch": 0.9607396662376625, + "grad_norm": 2.5847861766815186, + "learning_rate": 9.122548709542805e-05, + "loss": 5.1497, + "step": 223200 + }, + { + "epoch": 0.9609548856969452, + "grad_norm": 2.8573551177978516, + "learning_rate": 9.122165754436965e-05, + "loss": 4.9274, + "step": 223250 + }, + { + "epoch": 0.9611701051562278, + "grad_norm": 0.77215975522995, + "learning_rate": 9.12178272382254e-05, + "loss": 4.5509, + "step": 223300 + }, + { + "epoch": 0.9613853246155104, + "grad_norm": 1.496648907661438, + "learning_rate": 9.121399617706548e-05, + "loss": 4.8961, + "step": 223350 + }, + { + "epoch": 0.9616005440747931, + "grad_norm": 2.3789288997650146, + "learning_rate": 9.121016436096003e-05, + "loss": 4.6751, + "step": 223400 + }, + { + "epoch": 0.9618157635340757, + "grad_norm": 0.5782761573791504, + "learning_rate": 9.120633178997926e-05, + "loss": 4.6456, + "step": 223450 + }, + { + "epoch": 0.9620309829933583, + "grad_norm": 2.474560499191284, + "learning_rate": 9.12024984641934e-05, + "loss": 4.9452, + "step": 223500 + }, + { + "epoch": 0.9622462024526409, + "grad_norm": 2.273359775543213, + "learning_rate": 9.119866438367263e-05, + "loss": 4.3354, + "step": 223550 + }, + { + "epoch": 0.9624614219119236, + "grad_norm": 2.6458523273468018, + "learning_rate": 9.119482954848719e-05, + "loss": 4.5333, + "step": 223600 + }, + { + "epoch": 0.9626766413712062, + "grad_norm": 3.4650845527648926, + "learning_rate": 9.119099395870733e-05, + "loss": 5.065, + "step": 223650 + }, + { + "epoch": 0.9628918608304888, + "grad_norm": 3.096362352371216, + "learning_rate": 9.118715761440333e-05, + "loss": 5.0723, + "step": 223700 + }, + { + "epoch": 0.9631070802897714, + "grad_norm": 2.803925037384033, + "learning_rate": 9.118332051564542e-05, + "loss": 4.808, + "step": 223750 + }, + { + "epoch": 0.9633222997490541, + "grad_norm": 2.1452648639678955, + "learning_rate": 9.11794826625039e-05, + "loss": 4.8443, + "step": 223800 + }, + { + "epoch": 0.9635375192083367, + "grad_norm": 1.096071481704712, + "learning_rate": 9.117564405504911e-05, + "loss": 4.2212, + "step": 223850 + }, + { + "epoch": 0.9637527386676193, + "grad_norm": 3.9908297061920166, + "learning_rate": 9.117180469335134e-05, + "loss": 4.7193, + "step": 223900 + }, + { + "epoch": 0.963967958126902, + "grad_norm": 1.7383825778961182, + "learning_rate": 9.11679645774809e-05, + "loss": 4.6564, + "step": 223950 + }, + { + "epoch": 0.9641831775861847, + "grad_norm": 2.8826348781585693, + "learning_rate": 9.116412370750813e-05, + "loss": 4.5679, + "step": 224000 + }, + { + "epoch": 0.9643983970454673, + "grad_norm": 1.7313616275787354, + "learning_rate": 9.116028208350341e-05, + "loss": 4.6292, + "step": 224050 + }, + { + "epoch": 0.9646136165047499, + "grad_norm": 2.992847204208374, + "learning_rate": 9.115643970553708e-05, + "loss": 4.4373, + "step": 224100 + }, + { + "epoch": 0.9648288359640326, + "grad_norm": 2.607717990875244, + "learning_rate": 9.115259657367959e-05, + "loss": 5.1069, + "step": 224150 + }, + { + "epoch": 0.9650440554233152, + "grad_norm": 1.7535597085952759, + "learning_rate": 9.114875268800124e-05, + "loss": 4.917, + "step": 224200 + }, + { + "epoch": 0.9652592748825978, + "grad_norm": 1.5660550594329834, + "learning_rate": 9.114490804857253e-05, + "loss": 4.4021, + "step": 224250 + }, + { + "epoch": 0.9654744943418804, + "grad_norm": 4.981210708618164, + "learning_rate": 9.114106265546384e-05, + "loss": 4.8745, + "step": 224300 + }, + { + "epoch": 0.9656897138011631, + "grad_norm": 2.4129064083099365, + "learning_rate": 9.11372165087456e-05, + "loss": 4.5483, + "step": 224350 + }, + { + "epoch": 0.9659049332604457, + "grad_norm": 1.1255158185958862, + "learning_rate": 9.11333696084883e-05, + "loss": 4.7158, + "step": 224400 + }, + { + "epoch": 0.9661201527197283, + "grad_norm": 2.251610517501831, + "learning_rate": 9.112952195476237e-05, + "loss": 4.6297, + "step": 224450 + }, + { + "epoch": 0.9663353721790109, + "grad_norm": 2.7969040870666504, + "learning_rate": 9.112567354763831e-05, + "loss": 4.5159, + "step": 224500 + }, + { + "epoch": 0.9665505916382936, + "grad_norm": 1.8788148164749146, + "learning_rate": 9.112182438718662e-05, + "loss": 4.6206, + "step": 224550 + }, + { + "epoch": 0.9667658110975762, + "grad_norm": 2.32454514503479, + "learning_rate": 9.111797447347778e-05, + "loss": 4.8782, + "step": 224600 + }, + { + "epoch": 0.9669810305568588, + "grad_norm": 2.2680492401123047, + "learning_rate": 9.111412380658233e-05, + "loss": 4.5851, + "step": 224650 + }, + { + "epoch": 0.9671962500161415, + "grad_norm": 3.2955188751220703, + "learning_rate": 9.111027238657082e-05, + "loss": 4.9951, + "step": 224700 + }, + { + "epoch": 0.9674114694754241, + "grad_norm": 1.9885367155075073, + "learning_rate": 9.110642021351377e-05, + "loss": 4.4731, + "step": 224750 + }, + { + "epoch": 0.9676266889347067, + "grad_norm": 2.056717872619629, + "learning_rate": 9.110256728748174e-05, + "loss": 4.2083, + "step": 224800 + }, + { + "epoch": 0.9678419083939893, + "grad_norm": 1.232332468032837, + "learning_rate": 9.109871360854534e-05, + "loss": 4.5725, + "step": 224850 + }, + { + "epoch": 0.968057127853272, + "grad_norm": 1.827211618423462, + "learning_rate": 9.109485917677515e-05, + "loss": 4.7186, + "step": 224900 + }, + { + "epoch": 0.9682723473125546, + "grad_norm": 2.183878183364868, + "learning_rate": 9.109100399224176e-05, + "loss": 4.896, + "step": 224950 + }, + { + "epoch": 0.9684875667718372, + "grad_norm": 0.5790379047393799, + "learning_rate": 9.108714805501579e-05, + "loss": 4.2435, + "step": 225000 + }, + { + "epoch": 0.9684875667718372, + "eval_loss": 5.145532131195068, + "eval_runtime": 34.9499, + "eval_samples_per_second": 18.312, + "eval_steps_per_second": 9.156, + "eval_tts_loss": 6.809307430897733, + "step": 225000 + }, + { + "epoch": 0.9687027862311198, + "grad_norm": 3.3493175506591797, + "learning_rate": 9.10832913651679e-05, + "loss": 4.8177, + "step": 225050 + }, + { + "epoch": 0.9689180056904025, + "grad_norm": 2.400627613067627, + "learning_rate": 9.107943392276869e-05, + "loss": 4.8526, + "step": 225100 + }, + { + "epoch": 0.9691332251496851, + "grad_norm": 0.6994125843048096, + "learning_rate": 9.107557572788884e-05, + "loss": 4.4717, + "step": 225150 + }, + { + "epoch": 0.9693484446089677, + "grad_norm": 3.2892518043518066, + "learning_rate": 9.107171678059902e-05, + "loss": 4.6802, + "step": 225200 + }, + { + "epoch": 0.9695636640682503, + "grad_norm": 3.159379482269287, + "learning_rate": 9.106785708096994e-05, + "loss": 4.6025, + "step": 225250 + }, + { + "epoch": 0.969778883527533, + "grad_norm": 2.710576057434082, + "learning_rate": 9.106399662907228e-05, + "loss": 4.7959, + "step": 225300 + }, + { + "epoch": 0.9699941029868157, + "grad_norm": 3.541262149810791, + "learning_rate": 9.106013542497677e-05, + "loss": 4.9035, + "step": 225350 + }, + { + "epoch": 0.9702093224460983, + "grad_norm": 2.8123466968536377, + "learning_rate": 9.105627346875412e-05, + "loss": 4.7664, + "step": 225400 + }, + { + "epoch": 0.970424541905381, + "grad_norm": 0.856834352016449, + "learning_rate": 9.105241076047507e-05, + "loss": 4.4167, + "step": 225450 + }, + { + "epoch": 0.9706397613646636, + "grad_norm": 1.2298731803894043, + "learning_rate": 9.104854730021039e-05, + "loss": 4.5047, + "step": 225500 + }, + { + "epoch": 0.9708549808239462, + "grad_norm": 0.7159408926963806, + "learning_rate": 9.104468308803083e-05, + "loss": 4.5881, + "step": 225550 + }, + { + "epoch": 0.9710702002832288, + "grad_norm": 2.2814712524414062, + "learning_rate": 9.104081812400722e-05, + "loss": 5.1704, + "step": 225600 + }, + { + "epoch": 0.9712854197425115, + "grad_norm": 3.779171943664551, + "learning_rate": 9.10369524082103e-05, + "loss": 4.6832, + "step": 225650 + }, + { + "epoch": 0.9715006392017941, + "grad_norm": 3.225120782852173, + "learning_rate": 9.103308594071091e-05, + "loss": 4.5321, + "step": 225700 + }, + { + "epoch": 0.9717158586610767, + "grad_norm": 2.322345733642578, + "learning_rate": 9.102921872157987e-05, + "loss": 4.9767, + "step": 225750 + }, + { + "epoch": 0.9719310781203593, + "grad_norm": 3.961658477783203, + "learning_rate": 9.102535075088803e-05, + "loss": 4.6488, + "step": 225800 + }, + { + "epoch": 0.972146297579642, + "grad_norm": 2.9746334552764893, + "learning_rate": 9.102148202870621e-05, + "loss": 4.5319, + "step": 225850 + }, + { + "epoch": 0.9723615170389246, + "grad_norm": 3.4118130207061768, + "learning_rate": 9.101761255510531e-05, + "loss": 4.4018, + "step": 225900 + }, + { + "epoch": 0.9725767364982072, + "grad_norm": 5.991429328918457, + "learning_rate": 9.10137423301562e-05, + "loss": 5.207, + "step": 225950 + }, + { + "epoch": 0.9727919559574899, + "grad_norm": 2.0919837951660156, + "learning_rate": 9.100987135392977e-05, + "loss": 4.6851, + "step": 226000 + }, + { + "epoch": 0.9730071754167725, + "grad_norm": 0.6975908875465393, + "learning_rate": 9.100599962649693e-05, + "loss": 4.7026, + "step": 226050 + }, + { + "epoch": 0.9732223948760551, + "grad_norm": 4.12595796585083, + "learning_rate": 9.10021271479286e-05, + "loss": 4.8802, + "step": 226100 + }, + { + "epoch": 0.9734376143353377, + "grad_norm": 2.2729711532592773, + "learning_rate": 9.099825391829572e-05, + "loss": 4.7429, + "step": 226150 + }, + { + "epoch": 0.9736528337946204, + "grad_norm": 2.2371857166290283, + "learning_rate": 9.099437993766922e-05, + "loss": 5.1614, + "step": 226200 + }, + { + "epoch": 0.973868053253903, + "grad_norm": 2.7675552368164062, + "learning_rate": 9.099050520612008e-05, + "loss": 4.8775, + "step": 226250 + }, + { + "epoch": 0.9740832727131856, + "grad_norm": 1.291011095046997, + "learning_rate": 9.098662972371927e-05, + "loss": 4.3749, + "step": 226300 + }, + { + "epoch": 0.9742984921724682, + "grad_norm": 2.0929195880889893, + "learning_rate": 9.09827534905378e-05, + "loss": 4.6067, + "step": 226350 + }, + { + "epoch": 0.9745137116317509, + "grad_norm": 3.3077635765075684, + "learning_rate": 9.097887650664662e-05, + "loss": 4.6647, + "step": 226400 + }, + { + "epoch": 0.9747289310910335, + "grad_norm": 2.350628137588501, + "learning_rate": 9.097499877211681e-05, + "loss": 4.62, + "step": 226450 + }, + { + "epoch": 0.9749441505503161, + "grad_norm": 1.7411442995071411, + "learning_rate": 9.097112028701938e-05, + "loss": 4.5648, + "step": 226500 + }, + { + "epoch": 0.9751593700095987, + "grad_norm": 2.1256275177001953, + "learning_rate": 9.096724105142536e-05, + "loss": 4.69, + "step": 226550 + }, + { + "epoch": 0.9753745894688814, + "grad_norm": 2.768524408340454, + "learning_rate": 9.09633610654058e-05, + "loss": 5.0089, + "step": 226600 + }, + { + "epoch": 0.975589808928164, + "grad_norm": 1.530928373336792, + "learning_rate": 9.09594803290318e-05, + "loss": 4.6421, + "step": 226650 + }, + { + "epoch": 0.9758050283874466, + "grad_norm": 3.3201706409454346, + "learning_rate": 9.095559884237445e-05, + "loss": 4.7973, + "step": 226700 + }, + { + "epoch": 0.9760202478467294, + "grad_norm": 2.4540064334869385, + "learning_rate": 9.095171660550483e-05, + "loss": 5.0497, + "step": 226750 + }, + { + "epoch": 0.976235467306012, + "grad_norm": 0.857481837272644, + "learning_rate": 9.094783361849405e-05, + "loss": 4.6953, + "step": 226800 + }, + { + "epoch": 0.9764506867652946, + "grad_norm": 2.4418323040008545, + "learning_rate": 9.094394988141326e-05, + "loss": 4.7957, + "step": 226850 + }, + { + "epoch": 0.9766659062245772, + "grad_norm": 2.22279953956604, + "learning_rate": 9.094006539433358e-05, + "loss": 5.2898, + "step": 226900 + }, + { + "epoch": 0.9768811256838599, + "grad_norm": 2.5654098987579346, + "learning_rate": 9.093618015732616e-05, + "loss": 4.8145, + "step": 226950 + }, + { + "epoch": 0.9770963451431425, + "grad_norm": 2.706346035003662, + "learning_rate": 9.09322941704622e-05, + "loss": 5.1425, + "step": 227000 + }, + { + "epoch": 0.9773115646024251, + "grad_norm": 3.7053115367889404, + "learning_rate": 9.092840743381285e-05, + "loss": 4.9074, + "step": 227050 + }, + { + "epoch": 0.9775267840617077, + "grad_norm": 3.4252264499664307, + "learning_rate": 9.092451994744935e-05, + "loss": 4.448, + "step": 227100 + }, + { + "epoch": 0.9777420035209904, + "grad_norm": 1.1722509860992432, + "learning_rate": 9.092063171144284e-05, + "loss": 4.2644, + "step": 227150 + }, + { + "epoch": 0.977957222980273, + "grad_norm": 2.3801348209381104, + "learning_rate": 9.091674272586461e-05, + "loss": 4.8611, + "step": 227200 + }, + { + "epoch": 0.9781724424395556, + "grad_norm": 2.2926151752471924, + "learning_rate": 9.091285299078587e-05, + "loss": 4.3475, + "step": 227250 + }, + { + "epoch": 0.9783876618988382, + "grad_norm": 2.199521541595459, + "learning_rate": 9.090896250627788e-05, + "loss": 4.9715, + "step": 227300 + }, + { + "epoch": 0.9786028813581209, + "grad_norm": 3.750473737716675, + "learning_rate": 9.090507127241187e-05, + "loss": 4.929, + "step": 227350 + }, + { + "epoch": 0.9788181008174035, + "grad_norm": 2.8236875534057617, + "learning_rate": 9.090117928925916e-05, + "loss": 4.4042, + "step": 227400 + }, + { + "epoch": 0.9790333202766861, + "grad_norm": 3.5159006118774414, + "learning_rate": 9.089728655689102e-05, + "loss": 4.5792, + "step": 227450 + }, + { + "epoch": 0.9792485397359688, + "grad_norm": 2.9956679344177246, + "learning_rate": 9.089339307537876e-05, + "loss": 4.9751, + "step": 227500 + }, + { + "epoch": 0.9794637591952514, + "grad_norm": 0.760811448097229, + "learning_rate": 9.088949884479372e-05, + "loss": 4.7196, + "step": 227550 + }, + { + "epoch": 0.979678978654534, + "grad_norm": 0.3068988025188446, + "learning_rate": 9.08856038652072e-05, + "loss": 4.7857, + "step": 227600 + }, + { + "epoch": 0.9798941981138166, + "grad_norm": 1.4131609201431274, + "learning_rate": 9.088170813669057e-05, + "loss": 4.5084, + "step": 227650 + }, + { + "epoch": 0.9801094175730993, + "grad_norm": 0.9070409536361694, + "learning_rate": 9.087781165931518e-05, + "loss": 4.5568, + "step": 227700 + }, + { + "epoch": 0.9803246370323819, + "grad_norm": 1.546830415725708, + "learning_rate": 9.08739144331524e-05, + "loss": 4.8848, + "step": 227750 + }, + { + "epoch": 0.9805398564916645, + "grad_norm": 2.872269630432129, + "learning_rate": 9.087001645827364e-05, + "loss": 4.8533, + "step": 227800 + }, + { + "epoch": 0.9807550759509471, + "grad_norm": 2.0057742595672607, + "learning_rate": 9.086611773475029e-05, + "loss": 4.5199, + "step": 227850 + }, + { + "epoch": 0.9809702954102298, + "grad_norm": 2.003734588623047, + "learning_rate": 9.086221826265376e-05, + "loss": 4.5771, + "step": 227900 + }, + { + "epoch": 0.9811855148695124, + "grad_norm": 2.3535823822021484, + "learning_rate": 9.085831804205546e-05, + "loss": 4.9969, + "step": 227950 + }, + { + "epoch": 0.981400734328795, + "grad_norm": 2.0756657123565674, + "learning_rate": 9.08544170730269e-05, + "loss": 4.644, + "step": 228000 + }, + { + "epoch": 0.981400734328795, + "eval_loss": 5.155125617980957, + "eval_runtime": 35.0409, + "eval_samples_per_second": 18.264, + "eval_steps_per_second": 9.132, + "eval_tts_loss": 6.812110962024677, + "step": 228000 + }, + { + "epoch": 0.9816159537880778, + "grad_norm": 3.867141008377075, + "learning_rate": 9.085051535563947e-05, + "loss": 4.9465, + "step": 228050 + }, + { + "epoch": 0.9818311732473604, + "grad_norm": 2.1439971923828125, + "learning_rate": 9.084661288996465e-05, + "loss": 4.9324, + "step": 228100 + }, + { + "epoch": 0.982046392706643, + "grad_norm": 3.0359065532684326, + "learning_rate": 9.084270967607395e-05, + "loss": 4.6709, + "step": 228150 + }, + { + "epoch": 0.9822616121659256, + "grad_norm": 2.482590675354004, + "learning_rate": 9.083880571403886e-05, + "loss": 4.4895, + "step": 228200 + }, + { + "epoch": 0.9824768316252083, + "grad_norm": 3.301137685775757, + "learning_rate": 9.083490100393088e-05, + "loss": 4.3716, + "step": 228250 + }, + { + "epoch": 0.9826920510844909, + "grad_norm": 2.0043482780456543, + "learning_rate": 9.083099554582156e-05, + "loss": 4.8137, + "step": 228300 + }, + { + "epoch": 0.9829072705437735, + "grad_norm": 2.3245067596435547, + "learning_rate": 9.08270893397824e-05, + "loss": 4.5592, + "step": 228350 + }, + { + "epoch": 0.9831224900030561, + "grad_norm": 2.3441860675811768, + "learning_rate": 9.082318238588498e-05, + "loss": 4.9542, + "step": 228400 + }, + { + "epoch": 0.9833377094623388, + "grad_norm": 3.8313302993774414, + "learning_rate": 9.081927468420087e-05, + "loss": 4.9126, + "step": 228450 + }, + { + "epoch": 0.9835529289216214, + "grad_norm": 2.2265048027038574, + "learning_rate": 9.081536623480163e-05, + "loss": 4.5904, + "step": 228500 + }, + { + "epoch": 0.983768148380904, + "grad_norm": 2.6971755027770996, + "learning_rate": 9.081145703775886e-05, + "loss": 5.253, + "step": 228550 + }, + { + "epoch": 0.9839833678401866, + "grad_norm": 1.2127333879470825, + "learning_rate": 9.080754709314418e-05, + "loss": 4.5263, + "step": 228600 + }, + { + "epoch": 0.9841985872994693, + "grad_norm": 3.2740702629089355, + "learning_rate": 9.08036364010292e-05, + "loss": 4.9832, + "step": 228650 + }, + { + "epoch": 0.9844138067587519, + "grad_norm": 1.9752295017242432, + "learning_rate": 9.079972496148556e-05, + "loss": 3.8978, + "step": 228700 + }, + { + "epoch": 0.9846290262180345, + "grad_norm": 2.154801845550537, + "learning_rate": 9.079581277458493e-05, + "loss": 4.8659, + "step": 228750 + }, + { + "epoch": 0.9848442456773172, + "grad_norm": 1.9649651050567627, + "learning_rate": 9.079189984039891e-05, + "loss": 4.7849, + "step": 228800 + }, + { + "epoch": 0.9850594651365998, + "grad_norm": 3.823300361633301, + "learning_rate": 9.078798615899924e-05, + "loss": 4.5709, + "step": 228850 + }, + { + "epoch": 0.9852746845958824, + "grad_norm": 1.350151777267456, + "learning_rate": 9.078407173045758e-05, + "loss": 4.4698, + "step": 228900 + }, + { + "epoch": 0.985489904055165, + "grad_norm": 0.5253221392631531, + "learning_rate": 9.078015655484563e-05, + "loss": 4.3726, + "step": 228950 + }, + { + "epoch": 0.9857051235144477, + "grad_norm": 2.366555690765381, + "learning_rate": 9.077624063223511e-05, + "loss": 4.7773, + "step": 229000 + }, + { + "epoch": 0.9859203429737303, + "grad_norm": 2.3861260414123535, + "learning_rate": 9.077232396269776e-05, + "loss": 4.8668, + "step": 229050 + }, + { + "epoch": 0.9861355624330129, + "grad_norm": 2.228336811065674, + "learning_rate": 9.076840654630534e-05, + "loss": 5.1197, + "step": 229100 + }, + { + "epoch": 0.9863507818922955, + "grad_norm": 2.8816475868225098, + "learning_rate": 9.076448838312957e-05, + "loss": 4.8055, + "step": 229150 + }, + { + "epoch": 0.9865660013515782, + "grad_norm": 0.20864450931549072, + "learning_rate": 9.076056947324224e-05, + "loss": 5.1507, + "step": 229200 + }, + { + "epoch": 0.9867812208108608, + "grad_norm": 0.637718677520752, + "learning_rate": 9.075664981671514e-05, + "loss": 4.861, + "step": 229250 + }, + { + "epoch": 0.9869964402701434, + "grad_norm": 2.4144232273101807, + "learning_rate": 9.075272941362005e-05, + "loss": 4.4782, + "step": 229300 + }, + { + "epoch": 0.9872116597294261, + "grad_norm": 1.385123610496521, + "learning_rate": 9.07488082640288e-05, + "loss": 4.7914, + "step": 229350 + }, + { + "epoch": 0.9874268791887088, + "grad_norm": 2.624330520629883, + "learning_rate": 9.074488636801323e-05, + "loss": 4.8766, + "step": 229400 + }, + { + "epoch": 0.9876420986479914, + "grad_norm": 2.103489875793457, + "learning_rate": 9.074096372564516e-05, + "loss": 4.6107, + "step": 229450 + }, + { + "epoch": 0.987857318107274, + "grad_norm": 1.1258586645126343, + "learning_rate": 9.073704033699643e-05, + "loss": 4.5154, + "step": 229500 + }, + { + "epoch": 0.9880725375665567, + "grad_norm": 1.9648264646530151, + "learning_rate": 9.073311620213894e-05, + "loss": 4.3393, + "step": 229550 + }, + { + "epoch": 0.9882877570258393, + "grad_norm": 1.8648390769958496, + "learning_rate": 9.072919132114455e-05, + "loss": 4.6776, + "step": 229600 + }, + { + "epoch": 0.9885029764851219, + "grad_norm": 1.6650872230529785, + "learning_rate": 9.072526569408516e-05, + "loss": 3.9975, + "step": 229650 + }, + { + "epoch": 0.9887181959444045, + "grad_norm": 2.1557655334472656, + "learning_rate": 9.072133932103269e-05, + "loss": 4.079, + "step": 229700 + }, + { + "epoch": 0.9889334154036872, + "grad_norm": 2.1244349479675293, + "learning_rate": 9.071741220205903e-05, + "loss": 4.4774, + "step": 229750 + }, + { + "epoch": 0.9891486348629698, + "grad_norm": 4.968222141265869, + "learning_rate": 9.071348433723617e-05, + "loss": 4.6478, + "step": 229800 + }, + { + "epoch": 0.9893638543222524, + "grad_norm": 2.9055347442626953, + "learning_rate": 9.0709555726636e-05, + "loss": 5.1945, + "step": 229850 + }, + { + "epoch": 0.989579073781535, + "grad_norm": 2.2974138259887695, + "learning_rate": 9.070562637033051e-05, + "loss": 4.7984, + "step": 229900 + }, + { + "epoch": 0.9897942932408177, + "grad_norm": 2.2958247661590576, + "learning_rate": 9.070169626839168e-05, + "loss": 5.118, + "step": 229950 + }, + { + "epoch": 0.9900095127001003, + "grad_norm": 1.1313785314559937, + "learning_rate": 9.069776542089151e-05, + "loss": 4.1906, + "step": 230000 + }, + { + "epoch": 0.9902247321593829, + "grad_norm": 2.931687593460083, + "learning_rate": 9.069383382790196e-05, + "loss": 4.9845, + "step": 230050 + }, + { + "epoch": 0.9904399516186656, + "grad_norm": 2.4445440769195557, + "learning_rate": 9.06899014894951e-05, + "loss": 4.5933, + "step": 230100 + }, + { + "epoch": 0.9906551710779482, + "grad_norm": 4.836852073669434, + "learning_rate": 9.068596840574293e-05, + "loss": 5.0089, + "step": 230150 + }, + { + "epoch": 0.9908703905372308, + "grad_norm": 1.5829111337661743, + "learning_rate": 9.068203457671752e-05, + "loss": 4.5747, + "step": 230200 + }, + { + "epoch": 0.9910856099965134, + "grad_norm": 1.7530920505523682, + "learning_rate": 9.06781000024909e-05, + "loss": 4.7618, + "step": 230250 + }, + { + "epoch": 0.9913008294557961, + "grad_norm": 0.8441370725631714, + "learning_rate": 9.067416468313516e-05, + "loss": 4.6279, + "step": 230300 + }, + { + "epoch": 0.9915160489150787, + "grad_norm": 4.503922462463379, + "learning_rate": 9.067022861872238e-05, + "loss": 4.9669, + "step": 230350 + }, + { + "epoch": 0.9917312683743613, + "grad_norm": 4.40223503112793, + "learning_rate": 9.066629180932467e-05, + "loss": 4.2803, + "step": 230400 + }, + { + "epoch": 0.9919464878336439, + "grad_norm": 3.701247215270996, + "learning_rate": 9.066235425501412e-05, + "loss": 4.6262, + "step": 230450 + }, + { + "epoch": 0.9921617072929266, + "grad_norm": 1.860521912574768, + "learning_rate": 9.065841595586289e-05, + "loss": 4.7628, + "step": 230500 + }, + { + "epoch": 0.9923769267522092, + "grad_norm": 2.1138370037078857, + "learning_rate": 9.065447691194307e-05, + "loss": 5.3904, + "step": 230550 + }, + { + "epoch": 0.9925921462114918, + "grad_norm": 2.5518875122070312, + "learning_rate": 9.065053712332688e-05, + "loss": 4.5191, + "step": 230600 + }, + { + "epoch": 0.9928073656707745, + "grad_norm": 2.2134642601013184, + "learning_rate": 9.064659659008644e-05, + "loss": 4.6547, + "step": 230650 + }, + { + "epoch": 0.9930225851300571, + "grad_norm": 3.912961006164551, + "learning_rate": 9.064265531229395e-05, + "loss": 3.8952, + "step": 230700 + }, + { + "epoch": 0.9932378045893397, + "grad_norm": 1.401541829109192, + "learning_rate": 9.063871329002159e-05, + "loss": 4.7503, + "step": 230750 + }, + { + "epoch": 0.9934530240486223, + "grad_norm": 3.4118170738220215, + "learning_rate": 9.06347705233416e-05, + "loss": 4.8883, + "step": 230800 + }, + { + "epoch": 0.9936682435079051, + "grad_norm": 2.6721689701080322, + "learning_rate": 9.063082701232617e-05, + "loss": 4.2754, + "step": 230850 + }, + { + "epoch": 0.9938834629671877, + "grad_norm": 2.196709394454956, + "learning_rate": 9.062688275704754e-05, + "loss": 4.8052, + "step": 230900 + }, + { + "epoch": 0.9940986824264703, + "grad_norm": 2.0302181243896484, + "learning_rate": 9.062293775757797e-05, + "loss": 4.9872, + "step": 230950 + }, + { + "epoch": 0.9943139018857529, + "grad_norm": 2.2259743213653564, + "learning_rate": 9.061899201398973e-05, + "loss": 4.4555, + "step": 231000 + }, + { + "epoch": 0.9943139018857529, + "eval_loss": 5.162271022796631, + "eval_runtime": 34.9114, + "eval_samples_per_second": 18.332, + "eval_steps_per_second": 9.166, + "eval_tts_loss": 6.849525757090375, + "step": 231000 + }, + { + "epoch": 0.9945291213450356, + "grad_norm": 1.8892889022827148, + "learning_rate": 9.061504552635509e-05, + "loss": 4.6516, + "step": 231050 + }, + { + "epoch": 0.9947443408043182, + "grad_norm": 2.1203012466430664, + "learning_rate": 9.061109829474634e-05, + "loss": 4.597, + "step": 231100 + }, + { + "epoch": 0.9949595602636008, + "grad_norm": 1.5606541633605957, + "learning_rate": 9.060715031923577e-05, + "loss": 4.0063, + "step": 231150 + }, + { + "epoch": 0.9951747797228834, + "grad_norm": 2.2398629188537598, + "learning_rate": 9.060320159989573e-05, + "loss": 4.9999, + "step": 231200 + }, + { + "epoch": 0.9953899991821661, + "grad_norm": 2.4329850673675537, + "learning_rate": 9.059925213679852e-05, + "loss": 4.6992, + "step": 231250 + }, + { + "epoch": 0.9956052186414487, + "grad_norm": 3.266641855239868, + "learning_rate": 9.059530193001648e-05, + "loss": 5.0492, + "step": 231300 + }, + { + "epoch": 0.9958204381007313, + "grad_norm": 0.6986448764801025, + "learning_rate": 9.059135097962201e-05, + "loss": 4.1625, + "step": 231350 + }, + { + "epoch": 0.996035657560014, + "grad_norm": 3.4229907989501953, + "learning_rate": 9.058739928568745e-05, + "loss": 4.4577, + "step": 231400 + }, + { + "epoch": 0.9962508770192966, + "grad_norm": 2.9923717975616455, + "learning_rate": 9.058344684828519e-05, + "loss": 4.5881, + "step": 231450 + }, + { + "epoch": 0.9964660964785792, + "grad_norm": 2.3239693641662598, + "learning_rate": 9.057949366748764e-05, + "loss": 4.247, + "step": 231500 + }, + { + "epoch": 0.9966813159378618, + "grad_norm": 3.583651304244995, + "learning_rate": 9.057553974336721e-05, + "loss": 4.1836, + "step": 231550 + }, + { + "epoch": 0.9968965353971445, + "grad_norm": 2.670489549636841, + "learning_rate": 9.057158507599633e-05, + "loss": 4.7296, + "step": 231600 + }, + { + "epoch": 0.9971117548564271, + "grad_norm": 1.8908741474151611, + "learning_rate": 9.05676296654474e-05, + "loss": 5.0711, + "step": 231650 + }, + { + "epoch": 0.9973269743157097, + "grad_norm": 1.4753364324569702, + "learning_rate": 9.056367351179295e-05, + "loss": 4.9982, + "step": 231700 + }, + { + "epoch": 0.9975421937749923, + "grad_norm": 2.279266119003296, + "learning_rate": 9.055971661510539e-05, + "loss": 4.9725, + "step": 231750 + }, + { + "epoch": 0.997757413234275, + "grad_norm": 0.6147887110710144, + "learning_rate": 9.05557589754572e-05, + "loss": 4.2816, + "step": 231800 + }, + { + "epoch": 0.9979726326935576, + "grad_norm": 3.2353999614715576, + "learning_rate": 9.05518005929209e-05, + "loss": 4.7478, + "step": 231850 + }, + { + "epoch": 0.9981878521528402, + "grad_norm": 2.842933416366577, + "learning_rate": 9.0547841467569e-05, + "loss": 4.4267, + "step": 231900 + }, + { + "epoch": 0.9984030716121228, + "grad_norm": 0.4495835602283478, + "learning_rate": 9.054388159947401e-05, + "loss": 5.0238, + "step": 231950 + }, + { + "epoch": 0.9986182910714055, + "grad_norm": 1.3843072652816772, + "learning_rate": 9.053992098870847e-05, + "loss": 4.8196, + "step": 232000 + }, + { + "epoch": 0.9988335105306881, + "grad_norm": 2.0857644081115723, + "learning_rate": 9.053595963534492e-05, + "loss": 4.941, + "step": 232050 + }, + { + "epoch": 0.9990487299899707, + "grad_norm": 2.570173501968384, + "learning_rate": 9.053199753945594e-05, + "loss": 4.7239, + "step": 232100 + }, + { + "epoch": 0.9992639494492535, + "grad_norm": 2.4565718173980713, + "learning_rate": 9.052803470111408e-05, + "loss": 5.0219, + "step": 232150 + }, + { + "epoch": 0.999479168908536, + "grad_norm": 2.536165237426758, + "learning_rate": 9.052407112039197e-05, + "loss": 4.7663, + "step": 232200 + }, + { + "epoch": 0.9996943883678187, + "grad_norm": 2.3159589767456055, + "learning_rate": 9.052010679736215e-05, + "loss": 4.5478, + "step": 232250 + }, + { + "epoch": 0.9999096078271013, + "grad_norm": 0.7568163871765137, + "learning_rate": 9.051614173209732e-05, + "loss": 4.766, + "step": 232300 + }, + { + "epoch": 1.0001248272863839, + "grad_norm": 2.594637393951416, + "learning_rate": 9.051217592467004e-05, + "loss": 4.2466, + "step": 232350 + }, + { + "epoch": 1.0003400467456665, + "grad_norm": 3.2307968139648438, + "learning_rate": 9.0508209375153e-05, + "loss": 4.9909, + "step": 232400 + }, + { + "epoch": 1.0005552662049493, + "grad_norm": 2.6119699478149414, + "learning_rate": 9.050424208361884e-05, + "loss": 4.6382, + "step": 232450 + }, + { + "epoch": 1.0007704856642319, + "grad_norm": 3.5268867015838623, + "learning_rate": 9.050027405014024e-05, + "loss": 4.9399, + "step": 232500 + }, + { + "epoch": 1.0009857051235145, + "grad_norm": 3.5582351684570312, + "learning_rate": 9.049630527478984e-05, + "loss": 4.6349, + "step": 232550 + }, + { + "epoch": 1.001200924582797, + "grad_norm": 2.2885935306549072, + "learning_rate": 9.049233575764043e-05, + "loss": 4.6821, + "step": 232600 + }, + { + "epoch": 1.0014161440420797, + "grad_norm": 3.182643413543701, + "learning_rate": 9.048836549876463e-05, + "loss": 4.9279, + "step": 232650 + }, + { + "epoch": 1.0016313635013623, + "grad_norm": 1.8422214984893799, + "learning_rate": 9.048439449823521e-05, + "loss": 4.9565, + "step": 232700 + }, + { + "epoch": 1.0018465829606449, + "grad_norm": 3.295146942138672, + "learning_rate": 9.048042275612492e-05, + "loss": 4.7002, + "step": 232750 + }, + { + "epoch": 1.0020618024199277, + "grad_norm": 2.9505951404571533, + "learning_rate": 9.047645027250649e-05, + "loss": 4.6416, + "step": 232800 + }, + { + "epoch": 1.0022770218792103, + "grad_norm": 1.9277595281600952, + "learning_rate": 9.047247704745269e-05, + "loss": 4.7517, + "step": 232850 + }, + { + "epoch": 1.002492241338493, + "grad_norm": 0.4439762234687805, + "learning_rate": 9.04685030810363e-05, + "loss": 4.8477, + "step": 232900 + }, + { + "epoch": 1.0027074607977755, + "grad_norm": 1.3215638399124146, + "learning_rate": 9.046452837333012e-05, + "loss": 4.5753, + "step": 232950 + }, + { + "epoch": 1.002922680257058, + "grad_norm": 2.413356304168701, + "learning_rate": 9.046055292440697e-05, + "loss": 4.7316, + "step": 233000 + }, + { + "epoch": 1.0031378997163407, + "grad_norm": 2.3087103366851807, + "learning_rate": 9.045657673433965e-05, + "loss": 4.4082, + "step": 233050 + }, + { + "epoch": 1.0033531191756233, + "grad_norm": 2.877204656600952, + "learning_rate": 9.0452599803201e-05, + "loss": 5.0039, + "step": 233100 + }, + { + "epoch": 1.003568338634906, + "grad_norm": 0.7532005906105042, + "learning_rate": 9.044862213106388e-05, + "loss": 4.9219, + "step": 233150 + }, + { + "epoch": 1.0037835580941887, + "grad_norm": 2.162299633026123, + "learning_rate": 9.044464371800115e-05, + "loss": 4.5803, + "step": 233200 + }, + { + "epoch": 1.0039987775534713, + "grad_norm": 3.042858600616455, + "learning_rate": 9.044066456408564e-05, + "loss": 4.917, + "step": 233250 + }, + { + "epoch": 1.004213997012754, + "grad_norm": 2.033209800720215, + "learning_rate": 9.04366846693903e-05, + "loss": 4.7754, + "step": 233300 + }, + { + "epoch": 1.0044292164720365, + "grad_norm": 0.7181519269943237, + "learning_rate": 9.043270403398801e-05, + "loss": 4.5765, + "step": 233350 + }, + { + "epoch": 1.0046444359313191, + "grad_norm": 2.315840244293213, + "learning_rate": 9.042872265795168e-05, + "loss": 4.5593, + "step": 233400 + }, + { + "epoch": 1.0048596553906017, + "grad_norm": 4.324007034301758, + "learning_rate": 9.042474054135426e-05, + "loss": 4.3299, + "step": 233450 + }, + { + "epoch": 1.0050748748498843, + "grad_norm": 2.5763468742370605, + "learning_rate": 9.042075768426866e-05, + "loss": 4.8319, + "step": 233500 + }, + { + "epoch": 1.0052900943091672, + "grad_norm": 3.045650005340576, + "learning_rate": 9.041677408676785e-05, + "loss": 4.8086, + "step": 233550 + }, + { + "epoch": 1.0055053137684498, + "grad_norm": 3.5241520404815674, + "learning_rate": 9.041278974892481e-05, + "loss": 4.85, + "step": 233600 + }, + { + "epoch": 1.0057205332277324, + "grad_norm": 0.9629567861557007, + "learning_rate": 9.040880467081253e-05, + "loss": 4.1168, + "step": 233650 + }, + { + "epoch": 1.005935752687015, + "grad_norm": 3.2948944568634033, + "learning_rate": 9.040481885250398e-05, + "loss": 4.5311, + "step": 233700 + }, + { + "epoch": 1.0061509721462976, + "grad_norm": 1.219082236289978, + "learning_rate": 9.040083229407222e-05, + "loss": 4.8337, + "step": 233750 + }, + { + "epoch": 1.0063661916055802, + "grad_norm": 2.525630235671997, + "learning_rate": 9.03968449955902e-05, + "loss": 4.6407, + "step": 233800 + }, + { + "epoch": 1.0065814110648628, + "grad_norm": 4.209072113037109, + "learning_rate": 9.039285695713103e-05, + "loss": 4.5113, + "step": 233850 + }, + { + "epoch": 1.0067966305241454, + "grad_norm": 3.8882765769958496, + "learning_rate": 9.03888681787677e-05, + "loss": 4.5306, + "step": 233900 + }, + { + "epoch": 1.0070118499834282, + "grad_norm": 2.9549458026885986, + "learning_rate": 9.038487866057333e-05, + "loss": 4.5329, + "step": 233950 + }, + { + "epoch": 1.0072270694427108, + "grad_norm": 2.753596067428589, + "learning_rate": 9.038088840262097e-05, + "loss": 4.6606, + "step": 234000 + }, + { + "epoch": 1.0072270694427108, + "eval_loss": 5.16220760345459, + "eval_runtime": 35.1072, + "eval_samples_per_second": 18.23, + "eval_steps_per_second": 9.115, + "eval_tts_loss": 6.827894885961053, + "step": 234000 + }, + { + "epoch": 1.0074422889019934, + "grad_norm": 1.5649333000183105, + "learning_rate": 9.037689740498372e-05, + "loss": 4.5215, + "step": 234050 + }, + { + "epoch": 1.007657508361276, + "grad_norm": 3.4820613861083984, + "learning_rate": 9.037290566773469e-05, + "loss": 4.6271, + "step": 234100 + }, + { + "epoch": 1.0078727278205586, + "grad_norm": 3.4423961639404297, + "learning_rate": 9.036891319094699e-05, + "loss": 4.6074, + "step": 234150 + }, + { + "epoch": 1.0080879472798412, + "grad_norm": 2.2342073917388916, + "learning_rate": 9.036491997469375e-05, + "loss": 4.564, + "step": 234200 + }, + { + "epoch": 1.0083031667391238, + "grad_norm": 2.0029468536376953, + "learning_rate": 9.036092601904813e-05, + "loss": 4.6237, + "step": 234250 + }, + { + "epoch": 1.0085183861984066, + "grad_norm": 3.597838878631592, + "learning_rate": 9.035693132408327e-05, + "loss": 4.6821, + "step": 234300 + }, + { + "epoch": 1.0087336056576892, + "grad_norm": 4.393778324127197, + "learning_rate": 9.035293588987237e-05, + "loss": 4.7352, + "step": 234350 + }, + { + "epoch": 1.0089488251169718, + "grad_norm": 1.2995833158493042, + "learning_rate": 9.03489397164886e-05, + "loss": 4.5658, + "step": 234400 + }, + { + "epoch": 1.0091640445762544, + "grad_norm": 4.732507705688477, + "learning_rate": 9.034494280400516e-05, + "loss": 4.6592, + "step": 234450 + }, + { + "epoch": 1.009379264035537, + "grad_norm": 0.4670126438140869, + "learning_rate": 9.034094515249527e-05, + "loss": 4.671, + "step": 234500 + }, + { + "epoch": 1.0095944834948196, + "grad_norm": 0.7142342925071716, + "learning_rate": 9.033694676203215e-05, + "loss": 4.3466, + "step": 234550 + }, + { + "epoch": 1.0098097029541022, + "grad_norm": 4.795847415924072, + "learning_rate": 9.033294763268906e-05, + "loss": 4.7222, + "step": 234600 + }, + { + "epoch": 1.0100249224133848, + "grad_norm": 1.9795774221420288, + "learning_rate": 9.032894776453925e-05, + "loss": 5.0282, + "step": 234650 + }, + { + "epoch": 1.0102401418726676, + "grad_norm": 1.2676187753677368, + "learning_rate": 9.032494715765596e-05, + "loss": 4.3659, + "step": 234700 + }, + { + "epoch": 1.0104553613319502, + "grad_norm": 2.360536813735962, + "learning_rate": 9.03209458121125e-05, + "loss": 4.8392, + "step": 234750 + }, + { + "epoch": 1.0106705807912328, + "grad_norm": 1.3573065996170044, + "learning_rate": 9.031694372798216e-05, + "loss": 4.5696, + "step": 234800 + }, + { + "epoch": 1.0108858002505154, + "grad_norm": 2.0183730125427246, + "learning_rate": 9.031294090533823e-05, + "loss": 4.5889, + "step": 234850 + }, + { + "epoch": 1.011101019709798, + "grad_norm": 2.2277979850769043, + "learning_rate": 9.030893734425407e-05, + "loss": 4.6382, + "step": 234900 + }, + { + "epoch": 1.0113162391690806, + "grad_norm": 4.431862831115723, + "learning_rate": 9.030493304480301e-05, + "loss": 4.3047, + "step": 234950 + }, + { + "epoch": 1.0115314586283632, + "grad_norm": 3.0739383697509766, + "learning_rate": 9.030092800705836e-05, + "loss": 4.9202, + "step": 235000 + }, + { + "epoch": 1.011746678087646, + "grad_norm": 2.9817025661468506, + "learning_rate": 9.029692223109351e-05, + "loss": 5.1487, + "step": 235050 + }, + { + "epoch": 1.0119618975469287, + "grad_norm": 2.2699756622314453, + "learning_rate": 9.029291571698184e-05, + "loss": 4.7142, + "step": 235100 + }, + { + "epoch": 1.0121771170062113, + "grad_norm": 1.1935571432113647, + "learning_rate": 9.028890846479674e-05, + "loss": 4.6726, + "step": 235150 + }, + { + "epoch": 1.0123923364654939, + "grad_norm": 3.621654510498047, + "learning_rate": 9.02849004746116e-05, + "loss": 4.4261, + "step": 235200 + }, + { + "epoch": 1.0126075559247765, + "grad_norm": 3.0840141773223877, + "learning_rate": 9.028089174649985e-05, + "loss": 4.5816, + "step": 235250 + }, + { + "epoch": 1.012822775384059, + "grad_norm": 2.3206470012664795, + "learning_rate": 9.027688228053491e-05, + "loss": 5.1506, + "step": 235300 + }, + { + "epoch": 1.0130379948433417, + "grad_norm": 1.3414890766143799, + "learning_rate": 9.027287207679024e-05, + "loss": 4.5068, + "step": 235350 + }, + { + "epoch": 1.0132532143026245, + "grad_norm": 2.2505602836608887, + "learning_rate": 9.026886113533928e-05, + "loss": 4.3438, + "step": 235400 + }, + { + "epoch": 1.013468433761907, + "grad_norm": 1.3292887210845947, + "learning_rate": 9.026484945625551e-05, + "loss": 4.2091, + "step": 235450 + }, + { + "epoch": 1.0136836532211897, + "grad_norm": 2.0047035217285156, + "learning_rate": 9.026083703961243e-05, + "loss": 4.5468, + "step": 235500 + }, + { + "epoch": 1.0138988726804723, + "grad_norm": 3.4392192363739014, + "learning_rate": 9.025682388548352e-05, + "loss": 4.6728, + "step": 235550 + }, + { + "epoch": 1.014114092139755, + "grad_norm": 0.7153475880622864, + "learning_rate": 9.025280999394227e-05, + "loss": 4.7457, + "step": 235600 + }, + { + "epoch": 1.0143293115990375, + "grad_norm": 3.5615897178649902, + "learning_rate": 9.024879536506224e-05, + "loss": 4.8726, + "step": 235650 + }, + { + "epoch": 1.01454453105832, + "grad_norm": 3.2722833156585693, + "learning_rate": 9.024477999891696e-05, + "loss": 4.8124, + "step": 235700 + }, + { + "epoch": 1.0147597505176027, + "grad_norm": 4.525266170501709, + "learning_rate": 9.024076389557999e-05, + "loss": 4.3933, + "step": 235750 + }, + { + "epoch": 1.0149749699768855, + "grad_norm": 0.5417091846466064, + "learning_rate": 9.023674705512488e-05, + "loss": 4.81, + "step": 235800 + }, + { + "epoch": 1.0151901894361681, + "grad_norm": 3.0682544708251953, + "learning_rate": 9.023272947762522e-05, + "loss": 4.0326, + "step": 235850 + }, + { + "epoch": 1.0154054088954507, + "grad_norm": 2.8630948066711426, + "learning_rate": 9.022871116315459e-05, + "loss": 4.4939, + "step": 235900 + }, + { + "epoch": 1.0156206283547333, + "grad_norm": 3.402219772338867, + "learning_rate": 9.022469211178662e-05, + "loss": 4.8633, + "step": 235950 + }, + { + "epoch": 1.015835847814016, + "grad_norm": 2.312837839126587, + "learning_rate": 9.02206723235949e-05, + "loss": 5.0847, + "step": 236000 + }, + { + "epoch": 1.0160510672732985, + "grad_norm": 2.4482274055480957, + "learning_rate": 9.021665179865306e-05, + "loss": 4.4612, + "step": 236050 + }, + { + "epoch": 1.0162662867325811, + "grad_norm": 2.8647117614746094, + "learning_rate": 9.02126305370348e-05, + "loss": 4.9203, + "step": 236100 + }, + { + "epoch": 1.016481506191864, + "grad_norm": 3.722533702850342, + "learning_rate": 9.020860853881371e-05, + "loss": 4.767, + "step": 236150 + }, + { + "epoch": 1.0166967256511465, + "grad_norm": 4.734673976898193, + "learning_rate": 9.020458580406352e-05, + "loss": 4.7984, + "step": 236200 + }, + { + "epoch": 1.0169119451104291, + "grad_norm": 3.128620147705078, + "learning_rate": 9.02005623328579e-05, + "loss": 4.2773, + "step": 236250 + }, + { + "epoch": 1.0171271645697118, + "grad_norm": 0.8888149261474609, + "learning_rate": 9.019653812527054e-05, + "loss": 5.0531, + "step": 236300 + }, + { + "epoch": 1.0173423840289944, + "grad_norm": 4.543176174163818, + "learning_rate": 9.019251318137515e-05, + "loss": 4.4285, + "step": 236350 + }, + { + "epoch": 1.017557603488277, + "grad_norm": 2.437171459197998, + "learning_rate": 9.018848750124549e-05, + "loss": 4.4928, + "step": 236400 + }, + { + "epoch": 1.0177728229475596, + "grad_norm": 4.068355560302734, + "learning_rate": 9.018446108495527e-05, + "loss": 4.9667, + "step": 236450 + }, + { + "epoch": 1.0179880424068422, + "grad_norm": 3.520293951034546, + "learning_rate": 9.018043393257825e-05, + "loss": 4.7528, + "step": 236500 + }, + { + "epoch": 1.018203261866125, + "grad_norm": 2.186736822128296, + "learning_rate": 9.01764060441882e-05, + "loss": 4.5238, + "step": 236550 + }, + { + "epoch": 1.0184184813254076, + "grad_norm": 2.6520674228668213, + "learning_rate": 9.01723774198589e-05, + "loss": 4.8144, + "step": 236600 + }, + { + "epoch": 1.0186337007846902, + "grad_norm": 2.4630093574523926, + "learning_rate": 9.016834805966416e-05, + "loss": 5.2552, + "step": 236650 + }, + { + "epoch": 1.0188489202439728, + "grad_norm": 1.045465111732483, + "learning_rate": 9.016431796367777e-05, + "loss": 4.7411, + "step": 236700 + }, + { + "epoch": 1.0190641397032554, + "grad_norm": 1.983132004737854, + "learning_rate": 9.016028713197357e-05, + "loss": 4.8228, + "step": 236750 + }, + { + "epoch": 1.019279359162538, + "grad_norm": 2.6645450592041016, + "learning_rate": 9.015625556462537e-05, + "loss": 4.9638, + "step": 236800 + }, + { + "epoch": 1.0194945786218206, + "grad_norm": 2.2907841205596924, + "learning_rate": 9.015222326170704e-05, + "loss": 4.898, + "step": 236850 + }, + { + "epoch": 1.0197097980811034, + "grad_norm": 1.0473179817199707, + "learning_rate": 9.014819022329244e-05, + "loss": 4.4895, + "step": 236900 + }, + { + "epoch": 1.019925017540386, + "grad_norm": 1.5376272201538086, + "learning_rate": 9.014415644945544e-05, + "loss": 4.3525, + "step": 236950 + }, + { + "epoch": 1.0201402369996686, + "grad_norm": 1.149485468864441, + "learning_rate": 9.014012194026993e-05, + "loss": 4.7711, + "step": 237000 + }, + { + "epoch": 1.0201402369996686, + "eval_loss": 5.139830112457275, + "eval_runtime": 35.1829, + "eval_samples_per_second": 18.191, + "eval_steps_per_second": 9.095, + "eval_tts_loss": 6.851900326398617, + "step": 237000 + }, + { + "epoch": 1.0203554564589512, + "grad_norm": 0.7251529097557068, + "learning_rate": 9.013608669580984e-05, + "loss": 4.9656, + "step": 237050 + }, + { + "epoch": 1.0205706759182338, + "grad_norm": 4.321776866912842, + "learning_rate": 9.013205071614902e-05, + "loss": 4.3374, + "step": 237100 + }, + { + "epoch": 1.0207858953775164, + "grad_norm": 2.0495550632476807, + "learning_rate": 9.012801400136147e-05, + "loss": 4.2936, + "step": 237150 + }, + { + "epoch": 1.021001114836799, + "grad_norm": 1.2381640672683716, + "learning_rate": 9.01239765515211e-05, + "loss": 4.5095, + "step": 237200 + }, + { + "epoch": 1.0212163342960816, + "grad_norm": 3.827983856201172, + "learning_rate": 9.011993836670187e-05, + "loss": 4.542, + "step": 237250 + }, + { + "epoch": 1.0214315537553644, + "grad_norm": 2.5031111240386963, + "learning_rate": 9.011589944697774e-05, + "loss": 5.0954, + "step": 237300 + }, + { + "epoch": 1.021646773214647, + "grad_norm": 2.194110631942749, + "learning_rate": 9.011185979242273e-05, + "loss": 4.9384, + "step": 237350 + }, + { + "epoch": 1.0218619926739296, + "grad_norm": 3.1436851024627686, + "learning_rate": 9.010781940311079e-05, + "loss": 4.6519, + "step": 237400 + }, + { + "epoch": 1.0220772121332122, + "grad_norm": 1.0713293552398682, + "learning_rate": 9.010377827911596e-05, + "loss": 4.7291, + "step": 237450 + }, + { + "epoch": 1.0222924315924948, + "grad_norm": 1.5934319496154785, + "learning_rate": 9.009973642051226e-05, + "loss": 5.0104, + "step": 237500 + }, + { + "epoch": 1.0225076510517774, + "grad_norm": 4.443734169006348, + "learning_rate": 9.009569382737373e-05, + "loss": 4.2316, + "step": 237550 + }, + { + "epoch": 1.02272287051106, + "grad_norm": 4.106814384460449, + "learning_rate": 9.009165049977441e-05, + "loss": 4.4436, + "step": 237600 + }, + { + "epoch": 1.0229380899703429, + "grad_norm": 2.02470064163208, + "learning_rate": 9.008760643778838e-05, + "loss": 4.5135, + "step": 237650 + }, + { + "epoch": 1.0231533094296255, + "grad_norm": 2.1204426288604736, + "learning_rate": 9.008356164148969e-05, + "loss": 4.5066, + "step": 237700 + }, + { + "epoch": 1.023368528888908, + "grad_norm": 3.6599466800689697, + "learning_rate": 9.007951611095245e-05, + "loss": 4.7428, + "step": 237750 + }, + { + "epoch": 1.0235837483481907, + "grad_norm": 3.4134132862091064, + "learning_rate": 9.007546984625077e-05, + "loss": 4.3684, + "step": 237800 + }, + { + "epoch": 1.0237989678074733, + "grad_norm": 2.707606792449951, + "learning_rate": 9.007142284745877e-05, + "loss": 4.6779, + "step": 237850 + }, + { + "epoch": 1.0240141872667559, + "grad_norm": 5.335859775543213, + "learning_rate": 9.006737511465058e-05, + "loss": 4.6422, + "step": 237900 + }, + { + "epoch": 1.0242294067260385, + "grad_norm": 2.433051824569702, + "learning_rate": 9.006332664790032e-05, + "loss": 4.8518, + "step": 237950 + }, + { + "epoch": 1.0244446261853213, + "grad_norm": 2.2024600505828857, + "learning_rate": 9.005927744728218e-05, + "loss": 4.1067, + "step": 238000 + }, + { + "epoch": 1.0246598456446039, + "grad_norm": 2.277918577194214, + "learning_rate": 9.005522751287031e-05, + "loss": 4.8698, + "step": 238050 + }, + { + "epoch": 1.0248750651038865, + "grad_norm": 0.7465620040893555, + "learning_rate": 9.005117684473892e-05, + "loss": 5.1124, + "step": 238100 + }, + { + "epoch": 1.025090284563169, + "grad_norm": 2.421989917755127, + "learning_rate": 9.004712544296217e-05, + "loss": 4.6855, + "step": 238150 + }, + { + "epoch": 1.0253055040224517, + "grad_norm": 2.3571581840515137, + "learning_rate": 9.004307330761432e-05, + "loss": 4.1124, + "step": 238200 + }, + { + "epoch": 1.0255207234817343, + "grad_norm": 2.5828120708465576, + "learning_rate": 9.003902043876957e-05, + "loss": 4.6163, + "step": 238250 + }, + { + "epoch": 1.0257359429410169, + "grad_norm": 3.575277805328369, + "learning_rate": 9.003496683650216e-05, + "loss": 4.3662, + "step": 238300 + }, + { + "epoch": 1.0259511624002995, + "grad_norm": 3.068408250808716, + "learning_rate": 9.003091250088634e-05, + "loss": 4.5865, + "step": 238350 + }, + { + "epoch": 1.0261663818595823, + "grad_norm": 1.2194466590881348, + "learning_rate": 9.002685743199638e-05, + "loss": 4.332, + "step": 238400 + }, + { + "epoch": 1.026381601318865, + "grad_norm": 1.8174201250076294, + "learning_rate": 9.002280162990657e-05, + "loss": 4.6974, + "step": 238450 + }, + { + "epoch": 1.0265968207781475, + "grad_norm": 3.190631866455078, + "learning_rate": 9.001874509469116e-05, + "loss": 4.8856, + "step": 238500 + }, + { + "epoch": 1.0268120402374301, + "grad_norm": 3.2850513458251953, + "learning_rate": 9.001468782642453e-05, + "loss": 4.862, + "step": 238550 + }, + { + "epoch": 1.0270272596967127, + "grad_norm": 4.083306789398193, + "learning_rate": 9.001062982518094e-05, + "loss": 4.3882, + "step": 238600 + }, + { + "epoch": 1.0272424791559953, + "grad_norm": 2.382922410964966, + "learning_rate": 9.000657109103475e-05, + "loss": 4.4016, + "step": 238650 + }, + { + "epoch": 1.027457698615278, + "grad_norm": 2.310898542404175, + "learning_rate": 9.000251162406031e-05, + "loss": 4.679, + "step": 238700 + }, + { + "epoch": 1.0276729180745607, + "grad_norm": 2.2722268104553223, + "learning_rate": 8.999845142433194e-05, + "loss": 4.235, + "step": 238750 + }, + { + "epoch": 1.0278881375338433, + "grad_norm": 4.040534496307373, + "learning_rate": 8.999439049192407e-05, + "loss": 5.1302, + "step": 238800 + }, + { + "epoch": 1.028103356993126, + "grad_norm": 1.307906985282898, + "learning_rate": 8.999032882691104e-05, + "loss": 4.3818, + "step": 238850 + }, + { + "epoch": 1.0283185764524085, + "grad_norm": 0.4477640688419342, + "learning_rate": 8.998626642936727e-05, + "loss": 4.5812, + "step": 238900 + }, + { + "epoch": 1.0285337959116911, + "grad_norm": 3.4138872623443604, + "learning_rate": 8.99822032993672e-05, + "loss": 4.8115, + "step": 238950 + }, + { + "epoch": 1.0287490153709737, + "grad_norm": 2.339745044708252, + "learning_rate": 8.99781394369852e-05, + "loss": 4.3726, + "step": 239000 + }, + { + "epoch": 1.0289642348302563, + "grad_norm": 2.9167847633361816, + "learning_rate": 8.997407484229576e-05, + "loss": 4.9302, + "step": 239050 + }, + { + "epoch": 1.029179454289539, + "grad_norm": 2.0793111324310303, + "learning_rate": 8.99700095153733e-05, + "loss": 4.355, + "step": 239100 + }, + { + "epoch": 1.0293946737488218, + "grad_norm": 1.4209437370300293, + "learning_rate": 8.996594345629232e-05, + "loss": 4.6229, + "step": 239150 + }, + { + "epoch": 1.0296098932081044, + "grad_norm": 1.9098069667816162, + "learning_rate": 8.996187666512728e-05, + "loss": 4.838, + "step": 239200 + }, + { + "epoch": 1.029825112667387, + "grad_norm": 3.24727201461792, + "learning_rate": 8.995780914195268e-05, + "loss": 4.8203, + "step": 239250 + }, + { + "epoch": 1.0300403321266696, + "grad_norm": 2.3268065452575684, + "learning_rate": 8.9953740886843e-05, + "loss": 4.4197, + "step": 239300 + }, + { + "epoch": 1.0302555515859522, + "grad_norm": 2.5630834102630615, + "learning_rate": 8.994967189987282e-05, + "loss": 4.2549, + "step": 239350 + }, + { + "epoch": 1.0304707710452348, + "grad_norm": 2.766475200653076, + "learning_rate": 8.994560218111662e-05, + "loss": 4.676, + "step": 239400 + }, + { + "epoch": 1.0306859905045174, + "grad_norm": 2.9348270893096924, + "learning_rate": 8.994153173064898e-05, + "loss": 5.0504, + "step": 239450 + }, + { + "epoch": 1.0309012099638002, + "grad_norm": 2.83009672164917, + "learning_rate": 8.993746054854447e-05, + "loss": 4.1978, + "step": 239500 + }, + { + "epoch": 1.0311164294230828, + "grad_norm": 3.635291337966919, + "learning_rate": 8.993338863487761e-05, + "loss": 4.8546, + "step": 239550 + }, + { + "epoch": 1.0313316488823654, + "grad_norm": 3.4864089488983154, + "learning_rate": 8.992931598972303e-05, + "loss": 4.5706, + "step": 239600 + }, + { + "epoch": 1.031546868341648, + "grad_norm": 3.109649419784546, + "learning_rate": 8.992524261315533e-05, + "loss": 4.6768, + "step": 239650 + }, + { + "epoch": 1.0317620878009306, + "grad_norm": 2.131072998046875, + "learning_rate": 8.992116850524914e-05, + "loss": 4.9037, + "step": 239700 + }, + { + "epoch": 1.0319773072602132, + "grad_norm": 3.1533544063568115, + "learning_rate": 8.991709366607903e-05, + "loss": 4.0345, + "step": 239750 + }, + { + "epoch": 1.0321925267194958, + "grad_norm": 1.9237171411514282, + "learning_rate": 8.99130180957197e-05, + "loss": 4.5216, + "step": 239800 + }, + { + "epoch": 1.0324077461787784, + "grad_norm": 6.300756931304932, + "learning_rate": 8.990894179424579e-05, + "loss": 4.5256, + "step": 239850 + }, + { + "epoch": 1.0326229656380612, + "grad_norm": 2.2235615253448486, + "learning_rate": 8.990486476173196e-05, + "loss": 4.8334, + "step": 239900 + }, + { + "epoch": 1.0328381850973438, + "grad_norm": 2.2902140617370605, + "learning_rate": 8.990078699825288e-05, + "loss": 4.761, + "step": 239950 + }, + { + "epoch": 1.0330534045566264, + "grad_norm": 4.105779647827148, + "learning_rate": 8.989670850388327e-05, + "loss": 4.8068, + "step": 240000 + }, + { + "epoch": 1.0330534045566264, + "eval_loss": 5.138946533203125, + "eval_runtime": 35.0113, + "eval_samples_per_second": 18.28, + "eval_steps_per_second": 9.14, + "eval_tts_loss": 6.83402160347209, + "step": 240000 + }, + { + "epoch": 1.033268624015909, + "grad_norm": 3.7013795375823975, + "learning_rate": 8.989262927869783e-05, + "loss": 4.0989, + "step": 240050 + }, + { + "epoch": 1.0334838434751916, + "grad_norm": 2.490708351135254, + "learning_rate": 8.988854932277129e-05, + "loss": 4.9587, + "step": 240100 + }, + { + "epoch": 1.0336990629344742, + "grad_norm": 2.575289011001587, + "learning_rate": 8.988446863617836e-05, + "loss": 4.7204, + "step": 240150 + }, + { + "epoch": 1.0339142823937568, + "grad_norm": 2.5553181171417236, + "learning_rate": 8.988038721899381e-05, + "loss": 4.7859, + "step": 240200 + }, + { + "epoch": 1.0341295018530396, + "grad_norm": 1.064810037612915, + "learning_rate": 8.987630507129241e-05, + "loss": 4.5833, + "step": 240250 + }, + { + "epoch": 1.0343447213123222, + "grad_norm": 2.573960065841675, + "learning_rate": 8.987222219314891e-05, + "loss": 4.5959, + "step": 240300 + }, + { + "epoch": 1.0345599407716048, + "grad_norm": 1.1885221004486084, + "learning_rate": 8.986813858463812e-05, + "loss": 4.0015, + "step": 240350 + }, + { + "epoch": 1.0347751602308874, + "grad_norm": 2.4826695919036865, + "learning_rate": 8.986405424583481e-05, + "loss": 4.6409, + "step": 240400 + }, + { + "epoch": 1.03499037969017, + "grad_norm": 2.7764179706573486, + "learning_rate": 8.985996917681386e-05, + "loss": 4.8145, + "step": 240450 + }, + { + "epoch": 1.0352055991494526, + "grad_norm": 0.9077030420303345, + "learning_rate": 8.985588337765003e-05, + "loss": 4.4155, + "step": 240500 + }, + { + "epoch": 1.0354208186087352, + "grad_norm": 2.700768232345581, + "learning_rate": 8.985179684841819e-05, + "loss": 4.1536, + "step": 240550 + }, + { + "epoch": 1.0356360380680179, + "grad_norm": 2.4181578159332275, + "learning_rate": 8.984770958919321e-05, + "loss": 4.6546, + "step": 240600 + }, + { + "epoch": 1.0358512575273007, + "grad_norm": 0.4241751730442047, + "learning_rate": 8.984362160004993e-05, + "loss": 4.4288, + "step": 240650 + }, + { + "epoch": 1.0360664769865833, + "grad_norm": 2.507143974304199, + "learning_rate": 8.983953288106326e-05, + "loss": 4.6195, + "step": 240700 + }, + { + "epoch": 1.0362816964458659, + "grad_norm": 3.2908217906951904, + "learning_rate": 8.983544343230808e-05, + "loss": 4.6509, + "step": 240750 + }, + { + "epoch": 1.0364969159051485, + "grad_norm": 1.5569360256195068, + "learning_rate": 8.983135325385933e-05, + "loss": 4.2482, + "step": 240800 + }, + { + "epoch": 1.036712135364431, + "grad_norm": 3.2446646690368652, + "learning_rate": 8.982726234579187e-05, + "loss": 5.0179, + "step": 240850 + }, + { + "epoch": 1.0369273548237137, + "grad_norm": 2.5106089115142822, + "learning_rate": 8.982317070818069e-05, + "loss": 4.6569, + "step": 240900 + }, + { + "epoch": 1.0371425742829963, + "grad_norm": 2.502789258956909, + "learning_rate": 8.98190783411007e-05, + "loss": 4.376, + "step": 240950 + }, + { + "epoch": 1.037357793742279, + "grad_norm": 2.325573444366455, + "learning_rate": 8.981498524462691e-05, + "loss": 4.9136, + "step": 241000 + }, + { + "epoch": 1.0375730132015617, + "grad_norm": 3.7546603679656982, + "learning_rate": 8.981089141883426e-05, + "loss": 5.1069, + "step": 241050 + }, + { + "epoch": 1.0377882326608443, + "grad_norm": 1.7755346298217773, + "learning_rate": 8.980679686379775e-05, + "loss": 4.2198, + "step": 241100 + }, + { + "epoch": 1.038003452120127, + "grad_norm": 1.3327142000198364, + "learning_rate": 8.980270157959239e-05, + "loss": 4.525, + "step": 241150 + }, + { + "epoch": 1.0382186715794095, + "grad_norm": 2.6060290336608887, + "learning_rate": 8.979860556629318e-05, + "loss": 4.1909, + "step": 241200 + }, + { + "epoch": 1.038433891038692, + "grad_norm": 1.6087427139282227, + "learning_rate": 8.979450882397517e-05, + "loss": 4.6821, + "step": 241250 + }, + { + "epoch": 1.0386491104979747, + "grad_norm": 1.8189913034439087, + "learning_rate": 8.979041135271337e-05, + "loss": 4.9703, + "step": 241300 + }, + { + "epoch": 1.0388643299572573, + "grad_norm": 1.9557288885116577, + "learning_rate": 8.97863131525829e-05, + "loss": 4.6867, + "step": 241350 + }, + { + "epoch": 1.0390795494165401, + "grad_norm": 0.5198611617088318, + "learning_rate": 8.978221422365875e-05, + "loss": 4.5546, + "step": 241400 + }, + { + "epoch": 1.0392947688758227, + "grad_norm": 3.7010045051574707, + "learning_rate": 8.977811456601605e-05, + "loss": 4.6046, + "step": 241450 + }, + { + "epoch": 1.0395099883351053, + "grad_norm": 2.596794366836548, + "learning_rate": 8.977401417972988e-05, + "loss": 4.6141, + "step": 241500 + }, + { + "epoch": 1.039725207794388, + "grad_norm": 1.4981143474578857, + "learning_rate": 8.976991306487536e-05, + "loss": 4.9499, + "step": 241550 + }, + { + "epoch": 1.0399404272536705, + "grad_norm": 3.9799106121063232, + "learning_rate": 8.976581122152763e-05, + "loss": 4.715, + "step": 241600 + }, + { + "epoch": 1.0401556467129531, + "grad_norm": 2.5304200649261475, + "learning_rate": 8.976170864976179e-05, + "loss": 4.6392, + "step": 241650 + }, + { + "epoch": 1.0403708661722357, + "grad_norm": 2.1400043964385986, + "learning_rate": 8.9757605349653e-05, + "loss": 4.2291, + "step": 241700 + }, + { + "epoch": 1.0405860856315186, + "grad_norm": 0.8942609429359436, + "learning_rate": 8.975350132127642e-05, + "loss": 4.7535, + "step": 241750 + }, + { + "epoch": 1.0408013050908012, + "grad_norm": 5.299892902374268, + "learning_rate": 8.974939656470726e-05, + "loss": 4.3649, + "step": 241800 + }, + { + "epoch": 1.0410165245500838, + "grad_norm": 4.286065578460693, + "learning_rate": 8.974529108002067e-05, + "loss": 4.3091, + "step": 241850 + }, + { + "epoch": 1.0412317440093664, + "grad_norm": 3.442019462585449, + "learning_rate": 8.974118486729186e-05, + "loss": 4.7124, + "step": 241900 + }, + { + "epoch": 1.041446963468649, + "grad_norm": 1.5312530994415283, + "learning_rate": 8.973707792659607e-05, + "loss": 4.7257, + "step": 241950 + }, + { + "epoch": 1.0416621829279316, + "grad_norm": 4.880786895751953, + "learning_rate": 8.973297025800851e-05, + "loss": 4.6681, + "step": 242000 + }, + { + "epoch": 1.0418774023872142, + "grad_norm": 2.004883050918579, + "learning_rate": 8.972886186160442e-05, + "loss": 4.3899, + "step": 242050 + }, + { + "epoch": 1.042092621846497, + "grad_norm": 3.1043570041656494, + "learning_rate": 8.972475273745906e-05, + "loss": 4.6509, + "step": 242100 + }, + { + "epoch": 1.0423078413057796, + "grad_norm": 3.5730574131011963, + "learning_rate": 8.97206428856477e-05, + "loss": 4.7596, + "step": 242150 + }, + { + "epoch": 1.0425230607650622, + "grad_norm": 0.6517900228500366, + "learning_rate": 8.971653230624564e-05, + "loss": 4.8143, + "step": 242200 + }, + { + "epoch": 1.0427382802243448, + "grad_norm": 1.210027813911438, + "learning_rate": 8.971242099932816e-05, + "loss": 4.5521, + "step": 242250 + }, + { + "epoch": 1.0429534996836274, + "grad_norm": 1.35159170627594, + "learning_rate": 8.970830896497055e-05, + "loss": 4.7413, + "step": 242300 + }, + { + "epoch": 1.04316871914291, + "grad_norm": 2.033064365386963, + "learning_rate": 8.970419620324817e-05, + "loss": 4.6353, + "step": 242350 + }, + { + "epoch": 1.0433839386021926, + "grad_norm": 0.8209533095359802, + "learning_rate": 8.970008271423635e-05, + "loss": 4.437, + "step": 242400 + }, + { + "epoch": 1.0435991580614752, + "grad_norm": 6.569743633270264, + "learning_rate": 8.969596849801041e-05, + "loss": 5.0919, + "step": 242450 + }, + { + "epoch": 1.043814377520758, + "grad_norm": 1.4877991676330566, + "learning_rate": 8.969185355464575e-05, + "loss": 4.7839, + "step": 242500 + }, + { + "epoch": 1.0440295969800406, + "grad_norm": 2.275784492492676, + "learning_rate": 8.968773788421774e-05, + "loss": 4.4646, + "step": 242550 + }, + { + "epoch": 1.0442448164393232, + "grad_norm": 3.2202723026275635, + "learning_rate": 8.968362148680173e-05, + "loss": 4.9125, + "step": 242600 + }, + { + "epoch": 1.0444600358986058, + "grad_norm": 1.9966390132904053, + "learning_rate": 8.967950436247317e-05, + "loss": 4.7634, + "step": 242650 + }, + { + "epoch": 1.0446752553578884, + "grad_norm": 2.613370418548584, + "learning_rate": 8.967538651130744e-05, + "loss": 4.7454, + "step": 242700 + }, + { + "epoch": 1.044890474817171, + "grad_norm": 2.5552074909210205, + "learning_rate": 8.967126793338002e-05, + "loss": 4.7731, + "step": 242750 + }, + { + "epoch": 1.0451056942764536, + "grad_norm": 3.3228044509887695, + "learning_rate": 8.966714862876631e-05, + "loss": 4.9076, + "step": 242800 + }, + { + "epoch": 1.0453209137357364, + "grad_norm": 3.096653461456299, + "learning_rate": 8.966302859754176e-05, + "loss": 4.7274, + "step": 242850 + }, + { + "epoch": 1.045536133195019, + "grad_norm": 2.165713310241699, + "learning_rate": 8.965890783978187e-05, + "loss": 4.864, + "step": 242900 + }, + { + "epoch": 1.0457513526543016, + "grad_norm": 2.4362664222717285, + "learning_rate": 8.96547863555621e-05, + "loss": 4.5675, + "step": 242950 + }, + { + "epoch": 1.0459665721135842, + "grad_norm": 0.9919951558113098, + "learning_rate": 8.965066414495798e-05, + "loss": 4.1544, + "step": 243000 + }, + { + "epoch": 1.0459665721135842, + "eval_loss": 5.12960958480835, + "eval_runtime": 35.1533, + "eval_samples_per_second": 18.206, + "eval_steps_per_second": 9.103, + "eval_tts_loss": 6.804610572258985, + "step": 243000 + }, + { + "epoch": 1.0461817915728668, + "grad_norm": 2.6091620922088623, + "learning_rate": 8.964654120804496e-05, + "loss": 4.3194, + "step": 243050 + }, + { + "epoch": 1.0463970110321494, + "grad_norm": 0.5670744776725769, + "learning_rate": 8.964241754489863e-05, + "loss": 4.9474, + "step": 243100 + }, + { + "epoch": 1.046612230491432, + "grad_norm": 2.5920605659484863, + "learning_rate": 8.963829315559447e-05, + "loss": 4.8153, + "step": 243150 + }, + { + "epoch": 1.0468274499507146, + "grad_norm": 2.723480224609375, + "learning_rate": 8.963416804020805e-05, + "loss": 4.6666, + "step": 243200 + }, + { + "epoch": 1.0470426694099975, + "grad_norm": 2.8573381900787354, + "learning_rate": 8.963004219881497e-05, + "loss": 4.6045, + "step": 243250 + }, + { + "epoch": 1.04725788886928, + "grad_norm": 2.781456708908081, + "learning_rate": 8.962591563149074e-05, + "loss": 4.7026, + "step": 243300 + }, + { + "epoch": 1.0474731083285627, + "grad_norm": 3.702700138092041, + "learning_rate": 8.962178833831099e-05, + "loss": 4.1244, + "step": 243350 + }, + { + "epoch": 1.0476883277878453, + "grad_norm": 0.699306845664978, + "learning_rate": 8.96176603193513e-05, + "loss": 4.5108, + "step": 243400 + }, + { + "epoch": 1.0479035472471279, + "grad_norm": 3.4854774475097656, + "learning_rate": 8.961353157468733e-05, + "loss": 4.2, + "step": 243450 + }, + { + "epoch": 1.0481187667064105, + "grad_norm": 3.2788619995117188, + "learning_rate": 8.960940210439465e-05, + "loss": 4.2123, + "step": 243500 + }, + { + "epoch": 1.048333986165693, + "grad_norm": 4.338934421539307, + "learning_rate": 8.960527190854893e-05, + "loss": 4.6212, + "step": 243550 + }, + { + "epoch": 1.0485492056249759, + "grad_norm": 3.9407196044921875, + "learning_rate": 8.960114098722585e-05, + "loss": 4.6354, + "step": 243600 + }, + { + "epoch": 1.0487644250842585, + "grad_norm": 2.414179563522339, + "learning_rate": 8.959700934050103e-05, + "loss": 4.7816, + "step": 243650 + }, + { + "epoch": 1.048979644543541, + "grad_norm": 2.0613017082214355, + "learning_rate": 8.959287696845019e-05, + "loss": 4.4186, + "step": 243700 + }, + { + "epoch": 1.0491948640028237, + "grad_norm": 2.107853889465332, + "learning_rate": 8.958874387114901e-05, + "loss": 4.4448, + "step": 243750 + }, + { + "epoch": 1.0494100834621063, + "grad_norm": 0.8134559392929077, + "learning_rate": 8.95846100486732e-05, + "loss": 4.7738, + "step": 243800 + }, + { + "epoch": 1.049625302921389, + "grad_norm": 2.751227855682373, + "learning_rate": 8.958047550109849e-05, + "loss": 4.6703, + "step": 243850 + }, + { + "epoch": 1.0498405223806715, + "grad_norm": 3.6851813793182373, + "learning_rate": 8.957634022850062e-05, + "loss": 4.7656, + "step": 243900 + }, + { + "epoch": 1.0500557418399543, + "grad_norm": 4.128626346588135, + "learning_rate": 8.95722042309553e-05, + "loss": 4.6942, + "step": 243950 + }, + { + "epoch": 1.050270961299237, + "grad_norm": 3.6943116188049316, + "learning_rate": 8.956806750853833e-05, + "loss": 4.6867, + "step": 244000 + }, + { + "epoch": 1.0504861807585195, + "grad_norm": 2.8777682781219482, + "learning_rate": 8.956393006132549e-05, + "loss": 4.572, + "step": 244050 + }, + { + "epoch": 1.0507014002178021, + "grad_norm": 4.033403396606445, + "learning_rate": 8.955979188939255e-05, + "loss": 4.2218, + "step": 244100 + }, + { + "epoch": 1.0509166196770847, + "grad_norm": 3.3528921604156494, + "learning_rate": 8.95556529928153e-05, + "loss": 4.3817, + "step": 244150 + }, + { + "epoch": 1.0511318391363673, + "grad_norm": 1.967759370803833, + "learning_rate": 8.955151337166958e-05, + "loss": 4.9259, + "step": 244200 + }, + { + "epoch": 1.05134705859565, + "grad_norm": 2.6564290523529053, + "learning_rate": 8.95473730260312e-05, + "loss": 4.3518, + "step": 244250 + }, + { + "epoch": 1.0515622780549325, + "grad_norm": 1.8243311643600464, + "learning_rate": 8.954323195597603e-05, + "loss": 4.5601, + "step": 244300 + }, + { + "epoch": 1.0517774975142153, + "grad_norm": 5.7112202644348145, + "learning_rate": 8.953909016157989e-05, + "loss": 5.0846, + "step": 244350 + }, + { + "epoch": 1.051992716973498, + "grad_norm": 3.7599456310272217, + "learning_rate": 8.953494764291866e-05, + "loss": 4.9799, + "step": 244400 + }, + { + "epoch": 1.0522079364327805, + "grad_norm": 1.7353967428207397, + "learning_rate": 8.953080440006822e-05, + "loss": 4.4827, + "step": 244450 + }, + { + "epoch": 1.0524231558920631, + "grad_norm": 1.9398655891418457, + "learning_rate": 8.95266604331045e-05, + "loss": 4.823, + "step": 244500 + }, + { + "epoch": 1.0526383753513457, + "grad_norm": 2.655510902404785, + "learning_rate": 8.952251574210334e-05, + "loss": 4.4705, + "step": 244550 + }, + { + "epoch": 1.0528535948106283, + "grad_norm": 1.3801134824752808, + "learning_rate": 8.951837032714072e-05, + "loss": 4.6585, + "step": 244600 + }, + { + "epoch": 1.053068814269911, + "grad_norm": 2.2310116291046143, + "learning_rate": 8.951422418829253e-05, + "loss": 4.8004, + "step": 244650 + }, + { + "epoch": 1.0532840337291938, + "grad_norm": 2.029315948486328, + "learning_rate": 8.951007732563476e-05, + "loss": 4.7348, + "step": 244700 + }, + { + "epoch": 1.0534992531884764, + "grad_norm": 2.226106643676758, + "learning_rate": 8.950592973924336e-05, + "loss": 5.0311, + "step": 244750 + }, + { + "epoch": 1.053714472647759, + "grad_norm": 2.966123580932617, + "learning_rate": 8.95017814291943e-05, + "loss": 4.5368, + "step": 244800 + }, + { + "epoch": 1.0539296921070416, + "grad_norm": 2.941349983215332, + "learning_rate": 8.949763239556354e-05, + "loss": 4.5425, + "step": 244850 + }, + { + "epoch": 1.0541449115663242, + "grad_norm": 2.1506569385528564, + "learning_rate": 8.949348263842711e-05, + "loss": 4.7366, + "step": 244900 + }, + { + "epoch": 1.0543601310256068, + "grad_norm": 2.16558575630188, + "learning_rate": 8.948933215786103e-05, + "loss": 4.4607, + "step": 244950 + }, + { + "epoch": 1.0545753504848894, + "grad_norm": 0.8995633721351624, + "learning_rate": 8.94851809539413e-05, + "loss": 5.1091, + "step": 245000 + }, + { + "epoch": 1.054790569944172, + "grad_norm": 4.235933780670166, + "learning_rate": 8.948102902674399e-05, + "loss": 4.332, + "step": 245050 + }, + { + "epoch": 1.0550057894034548, + "grad_norm": 2.373615264892578, + "learning_rate": 8.947687637634513e-05, + "loss": 4.3812, + "step": 245100 + }, + { + "epoch": 1.0552210088627374, + "grad_norm": 2.402993679046631, + "learning_rate": 8.947272300282081e-05, + "loss": 5.0572, + "step": 245150 + }, + { + "epoch": 1.05543622832202, + "grad_norm": 2.8497838973999023, + "learning_rate": 8.946856890624708e-05, + "loss": 4.7874, + "step": 245200 + }, + { + "epoch": 1.0556514477813026, + "grad_norm": 5.203251838684082, + "learning_rate": 8.946441408670005e-05, + "loss": 4.6523, + "step": 245250 + }, + { + "epoch": 1.0558666672405852, + "grad_norm": 2.9976110458374023, + "learning_rate": 8.946025854425584e-05, + "loss": 4.6602, + "step": 245300 + }, + { + "epoch": 1.0560818866998678, + "grad_norm": 2.795433282852173, + "learning_rate": 8.945610227899055e-05, + "loss": 4.4539, + "step": 245350 + }, + { + "epoch": 1.0562971061591504, + "grad_norm": 2.640166997909546, + "learning_rate": 8.945194529098032e-05, + "loss": 4.675, + "step": 245400 + }, + { + "epoch": 1.0565123256184332, + "grad_norm": 3.2293503284454346, + "learning_rate": 8.94477875803013e-05, + "loss": 5.0845, + "step": 245450 + }, + { + "epoch": 1.0567275450777158, + "grad_norm": 1.1640187501907349, + "learning_rate": 8.944362914702964e-05, + "loss": 4.5853, + "step": 245500 + }, + { + "epoch": 1.0569427645369984, + "grad_norm": 2.079631805419922, + "learning_rate": 8.943946999124153e-05, + "loss": 5.0061, + "step": 245550 + }, + { + "epoch": 1.057157983996281, + "grad_norm": 4.790759086608887, + "learning_rate": 8.943531011301314e-05, + "loss": 4.7917, + "step": 245600 + }, + { + "epoch": 1.0573732034555636, + "grad_norm": 2.562103033065796, + "learning_rate": 8.943114951242066e-05, + "loss": 5.1674, + "step": 245650 + }, + { + "epoch": 1.0575884229148462, + "grad_norm": 1.7930588722229004, + "learning_rate": 8.942698818954034e-05, + "loss": 4.9012, + "step": 245700 + }, + { + "epoch": 1.0578036423741288, + "grad_norm": 3.494962453842163, + "learning_rate": 8.942282614444838e-05, + "loss": 4.8531, + "step": 245750 + }, + { + "epoch": 1.0580188618334114, + "grad_norm": 2.3682031631469727, + "learning_rate": 8.941866337722102e-05, + "loss": 4.7811, + "step": 245800 + }, + { + "epoch": 1.0582340812926943, + "grad_norm": 0.615227997303009, + "learning_rate": 8.941449988793451e-05, + "loss": 4.4417, + "step": 245850 + }, + { + "epoch": 1.0584493007519769, + "grad_norm": 2.6337852478027344, + "learning_rate": 8.941033567666512e-05, + "loss": 4.3072, + "step": 245900 + }, + { + "epoch": 1.0586645202112595, + "grad_norm": 2.6446681022644043, + "learning_rate": 8.940617074348914e-05, + "loss": 4.1587, + "step": 245950 + }, + { + "epoch": 1.058879739670542, + "grad_norm": 2.7166149616241455, + "learning_rate": 8.940200508848284e-05, + "loss": 4.7731, + "step": 246000 + }, + { + "epoch": 1.058879739670542, + "eval_loss": 5.1370954513549805, + "eval_runtime": 35.0066, + "eval_samples_per_second": 18.282, + "eval_steps_per_second": 9.141, + "eval_tts_loss": 6.858599188667726, + "step": 246000 + }, + { + "epoch": 1.0590949591298247, + "grad_norm": 2.6827759742736816, + "learning_rate": 8.939783871172253e-05, + "loss": 4.8242, + "step": 246050 + }, + { + "epoch": 1.0593101785891073, + "grad_norm": 5.195135593414307, + "learning_rate": 8.939367161328455e-05, + "loss": 5.0516, + "step": 246100 + }, + { + "epoch": 1.0595253980483899, + "grad_norm": 2.8722782135009766, + "learning_rate": 8.938950379324519e-05, + "loss": 4.8699, + "step": 246150 + }, + { + "epoch": 1.0597406175076727, + "grad_norm": 1.9631459712982178, + "learning_rate": 8.938533525168085e-05, + "loss": 4.8478, + "step": 246200 + }, + { + "epoch": 1.0599558369669553, + "grad_norm": 2.3314907550811768, + "learning_rate": 8.938116598866786e-05, + "loss": 4.6663, + "step": 246250 + }, + { + "epoch": 1.0601710564262379, + "grad_norm": 2.56001877784729, + "learning_rate": 8.937699600428258e-05, + "loss": 4.902, + "step": 246300 + }, + { + "epoch": 1.0603862758855205, + "grad_norm": 2.287355899810791, + "learning_rate": 8.93728252986014e-05, + "loss": 4.8455, + "step": 246350 + }, + { + "epoch": 1.060601495344803, + "grad_norm": 2.329941987991333, + "learning_rate": 8.936865387170072e-05, + "loss": 4.7949, + "step": 246400 + }, + { + "epoch": 1.0608167148040857, + "grad_norm": 2.179692268371582, + "learning_rate": 8.936448172365697e-05, + "loss": 4.3567, + "step": 246450 + }, + { + "epoch": 1.0610319342633683, + "grad_norm": 2.289254903793335, + "learning_rate": 8.936030885454656e-05, + "loss": 4.8148, + "step": 246500 + }, + { + "epoch": 1.0612471537226509, + "grad_norm": 2.0638418197631836, + "learning_rate": 8.93561352644459e-05, + "loss": 4.5577, + "step": 246550 + }, + { + "epoch": 1.0614623731819337, + "grad_norm": 1.5693435668945312, + "learning_rate": 8.935196095343148e-05, + "loss": 4.1658, + "step": 246600 + }, + { + "epoch": 1.0616775926412163, + "grad_norm": 1.6512696743011475, + "learning_rate": 8.934778592157975e-05, + "loss": 4.6077, + "step": 246650 + }, + { + "epoch": 1.061892812100499, + "grad_norm": 2.3213586807250977, + "learning_rate": 8.934361016896719e-05, + "loss": 4.6454, + "step": 246700 + }, + { + "epoch": 1.0621080315597815, + "grad_norm": 1.9968138933181763, + "learning_rate": 8.933943369567028e-05, + "loss": 5.0595, + "step": 246750 + }, + { + "epoch": 1.062323251019064, + "grad_norm": 3.9939374923706055, + "learning_rate": 8.933525650176554e-05, + "loss": 4.7384, + "step": 246800 + }, + { + "epoch": 1.0625384704783467, + "grad_norm": 2.299830198287964, + "learning_rate": 8.933107858732948e-05, + "loss": 4.896, + "step": 246850 + }, + { + "epoch": 1.0627536899376293, + "grad_norm": 3.1180522441864014, + "learning_rate": 8.932689995243861e-05, + "loss": 4.9712, + "step": 246900 + }, + { + "epoch": 1.0629689093969121, + "grad_norm": 2.2229650020599365, + "learning_rate": 8.93227205971695e-05, + "loss": 4.6053, + "step": 246950 + }, + { + "epoch": 1.0631841288561947, + "grad_norm": 1.579463243484497, + "learning_rate": 8.93185405215987e-05, + "loss": 4.412, + "step": 247000 + }, + { + "epoch": 1.0633993483154773, + "grad_norm": 6.187130928039551, + "learning_rate": 8.931435972580275e-05, + "loss": 4.6175, + "step": 247050 + }, + { + "epoch": 1.06361456777476, + "grad_norm": 2.231096029281616, + "learning_rate": 8.931017820985827e-05, + "loss": 4.0459, + "step": 247100 + }, + { + "epoch": 1.0638297872340425, + "grad_norm": 0.9390613436698914, + "learning_rate": 8.930599597384187e-05, + "loss": 4.2669, + "step": 247150 + }, + { + "epoch": 1.0640450066933251, + "grad_norm": 1.1977558135986328, + "learning_rate": 8.930181301783009e-05, + "loss": 4.1287, + "step": 247200 + }, + { + "epoch": 1.0642602261526077, + "grad_norm": 2.6262919902801514, + "learning_rate": 8.929762934189962e-05, + "loss": 4.6995, + "step": 247250 + }, + { + "epoch": 1.0644754456118903, + "grad_norm": 2.8683462142944336, + "learning_rate": 8.929344494612706e-05, + "loss": 4.3729, + "step": 247300 + }, + { + "epoch": 1.0646906650711732, + "grad_norm": 3.149538040161133, + "learning_rate": 8.928925983058907e-05, + "loss": 4.5585, + "step": 247350 + }, + { + "epoch": 1.0649058845304558, + "grad_norm": 2.057633638381958, + "learning_rate": 8.928507399536231e-05, + "loss": 4.7693, + "step": 247400 + }, + { + "epoch": 1.0651211039897384, + "grad_norm": 2.5577526092529297, + "learning_rate": 8.928088744052345e-05, + "loss": 4.9304, + "step": 247450 + }, + { + "epoch": 1.065336323449021, + "grad_norm": 4.308826923370361, + "learning_rate": 8.92767001661492e-05, + "loss": 4.8611, + "step": 247500 + }, + { + "epoch": 1.0655515429083036, + "grad_norm": 1.522517204284668, + "learning_rate": 8.927251217231623e-05, + "loss": 4.7355, + "step": 247550 + }, + { + "epoch": 1.0657667623675862, + "grad_norm": 2.877683162689209, + "learning_rate": 8.926832345910126e-05, + "loss": 4.5196, + "step": 247600 + }, + { + "epoch": 1.0659819818268688, + "grad_norm": 1.503145456314087, + "learning_rate": 8.926413402658103e-05, + "loss": 4.4083, + "step": 247650 + }, + { + "epoch": 1.0661972012861516, + "grad_norm": 2.2802910804748535, + "learning_rate": 8.925994387483229e-05, + "loss": 4.4206, + "step": 247700 + }, + { + "epoch": 1.0664124207454342, + "grad_norm": 0.8796724081039429, + "learning_rate": 8.925575300393178e-05, + "loss": 4.2555, + "step": 247750 + }, + { + "epoch": 1.0666276402047168, + "grad_norm": 2.1020665168762207, + "learning_rate": 8.925156141395625e-05, + "loss": 4.4515, + "step": 247800 + }, + { + "epoch": 1.0668428596639994, + "grad_norm": 1.1456469297409058, + "learning_rate": 8.924736910498252e-05, + "loss": 4.5851, + "step": 247850 + }, + { + "epoch": 1.067058079123282, + "grad_norm": 3.168027877807617, + "learning_rate": 8.924317607708734e-05, + "loss": 4.753, + "step": 247900 + }, + { + "epoch": 1.0672732985825646, + "grad_norm": 0.8731731176376343, + "learning_rate": 8.923898233034756e-05, + "loss": 4.7452, + "step": 247950 + }, + { + "epoch": 1.0674885180418472, + "grad_norm": 2.059797763824463, + "learning_rate": 8.923478786483996e-05, + "loss": 4.755, + "step": 248000 + }, + { + "epoch": 1.0677037375011298, + "grad_norm": 4.861206531524658, + "learning_rate": 8.92305926806414e-05, + "loss": 4.985, + "step": 248050 + }, + { + "epoch": 1.0679189569604126, + "grad_norm": 4.287187099456787, + "learning_rate": 8.922639677782871e-05, + "loss": 4.5591, + "step": 248100 + }, + { + "epoch": 1.0681341764196952, + "grad_norm": 2.993001699447632, + "learning_rate": 8.922220015647875e-05, + "loss": 4.547, + "step": 248150 + }, + { + "epoch": 1.0683493958789778, + "grad_norm": 2.838280439376831, + "learning_rate": 8.92180028166684e-05, + "loss": 4.6427, + "step": 248200 + }, + { + "epoch": 1.0685646153382604, + "grad_norm": 2.1165599822998047, + "learning_rate": 8.921380475847456e-05, + "loss": 4.6486, + "step": 248250 + }, + { + "epoch": 1.068779834797543, + "grad_norm": 0.9204211831092834, + "learning_rate": 8.92096059819741e-05, + "loss": 4.1079, + "step": 248300 + }, + { + "epoch": 1.0689950542568256, + "grad_norm": 3.1241440773010254, + "learning_rate": 8.920540648724395e-05, + "loss": 4.7291, + "step": 248350 + }, + { + "epoch": 1.0692102737161082, + "grad_norm": 1.2918469905853271, + "learning_rate": 8.920120627436104e-05, + "loss": 4.2769, + "step": 248400 + }, + { + "epoch": 1.069425493175391, + "grad_norm": 3.2495882511138916, + "learning_rate": 8.919700534340228e-05, + "loss": 4.1213, + "step": 248450 + }, + { + "epoch": 1.0696407126346736, + "grad_norm": 0.6847423315048218, + "learning_rate": 8.919280369444465e-05, + "loss": 4.5809, + "step": 248500 + }, + { + "epoch": 1.0698559320939562, + "grad_norm": 2.0163192749023438, + "learning_rate": 8.918860132756511e-05, + "loss": 4.4258, + "step": 248550 + }, + { + "epoch": 1.0700711515532388, + "grad_norm": 3.256960868835449, + "learning_rate": 8.918439824284062e-05, + "loss": 4.8895, + "step": 248600 + }, + { + "epoch": 1.0702863710125214, + "grad_norm": 2.851992607116699, + "learning_rate": 8.918019444034818e-05, + "loss": 4.6239, + "step": 248650 + }, + { + "epoch": 1.070501590471804, + "grad_norm": 2.0654714107513428, + "learning_rate": 8.91759899201648e-05, + "loss": 5.1352, + "step": 248700 + }, + { + "epoch": 1.0707168099310866, + "grad_norm": 2.076650619506836, + "learning_rate": 8.91717846823675e-05, + "loss": 4.809, + "step": 248750 + }, + { + "epoch": 1.0709320293903692, + "grad_norm": 4.146795272827148, + "learning_rate": 8.916757872703329e-05, + "loss": 4.664, + "step": 248800 + }, + { + "epoch": 1.071147248849652, + "grad_norm": 3.183507204055786, + "learning_rate": 8.916337205423924e-05, + "loss": 4.4287, + "step": 248850 + }, + { + "epoch": 1.0713624683089347, + "grad_norm": 2.3412563800811768, + "learning_rate": 8.91591646640624e-05, + "loss": 4.6067, + "step": 248900 + }, + { + "epoch": 1.0715776877682173, + "grad_norm": 4.483916759490967, + "learning_rate": 8.915495655657982e-05, + "loss": 4.6185, + "step": 248950 + }, + { + "epoch": 1.0717929072274999, + "grad_norm": 2.3078067302703857, + "learning_rate": 8.91507477318686e-05, + "loss": 4.6201, + "step": 249000 + }, + { + "epoch": 1.0717929072274999, + "eval_loss": 5.1339111328125, + "eval_runtime": 35.106, + "eval_samples_per_second": 18.23, + "eval_steps_per_second": 9.115, + "eval_tts_loss": 6.880006599134726, + "step": 249000 + }, + { + "epoch": 1.0720081266867825, + "grad_norm": 2.068869113922119, + "learning_rate": 8.914653819000583e-05, + "loss": 4.865, + "step": 249050 + }, + { + "epoch": 1.072223346146065, + "grad_norm": 1.6491543054580688, + "learning_rate": 8.914232793106862e-05, + "loss": 4.5457, + "step": 249100 + }, + { + "epoch": 1.0724385656053477, + "grad_norm": 2.5536468029022217, + "learning_rate": 8.913811695513411e-05, + "loss": 4.9472, + "step": 249150 + }, + { + "epoch": 1.0726537850646305, + "grad_norm": 2.1397550106048584, + "learning_rate": 8.91339052622794e-05, + "loss": 4.8877, + "step": 249200 + }, + { + "epoch": 1.072869004523913, + "grad_norm": 4.773959636688232, + "learning_rate": 8.912969285258167e-05, + "loss": 4.7475, + "step": 249250 + }, + { + "epoch": 1.0730842239831957, + "grad_norm": 2.418576717376709, + "learning_rate": 8.912547972611807e-05, + "loss": 5.1932, + "step": 249300 + }, + { + "epoch": 1.0732994434424783, + "grad_norm": 5.086622714996338, + "learning_rate": 8.912126588296577e-05, + "loss": 4.403, + "step": 249350 + }, + { + "epoch": 1.073514662901761, + "grad_norm": 2.6017332077026367, + "learning_rate": 8.911705132320196e-05, + "loss": 4.9748, + "step": 249400 + }, + { + "epoch": 1.0737298823610435, + "grad_norm": 1.9791351556777954, + "learning_rate": 8.911283604690384e-05, + "loss": 4.8273, + "step": 249450 + }, + { + "epoch": 1.073945101820326, + "grad_norm": 5.355862140655518, + "learning_rate": 8.910862005414862e-05, + "loss": 5.032, + "step": 249500 + }, + { + "epoch": 1.074160321279609, + "grad_norm": 4.054447174072266, + "learning_rate": 8.910440334501355e-05, + "loss": 4.3555, + "step": 249550 + }, + { + "epoch": 1.0743755407388915, + "grad_norm": 6.191126346588135, + "learning_rate": 8.910018591957585e-05, + "loss": 4.9506, + "step": 249600 + }, + { + "epoch": 1.0745907601981741, + "grad_norm": 2.2558696269989014, + "learning_rate": 8.909596777791278e-05, + "loss": 4.0955, + "step": 249650 + }, + { + "epoch": 1.0748059796574567, + "grad_norm": 0.7516539692878723, + "learning_rate": 8.909174892010161e-05, + "loss": 4.6862, + "step": 249700 + }, + { + "epoch": 1.0750211991167393, + "grad_norm": 1.85518479347229, + "learning_rate": 8.90875293462196e-05, + "loss": 4.3636, + "step": 249750 + }, + { + "epoch": 1.075236418576022, + "grad_norm": 2.2150516510009766, + "learning_rate": 8.908330905634408e-05, + "loss": 4.9893, + "step": 249800 + }, + { + "epoch": 1.0754516380353045, + "grad_norm": 1.4117144346237183, + "learning_rate": 8.907908805055231e-05, + "loss": 4.6118, + "step": 249850 + }, + { + "epoch": 1.0756668574945873, + "grad_norm": 4.205339431762695, + "learning_rate": 8.907486632892164e-05, + "loss": 4.7231, + "step": 249900 + }, + { + "epoch": 1.07588207695387, + "grad_norm": 2.7894017696380615, + "learning_rate": 8.90706438915294e-05, + "loss": 4.7355, + "step": 249950 + }, + { + "epoch": 1.0760972964131525, + "grad_norm": 1.8730807304382324, + "learning_rate": 8.906642073845292e-05, + "loss": 4.7761, + "step": 250000 + }, + { + "epoch": 1.0763125158724351, + "grad_norm": 4.4357476234436035, + "learning_rate": 8.906219686976959e-05, + "loss": 4.4506, + "step": 250050 + }, + { + "epoch": 1.0765277353317178, + "grad_norm": 2.594094753265381, + "learning_rate": 8.905797228555675e-05, + "loss": 4.9445, + "step": 250100 + }, + { + "epoch": 1.0767429547910004, + "grad_norm": 2.856401205062866, + "learning_rate": 8.905374698589178e-05, + "loss": 5.2879, + "step": 250150 + }, + { + "epoch": 1.076958174250283, + "grad_norm": 2.8409652709960938, + "learning_rate": 8.90495209708521e-05, + "loss": 4.1338, + "step": 250200 + }, + { + "epoch": 1.0771733937095656, + "grad_norm": 2.7496933937072754, + "learning_rate": 8.904529424051513e-05, + "loss": 4.8403, + "step": 250250 + }, + { + "epoch": 1.0773886131688484, + "grad_norm": 2.914322853088379, + "learning_rate": 8.904106679495827e-05, + "loss": 4.2119, + "step": 250300 + }, + { + "epoch": 1.077603832628131, + "grad_norm": 2.546801805496216, + "learning_rate": 8.903683863425895e-05, + "loss": 4.9129, + "step": 250350 + }, + { + "epoch": 1.0778190520874136, + "grad_norm": 0.5961350202560425, + "learning_rate": 8.903260975849466e-05, + "loss": 4.4141, + "step": 250400 + }, + { + "epoch": 1.0780342715466962, + "grad_norm": 3.647237777709961, + "learning_rate": 8.902838016774282e-05, + "loss": 4.5692, + "step": 250450 + }, + { + "epoch": 1.0782494910059788, + "grad_norm": 2.1810221672058105, + "learning_rate": 8.902414986208092e-05, + "loss": 4.2916, + "step": 250500 + }, + { + "epoch": 1.0784647104652614, + "grad_norm": 3.20465350151062, + "learning_rate": 8.901991884158647e-05, + "loss": 4.544, + "step": 250550 + }, + { + "epoch": 1.078679929924544, + "grad_norm": 2.832581043243408, + "learning_rate": 8.901568710633694e-05, + "loss": 4.7527, + "step": 250600 + }, + { + "epoch": 1.0788951493838268, + "grad_norm": 0.9509122371673584, + "learning_rate": 8.901145465640987e-05, + "loss": 4.2083, + "step": 250650 + }, + { + "epoch": 1.0791103688431094, + "grad_norm": 2.3661270141601562, + "learning_rate": 8.900722149188279e-05, + "loss": 4.6434, + "step": 250700 + }, + { + "epoch": 1.079325588302392, + "grad_norm": 2.537494659423828, + "learning_rate": 8.900298761283322e-05, + "loss": 4.6034, + "step": 250750 + }, + { + "epoch": 1.0795408077616746, + "grad_norm": 2.9838647842407227, + "learning_rate": 8.899875301933873e-05, + "loss": 4.608, + "step": 250800 + }, + { + "epoch": 1.0797560272209572, + "grad_norm": 2.8191475868225098, + "learning_rate": 8.899451771147689e-05, + "loss": 4.457, + "step": 250850 + }, + { + "epoch": 1.0799712466802398, + "grad_norm": 2.827061891555786, + "learning_rate": 8.899028168932527e-05, + "loss": 4.6247, + "step": 250900 + }, + { + "epoch": 1.0801864661395224, + "grad_norm": 3.245079755783081, + "learning_rate": 8.898604495296148e-05, + "loss": 4.3271, + "step": 250950 + }, + { + "epoch": 1.080401685598805, + "grad_norm": 4.119300365447998, + "learning_rate": 8.89818075024631e-05, + "loss": 4.5732, + "step": 251000 + }, + { + "epoch": 1.0806169050580878, + "grad_norm": 1.0341823101043701, + "learning_rate": 8.897756933790779e-05, + "loss": 4.4489, + "step": 251050 + }, + { + "epoch": 1.0808321245173704, + "grad_norm": 1.9535648822784424, + "learning_rate": 8.897333045937315e-05, + "loss": 4.537, + "step": 251100 + }, + { + "epoch": 1.081047343976653, + "grad_norm": 2.153007984161377, + "learning_rate": 8.896909086693683e-05, + "loss": 4.5891, + "step": 251150 + }, + { + "epoch": 1.0812625634359356, + "grad_norm": 2.6594555377960205, + "learning_rate": 8.896485056067652e-05, + "loss": 4.6917, + "step": 251200 + }, + { + "epoch": 1.0814777828952182, + "grad_norm": 5.0851593017578125, + "learning_rate": 8.896060954066985e-05, + "loss": 4.8008, + "step": 251250 + }, + { + "epoch": 1.0816930023545008, + "grad_norm": 4.821244239807129, + "learning_rate": 8.895636780699454e-05, + "loss": 4.0757, + "step": 251300 + }, + { + "epoch": 1.0819082218137834, + "grad_norm": 2.888845682144165, + "learning_rate": 8.895212535972828e-05, + "loss": 4.7395, + "step": 251350 + }, + { + "epoch": 1.0821234412730663, + "grad_norm": 2.981084108352661, + "learning_rate": 8.894788219894877e-05, + "loss": 4.5867, + "step": 251400 + }, + { + "epoch": 1.0823386607323489, + "grad_norm": 2.51314377784729, + "learning_rate": 8.894363832473373e-05, + "loss": 4.6447, + "step": 251450 + }, + { + "epoch": 1.0825538801916315, + "grad_norm": 2.6093590259552, + "learning_rate": 8.893939373716091e-05, + "loss": 5.1381, + "step": 251500 + }, + { + "epoch": 1.082769099650914, + "grad_norm": 2.822742462158203, + "learning_rate": 8.893514843630807e-05, + "loss": 4.5999, + "step": 251550 + }, + { + "epoch": 1.0829843191101967, + "grad_norm": 2.9973127841949463, + "learning_rate": 8.893090242225298e-05, + "loss": 4.7114, + "step": 251600 + }, + { + "epoch": 1.0831995385694793, + "grad_norm": 4.656407356262207, + "learning_rate": 8.892665569507339e-05, + "loss": 4.9753, + "step": 251650 + }, + { + "epoch": 1.0834147580287619, + "grad_norm": 1.0731019973754883, + "learning_rate": 8.89224082548471e-05, + "loss": 4.5149, + "step": 251700 + }, + { + "epoch": 1.0836299774880445, + "grad_norm": 1.9322093725204468, + "learning_rate": 8.891816010165191e-05, + "loss": 4.7852, + "step": 251750 + }, + { + "epoch": 1.0838451969473273, + "grad_norm": 3.542860984802246, + "learning_rate": 8.891391123556565e-05, + "loss": 5.2326, + "step": 251800 + }, + { + "epoch": 1.0840604164066099, + "grad_norm": 3.224438190460205, + "learning_rate": 8.890966165666614e-05, + "loss": 4.5253, + "step": 251850 + }, + { + "epoch": 1.0842756358658925, + "grad_norm": 5.818978786468506, + "learning_rate": 8.890541136503123e-05, + "loss": 4.7801, + "step": 251900 + }, + { + "epoch": 1.084490855325175, + "grad_norm": 2.1159887313842773, + "learning_rate": 8.890116036073876e-05, + "loss": 4.7204, + "step": 251950 + }, + { + "epoch": 1.0847060747844577, + "grad_norm": 2.6705799102783203, + "learning_rate": 8.889690864386662e-05, + "loss": 4.6908, + "step": 252000 + }, + { + "epoch": 1.0847060747844577, + "eval_loss": 5.1385321617126465, + "eval_runtime": 34.8526, + "eval_samples_per_second": 18.363, + "eval_steps_per_second": 9.182, + "eval_tts_loss": 6.847465206421501, + "step": 252000 + }, + { + "epoch": 1.0849212942437403, + "grad_norm": 0.6352892518043518, + "learning_rate": 8.889265621449266e-05, + "loss": 4.8175, + "step": 252050 + }, + { + "epoch": 1.0851365137030229, + "grad_norm": 1.3989181518554688, + "learning_rate": 8.888840307269482e-05, + "loss": 4.8964, + "step": 252100 + }, + { + "epoch": 1.0853517331623057, + "grad_norm": 2.2682480812072754, + "learning_rate": 8.888414921855095e-05, + "loss": 4.4245, + "step": 252150 + }, + { + "epoch": 1.0855669526215883, + "grad_norm": 1.9559040069580078, + "learning_rate": 8.887989465213903e-05, + "loss": 4.2493, + "step": 252200 + }, + { + "epoch": 1.085782172080871, + "grad_norm": 2.921437978744507, + "learning_rate": 8.887563937353696e-05, + "loss": 4.5167, + "step": 252250 + }, + { + "epoch": 1.0859973915401535, + "grad_norm": 4.468379497528076, + "learning_rate": 8.887138338282269e-05, + "loss": 5.0313, + "step": 252300 + }, + { + "epoch": 1.0862126109994361, + "grad_norm": 2.606023073196411, + "learning_rate": 8.88671266800742e-05, + "loss": 4.4828, + "step": 252350 + }, + { + "epoch": 1.0864278304587187, + "grad_norm": 2.6712584495544434, + "learning_rate": 8.886286926536943e-05, + "loss": 4.9697, + "step": 252400 + }, + { + "epoch": 1.0866430499180013, + "grad_norm": 2.3453710079193115, + "learning_rate": 8.885861113878638e-05, + "loss": 4.7431, + "step": 252450 + }, + { + "epoch": 1.086858269377284, + "grad_norm": 4.050899505615234, + "learning_rate": 8.885435230040307e-05, + "loss": 4.7852, + "step": 252500 + }, + { + "epoch": 1.0870734888365667, + "grad_norm": 3.2368826866149902, + "learning_rate": 8.885009275029748e-05, + "loss": 4.9447, + "step": 252550 + }, + { + "epoch": 1.0872887082958493, + "grad_norm": 1.9841266870498657, + "learning_rate": 8.884583248854766e-05, + "loss": 4.6786, + "step": 252600 + }, + { + "epoch": 1.087503927755132, + "grad_norm": 0.7634773850440979, + "learning_rate": 8.884157151523162e-05, + "loss": 5.1495, + "step": 252650 + }, + { + "epoch": 1.0877191472144145, + "grad_norm": 4.283044338226318, + "learning_rate": 8.883730983042744e-05, + "loss": 5.0468, + "step": 252700 + }, + { + "epoch": 1.0879343666736971, + "grad_norm": 2.3152568340301514, + "learning_rate": 8.883304743421318e-05, + "loss": 4.676, + "step": 252750 + }, + { + "epoch": 1.0881495861329797, + "grad_norm": 2.2609081268310547, + "learning_rate": 8.88287843266669e-05, + "loss": 4.3222, + "step": 252800 + }, + { + "epoch": 1.0883648055922623, + "grad_norm": 0.9752897024154663, + "learning_rate": 8.882452050786671e-05, + "loss": 4.3227, + "step": 252850 + }, + { + "epoch": 1.0885800250515452, + "grad_norm": 2.9344639778137207, + "learning_rate": 8.882025597789069e-05, + "loss": 4.6597, + "step": 252900 + }, + { + "epoch": 1.0887952445108278, + "grad_norm": 2.561668872833252, + "learning_rate": 8.881599073681699e-05, + "loss": 4.6538, + "step": 252950 + }, + { + "epoch": 1.0890104639701104, + "grad_norm": 2.4553935527801514, + "learning_rate": 8.881172478472371e-05, + "loss": 4.2899, + "step": 253000 + }, + { + "epoch": 1.089225683429393, + "grad_norm": 0.8476051092147827, + "learning_rate": 8.880745812168899e-05, + "loss": 4.6428, + "step": 253050 + }, + { + "epoch": 1.0894409028886756, + "grad_norm": 2.595857858657837, + "learning_rate": 8.880319074779101e-05, + "loss": 4.7244, + "step": 253100 + }, + { + "epoch": 1.0896561223479582, + "grad_norm": 2.250408172607422, + "learning_rate": 8.879892266310792e-05, + "loss": 4.4407, + "step": 253150 + }, + { + "epoch": 1.0898713418072408, + "grad_norm": 0.5365474224090576, + "learning_rate": 8.879465386771791e-05, + "loss": 4.6191, + "step": 253200 + }, + { + "epoch": 1.0900865612665234, + "grad_norm": 2.8956308364868164, + "learning_rate": 8.879038436169917e-05, + "loss": 4.817, + "step": 253250 + }, + { + "epoch": 1.0903017807258062, + "grad_norm": 3.0738210678100586, + "learning_rate": 8.878611414512991e-05, + "loss": 4.8926, + "step": 253300 + }, + { + "epoch": 1.0905170001850888, + "grad_norm": 2.9866392612457275, + "learning_rate": 8.878184321808837e-05, + "loss": 4.877, + "step": 253350 + }, + { + "epoch": 1.0907322196443714, + "grad_norm": 1.3461077213287354, + "learning_rate": 8.877757158065274e-05, + "loss": 4.9303, + "step": 253400 + }, + { + "epoch": 1.090947439103654, + "grad_norm": 3.0970728397369385, + "learning_rate": 8.87732992329013e-05, + "loss": 4.6674, + "step": 253450 + }, + { + "epoch": 1.0911626585629366, + "grad_norm": 3.3678035736083984, + "learning_rate": 8.87690261749123e-05, + "loss": 4.2015, + "step": 253500 + }, + { + "epoch": 1.0913778780222192, + "grad_norm": 1.1934164762496948, + "learning_rate": 8.8764752406764e-05, + "loss": 5.008, + "step": 253550 + }, + { + "epoch": 1.0915930974815018, + "grad_norm": 0.9992848634719849, + "learning_rate": 8.876047792853472e-05, + "loss": 4.0873, + "step": 253600 + }, + { + "epoch": 1.0918083169407846, + "grad_norm": 1.9615836143493652, + "learning_rate": 8.875620274030272e-05, + "loss": 4.8147, + "step": 253650 + }, + { + "epoch": 1.0920235364000672, + "grad_norm": 3.1620895862579346, + "learning_rate": 8.875192684214633e-05, + "loss": 4.8998, + "step": 253700 + }, + { + "epoch": 1.0922387558593498, + "grad_norm": 2.3249361515045166, + "learning_rate": 8.874765023414387e-05, + "loss": 4.3713, + "step": 253750 + }, + { + "epoch": 1.0924539753186324, + "grad_norm": 5.301407814025879, + "learning_rate": 8.874337291637368e-05, + "loss": 4.8382, + "step": 253800 + }, + { + "epoch": 1.092669194777915, + "grad_norm": 0.7340923547744751, + "learning_rate": 8.873909488891411e-05, + "loss": 4.6697, + "step": 253850 + }, + { + "epoch": 1.0928844142371976, + "grad_norm": 2.1171116828918457, + "learning_rate": 8.873481615184354e-05, + "loss": 4.7934, + "step": 253900 + }, + { + "epoch": 1.0930996336964802, + "grad_norm": 2.8390705585479736, + "learning_rate": 8.873053670524032e-05, + "loss": 4.8358, + "step": 253950 + }, + { + "epoch": 1.0933148531557628, + "grad_norm": 2.1155054569244385, + "learning_rate": 8.872625654918286e-05, + "loss": 4.7084, + "step": 254000 + }, + { + "epoch": 1.0935300726150456, + "grad_norm": 0.8024306893348694, + "learning_rate": 8.872197568374955e-05, + "loss": 4.2135, + "step": 254050 + }, + { + "epoch": 1.0937452920743282, + "grad_norm": 3.3989431858062744, + "learning_rate": 8.87176941090188e-05, + "loss": 4.1875, + "step": 254100 + }, + { + "epoch": 1.0939605115336108, + "grad_norm": 3.5441317558288574, + "learning_rate": 8.871341182506906e-05, + "loss": 4.2036, + "step": 254150 + }, + { + "epoch": 1.0941757309928934, + "grad_norm": 4.191270351409912, + "learning_rate": 8.870912883197874e-05, + "loss": 4.5462, + "step": 254200 + }, + { + "epoch": 1.094390950452176, + "grad_norm": 2.0792031288146973, + "learning_rate": 8.870484512982633e-05, + "loss": 4.1903, + "step": 254250 + }, + { + "epoch": 1.0946061699114586, + "grad_norm": 1.6171338558197021, + "learning_rate": 8.870056071869028e-05, + "loss": 4.4086, + "step": 254300 + }, + { + "epoch": 1.0948213893707412, + "grad_norm": 1.7314186096191406, + "learning_rate": 8.869627559864906e-05, + "loss": 4.9338, + "step": 254350 + }, + { + "epoch": 1.095036608830024, + "grad_norm": 2.3815555572509766, + "learning_rate": 8.86919897697812e-05, + "loss": 4.5374, + "step": 254400 + }, + { + "epoch": 1.0952518282893067, + "grad_norm": 3.6454451084136963, + "learning_rate": 8.868770323216515e-05, + "loss": 4.5717, + "step": 254450 + }, + { + "epoch": 1.0954670477485893, + "grad_norm": 3.0090222358703613, + "learning_rate": 8.868341598587948e-05, + "loss": 4.8607, + "step": 254500 + }, + { + "epoch": 1.0956822672078719, + "grad_norm": 4.277331352233887, + "learning_rate": 8.86791280310027e-05, + "loss": 4.4888, + "step": 254550 + }, + { + "epoch": 1.0958974866671545, + "grad_norm": 0.8737815618515015, + "learning_rate": 8.867483936761338e-05, + "loss": 4.434, + "step": 254600 + }, + { + "epoch": 1.096112706126437, + "grad_norm": 4.215208053588867, + "learning_rate": 8.867054999579003e-05, + "loss": 4.3587, + "step": 254650 + }, + { + "epoch": 1.0963279255857197, + "grad_norm": 0.7222720980644226, + "learning_rate": 8.866625991561126e-05, + "loss": 4.2727, + "step": 254700 + }, + { + "epoch": 1.0965431450450023, + "grad_norm": 2.0649752616882324, + "learning_rate": 8.866196912715562e-05, + "loss": 4.9122, + "step": 254750 + }, + { + "epoch": 1.096758364504285, + "grad_norm": 2.7247917652130127, + "learning_rate": 8.865767763050176e-05, + "loss": 4.6298, + "step": 254800 + }, + { + "epoch": 1.0969735839635677, + "grad_norm": 3.7363996505737305, + "learning_rate": 8.865338542572826e-05, + "loss": 4.8549, + "step": 254850 + }, + { + "epoch": 1.0971888034228503, + "grad_norm": 6.610847473144531, + "learning_rate": 8.864909251291375e-05, + "loss": 5.0029, + "step": 254900 + }, + { + "epoch": 1.097404022882133, + "grad_norm": 3.0359039306640625, + "learning_rate": 8.864479889213684e-05, + "loss": 4.6365, + "step": 254950 + }, + { + "epoch": 1.0976192423414155, + "grad_norm": 1.5667914152145386, + "learning_rate": 8.864050456347621e-05, + "loss": 4.4989, + "step": 255000 + }, + { + "epoch": 1.0976192423414155, + "eval_loss": 5.126335144042969, + "eval_runtime": 35.0196, + "eval_samples_per_second": 18.275, + "eval_steps_per_second": 9.138, + "eval_tts_loss": 6.848426418301793, + "step": 255000 + }, + { + "epoch": 1.097834461800698, + "grad_norm": 2.1251578330993652, + "learning_rate": 8.863620952701051e-05, + "loss": 5.1869, + "step": 255050 + }, + { + "epoch": 1.0980496812599807, + "grad_norm": 1.0708529949188232, + "learning_rate": 8.863191378281843e-05, + "loss": 4.6448, + "step": 255100 + }, + { + "epoch": 1.0982649007192635, + "grad_norm": 2.960721731185913, + "learning_rate": 8.862761733097863e-05, + "loss": 4.6693, + "step": 255150 + }, + { + "epoch": 1.0984801201785461, + "grad_norm": 0.23930567502975464, + "learning_rate": 8.862332017156983e-05, + "loss": 4.0815, + "step": 255200 + }, + { + "epoch": 1.0986953396378287, + "grad_norm": 4.902028560638428, + "learning_rate": 8.861902230467074e-05, + "loss": 4.6767, + "step": 255250 + }, + { + "epoch": 1.0989105590971113, + "grad_norm": 2.0950052738189697, + "learning_rate": 8.86147237303601e-05, + "loss": 5.282, + "step": 255300 + }, + { + "epoch": 1.099125778556394, + "grad_norm": 3.122454881668091, + "learning_rate": 8.861042444871663e-05, + "loss": 4.3875, + "step": 255350 + }, + { + "epoch": 1.0993409980156765, + "grad_norm": 3.9101462364196777, + "learning_rate": 8.860612445981909e-05, + "loss": 4.7169, + "step": 255400 + }, + { + "epoch": 1.0995562174749591, + "grad_norm": 2.982966184616089, + "learning_rate": 8.860182376374624e-05, + "loss": 4.9825, + "step": 255450 + }, + { + "epoch": 1.0997714369342417, + "grad_norm": 2.521409749984741, + "learning_rate": 8.859752236057688e-05, + "loss": 4.7995, + "step": 255500 + }, + { + "epoch": 1.0999866563935246, + "grad_norm": 2.872166156768799, + "learning_rate": 8.859322025038977e-05, + "loss": 4.809, + "step": 255550 + }, + { + "epoch": 1.1002018758528072, + "grad_norm": 1.8371012210845947, + "learning_rate": 8.858891743326374e-05, + "loss": 4.7822, + "step": 255600 + }, + { + "epoch": 1.1004170953120898, + "grad_norm": 2.936534881591797, + "learning_rate": 8.85846139092776e-05, + "loss": 4.865, + "step": 255650 + }, + { + "epoch": 1.1006323147713724, + "grad_norm": 3.983560800552368, + "learning_rate": 8.85803096785102e-05, + "loss": 4.5419, + "step": 255700 + }, + { + "epoch": 1.100847534230655, + "grad_norm": 1.1841351985931396, + "learning_rate": 8.857600474104032e-05, + "loss": 4.2197, + "step": 255750 + }, + { + "epoch": 1.1010627536899376, + "grad_norm": 2.7758548259735107, + "learning_rate": 8.85716990969469e-05, + "loss": 4.4389, + "step": 255800 + }, + { + "epoch": 1.1012779731492202, + "grad_norm": 2.1651933193206787, + "learning_rate": 8.856739274630875e-05, + "loss": 4.9157, + "step": 255850 + }, + { + "epoch": 1.101493192608503, + "grad_norm": 1.5362248420715332, + "learning_rate": 8.856308568920477e-05, + "loss": 4.9392, + "step": 255900 + }, + { + "epoch": 1.1017084120677856, + "grad_norm": 3.0057132244110107, + "learning_rate": 8.855877792571388e-05, + "loss": 4.7543, + "step": 255950 + }, + { + "epoch": 1.1019236315270682, + "grad_norm": 3.3158650398254395, + "learning_rate": 8.855446945591494e-05, + "loss": 4.7771, + "step": 256000 + }, + { + "epoch": 1.1021388509863508, + "grad_norm": 1.5454576015472412, + "learning_rate": 8.855016027988692e-05, + "loss": 4.3503, + "step": 256050 + }, + { + "epoch": 1.1023540704456334, + "grad_norm": 4.190839767456055, + "learning_rate": 8.854585039770871e-05, + "loss": 4.8766, + "step": 256100 + }, + { + "epoch": 1.102569289904916, + "grad_norm": 2.967679977416992, + "learning_rate": 8.854153980945929e-05, + "loss": 4.6903, + "step": 256150 + }, + { + "epoch": 1.1027845093641986, + "grad_norm": 1.7487783432006836, + "learning_rate": 8.853722851521762e-05, + "loss": 4.6338, + "step": 256200 + }, + { + "epoch": 1.1029997288234814, + "grad_norm": 0.9283624291419983, + "learning_rate": 8.853291651506265e-05, + "loss": 4.4926, + "step": 256250 + }, + { + "epoch": 1.103214948282764, + "grad_norm": 3.966014862060547, + "learning_rate": 8.852860380907337e-05, + "loss": 4.6411, + "step": 256300 + }, + { + "epoch": 1.1034301677420466, + "grad_norm": 2.361802816390991, + "learning_rate": 8.85242903973288e-05, + "loss": 4.8194, + "step": 256350 + }, + { + "epoch": 1.1036453872013292, + "grad_norm": 2.1618268489837646, + "learning_rate": 8.851997627990793e-05, + "loss": 4.8367, + "step": 256400 + }, + { + "epoch": 1.1038606066606118, + "grad_norm": 3.6197261810302734, + "learning_rate": 8.85156614568898e-05, + "loss": 4.6229, + "step": 256450 + }, + { + "epoch": 1.1040758261198944, + "grad_norm": 2.2245945930480957, + "learning_rate": 8.851134592835343e-05, + "loss": 4.7014, + "step": 256500 + }, + { + "epoch": 1.104291045579177, + "grad_norm": 0.5572341680526733, + "learning_rate": 8.85070296943779e-05, + "loss": 4.7906, + "step": 256550 + }, + { + "epoch": 1.1045062650384598, + "grad_norm": 2.3649520874023438, + "learning_rate": 8.850271275504224e-05, + "loss": 4.5201, + "step": 256600 + }, + { + "epoch": 1.1047214844977424, + "grad_norm": 2.523818016052246, + "learning_rate": 8.849839511042554e-05, + "loss": 4.0952, + "step": 256650 + }, + { + "epoch": 1.104936703957025, + "grad_norm": 2.317150831222534, + "learning_rate": 8.849407676060689e-05, + "loss": 4.9233, + "step": 256700 + }, + { + "epoch": 1.1051519234163076, + "grad_norm": 1.5503652095794678, + "learning_rate": 8.848975770566541e-05, + "loss": 4.8922, + "step": 256750 + }, + { + "epoch": 1.1053671428755902, + "grad_norm": 1.6574984788894653, + "learning_rate": 8.848543794568019e-05, + "loss": 4.985, + "step": 256800 + }, + { + "epoch": 1.1055823623348728, + "grad_norm": 1.619415283203125, + "learning_rate": 8.848111748073034e-05, + "loss": 4.5049, + "step": 256850 + }, + { + "epoch": 1.1057975817941554, + "grad_norm": 2.1013548374176025, + "learning_rate": 8.847679631089504e-05, + "loss": 4.8646, + "step": 256900 + }, + { + "epoch": 1.106012801253438, + "grad_norm": 2.1602697372436523, + "learning_rate": 8.847247443625346e-05, + "loss": 4.4099, + "step": 256950 + }, + { + "epoch": 1.1062280207127209, + "grad_norm": 2.6105446815490723, + "learning_rate": 8.846815185688471e-05, + "loss": 4.6201, + "step": 257000 + }, + { + "epoch": 1.1064432401720035, + "grad_norm": 2.6292731761932373, + "learning_rate": 8.846382857286799e-05, + "loss": 4.7591, + "step": 257050 + }, + { + "epoch": 1.106658459631286, + "grad_norm": 1.6271488666534424, + "learning_rate": 8.845950458428252e-05, + "loss": 4.786, + "step": 257100 + }, + { + "epoch": 1.1068736790905687, + "grad_norm": 3.598353147506714, + "learning_rate": 8.845517989120746e-05, + "loss": 4.7241, + "step": 257150 + }, + { + "epoch": 1.1070888985498513, + "grad_norm": 5.5056843757629395, + "learning_rate": 8.845085449372209e-05, + "loss": 4.4728, + "step": 257200 + }, + { + "epoch": 1.1073041180091339, + "grad_norm": 2.4228270053863525, + "learning_rate": 8.844652839190557e-05, + "loss": 4.8896, + "step": 257250 + }, + { + "epoch": 1.1075193374684165, + "grad_norm": 3.4902825355529785, + "learning_rate": 8.84422015858372e-05, + "loss": 4.9812, + "step": 257300 + }, + { + "epoch": 1.1077345569276993, + "grad_norm": 1.7537034749984741, + "learning_rate": 8.843787407559621e-05, + "loss": 4.2922, + "step": 257350 + }, + { + "epoch": 1.1079497763869819, + "grad_norm": 0.6018660664558411, + "learning_rate": 8.843354586126187e-05, + "loss": 4.5073, + "step": 257400 + }, + { + "epoch": 1.1081649958462645, + "grad_norm": 2.202730417251587, + "learning_rate": 8.842921694291348e-05, + "loss": 4.1305, + "step": 257450 + }, + { + "epoch": 1.108380215305547, + "grad_norm": 2.546992063522339, + "learning_rate": 8.842488732063032e-05, + "loss": 4.7266, + "step": 257500 + }, + { + "epoch": 1.1085954347648297, + "grad_norm": 3.7018659114837646, + "learning_rate": 8.84205569944917e-05, + "loss": 4.6433, + "step": 257550 + }, + { + "epoch": 1.1088106542241123, + "grad_norm": 3.4273974895477295, + "learning_rate": 8.841622596457695e-05, + "loss": 4.6212, + "step": 257600 + }, + { + "epoch": 1.109025873683395, + "grad_norm": 2.094316005706787, + "learning_rate": 8.84118942309654e-05, + "loss": 4.8346, + "step": 257650 + }, + { + "epoch": 1.1092410931426775, + "grad_norm": 4.6898345947265625, + "learning_rate": 8.84075617937364e-05, + "loss": 4.7384, + "step": 257700 + }, + { + "epoch": 1.1094563126019603, + "grad_norm": 0.7077619433403015, + "learning_rate": 8.840322865296931e-05, + "loss": 4.4818, + "step": 257750 + }, + { + "epoch": 1.109671532061243, + "grad_norm": 2.390781879425049, + "learning_rate": 8.839889480874349e-05, + "loss": 4.9974, + "step": 257800 + }, + { + "epoch": 1.1098867515205255, + "grad_norm": 2.5259385108947754, + "learning_rate": 8.839456026113835e-05, + "loss": 4.5321, + "step": 257850 + }, + { + "epoch": 1.1101019709798081, + "grad_norm": 4.1910223960876465, + "learning_rate": 8.839022501023326e-05, + "loss": 4.8013, + "step": 257900 + }, + { + "epoch": 1.1103171904390907, + "grad_norm": 2.687385320663452, + "learning_rate": 8.838588905610765e-05, + "loss": 4.8109, + "step": 257950 + }, + { + "epoch": 1.1105324098983733, + "grad_norm": 1.2213791608810425, + "learning_rate": 8.838155239884096e-05, + "loss": 4.9144, + "step": 258000 + }, + { + "epoch": 1.1105324098983733, + "eval_loss": 5.119732856750488, + "eval_runtime": 35.1262, + "eval_samples_per_second": 18.22, + "eval_steps_per_second": 9.11, + "eval_tts_loss": 6.856424286597097, + "step": 258000 + }, + { + "epoch": 1.110747629357656, + "grad_norm": 5.568444728851318, + "learning_rate": 8.83772150385126e-05, + "loss": 4.7219, + "step": 258050 + }, + { + "epoch": 1.1109628488169387, + "grad_norm": 3.3525187969207764, + "learning_rate": 8.837287697520203e-05, + "loss": 5.1752, + "step": 258100 + }, + { + "epoch": 1.1111780682762213, + "grad_norm": 2.685465097427368, + "learning_rate": 8.836853820898869e-05, + "loss": 5.0281, + "step": 258150 + }, + { + "epoch": 1.111393287735504, + "grad_norm": 1.855284571647644, + "learning_rate": 8.83641987399521e-05, + "loss": 4.8999, + "step": 258200 + }, + { + "epoch": 1.1116085071947865, + "grad_norm": 3.468111753463745, + "learning_rate": 8.835985856817173e-05, + "loss": 4.7945, + "step": 258250 + }, + { + "epoch": 1.1118237266540691, + "grad_norm": 3.132376194000244, + "learning_rate": 8.835551769372708e-05, + "loss": 4.2848, + "step": 258300 + }, + { + "epoch": 1.1120389461133517, + "grad_norm": 2.390474796295166, + "learning_rate": 8.835117611669768e-05, + "loss": 4.6318, + "step": 258350 + }, + { + "epoch": 1.1122541655726343, + "grad_norm": 2.9280974864959717, + "learning_rate": 8.834683383716302e-05, + "loss": 4.7922, + "step": 258400 + }, + { + "epoch": 1.112469385031917, + "grad_norm": 3.8023011684417725, + "learning_rate": 8.834249085520268e-05, + "loss": 4.9664, + "step": 258450 + }, + { + "epoch": 1.1126846044911998, + "grad_norm": 3.8664886951446533, + "learning_rate": 8.833814717089618e-05, + "loss": 4.5684, + "step": 258500 + }, + { + "epoch": 1.1128998239504824, + "grad_norm": 0.6397255659103394, + "learning_rate": 8.83338027843231e-05, + "loss": 4.5198, + "step": 258550 + }, + { + "epoch": 1.113115043409765, + "grad_norm": 1.0848380327224731, + "learning_rate": 8.832945769556305e-05, + "loss": 4.2497, + "step": 258600 + }, + { + "epoch": 1.1133302628690476, + "grad_norm": 1.2639923095703125, + "learning_rate": 8.832511190469559e-05, + "loss": 4.4565, + "step": 258650 + }, + { + "epoch": 1.1135454823283302, + "grad_norm": 1.8577117919921875, + "learning_rate": 8.832076541180031e-05, + "loss": 4.4432, + "step": 258700 + }, + { + "epoch": 1.1137607017876128, + "grad_norm": 1.0198839902877808, + "learning_rate": 8.831641821695686e-05, + "loss": 4.7483, + "step": 258750 + }, + { + "epoch": 1.1139759212468954, + "grad_norm": 1.0428578853607178, + "learning_rate": 8.831207032024486e-05, + "loss": 4.5334, + "step": 258800 + }, + { + "epoch": 1.1141911407061782, + "grad_norm": 2.504979372024536, + "learning_rate": 8.830772172174394e-05, + "loss": 4.841, + "step": 258850 + }, + { + "epoch": 1.1144063601654608, + "grad_norm": 3.074845314025879, + "learning_rate": 8.830337242153377e-05, + "loss": 4.8969, + "step": 258900 + }, + { + "epoch": 1.1146215796247434, + "grad_norm": 1.9955708980560303, + "learning_rate": 8.829902241969403e-05, + "loss": 4.7916, + "step": 258950 + }, + { + "epoch": 1.114836799084026, + "grad_norm": 1.8970156908035278, + "learning_rate": 8.829467171630438e-05, + "loss": 4.8843, + "step": 259000 + }, + { + "epoch": 1.1150520185433086, + "grad_norm": 3.7150309085845947, + "learning_rate": 8.829032031144453e-05, + "loss": 4.6425, + "step": 259050 + }, + { + "epoch": 1.1152672380025912, + "grad_norm": 2.459216356277466, + "learning_rate": 8.828596820519417e-05, + "loss": 4.6423, + "step": 259100 + }, + { + "epoch": 1.1154824574618738, + "grad_norm": 5.496678829193115, + "learning_rate": 8.828161539763304e-05, + "loss": 4.4118, + "step": 259150 + }, + { + "epoch": 1.1156976769211564, + "grad_norm": 1.9221320152282715, + "learning_rate": 8.827726188884086e-05, + "loss": 4.9001, + "step": 259200 + }, + { + "epoch": 1.1159128963804392, + "grad_norm": 2.1974916458129883, + "learning_rate": 8.827290767889739e-05, + "loss": 3.978, + "step": 259250 + }, + { + "epoch": 1.1161281158397218, + "grad_norm": 2.731987476348877, + "learning_rate": 8.826855276788237e-05, + "loss": 4.8592, + "step": 259300 + }, + { + "epoch": 1.1163433352990044, + "grad_norm": 3.9928412437438965, + "learning_rate": 8.826419715587559e-05, + "loss": 4.497, + "step": 259350 + }, + { + "epoch": 1.116558554758287, + "grad_norm": 3.0885984897613525, + "learning_rate": 8.825984084295682e-05, + "loss": 4.6045, + "step": 259400 + }, + { + "epoch": 1.1167737742175696, + "grad_norm": 2.6725451946258545, + "learning_rate": 8.825548382920588e-05, + "loss": 4.7481, + "step": 259450 + }, + { + "epoch": 1.1169889936768522, + "grad_norm": 2.035816192626953, + "learning_rate": 8.825112611470255e-05, + "loss": 4.2984, + "step": 259500 + }, + { + "epoch": 1.1172042131361348, + "grad_norm": 2.4896678924560547, + "learning_rate": 8.824676769952668e-05, + "loss": 4.7016, + "step": 259550 + }, + { + "epoch": 1.1174194325954176, + "grad_norm": 4.968699932098389, + "learning_rate": 8.82424085837581e-05, + "loss": 4.6108, + "step": 259600 + }, + { + "epoch": 1.1176346520547003, + "grad_norm": 5.7979607582092285, + "learning_rate": 8.823804876747663e-05, + "loss": 4.6484, + "step": 259650 + }, + { + "epoch": 1.1178498715139829, + "grad_norm": 2.3808345794677734, + "learning_rate": 8.823368825076218e-05, + "loss": 4.4712, + "step": 259700 + }, + { + "epoch": 1.1180650909732655, + "grad_norm": 2.6356029510498047, + "learning_rate": 8.822932703369458e-05, + "loss": 4.4962, + "step": 259750 + }, + { + "epoch": 1.118280310432548, + "grad_norm": 2.290027618408203, + "learning_rate": 8.822496511635375e-05, + "loss": 4.5908, + "step": 259800 + }, + { + "epoch": 1.1184955298918307, + "grad_norm": 4.591884613037109, + "learning_rate": 8.822060249881959e-05, + "loss": 4.9678, + "step": 259850 + }, + { + "epoch": 1.1187107493511133, + "grad_norm": 2.371739387512207, + "learning_rate": 8.821623918117199e-05, + "loss": 4.2267, + "step": 259900 + }, + { + "epoch": 1.1189259688103959, + "grad_norm": 1.2500691413879395, + "learning_rate": 8.821187516349087e-05, + "loss": 4.9477, + "step": 259950 + }, + { + "epoch": 1.1191411882696787, + "grad_norm": 3.2084178924560547, + "learning_rate": 8.820751044585621e-05, + "loss": 4.777, + "step": 260000 + }, + { + "epoch": 1.1193564077289613, + "grad_norm": 2.308199644088745, + "learning_rate": 8.820314502834793e-05, + "loss": 4.6388, + "step": 260050 + }, + { + "epoch": 1.1195716271882439, + "grad_norm": 0.9255133271217346, + "learning_rate": 8.8198778911046e-05, + "loss": 4.5864, + "step": 260100 + }, + { + "epoch": 1.1197868466475265, + "grad_norm": 2.5684874057769775, + "learning_rate": 8.819441209403041e-05, + "loss": 4.7172, + "step": 260150 + }, + { + "epoch": 1.120002066106809, + "grad_norm": 1.4592262506484985, + "learning_rate": 8.819004457738113e-05, + "loss": 4.6232, + "step": 260200 + }, + { + "epoch": 1.1202172855660917, + "grad_norm": 2.8373477458953857, + "learning_rate": 8.818567636117816e-05, + "loss": 4.7569, + "step": 260250 + }, + { + "epoch": 1.1204325050253743, + "grad_norm": 2.0635604858398438, + "learning_rate": 8.818130744550155e-05, + "loss": 4.6277, + "step": 260300 + }, + { + "epoch": 1.120647724484657, + "grad_norm": 2.657029628753662, + "learning_rate": 8.817693783043128e-05, + "loss": 4.664, + "step": 260350 + }, + { + "epoch": 1.1208629439439397, + "grad_norm": 3.0381081104278564, + "learning_rate": 8.817256751604744e-05, + "loss": 4.7152, + "step": 260400 + }, + { + "epoch": 1.1210781634032223, + "grad_norm": 1.2745461463928223, + "learning_rate": 8.816819650243005e-05, + "loss": 4.7638, + "step": 260450 + }, + { + "epoch": 1.121293382862505, + "grad_norm": 0.8343294262886047, + "learning_rate": 8.81638247896592e-05, + "loss": 4.8599, + "step": 260500 + }, + { + "epoch": 1.1215086023217875, + "grad_norm": 3.098907470703125, + "learning_rate": 8.815945237781496e-05, + "loss": 4.8303, + "step": 260550 + }, + { + "epoch": 1.12172382178107, + "grad_norm": 2.8931093215942383, + "learning_rate": 8.815507926697741e-05, + "loss": 4.7884, + "step": 260600 + }, + { + "epoch": 1.1219390412403527, + "grad_norm": 1.265212059020996, + "learning_rate": 8.815070545722665e-05, + "loss": 4.4675, + "step": 260650 + }, + { + "epoch": 1.1221542606996353, + "grad_norm": 3.898240327835083, + "learning_rate": 8.814633094864285e-05, + "loss": 4.5813, + "step": 260700 + }, + { + "epoch": 1.1223694801589181, + "grad_norm": 2.3241710662841797, + "learning_rate": 8.814195574130608e-05, + "loss": 4.8703, + "step": 260750 + }, + { + "epoch": 1.1225846996182007, + "grad_norm": 1.620906949043274, + "learning_rate": 8.813757983529652e-05, + "loss": 4.6831, + "step": 260800 + }, + { + "epoch": 1.1227999190774833, + "grad_norm": 3.6936681270599365, + "learning_rate": 8.813320323069431e-05, + "loss": 3.9595, + "step": 260850 + }, + { + "epoch": 1.123015138536766, + "grad_norm": 0.17515362799167633, + "learning_rate": 8.812882592757963e-05, + "loss": 5.3474, + "step": 260900 + }, + { + "epoch": 1.1232303579960485, + "grad_norm": 2.7324671745300293, + "learning_rate": 8.812444792603265e-05, + "loss": 4.3315, + "step": 260950 + }, + { + "epoch": 1.1234455774553311, + "grad_norm": 1.9558091163635254, + "learning_rate": 8.812006922613356e-05, + "loss": 4.3447, + "step": 261000 + }, + { + "epoch": 1.1234455774553311, + "eval_loss": 5.106448173522949, + "eval_runtime": 35.0897, + "eval_samples_per_second": 18.239, + "eval_steps_per_second": 9.119, + "eval_tts_loss": 6.879708360673442, + "step": 261000 + }, + { + "epoch": 1.1236607969146137, + "grad_norm": 2.4393234252929688, + "learning_rate": 8.811568982796261e-05, + "loss": 4.4174, + "step": 261050 + }, + { + "epoch": 1.1238760163738966, + "grad_norm": 3.9175875186920166, + "learning_rate": 8.811130973159997e-05, + "loss": 4.6658, + "step": 261100 + }, + { + "epoch": 1.1240912358331792, + "grad_norm": 5.100289344787598, + "learning_rate": 8.810692893712588e-05, + "loss": 4.2574, + "step": 261150 + }, + { + "epoch": 1.1243064552924618, + "grad_norm": 2.8232016563415527, + "learning_rate": 8.810254744462063e-05, + "loss": 4.5311, + "step": 261200 + }, + { + "epoch": 1.1245216747517444, + "grad_norm": 3.4363162517547607, + "learning_rate": 8.809816525416443e-05, + "loss": 4.7163, + "step": 261250 + }, + { + "epoch": 1.124736894211027, + "grad_norm": 2.059758186340332, + "learning_rate": 8.809378236583759e-05, + "loss": 4.6216, + "step": 261300 + }, + { + "epoch": 1.1249521136703096, + "grad_norm": 4.331276893615723, + "learning_rate": 8.808939877972035e-05, + "loss": 4.5678, + "step": 261350 + }, + { + "epoch": 1.1251673331295922, + "grad_norm": 2.647447347640991, + "learning_rate": 8.808501449589303e-05, + "loss": 4.6127, + "step": 261400 + }, + { + "epoch": 1.1253825525888748, + "grad_norm": 1.3026503324508667, + "learning_rate": 8.808062951443595e-05, + "loss": 4.1373, + "step": 261450 + }, + { + "epoch": 1.1255977720481576, + "grad_norm": 2.1475961208343506, + "learning_rate": 8.807624383542943e-05, + "loss": 4.6902, + "step": 261500 + }, + { + "epoch": 1.1258129915074402, + "grad_norm": 3.6807451248168945, + "learning_rate": 8.807185745895379e-05, + "loss": 4.4293, + "step": 261550 + }, + { + "epoch": 1.1260282109667228, + "grad_norm": 2.045552968978882, + "learning_rate": 8.80674703850894e-05, + "loss": 4.8255, + "step": 261600 + }, + { + "epoch": 1.1262434304260054, + "grad_norm": 2.5425002574920654, + "learning_rate": 8.806308261391659e-05, + "loss": 4.5893, + "step": 261650 + }, + { + "epoch": 1.126458649885288, + "grad_norm": 5.01700496673584, + "learning_rate": 8.805869414551577e-05, + "loss": 4.712, + "step": 261700 + }, + { + "epoch": 1.1266738693445706, + "grad_norm": 2.7023797035217285, + "learning_rate": 8.80543049799673e-05, + "loss": 4.0511, + "step": 261750 + }, + { + "epoch": 1.1268890888038534, + "grad_norm": 3.1296682357788086, + "learning_rate": 8.804991511735159e-05, + "loss": 4.9522, + "step": 261800 + }, + { + "epoch": 1.127104308263136, + "grad_norm": 3.2946016788482666, + "learning_rate": 8.804552455774906e-05, + "loss": 4.5712, + "step": 261850 + }, + { + "epoch": 1.1273195277224186, + "grad_norm": 5.603846073150635, + "learning_rate": 8.80411333012401e-05, + "loss": 4.7087, + "step": 261900 + }, + { + "epoch": 1.1275347471817012, + "grad_norm": 1.8376853466033936, + "learning_rate": 8.80367413479052e-05, + "loss": 4.6416, + "step": 261950 + }, + { + "epoch": 1.1277499666409838, + "grad_norm": 1.5223585367202759, + "learning_rate": 8.803234869782478e-05, + "loss": 4.316, + "step": 262000 + }, + { + "epoch": 1.1279651861002664, + "grad_norm": 5.779716968536377, + "learning_rate": 8.802795535107931e-05, + "loss": 4.561, + "step": 262050 + }, + { + "epoch": 1.128180405559549, + "grad_norm": 1.139165997505188, + "learning_rate": 8.802356130774926e-05, + "loss": 4.8643, + "step": 262100 + }, + { + "epoch": 1.1283956250188316, + "grad_norm": 2.080116033554077, + "learning_rate": 8.801916656791511e-05, + "loss": 4.5872, + "step": 262150 + }, + { + "epoch": 1.1286108444781142, + "grad_norm": 2.2302474975585938, + "learning_rate": 8.801477113165739e-05, + "loss": 4.9266, + "step": 262200 + }, + { + "epoch": 1.128826063937397, + "grad_norm": 3.95322847366333, + "learning_rate": 8.80103749990566e-05, + "loss": 4.7199, + "step": 262250 + }, + { + "epoch": 1.1290412833966796, + "grad_norm": 1.592086672782898, + "learning_rate": 8.800597817019325e-05, + "loss": 4.6659, + "step": 262300 + }, + { + "epoch": 1.1292565028559622, + "grad_norm": 2.326411485671997, + "learning_rate": 8.80015806451479e-05, + "loss": 4.5714, + "step": 262350 + }, + { + "epoch": 1.1294717223152448, + "grad_norm": 1.863930344581604, + "learning_rate": 8.799718242400111e-05, + "loss": 4.9671, + "step": 262400 + }, + { + "epoch": 1.1296869417745274, + "grad_norm": 1.5570523738861084, + "learning_rate": 8.799278350683342e-05, + "loss": 4.4848, + "step": 262450 + }, + { + "epoch": 1.12990216123381, + "grad_norm": 3.4196689128875732, + "learning_rate": 8.798838389372541e-05, + "loss": 4.8198, + "step": 262500 + }, + { + "epoch": 1.1301173806930929, + "grad_norm": 2.550976514816284, + "learning_rate": 8.79839835847577e-05, + "loss": 4.7005, + "step": 262550 + }, + { + "epoch": 1.1303326001523755, + "grad_norm": 2.2183821201324463, + "learning_rate": 8.797958258001086e-05, + "loss": 4.6007, + "step": 262600 + }, + { + "epoch": 1.130547819611658, + "grad_norm": 0.6471936106681824, + "learning_rate": 8.797518087956553e-05, + "loss": 4.6231, + "step": 262650 + }, + { + "epoch": 1.1307630390709407, + "grad_norm": 3.221644401550293, + "learning_rate": 8.797077848350234e-05, + "loss": 5.0149, + "step": 262700 + }, + { + "epoch": 1.1309782585302233, + "grad_norm": 2.5334737300872803, + "learning_rate": 8.796637539190191e-05, + "loss": 4.9517, + "step": 262750 + }, + { + "epoch": 1.1311934779895059, + "grad_norm": 0.8160366415977478, + "learning_rate": 8.796197160484491e-05, + "loss": 4.32, + "step": 262800 + }, + { + "epoch": 1.1314086974487885, + "grad_norm": 3.408900260925293, + "learning_rate": 8.795756712241199e-05, + "loss": 4.9044, + "step": 262850 + }, + { + "epoch": 1.131623916908071, + "grad_norm": 3.522905111312866, + "learning_rate": 8.795316194468386e-05, + "loss": 4.5549, + "step": 262900 + }, + { + "epoch": 1.1318391363673537, + "grad_norm": 2.9331068992614746, + "learning_rate": 8.79487560717412e-05, + "loss": 4.6826, + "step": 262950 + }, + { + "epoch": 1.1320543558266365, + "grad_norm": 2.612461805343628, + "learning_rate": 8.794434950366471e-05, + "loss": 4.7024, + "step": 263000 + }, + { + "epoch": 1.132269575285919, + "grad_norm": 2.8285980224609375, + "learning_rate": 8.79399422405351e-05, + "loss": 4.848, + "step": 263050 + }, + { + "epoch": 1.1324847947452017, + "grad_norm": 2.4003188610076904, + "learning_rate": 8.79355342824331e-05, + "loss": 4.2211, + "step": 263100 + }, + { + "epoch": 1.1327000142044843, + "grad_norm": 4.969771862030029, + "learning_rate": 8.793112562943948e-05, + "loss": 4.463, + "step": 263150 + }, + { + "epoch": 1.132915233663767, + "grad_norm": 3.2464168071746826, + "learning_rate": 8.792671628163498e-05, + "loss": 4.8164, + "step": 263200 + }, + { + "epoch": 1.1331304531230495, + "grad_norm": 2.937483072280884, + "learning_rate": 8.792230623910038e-05, + "loss": 5.0107, + "step": 263250 + }, + { + "epoch": 1.1333456725823323, + "grad_norm": 6.2797112464904785, + "learning_rate": 8.791789550191644e-05, + "loss": 4.5617, + "step": 263300 + }, + { + "epoch": 1.133560892041615, + "grad_norm": 2.022037982940674, + "learning_rate": 8.791348407016397e-05, + "loss": 4.8244, + "step": 263350 + }, + { + "epoch": 1.1337761115008975, + "grad_norm": 3.2173142433166504, + "learning_rate": 8.790907194392377e-05, + "loss": 4.7499, + "step": 263400 + }, + { + "epoch": 1.1339913309601801, + "grad_norm": 3.2329652309417725, + "learning_rate": 8.790465912327667e-05, + "loss": 4.7415, + "step": 263450 + }, + { + "epoch": 1.1342065504194627, + "grad_norm": 2.2411653995513916, + "learning_rate": 8.790024560830349e-05, + "loss": 4.7357, + "step": 263500 + }, + { + "epoch": 1.1344217698787453, + "grad_norm": 2.6077351570129395, + "learning_rate": 8.789583139908509e-05, + "loss": 4.4666, + "step": 263550 + }, + { + "epoch": 1.134636989338028, + "grad_norm": 7.313924312591553, + "learning_rate": 8.789141649570232e-05, + "loss": 4.6776, + "step": 263600 + }, + { + "epoch": 1.1348522087973105, + "grad_norm": 1.2847851514816284, + "learning_rate": 8.788700089823604e-05, + "loss": 4.5505, + "step": 263650 + }, + { + "epoch": 1.1350674282565931, + "grad_norm": 0.8125954866409302, + "learning_rate": 8.788258460676715e-05, + "loss": 4.7055, + "step": 263700 + }, + { + "epoch": 1.135282647715876, + "grad_norm": 3.326084852218628, + "learning_rate": 8.787816762137652e-05, + "loss": 5.157, + "step": 263750 + }, + { + "epoch": 1.1354978671751585, + "grad_norm": 2.468205451965332, + "learning_rate": 8.787374994214512e-05, + "loss": 4.3607, + "step": 263800 + }, + { + "epoch": 1.1357130866344411, + "grad_norm": 2.340226888656616, + "learning_rate": 8.78693315691538e-05, + "loss": 4.8946, + "step": 263850 + }, + { + "epoch": 1.1359283060937237, + "grad_norm": 2.9427692890167236, + "learning_rate": 8.786491250248354e-05, + "loss": 5.1431, + "step": 263900 + }, + { + "epoch": 1.1361435255530064, + "grad_norm": 1.915858507156372, + "learning_rate": 8.786049274221528e-05, + "loss": 4.7657, + "step": 263950 + }, + { + "epoch": 1.136358745012289, + "grad_norm": 2.1915061473846436, + "learning_rate": 8.785607228842996e-05, + "loss": 4.628, + "step": 264000 + }, + { + "epoch": 1.136358745012289, + "eval_loss": 5.1186418533325195, + "eval_runtime": 35.0945, + "eval_samples_per_second": 18.236, + "eval_steps_per_second": 9.118, + "eval_tts_loss": 6.788307554023784, + "step": 264000 + }, + { + "epoch": 1.1365739644715718, + "grad_norm": 5.640307903289795, + "learning_rate": 8.785165114120858e-05, + "loss": 4.4893, + "step": 264050 + }, + { + "epoch": 1.1367891839308544, + "grad_norm": 2.832566499710083, + "learning_rate": 8.784722930063209e-05, + "loss": 4.6509, + "step": 264100 + }, + { + "epoch": 1.137004403390137, + "grad_norm": 2.1916770935058594, + "learning_rate": 8.784280676678152e-05, + "loss": 4.7853, + "step": 264150 + }, + { + "epoch": 1.1372196228494196, + "grad_norm": 0.7038397789001465, + "learning_rate": 8.783838353973786e-05, + "loss": 4.4437, + "step": 264200 + }, + { + "epoch": 1.1374348423087022, + "grad_norm": 4.119822025299072, + "learning_rate": 8.783395961958215e-05, + "loss": 4.616, + "step": 264250 + }, + { + "epoch": 1.1376500617679848, + "grad_norm": 3.811553478240967, + "learning_rate": 8.782953500639542e-05, + "loss": 4.7283, + "step": 264300 + }, + { + "epoch": 1.1378652812272674, + "grad_norm": 2.4846363067626953, + "learning_rate": 8.782510970025872e-05, + "loss": 4.9129, + "step": 264350 + }, + { + "epoch": 1.13808050068655, + "grad_norm": 3.871973991394043, + "learning_rate": 8.782068370125309e-05, + "loss": 4.6819, + "step": 264400 + }, + { + "epoch": 1.1382957201458328, + "grad_norm": 0.7000930905342102, + "learning_rate": 8.781625700945964e-05, + "loss": 4.562, + "step": 264450 + }, + { + "epoch": 1.1385109396051154, + "grad_norm": 3.123342275619507, + "learning_rate": 8.781182962495945e-05, + "loss": 4.6338, + "step": 264500 + }, + { + "epoch": 1.138726159064398, + "grad_norm": 2.6472630500793457, + "learning_rate": 8.780740154783358e-05, + "loss": 5.0182, + "step": 264550 + }, + { + "epoch": 1.1389413785236806, + "grad_norm": 0.5414468050003052, + "learning_rate": 8.780297277816318e-05, + "loss": 4.4896, + "step": 264600 + }, + { + "epoch": 1.1391565979829632, + "grad_norm": 1.9469913244247437, + "learning_rate": 8.779854331602939e-05, + "loss": 4.7293, + "step": 264650 + }, + { + "epoch": 1.1393718174422458, + "grad_norm": 2.7458789348602295, + "learning_rate": 8.77941131615133e-05, + "loss": 4.4651, + "step": 264700 + }, + { + "epoch": 1.1395870369015284, + "grad_norm": 2.3910908699035645, + "learning_rate": 8.778968231469609e-05, + "loss": 4.9879, + "step": 264750 + }, + { + "epoch": 1.1398022563608112, + "grad_norm": 2.3061890602111816, + "learning_rate": 8.778525077565892e-05, + "loss": 4.666, + "step": 264800 + }, + { + "epoch": 1.1400174758200938, + "grad_norm": 0.7018060088157654, + "learning_rate": 8.778081854448296e-05, + "loss": 4.4883, + "step": 264850 + }, + { + "epoch": 1.1402326952793764, + "grad_norm": 0.7562177777290344, + "learning_rate": 8.77763856212494e-05, + "loss": 4.0468, + "step": 264900 + }, + { + "epoch": 1.140447914738659, + "grad_norm": 2.251925230026245, + "learning_rate": 8.777195200603945e-05, + "loss": 4.4558, + "step": 264950 + }, + { + "epoch": 1.1406631341979416, + "grad_norm": 2.6354660987854004, + "learning_rate": 8.77675176989343e-05, + "loss": 4.3679, + "step": 265000 + }, + { + "epoch": 1.1408783536572242, + "grad_norm": 4.556951522827148, + "learning_rate": 8.776308270001519e-05, + "loss": 4.4468, + "step": 265050 + }, + { + "epoch": 1.1410935731165068, + "grad_norm": 2.539045810699463, + "learning_rate": 8.775864700936339e-05, + "loss": 4.8322, + "step": 265100 + }, + { + "epoch": 1.1413087925757894, + "grad_norm": 2.9242868423461914, + "learning_rate": 8.775421062706009e-05, + "loss": 4.2025, + "step": 265150 + }, + { + "epoch": 1.1415240120350723, + "grad_norm": 2.5807831287384033, + "learning_rate": 8.77497735531866e-05, + "loss": 4.6827, + "step": 265200 + }, + { + "epoch": 1.1417392314943549, + "grad_norm": 2.1802499294281006, + "learning_rate": 8.774533578782418e-05, + "loss": 5.0958, + "step": 265250 + }, + { + "epoch": 1.1419544509536375, + "grad_norm": 0.6372898817062378, + "learning_rate": 8.774089733105412e-05, + "loss": 4.5958, + "step": 265300 + }, + { + "epoch": 1.14216967041292, + "grad_norm": 5.985154628753662, + "learning_rate": 8.773645818295772e-05, + "loss": 4.2527, + "step": 265350 + }, + { + "epoch": 1.1423848898722027, + "grad_norm": 1.6225972175598145, + "learning_rate": 8.773201834361632e-05, + "loss": 4.3289, + "step": 265400 + }, + { + "epoch": 1.1426001093314853, + "grad_norm": 3.0752761363983154, + "learning_rate": 8.772757781311122e-05, + "loss": 5.0795, + "step": 265450 + }, + { + "epoch": 1.1428153287907679, + "grad_norm": 1.8708416223526, + "learning_rate": 8.772313659152376e-05, + "loss": 4.7609, + "step": 265500 + }, + { + "epoch": 1.1430305482500507, + "grad_norm": 2.4115638732910156, + "learning_rate": 8.771869467893531e-05, + "loss": 4.2495, + "step": 265550 + }, + { + "epoch": 1.1432457677093333, + "grad_norm": 2.1707000732421875, + "learning_rate": 8.771425207542723e-05, + "loss": 5.0125, + "step": 265600 + }, + { + "epoch": 1.1434609871686159, + "grad_norm": 2.9327306747436523, + "learning_rate": 8.770980878108088e-05, + "loss": 4.6261, + "step": 265650 + }, + { + "epoch": 1.1436762066278985, + "grad_norm": 3.1974048614501953, + "learning_rate": 8.770536479597769e-05, + "loss": 5.0725, + "step": 265700 + }, + { + "epoch": 1.143891426087181, + "grad_norm": 1.1062772274017334, + "learning_rate": 8.770092012019902e-05, + "loss": 4.3043, + "step": 265750 + }, + { + "epoch": 1.1441066455464637, + "grad_norm": 3.299227237701416, + "learning_rate": 8.76964747538263e-05, + "loss": 4.9327, + "step": 265800 + }, + { + "epoch": 1.1443218650057463, + "grad_norm": 2.7840042114257812, + "learning_rate": 8.769202869694097e-05, + "loss": 4.9339, + "step": 265850 + }, + { + "epoch": 1.1445370844650289, + "grad_norm": 1.853007435798645, + "learning_rate": 8.768758194962448e-05, + "loss": 4.7007, + "step": 265900 + }, + { + "epoch": 1.1447523039243117, + "grad_norm": 4.412351608276367, + "learning_rate": 8.768313451195826e-05, + "loss": 4.5549, + "step": 265950 + }, + { + "epoch": 1.1449675233835943, + "grad_norm": 1.062371015548706, + "learning_rate": 8.767868638402379e-05, + "loss": 4.3401, + "step": 266000 + }, + { + "epoch": 1.145182742842877, + "grad_norm": 3.0634703636169434, + "learning_rate": 8.767423756590254e-05, + "loss": 4.4419, + "step": 266050 + }, + { + "epoch": 1.1453979623021595, + "grad_norm": 3.5596930980682373, + "learning_rate": 8.766978805767603e-05, + "loss": 4.5189, + "step": 266100 + }, + { + "epoch": 1.1456131817614421, + "grad_norm": 0.7667574286460876, + "learning_rate": 8.766533785942572e-05, + "loss": 4.8032, + "step": 266150 + }, + { + "epoch": 1.1458284012207247, + "grad_norm": 3.974090337753296, + "learning_rate": 8.766088697123315e-05, + "loss": 4.9919, + "step": 266200 + }, + { + "epoch": 1.1460436206800073, + "grad_norm": 2.234696626663208, + "learning_rate": 8.765643539317987e-05, + "loss": 4.5915, + "step": 266250 + }, + { + "epoch": 1.1462588401392901, + "grad_norm": 2.955958604812622, + "learning_rate": 8.76519831253474e-05, + "loss": 5.1471, + "step": 266300 + }, + { + "epoch": 1.1464740595985727, + "grad_norm": 2.8459205627441406, + "learning_rate": 8.764753016781728e-05, + "loss": 4.8455, + "step": 266350 + }, + { + "epoch": 1.1466892790578553, + "grad_norm": 0.6529578566551208, + "learning_rate": 8.764307652067111e-05, + "loss": 4.7035, + "step": 266400 + }, + { + "epoch": 1.146904498517138, + "grad_norm": 2.3476943969726562, + "learning_rate": 8.763862218399046e-05, + "loss": 4.5733, + "step": 266450 + }, + { + "epoch": 1.1471197179764205, + "grad_norm": 1.583305835723877, + "learning_rate": 8.76341671578569e-05, + "loss": 4.5332, + "step": 266500 + }, + { + "epoch": 1.1473349374357031, + "grad_norm": 1.939075231552124, + "learning_rate": 8.762971144235208e-05, + "loss": 4.9356, + "step": 266550 + }, + { + "epoch": 1.1475501568949857, + "grad_norm": 1.0024229288101196, + "learning_rate": 8.762525503755759e-05, + "loss": 4.8011, + "step": 266600 + }, + { + "epoch": 1.1477653763542683, + "grad_norm": 2.5632684230804443, + "learning_rate": 8.762079794355507e-05, + "loss": 4.9553, + "step": 266650 + }, + { + "epoch": 1.1479805958135512, + "grad_norm": 3.1449949741363525, + "learning_rate": 8.761634016042615e-05, + "loss": 4.8062, + "step": 266700 + }, + { + "epoch": 1.1481958152728338, + "grad_norm": 4.277276039123535, + "learning_rate": 8.761188168825249e-05, + "loss": 4.2072, + "step": 266750 + }, + { + "epoch": 1.1484110347321164, + "grad_norm": 3.3056273460388184, + "learning_rate": 8.760742252711578e-05, + "loss": 4.9255, + "step": 266800 + }, + { + "epoch": 1.148626254191399, + "grad_norm": 2.7595107555389404, + "learning_rate": 8.760296267709768e-05, + "loss": 4.1815, + "step": 266850 + }, + { + "epoch": 1.1488414736506816, + "grad_norm": 2.339628219604492, + "learning_rate": 8.759850213827989e-05, + "loss": 4.6806, + "step": 266900 + }, + { + "epoch": 1.1490566931099642, + "grad_norm": 0.8928970098495483, + "learning_rate": 8.759404091074413e-05, + "loss": 4.5164, + "step": 266950 + }, + { + "epoch": 1.1492719125692468, + "grad_norm": 0.6787532567977905, + "learning_rate": 8.758957899457209e-05, + "loss": 4.8429, + "step": 267000 + }, + { + "epoch": 1.1492719125692468, + "eval_loss": 5.118129730224609, + "eval_runtime": 35.0761, + "eval_samples_per_second": 18.246, + "eval_steps_per_second": 9.123, + "eval_tts_loss": 6.826679772589828, + "step": 267000 + }, + { + "epoch": 1.1494871320285296, + "grad_norm": 1.4996159076690674, + "learning_rate": 8.758511638984552e-05, + "loss": 4.4613, + "step": 267050 + }, + { + "epoch": 1.1497023514878122, + "grad_norm": 0.9630982279777527, + "learning_rate": 8.758065309664617e-05, + "loss": 4.828, + "step": 267100 + }, + { + "epoch": 1.1499175709470948, + "grad_norm": 1.3968253135681152, + "learning_rate": 8.757618911505579e-05, + "loss": 4.5842, + "step": 267150 + }, + { + "epoch": 1.1501327904063774, + "grad_norm": 4.459580421447754, + "learning_rate": 8.757172444515616e-05, + "loss": 4.4327, + "step": 267200 + }, + { + "epoch": 1.15034800986566, + "grad_norm": 2.2411372661590576, + "learning_rate": 8.756725908702904e-05, + "loss": 4.6882, + "step": 267250 + }, + { + "epoch": 1.1505632293249426, + "grad_norm": 1.0798133611679077, + "learning_rate": 8.756279304075623e-05, + "loss": 4.5828, + "step": 267300 + }, + { + "epoch": 1.1507784487842252, + "grad_norm": 1.9791172742843628, + "learning_rate": 8.755832630641955e-05, + "loss": 4.4462, + "step": 267350 + }, + { + "epoch": 1.1509936682435078, + "grad_norm": 2.9723098278045654, + "learning_rate": 8.755385888410084e-05, + "loss": 5.1444, + "step": 267400 + }, + { + "epoch": 1.1512088877027906, + "grad_norm": 3.26579213142395, + "learning_rate": 8.754939077388189e-05, + "loss": 4.19, + "step": 267450 + }, + { + "epoch": 1.1514241071620732, + "grad_norm": 3.5275917053222656, + "learning_rate": 8.754492197584456e-05, + "loss": 4.3824, + "step": 267500 + }, + { + "epoch": 1.1516393266213558, + "grad_norm": 0.612269937992096, + "learning_rate": 8.754045249007073e-05, + "loss": 4.3039, + "step": 267550 + }, + { + "epoch": 1.1518545460806384, + "grad_norm": 1.9021192789077759, + "learning_rate": 8.753598231664223e-05, + "loss": 4.9288, + "step": 267600 + }, + { + "epoch": 1.152069765539921, + "grad_norm": 2.7725799083709717, + "learning_rate": 8.753151145564098e-05, + "loss": 4.364, + "step": 267650 + }, + { + "epoch": 1.1522849849992036, + "grad_norm": 3.0636372566223145, + "learning_rate": 8.752703990714885e-05, + "loss": 4.3964, + "step": 267700 + }, + { + "epoch": 1.1525002044584862, + "grad_norm": 1.9724239110946655, + "learning_rate": 8.752256767124778e-05, + "loss": 4.4891, + "step": 267750 + }, + { + "epoch": 1.152715423917769, + "grad_norm": 2.7772905826568604, + "learning_rate": 8.751809474801966e-05, + "loss": 4.4119, + "step": 267800 + }, + { + "epoch": 1.1529306433770516, + "grad_norm": 4.229108810424805, + "learning_rate": 8.751362113754644e-05, + "loss": 4.7967, + "step": 267850 + }, + { + "epoch": 1.1531458628363342, + "grad_norm": 1.1397321224212646, + "learning_rate": 8.750914683991006e-05, + "loss": 4.8356, + "step": 267900 + }, + { + "epoch": 1.1533610822956168, + "grad_norm": 1.7873995304107666, + "learning_rate": 8.750467185519249e-05, + "loss": 4.8262, + "step": 267950 + }, + { + "epoch": 1.1535763017548994, + "grad_norm": 2.4768009185791016, + "learning_rate": 8.750019618347569e-05, + "loss": 4.5953, + "step": 268000 + }, + { + "epoch": 1.153791521214182, + "grad_norm": 3.6306238174438477, + "learning_rate": 8.749571982484162e-05, + "loss": 4.178, + "step": 268050 + }, + { + "epoch": 1.1540067406734646, + "grad_norm": 3.0879101753234863, + "learning_rate": 8.749124277937232e-05, + "loss": 4.4865, + "step": 268100 + }, + { + "epoch": 1.1542219601327472, + "grad_norm": 3.8699965476989746, + "learning_rate": 8.74867650471498e-05, + "loss": 4.2386, + "step": 268150 + }, + { + "epoch": 1.15443717959203, + "grad_norm": 3.5784730911254883, + "learning_rate": 8.748228662825604e-05, + "loss": 4.8882, + "step": 268200 + }, + { + "epoch": 1.1546523990513127, + "grad_norm": 2.516282558441162, + "learning_rate": 8.74778075227731e-05, + "loss": 4.6489, + "step": 268250 + }, + { + "epoch": 1.1548676185105953, + "grad_norm": 2.3930366039276123, + "learning_rate": 8.747332773078304e-05, + "loss": 4.6058, + "step": 268300 + }, + { + "epoch": 1.1550828379698779, + "grad_norm": 3.085675001144409, + "learning_rate": 8.74688472523679e-05, + "loss": 4.6282, + "step": 268350 + }, + { + "epoch": 1.1552980574291605, + "grad_norm": 3.1805169582366943, + "learning_rate": 8.746436608760975e-05, + "loss": 4.9577, + "step": 268400 + }, + { + "epoch": 1.155513276888443, + "grad_norm": 4.426196575164795, + "learning_rate": 8.745988423659068e-05, + "loss": 4.0744, + "step": 268450 + }, + { + "epoch": 1.155728496347726, + "grad_norm": 2.712843179702759, + "learning_rate": 8.745540169939279e-05, + "loss": 4.4147, + "step": 268500 + }, + { + "epoch": 1.1559437158070085, + "grad_norm": 3.741818904876709, + "learning_rate": 8.745091847609818e-05, + "loss": 4.9244, + "step": 268550 + }, + { + "epoch": 1.156158935266291, + "grad_norm": 2.635009527206421, + "learning_rate": 8.744643456678901e-05, + "loss": 4.7967, + "step": 268600 + }, + { + "epoch": 1.1563741547255737, + "grad_norm": 3.9593002796173096, + "learning_rate": 8.744194997154735e-05, + "loss": 4.6831, + "step": 268650 + }, + { + "epoch": 1.1565893741848563, + "grad_norm": 1.870638132095337, + "learning_rate": 8.743746469045541e-05, + "loss": 4.7795, + "step": 268700 + }, + { + "epoch": 1.156804593644139, + "grad_norm": 3.0880372524261475, + "learning_rate": 8.74329787235953e-05, + "loss": 4.8726, + "step": 268750 + }, + { + "epoch": 1.1570198131034215, + "grad_norm": 3.3037750720977783, + "learning_rate": 8.742849207104924e-05, + "loss": 4.872, + "step": 268800 + }, + { + "epoch": 1.157235032562704, + "grad_norm": 2.4800775051116943, + "learning_rate": 8.742400473289937e-05, + "loss": 4.6026, + "step": 268850 + }, + { + "epoch": 1.1574502520219867, + "grad_norm": 2.461806058883667, + "learning_rate": 8.74195167092279e-05, + "loss": 4.5344, + "step": 268900 + }, + { + "epoch": 1.1576654714812695, + "grad_norm": 2.343578577041626, + "learning_rate": 8.741502800011707e-05, + "loss": 4.757, + "step": 268950 + }, + { + "epoch": 1.1578806909405521, + "grad_norm": 2.937713623046875, + "learning_rate": 8.741053860564907e-05, + "loss": 4.8557, + "step": 269000 + }, + { + "epoch": 1.1580959103998347, + "grad_norm": 4.643317699432373, + "learning_rate": 8.740604852590615e-05, + "loss": 4.8472, + "step": 269050 + }, + { + "epoch": 1.1583111298591173, + "grad_norm": 3.1256885528564453, + "learning_rate": 8.740155776097056e-05, + "loss": 4.705, + "step": 269100 + }, + { + "epoch": 1.1585263493184, + "grad_norm": 2.3200793266296387, + "learning_rate": 8.739706631092454e-05, + "loss": 4.3287, + "step": 269150 + }, + { + "epoch": 1.1587415687776825, + "grad_norm": 1.4657925367355347, + "learning_rate": 8.739257417585039e-05, + "loss": 4.5551, + "step": 269200 + }, + { + "epoch": 1.1589567882369654, + "grad_norm": 2.6929242610931396, + "learning_rate": 8.738808135583038e-05, + "loss": 4.1838, + "step": 269250 + }, + { + "epoch": 1.159172007696248, + "grad_norm": 0.26729530096054077, + "learning_rate": 8.738358785094681e-05, + "loss": 4.4712, + "step": 269300 + }, + { + "epoch": 1.1593872271555306, + "grad_norm": 2.9928290843963623, + "learning_rate": 8.7379093661282e-05, + "loss": 5.1178, + "step": 269350 + }, + { + "epoch": 1.1596024466148132, + "grad_norm": 1.0082478523254395, + "learning_rate": 8.737459878691825e-05, + "loss": 5.0195, + "step": 269400 + }, + { + "epoch": 1.1598176660740958, + "grad_norm": 0.9401183724403381, + "learning_rate": 8.737010322793793e-05, + "loss": 4.5148, + "step": 269450 + }, + { + "epoch": 1.1600328855333784, + "grad_norm": 3.4962618350982666, + "learning_rate": 8.736560698442335e-05, + "loss": 4.6318, + "step": 269500 + }, + { + "epoch": 1.160248104992661, + "grad_norm": 2.17209792137146, + "learning_rate": 8.73611100564569e-05, + "loss": 4.5514, + "step": 269550 + }, + { + "epoch": 1.1604633244519436, + "grad_norm": 1.7569776773452759, + "learning_rate": 8.735661244412094e-05, + "loss": 4.3615, + "step": 269600 + }, + { + "epoch": 1.1606785439112262, + "grad_norm": 4.519834518432617, + "learning_rate": 8.735211414749788e-05, + "loss": 4.5578, + "step": 269650 + }, + { + "epoch": 1.160893763370509, + "grad_norm": 2.2411961555480957, + "learning_rate": 8.734761516667007e-05, + "loss": 5.0356, + "step": 269700 + }, + { + "epoch": 1.1611089828297916, + "grad_norm": 2.8056998252868652, + "learning_rate": 8.734311550171996e-05, + "loss": 4.6097, + "step": 269750 + }, + { + "epoch": 1.1613242022890742, + "grad_norm": 1.7933101654052734, + "learning_rate": 8.733861515272997e-05, + "loss": 5.0482, + "step": 269800 + }, + { + "epoch": 1.1615394217483568, + "grad_norm": 3.178690195083618, + "learning_rate": 8.733411411978251e-05, + "loss": 4.7911, + "step": 269850 + }, + { + "epoch": 1.1617546412076394, + "grad_norm": 0.587513267993927, + "learning_rate": 8.732961240296007e-05, + "loss": 4.2967, + "step": 269900 + }, + { + "epoch": 1.161969860666922, + "grad_norm": 2.609285831451416, + "learning_rate": 8.732511000234508e-05, + "loss": 4.5709, + "step": 269950 + }, + { + "epoch": 1.1621850801262048, + "grad_norm": 2.235718011856079, + "learning_rate": 8.732060691802002e-05, + "loss": 4.9855, + "step": 270000 + }, + { + "epoch": 1.1621850801262048, + "eval_loss": 5.1059112548828125, + "eval_runtime": 35.0588, + "eval_samples_per_second": 18.255, + "eval_steps_per_second": 9.128, + "eval_tts_loss": 6.891413176306261, + "step": 270000 + }, + { + "epoch": 1.1624002995854874, + "grad_norm": 1.9140344858169556, + "learning_rate": 8.731610315006737e-05, + "loss": 4.5382, + "step": 270050 + }, + { + "epoch": 1.16261551904477, + "grad_norm": 0.795612633228302, + "learning_rate": 8.731159869856965e-05, + "loss": 4.2508, + "step": 270100 + }, + { + "epoch": 1.1628307385040526, + "grad_norm": 2.147523880004883, + "learning_rate": 8.730709356360936e-05, + "loss": 4.5704, + "step": 270150 + }, + { + "epoch": 1.1630459579633352, + "grad_norm": 2.5267486572265625, + "learning_rate": 8.730258774526901e-05, + "loss": 4.8181, + "step": 270200 + }, + { + "epoch": 1.1632611774226178, + "grad_norm": 3.0745556354522705, + "learning_rate": 8.729808124363114e-05, + "loss": 4.9423, + "step": 270250 + }, + { + "epoch": 1.1634763968819004, + "grad_norm": 4.444030284881592, + "learning_rate": 8.729357405877832e-05, + "loss": 4.1572, + "step": 270300 + }, + { + "epoch": 1.163691616341183, + "grad_norm": 3.949618339538574, + "learning_rate": 8.728906619079309e-05, + "loss": 4.542, + "step": 270350 + }, + { + "epoch": 1.1639068358004656, + "grad_norm": 2.1672751903533936, + "learning_rate": 8.728455763975805e-05, + "loss": 4.7124, + "step": 270400 + }, + { + "epoch": 1.1641220552597484, + "grad_norm": 3.637023687362671, + "learning_rate": 8.728004840575574e-05, + "loss": 4.3902, + "step": 270450 + }, + { + "epoch": 1.164337274719031, + "grad_norm": 1.239849328994751, + "learning_rate": 8.72755384888688e-05, + "loss": 4.4272, + "step": 270500 + }, + { + "epoch": 1.1645524941783136, + "grad_norm": 1.9949184656143188, + "learning_rate": 8.727102788917983e-05, + "loss": 4.702, + "step": 270550 + }, + { + "epoch": 1.1647677136375962, + "grad_norm": 3.734497308731079, + "learning_rate": 8.726651660677145e-05, + "loss": 4.2562, + "step": 270600 + }, + { + "epoch": 1.1649829330968788, + "grad_norm": 1.6296319961547852, + "learning_rate": 8.726200464172629e-05, + "loss": 4.3587, + "step": 270650 + }, + { + "epoch": 1.1651981525561614, + "grad_norm": 4.246856212615967, + "learning_rate": 8.725749199412703e-05, + "loss": 5.1436, + "step": 270700 + }, + { + "epoch": 1.1654133720154443, + "grad_norm": 2.173696756362915, + "learning_rate": 8.725297866405629e-05, + "loss": 4.2817, + "step": 270750 + }, + { + "epoch": 1.1656285914747269, + "grad_norm": 3.2415475845336914, + "learning_rate": 8.724846465159676e-05, + "loss": 4.4813, + "step": 270800 + }, + { + "epoch": 1.1658438109340095, + "grad_norm": 2.4371449947357178, + "learning_rate": 8.724394995683114e-05, + "loss": 4.7783, + "step": 270850 + }, + { + "epoch": 1.166059030393292, + "grad_norm": 3.155541181564331, + "learning_rate": 8.723943457984209e-05, + "loss": 4.9215, + "step": 270900 + }, + { + "epoch": 1.1662742498525747, + "grad_norm": 1.8362107276916504, + "learning_rate": 8.723491852071237e-05, + "loss": 4.3411, + "step": 270950 + }, + { + "epoch": 1.1664894693118573, + "grad_norm": 2.5873539447784424, + "learning_rate": 8.723040177952468e-05, + "loss": 4.7514, + "step": 271000 + }, + { + "epoch": 1.1667046887711399, + "grad_norm": 3.440190076828003, + "learning_rate": 8.722588435636176e-05, + "loss": 4.587, + "step": 271050 + }, + { + "epoch": 1.1669199082304225, + "grad_norm": 2.8789477348327637, + "learning_rate": 8.722136625130637e-05, + "loss": 4.3148, + "step": 271100 + }, + { + "epoch": 1.1671351276897053, + "grad_norm": 2.236161708831787, + "learning_rate": 8.721684746444123e-05, + "loss": 4.8545, + "step": 271150 + }, + { + "epoch": 1.1673503471489879, + "grad_norm": 1.5855308771133423, + "learning_rate": 8.721232799584915e-05, + "loss": 4.8984, + "step": 271200 + }, + { + "epoch": 1.1675655666082705, + "grad_norm": 2.2128376960754395, + "learning_rate": 8.720780784561294e-05, + "loss": 4.5875, + "step": 271250 + }, + { + "epoch": 1.167780786067553, + "grad_norm": 2.615851402282715, + "learning_rate": 8.720328701381533e-05, + "loss": 4.5798, + "step": 271300 + }, + { + "epoch": 1.1679960055268357, + "grad_norm": 2.790799379348755, + "learning_rate": 8.719876550053918e-05, + "loss": 4.8506, + "step": 271350 + }, + { + "epoch": 1.1682112249861183, + "grad_norm": 0.8049152493476868, + "learning_rate": 8.71942433058673e-05, + "loss": 4.2379, + "step": 271400 + }, + { + "epoch": 1.168426444445401, + "grad_norm": 2.0580251216888428, + "learning_rate": 8.718972042988252e-05, + "loss": 4.6885, + "step": 271450 + }, + { + "epoch": 1.1686416639046837, + "grad_norm": 3.6976072788238525, + "learning_rate": 8.71851968726677e-05, + "loss": 4.7557, + "step": 271500 + }, + { + "epoch": 1.1688568833639663, + "grad_norm": 1.1028026342391968, + "learning_rate": 8.718067263430572e-05, + "loss": 4.6906, + "step": 271550 + }, + { + "epoch": 1.169072102823249, + "grad_norm": 3.1057066917419434, + "learning_rate": 8.717614771487942e-05, + "loss": 5.358, + "step": 271600 + }, + { + "epoch": 1.1692873222825315, + "grad_norm": 1.8344004154205322, + "learning_rate": 8.71716221144717e-05, + "loss": 4.1019, + "step": 271650 + }, + { + "epoch": 1.1695025417418141, + "grad_norm": 2.809711456298828, + "learning_rate": 8.716709583316545e-05, + "loss": 4.4089, + "step": 271700 + }, + { + "epoch": 1.1697177612010967, + "grad_norm": 2.686034679412842, + "learning_rate": 8.716256887104358e-05, + "loss": 4.8437, + "step": 271750 + }, + { + "epoch": 1.1699329806603793, + "grad_norm": 3.668545961380005, + "learning_rate": 8.715804122818903e-05, + "loss": 5.0377, + "step": 271800 + }, + { + "epoch": 1.170148200119662, + "grad_norm": 2.117386817932129, + "learning_rate": 8.715351290468473e-05, + "loss": 4.2777, + "step": 271850 + }, + { + "epoch": 1.1703634195789447, + "grad_norm": 2.7868244647979736, + "learning_rate": 8.714898390061361e-05, + "loss": 4.7034, + "step": 271900 + }, + { + "epoch": 1.1705786390382273, + "grad_norm": 3.384958267211914, + "learning_rate": 8.714445421605868e-05, + "loss": 4.3569, + "step": 271950 + }, + { + "epoch": 1.17079385849751, + "grad_norm": 0.6990169882774353, + "learning_rate": 8.713992385110286e-05, + "loss": 4.5369, + "step": 272000 + }, + { + "epoch": 1.1710090779567925, + "grad_norm": 3.7861979007720947, + "learning_rate": 8.713539280582914e-05, + "loss": 4.8792, + "step": 272050 + }, + { + "epoch": 1.1712242974160751, + "grad_norm": 2.603386878967285, + "learning_rate": 8.713086108032056e-05, + "loss": 5.0034, + "step": 272100 + }, + { + "epoch": 1.1714395168753577, + "grad_norm": 2.6668336391448975, + "learning_rate": 8.712632867466009e-05, + "loss": 4.5779, + "step": 272150 + }, + { + "epoch": 1.1716547363346403, + "grad_norm": 3.342957019805908, + "learning_rate": 8.712179558893076e-05, + "loss": 4.5908, + "step": 272200 + }, + { + "epoch": 1.1718699557939232, + "grad_norm": 3.289246082305908, + "learning_rate": 8.711726182321563e-05, + "loss": 4.7848, + "step": 272250 + }, + { + "epoch": 1.1720851752532058, + "grad_norm": 2.3862147331237793, + "learning_rate": 8.711272737759773e-05, + "loss": 5.0196, + "step": 272300 + }, + { + "epoch": 1.1723003947124884, + "grad_norm": 5.360830783843994, + "learning_rate": 8.710819225216011e-05, + "loss": 4.6652, + "step": 272350 + }, + { + "epoch": 1.172515614171771, + "grad_norm": 2.263434410095215, + "learning_rate": 8.710365644698588e-05, + "loss": 4.6686, + "step": 272400 + }, + { + "epoch": 1.1727308336310536, + "grad_norm": 2.4036688804626465, + "learning_rate": 8.709911996215809e-05, + "loss": 4.2422, + "step": 272450 + }, + { + "epoch": 1.1729460530903362, + "grad_norm": 6.997186183929443, + "learning_rate": 8.709458279775984e-05, + "loss": 4.4959, + "step": 272500 + }, + { + "epoch": 1.1731612725496188, + "grad_norm": 1.5015747547149658, + "learning_rate": 8.709004495387425e-05, + "loss": 4.5013, + "step": 272550 + }, + { + "epoch": 1.1733764920089014, + "grad_norm": 3.072883367538452, + "learning_rate": 8.708550643058444e-05, + "loss": 4.6531, + "step": 272600 + }, + { + "epoch": 1.1735917114681842, + "grad_norm": 5.236781597137451, + "learning_rate": 8.708096722797356e-05, + "loss": 4.8153, + "step": 272650 + }, + { + "epoch": 1.1738069309274668, + "grad_norm": 4.486013889312744, + "learning_rate": 8.707642734612473e-05, + "loss": 4.5803, + "step": 272700 + }, + { + "epoch": 1.1740221503867494, + "grad_norm": 3.4735183715820312, + "learning_rate": 8.707188678512114e-05, + "loss": 4.7209, + "step": 272750 + }, + { + "epoch": 1.174237369846032, + "grad_norm": 5.226452827453613, + "learning_rate": 8.706734554504594e-05, + "loss": 4.4546, + "step": 272800 + }, + { + "epoch": 1.1744525893053146, + "grad_norm": 3.202437162399292, + "learning_rate": 8.706280362598234e-05, + "loss": 4.8168, + "step": 272850 + }, + { + "epoch": 1.1746678087645972, + "grad_norm": 1.6257749795913696, + "learning_rate": 8.70582610280135e-05, + "loss": 4.4392, + "step": 272900 + }, + { + "epoch": 1.1748830282238798, + "grad_norm": 3.7883970737457275, + "learning_rate": 8.705371775122265e-05, + "loss": 4.6168, + "step": 272950 + }, + { + "epoch": 1.1750982476831626, + "grad_norm": 3.0539610385894775, + "learning_rate": 8.704917379569303e-05, + "loss": 4.5129, + "step": 273000 + }, + { + "epoch": 1.1750982476831626, + "eval_loss": 5.110400676727295, + "eval_runtime": 35.005, + "eval_samples_per_second": 18.283, + "eval_steps_per_second": 9.142, + "eval_tts_loss": 6.902343588736428, + "step": 273000 + }, + { + "epoch": 1.1753134671424452, + "grad_norm": 2.3866629600524902, + "learning_rate": 8.704462916150783e-05, + "loss": 4.8412, + "step": 273050 + }, + { + "epoch": 1.1755286866017278, + "grad_norm": 2.6505141258239746, + "learning_rate": 8.704008384875035e-05, + "loss": 4.4844, + "step": 273100 + }, + { + "epoch": 1.1757439060610104, + "grad_norm": 2.806366443634033, + "learning_rate": 8.703553785750382e-05, + "loss": 4.8036, + "step": 273150 + }, + { + "epoch": 1.175959125520293, + "grad_norm": 1.3214281797409058, + "learning_rate": 8.703099118785152e-05, + "loss": 4.4609, + "step": 273200 + }, + { + "epoch": 1.1761743449795756, + "grad_norm": 2.487196683883667, + "learning_rate": 8.702644383987672e-05, + "loss": 4.9825, + "step": 273250 + }, + { + "epoch": 1.1763895644388582, + "grad_norm": 2.8325400352478027, + "learning_rate": 8.702189581366273e-05, + "loss": 4.9495, + "step": 273300 + }, + { + "epoch": 1.1766047838981408, + "grad_norm": 2.069601058959961, + "learning_rate": 8.701734710929287e-05, + "loss": 4.741, + "step": 273350 + }, + { + "epoch": 1.1768200033574236, + "grad_norm": 5.320068359375, + "learning_rate": 8.701279772685044e-05, + "loss": 4.9759, + "step": 273400 + }, + { + "epoch": 1.1770352228167063, + "grad_norm": 2.9068870544433594, + "learning_rate": 8.700824766641878e-05, + "loss": 4.4174, + "step": 273450 + }, + { + "epoch": 1.1772504422759889, + "grad_norm": 0.7819262146949768, + "learning_rate": 8.700369692808125e-05, + "loss": 4.7116, + "step": 273500 + }, + { + "epoch": 1.1774656617352715, + "grad_norm": 0.3748607039451599, + "learning_rate": 8.699914551192118e-05, + "loss": 4.6442, + "step": 273550 + }, + { + "epoch": 1.177680881194554, + "grad_norm": 4.315000534057617, + "learning_rate": 8.699459341802198e-05, + "loss": 4.7727, + "step": 273600 + }, + { + "epoch": 1.1778961006538367, + "grad_norm": 1.9122663736343384, + "learning_rate": 8.6990040646467e-05, + "loss": 4.5856, + "step": 273650 + }, + { + "epoch": 1.1781113201131193, + "grad_norm": 2.11395263671875, + "learning_rate": 8.698548719733968e-05, + "loss": 4.8763, + "step": 273700 + }, + { + "epoch": 1.178326539572402, + "grad_norm": 3.017780303955078, + "learning_rate": 8.698093307072337e-05, + "loss": 4.5667, + "step": 273750 + }, + { + "epoch": 1.1785417590316847, + "grad_norm": 2.0563735961914062, + "learning_rate": 8.697637826670153e-05, + "loss": 4.4554, + "step": 273800 + }, + { + "epoch": 1.1787569784909673, + "grad_norm": 4.281335830688477, + "learning_rate": 8.697182278535758e-05, + "loss": 4.5583, + "step": 273850 + }, + { + "epoch": 1.1789721979502499, + "grad_norm": 2.302370071411133, + "learning_rate": 8.696726662677496e-05, + "loss": 4.435, + "step": 273900 + }, + { + "epoch": 1.1791874174095325, + "grad_norm": 0.9956914186477661, + "learning_rate": 8.696270979103717e-05, + "loss": 4.727, + "step": 273950 + }, + { + "epoch": 1.179402636868815, + "grad_norm": 2.230214834213257, + "learning_rate": 8.695815227822763e-05, + "loss": 4.7712, + "step": 274000 + }, + { + "epoch": 1.1796178563280977, + "grad_norm": 0.47511523962020874, + "learning_rate": 8.695359408842985e-05, + "loss": 4.3344, + "step": 274050 + }, + { + "epoch": 1.1798330757873803, + "grad_norm": 3.288952589035034, + "learning_rate": 8.69490352217273e-05, + "loss": 4.3118, + "step": 274100 + }, + { + "epoch": 1.180048295246663, + "grad_norm": 1.7553125619888306, + "learning_rate": 8.694447567820352e-05, + "loss": 4.9172, + "step": 274150 + }, + { + "epoch": 1.1802635147059457, + "grad_norm": 2.7068378925323486, + "learning_rate": 8.693991545794203e-05, + "loss": 4.4602, + "step": 274200 + }, + { + "epoch": 1.1804787341652283, + "grad_norm": 2.3545682430267334, + "learning_rate": 8.693535456102631e-05, + "loss": 4.7342, + "step": 274250 + }, + { + "epoch": 1.180693953624511, + "grad_norm": 2.4210848808288574, + "learning_rate": 8.693079298753997e-05, + "loss": 4.5605, + "step": 274300 + }, + { + "epoch": 1.1809091730837935, + "grad_norm": 2.8150668144226074, + "learning_rate": 8.692623073756653e-05, + "loss": 4.4536, + "step": 274350 + }, + { + "epoch": 1.181124392543076, + "grad_norm": 3.200786828994751, + "learning_rate": 8.69216678111896e-05, + "loss": 4.1776, + "step": 274400 + }, + { + "epoch": 1.181339612002359, + "grad_norm": 2.620479106903076, + "learning_rate": 8.69171042084927e-05, + "loss": 4.7258, + "step": 274450 + }, + { + "epoch": 1.1815548314616415, + "grad_norm": 2.277559518814087, + "learning_rate": 8.691253992955947e-05, + "loss": 4.767, + "step": 274500 + }, + { + "epoch": 1.1817700509209241, + "grad_norm": 3.2317512035369873, + "learning_rate": 8.690797497447351e-05, + "loss": 4.6764, + "step": 274550 + }, + { + "epoch": 1.1819852703802067, + "grad_norm": 4.1900529861450195, + "learning_rate": 8.690340934331844e-05, + "loss": 4.6296, + "step": 274600 + }, + { + "epoch": 1.1822004898394893, + "grad_norm": 0.8448026776313782, + "learning_rate": 8.689884303617787e-05, + "loss": 5.2334, + "step": 274650 + }, + { + "epoch": 1.182415709298772, + "grad_norm": 3.6317152976989746, + "learning_rate": 8.689427605313547e-05, + "loss": 4.1198, + "step": 274700 + }, + { + "epoch": 1.1826309287580545, + "grad_norm": 1.346353530883789, + "learning_rate": 8.68897083942749e-05, + "loss": 4.4738, + "step": 274750 + }, + { + "epoch": 1.1828461482173371, + "grad_norm": 3.0171918869018555, + "learning_rate": 8.68851400596798e-05, + "loss": 4.5339, + "step": 274800 + }, + { + "epoch": 1.1830613676766197, + "grad_norm": 2.6002345085144043, + "learning_rate": 8.688057104943388e-05, + "loss": 4.6417, + "step": 274850 + }, + { + "epoch": 1.1832765871359026, + "grad_norm": 0.7055235505104065, + "learning_rate": 8.68760013636208e-05, + "loss": 4.4765, + "step": 274900 + }, + { + "epoch": 1.1834918065951852, + "grad_norm": 2.4482643604278564, + "learning_rate": 8.687143100232431e-05, + "loss": 4.6905, + "step": 274950 + }, + { + "epoch": 1.1837070260544678, + "grad_norm": 2.5380678176879883, + "learning_rate": 8.686685996562809e-05, + "loss": 4.3544, + "step": 275000 + }, + { + "epoch": 1.1839222455137504, + "grad_norm": 2.8907980918884277, + "learning_rate": 8.68622882536159e-05, + "loss": 4.6863, + "step": 275050 + }, + { + "epoch": 1.184137464973033, + "grad_norm": 3.0434844493865967, + "learning_rate": 8.685771586637147e-05, + "loss": 4.7777, + "step": 275100 + }, + { + "epoch": 1.1843526844323156, + "grad_norm": 1.8561114072799683, + "learning_rate": 8.685314280397855e-05, + "loss": 4.5874, + "step": 275150 + }, + { + "epoch": 1.1845679038915984, + "grad_norm": 1.3368302583694458, + "learning_rate": 8.684856906652092e-05, + "loss": 4.7576, + "step": 275200 + }, + { + "epoch": 1.184783123350881, + "grad_norm": 2.674131393432617, + "learning_rate": 8.684399465408235e-05, + "loss": 4.5733, + "step": 275250 + }, + { + "epoch": 1.1849983428101636, + "grad_norm": 2.837002754211426, + "learning_rate": 8.683941956674663e-05, + "loss": 4.3016, + "step": 275300 + }, + { + "epoch": 1.1852135622694462, + "grad_norm": 3.120420455932617, + "learning_rate": 8.683484380459758e-05, + "loss": 4.4, + "step": 275350 + }, + { + "epoch": 1.1854287817287288, + "grad_norm": 2.3586535453796387, + "learning_rate": 8.683026736771902e-05, + "loss": 4.9838, + "step": 275400 + }, + { + "epoch": 1.1856440011880114, + "grad_norm": 4.121358871459961, + "learning_rate": 8.682569025619475e-05, + "loss": 4.3471, + "step": 275450 + }, + { + "epoch": 1.185859220647294, + "grad_norm": 2.034788131713867, + "learning_rate": 8.682111247010864e-05, + "loss": 4.6395, + "step": 275500 + }, + { + "epoch": 1.1860744401065766, + "grad_norm": 2.6373491287231445, + "learning_rate": 8.681653400954455e-05, + "loss": 4.6196, + "step": 275550 + }, + { + "epoch": 1.1862896595658592, + "grad_norm": 2.522780656814575, + "learning_rate": 8.681195487458631e-05, + "loss": 4.8377, + "step": 275600 + }, + { + "epoch": 1.186504879025142, + "grad_norm": 1.5979362726211548, + "learning_rate": 8.680737506531784e-05, + "loss": 4.4958, + "step": 275650 + }, + { + "epoch": 1.1867200984844246, + "grad_norm": 1.9355531930923462, + "learning_rate": 8.680279458182301e-05, + "loss": 4.5002, + "step": 275700 + }, + { + "epoch": 1.1869353179437072, + "grad_norm": 3.1601076126098633, + "learning_rate": 8.679821342418573e-05, + "loss": 4.8629, + "step": 275750 + }, + { + "epoch": 1.1871505374029898, + "grad_norm": 2.1948161125183105, + "learning_rate": 8.67936315924899e-05, + "loss": 4.6602, + "step": 275800 + }, + { + "epoch": 1.1873657568622724, + "grad_norm": 2.3339285850524902, + "learning_rate": 8.678904908681946e-05, + "loss": 4.6137, + "step": 275850 + }, + { + "epoch": 1.187580976321555, + "grad_norm": 0.913962721824646, + "learning_rate": 8.678446590725837e-05, + "loss": 4.1978, + "step": 275900 + }, + { + "epoch": 1.1877961957808378, + "grad_norm": 1.6464896202087402, + "learning_rate": 8.677988205389056e-05, + "loss": 4.8279, + "step": 275950 + }, + { + "epoch": 1.1880114152401204, + "grad_norm": 2.6033053398132324, + "learning_rate": 8.67752975268e-05, + "loss": 4.4819, + "step": 276000 + }, + { + "epoch": 1.1880114152401204, + "eval_loss": 5.108389854431152, + "eval_runtime": 35.006, + "eval_samples_per_second": 18.283, + "eval_steps_per_second": 9.141, + "eval_tts_loss": 6.8743461235188255, + "step": 276000 + }, + { + "epoch": 1.188226634699403, + "grad_norm": 3.566166877746582, + "learning_rate": 8.677071232607068e-05, + "loss": 4.7852, + "step": 276050 + }, + { + "epoch": 1.1884418541586856, + "grad_norm": 1.8914649486541748, + "learning_rate": 8.676612645178657e-05, + "loss": 4.3507, + "step": 276100 + }, + { + "epoch": 1.1886570736179682, + "grad_norm": 1.645012378692627, + "learning_rate": 8.676153990403168e-05, + "loss": 4.5873, + "step": 276150 + }, + { + "epoch": 1.1888722930772508, + "grad_norm": 2.6179585456848145, + "learning_rate": 8.675695268289004e-05, + "loss": 4.7738, + "step": 276200 + }, + { + "epoch": 1.1890875125365334, + "grad_norm": 2.4149117469787598, + "learning_rate": 8.675236478844566e-05, + "loss": 4.7085, + "step": 276250 + }, + { + "epoch": 1.189302731995816, + "grad_norm": 7.017476558685303, + "learning_rate": 8.674777622078258e-05, + "loss": 4.5779, + "step": 276300 + }, + { + "epoch": 1.1895179514550986, + "grad_norm": 3.2715415954589844, + "learning_rate": 8.674318697998487e-05, + "loss": 4.3008, + "step": 276350 + }, + { + "epoch": 1.1897331709143815, + "grad_norm": 1.917776346206665, + "learning_rate": 8.673859706613657e-05, + "loss": 4.7362, + "step": 276400 + }, + { + "epoch": 1.189948390373664, + "grad_norm": 2.14143705368042, + "learning_rate": 8.673400647932177e-05, + "loss": 4.4171, + "step": 276450 + }, + { + "epoch": 1.1901636098329467, + "grad_norm": 2.5359609127044678, + "learning_rate": 8.672941521962457e-05, + "loss": 4.5874, + "step": 276500 + }, + { + "epoch": 1.1903788292922293, + "grad_norm": 2.4003512859344482, + "learning_rate": 8.672482328712904e-05, + "loss": 4.7062, + "step": 276550 + }, + { + "epoch": 1.1905940487515119, + "grad_norm": 2.653812885284424, + "learning_rate": 8.672023068191932e-05, + "loss": 4.8639, + "step": 276600 + }, + { + "epoch": 1.1908092682107945, + "grad_norm": 2.02938175201416, + "learning_rate": 8.671563740407954e-05, + "loss": 4.6225, + "step": 276650 + }, + { + "epoch": 1.1910244876700773, + "grad_norm": 2.4735708236694336, + "learning_rate": 8.671104345369381e-05, + "loss": 4.5092, + "step": 276700 + }, + { + "epoch": 1.19123970712936, + "grad_norm": 3.5868327617645264, + "learning_rate": 8.67064488308463e-05, + "loss": 4.8336, + "step": 276750 + }, + { + "epoch": 1.1914549265886425, + "grad_norm": 1.178166389465332, + "learning_rate": 8.670185353562116e-05, + "loss": 4.8363, + "step": 276800 + }, + { + "epoch": 1.191670146047925, + "grad_norm": 2.132412910461426, + "learning_rate": 8.66972575681026e-05, + "loss": 4.4704, + "step": 276850 + }, + { + "epoch": 1.1918853655072077, + "grad_norm": 2.1177008152008057, + "learning_rate": 8.669266092837478e-05, + "loss": 4.3331, + "step": 276900 + }, + { + "epoch": 1.1921005849664903, + "grad_norm": 4.752696514129639, + "learning_rate": 8.668806361652189e-05, + "loss": 4.7062, + "step": 276950 + }, + { + "epoch": 1.192315804425773, + "grad_norm": 3.2128384113311768, + "learning_rate": 8.668346563262814e-05, + "loss": 4.7756, + "step": 277000 + }, + { + "epoch": 1.1925310238850555, + "grad_norm": 1.472652792930603, + "learning_rate": 8.66788669767778e-05, + "loss": 4.5902, + "step": 277050 + }, + { + "epoch": 1.192746243344338, + "grad_norm": 2.899350166320801, + "learning_rate": 8.667426764905506e-05, + "loss": 4.7053, + "step": 277100 + }, + { + "epoch": 1.192961462803621, + "grad_norm": 3.656015157699585, + "learning_rate": 8.666966764954418e-05, + "loss": 4.89, + "step": 277150 + }, + { + "epoch": 1.1931766822629035, + "grad_norm": 3.6398704051971436, + "learning_rate": 8.666506697832944e-05, + "loss": 4.608, + "step": 277200 + }, + { + "epoch": 1.1933919017221861, + "grad_norm": 2.492180109024048, + "learning_rate": 8.66604656354951e-05, + "loss": 4.756, + "step": 277250 + }, + { + "epoch": 1.1936071211814687, + "grad_norm": 1.098436713218689, + "learning_rate": 8.665586362112544e-05, + "loss": 4.4528, + "step": 277300 + }, + { + "epoch": 1.1938223406407513, + "grad_norm": 6.239434242248535, + "learning_rate": 8.665126093530476e-05, + "loss": 4.7141, + "step": 277350 + }, + { + "epoch": 1.194037560100034, + "grad_norm": 1.7322027683258057, + "learning_rate": 8.664665757811739e-05, + "loss": 4.7446, + "step": 277400 + }, + { + "epoch": 1.1942527795593167, + "grad_norm": 7.549764633178711, + "learning_rate": 8.664205354964761e-05, + "loss": 4.4338, + "step": 277450 + }, + { + "epoch": 1.1944679990185993, + "grad_norm": 2.618931770324707, + "learning_rate": 8.663744884997983e-05, + "loss": 4.7827, + "step": 277500 + }, + { + "epoch": 1.194683218477882, + "grad_norm": 1.3963185548782349, + "learning_rate": 8.66328434791983e-05, + "loss": 4.7828, + "step": 277550 + }, + { + "epoch": 1.1948984379371645, + "grad_norm": 1.1029396057128906, + "learning_rate": 8.662823743738744e-05, + "loss": 4.45, + "step": 277600 + }, + { + "epoch": 1.1951136573964471, + "grad_norm": 1.9556175470352173, + "learning_rate": 8.662363072463165e-05, + "loss": 4.6387, + "step": 277650 + }, + { + "epoch": 1.1953288768557297, + "grad_norm": 1.6617695093154907, + "learning_rate": 8.661902334101524e-05, + "loss": 4.6573, + "step": 277700 + }, + { + "epoch": 1.1955440963150124, + "grad_norm": 1.1490037441253662, + "learning_rate": 8.661441528662266e-05, + "loss": 4.7426, + "step": 277750 + }, + { + "epoch": 1.195759315774295, + "grad_norm": 3.560856580734253, + "learning_rate": 8.660980656153829e-05, + "loss": 4.8682, + "step": 277800 + }, + { + "epoch": 1.1959745352335778, + "grad_norm": 1.3317826986312866, + "learning_rate": 8.660519716584657e-05, + "loss": 4.2435, + "step": 277850 + }, + { + "epoch": 1.1961897546928604, + "grad_norm": 2.7246336936950684, + "learning_rate": 8.660058709963192e-05, + "loss": 4.9578, + "step": 277900 + }, + { + "epoch": 1.196404974152143, + "grad_norm": 0.7753138542175293, + "learning_rate": 8.659597636297881e-05, + "loss": 4.8096, + "step": 277950 + }, + { + "epoch": 1.1966201936114256, + "grad_norm": 1.721441626548767, + "learning_rate": 8.659136495597168e-05, + "loss": 4.9597, + "step": 278000 + }, + { + "epoch": 1.1968354130707082, + "grad_norm": 3.475825786590576, + "learning_rate": 8.658675287869499e-05, + "loss": 4.716, + "step": 278050 + }, + { + "epoch": 1.1970506325299908, + "grad_norm": 2.848411798477173, + "learning_rate": 8.658214013123324e-05, + "loss": 4.3741, + "step": 278100 + }, + { + "epoch": 1.1972658519892734, + "grad_norm": 3.398360013961792, + "learning_rate": 8.657752671367092e-05, + "loss": 4.7815, + "step": 278150 + }, + { + "epoch": 1.1974810714485562, + "grad_norm": 2.5683419704437256, + "learning_rate": 8.657291262609253e-05, + "loss": 4.945, + "step": 278200 + }, + { + "epoch": 1.1976962909078388, + "grad_norm": 2.067892551422119, + "learning_rate": 8.65682978685826e-05, + "loss": 4.5064, + "step": 278250 + }, + { + "epoch": 1.1979115103671214, + "grad_norm": 2.199223756790161, + "learning_rate": 8.656368244122566e-05, + "loss": 4.9139, + "step": 278300 + }, + { + "epoch": 1.198126729826404, + "grad_norm": 2.432041645050049, + "learning_rate": 8.655906634410624e-05, + "loss": 4.4013, + "step": 278350 + }, + { + "epoch": 1.1983419492856866, + "grad_norm": 1.96934974193573, + "learning_rate": 8.655444957730892e-05, + "loss": 5.1027, + "step": 278400 + }, + { + "epoch": 1.1985571687449692, + "grad_norm": 1.9995648860931396, + "learning_rate": 8.654983214091825e-05, + "loss": 4.7276, + "step": 278450 + }, + { + "epoch": 1.1987723882042518, + "grad_norm": 0.8822823166847229, + "learning_rate": 8.654521403501881e-05, + "loss": 4.6722, + "step": 278500 + }, + { + "epoch": 1.1989876076635344, + "grad_norm": 1.7434087991714478, + "learning_rate": 8.654059525969522e-05, + "loss": 5.1156, + "step": 278550 + }, + { + "epoch": 1.1992028271228172, + "grad_norm": 3.0599849224090576, + "learning_rate": 8.653597581503206e-05, + "loss": 4.431, + "step": 278600 + }, + { + "epoch": 1.1994180465820998, + "grad_norm": 3.5324878692626953, + "learning_rate": 8.653135570111395e-05, + "loss": 4.43, + "step": 278650 + }, + { + "epoch": 1.1996332660413824, + "grad_norm": 5.416391849517822, + "learning_rate": 8.652673491802552e-05, + "loss": 4.8396, + "step": 278700 + }, + { + "epoch": 1.199848485500665, + "grad_norm": 4.787466049194336, + "learning_rate": 8.65221134658514e-05, + "loss": 4.3854, + "step": 278750 + }, + { + "epoch": 1.2000637049599476, + "grad_norm": 2.54695987701416, + "learning_rate": 8.651749134467629e-05, + "loss": 4.3194, + "step": 278800 + }, + { + "epoch": 1.2002789244192302, + "grad_norm": 1.348387360572815, + "learning_rate": 8.651286855458481e-05, + "loss": 4.7056, + "step": 278850 + }, + { + "epoch": 1.2004941438785128, + "grad_norm": 3.6101908683776855, + "learning_rate": 8.650824509566164e-05, + "loss": 4.7461, + "step": 278900 + }, + { + "epoch": 1.2007093633377957, + "grad_norm": 3.733847141265869, + "learning_rate": 8.65036209679915e-05, + "loss": 4.6507, + "step": 278950 + }, + { + "epoch": 1.2009245827970783, + "grad_norm": 1.7961368560791016, + "learning_rate": 8.649899617165909e-05, + "loss": 4.694, + "step": 279000 + }, + { + "epoch": 1.2009245827970783, + "eval_loss": 5.108611583709717, + "eval_runtime": 35.1224, + "eval_samples_per_second": 18.222, + "eval_steps_per_second": 9.111, + "eval_tts_loss": 6.895710204494709, + "step": 279000 + }, + { + "epoch": 1.2011398022563609, + "grad_norm": 5.582791328430176, + "learning_rate": 8.64943707067491e-05, + "loss": 4.6393, + "step": 279050 + }, + { + "epoch": 1.2013550217156435, + "grad_norm": 2.8218419551849365, + "learning_rate": 8.648974457334629e-05, + "loss": 4.5281, + "step": 279100 + }, + { + "epoch": 1.201570241174926, + "grad_norm": 2.5687453746795654, + "learning_rate": 8.648511777153536e-05, + "loss": 4.4019, + "step": 279150 + }, + { + "epoch": 1.2017854606342087, + "grad_norm": 3.6519784927368164, + "learning_rate": 8.648049030140111e-05, + "loss": 4.8879, + "step": 279200 + }, + { + "epoch": 1.2020006800934913, + "grad_norm": 0.7849458456039429, + "learning_rate": 8.647586216302825e-05, + "loss": 4.6699, + "step": 279250 + }, + { + "epoch": 1.2022158995527739, + "grad_norm": 5.227767467498779, + "learning_rate": 8.647123335650161e-05, + "loss": 4.7496, + "step": 279300 + }, + { + "epoch": 1.2024311190120567, + "grad_norm": 2.8482251167297363, + "learning_rate": 8.646660388190596e-05, + "loss": 4.3361, + "step": 279350 + }, + { + "epoch": 1.2026463384713393, + "grad_norm": 2.3003718852996826, + "learning_rate": 8.646197373932608e-05, + "loss": 4.1077, + "step": 279400 + }, + { + "epoch": 1.2028615579306219, + "grad_norm": 1.817009449005127, + "learning_rate": 8.64573429288468e-05, + "loss": 4.5981, + "step": 279450 + }, + { + "epoch": 1.2030767773899045, + "grad_norm": 1.4003803730010986, + "learning_rate": 8.645271145055296e-05, + "loss": 4.5381, + "step": 279500 + }, + { + "epoch": 1.203291996849187, + "grad_norm": 1.8072255849838257, + "learning_rate": 8.644807930452938e-05, + "loss": 5.2798, + "step": 279550 + }, + { + "epoch": 1.2035072163084697, + "grad_norm": 2.18540096282959, + "learning_rate": 8.644344649086091e-05, + "loss": 4.6841, + "step": 279600 + }, + { + "epoch": 1.2037224357677523, + "grad_norm": 3.1629021167755127, + "learning_rate": 8.643881300963241e-05, + "loss": 4.599, + "step": 279650 + }, + { + "epoch": 1.203937655227035, + "grad_norm": 1.9564361572265625, + "learning_rate": 8.643417886092877e-05, + "loss": 4.5982, + "step": 279700 + }, + { + "epoch": 1.2041528746863177, + "grad_norm": 3.6265907287597656, + "learning_rate": 8.642954404483488e-05, + "loss": 4.6264, + "step": 279750 + }, + { + "epoch": 1.2043680941456003, + "grad_norm": 0.981605052947998, + "learning_rate": 8.642490856143561e-05, + "loss": 4.7889, + "step": 279800 + }, + { + "epoch": 1.204583313604883, + "grad_norm": 1.8086750507354736, + "learning_rate": 8.64202724108159e-05, + "loss": 4.8622, + "step": 279850 + }, + { + "epoch": 1.2047985330641655, + "grad_norm": 0.9731691479682922, + "learning_rate": 8.641563559306066e-05, + "loss": 4.5931, + "step": 279900 + }, + { + "epoch": 1.2050137525234481, + "grad_norm": 5.141676902770996, + "learning_rate": 8.641099810825484e-05, + "loss": 4.9017, + "step": 279950 + }, + { + "epoch": 1.2052289719827307, + "grad_norm": 3.530211925506592, + "learning_rate": 8.640635995648336e-05, + "loss": 4.443, + "step": 280000 + }, + { + "epoch": 1.2054441914420133, + "grad_norm": 1.4987242221832275, + "learning_rate": 8.64017211378312e-05, + "loss": 4.6914, + "step": 280050 + }, + { + "epoch": 1.2056594109012961, + "grad_norm": 2.479703426361084, + "learning_rate": 8.639708165238333e-05, + "loss": 4.643, + "step": 280100 + }, + { + "epoch": 1.2058746303605787, + "grad_norm": 2.6359951496124268, + "learning_rate": 8.639244150022473e-05, + "loss": 4.5659, + "step": 280150 + }, + { + "epoch": 1.2060898498198613, + "grad_norm": 5.073355197906494, + "learning_rate": 8.638780068144042e-05, + "loss": 4.7436, + "step": 280200 + }, + { + "epoch": 1.206305069279144, + "grad_norm": 2.879728078842163, + "learning_rate": 8.638315919611537e-05, + "loss": 4.6105, + "step": 280250 + }, + { + "epoch": 1.2065202887384265, + "grad_norm": 3.124873399734497, + "learning_rate": 8.637851704433464e-05, + "loss": 4.4083, + "step": 280300 + }, + { + "epoch": 1.2067355081977091, + "grad_norm": 3.074805736541748, + "learning_rate": 8.637387422618322e-05, + "loss": 4.4703, + "step": 280350 + }, + { + "epoch": 1.2069507276569917, + "grad_norm": 2.478261947631836, + "learning_rate": 8.636923074174621e-05, + "loss": 4.541, + "step": 280400 + }, + { + "epoch": 1.2071659471162746, + "grad_norm": 2.246821880340576, + "learning_rate": 8.636458659110861e-05, + "loss": 4.6276, + "step": 280450 + }, + { + "epoch": 1.2073811665755572, + "grad_norm": 4.359123706817627, + "learning_rate": 8.635994177435554e-05, + "loss": 4.6114, + "step": 280500 + }, + { + "epoch": 1.2075963860348398, + "grad_norm": 1.888807773590088, + "learning_rate": 8.635529629157207e-05, + "loss": 4.5438, + "step": 280550 + }, + { + "epoch": 1.2078116054941224, + "grad_norm": 0.7729594707489014, + "learning_rate": 8.635065014284326e-05, + "loss": 4.6234, + "step": 280600 + }, + { + "epoch": 1.208026824953405, + "grad_norm": 0.7392024993896484, + "learning_rate": 8.634600332825425e-05, + "loss": 4.4726, + "step": 280650 + }, + { + "epoch": 1.2082420444126876, + "grad_norm": 3.031738042831421, + "learning_rate": 8.634135584789015e-05, + "loss": 4.7937, + "step": 280700 + }, + { + "epoch": 1.2084572638719702, + "grad_norm": 2.5273983478546143, + "learning_rate": 8.63367077018361e-05, + "loss": 4.423, + "step": 280750 + }, + { + "epoch": 1.2086724833312528, + "grad_norm": 2.0776357650756836, + "learning_rate": 8.633205889017724e-05, + "loss": 4.7363, + "step": 280800 + }, + { + "epoch": 1.2088877027905356, + "grad_norm": 2.640131950378418, + "learning_rate": 8.63274094129987e-05, + "loss": 4.8034, + "step": 280850 + }, + { + "epoch": 1.2091029222498182, + "grad_norm": 3.3411529064178467, + "learning_rate": 8.632275927038569e-05, + "loss": 4.4414, + "step": 280900 + }, + { + "epoch": 1.2093181417091008, + "grad_norm": 2.1945812702178955, + "learning_rate": 8.631810846242336e-05, + "loss": 4.3381, + "step": 280950 + }, + { + "epoch": 1.2095333611683834, + "grad_norm": 2.158235788345337, + "learning_rate": 8.631345698919693e-05, + "loss": 4.2821, + "step": 281000 + }, + { + "epoch": 1.209748580627666, + "grad_norm": 3.306896448135376, + "learning_rate": 8.630880485079156e-05, + "loss": 4.5206, + "step": 281050 + }, + { + "epoch": 1.2099638000869486, + "grad_norm": 2.58386492729187, + "learning_rate": 8.63041520472925e-05, + "loss": 4.8545, + "step": 281100 + }, + { + "epoch": 1.2101790195462314, + "grad_norm": 3.9871723651885986, + "learning_rate": 8.629949857878498e-05, + "loss": 4.5277, + "step": 281150 + }, + { + "epoch": 1.210394239005514, + "grad_norm": 2.253655195236206, + "learning_rate": 8.629484444535422e-05, + "loss": 4.5181, + "step": 281200 + }, + { + "epoch": 1.2106094584647966, + "grad_norm": 0.8933427929878235, + "learning_rate": 8.629018964708549e-05, + "loss": 4.5704, + "step": 281250 + }, + { + "epoch": 1.2108246779240792, + "grad_norm": 4.386649131774902, + "learning_rate": 8.628553418406404e-05, + "loss": 4.3841, + "step": 281300 + }, + { + "epoch": 1.2110398973833618, + "grad_norm": 3.135599136352539, + "learning_rate": 8.628087805637517e-05, + "loss": 4.5549, + "step": 281350 + }, + { + "epoch": 1.2112551168426444, + "grad_norm": 0.801242470741272, + "learning_rate": 8.627622126410415e-05, + "loss": 4.6564, + "step": 281400 + }, + { + "epoch": 1.211470336301927, + "grad_norm": 2.280271053314209, + "learning_rate": 8.62715638073363e-05, + "loss": 4.2897, + "step": 281450 + }, + { + "epoch": 1.2116855557612096, + "grad_norm": 3.018643379211426, + "learning_rate": 8.62669056861569e-05, + "loss": 4.5296, + "step": 281500 + }, + { + "epoch": 1.2119007752204922, + "grad_norm": 0.6560493111610413, + "learning_rate": 8.626224690065131e-05, + "loss": 4.4929, + "step": 281550 + }, + { + "epoch": 1.212115994679775, + "grad_norm": 3.214874505996704, + "learning_rate": 8.625758745090487e-05, + "loss": 4.9934, + "step": 281600 + }, + { + "epoch": 1.2123312141390576, + "grad_norm": 2.381348133087158, + "learning_rate": 8.62529273370029e-05, + "loss": 4.764, + "step": 281650 + }, + { + "epoch": 1.2125464335983402, + "grad_norm": 1.2387964725494385, + "learning_rate": 8.624826655903078e-05, + "loss": 4.3231, + "step": 281700 + }, + { + "epoch": 1.2127616530576228, + "grad_norm": 2.337218999862671, + "learning_rate": 8.624360511707387e-05, + "loss": 4.9198, + "step": 281750 + }, + { + "epoch": 1.2129768725169054, + "grad_norm": 1.459831953048706, + "learning_rate": 8.62389430112176e-05, + "loss": 4.5406, + "step": 281800 + }, + { + "epoch": 1.213192091976188, + "grad_norm": 2.5302224159240723, + "learning_rate": 8.623428024154734e-05, + "loss": 4.7769, + "step": 281850 + }, + { + "epoch": 1.2134073114354709, + "grad_norm": 2.132533550262451, + "learning_rate": 8.622961680814847e-05, + "loss": 4.3545, + "step": 281900 + }, + { + "epoch": 1.2136225308947535, + "grad_norm": 2.5330584049224854, + "learning_rate": 8.622495271110646e-05, + "loss": 4.5527, + "step": 281950 + }, + { + "epoch": 1.213837750354036, + "grad_norm": 1.7596142292022705, + "learning_rate": 8.622028795050675e-05, + "loss": 4.5287, + "step": 282000 + }, + { + "epoch": 1.213837750354036, + "eval_loss": 5.108687400817871, + "eval_runtime": 35.025, + "eval_samples_per_second": 18.273, + "eval_steps_per_second": 9.136, + "eval_tts_loss": 6.828727345244232, + "step": 282000 + }, + { + "epoch": 1.2140529698133187, + "grad_norm": 2.769932746887207, + "learning_rate": 8.621562252643475e-05, + "loss": 4.8579, + "step": 282050 + }, + { + "epoch": 1.2142681892726013, + "grad_norm": 1.9055144786834717, + "learning_rate": 8.621095643897594e-05, + "loss": 4.5789, + "step": 282100 + }, + { + "epoch": 1.2144834087318839, + "grad_norm": 4.7010064125061035, + "learning_rate": 8.620628968821578e-05, + "loss": 4.3303, + "step": 282150 + }, + { + "epoch": 1.2146986281911665, + "grad_norm": 2.7340736389160156, + "learning_rate": 8.620162227423978e-05, + "loss": 4.4331, + "step": 282200 + }, + { + "epoch": 1.214913847650449, + "grad_norm": 2.9912514686584473, + "learning_rate": 8.61969541971334e-05, + "loss": 4.4684, + "step": 282250 + }, + { + "epoch": 1.2151290671097317, + "grad_norm": 2.2333645820617676, + "learning_rate": 8.61922854569822e-05, + "loss": 5.0449, + "step": 282300 + }, + { + "epoch": 1.2153442865690145, + "grad_norm": 2.541475772857666, + "learning_rate": 8.618761605387165e-05, + "loss": 4.9209, + "step": 282350 + }, + { + "epoch": 1.215559506028297, + "grad_norm": 1.3602200746536255, + "learning_rate": 8.61829459878873e-05, + "loss": 4.2787, + "step": 282400 + }, + { + "epoch": 1.2157747254875797, + "grad_norm": 2.780759572982788, + "learning_rate": 8.617827525911468e-05, + "loss": 4.6451, + "step": 282450 + }, + { + "epoch": 1.2159899449468623, + "grad_norm": 2.3913512229919434, + "learning_rate": 8.61736038676394e-05, + "loss": 5.0538, + "step": 282500 + }, + { + "epoch": 1.216205164406145, + "grad_norm": 2.6706883907318115, + "learning_rate": 8.616893181354697e-05, + "loss": 4.8294, + "step": 282550 + }, + { + "epoch": 1.2164203838654275, + "grad_norm": 2.929410696029663, + "learning_rate": 8.616425909692298e-05, + "loss": 4.8068, + "step": 282600 + }, + { + "epoch": 1.2166356033247103, + "grad_norm": 2.581080913543701, + "learning_rate": 8.615958571785307e-05, + "loss": 4.6796, + "step": 282650 + }, + { + "epoch": 1.216850822783993, + "grad_norm": 2.346524477005005, + "learning_rate": 8.61549116764228e-05, + "loss": 4.5303, + "step": 282700 + }, + { + "epoch": 1.2170660422432755, + "grad_norm": 2.701550245285034, + "learning_rate": 8.61502369727178e-05, + "loss": 4.6463, + "step": 282750 + }, + { + "epoch": 1.2172812617025581, + "grad_norm": 1.8168708086013794, + "learning_rate": 8.61455616068237e-05, + "loss": 5.0198, + "step": 282800 + }, + { + "epoch": 1.2174964811618407, + "grad_norm": 4.467017650604248, + "learning_rate": 8.614088557882613e-05, + "loss": 4.6656, + "step": 282850 + }, + { + "epoch": 1.2177117006211233, + "grad_norm": 2.0420405864715576, + "learning_rate": 8.613620888881078e-05, + "loss": 4.1137, + "step": 282900 + }, + { + "epoch": 1.217926920080406, + "grad_norm": 3.203963041305542, + "learning_rate": 8.613153153686328e-05, + "loss": 5.1472, + "step": 282950 + }, + { + "epoch": 1.2181421395396885, + "grad_norm": 1.965268611907959, + "learning_rate": 8.612685352306931e-05, + "loss": 4.1867, + "step": 283000 + }, + { + "epoch": 1.2183573589989711, + "grad_norm": 1.8884316682815552, + "learning_rate": 8.612217484751459e-05, + "loss": 4.5842, + "step": 283050 + }, + { + "epoch": 1.218572578458254, + "grad_norm": 2.247495651245117, + "learning_rate": 8.61174955102848e-05, + "loss": 4.7811, + "step": 283100 + }, + { + "epoch": 1.2187877979175366, + "grad_norm": 1.1648762226104736, + "learning_rate": 8.611281551146565e-05, + "loss": 4.5385, + "step": 283150 + }, + { + "epoch": 1.2190030173768192, + "grad_norm": 2.726431131362915, + "learning_rate": 8.610813485114289e-05, + "loss": 5.0123, + "step": 283200 + }, + { + "epoch": 1.2192182368361018, + "grad_norm": 5.360476970672607, + "learning_rate": 8.610345352940224e-05, + "loss": 5.0465, + "step": 283250 + }, + { + "epoch": 1.2194334562953844, + "grad_norm": 1.956848382949829, + "learning_rate": 8.609877154632943e-05, + "loss": 4.2625, + "step": 283300 + }, + { + "epoch": 1.219648675754667, + "grad_norm": 2.539421558380127, + "learning_rate": 8.609408890201027e-05, + "loss": 5.0287, + "step": 283350 + }, + { + "epoch": 1.2198638952139498, + "grad_norm": 3.6784117221832275, + "learning_rate": 8.608940559653052e-05, + "loss": 4.9289, + "step": 283400 + }, + { + "epoch": 1.2200791146732324, + "grad_norm": 2.918447256088257, + "learning_rate": 8.608472162997595e-05, + "loss": 4.9645, + "step": 283450 + }, + { + "epoch": 1.220294334132515, + "grad_norm": 3.9707136154174805, + "learning_rate": 8.608003700243237e-05, + "loss": 4.8619, + "step": 283500 + }, + { + "epoch": 1.2205095535917976, + "grad_norm": 4.399134635925293, + "learning_rate": 8.607535171398559e-05, + "loss": 4.4449, + "step": 283550 + }, + { + "epoch": 1.2207247730510802, + "grad_norm": 0.4688147008419037, + "learning_rate": 8.607066576472145e-05, + "loss": 4.6265, + "step": 283600 + }, + { + "epoch": 1.2209399925103628, + "grad_norm": 1.1350653171539307, + "learning_rate": 8.606597915472576e-05, + "loss": 4.2476, + "step": 283650 + }, + { + "epoch": 1.2211552119696454, + "grad_norm": 2.6067111492156982, + "learning_rate": 8.606129188408438e-05, + "loss": 4.9243, + "step": 283700 + }, + { + "epoch": 1.221370431428928, + "grad_norm": 0.5034146308898926, + "learning_rate": 8.605660395288318e-05, + "loss": 4.5146, + "step": 283750 + }, + { + "epoch": 1.2215856508882108, + "grad_norm": 2.616295337677002, + "learning_rate": 8.605191536120801e-05, + "loss": 4.6162, + "step": 283800 + }, + { + "epoch": 1.2218008703474934, + "grad_norm": 2.58052396774292, + "learning_rate": 8.604722610914478e-05, + "loss": 4.5157, + "step": 283850 + }, + { + "epoch": 1.222016089806776, + "grad_norm": 4.241245746612549, + "learning_rate": 8.604253619677936e-05, + "loss": 4.9424, + "step": 283900 + }, + { + "epoch": 1.2222313092660586, + "grad_norm": 0.2464539110660553, + "learning_rate": 8.603784562419768e-05, + "loss": 4.9229, + "step": 283950 + }, + { + "epoch": 1.2224465287253412, + "grad_norm": 2.2177107334136963, + "learning_rate": 8.603315439148563e-05, + "loss": 4.2324, + "step": 284000 + }, + { + "epoch": 1.2226617481846238, + "grad_norm": 1.9725275039672852, + "learning_rate": 8.602846249872919e-05, + "loss": 4.2171, + "step": 284050 + }, + { + "epoch": 1.2228769676439064, + "grad_norm": 1.6774276494979858, + "learning_rate": 8.602376994601427e-05, + "loss": 4.5809, + "step": 284100 + }, + { + "epoch": 1.2230921871031892, + "grad_norm": 1.6379314661026, + "learning_rate": 8.601907673342681e-05, + "loss": 4.8863, + "step": 284150 + }, + { + "epoch": 1.2233074065624718, + "grad_norm": 4.233897686004639, + "learning_rate": 8.601438286105283e-05, + "loss": 4.4922, + "step": 284200 + }, + { + "epoch": 1.2235226260217544, + "grad_norm": 3.833388566970825, + "learning_rate": 8.600968832897828e-05, + "loss": 4.4924, + "step": 284250 + }, + { + "epoch": 1.223737845481037, + "grad_norm": 1.188116192817688, + "learning_rate": 8.600499313728918e-05, + "loss": 5.0423, + "step": 284300 + }, + { + "epoch": 1.2239530649403196, + "grad_norm": 2.366990089416504, + "learning_rate": 8.600029728607148e-05, + "loss": 4.6716, + "step": 284350 + }, + { + "epoch": 1.2241682843996022, + "grad_norm": 1.5771626234054565, + "learning_rate": 8.599560077541125e-05, + "loss": 4.4679, + "step": 284400 + }, + { + "epoch": 1.2243835038588848, + "grad_norm": 1.22776198387146, + "learning_rate": 8.599090360539448e-05, + "loss": 4.3661, + "step": 284450 + }, + { + "epoch": 1.2245987233181674, + "grad_norm": 3.0551185607910156, + "learning_rate": 8.598620577610724e-05, + "loss": 4.3491, + "step": 284500 + }, + { + "epoch": 1.2248139427774503, + "grad_norm": 3.3958191871643066, + "learning_rate": 8.598150728763558e-05, + "loss": 4.4925, + "step": 284550 + }, + { + "epoch": 1.2250291622367329, + "grad_norm": 2.1137986183166504, + "learning_rate": 8.597680814006556e-05, + "loss": 4.9426, + "step": 284600 + }, + { + "epoch": 1.2252443816960155, + "grad_norm": 1.81824791431427, + "learning_rate": 8.597210833348325e-05, + "loss": 4.8095, + "step": 284650 + }, + { + "epoch": 1.225459601155298, + "grad_norm": 2.729686737060547, + "learning_rate": 8.596740786797474e-05, + "loss": 5.047, + "step": 284700 + }, + { + "epoch": 1.2256748206145807, + "grad_norm": 2.211778163909912, + "learning_rate": 8.596270674362615e-05, + "loss": 4.564, + "step": 284750 + }, + { + "epoch": 1.2258900400738633, + "grad_norm": 3.4543545246124268, + "learning_rate": 8.595800496052359e-05, + "loss": 4.6054, + "step": 284800 + }, + { + "epoch": 1.2261052595331459, + "grad_norm": 3.1416568756103516, + "learning_rate": 8.595330251875315e-05, + "loss": 4.7556, + "step": 284850 + }, + { + "epoch": 1.2263204789924287, + "grad_norm": 2.0682172775268555, + "learning_rate": 8.594859941840103e-05, + "loss": 4.1911, + "step": 284900 + }, + { + "epoch": 1.2265356984517113, + "grad_norm": 2.6532411575317383, + "learning_rate": 8.594389565955333e-05, + "loss": 4.7177, + "step": 284950 + }, + { + "epoch": 1.2267509179109939, + "grad_norm": 2.193458080291748, + "learning_rate": 8.593919124229623e-05, + "loss": 5.2125, + "step": 285000 + }, + { + "epoch": 1.2267509179109939, + "eval_loss": 5.094329357147217, + "eval_runtime": 34.889, + "eval_samples_per_second": 18.344, + "eval_steps_per_second": 9.172, + "eval_tts_loss": 6.88661179426512, + "step": 285000 + }, + { + "epoch": 1.2269661373702765, + "grad_norm": 2.7374415397644043, + "learning_rate": 8.593448616671589e-05, + "loss": 4.9071, + "step": 285050 + }, + { + "epoch": 1.227181356829559, + "grad_norm": 4.319389343261719, + "learning_rate": 8.592978043289852e-05, + "loss": 4.1152, + "step": 285100 + }, + { + "epoch": 1.2273965762888417, + "grad_norm": 1.6038273572921753, + "learning_rate": 8.59250740409303e-05, + "loss": 4.5924, + "step": 285150 + }, + { + "epoch": 1.2276117957481243, + "grad_norm": 2.8439996242523193, + "learning_rate": 8.592036699089747e-05, + "loss": 4.9708, + "step": 285200 + }, + { + "epoch": 1.227827015207407, + "grad_norm": 3.852881908416748, + "learning_rate": 8.591565928288621e-05, + "loss": 4.601, + "step": 285250 + }, + { + "epoch": 1.2280422346666897, + "grad_norm": 1.9390946626663208, + "learning_rate": 8.591095091698278e-05, + "loss": 4.5585, + "step": 285300 + }, + { + "epoch": 1.2282574541259723, + "grad_norm": 0.9387653470039368, + "learning_rate": 8.590624189327341e-05, + "loss": 4.851, + "step": 285350 + }, + { + "epoch": 1.228472673585255, + "grad_norm": 2.305981397628784, + "learning_rate": 8.590153221184438e-05, + "loss": 4.5019, + "step": 285400 + }, + { + "epoch": 1.2286878930445375, + "grad_norm": 0.31693604588508606, + "learning_rate": 8.589682187278194e-05, + "loss": 4.6532, + "step": 285450 + }, + { + "epoch": 1.2289031125038201, + "grad_norm": 5.864621162414551, + "learning_rate": 8.589211087617238e-05, + "loss": 4.3672, + "step": 285500 + }, + { + "epoch": 1.2291183319631027, + "grad_norm": 2.2277567386627197, + "learning_rate": 8.5887399222102e-05, + "loss": 4.454, + "step": 285550 + }, + { + "epoch": 1.2293335514223853, + "grad_norm": 1.1440913677215576, + "learning_rate": 8.58826869106571e-05, + "loss": 4.4451, + "step": 285600 + }, + { + "epoch": 1.2295487708816681, + "grad_norm": 2.991032600402832, + "learning_rate": 8.587797394192401e-05, + "loss": 4.4243, + "step": 285650 + }, + { + "epoch": 1.2297639903409507, + "grad_norm": 1.6343799829483032, + "learning_rate": 8.587326031598904e-05, + "loss": 4.8408, + "step": 285700 + }, + { + "epoch": 1.2299792098002333, + "grad_norm": 2.8954954147338867, + "learning_rate": 8.586854603293856e-05, + "loss": 4.3453, + "step": 285750 + }, + { + "epoch": 1.230194429259516, + "grad_norm": 3.899287462234497, + "learning_rate": 8.58638310928589e-05, + "loss": 4.7375, + "step": 285800 + }, + { + "epoch": 1.2304096487187985, + "grad_norm": 2.0755996704101562, + "learning_rate": 8.585911549583642e-05, + "loss": 4.4358, + "step": 285850 + }, + { + "epoch": 1.2306248681780811, + "grad_norm": 1.1176104545593262, + "learning_rate": 8.585439924195755e-05, + "loss": 4.5065, + "step": 285900 + }, + { + "epoch": 1.2308400876373637, + "grad_norm": 3.6894729137420654, + "learning_rate": 8.584968233130864e-05, + "loss": 4.6778, + "step": 285950 + }, + { + "epoch": 1.2310553070966463, + "grad_norm": 1.4472711086273193, + "learning_rate": 8.584496476397608e-05, + "loss": 4.706, + "step": 286000 + }, + { + "epoch": 1.2312705265559292, + "grad_norm": 2.7686262130737305, + "learning_rate": 8.58402465400463e-05, + "loss": 4.6674, + "step": 286050 + }, + { + "epoch": 1.2314857460152118, + "grad_norm": 4.23680305480957, + "learning_rate": 8.583552765960575e-05, + "loss": 4.5037, + "step": 286100 + }, + { + "epoch": 1.2317009654744944, + "grad_norm": 2.2659411430358887, + "learning_rate": 8.583080812274084e-05, + "loss": 5.1066, + "step": 286150 + }, + { + "epoch": 1.231916184933777, + "grad_norm": 2.3380651473999023, + "learning_rate": 8.582608792953804e-05, + "loss": 4.5939, + "step": 286200 + }, + { + "epoch": 1.2321314043930596, + "grad_norm": 3.315882921218872, + "learning_rate": 8.582136708008381e-05, + "loss": 4.9944, + "step": 286250 + }, + { + "epoch": 1.2323466238523422, + "grad_norm": 3.0059099197387695, + "learning_rate": 8.58166455744646e-05, + "loss": 4.8984, + "step": 286300 + }, + { + "epoch": 1.2325618433116248, + "grad_norm": 1.5568739175796509, + "learning_rate": 8.581192341276694e-05, + "loss": 4.5645, + "step": 286350 + }, + { + "epoch": 1.2327770627709076, + "grad_norm": 0.7932285070419312, + "learning_rate": 8.580720059507729e-05, + "loss": 4.6095, + "step": 286400 + }, + { + "epoch": 1.2329922822301902, + "grad_norm": 2.3866429328918457, + "learning_rate": 8.580247712148218e-05, + "loss": 4.6744, + "step": 286450 + }, + { + "epoch": 1.2332075016894728, + "grad_norm": 2.822540760040283, + "learning_rate": 8.579775299206813e-05, + "loss": 4.9374, + "step": 286500 + }, + { + "epoch": 1.2334227211487554, + "grad_norm": 2.6664953231811523, + "learning_rate": 8.579302820692167e-05, + "loss": 4.7145, + "step": 286550 + }, + { + "epoch": 1.233637940608038, + "grad_norm": 3.721684694290161, + "learning_rate": 8.578830276612936e-05, + "loss": 4.1649, + "step": 286600 + }, + { + "epoch": 1.2338531600673206, + "grad_norm": 3.6309800148010254, + "learning_rate": 8.578357666977775e-05, + "loss": 4.2917, + "step": 286650 + }, + { + "epoch": 1.2340683795266032, + "grad_norm": 3.41499400138855, + "learning_rate": 8.577884991795342e-05, + "loss": 4.562, + "step": 286700 + }, + { + "epoch": 1.2342835989858858, + "grad_norm": 4.2694292068481445, + "learning_rate": 8.577412251074293e-05, + "loss": 4.8415, + "step": 286750 + }, + { + "epoch": 1.2344988184451686, + "grad_norm": 2.9901974201202393, + "learning_rate": 8.57693944482329e-05, + "loss": 4.509, + "step": 286800 + }, + { + "epoch": 1.2347140379044512, + "grad_norm": 2.996082067489624, + "learning_rate": 8.576466573050993e-05, + "loss": 3.8971, + "step": 286850 + }, + { + "epoch": 1.2349292573637338, + "grad_norm": 2.348146677017212, + "learning_rate": 8.575993635766063e-05, + "loss": 4.6788, + "step": 286900 + }, + { + "epoch": 1.2351444768230164, + "grad_norm": 3.1926891803741455, + "learning_rate": 8.575520632977164e-05, + "loss": 4.327, + "step": 286950 + }, + { + "epoch": 1.235359696282299, + "grad_norm": 2.273611545562744, + "learning_rate": 8.575047564692962e-05, + "loss": 4.629, + "step": 287000 + }, + { + "epoch": 1.2355749157415816, + "grad_norm": 0.5011412501335144, + "learning_rate": 8.574574430922119e-05, + "loss": 4.5748, + "step": 287050 + }, + { + "epoch": 1.2357901352008642, + "grad_norm": 1.332957148551941, + "learning_rate": 8.574101231673303e-05, + "loss": 4.5772, + "step": 287100 + }, + { + "epoch": 1.236005354660147, + "grad_norm": 1.390164852142334, + "learning_rate": 8.573627966955185e-05, + "loss": 4.3974, + "step": 287150 + }, + { + "epoch": 1.2362205741194296, + "grad_norm": 1.514778971672058, + "learning_rate": 8.573154636776429e-05, + "loss": 4.3361, + "step": 287200 + }, + { + "epoch": 1.2364357935787122, + "grad_norm": 1.4764354228973389, + "learning_rate": 8.572681241145708e-05, + "loss": 4.5609, + "step": 287250 + }, + { + "epoch": 1.2366510130379949, + "grad_norm": 2.257744073867798, + "learning_rate": 8.572207780071692e-05, + "loss": 4.449, + "step": 287300 + }, + { + "epoch": 1.2368662324972775, + "grad_norm": 2.4249846935272217, + "learning_rate": 8.571734253563059e-05, + "loss": 4.4599, + "step": 287350 + }, + { + "epoch": 1.23708145195656, + "grad_norm": 3.226790428161621, + "learning_rate": 8.571260661628476e-05, + "loss": 4.9003, + "step": 287400 + }, + { + "epoch": 1.2372966714158427, + "grad_norm": 2.1716055870056152, + "learning_rate": 8.570787004276621e-05, + "loss": 5.0324, + "step": 287450 + }, + { + "epoch": 1.2375118908751253, + "grad_norm": 1.5385518074035645, + "learning_rate": 8.570313281516173e-05, + "loss": 4.6219, + "step": 287500 + }, + { + "epoch": 1.237727110334408, + "grad_norm": 2.1002068519592285, + "learning_rate": 8.569839493355804e-05, + "loss": 4.5166, + "step": 287550 + }, + { + "epoch": 1.2379423297936907, + "grad_norm": 2.568754196166992, + "learning_rate": 8.569365639804198e-05, + "loss": 4.5106, + "step": 287600 + }, + { + "epoch": 1.2381575492529733, + "grad_norm": 3.281944513320923, + "learning_rate": 8.568891720870032e-05, + "loss": 4.4705, + "step": 287650 + }, + { + "epoch": 1.2383727687122559, + "grad_norm": 2.0220625400543213, + "learning_rate": 8.568417736561986e-05, + "loss": 4.9237, + "step": 287700 + }, + { + "epoch": 1.2385879881715385, + "grad_norm": 2.7848317623138428, + "learning_rate": 8.567943686888744e-05, + "loss": 4.3738, + "step": 287750 + }, + { + "epoch": 1.238803207630821, + "grad_norm": 3.697798728942871, + "learning_rate": 8.567469571858991e-05, + "loss": 5.1041, + "step": 287800 + }, + { + "epoch": 1.239018427090104, + "grad_norm": 2.6528491973876953, + "learning_rate": 8.566995391481409e-05, + "loss": 4.2644, + "step": 287850 + }, + { + "epoch": 1.2392336465493865, + "grad_norm": 4.922908782958984, + "learning_rate": 8.566521145764687e-05, + "loss": 4.7014, + "step": 287900 + }, + { + "epoch": 1.239448866008669, + "grad_norm": 3.2346184253692627, + "learning_rate": 8.566046834717508e-05, + "loss": 4.5584, + "step": 287950 + }, + { + "epoch": 1.2396640854679517, + "grad_norm": 2.2729270458221436, + "learning_rate": 8.565572458348562e-05, + "loss": 4.6369, + "step": 288000 + }, + { + "epoch": 1.2396640854679517, + "eval_loss": 5.100891590118408, + "eval_runtime": 35.1475, + "eval_samples_per_second": 18.209, + "eval_steps_per_second": 9.104, + "eval_tts_loss": 6.837890947766053, + "step": 288000 + }, + { + "epoch": 1.2398793049272343, + "grad_norm": 2.4563655853271484, + "learning_rate": 8.565098016666541e-05, + "loss": 4.3682, + "step": 288050 + }, + { + "epoch": 1.240094524386517, + "grad_norm": 5.19871187210083, + "learning_rate": 8.564623509680132e-05, + "loss": 4.9894, + "step": 288100 + }, + { + "epoch": 1.2403097438457995, + "grad_norm": 2.880560874938965, + "learning_rate": 8.564148937398029e-05, + "loss": 4.8087, + "step": 288150 + }, + { + "epoch": 1.240524963305082, + "grad_norm": 4.151907444000244, + "learning_rate": 8.563674299828923e-05, + "loss": 4.3394, + "step": 288200 + }, + { + "epoch": 1.2407401827643647, + "grad_norm": 1.4230051040649414, + "learning_rate": 8.563199596981513e-05, + "loss": 4.7064, + "step": 288250 + }, + { + "epoch": 1.2409554022236475, + "grad_norm": 2.730189561843872, + "learning_rate": 8.562724828864488e-05, + "loss": 4.5747, + "step": 288300 + }, + { + "epoch": 1.2411706216829301, + "grad_norm": 3.3190603256225586, + "learning_rate": 8.562249995486549e-05, + "loss": 4.2525, + "step": 288350 + }, + { + "epoch": 1.2413858411422127, + "grad_norm": 1.6938704252243042, + "learning_rate": 8.561775096856392e-05, + "loss": 5.0175, + "step": 288400 + }, + { + "epoch": 1.2416010606014953, + "grad_norm": 3.864053726196289, + "learning_rate": 8.56130013298272e-05, + "loss": 4.5688, + "step": 288450 + }, + { + "epoch": 1.241816280060778, + "grad_norm": 2.2591793537139893, + "learning_rate": 8.560825103874225e-05, + "loss": 4.5822, + "step": 288500 + }, + { + "epoch": 1.2420314995200605, + "grad_norm": 2.356640100479126, + "learning_rate": 8.560350009539617e-05, + "loss": 4.8007, + "step": 288550 + }, + { + "epoch": 1.2422467189793434, + "grad_norm": 4.426462650299072, + "learning_rate": 8.559874849987595e-05, + "loss": 4.7028, + "step": 288600 + }, + { + "epoch": 1.242461938438626, + "grad_norm": 5.793850421905518, + "learning_rate": 8.559399625226862e-05, + "loss": 4.7591, + "step": 288650 + }, + { + "epoch": 1.2426771578979086, + "grad_norm": 2.72941255569458, + "learning_rate": 8.558924335266123e-05, + "loss": 4.4038, + "step": 288700 + }, + { + "epoch": 1.2428923773571912, + "grad_norm": 1.8424044847488403, + "learning_rate": 8.558448980114086e-05, + "loss": 4.5895, + "step": 288750 + }, + { + "epoch": 1.2431075968164738, + "grad_norm": 3.047764301300049, + "learning_rate": 8.557973559779459e-05, + "loss": 4.6829, + "step": 288800 + }, + { + "epoch": 1.2433228162757564, + "grad_norm": 2.918477773666382, + "learning_rate": 8.557498074270947e-05, + "loss": 4.6564, + "step": 288850 + }, + { + "epoch": 1.243538035735039, + "grad_norm": 0.9637383818626404, + "learning_rate": 8.557022523597265e-05, + "loss": 4.8542, + "step": 288900 + }, + { + "epoch": 1.2437532551943216, + "grad_norm": 2.6055309772491455, + "learning_rate": 8.556546907767118e-05, + "loss": 4.8953, + "step": 288950 + }, + { + "epoch": 1.2439684746536042, + "grad_norm": 3.5714268684387207, + "learning_rate": 8.556071226789222e-05, + "loss": 4.7083, + "step": 289000 + }, + { + "epoch": 1.244183694112887, + "grad_norm": 1.625988245010376, + "learning_rate": 8.555595480672289e-05, + "loss": 4.5097, + "step": 289050 + }, + { + "epoch": 1.2443989135721696, + "grad_norm": 0.9695895314216614, + "learning_rate": 8.555119669425036e-05, + "loss": 4.4197, + "step": 289100 + }, + { + "epoch": 1.2446141330314522, + "grad_norm": 3.7810308933258057, + "learning_rate": 8.554643793056177e-05, + "loss": 4.4581, + "step": 289150 + }, + { + "epoch": 1.2448293524907348, + "grad_norm": 3.3008029460906982, + "learning_rate": 8.554167851574427e-05, + "loss": 4.7586, + "step": 289200 + }, + { + "epoch": 1.2450445719500174, + "grad_norm": 1.5311989784240723, + "learning_rate": 8.553691844988505e-05, + "loss": 4.4875, + "step": 289250 + }, + { + "epoch": 1.2452597914093, + "grad_norm": 1.9623754024505615, + "learning_rate": 8.553215773307134e-05, + "loss": 4.8105, + "step": 289300 + }, + { + "epoch": 1.2454750108685828, + "grad_norm": 2.884153366088867, + "learning_rate": 8.552739636539031e-05, + "loss": 4.9367, + "step": 289350 + }, + { + "epoch": 1.2456902303278654, + "grad_norm": 3.0847291946411133, + "learning_rate": 8.552263434692918e-05, + "loss": 4.6498, + "step": 289400 + }, + { + "epoch": 1.245905449787148, + "grad_norm": 2.5035195350646973, + "learning_rate": 8.551787167777518e-05, + "loss": 4.6634, + "step": 289450 + }, + { + "epoch": 1.2461206692464306, + "grad_norm": 0.9667875170707703, + "learning_rate": 8.551310835801559e-05, + "loss": 4.333, + "step": 289500 + }, + { + "epoch": 1.2463358887057132, + "grad_norm": 6.439640998840332, + "learning_rate": 8.550834438773759e-05, + "loss": 4.8045, + "step": 289550 + }, + { + "epoch": 1.2465511081649958, + "grad_norm": 3.17109751701355, + "learning_rate": 8.550357976702849e-05, + "loss": 4.8441, + "step": 289600 + }, + { + "epoch": 1.2467663276242784, + "grad_norm": 2.3779871463775635, + "learning_rate": 8.549881449597559e-05, + "loss": 4.7588, + "step": 289650 + }, + { + "epoch": 1.246981547083561, + "grad_norm": 2.313082456588745, + "learning_rate": 8.549404857466612e-05, + "loss": 4.5703, + "step": 289700 + }, + { + "epoch": 1.2471967665428436, + "grad_norm": 4.084051609039307, + "learning_rate": 8.548928200318742e-05, + "loss": 4.5197, + "step": 289750 + }, + { + "epoch": 1.2474119860021264, + "grad_norm": 0.8705922961235046, + "learning_rate": 8.54845147816268e-05, + "loss": 4.929, + "step": 289800 + }, + { + "epoch": 1.247627205461409, + "grad_norm": 0.6851893663406372, + "learning_rate": 8.547974691007157e-05, + "loss": 4.5474, + "step": 289850 + }, + { + "epoch": 1.2478424249206916, + "grad_norm": 4.363712787628174, + "learning_rate": 8.547497838860908e-05, + "loss": 4.8052, + "step": 289900 + }, + { + "epoch": 1.2480576443799742, + "grad_norm": 5.243893623352051, + "learning_rate": 8.547020921732667e-05, + "loss": 4.4713, + "step": 289950 + }, + { + "epoch": 1.2482728638392568, + "grad_norm": 2.7053167819976807, + "learning_rate": 8.54654393963117e-05, + "loss": 4.5272, + "step": 290000 + }, + { + "epoch": 1.2484880832985394, + "grad_norm": 3.5718331336975098, + "learning_rate": 8.546066892565155e-05, + "loss": 4.5399, + "step": 290050 + }, + { + "epoch": 1.2487033027578223, + "grad_norm": 3.467825412750244, + "learning_rate": 8.545589780543361e-05, + "loss": 4.9374, + "step": 290100 + }, + { + "epoch": 1.2489185222171049, + "grad_norm": 2.955554246902466, + "learning_rate": 8.545112603574525e-05, + "loss": 4.6434, + "step": 290150 + }, + { + "epoch": 1.2491337416763875, + "grad_norm": 4.310222625732422, + "learning_rate": 8.544635361667389e-05, + "loss": 4.5078, + "step": 290200 + }, + { + "epoch": 1.24934896113567, + "grad_norm": 3.265965461730957, + "learning_rate": 8.544158054830696e-05, + "loss": 4.5332, + "step": 290250 + }, + { + "epoch": 1.2495641805949527, + "grad_norm": 2.328184127807617, + "learning_rate": 8.543680683073189e-05, + "loss": 4.6968, + "step": 290300 + }, + { + "epoch": 1.2497794000542353, + "grad_norm": 2.351764678955078, + "learning_rate": 8.54320324640361e-05, + "loss": 4.8043, + "step": 290350 + }, + { + "epoch": 1.2499946195135179, + "grad_norm": 2.96968936920166, + "learning_rate": 8.542725744830709e-05, + "loss": 4.8053, + "step": 290400 + }, + { + "epoch": 1.2502098389728005, + "grad_norm": 2.2267255783081055, + "learning_rate": 8.542248178363229e-05, + "loss": 5.1626, + "step": 290450 + }, + { + "epoch": 1.250425058432083, + "grad_norm": 2.3947701454162598, + "learning_rate": 8.541770547009918e-05, + "loss": 4.5026, + "step": 290500 + }, + { + "epoch": 1.250640277891366, + "grad_norm": 4.2956013679504395, + "learning_rate": 8.541292850779526e-05, + "loss": 5.2012, + "step": 290550 + }, + { + "epoch": 1.2508554973506485, + "grad_norm": 1.9798896312713623, + "learning_rate": 8.540815089680802e-05, + "loss": 4.8337, + "step": 290600 + }, + { + "epoch": 1.251070716809931, + "grad_norm": 2.0638175010681152, + "learning_rate": 8.540337263722503e-05, + "loss": 4.9066, + "step": 290650 + }, + { + "epoch": 1.2512859362692137, + "grad_norm": 2.340773582458496, + "learning_rate": 8.539859372913374e-05, + "loss": 4.8185, + "step": 290700 + }, + { + "epoch": 1.2515011557284963, + "grad_norm": 5.321432113647461, + "learning_rate": 8.539381417262174e-05, + "loss": 5.0546, + "step": 290750 + }, + { + "epoch": 1.251716375187779, + "grad_norm": 3.7682034969329834, + "learning_rate": 8.538903396777655e-05, + "loss": 4.7336, + "step": 290800 + }, + { + "epoch": 1.2519315946470617, + "grad_norm": 2.026055097579956, + "learning_rate": 8.538425311468577e-05, + "loss": 4.9781, + "step": 290850 + }, + { + "epoch": 1.2521468141063443, + "grad_norm": 0.4051477909088135, + "learning_rate": 8.537947161343694e-05, + "loss": 4.8755, + "step": 290900 + }, + { + "epoch": 1.252362033565627, + "grad_norm": 0.670888364315033, + "learning_rate": 8.537468946411765e-05, + "loss": 4.0993, + "step": 290950 + }, + { + "epoch": 1.2525772530249095, + "grad_norm": 1.2149885892868042, + "learning_rate": 8.536990666681551e-05, + "loss": 4.6099, + "step": 291000 + }, + { + "epoch": 1.2525772530249095, + "eval_loss": 5.093409538269043, + "eval_runtime": 35.0404, + "eval_samples_per_second": 18.265, + "eval_steps_per_second": 9.132, + "eval_tts_loss": 6.82052322346812, + "step": 291000 + }, + { + "epoch": 1.2527924724841921, + "grad_norm": 3.2031285762786865, + "learning_rate": 8.536512322161812e-05, + "loss": 4.5078, + "step": 291050 + }, + { + "epoch": 1.2530076919434747, + "grad_norm": 1.4473693370819092, + "learning_rate": 8.536033912861313e-05, + "loss": 4.7315, + "step": 291100 + }, + { + "epoch": 1.2532229114027573, + "grad_norm": 2.5878918170928955, + "learning_rate": 8.535555438788812e-05, + "loss": 4.4871, + "step": 291150 + }, + { + "epoch": 1.25343813086204, + "grad_norm": 2.2650814056396484, + "learning_rate": 8.535076899953079e-05, + "loss": 4.6618, + "step": 291200 + }, + { + "epoch": 1.2536533503213225, + "grad_norm": 2.9955296516418457, + "learning_rate": 8.534598296362875e-05, + "loss": 4.3997, + "step": 291250 + }, + { + "epoch": 1.2538685697806053, + "grad_norm": 1.9226183891296387, + "learning_rate": 8.53411962802697e-05, + "loss": 4.3846, + "step": 291300 + }, + { + "epoch": 1.254083789239888, + "grad_norm": 3.7581887245178223, + "learning_rate": 8.533640894954132e-05, + "loss": 4.8884, + "step": 291350 + }, + { + "epoch": 1.2542990086991705, + "grad_norm": 3.5618486404418945, + "learning_rate": 8.533162097153128e-05, + "loss": 4.5973, + "step": 291400 + }, + { + "epoch": 1.2545142281584531, + "grad_norm": 1.2964309453964233, + "learning_rate": 8.532683234632731e-05, + "loss": 4.3984, + "step": 291450 + }, + { + "epoch": 1.2547294476177357, + "grad_norm": 5.830141067504883, + "learning_rate": 8.532204307401713e-05, + "loss": 5.0423, + "step": 291500 + }, + { + "epoch": 1.2549446670770186, + "grad_norm": 3.8155698776245117, + "learning_rate": 8.531725315468844e-05, + "loss": 4.2261, + "step": 291550 + }, + { + "epoch": 1.2551598865363012, + "grad_norm": 3.6628847122192383, + "learning_rate": 8.531246258842899e-05, + "loss": 4.9354, + "step": 291600 + }, + { + "epoch": 1.2553751059955838, + "grad_norm": 2.9867560863494873, + "learning_rate": 8.530767137532654e-05, + "loss": 4.5513, + "step": 291650 + }, + { + "epoch": 1.2555903254548664, + "grad_norm": 2.448665142059326, + "learning_rate": 8.530287951546887e-05, + "loss": 4.4085, + "step": 291700 + }, + { + "epoch": 1.255805544914149, + "grad_norm": 2.427839517593384, + "learning_rate": 8.529808700894373e-05, + "loss": 4.5937, + "step": 291750 + }, + { + "epoch": 1.2560207643734316, + "grad_norm": 1.6013996601104736, + "learning_rate": 8.529329385583891e-05, + "loss": 4.7065, + "step": 291800 + }, + { + "epoch": 1.2562359838327142, + "grad_norm": 2.0454862117767334, + "learning_rate": 8.528850005624221e-05, + "loss": 4.6576, + "step": 291850 + }, + { + "epoch": 1.2564512032919968, + "grad_norm": 2.6414551734924316, + "learning_rate": 8.528370561024146e-05, + "loss": 4.9041, + "step": 291900 + }, + { + "epoch": 1.2566664227512794, + "grad_norm": 2.7680795192718506, + "learning_rate": 8.527891051792445e-05, + "loss": 4.6818, + "step": 291950 + }, + { + "epoch": 1.256881642210562, + "grad_norm": 1.8351123332977295, + "learning_rate": 8.527411477937906e-05, + "loss": 5.1007, + "step": 292000 + }, + { + "epoch": 1.2570968616698448, + "grad_norm": 3.4553558826446533, + "learning_rate": 8.52693183946931e-05, + "loss": 4.6907, + "step": 292050 + }, + { + "epoch": 1.2573120811291274, + "grad_norm": 0.8769617080688477, + "learning_rate": 8.526452136395444e-05, + "loss": 4.4058, + "step": 292100 + }, + { + "epoch": 1.25752730058841, + "grad_norm": 2.4127161502838135, + "learning_rate": 8.525972368725094e-05, + "loss": 4.8766, + "step": 292150 + }, + { + "epoch": 1.2577425200476926, + "grad_norm": 1.4065546989440918, + "learning_rate": 8.52549253646705e-05, + "loss": 4.8383, + "step": 292200 + }, + { + "epoch": 1.2579577395069752, + "grad_norm": 3.777639150619507, + "learning_rate": 8.525012639630102e-05, + "loss": 4.1722, + "step": 292250 + }, + { + "epoch": 1.258172958966258, + "grad_norm": 0.5930464863777161, + "learning_rate": 8.524532678223039e-05, + "loss": 4.0825, + "step": 292300 + }, + { + "epoch": 1.2583881784255406, + "grad_norm": 2.2167980670928955, + "learning_rate": 8.524052652254653e-05, + "loss": 4.4839, + "step": 292350 + }, + { + "epoch": 1.2586033978848232, + "grad_norm": 0.8161476254463196, + "learning_rate": 8.523572561733735e-05, + "loss": 4.188, + "step": 292400 + }, + { + "epoch": 1.2588186173441058, + "grad_norm": 3.3435113430023193, + "learning_rate": 8.523092406669084e-05, + "loss": 4.491, + "step": 292450 + }, + { + "epoch": 1.2590338368033884, + "grad_norm": 3.0301513671875, + "learning_rate": 8.522612187069492e-05, + "loss": 4.4989, + "step": 292500 + }, + { + "epoch": 1.259249056262671, + "grad_norm": 2.6545417308807373, + "learning_rate": 8.522131902943755e-05, + "loss": 4.9416, + "step": 292550 + }, + { + "epoch": 1.2594642757219536, + "grad_norm": 0.9283846616744995, + "learning_rate": 8.521651554300673e-05, + "loss": 4.5902, + "step": 292600 + }, + { + "epoch": 1.2596794951812362, + "grad_norm": 2.7728466987609863, + "learning_rate": 8.521171141149042e-05, + "loss": 4.316, + "step": 292650 + }, + { + "epoch": 1.2598947146405188, + "grad_norm": 3.2533252239227295, + "learning_rate": 8.520690663497666e-05, + "loss": 4.7186, + "step": 292700 + }, + { + "epoch": 1.2601099340998014, + "grad_norm": 2.2068190574645996, + "learning_rate": 8.520210121355344e-05, + "loss": 4.3714, + "step": 292750 + }, + { + "epoch": 1.2603251535590843, + "grad_norm": 1.7356005907058716, + "learning_rate": 8.519729514730875e-05, + "loss": 4.1452, + "step": 292800 + }, + { + "epoch": 1.2605403730183669, + "grad_norm": 3.5017659664154053, + "learning_rate": 8.51924884363307e-05, + "loss": 4.6986, + "step": 292850 + }, + { + "epoch": 1.2607555924776495, + "grad_norm": 1.0593037605285645, + "learning_rate": 8.518768108070728e-05, + "loss": 4.4563, + "step": 292900 + }, + { + "epoch": 1.260970811936932, + "grad_norm": 2.977515459060669, + "learning_rate": 8.518287308052656e-05, + "loss": 4.6054, + "step": 292950 + }, + { + "epoch": 1.2611860313962147, + "grad_norm": 5.354854583740234, + "learning_rate": 8.517806443587664e-05, + "loss": 4.1348, + "step": 293000 + }, + { + "epoch": 1.2614012508554975, + "grad_norm": 3.5846657752990723, + "learning_rate": 8.517325514684557e-05, + "loss": 4.3749, + "step": 293050 + }, + { + "epoch": 1.26161647031478, + "grad_norm": 2.7386863231658936, + "learning_rate": 8.516844521352145e-05, + "loss": 4.6576, + "step": 293100 + }, + { + "epoch": 1.2618316897740627, + "grad_norm": 0.5440916419029236, + "learning_rate": 8.516363463599241e-05, + "loss": 4.729, + "step": 293150 + }, + { + "epoch": 1.2620469092333453, + "grad_norm": 1.674096703529358, + "learning_rate": 8.515882341434654e-05, + "loss": 4.8101, + "step": 293200 + }, + { + "epoch": 1.2622621286926279, + "grad_norm": 3.3169608116149902, + "learning_rate": 8.5154011548672e-05, + "loss": 4.7099, + "step": 293250 + }, + { + "epoch": 1.2624773481519105, + "grad_norm": 2.6938822269439697, + "learning_rate": 8.514919903905692e-05, + "loss": 4.6559, + "step": 293300 + }, + { + "epoch": 1.262692567611193, + "grad_norm": 6.065310001373291, + "learning_rate": 8.514438588558943e-05, + "loss": 4.8541, + "step": 293350 + }, + { + "epoch": 1.2629077870704757, + "grad_norm": 2.6808924674987793, + "learning_rate": 8.513957208835772e-05, + "loss": 4.2553, + "step": 293400 + }, + { + "epoch": 1.2631230065297583, + "grad_norm": 2.779261350631714, + "learning_rate": 8.513475764744997e-05, + "loss": 4.6133, + "step": 293450 + }, + { + "epoch": 1.263338225989041, + "grad_norm": 2.4016129970550537, + "learning_rate": 8.512994256295437e-05, + "loss": 4.8154, + "step": 293500 + }, + { + "epoch": 1.2635534454483237, + "grad_norm": 1.7220350503921509, + "learning_rate": 8.51251268349591e-05, + "loss": 4.3382, + "step": 293550 + }, + { + "epoch": 1.2637686649076063, + "grad_norm": 2.569462776184082, + "learning_rate": 8.51203104635524e-05, + "loss": 4.5606, + "step": 293600 + }, + { + "epoch": 1.263983884366889, + "grad_norm": 1.5794099569320679, + "learning_rate": 8.511549344882247e-05, + "loss": 4.4415, + "step": 293650 + }, + { + "epoch": 1.2641991038261715, + "grad_norm": 4.020901679992676, + "learning_rate": 8.511067579085756e-05, + "loss": 4.4669, + "step": 293700 + }, + { + "epoch": 1.2644143232854541, + "grad_norm": 3.5191938877105713, + "learning_rate": 8.510585748974594e-05, + "loss": 4.3924, + "step": 293750 + }, + { + "epoch": 1.264629542744737, + "grad_norm": 2.9946887493133545, + "learning_rate": 8.510103854557581e-05, + "loss": 4.3524, + "step": 293800 + }, + { + "epoch": 1.2648447622040195, + "grad_norm": 2.0701193809509277, + "learning_rate": 8.50962189584355e-05, + "loss": 4.5567, + "step": 293850 + }, + { + "epoch": 1.2650599816633021, + "grad_norm": 5.347604751586914, + "learning_rate": 8.509139872841327e-05, + "loss": 4.6747, + "step": 293900 + }, + { + "epoch": 1.2652752011225847, + "grad_norm": 2.3922433853149414, + "learning_rate": 8.508657785559744e-05, + "loss": 4.8229, + "step": 293950 + }, + { + "epoch": 1.2654904205818673, + "grad_norm": 2.190497398376465, + "learning_rate": 8.508175634007627e-05, + "loss": 4.5776, + "step": 294000 + }, + { + "epoch": 1.2654904205818673, + "eval_loss": 5.083926200866699, + "eval_runtime": 35.3145, + "eval_samples_per_second": 18.123, + "eval_steps_per_second": 9.061, + "eval_tts_loss": 6.836054731927517, + "step": 294000 + }, + { + "epoch": 1.26570564004115, + "grad_norm": 2.873950481414795, + "learning_rate": 8.507693418193811e-05, + "loss": 4.5881, + "step": 294050 + }, + { + "epoch": 1.2659208595004325, + "grad_norm": 4.743781089782715, + "learning_rate": 8.507211138127129e-05, + "loss": 4.9585, + "step": 294100 + }, + { + "epoch": 1.2661360789597151, + "grad_norm": 2.537785291671753, + "learning_rate": 8.506728793816415e-05, + "loss": 5.0469, + "step": 294150 + }, + { + "epoch": 1.2663512984189977, + "grad_norm": 0.7240046262741089, + "learning_rate": 8.506246385270505e-05, + "loss": 4.6207, + "step": 294200 + }, + { + "epoch": 1.2665665178782806, + "grad_norm": 6.598328590393066, + "learning_rate": 8.505763912498233e-05, + "loss": 4.7352, + "step": 294250 + }, + { + "epoch": 1.2667817373375632, + "grad_norm": 0.9972405433654785, + "learning_rate": 8.50528137550844e-05, + "loss": 4.4592, + "step": 294300 + }, + { + "epoch": 1.2669969567968458, + "grad_norm": 3.173866033554077, + "learning_rate": 8.504798774309963e-05, + "loss": 4.4231, + "step": 294350 + }, + { + "epoch": 1.2672121762561284, + "grad_norm": 0.9246065020561218, + "learning_rate": 8.504316108911643e-05, + "loss": 4.4871, + "step": 294400 + }, + { + "epoch": 1.267427395715411, + "grad_norm": 2.3941938877105713, + "learning_rate": 8.503833379322321e-05, + "loss": 5.1227, + "step": 294450 + }, + { + "epoch": 1.2676426151746936, + "grad_norm": 4.181195259094238, + "learning_rate": 8.50335058555084e-05, + "loss": 4.6869, + "step": 294500 + }, + { + "epoch": 1.2678578346339764, + "grad_norm": 3.2431352138519287, + "learning_rate": 8.502867727606043e-05, + "loss": 4.4987, + "step": 294550 + }, + { + "epoch": 1.268073054093259, + "grad_norm": 4.007645606994629, + "learning_rate": 8.502384805496776e-05, + "loss": 4.5382, + "step": 294600 + }, + { + "epoch": 1.2682882735525416, + "grad_norm": 3.3469955921173096, + "learning_rate": 8.501901819231883e-05, + "loss": 4.8055, + "step": 294650 + }, + { + "epoch": 1.2685034930118242, + "grad_norm": 3.0080809593200684, + "learning_rate": 8.501418768820213e-05, + "loss": 4.1051, + "step": 294700 + }, + { + "epoch": 1.2687187124711068, + "grad_norm": 1.8727258443832397, + "learning_rate": 8.500935654270614e-05, + "loss": 4.5163, + "step": 294750 + }, + { + "epoch": 1.2689339319303894, + "grad_norm": 1.5062036514282227, + "learning_rate": 8.500452475591936e-05, + "loss": 4.5125, + "step": 294800 + }, + { + "epoch": 1.269149151389672, + "grad_norm": 2.1269237995147705, + "learning_rate": 8.499969232793029e-05, + "loss": 4.1175, + "step": 294850 + }, + { + "epoch": 1.2693643708489546, + "grad_norm": 1.8637839555740356, + "learning_rate": 8.499485925882741e-05, + "loss": 4.1057, + "step": 294900 + }, + { + "epoch": 1.2695795903082372, + "grad_norm": 2.7523033618927, + "learning_rate": 8.499002554869933e-05, + "loss": 4.5311, + "step": 294950 + }, + { + "epoch": 1.26979480976752, + "grad_norm": 3.9823102951049805, + "learning_rate": 8.498519119763453e-05, + "loss": 4.638, + "step": 295000 + }, + { + "epoch": 1.2700100292268026, + "grad_norm": 3.9978466033935547, + "learning_rate": 8.49803562057216e-05, + "loss": 4.527, + "step": 295050 + }, + { + "epoch": 1.2702252486860852, + "grad_norm": 2.401151657104492, + "learning_rate": 8.49755205730491e-05, + "loss": 4.4996, + "step": 295100 + }, + { + "epoch": 1.2704404681453678, + "grad_norm": 4.783238887786865, + "learning_rate": 8.497068429970559e-05, + "loss": 4.9597, + "step": 295150 + }, + { + "epoch": 1.2706556876046504, + "grad_norm": 4.3085784912109375, + "learning_rate": 8.496584738577967e-05, + "loss": 4.4294, + "step": 295200 + }, + { + "epoch": 1.270870907063933, + "grad_norm": 5.374129772186279, + "learning_rate": 8.496100983135993e-05, + "loss": 4.9174, + "step": 295250 + }, + { + "epoch": 1.2710861265232158, + "grad_norm": 4.046342849731445, + "learning_rate": 8.4956171636535e-05, + "loss": 4.6565, + "step": 295300 + }, + { + "epoch": 1.2713013459824984, + "grad_norm": 1.6688505411148071, + "learning_rate": 8.49513328013935e-05, + "loss": 4.6526, + "step": 295350 + }, + { + "epoch": 1.271516565441781, + "grad_norm": 1.280116319656372, + "learning_rate": 8.494649332602407e-05, + "loss": 4.599, + "step": 295400 + }, + { + "epoch": 1.2717317849010636, + "grad_norm": 3.198150634765625, + "learning_rate": 8.494165321051535e-05, + "loss": 4.4821, + "step": 295450 + }, + { + "epoch": 1.2719470043603462, + "grad_norm": 3.392995595932007, + "learning_rate": 8.493681245495599e-05, + "loss": 4.7313, + "step": 295500 + }, + { + "epoch": 1.2721622238196288, + "grad_norm": 4.559877395629883, + "learning_rate": 8.493197105943468e-05, + "loss": 4.8547, + "step": 295550 + }, + { + "epoch": 1.2723774432789114, + "grad_norm": 1.196427345275879, + "learning_rate": 8.492712902404009e-05, + "loss": 4.6139, + "step": 295600 + }, + { + "epoch": 1.272592662738194, + "grad_norm": 4.013114929199219, + "learning_rate": 8.492228634886093e-05, + "loss": 4.4316, + "step": 295650 + }, + { + "epoch": 1.2728078821974766, + "grad_norm": 3.3585689067840576, + "learning_rate": 8.491744303398591e-05, + "loss": 4.5363, + "step": 295700 + }, + { + "epoch": 1.2730231016567595, + "grad_norm": 0.2863420248031616, + "learning_rate": 8.491259907950373e-05, + "loss": 4.6222, + "step": 295750 + }, + { + "epoch": 1.273238321116042, + "grad_norm": 2.688901424407959, + "learning_rate": 8.490775448550312e-05, + "loss": 4.928, + "step": 295800 + }, + { + "epoch": 1.2734535405753247, + "grad_norm": 2.251488208770752, + "learning_rate": 8.490290925207283e-05, + "loss": 4.7872, + "step": 295850 + }, + { + "epoch": 1.2736687600346073, + "grad_norm": 0.38149330019950867, + "learning_rate": 8.48980633793016e-05, + "loss": 4.7592, + "step": 295900 + }, + { + "epoch": 1.2738839794938899, + "grad_norm": 2.2865805625915527, + "learning_rate": 8.489321686727822e-05, + "loss": 4.8924, + "step": 295950 + }, + { + "epoch": 1.2740991989531725, + "grad_norm": 1.6720331907272339, + "learning_rate": 8.488836971609146e-05, + "loss": 4.654, + "step": 296000 + }, + { + "epoch": 1.2743144184124553, + "grad_norm": 2.3045449256896973, + "learning_rate": 8.488352192583009e-05, + "loss": 4.7114, + "step": 296050 + }, + { + "epoch": 1.274529637871738, + "grad_norm": 1.6895896196365356, + "learning_rate": 8.487867349658294e-05, + "loss": 4.6265, + "step": 296100 + }, + { + "epoch": 1.2747448573310205, + "grad_norm": 2.1520016193389893, + "learning_rate": 8.487382442843879e-05, + "loss": 4.6229, + "step": 296150 + }, + { + "epoch": 1.274960076790303, + "grad_norm": 2.1978256702423096, + "learning_rate": 8.486897472148647e-05, + "loss": 4.5649, + "step": 296200 + }, + { + "epoch": 1.2751752962495857, + "grad_norm": 5.325904369354248, + "learning_rate": 8.486412437581484e-05, + "loss": 4.9223, + "step": 296250 + }, + { + "epoch": 1.2753905157088683, + "grad_norm": 3.083810567855835, + "learning_rate": 8.485927339151274e-05, + "loss": 4.7969, + "step": 296300 + }, + { + "epoch": 1.275605735168151, + "grad_norm": 0.88279789686203, + "learning_rate": 8.4854421768669e-05, + "loss": 4.484, + "step": 296350 + }, + { + "epoch": 1.2758209546274335, + "grad_norm": 2.7663612365722656, + "learning_rate": 8.484956950737252e-05, + "loss": 4.7613, + "step": 296400 + }, + { + "epoch": 1.276036174086716, + "grad_norm": 2.5110349655151367, + "learning_rate": 8.484471660771218e-05, + "loss": 4.6569, + "step": 296450 + }, + { + "epoch": 1.276251393545999, + "grad_norm": 3.6361398696899414, + "learning_rate": 8.483986306977687e-05, + "loss": 4.5253, + "step": 296500 + }, + { + "epoch": 1.2764666130052815, + "grad_norm": 3.022374153137207, + "learning_rate": 8.483500889365548e-05, + "loss": 4.7015, + "step": 296550 + }, + { + "epoch": 1.2766818324645641, + "grad_norm": 3.39689564704895, + "learning_rate": 8.483015407943694e-05, + "loss": 4.4653, + "step": 296600 + }, + { + "epoch": 1.2768970519238467, + "grad_norm": 3.788363456726074, + "learning_rate": 8.482529862721019e-05, + "loss": 4.4901, + "step": 296650 + }, + { + "epoch": 1.2771122713831293, + "grad_norm": 2.470318078994751, + "learning_rate": 8.482044253706415e-05, + "loss": 4.3848, + "step": 296700 + }, + { + "epoch": 1.277327490842412, + "grad_norm": 4.531248092651367, + "learning_rate": 8.48155858090878e-05, + "loss": 4.7245, + "step": 296750 + }, + { + "epoch": 1.2775427103016948, + "grad_norm": 2.4525365829467773, + "learning_rate": 8.481072844337006e-05, + "loss": 4.4158, + "step": 296800 + }, + { + "epoch": 1.2777579297609774, + "grad_norm": 4.941079139709473, + "learning_rate": 8.480587043999994e-05, + "loss": 4.3467, + "step": 296850 + }, + { + "epoch": 1.27797314922026, + "grad_norm": 3.1338043212890625, + "learning_rate": 8.480101179906643e-05, + "loss": 4.431, + "step": 296900 + }, + { + "epoch": 1.2781883686795426, + "grad_norm": 2.5177218914031982, + "learning_rate": 8.479615252065852e-05, + "loss": 4.5688, + "step": 296950 + }, + { + "epoch": 1.2784035881388252, + "grad_norm": 2.168471574783325, + "learning_rate": 8.47912926048652e-05, + "loss": 4.4978, + "step": 297000 + }, + { + "epoch": 1.2784035881388252, + "eval_loss": 5.0937910079956055, + "eval_runtime": 35.0691, + "eval_samples_per_second": 18.25, + "eval_steps_per_second": 9.125, + "eval_tts_loss": 6.897483220064221, + "step": 297000 + }, + { + "epoch": 1.2786188075981078, + "grad_norm": 4.118424415588379, + "learning_rate": 8.478643205177554e-05, + "loss": 4.5898, + "step": 297050 + }, + { + "epoch": 1.2788340270573904, + "grad_norm": 1.466590166091919, + "learning_rate": 8.478157086147852e-05, + "loss": 4.7203, + "step": 297100 + }, + { + "epoch": 1.279049246516673, + "grad_norm": 2.288672924041748, + "learning_rate": 8.477670903406322e-05, + "loss": 4.5044, + "step": 297150 + }, + { + "epoch": 1.2792644659759556, + "grad_norm": 3.6370770931243896, + "learning_rate": 8.477184656961869e-05, + "loss": 4.6009, + "step": 297200 + }, + { + "epoch": 1.2794796854352384, + "grad_norm": 2.524061441421509, + "learning_rate": 8.476698346823401e-05, + "loss": 3.941, + "step": 297250 + }, + { + "epoch": 1.279694904894521, + "grad_norm": 2.3066227436065674, + "learning_rate": 8.476211972999824e-05, + "loss": 4.3645, + "step": 297300 + }, + { + "epoch": 1.2799101243538036, + "grad_norm": 3.159168243408203, + "learning_rate": 8.475725535500048e-05, + "loss": 4.6525, + "step": 297350 + }, + { + "epoch": 1.2801253438130862, + "grad_norm": 1.2488816976547241, + "learning_rate": 8.475239034332985e-05, + "loss": 4.1887, + "step": 297400 + }, + { + "epoch": 1.2803405632723688, + "grad_norm": 2.3246002197265625, + "learning_rate": 8.474752469507543e-05, + "loss": 4.4789, + "step": 297450 + }, + { + "epoch": 1.2805557827316514, + "grad_norm": 1.9571083784103394, + "learning_rate": 8.474265841032638e-05, + "loss": 4.6988, + "step": 297500 + }, + { + "epoch": 1.2807710021909342, + "grad_norm": 2.2886273860931396, + "learning_rate": 8.473779148917182e-05, + "loss": 4.9378, + "step": 297550 + }, + { + "epoch": 1.2809862216502168, + "grad_norm": 2.6113789081573486, + "learning_rate": 8.473292393170092e-05, + "loss": 4.8029, + "step": 297600 + }, + { + "epoch": 1.2812014411094994, + "grad_norm": 2.4747323989868164, + "learning_rate": 8.472805573800283e-05, + "loss": 4.5448, + "step": 297650 + }, + { + "epoch": 1.281416660568782, + "grad_norm": 2.6487960815429688, + "learning_rate": 8.472318690816673e-05, + "loss": 4.6939, + "step": 297700 + }, + { + "epoch": 1.2816318800280646, + "grad_norm": 1.6654057502746582, + "learning_rate": 8.471831744228181e-05, + "loss": 4.9968, + "step": 297750 + }, + { + "epoch": 1.2818470994873472, + "grad_norm": 0.7208316326141357, + "learning_rate": 8.471344734043722e-05, + "loss": 4.2656, + "step": 297800 + }, + { + "epoch": 1.2820623189466298, + "grad_norm": 1.986332893371582, + "learning_rate": 8.470857660272225e-05, + "loss": 4.5843, + "step": 297850 + }, + { + "epoch": 1.2822775384059124, + "grad_norm": 1.6323314905166626, + "learning_rate": 8.470370522922606e-05, + "loss": 4.4957, + "step": 297900 + }, + { + "epoch": 1.282492757865195, + "grad_norm": 2.6887807846069336, + "learning_rate": 8.46988332200379e-05, + "loss": 4.4428, + "step": 297950 + }, + { + "epoch": 1.2827079773244778, + "grad_norm": 3.0411055088043213, + "learning_rate": 8.469396057524701e-05, + "loss": 4.805, + "step": 298000 + }, + { + "epoch": 1.2829231967837604, + "grad_norm": 2.181535482406616, + "learning_rate": 8.468908729494267e-05, + "loss": 5.0488, + "step": 298050 + }, + { + "epoch": 1.283138416243043, + "grad_norm": 3.044881582260132, + "learning_rate": 8.468421337921412e-05, + "loss": 4.7229, + "step": 298100 + }, + { + "epoch": 1.2833536357023256, + "grad_norm": 1.434730052947998, + "learning_rate": 8.467933882815064e-05, + "loss": 5.0061, + "step": 298150 + }, + { + "epoch": 1.2835688551616082, + "grad_norm": 0.7668498754501343, + "learning_rate": 8.467446364184152e-05, + "loss": 4.3959, + "step": 298200 + }, + { + "epoch": 1.283784074620891, + "grad_norm": 2.4216675758361816, + "learning_rate": 8.46695878203761e-05, + "loss": 4.96, + "step": 298250 + }, + { + "epoch": 1.2839992940801737, + "grad_norm": 1.1467534303665161, + "learning_rate": 8.466471136384365e-05, + "loss": 4.7095, + "step": 298300 + }, + { + "epoch": 1.2842145135394563, + "grad_norm": 2.3060433864593506, + "learning_rate": 8.46598342723335e-05, + "loss": 4.6681, + "step": 298350 + }, + { + "epoch": 1.2844297329987389, + "grad_norm": 4.533605575561523, + "learning_rate": 8.465495654593498e-05, + "loss": 4.8702, + "step": 298400 + }, + { + "epoch": 1.2846449524580215, + "grad_norm": 5.03779935836792, + "learning_rate": 8.465007818473749e-05, + "loss": 4.5663, + "step": 298450 + }, + { + "epoch": 1.284860171917304, + "grad_norm": 2.7503671646118164, + "learning_rate": 8.464519918883033e-05, + "loss": 4.2601, + "step": 298500 + }, + { + "epoch": 1.2850753913765867, + "grad_norm": 2.7898168563842773, + "learning_rate": 8.464031955830292e-05, + "loss": 4.571, + "step": 298550 + }, + { + "epoch": 1.2852906108358693, + "grad_norm": 2.7135636806488037, + "learning_rate": 8.46354392932446e-05, + "loss": 4.4897, + "step": 298600 + }, + { + "epoch": 1.2855058302951519, + "grad_norm": 2.6889002323150635, + "learning_rate": 8.463055839374478e-05, + "loss": 4.5287, + "step": 298650 + }, + { + "epoch": 1.2857210497544345, + "grad_norm": 4.658017158508301, + "learning_rate": 8.462567685989289e-05, + "loss": 5.0265, + "step": 298700 + }, + { + "epoch": 1.2859362692137173, + "grad_norm": 1.5621217489242554, + "learning_rate": 8.462079469177832e-05, + "loss": 4.3413, + "step": 298750 + }, + { + "epoch": 1.2861514886729999, + "grad_norm": 3.3155860900878906, + "learning_rate": 8.461591188949052e-05, + "loss": 4.3636, + "step": 298800 + }, + { + "epoch": 1.2863667081322825, + "grad_norm": 2.496814727783203, + "learning_rate": 8.461102845311892e-05, + "loss": 4.9258, + "step": 298850 + }, + { + "epoch": 1.286581927591565, + "grad_norm": 2.294715642929077, + "learning_rate": 8.460614438275298e-05, + "loss": 4.639, + "step": 298900 + }, + { + "epoch": 1.2867971470508477, + "grad_norm": 2.4452672004699707, + "learning_rate": 8.460125967848215e-05, + "loss": 4.4647, + "step": 298950 + }, + { + "epoch": 1.2870123665101305, + "grad_norm": 1.9864650964736938, + "learning_rate": 8.459637434039594e-05, + "loss": 5.0277, + "step": 299000 + }, + { + "epoch": 1.2872275859694131, + "grad_norm": 0.853190541267395, + "learning_rate": 8.459148836858379e-05, + "loss": 4.6845, + "step": 299050 + }, + { + "epoch": 1.2874428054286957, + "grad_norm": 2.524461507797241, + "learning_rate": 8.458660176313525e-05, + "loss": 4.5495, + "step": 299100 + }, + { + "epoch": 1.2876580248879783, + "grad_norm": 1.8957006931304932, + "learning_rate": 8.458171452413979e-05, + "loss": 4.7112, + "step": 299150 + }, + { + "epoch": 1.287873244347261, + "grad_norm": 2.3001046180725098, + "learning_rate": 8.457682665168693e-05, + "loss": 4.7901, + "step": 299200 + }, + { + "epoch": 1.2880884638065435, + "grad_norm": 2.3557915687561035, + "learning_rate": 8.457193814586626e-05, + "loss": 4.6275, + "step": 299250 + }, + { + "epoch": 1.2883036832658261, + "grad_norm": 2.8111860752105713, + "learning_rate": 8.456704900676729e-05, + "loss": 4.5497, + "step": 299300 + }, + { + "epoch": 1.2885189027251087, + "grad_norm": 2.0848703384399414, + "learning_rate": 8.456215923447956e-05, + "loss": 4.3891, + "step": 299350 + }, + { + "epoch": 1.2887341221843913, + "grad_norm": 2.0771572589874268, + "learning_rate": 8.455726882909265e-05, + "loss": 4.7645, + "step": 299400 + }, + { + "epoch": 1.288949341643674, + "grad_norm": 3.8774497509002686, + "learning_rate": 8.455237779069617e-05, + "loss": 4.5295, + "step": 299450 + }, + { + "epoch": 1.2891645611029567, + "grad_norm": 3.3509609699249268, + "learning_rate": 8.454748611937968e-05, + "loss": 4.7865, + "step": 299500 + }, + { + "epoch": 1.2893797805622393, + "grad_norm": 0.8887951970100403, + "learning_rate": 8.454259381523279e-05, + "loss": 4.7278, + "step": 299550 + }, + { + "epoch": 1.289595000021522, + "grad_norm": 3.242792844772339, + "learning_rate": 8.453770087834514e-05, + "loss": 4.6562, + "step": 299600 + }, + { + "epoch": 1.2898102194808045, + "grad_norm": 2.6735451221466064, + "learning_rate": 8.453280730880632e-05, + "loss": 4.6012, + "step": 299650 + }, + { + "epoch": 1.2900254389400871, + "grad_norm": 2.48193097114563, + "learning_rate": 8.452791310670601e-05, + "loss": 4.8571, + "step": 299700 + }, + { + "epoch": 1.29024065839937, + "grad_norm": 2.1936120986938477, + "learning_rate": 8.452301827213382e-05, + "loss": 4.4836, + "step": 299750 + }, + { + "epoch": 1.2904558778586526, + "grad_norm": 2.0169475078582764, + "learning_rate": 8.451812280517943e-05, + "loss": 4.7109, + "step": 299800 + }, + { + "epoch": 1.2906710973179352, + "grad_norm": 2.105956792831421, + "learning_rate": 8.451322670593252e-05, + "loss": 4.6925, + "step": 299850 + }, + { + "epoch": 1.2908863167772178, + "grad_norm": 7.956499099731445, + "learning_rate": 8.450832997448277e-05, + "loss": 4.7355, + "step": 299900 + }, + { + "epoch": 1.2911015362365004, + "grad_norm": 0.8660747408866882, + "learning_rate": 8.450343261091988e-05, + "loss": 4.4562, + "step": 299950 + }, + { + "epoch": 1.291316755695783, + "grad_norm": 2.1873764991760254, + "learning_rate": 8.449853461533355e-05, + "loss": 4.8895, + "step": 300000 + }, + { + "epoch": 1.291316755695783, + "eval_loss": 5.082815647125244, + "eval_runtime": 34.8521, + "eval_samples_per_second": 18.363, + "eval_steps_per_second": 9.182, + "eval_tts_loss": 6.87028602185934, + "step": 300000 + }, + { + "epoch": 1.2915319751550656, + "grad_norm": 4.239205837249756, + "learning_rate": 8.449363598781348e-05, + "loss": 4.7731, + "step": 300050 + }, + { + "epoch": 1.2917471946143482, + "grad_norm": 3.043804407119751, + "learning_rate": 8.448873672844945e-05, + "loss": 4.453, + "step": 300100 + }, + { + "epoch": 1.2919624140736308, + "grad_norm": 2.70100474357605, + "learning_rate": 8.448383683733119e-05, + "loss": 4.6223, + "step": 300150 + }, + { + "epoch": 1.2921776335329136, + "grad_norm": 3.75998592376709, + "learning_rate": 8.447893631454842e-05, + "loss": 4.5224, + "step": 300200 + }, + { + "epoch": 1.2923928529921962, + "grad_norm": 3.244800567626953, + "learning_rate": 8.447403516019094e-05, + "loss": 4.7276, + "step": 300250 + }, + { + "epoch": 1.2926080724514788, + "grad_norm": 1.5009430646896362, + "learning_rate": 8.446913337434852e-05, + "loss": 4.5825, + "step": 300300 + }, + { + "epoch": 1.2928232919107614, + "grad_norm": 1.8711531162261963, + "learning_rate": 8.446423095711096e-05, + "loss": 4.5821, + "step": 300350 + }, + { + "epoch": 1.293038511370044, + "grad_norm": 0.9688680171966553, + "learning_rate": 8.445932790856801e-05, + "loss": 4.2259, + "step": 300400 + }, + { + "epoch": 1.2932537308293266, + "grad_norm": 0.6207114458084106, + "learning_rate": 8.445442422880955e-05, + "loss": 4.7543, + "step": 300450 + }, + { + "epoch": 1.2934689502886094, + "grad_norm": 2.403597593307495, + "learning_rate": 8.444951991792538e-05, + "loss": 4.7122, + "step": 300500 + }, + { + "epoch": 1.293684169747892, + "grad_norm": 2.139462471008301, + "learning_rate": 8.444461497600532e-05, + "loss": 4.1164, + "step": 300550 + }, + { + "epoch": 1.2938993892071746, + "grad_norm": 3.0834877490997314, + "learning_rate": 8.443970940313923e-05, + "loss": 4.9228, + "step": 300600 + }, + { + "epoch": 1.2941146086664572, + "grad_norm": 4.308004379272461, + "learning_rate": 8.443480319941697e-05, + "loss": 4.8853, + "step": 300650 + }, + { + "epoch": 1.2943298281257398, + "grad_norm": 1.7408512830734253, + "learning_rate": 8.44298963649284e-05, + "loss": 4.9193, + "step": 300700 + }, + { + "epoch": 1.2945450475850224, + "grad_norm": 1.5172054767608643, + "learning_rate": 8.44249888997634e-05, + "loss": 4.4437, + "step": 300750 + }, + { + "epoch": 1.294760267044305, + "grad_norm": 3.301928758621216, + "learning_rate": 8.442008080401189e-05, + "loss": 4.5783, + "step": 300800 + }, + { + "epoch": 1.2949754865035876, + "grad_norm": 2.2520394325256348, + "learning_rate": 8.441517207776375e-05, + "loss": 4.4763, + "step": 300850 + }, + { + "epoch": 1.2951907059628702, + "grad_norm": 1.7560797929763794, + "learning_rate": 8.44102627211089e-05, + "loss": 4.3985, + "step": 300900 + }, + { + "epoch": 1.295405925422153, + "grad_norm": 2.647325277328491, + "learning_rate": 8.440535273413727e-05, + "loss": 4.716, + "step": 300950 + }, + { + "epoch": 1.2956211448814356, + "grad_norm": 4.291378498077393, + "learning_rate": 8.440044211693882e-05, + "loss": 4.1573, + "step": 301000 + }, + { + "epoch": 1.2958363643407182, + "grad_norm": 0.8572250604629517, + "learning_rate": 8.439553086960347e-05, + "loss": 4.6972, + "step": 301050 + }, + { + "epoch": 1.2960515838000009, + "grad_norm": 3.4441123008728027, + "learning_rate": 8.439061899222119e-05, + "loss": 4.4869, + "step": 301100 + }, + { + "epoch": 1.2962668032592835, + "grad_norm": 2.3154749870300293, + "learning_rate": 8.438570648488196e-05, + "loss": 4.7567, + "step": 301150 + }, + { + "epoch": 1.296482022718566, + "grad_norm": 2.0431177616119385, + "learning_rate": 8.438079334767579e-05, + "loss": 4.595, + "step": 301200 + }, + { + "epoch": 1.2966972421778489, + "grad_norm": 2.645033121109009, + "learning_rate": 8.437587958069263e-05, + "loss": 4.5846, + "step": 301250 + }, + { + "epoch": 1.2969124616371315, + "grad_norm": 5.135073184967041, + "learning_rate": 8.437096518402252e-05, + "loss": 4.3474, + "step": 301300 + }, + { + "epoch": 1.297127681096414, + "grad_norm": 3.3777453899383545, + "learning_rate": 8.436605015775547e-05, + "loss": 4.6751, + "step": 301350 + }, + { + "epoch": 1.2973429005556967, + "grad_norm": 6.191769599914551, + "learning_rate": 8.436113450198151e-05, + "loss": 4.4802, + "step": 301400 + }, + { + "epoch": 1.2975581200149793, + "grad_norm": 0.7971835732460022, + "learning_rate": 8.43562182167907e-05, + "loss": 4.503, + "step": 301450 + }, + { + "epoch": 1.2977733394742619, + "grad_norm": 2.8808326721191406, + "learning_rate": 8.435130130227307e-05, + "loss": 4.2936, + "step": 301500 + }, + { + "epoch": 1.2979885589335445, + "grad_norm": 5.96058464050293, + "learning_rate": 8.43463837585187e-05, + "loss": 4.8894, + "step": 301550 + }, + { + "epoch": 1.298203778392827, + "grad_norm": 2.094419002532959, + "learning_rate": 8.434146558561768e-05, + "loss": 4.616, + "step": 301600 + }, + { + "epoch": 1.2984189978521097, + "grad_norm": 3.3740220069885254, + "learning_rate": 8.433654678366007e-05, + "loss": 4.5307, + "step": 301650 + }, + { + "epoch": 1.2986342173113925, + "grad_norm": 2.285975933074951, + "learning_rate": 8.4331627352736e-05, + "loss": 4.7454, + "step": 301700 + }, + { + "epoch": 1.298849436770675, + "grad_norm": 1.7356773614883423, + "learning_rate": 8.432670729293557e-05, + "loss": 4.5608, + "step": 301750 + }, + { + "epoch": 1.2990646562299577, + "grad_norm": 2.5305750370025635, + "learning_rate": 8.432178660434891e-05, + "loss": 4.556, + "step": 301800 + }, + { + "epoch": 1.2992798756892403, + "grad_norm": 5.534286975860596, + "learning_rate": 8.431686528706614e-05, + "loss": 4.6264, + "step": 301850 + }, + { + "epoch": 1.299495095148523, + "grad_norm": 2.2209837436676025, + "learning_rate": 8.431194334117743e-05, + "loss": 4.7958, + "step": 301900 + }, + { + "epoch": 1.2997103146078055, + "grad_norm": 3.5254030227661133, + "learning_rate": 8.430702076677291e-05, + "loss": 4.549, + "step": 301950 + }, + { + "epoch": 1.2999255340670883, + "grad_norm": 1.9100147485733032, + "learning_rate": 8.430209756394279e-05, + "loss": 4.4959, + "step": 302000 + }, + { + "epoch": 1.300140753526371, + "grad_norm": 2.753013849258423, + "learning_rate": 8.429717373277721e-05, + "loss": 4.6951, + "step": 302050 + }, + { + "epoch": 1.3003559729856535, + "grad_norm": 3.7968525886535645, + "learning_rate": 8.429224927336639e-05, + "loss": 4.8167, + "step": 302100 + }, + { + "epoch": 1.3005711924449361, + "grad_norm": 2.4788050651550293, + "learning_rate": 8.428732418580052e-05, + "loss": 4.7669, + "step": 302150 + }, + { + "epoch": 1.3007864119042187, + "grad_norm": 2.9981367588043213, + "learning_rate": 8.428239847016983e-05, + "loss": 4.762, + "step": 302200 + }, + { + "epoch": 1.3010016313635013, + "grad_norm": 2.0816893577575684, + "learning_rate": 8.427747212656453e-05, + "loss": 4.8356, + "step": 302250 + }, + { + "epoch": 1.301216850822784, + "grad_norm": 2.6231253147125244, + "learning_rate": 8.427254515507487e-05, + "loss": 4.516, + "step": 302300 + }, + { + "epoch": 1.3014320702820665, + "grad_norm": 3.764068365097046, + "learning_rate": 8.426761755579112e-05, + "loss": 4.7861, + "step": 302350 + }, + { + "epoch": 1.3016472897413491, + "grad_norm": 1.204003930091858, + "learning_rate": 8.42626893288035e-05, + "loss": 4.6279, + "step": 302400 + }, + { + "epoch": 1.301862509200632, + "grad_norm": 3.780625104904175, + "learning_rate": 8.425776047420233e-05, + "loss": 4.9621, + "step": 302450 + }, + { + "epoch": 1.3020777286599146, + "grad_norm": 2.2719931602478027, + "learning_rate": 8.425283099207786e-05, + "loss": 4.3652, + "step": 302500 + }, + { + "epoch": 1.3022929481191972, + "grad_norm": 2.6402084827423096, + "learning_rate": 8.424790088252039e-05, + "loss": 4.6396, + "step": 302550 + }, + { + "epoch": 1.3025081675784798, + "grad_norm": 2.4729835987091064, + "learning_rate": 8.424297014562026e-05, + "loss": 4.8085, + "step": 302600 + }, + { + "epoch": 1.3027233870377624, + "grad_norm": 0.46348699927330017, + "learning_rate": 8.423803878146774e-05, + "loss": 4.1911, + "step": 302650 + }, + { + "epoch": 1.302938606497045, + "grad_norm": 1.060428500175476, + "learning_rate": 8.423310679015321e-05, + "loss": 4.6144, + "step": 302700 + }, + { + "epoch": 1.3031538259563278, + "grad_norm": 1.4028128385543823, + "learning_rate": 8.422817417176698e-05, + "loss": 4.1903, + "step": 302750 + }, + { + "epoch": 1.3033690454156104, + "grad_norm": 0.9149894118309021, + "learning_rate": 8.422324092639942e-05, + "loss": 4.3158, + "step": 302800 + }, + { + "epoch": 1.303584264874893, + "grad_norm": 1.611409306526184, + "learning_rate": 8.421830705414089e-05, + "loss": 4.9404, + "step": 302850 + }, + { + "epoch": 1.3037994843341756, + "grad_norm": 0.6499851942062378, + "learning_rate": 8.421337255508177e-05, + "loss": 4.3302, + "step": 302900 + }, + { + "epoch": 1.3040147037934582, + "grad_norm": 3.874608039855957, + "learning_rate": 8.420843742931244e-05, + "loss": 4.5834, + "step": 302950 + }, + { + "epoch": 1.3042299232527408, + "grad_norm": 2.290311574935913, + "learning_rate": 8.42035016769233e-05, + "loss": 4.5608, + "step": 303000 + }, + { + "epoch": 1.3042299232527408, + "eval_loss": 5.083721160888672, + "eval_runtime": 35.1777, + "eval_samples_per_second": 18.193, + "eval_steps_per_second": 9.097, + "eval_tts_loss": 6.927451046610235, + "step": 303000 + }, + { + "epoch": 1.3044451427120234, + "grad_norm": 5.117006778717041, + "learning_rate": 8.419856529800479e-05, + "loss": 4.9247, + "step": 303050 + }, + { + "epoch": 1.304660362171306, + "grad_norm": 3.5054116249084473, + "learning_rate": 8.419362829264729e-05, + "loss": 4.5152, + "step": 303100 + }, + { + "epoch": 1.3048755816305886, + "grad_norm": 3.0239906311035156, + "learning_rate": 8.418869066094127e-05, + "loss": 4.6131, + "step": 303150 + }, + { + "epoch": 1.3050908010898714, + "grad_norm": 0.5873883366584778, + "learning_rate": 8.418375240297715e-05, + "loss": 4.6783, + "step": 303200 + }, + { + "epoch": 1.305306020549154, + "grad_norm": 2.5460715293884277, + "learning_rate": 8.417881351884541e-05, + "loss": 4.6479, + "step": 303250 + }, + { + "epoch": 1.3055212400084366, + "grad_norm": 2.028120517730713, + "learning_rate": 8.417387400863651e-05, + "loss": 4.0755, + "step": 303300 + }, + { + "epoch": 1.3057364594677192, + "grad_norm": 3.075871467590332, + "learning_rate": 8.416893387244092e-05, + "loss": 4.7585, + "step": 303350 + }, + { + "epoch": 1.3059516789270018, + "grad_norm": 2.699652910232544, + "learning_rate": 8.416399311034915e-05, + "loss": 4.6129, + "step": 303400 + }, + { + "epoch": 1.3061668983862844, + "grad_norm": 2.9710443019866943, + "learning_rate": 8.415905172245168e-05, + "loss": 4.23, + "step": 303450 + }, + { + "epoch": 1.3063821178455672, + "grad_norm": 3.348155975341797, + "learning_rate": 8.415410970883905e-05, + "loss": 4.6625, + "step": 303500 + }, + { + "epoch": 1.3065973373048498, + "grad_norm": 0.8332639336585999, + "learning_rate": 8.414916706960179e-05, + "loss": 4.4693, + "step": 303550 + }, + { + "epoch": 1.3068125567641324, + "grad_norm": 2.9772603511810303, + "learning_rate": 8.414422380483041e-05, + "loss": 4.667, + "step": 303600 + }, + { + "epoch": 1.307027776223415, + "grad_norm": 2.374998092651367, + "learning_rate": 8.413927991461545e-05, + "loss": 4.798, + "step": 303650 + }, + { + "epoch": 1.3072429956826976, + "grad_norm": 3.0556886196136475, + "learning_rate": 8.413433539904752e-05, + "loss": 4.2136, + "step": 303700 + }, + { + "epoch": 1.3074582151419802, + "grad_norm": 3.8256328105926514, + "learning_rate": 8.412939025821716e-05, + "loss": 4.6236, + "step": 303750 + }, + { + "epoch": 1.3076734346012628, + "grad_norm": 3.4280762672424316, + "learning_rate": 8.412444449221495e-05, + "loss": 4.8598, + "step": 303800 + }, + { + "epoch": 1.3078886540605454, + "grad_norm": 2.770430088043213, + "learning_rate": 8.41194981011315e-05, + "loss": 4.7315, + "step": 303850 + }, + { + "epoch": 1.308103873519828, + "grad_norm": 2.620570421218872, + "learning_rate": 8.411455108505742e-05, + "loss": 4.484, + "step": 303900 + }, + { + "epoch": 1.3083190929791109, + "grad_norm": 1.0774542093276978, + "learning_rate": 8.410960344408331e-05, + "loss": 4.1979, + "step": 303950 + }, + { + "epoch": 1.3085343124383935, + "grad_norm": 4.349213600158691, + "learning_rate": 8.410465517829979e-05, + "loss": 4.464, + "step": 304000 + }, + { + "epoch": 1.308749531897676, + "grad_norm": 3.054250955581665, + "learning_rate": 8.409970628779756e-05, + "loss": 4.9555, + "step": 304050 + }, + { + "epoch": 1.3089647513569587, + "grad_norm": 4.185551166534424, + "learning_rate": 8.40947567726672e-05, + "loss": 4.2913, + "step": 304100 + }, + { + "epoch": 1.3091799708162413, + "grad_norm": 2.5455121994018555, + "learning_rate": 8.408980663299941e-05, + "loss": 4.8942, + "step": 304150 + }, + { + "epoch": 1.309395190275524, + "grad_norm": 1.2753013372421265, + "learning_rate": 8.408485586888485e-05, + "loss": 4.9134, + "step": 304200 + }, + { + "epoch": 1.3096104097348067, + "grad_norm": 1.0928165912628174, + "learning_rate": 8.407990448041422e-05, + "loss": 4.5154, + "step": 304250 + }, + { + "epoch": 1.3098256291940893, + "grad_norm": 2.645813465118408, + "learning_rate": 8.407495246767824e-05, + "loss": 4.8638, + "step": 304300 + }, + { + "epoch": 1.310040848653372, + "grad_norm": 3.780654191970825, + "learning_rate": 8.406999983076757e-05, + "loss": 4.5396, + "step": 304350 + }, + { + "epoch": 1.3102560681126545, + "grad_norm": 2.060131311416626, + "learning_rate": 8.406504656977297e-05, + "loss": 5.1868, + "step": 304400 + }, + { + "epoch": 1.310471287571937, + "grad_norm": 2.1970081329345703, + "learning_rate": 8.406009268478514e-05, + "loss": 4.6753, + "step": 304450 + }, + { + "epoch": 1.3106865070312197, + "grad_norm": 2.596198558807373, + "learning_rate": 8.405513817589485e-05, + "loss": 4.9482, + "step": 304500 + }, + { + "epoch": 1.3109017264905023, + "grad_norm": 0.8365402221679688, + "learning_rate": 8.405018304319284e-05, + "loss": 4.5758, + "step": 304550 + }, + { + "epoch": 1.311116945949785, + "grad_norm": 0.5367375612258911, + "learning_rate": 8.404522728676989e-05, + "loss": 4.0888, + "step": 304600 + }, + { + "epoch": 1.3113321654090675, + "grad_norm": 2.4897429943084717, + "learning_rate": 8.404027090671675e-05, + "loss": 5.0241, + "step": 304650 + }, + { + "epoch": 1.3115473848683503, + "grad_norm": 2.5794219970703125, + "learning_rate": 8.403531390312426e-05, + "loss": 3.9127, + "step": 304700 + }, + { + "epoch": 1.311762604327633, + "grad_norm": 2.4296112060546875, + "learning_rate": 8.403035627608317e-05, + "loss": 4.4411, + "step": 304750 + }, + { + "epoch": 1.3119778237869155, + "grad_norm": 2.266916275024414, + "learning_rate": 8.402539802568433e-05, + "loss": 4.3059, + "step": 304800 + }, + { + "epoch": 1.3121930432461981, + "grad_norm": 2.152987480163574, + "learning_rate": 8.402043915201854e-05, + "loss": 4.5215, + "step": 304850 + }, + { + "epoch": 1.3124082627054807, + "grad_norm": 4.51939058303833, + "learning_rate": 8.401547965517665e-05, + "loss": 4.9349, + "step": 304900 + }, + { + "epoch": 1.3126234821647635, + "grad_norm": 2.534456253051758, + "learning_rate": 8.401051953524947e-05, + "loss": 4.7055, + "step": 304950 + }, + { + "epoch": 1.3128387016240461, + "grad_norm": 2.5612270832061768, + "learning_rate": 8.400555879232793e-05, + "loss": 4.745, + "step": 305000 + }, + { + "epoch": 1.3130539210833287, + "grad_norm": 1.1757075786590576, + "learning_rate": 8.400059742650281e-05, + "loss": 4.3584, + "step": 305050 + }, + { + "epoch": 1.3132691405426113, + "grad_norm": 1.4225696325302124, + "learning_rate": 8.399563543786507e-05, + "loss": 4.5772, + "step": 305100 + }, + { + "epoch": 1.313484360001894, + "grad_norm": 3.6288809776306152, + "learning_rate": 8.399067282650557e-05, + "loss": 4.3671, + "step": 305150 + }, + { + "epoch": 1.3136995794611765, + "grad_norm": 1.8255196809768677, + "learning_rate": 8.398570959251519e-05, + "loss": 4.6806, + "step": 305200 + }, + { + "epoch": 1.3139147989204591, + "grad_norm": 0.7925216555595398, + "learning_rate": 8.398074573598489e-05, + "loss": 4.8301, + "step": 305250 + }, + { + "epoch": 1.3141300183797417, + "grad_norm": 2.4469401836395264, + "learning_rate": 8.397578125700556e-05, + "loss": 4.4694, + "step": 305300 + }, + { + "epoch": 1.3143452378390243, + "grad_norm": 4.52958345413208, + "learning_rate": 8.397081615566817e-05, + "loss": 4.6246, + "step": 305350 + }, + { + "epoch": 1.314560457298307, + "grad_norm": 3.0161876678466797, + "learning_rate": 8.396585043206362e-05, + "loss": 4.6369, + "step": 305400 + }, + { + "epoch": 1.3147756767575898, + "grad_norm": 1.2806857824325562, + "learning_rate": 8.396088408628293e-05, + "loss": 4.4526, + "step": 305450 + }, + { + "epoch": 1.3149908962168724, + "grad_norm": 3.3434531688690186, + "learning_rate": 8.395591711841702e-05, + "loss": 4.8532, + "step": 305500 + }, + { + "epoch": 1.315206115676155, + "grad_norm": 2.970489978790283, + "learning_rate": 8.395094952855692e-05, + "loss": 4.3038, + "step": 305550 + }, + { + "epoch": 1.3154213351354376, + "grad_norm": 1.1273871660232544, + "learning_rate": 8.394598131679358e-05, + "loss": 4.361, + "step": 305600 + }, + { + "epoch": 1.3156365545947202, + "grad_norm": 4.091231346130371, + "learning_rate": 8.394101248321805e-05, + "loss": 4.7488, + "step": 305650 + }, + { + "epoch": 1.315851774054003, + "grad_norm": 2.73215389251709, + "learning_rate": 8.393604302792133e-05, + "loss": 4.5144, + "step": 305700 + }, + { + "epoch": 1.3160669935132856, + "grad_norm": 2.412202835083008, + "learning_rate": 8.393107295099443e-05, + "loss": 5.022, + "step": 305750 + }, + { + "epoch": 1.3162822129725682, + "grad_norm": 2.736128330230713, + "learning_rate": 8.39261022525284e-05, + "loss": 5.2367, + "step": 305800 + }, + { + "epoch": 1.3164974324318508, + "grad_norm": 1.6059223413467407, + "learning_rate": 8.392113093261432e-05, + "loss": 4.3723, + "step": 305850 + }, + { + "epoch": 1.3167126518911334, + "grad_norm": 2.13931941986084, + "learning_rate": 8.391615899134319e-05, + "loss": 4.7724, + "step": 305900 + }, + { + "epoch": 1.316927871350416, + "grad_norm": 2.6486763954162598, + "learning_rate": 8.391118642880615e-05, + "loss": 4.7601, + "step": 305950 + }, + { + "epoch": 1.3171430908096986, + "grad_norm": 2.0420753955841064, + "learning_rate": 8.390621324509426e-05, + "loss": 4.1692, + "step": 306000 + }, + { + "epoch": 1.3171430908096986, + "eval_loss": 5.085195064544678, + "eval_runtime": 35.1846, + "eval_samples_per_second": 18.19, + "eval_steps_per_second": 9.095, + "eval_tts_loss": 6.899363391335442, + "step": 306000 + }, + { + "epoch": 1.3173583102689812, + "grad_norm": 3.933044672012329, + "learning_rate": 8.390123944029862e-05, + "loss": 4.606, + "step": 306050 + }, + { + "epoch": 1.3175735297282638, + "grad_norm": 2.687304735183716, + "learning_rate": 8.389626501451033e-05, + "loss": 4.8119, + "step": 306100 + }, + { + "epoch": 1.3177887491875464, + "grad_norm": 2.04555082321167, + "learning_rate": 8.389128996782052e-05, + "loss": 4.0015, + "step": 306150 + }, + { + "epoch": 1.3180039686468292, + "grad_norm": 3.297322988510132, + "learning_rate": 8.388631430032032e-05, + "loss": 4.7446, + "step": 306200 + }, + { + "epoch": 1.3182191881061118, + "grad_norm": 2.303335189819336, + "learning_rate": 8.388133801210087e-05, + "loss": 4.6035, + "step": 306250 + }, + { + "epoch": 1.3184344075653944, + "grad_norm": 4.343925476074219, + "learning_rate": 8.387636110325332e-05, + "loss": 4.4771, + "step": 306300 + }, + { + "epoch": 1.318649627024677, + "grad_norm": 0.8763145804405212, + "learning_rate": 8.387138357386886e-05, + "loss": 4.5643, + "step": 306350 + }, + { + "epoch": 1.3188648464839596, + "grad_norm": 3.1465303897857666, + "learning_rate": 8.386640542403863e-05, + "loss": 4.3674, + "step": 306400 + }, + { + "epoch": 1.3190800659432425, + "grad_norm": 2.474740505218506, + "learning_rate": 8.386142665385384e-05, + "loss": 4.2871, + "step": 306450 + }, + { + "epoch": 1.319295285402525, + "grad_norm": 1.283050537109375, + "learning_rate": 8.385644726340567e-05, + "loss": 4.8109, + "step": 306500 + }, + { + "epoch": 1.3195105048618077, + "grad_norm": 2.648128032684326, + "learning_rate": 8.385146725278537e-05, + "loss": 4.2009, + "step": 306550 + }, + { + "epoch": 1.3197257243210903, + "grad_norm": 3.1038153171539307, + "learning_rate": 8.384648662208412e-05, + "loss": 4.1804, + "step": 306600 + }, + { + "epoch": 1.3199409437803729, + "grad_norm": 2.368666887283325, + "learning_rate": 8.384150537139317e-05, + "loss": 4.6399, + "step": 306650 + }, + { + "epoch": 1.3201561632396555, + "grad_norm": 3.310537338256836, + "learning_rate": 8.383652350080377e-05, + "loss": 4.9042, + "step": 306700 + }, + { + "epoch": 1.320371382698938, + "grad_norm": 3.082533359527588, + "learning_rate": 8.383154101040719e-05, + "loss": 4.5716, + "step": 306750 + }, + { + "epoch": 1.3205866021582207, + "grad_norm": 3.5327022075653076, + "learning_rate": 8.382655790029467e-05, + "loss": 4.9489, + "step": 306800 + }, + { + "epoch": 1.3208018216175033, + "grad_norm": 3.1694586277008057, + "learning_rate": 8.38215741705575e-05, + "loss": 4.6426, + "step": 306850 + }, + { + "epoch": 1.321017041076786, + "grad_norm": 6.732285499572754, + "learning_rate": 8.381658982128697e-05, + "loss": 4.5196, + "step": 306900 + }, + { + "epoch": 1.3212322605360687, + "grad_norm": 2.8801772594451904, + "learning_rate": 8.381160485257437e-05, + "loss": 4.0963, + "step": 306950 + }, + { + "epoch": 1.3214474799953513, + "grad_norm": 3.0390279293060303, + "learning_rate": 8.380661926451105e-05, + "loss": 4.6451, + "step": 307000 + }, + { + "epoch": 1.3216626994546339, + "grad_norm": 3.900115489959717, + "learning_rate": 8.380163305718831e-05, + "loss": 4.4702, + "step": 307050 + }, + { + "epoch": 1.3218779189139165, + "grad_norm": 1.2836724519729614, + "learning_rate": 8.379664623069748e-05, + "loss": 4.2445, + "step": 307100 + }, + { + "epoch": 1.322093138373199, + "grad_norm": 3.3143625259399414, + "learning_rate": 8.379165878512991e-05, + "loss": 4.7693, + "step": 307150 + }, + { + "epoch": 1.322308357832482, + "grad_norm": 1.7237197160720825, + "learning_rate": 8.378667072057695e-05, + "loss": 4.6168, + "step": 307200 + }, + { + "epoch": 1.3225235772917645, + "grad_norm": 2.747358560562134, + "learning_rate": 8.378168203713e-05, + "loss": 4.5024, + "step": 307250 + }, + { + "epoch": 1.322738796751047, + "grad_norm": 1.228512167930603, + "learning_rate": 8.37766927348804e-05, + "loss": 4.4649, + "step": 307300 + }, + { + "epoch": 1.3229540162103297, + "grad_norm": 2.5669186115264893, + "learning_rate": 8.377170281391959e-05, + "loss": 4.77, + "step": 307350 + }, + { + "epoch": 1.3231692356696123, + "grad_norm": 3.040313959121704, + "learning_rate": 8.376671227433894e-05, + "loss": 4.8465, + "step": 307400 + }, + { + "epoch": 1.323384455128895, + "grad_norm": 4.614471912384033, + "learning_rate": 8.376172111622988e-05, + "loss": 4.7326, + "step": 307450 + }, + { + "epoch": 1.3235996745881775, + "grad_norm": 1.4144479036331177, + "learning_rate": 8.375672933968381e-05, + "loss": 4.4425, + "step": 307500 + }, + { + "epoch": 1.3238148940474601, + "grad_norm": 2.3295738697052, + "learning_rate": 8.375173694479221e-05, + "loss": 4.7259, + "step": 307550 + }, + { + "epoch": 1.3240301135067427, + "grad_norm": 3.9467899799346924, + "learning_rate": 8.37467439316465e-05, + "loss": 4.732, + "step": 307600 + }, + { + "epoch": 1.3242453329660255, + "grad_norm": 0.749884307384491, + "learning_rate": 8.374175030033815e-05, + "loss": 4.4922, + "step": 307650 + }, + { + "epoch": 1.3244605524253081, + "grad_norm": 2.6406736373901367, + "learning_rate": 8.373675605095863e-05, + "loss": 4.8481, + "step": 307700 + }, + { + "epoch": 1.3246757718845907, + "grad_norm": 2.0671939849853516, + "learning_rate": 8.373176118359943e-05, + "loss": 4.6234, + "step": 307750 + }, + { + "epoch": 1.3248909913438733, + "grad_norm": 1.9638216495513916, + "learning_rate": 8.372676569835204e-05, + "loss": 4.5306, + "step": 307800 + }, + { + "epoch": 1.325106210803156, + "grad_norm": 2.901700019836426, + "learning_rate": 8.372176959530795e-05, + "loss": 5.0996, + "step": 307850 + }, + { + "epoch": 1.3253214302624385, + "grad_norm": 3.38617205619812, + "learning_rate": 8.37167728745587e-05, + "loss": 4.7077, + "step": 307900 + }, + { + "epoch": 1.3255366497217214, + "grad_norm": 3.094240427017212, + "learning_rate": 8.371177553619581e-05, + "loss": 4.6094, + "step": 307950 + }, + { + "epoch": 1.325751869181004, + "grad_norm": 1.8047672510147095, + "learning_rate": 8.370677758031081e-05, + "loss": 4.392, + "step": 308000 + }, + { + "epoch": 1.3259670886402866, + "grad_norm": 2.4859237670898438, + "learning_rate": 8.370177900699527e-05, + "loss": 4.5495, + "step": 308050 + }, + { + "epoch": 1.3261823080995692, + "grad_norm": 2.1836347579956055, + "learning_rate": 8.369677981634074e-05, + "loss": 4.3343, + "step": 308100 + }, + { + "epoch": 1.3263975275588518, + "grad_norm": 1.614544153213501, + "learning_rate": 8.369178000843877e-05, + "loss": 4.7109, + "step": 308150 + }, + { + "epoch": 1.3266127470181344, + "grad_norm": 2.7724387645721436, + "learning_rate": 8.3686779583381e-05, + "loss": 4.5518, + "step": 308200 + }, + { + "epoch": 1.326827966477417, + "grad_norm": 1.7900335788726807, + "learning_rate": 8.368177854125901e-05, + "loss": 4.8552, + "step": 308250 + }, + { + "epoch": 1.3270431859366996, + "grad_norm": 3.191295862197876, + "learning_rate": 8.367677688216437e-05, + "loss": 4.744, + "step": 308300 + }, + { + "epoch": 1.3272584053959822, + "grad_norm": 2.2700207233428955, + "learning_rate": 8.367177460618873e-05, + "loss": 4.5084, + "step": 308350 + }, + { + "epoch": 1.327473624855265, + "grad_norm": 3.258035659790039, + "learning_rate": 8.366677171342371e-05, + "loss": 4.0394, + "step": 308400 + }, + { + "epoch": 1.3276888443145476, + "grad_norm": 3.2204649448394775, + "learning_rate": 8.366176820396097e-05, + "loss": 4.5136, + "step": 308450 + }, + { + "epoch": 1.3279040637738302, + "grad_norm": 2.3910491466522217, + "learning_rate": 8.365676407789213e-05, + "loss": 4.5915, + "step": 308500 + }, + { + "epoch": 1.3281192832331128, + "grad_norm": 3.6881725788116455, + "learning_rate": 8.365175933530888e-05, + "loss": 3.9412, + "step": 308550 + }, + { + "epoch": 1.3283345026923954, + "grad_norm": 3.7994019985198975, + "learning_rate": 8.364675397630288e-05, + "loss": 4.6448, + "step": 308600 + }, + { + "epoch": 1.328549722151678, + "grad_norm": 2.466400146484375, + "learning_rate": 8.364174800096583e-05, + "loss": 4.7127, + "step": 308650 + }, + { + "epoch": 1.3287649416109608, + "grad_norm": 2.5052359104156494, + "learning_rate": 8.363674140938942e-05, + "loss": 4.7649, + "step": 308700 + }, + { + "epoch": 1.3289801610702434, + "grad_norm": 3.199462652206421, + "learning_rate": 8.363173420166537e-05, + "loss": 4.6405, + "step": 308750 + }, + { + "epoch": 1.329195380529526, + "grad_norm": 2.0157365798950195, + "learning_rate": 8.362672637788539e-05, + "loss": 5.0334, + "step": 308800 + }, + { + "epoch": 1.3294105999888086, + "grad_norm": 2.5788371562957764, + "learning_rate": 8.36217179381412e-05, + "loss": 4.8793, + "step": 308850 + }, + { + "epoch": 1.3296258194480912, + "grad_norm": 4.256916522979736, + "learning_rate": 8.361670888252457e-05, + "loss": 4.7667, + "step": 308900 + }, + { + "epoch": 1.3298410389073738, + "grad_norm": 1.560515284538269, + "learning_rate": 8.361169921112724e-05, + "loss": 4.3859, + "step": 308950 + }, + { + "epoch": 1.3300562583666564, + "grad_norm": 5.109623432159424, + "learning_rate": 8.360668892404098e-05, + "loss": 4.5815, + "step": 309000 + }, + { + "epoch": 1.3300562583666564, + "eval_loss": 5.076446056365967, + "eval_runtime": 34.909, + "eval_samples_per_second": 18.333, + "eval_steps_per_second": 9.167, + "eval_tts_loss": 6.918010526582643, + "step": 309000 + }, + { + "epoch": 1.330271477825939, + "grad_norm": 3.245652914047241, + "learning_rate": 8.360167802135755e-05, + "loss": 4.6581, + "step": 309050 + }, + { + "epoch": 1.3304866972852216, + "grad_norm": 3.7287089824676514, + "learning_rate": 8.359666650316877e-05, + "loss": 4.3088, + "step": 309100 + }, + { + "epoch": 1.3307019167445044, + "grad_norm": 2.607740640640259, + "learning_rate": 8.35916543695664e-05, + "loss": 4.9434, + "step": 309150 + }, + { + "epoch": 1.330917136203787, + "grad_norm": 1.6280027627944946, + "learning_rate": 8.358664162064229e-05, + "loss": 4.4037, + "step": 309200 + }, + { + "epoch": 1.3311323556630696, + "grad_norm": 0.6443197131156921, + "learning_rate": 8.358162825648823e-05, + "loss": 4.8763, + "step": 309250 + }, + { + "epoch": 1.3313475751223522, + "grad_norm": 2.251882314682007, + "learning_rate": 8.357661427719609e-05, + "loss": 4.6484, + "step": 309300 + }, + { + "epoch": 1.3315627945816348, + "grad_norm": 4.505273818969727, + "learning_rate": 8.357159968285767e-05, + "loss": 4.728, + "step": 309350 + }, + { + "epoch": 1.3317780140409174, + "grad_norm": 3.1548702716827393, + "learning_rate": 8.356658447356486e-05, + "loss": 4.4065, + "step": 309400 + }, + { + "epoch": 1.3319932335002003, + "grad_norm": 2.6846535205841064, + "learning_rate": 8.356156864940951e-05, + "loss": 5.1508, + "step": 309450 + }, + { + "epoch": 1.3322084529594829, + "grad_norm": 4.956014633178711, + "learning_rate": 8.355655221048351e-05, + "loss": 4.567, + "step": 309500 + }, + { + "epoch": 1.3324236724187655, + "grad_norm": 2.7625744342803955, + "learning_rate": 8.355153515687872e-05, + "loss": 4.4508, + "step": 309550 + }, + { + "epoch": 1.332638891878048, + "grad_norm": 4.412661552429199, + "learning_rate": 8.354651748868708e-05, + "loss": 4.7777, + "step": 309600 + }, + { + "epoch": 1.3328541113373307, + "grad_norm": 2.146510601043701, + "learning_rate": 8.354149920600049e-05, + "loss": 4.0909, + "step": 309650 + }, + { + "epoch": 1.3330693307966133, + "grad_norm": 2.941514253616333, + "learning_rate": 8.353648030891086e-05, + "loss": 4.5819, + "step": 309700 + }, + { + "epoch": 1.3332845502558959, + "grad_norm": 2.744055986404419, + "learning_rate": 8.353146079751014e-05, + "loss": 4.5635, + "step": 309750 + }, + { + "epoch": 1.3334997697151785, + "grad_norm": 2.7854905128479004, + "learning_rate": 8.352644067189028e-05, + "loss": 4.3521, + "step": 309800 + }, + { + "epoch": 1.333714989174461, + "grad_norm": 3.5479207038879395, + "learning_rate": 8.35214199321432e-05, + "loss": 4.4014, + "step": 309850 + }, + { + "epoch": 1.333930208633744, + "grad_norm": 2.407320022583008, + "learning_rate": 8.351639857836093e-05, + "loss": 4.6182, + "step": 309900 + }, + { + "epoch": 1.3341454280930265, + "grad_norm": 1.4819289445877075, + "learning_rate": 8.35113766106354e-05, + "loss": 4.938, + "step": 309950 + }, + { + "epoch": 1.334360647552309, + "grad_norm": 2.9989075660705566, + "learning_rate": 8.35063540290586e-05, + "loss": 4.9291, + "step": 310000 + }, + { + "epoch": 1.3345758670115917, + "grad_norm": 2.086756944656372, + "learning_rate": 8.350133083372256e-05, + "loss": 4.5085, + "step": 310050 + }, + { + "epoch": 1.3347910864708743, + "grad_norm": 3.012193441390991, + "learning_rate": 8.349630702471928e-05, + "loss": 4.4649, + "step": 310100 + }, + { + "epoch": 1.335006305930157, + "grad_norm": 2.5169780254364014, + "learning_rate": 8.349128260214079e-05, + "loss": 4.3916, + "step": 310150 + }, + { + "epoch": 1.3352215253894397, + "grad_norm": 2.30761981010437, + "learning_rate": 8.348625756607912e-05, + "loss": 4.5946, + "step": 310200 + }, + { + "epoch": 1.3354367448487223, + "grad_norm": 2.74033784866333, + "learning_rate": 8.348123191662631e-05, + "loss": 4.8018, + "step": 310250 + }, + { + "epoch": 1.335651964308005, + "grad_norm": 4.299282073974609, + "learning_rate": 8.347620565387444e-05, + "loss": 4.3589, + "step": 310300 + }, + { + "epoch": 1.3358671837672875, + "grad_norm": 2.7577717304229736, + "learning_rate": 8.347117877791557e-05, + "loss": 4.5254, + "step": 310350 + }, + { + "epoch": 1.3360824032265701, + "grad_norm": 2.1584224700927734, + "learning_rate": 8.346615128884177e-05, + "loss": 4.8447, + "step": 310400 + }, + { + "epoch": 1.3362976226858527, + "grad_norm": 2.58272123336792, + "learning_rate": 8.346112318674513e-05, + "loss": 4.4164, + "step": 310450 + }, + { + "epoch": 1.3365128421451353, + "grad_norm": 2.786668062210083, + "learning_rate": 8.345609447171776e-05, + "loss": 4.7728, + "step": 310500 + }, + { + "epoch": 1.336728061604418, + "grad_norm": 2.496610641479492, + "learning_rate": 8.345106514385179e-05, + "loss": 4.7544, + "step": 310550 + }, + { + "epoch": 1.3369432810637005, + "grad_norm": 2.2397937774658203, + "learning_rate": 8.344603520323933e-05, + "loss": 4.2413, + "step": 310600 + }, + { + "epoch": 1.3371585005229834, + "grad_norm": 1.9721094369888306, + "learning_rate": 8.344100464997254e-05, + "loss": 4.5808, + "step": 310650 + }, + { + "epoch": 1.337373719982266, + "grad_norm": 3.5657992362976074, + "learning_rate": 8.343597348414352e-05, + "loss": 4.3898, + "step": 310700 + }, + { + "epoch": 1.3375889394415486, + "grad_norm": 3.212224245071411, + "learning_rate": 8.343094170584447e-05, + "loss": 4.6775, + "step": 310750 + }, + { + "epoch": 1.3378041589008312, + "grad_norm": 2.406191825866699, + "learning_rate": 8.342590931516754e-05, + "loss": 4.9378, + "step": 310800 + }, + { + "epoch": 1.3380193783601138, + "grad_norm": 2.252808094024658, + "learning_rate": 8.342087631220493e-05, + "loss": 4.2553, + "step": 310850 + }, + { + "epoch": 1.3382345978193966, + "grad_norm": 3.3750874996185303, + "learning_rate": 8.34158426970488e-05, + "loss": 4.4538, + "step": 310900 + }, + { + "epoch": 1.3384498172786792, + "grad_norm": 3.1695077419281006, + "learning_rate": 8.341080846979141e-05, + "loss": 4.4626, + "step": 310950 + }, + { + "epoch": 1.3386650367379618, + "grad_norm": 5.65280818939209, + "learning_rate": 8.340577363052494e-05, + "loss": 4.7974, + "step": 311000 + }, + { + "epoch": 1.3388802561972444, + "grad_norm": 3.424224853515625, + "learning_rate": 8.34007381793416e-05, + "loss": 4.3839, + "step": 311050 + }, + { + "epoch": 1.339095475656527, + "grad_norm": 3.1837592124938965, + "learning_rate": 8.339570211633366e-05, + "loss": 4.7868, + "step": 311100 + }, + { + "epoch": 1.3393106951158096, + "grad_norm": 3.8975517749786377, + "learning_rate": 8.339066544159333e-05, + "loss": 4.9484, + "step": 311150 + }, + { + "epoch": 1.3395259145750922, + "grad_norm": 2.4979605674743652, + "learning_rate": 8.338562815521292e-05, + "loss": 4.568, + "step": 311200 + }, + { + "epoch": 1.3397411340343748, + "grad_norm": 1.4769121408462524, + "learning_rate": 8.338059025728469e-05, + "loss": 4.8891, + "step": 311250 + }, + { + "epoch": 1.3399563534936574, + "grad_norm": 2.776404619216919, + "learning_rate": 8.337555174790089e-05, + "loss": 4.627, + "step": 311300 + }, + { + "epoch": 1.34017157295294, + "grad_norm": 1.472396731376648, + "learning_rate": 8.337051262715383e-05, + "loss": 4.636, + "step": 311350 + }, + { + "epoch": 1.3403867924122228, + "grad_norm": 2.903010129928589, + "learning_rate": 8.336547289513584e-05, + "loss": 4.6827, + "step": 311400 + }, + { + "epoch": 1.3406020118715054, + "grad_norm": 2.8928961753845215, + "learning_rate": 8.336043255193919e-05, + "loss": 4.6995, + "step": 311450 + }, + { + "epoch": 1.340817231330788, + "grad_norm": 2.7604496479034424, + "learning_rate": 8.335539159765626e-05, + "loss": 4.5413, + "step": 311500 + }, + { + "epoch": 1.3410324507900706, + "grad_norm": 2.0921099185943604, + "learning_rate": 8.335035003237935e-05, + "loss": 4.5145, + "step": 311550 + }, + { + "epoch": 1.3412476702493532, + "grad_norm": 0.44096171855926514, + "learning_rate": 8.334530785620081e-05, + "loss": 4.6634, + "step": 311600 + }, + { + "epoch": 1.341462889708636, + "grad_norm": 1.3684492111206055, + "learning_rate": 8.334026506921302e-05, + "loss": 4.7917, + "step": 311650 + }, + { + "epoch": 1.3416781091679186, + "grad_norm": 0.655540943145752, + "learning_rate": 8.333522167150837e-05, + "loss": 4.583, + "step": 311700 + }, + { + "epoch": 1.3418933286272012, + "grad_norm": 4.282975196838379, + "learning_rate": 8.333017766317919e-05, + "loss": 4.685, + "step": 311750 + }, + { + "epoch": 1.3421085480864838, + "grad_norm": 2.2062454223632812, + "learning_rate": 8.332513304431792e-05, + "loss": 4.949, + "step": 311800 + }, + { + "epoch": 1.3423237675457664, + "grad_norm": 5.739840507507324, + "learning_rate": 8.332008781501693e-05, + "loss": 4.6781, + "step": 311850 + }, + { + "epoch": 1.342538987005049, + "grad_norm": 1.9207684993743896, + "learning_rate": 8.331504197536868e-05, + "loss": 4.6179, + "step": 311900 + }, + { + "epoch": 1.3427542064643316, + "grad_norm": 3.6176180839538574, + "learning_rate": 8.330999552546556e-05, + "loss": 4.584, + "step": 311950 + }, + { + "epoch": 1.3429694259236142, + "grad_norm": 0.9456440210342407, + "learning_rate": 8.330494846540002e-05, + "loss": 4.5265, + "step": 312000 + }, + { + "epoch": 1.3429694259236142, + "eval_loss": 5.07796573638916, + "eval_runtime": 35.1013, + "eval_samples_per_second": 18.233, + "eval_steps_per_second": 9.116, + "eval_tts_loss": 6.860268785550041, + "step": 312000 + } + ], + "logging_steps": 50, + "max_steps": 1161605, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 3000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.387432161196114e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}