diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,76396 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.597107438016529, + "eval_steps": 3000, + "global_step": 534000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00043044077134986227, + "grad_norm": 71.5, + "learning_rate": 9.800000000000001e-06, + "loss": 18.2106, + "step": 50 + }, + { + "epoch": 0.0008608815426997245, + "grad_norm": 5.375, + "learning_rate": 1.9800000000000004e-05, + "loss": 12.5337, + "step": 100 + }, + { + "epoch": 0.0012913223140495868, + "grad_norm": 7.34375, + "learning_rate": 2.98e-05, + "loss": 11.224, + "step": 150 + }, + { + "epoch": 0.001721763085399449, + "grad_norm": 12.125, + "learning_rate": 3.9800000000000005e-05, + "loss": 8.4094, + "step": 200 + }, + { + "epoch": 0.0021522038567493114, + "grad_norm": 10.375, + "learning_rate": 4.9800000000000004e-05, + "loss": 7.4048, + "step": 250 + }, + { + "epoch": 0.0025826446280991736, + "grad_norm": 6.09375, + "learning_rate": 5.9800000000000003e-05, + "loss": 7.1413, + "step": 300 + }, + { + "epoch": 0.003013085399449036, + "grad_norm": 2.421875, + "learning_rate": 6.98e-05, + "loss": 6.9154, + "step": 350 + }, + { + "epoch": 0.003443526170798898, + "grad_norm": 3.296875, + "learning_rate": 7.98e-05, + "loss": 6.9162, + "step": 400 + }, + { + "epoch": 0.0038739669421487604, + "grad_norm": 1.8671875, + "learning_rate": 8.98e-05, + "loss": 6.6844, + "step": 450 + }, + { + "epoch": 0.004304407713498623, + "grad_norm": 3.375, + "learning_rate": 9.98e-05, + "loss": 6.5678, + "step": 500 + }, + { + "epoch": 0.004734848484848485, + "grad_norm": 6.5, + "learning_rate": 0.00010980000000000001, + "loss": 7.1726, + "step": 550 + }, + { + "epoch": 0.005165289256198347, + "grad_norm": 3.53125, + "learning_rate": 0.0001198, + "loss": 6.5573, + "step": 600 + }, + { + "epoch": 0.0055957300275482095, + "grad_norm": 3.59375, + "learning_rate": 0.0001298, + "loss": 6.5085, + "step": 650 + }, + { + "epoch": 0.006026170798898072, + "grad_norm": 2.984375, + "learning_rate": 0.0001398, + "loss": 6.0831, + "step": 700 + }, + { + "epoch": 0.006456611570247934, + "grad_norm": 1.9921875, + "learning_rate": 0.0001498, + "loss": 7.2968, + "step": 750 + }, + { + "epoch": 0.006887052341597796, + "grad_norm": 3.984375, + "learning_rate": 0.0001598, + "loss": 6.4507, + "step": 800 + }, + { + "epoch": 0.007317493112947659, + "grad_norm": 0.85546875, + "learning_rate": 0.0001698, + "loss": 6.6401, + "step": 850 + }, + { + "epoch": 0.007747933884297521, + "grad_norm": 1.546875, + "learning_rate": 0.0001798, + "loss": 6.4949, + "step": 900 + }, + { + "epoch": 0.008178374655647382, + "grad_norm": 2.625, + "learning_rate": 0.0001898, + "loss": 6.7644, + "step": 950 + }, + { + "epoch": 0.008608815426997245, + "grad_norm": 4.0625, + "learning_rate": 0.0001998, + "loss": 6.7678, + "step": 1000 + }, + { + "epoch": 0.009039256198347107, + "grad_norm": 3.296875, + "learning_rate": 0.00019999999647543534, + "loss": 6.8504, + "step": 1050 + }, + { + "epoch": 0.00946969696969697, + "grad_norm": 2.03125, + "learning_rate": 0.00019999998561255402, + "loss": 6.9308, + "step": 1100 + }, + { + "epoch": 0.009900137741046831, + "grad_norm": 1.5234375, + "learning_rate": 0.0001999999674098889, + "loss": 6.5577, + "step": 1150 + }, + { + "epoch": 0.010330578512396695, + "grad_norm": 1.1328125, + "learning_rate": 0.00019999994186744137, + "loss": 6.6297, + "step": 1200 + }, + { + "epoch": 0.010761019283746556, + "grad_norm": 2.4375, + "learning_rate": 0.00019999990898521324, + "loss": 6.6041, + "step": 1250 + }, + { + "epoch": 0.011191460055096419, + "grad_norm": 2.890625, + "learning_rate": 0.00019999986876320693, + "loss": 6.7533, + "step": 1300 + }, + { + "epoch": 0.01162190082644628, + "grad_norm": 0.625, + "learning_rate": 0.00019999982120142543, + "loss": 6.3455, + "step": 1350 + }, + { + "epoch": 0.012052341597796144, + "grad_norm": 2.78125, + "learning_rate": 0.0001999997662998722, + "loss": 6.4072, + "step": 1400 + }, + { + "epoch": 0.012482782369146005, + "grad_norm": 2.21875, + "learning_rate": 0.00019999970405855128, + "loss": 6.1638, + "step": 1450 + }, + { + "epoch": 0.012913223140495868, + "grad_norm": 1.6484375, + "learning_rate": 0.00019999963447746722, + "loss": 6.7652, + "step": 1500 + }, + { + "epoch": 0.01334366391184573, + "grad_norm": 0.953125, + "learning_rate": 0.00019999955755662518, + "loss": 6.2853, + "step": 1550 + }, + { + "epoch": 0.013774104683195593, + "grad_norm": 2.359375, + "learning_rate": 0.00019999947329603078, + "loss": 6.5354, + "step": 1600 + }, + { + "epoch": 0.014204545454545454, + "grad_norm": 3.59375, + "learning_rate": 0.00019999938169569016, + "loss": 6.4226, + "step": 1650 + }, + { + "epoch": 0.014634986225895317, + "grad_norm": 1.78125, + "learning_rate": 0.00019999928275561007, + "loss": 6.2953, + "step": 1700 + }, + { + "epoch": 0.015065426997245179, + "grad_norm": 3.46875, + "learning_rate": 0.0001999991764757978, + "loss": 6.4832, + "step": 1750 + }, + { + "epoch": 0.015495867768595042, + "grad_norm": 1.53125, + "learning_rate": 0.0001999990628562611, + "loss": 6.1984, + "step": 1800 + }, + { + "epoch": 0.015926308539944905, + "grad_norm": 1.0234375, + "learning_rate": 0.0001999989418970084, + "loss": 6.6464, + "step": 1850 + }, + { + "epoch": 0.016356749311294765, + "grad_norm": 2.5, + "learning_rate": 0.00019999881359804848, + "loss": 6.5281, + "step": 1900 + }, + { + "epoch": 0.016787190082644628, + "grad_norm": 2.265625, + "learning_rate": 0.00019999867795939078, + "loss": 5.9519, + "step": 1950 + }, + { + "epoch": 0.01721763085399449, + "grad_norm": 1.7734375, + "learning_rate": 0.00019999853498104528, + "loss": 6.4505, + "step": 2000 + }, + { + "epoch": 0.017648071625344354, + "grad_norm": 1.9921875, + "learning_rate": 0.00019999838466302247, + "loss": 7.2526, + "step": 2050 + }, + { + "epoch": 0.018078512396694214, + "grad_norm": 1.484375, + "learning_rate": 0.0001999982270053334, + "loss": 6.2714, + "step": 2100 + }, + { + "epoch": 0.018508953168044077, + "grad_norm": 1.8671875, + "learning_rate": 0.0001999980620079896, + "loss": 6.3783, + "step": 2150 + }, + { + "epoch": 0.01893939393939394, + "grad_norm": 1.109375, + "learning_rate": 0.0001999978896710032, + "loss": 6.0807, + "step": 2200 + }, + { + "epoch": 0.019369834710743803, + "grad_norm": 2.859375, + "learning_rate": 0.00019999770999438684, + "loss": 6.0694, + "step": 2250 + }, + { + "epoch": 0.019800275482093663, + "grad_norm": 2.046875, + "learning_rate": 0.00019999752297815372, + "loss": 6.9441, + "step": 2300 + }, + { + "epoch": 0.020230716253443526, + "grad_norm": 1.140625, + "learning_rate": 0.00019999732862231758, + "loss": 6.1499, + "step": 2350 + }, + { + "epoch": 0.02066115702479339, + "grad_norm": 2.328125, + "learning_rate": 0.00019999712692689265, + "loss": 6.6325, + "step": 2400 + }, + { + "epoch": 0.021091597796143252, + "grad_norm": 1.203125, + "learning_rate": 0.00019999691789189376, + "loss": 6.3922, + "step": 2450 + }, + { + "epoch": 0.021522038567493112, + "grad_norm": 2.859375, + "learning_rate": 0.00019999670151733625, + "loss": 6.5116, + "step": 2500 + }, + { + "epoch": 0.021952479338842975, + "grad_norm": 2.3125, + "learning_rate": 0.00019999647780323597, + "loss": 6.0381, + "step": 2550 + }, + { + "epoch": 0.022382920110192838, + "grad_norm": 2.8125, + "learning_rate": 0.0001999962467496094, + "loss": 6.0474, + "step": 2600 + }, + { + "epoch": 0.0228133608815427, + "grad_norm": 2.171875, + "learning_rate": 0.00019999600835647347, + "loss": 6.3361, + "step": 2650 + }, + { + "epoch": 0.02324380165289256, + "grad_norm": 1.40625, + "learning_rate": 0.00019999576262384568, + "loss": 6.4502, + "step": 2700 + }, + { + "epoch": 0.023674242424242424, + "grad_norm": 1.3359375, + "learning_rate": 0.00019999550955174402, + "loss": 6.2025, + "step": 2750 + }, + { + "epoch": 0.024104683195592287, + "grad_norm": 1.5859375, + "learning_rate": 0.00019999524914018712, + "loss": 6.3278, + "step": 2800 + }, + { + "epoch": 0.02453512396694215, + "grad_norm": 2.296875, + "learning_rate": 0.0001999949813891941, + "loss": 6.0009, + "step": 2850 + }, + { + "epoch": 0.02496556473829201, + "grad_norm": 1.125, + "learning_rate": 0.0001999947062987846, + "loss": 6.1766, + "step": 2900 + }, + { + "epoch": 0.025396005509641873, + "grad_norm": 2.34375, + "learning_rate": 0.00019999442386897876, + "loss": 5.6974, + "step": 2950 + }, + { + "epoch": 0.025826446280991736, + "grad_norm": 2.0, + "learning_rate": 0.00019999413409979737, + "loss": 6.0692, + "step": 3000 + }, + { + "epoch": 0.025826446280991736, + "eval_loss": 6.841164588928223, + "eval_runtime": 22.2026, + "eval_samples_per_second": 28.825, + "eval_steps_per_second": 14.413, + "eval_tts_loss": 5.814084659717215, + "step": 3000 + }, + { + "epoch": 0.0262568870523416, + "grad_norm": 2.40625, + "learning_rate": 0.00019999383699126168, + "loss": 6.3387, + "step": 3050 + }, + { + "epoch": 0.02668732782369146, + "grad_norm": 2.265625, + "learning_rate": 0.00019999353254339348, + "loss": 6.078, + "step": 3100 + }, + { + "epoch": 0.027117768595041322, + "grad_norm": 3.21875, + "learning_rate": 0.00019999322075621513, + "loss": 6.05, + "step": 3150 + }, + { + "epoch": 0.027548209366391185, + "grad_norm": 1.890625, + "learning_rate": 0.00019999290162974959, + "loss": 5.7936, + "step": 3200 + }, + { + "epoch": 0.02797865013774105, + "grad_norm": 1.296875, + "learning_rate": 0.00019999257516402014, + "loss": 5.9983, + "step": 3250 + }, + { + "epoch": 0.028409090909090908, + "grad_norm": 2.09375, + "learning_rate": 0.00019999224135905082, + "loss": 5.9586, + "step": 3300 + }, + { + "epoch": 0.02883953168044077, + "grad_norm": 0.890625, + "learning_rate": 0.00019999190021486611, + "loss": 5.5419, + "step": 3350 + }, + { + "epoch": 0.029269972451790634, + "grad_norm": 3.71875, + "learning_rate": 0.00019999155173149112, + "loss": 5.386, + "step": 3400 + }, + { + "epoch": 0.029700413223140498, + "grad_norm": 1.6484375, + "learning_rate": 0.0001999911959089513, + "loss": 6.187, + "step": 3450 + }, + { + "epoch": 0.030130853994490357, + "grad_norm": 1.8203125, + "learning_rate": 0.0001999908327472729, + "loss": 6.3146, + "step": 3500 + }, + { + "epoch": 0.03056129476584022, + "grad_norm": 2.109375, + "learning_rate": 0.00019999046224648248, + "loss": 6.4992, + "step": 3550 + }, + { + "epoch": 0.030991735537190084, + "grad_norm": 3.046875, + "learning_rate": 0.00019999008440660726, + "loss": 5.8337, + "step": 3600 + }, + { + "epoch": 0.03142217630853995, + "grad_norm": 2.28125, + "learning_rate": 0.000199989699227675, + "loss": 5.7526, + "step": 3650 + }, + { + "epoch": 0.03185261707988981, + "grad_norm": 2.6875, + "learning_rate": 0.0001999893067097139, + "loss": 6.2363, + "step": 3700 + }, + { + "epoch": 0.032283057851239666, + "grad_norm": 3.078125, + "learning_rate": 0.0001999889068527529, + "loss": 6.3704, + "step": 3750 + }, + { + "epoch": 0.03271349862258953, + "grad_norm": 1.3671875, + "learning_rate": 0.00019998849965682122, + "loss": 5.7546, + "step": 3800 + }, + { + "epoch": 0.03314393939393939, + "grad_norm": 3.796875, + "learning_rate": 0.0001999880851219488, + "loss": 6.3607, + "step": 3850 + }, + { + "epoch": 0.033574380165289255, + "grad_norm": 2.453125, + "learning_rate": 0.00019998766324816607, + "loss": 6.3463, + "step": 3900 + }, + { + "epoch": 0.03400482093663912, + "grad_norm": 2.65625, + "learning_rate": 0.00019998723403550397, + "loss": 5.8813, + "step": 3950 + }, + { + "epoch": 0.03443526170798898, + "grad_norm": 1.8203125, + "learning_rate": 0.00019998679748399405, + "loss": 5.7699, + "step": 4000 + }, + { + "epoch": 0.034865702479338845, + "grad_norm": 1.3125, + "learning_rate": 0.0001999863535936683, + "loss": 5.9576, + "step": 4050 + }, + { + "epoch": 0.03529614325068871, + "grad_norm": 2.515625, + "learning_rate": 0.00019998590236455934, + "loss": 6.3861, + "step": 4100 + }, + { + "epoch": 0.035726584022038564, + "grad_norm": 1.40625, + "learning_rate": 0.00019998544379670023, + "loss": 5.977, + "step": 4150 + }, + { + "epoch": 0.03615702479338843, + "grad_norm": 3.390625, + "learning_rate": 0.00019998497789012473, + "loss": 5.6115, + "step": 4200 + }, + { + "epoch": 0.03658746556473829, + "grad_norm": 2.109375, + "learning_rate": 0.0001999845046448669, + "loss": 5.62, + "step": 4250 + }, + { + "epoch": 0.037017906336088154, + "grad_norm": 1.9375, + "learning_rate": 0.0001999840240609616, + "loss": 5.4771, + "step": 4300 + }, + { + "epoch": 0.03744834710743802, + "grad_norm": 2.953125, + "learning_rate": 0.00019998353613844406, + "loss": 5.2885, + "step": 4350 + }, + { + "epoch": 0.03787878787878788, + "grad_norm": 1.2421875, + "learning_rate": 0.0001999830408773501, + "loss": 5.4431, + "step": 4400 + }, + { + "epoch": 0.03830922865013774, + "grad_norm": 1.1875, + "learning_rate": 0.00019998253827771602, + "loss": 5.2422, + "step": 4450 + }, + { + "epoch": 0.038739669421487606, + "grad_norm": 1.28125, + "learning_rate": 0.00019998202833957875, + "loss": 5.8202, + "step": 4500 + }, + { + "epoch": 0.03917011019283746, + "grad_norm": 2.484375, + "learning_rate": 0.00019998151106297575, + "loss": 5.9591, + "step": 4550 + }, + { + "epoch": 0.039600550964187325, + "grad_norm": 0.67578125, + "learning_rate": 0.00019998098644794494, + "loss": 5.626, + "step": 4600 + }, + { + "epoch": 0.04003099173553719, + "grad_norm": 2.234375, + "learning_rate": 0.00019998045449452483, + "loss": 5.3955, + "step": 4650 + }, + { + "epoch": 0.04046143250688705, + "grad_norm": 0.69140625, + "learning_rate": 0.0001999799152027545, + "loss": 5.6826, + "step": 4700 + }, + { + "epoch": 0.040891873278236915, + "grad_norm": 2.4375, + "learning_rate": 0.00019997936857267344, + "loss": 6.3792, + "step": 4750 + }, + { + "epoch": 0.04132231404958678, + "grad_norm": 2.765625, + "learning_rate": 0.00019997881460432192, + "loss": 5.5136, + "step": 4800 + }, + { + "epoch": 0.04175275482093664, + "grad_norm": 2.296875, + "learning_rate": 0.00019997825329774046, + "loss": 5.5313, + "step": 4850 + }, + { + "epoch": 0.042183195592286504, + "grad_norm": 1.1796875, + "learning_rate": 0.00019997768465297036, + "loss": 5.5242, + "step": 4900 + }, + { + "epoch": 0.04261363636363636, + "grad_norm": 2.234375, + "learning_rate": 0.00019997710867005326, + "loss": 5.4816, + "step": 4950 + }, + { + "epoch": 0.043044077134986224, + "grad_norm": 2.09375, + "learning_rate": 0.00019997652534903154, + "loss": 5.5978, + "step": 5000 + }, + { + "epoch": 0.04347451790633609, + "grad_norm": 1.9921875, + "learning_rate": 0.00019997593468994794, + "loss": 5.6367, + "step": 5050 + }, + { + "epoch": 0.04390495867768595, + "grad_norm": 2.640625, + "learning_rate": 0.00019997533669284585, + "loss": 5.5829, + "step": 5100 + }, + { + "epoch": 0.04433539944903581, + "grad_norm": 2.453125, + "learning_rate": 0.00019997473135776914, + "loss": 5.4658, + "step": 5150 + }, + { + "epoch": 0.044765840220385676, + "grad_norm": 1.8984375, + "learning_rate": 0.00019997411868476225, + "loss": 5.6926, + "step": 5200 + }, + { + "epoch": 0.04519628099173554, + "grad_norm": 2.359375, + "learning_rate": 0.00019997349867387017, + "loss": 5.8453, + "step": 5250 + }, + { + "epoch": 0.0456267217630854, + "grad_norm": 1.5078125, + "learning_rate": 0.00019997287132513835, + "loss": 5.5398, + "step": 5300 + }, + { + "epoch": 0.04605716253443526, + "grad_norm": 2.03125, + "learning_rate": 0.0001999722366386129, + "loss": 5.5623, + "step": 5350 + }, + { + "epoch": 0.04648760330578512, + "grad_norm": 1.578125, + "learning_rate": 0.00019997159461434034, + "loss": 5.9666, + "step": 5400 + }, + { + "epoch": 0.046918044077134985, + "grad_norm": 2.40625, + "learning_rate": 0.00019997094525236786, + "loss": 5.7107, + "step": 5450 + }, + { + "epoch": 0.04734848484848485, + "grad_norm": 3.265625, + "learning_rate": 0.00019997028855274309, + "loss": 5.5953, + "step": 5500 + }, + { + "epoch": 0.04777892561983471, + "grad_norm": 2.578125, + "learning_rate": 0.0001999696245155142, + "loss": 5.4392, + "step": 5550 + }, + { + "epoch": 0.048209366391184574, + "grad_norm": 2.046875, + "learning_rate": 0.00019996895314073002, + "loss": 5.6254, + "step": 5600 + }, + { + "epoch": 0.04863980716253444, + "grad_norm": 1.8828125, + "learning_rate": 0.00019996827442843974, + "loss": 5.3142, + "step": 5650 + }, + { + "epoch": 0.0490702479338843, + "grad_norm": 2.109375, + "learning_rate": 0.0001999675883786932, + "loss": 5.831, + "step": 5700 + }, + { + "epoch": 0.04950068870523416, + "grad_norm": 2.1875, + "learning_rate": 0.00019996689499154075, + "loss": 5.4863, + "step": 5750 + }, + { + "epoch": 0.04993112947658402, + "grad_norm": 2.671875, + "learning_rate": 0.0001999661942670333, + "loss": 5.5451, + "step": 5800 + }, + { + "epoch": 0.05036157024793388, + "grad_norm": 2.890625, + "learning_rate": 0.0001999654862052223, + "loss": 6.0222, + "step": 5850 + }, + { + "epoch": 0.050792011019283746, + "grad_norm": 2.125, + "learning_rate": 0.00019996477080615966, + "loss": 5.6131, + "step": 5900 + }, + { + "epoch": 0.05122245179063361, + "grad_norm": 3.609375, + "learning_rate": 0.00019996404806989793, + "loss": 5.7678, + "step": 5950 + }, + { + "epoch": 0.05165289256198347, + "grad_norm": 0.89453125, + "learning_rate": 0.00019996331799649014, + "loss": 5.6118, + "step": 6000 + }, + { + "epoch": 0.05165289256198347, + "eval_loss": 6.263210296630859, + "eval_runtime": 21.7431, + "eval_samples_per_second": 29.435, + "eval_steps_per_second": 14.717, + "eval_tts_loss": 5.9185563337741085, + "step": 6000 + }, + { + "epoch": 0.052083333333333336, + "grad_norm": 3.578125, + "learning_rate": 0.00019996258058598988, + "loss": 6.1941, + "step": 6050 + }, + { + "epoch": 0.0525137741046832, + "grad_norm": 2.015625, + "learning_rate": 0.00019996183583845131, + "loss": 5.6182, + "step": 6100 + }, + { + "epoch": 0.052944214876033055, + "grad_norm": 1.6328125, + "learning_rate": 0.00019996108375392905, + "loss": 4.9397, + "step": 6150 + }, + { + "epoch": 0.05337465564738292, + "grad_norm": 2.296875, + "learning_rate": 0.0001999603243324783, + "loss": 5.3367, + "step": 6200 + }, + { + "epoch": 0.05380509641873278, + "grad_norm": 3.015625, + "learning_rate": 0.00019995955757415486, + "loss": 5.5759, + "step": 6250 + }, + { + "epoch": 0.054235537190082644, + "grad_norm": 0.9765625, + "learning_rate": 0.0001999587834790149, + "loss": 5.4017, + "step": 6300 + }, + { + "epoch": 0.05466597796143251, + "grad_norm": 1.6328125, + "learning_rate": 0.00019995800204711538, + "loss": 5.6686, + "step": 6350 + }, + { + "epoch": 0.05509641873278237, + "grad_norm": 2.8125, + "learning_rate": 0.0001999572132785135, + "loss": 5.6747, + "step": 6400 + }, + { + "epoch": 0.055526859504132234, + "grad_norm": 1.8359375, + "learning_rate": 0.00019995641717326725, + "loss": 6.1438, + "step": 6450 + }, + { + "epoch": 0.0559573002754821, + "grad_norm": 2.40625, + "learning_rate": 0.00019995561373143506, + "loss": 5.768, + "step": 6500 + }, + { + "epoch": 0.05638774104683195, + "grad_norm": 2.796875, + "learning_rate": 0.00019995480295307587, + "loss": 5.3983, + "step": 6550 + }, + { + "epoch": 0.056818181818181816, + "grad_norm": 2.234375, + "learning_rate": 0.00019995398483824917, + "loss": 5.8767, + "step": 6600 + }, + { + "epoch": 0.05724862258953168, + "grad_norm": 1.8515625, + "learning_rate": 0.0001999531593870151, + "loss": 5.8295, + "step": 6650 + }, + { + "epoch": 0.05767906336088154, + "grad_norm": 1.09375, + "learning_rate": 0.00019995232659943411, + "loss": 5.8067, + "step": 6700 + }, + { + "epoch": 0.058109504132231406, + "grad_norm": 2.875, + "learning_rate": 0.00019995148647556748, + "loss": 5.8018, + "step": 6750 + }, + { + "epoch": 0.05853994490358127, + "grad_norm": 2.546875, + "learning_rate": 0.00019995063901547674, + "loss": 5.6411, + "step": 6800 + }, + { + "epoch": 0.05897038567493113, + "grad_norm": 1.9140625, + "learning_rate": 0.00019994978421922417, + "loss": 5.5583, + "step": 6850 + }, + { + "epoch": 0.059400826446280995, + "grad_norm": 0.8984375, + "learning_rate": 0.0001999489220868725, + "loss": 5.5745, + "step": 6900 + }, + { + "epoch": 0.05983126721763085, + "grad_norm": 2.203125, + "learning_rate": 0.00019994805261848496, + "loss": 5.3674, + "step": 6950 + }, + { + "epoch": 0.060261707988980714, + "grad_norm": 1.9609375, + "learning_rate": 0.0001999471758141254, + "loss": 4.8269, + "step": 7000 + }, + { + "epoch": 0.06069214876033058, + "grad_norm": 2.84375, + "learning_rate": 0.00019994629167385824, + "loss": 5.3259, + "step": 7050 + }, + { + "epoch": 0.06112258953168044, + "grad_norm": 2.25, + "learning_rate": 0.00019994540019774824, + "loss": 5.4702, + "step": 7100 + }, + { + "epoch": 0.061553030303030304, + "grad_norm": 2.359375, + "learning_rate": 0.0001999445013858609, + "loss": 5.229, + "step": 7150 + }, + { + "epoch": 0.06198347107438017, + "grad_norm": 1.03125, + "learning_rate": 0.00019994359523826225, + "loss": 5.6542, + "step": 7200 + }, + { + "epoch": 0.06241391184573003, + "grad_norm": 3.34375, + "learning_rate": 0.00019994268175501872, + "loss": 5.5749, + "step": 7250 + }, + { + "epoch": 0.0628443526170799, + "grad_norm": 1.6328125, + "learning_rate": 0.00019994176093619736, + "loss": 5.0867, + "step": 7300 + }, + { + "epoch": 0.06327479338842976, + "grad_norm": 1.4765625, + "learning_rate": 0.0001999408327818658, + "loss": 5.463, + "step": 7350 + }, + { + "epoch": 0.06370523415977962, + "grad_norm": 1.7890625, + "learning_rate": 0.00019993989729209216, + "loss": 5.404, + "step": 7400 + }, + { + "epoch": 0.06413567493112948, + "grad_norm": 2.015625, + "learning_rate": 0.00019993895446694505, + "loss": 5.2026, + "step": 7450 + }, + { + "epoch": 0.06456611570247933, + "grad_norm": 1.4375, + "learning_rate": 0.00019993800430649373, + "loss": 5.4745, + "step": 7500 + }, + { + "epoch": 0.0649965564738292, + "grad_norm": 1.1875, + "learning_rate": 0.00019993704681080788, + "loss": 5.2587, + "step": 7550 + }, + { + "epoch": 0.06542699724517906, + "grad_norm": 2.96875, + "learning_rate": 0.00019993608197995785, + "loss": 5.2917, + "step": 7600 + }, + { + "epoch": 0.06585743801652892, + "grad_norm": 1.0390625, + "learning_rate": 0.00019993510981401442, + "loss": 5.5237, + "step": 7650 + }, + { + "epoch": 0.06628787878787878, + "grad_norm": 3.6875, + "learning_rate": 0.00019993413031304893, + "loss": 5.6417, + "step": 7700 + }, + { + "epoch": 0.06671831955922865, + "grad_norm": 2.1875, + "learning_rate": 0.0001999331434771333, + "loss": 5.3945, + "step": 7750 + }, + { + "epoch": 0.06714876033057851, + "grad_norm": 0.984375, + "learning_rate": 0.00019993214930633994, + "loss": 4.9617, + "step": 7800 + }, + { + "epoch": 0.06757920110192837, + "grad_norm": 2.140625, + "learning_rate": 0.0001999311478007418, + "loss": 5.226, + "step": 7850 + }, + { + "epoch": 0.06800964187327824, + "grad_norm": 2.3125, + "learning_rate": 0.0001999301389604125, + "loss": 5.7361, + "step": 7900 + }, + { + "epoch": 0.0684400826446281, + "grad_norm": 2.0625, + "learning_rate": 0.00019992912278542595, + "loss": 5.7224, + "step": 7950 + }, + { + "epoch": 0.06887052341597796, + "grad_norm": 3.296875, + "learning_rate": 0.0001999280992758568, + "loss": 5.4668, + "step": 8000 + }, + { + "epoch": 0.06930096418732783, + "grad_norm": 2.546875, + "learning_rate": 0.00019992706843178018, + "loss": 5.6261, + "step": 8050 + }, + { + "epoch": 0.06973140495867769, + "grad_norm": 2.9375, + "learning_rate": 0.0001999260302532717, + "loss": 5.6339, + "step": 8100 + }, + { + "epoch": 0.07016184573002755, + "grad_norm": 1.3203125, + "learning_rate": 0.0001999249847404076, + "loss": 5.1293, + "step": 8150 + }, + { + "epoch": 0.07059228650137742, + "grad_norm": 1.9296875, + "learning_rate": 0.00019992393189326464, + "loss": 5.2342, + "step": 8200 + }, + { + "epoch": 0.07102272727272728, + "grad_norm": 1.984375, + "learning_rate": 0.00019992287171192007, + "loss": 5.1224, + "step": 8250 + }, + { + "epoch": 0.07145316804407713, + "grad_norm": 2.890625, + "learning_rate": 0.0001999218041964517, + "loss": 5.6905, + "step": 8300 + }, + { + "epoch": 0.07188360881542699, + "grad_norm": 1.8984375, + "learning_rate": 0.00019992072934693788, + "loss": 5.5532, + "step": 8350 + }, + { + "epoch": 0.07231404958677685, + "grad_norm": 2.75, + "learning_rate": 0.00019991964716345752, + "loss": 5.9902, + "step": 8400 + }, + { + "epoch": 0.07274449035812672, + "grad_norm": 2.078125, + "learning_rate": 0.00019991855764609005, + "loss": 5.455, + "step": 8450 + }, + { + "epoch": 0.07317493112947658, + "grad_norm": 1.2109375, + "learning_rate": 0.0001999174607949154, + "loss": 5.5104, + "step": 8500 + }, + { + "epoch": 0.07360537190082644, + "grad_norm": 1.7890625, + "learning_rate": 0.00019991635661001413, + "loss": 5.6288, + "step": 8550 + }, + { + "epoch": 0.07403581267217631, + "grad_norm": 1.21875, + "learning_rate": 0.00019991524509146725, + "loss": 5.6406, + "step": 8600 + }, + { + "epoch": 0.07446625344352617, + "grad_norm": 2.421875, + "learning_rate": 0.00019991412623935638, + "loss": 5.186, + "step": 8650 + }, + { + "epoch": 0.07489669421487603, + "grad_norm": 2.390625, + "learning_rate": 0.00019991300005376357, + "loss": 5.3564, + "step": 8700 + }, + { + "epoch": 0.0753271349862259, + "grad_norm": 3.46875, + "learning_rate": 0.00019991186653477155, + "loss": 5.5007, + "step": 8750 + }, + { + "epoch": 0.07575757575757576, + "grad_norm": 2.0, + "learning_rate": 0.0001999107256824635, + "loss": 5.2855, + "step": 8800 + }, + { + "epoch": 0.07618801652892562, + "grad_norm": 1.890625, + "learning_rate": 0.00019990957749692313, + "loss": 5.382, + "step": 8850 + }, + { + "epoch": 0.07661845730027549, + "grad_norm": 1.8046875, + "learning_rate": 0.00019990842197823474, + "loss": 5.5644, + "step": 8900 + }, + { + "epoch": 0.07704889807162535, + "grad_norm": 2.125, + "learning_rate": 0.00019990725912648314, + "loss": 5.2274, + "step": 8950 + }, + { + "epoch": 0.07747933884297521, + "grad_norm": 1.75, + "learning_rate": 0.00019990608894175367, + "loss": 5.3645, + "step": 9000 + }, + { + "epoch": 0.07747933884297521, + "eval_loss": 6.056007385253906, + "eval_runtime": 21.6664, + "eval_samples_per_second": 29.539, + "eval_steps_per_second": 14.769, + "eval_tts_loss": 6.075921739670477, + "step": 9000 + }, + { + "epoch": 0.07790977961432508, + "grad_norm": 3.390625, + "learning_rate": 0.0001999049114241322, + "loss": 5.5031, + "step": 9050 + }, + { + "epoch": 0.07834022038567492, + "grad_norm": 2.234375, + "learning_rate": 0.00019990372657370523, + "loss": 5.5828, + "step": 9100 + }, + { + "epoch": 0.07877066115702479, + "grad_norm": 1.8671875, + "learning_rate": 0.00019990253439055965, + "loss": 5.2726, + "step": 9150 + }, + { + "epoch": 0.07920110192837465, + "grad_norm": 2.375, + "learning_rate": 0.000199901334874783, + "loss": 4.9601, + "step": 9200 + }, + { + "epoch": 0.07963154269972451, + "grad_norm": 1.984375, + "learning_rate": 0.00019990012802646333, + "loss": 5.3289, + "step": 9250 + }, + { + "epoch": 0.08006198347107438, + "grad_norm": 0.8828125, + "learning_rate": 0.00019989891384568917, + "loss": 5.1188, + "step": 9300 + }, + { + "epoch": 0.08049242424242424, + "grad_norm": 1.984375, + "learning_rate": 0.0001998976923325497, + "loss": 5.2473, + "step": 9350 + }, + { + "epoch": 0.0809228650137741, + "grad_norm": 1.9921875, + "learning_rate": 0.00019989646348713452, + "loss": 5.2454, + "step": 9400 + }, + { + "epoch": 0.08135330578512397, + "grad_norm": 1.9296875, + "learning_rate": 0.00019989522730953386, + "loss": 5.3245, + "step": 9450 + }, + { + "epoch": 0.08178374655647383, + "grad_norm": 2.5625, + "learning_rate": 0.00019989398379983845, + "loss": 5.3451, + "step": 9500 + }, + { + "epoch": 0.08221418732782369, + "grad_norm": 1.34375, + "learning_rate": 0.00019989273295813957, + "loss": 5.5107, + "step": 9550 + }, + { + "epoch": 0.08264462809917356, + "grad_norm": 2.34375, + "learning_rate": 0.000199891474784529, + "loss": 5.5401, + "step": 9600 + }, + { + "epoch": 0.08307506887052342, + "grad_norm": 2.375, + "learning_rate": 0.0001998902092790991, + "loss": 5.4677, + "step": 9650 + }, + { + "epoch": 0.08350550964187328, + "grad_norm": 3.140625, + "learning_rate": 0.00019988893644194277, + "loss": 5.4752, + "step": 9700 + }, + { + "epoch": 0.08393595041322315, + "grad_norm": 1.96875, + "learning_rate": 0.00019988765627315343, + "loss": 5.2186, + "step": 9750 + }, + { + "epoch": 0.08436639118457301, + "grad_norm": 2.8125, + "learning_rate": 0.000199886368772825, + "loss": 5.6908, + "step": 9800 + }, + { + "epoch": 0.08479683195592287, + "grad_norm": 2.078125, + "learning_rate": 0.00019988507394105205, + "loss": 4.9511, + "step": 9850 + }, + { + "epoch": 0.08522727272727272, + "grad_norm": 1.3359375, + "learning_rate": 0.00019988377177792955, + "loss": 5.3783, + "step": 9900 + }, + { + "epoch": 0.08565771349862258, + "grad_norm": 1.90625, + "learning_rate": 0.00019988246228355315, + "loss": 5.4935, + "step": 9950 + }, + { + "epoch": 0.08608815426997245, + "grad_norm": 2.328125, + "learning_rate": 0.00019988114545801887, + "loss": 5.4467, + "step": 10000 + }, + { + "epoch": 0.08651859504132231, + "grad_norm": 1.984375, + "learning_rate": 0.00019987982130142347, + "loss": 5.1502, + "step": 10050 + }, + { + "epoch": 0.08694903581267217, + "grad_norm": 2.375, + "learning_rate": 0.00019987848981386404, + "loss": 5.5199, + "step": 10100 + }, + { + "epoch": 0.08737947658402204, + "grad_norm": 1.8203125, + "learning_rate": 0.00019987715099543835, + "loss": 5.4569, + "step": 10150 + }, + { + "epoch": 0.0878099173553719, + "grad_norm": 1.609375, + "learning_rate": 0.00019987580484624468, + "loss": 5.3158, + "step": 10200 + }, + { + "epoch": 0.08824035812672176, + "grad_norm": 2.265625, + "learning_rate": 0.00019987445136638183, + "loss": 5.4302, + "step": 10250 + }, + { + "epoch": 0.08867079889807163, + "grad_norm": 2.1875, + "learning_rate": 0.00019987309055594912, + "loss": 5.4416, + "step": 10300 + }, + { + "epoch": 0.08910123966942149, + "grad_norm": 0.46875, + "learning_rate": 0.00019987172241504647, + "loss": 5.0894, + "step": 10350 + }, + { + "epoch": 0.08953168044077135, + "grad_norm": 1.9609375, + "learning_rate": 0.00019987034694377424, + "loss": 5.3113, + "step": 10400 + }, + { + "epoch": 0.08996212121212122, + "grad_norm": 1.796875, + "learning_rate": 0.00019986896414223346, + "loss": 5.2562, + "step": 10450 + }, + { + "epoch": 0.09039256198347108, + "grad_norm": 1.7734375, + "learning_rate": 0.00019986757401052553, + "loss": 5.2811, + "step": 10500 + }, + { + "epoch": 0.09082300275482094, + "grad_norm": 1.9140625, + "learning_rate": 0.0001998661765487526, + "loss": 5.3296, + "step": 10550 + }, + { + "epoch": 0.0912534435261708, + "grad_norm": 0.8828125, + "learning_rate": 0.00019986477175701713, + "loss": 5.5743, + "step": 10600 + }, + { + "epoch": 0.09168388429752067, + "grad_norm": 2.28125, + "learning_rate": 0.00019986335963542232, + "loss": 5.4655, + "step": 10650 + }, + { + "epoch": 0.09211432506887052, + "grad_norm": 1.859375, + "learning_rate": 0.00019986194018407173, + "loss": 5.0217, + "step": 10700 + }, + { + "epoch": 0.09254476584022038, + "grad_norm": 1.640625, + "learning_rate": 0.00019986051340306965, + "loss": 4.9831, + "step": 10750 + }, + { + "epoch": 0.09297520661157024, + "grad_norm": 1.328125, + "learning_rate": 0.0001998590792925207, + "loss": 5.1353, + "step": 10800 + }, + { + "epoch": 0.0934056473829201, + "grad_norm": 0.90234375, + "learning_rate": 0.0001998576378525302, + "loss": 5.5117, + "step": 10850 + }, + { + "epoch": 0.09383608815426997, + "grad_norm": 1.8984375, + "learning_rate": 0.0001998561890832039, + "loss": 5.5137, + "step": 10900 + }, + { + "epoch": 0.09426652892561983, + "grad_norm": 2.4375, + "learning_rate": 0.00019985473298464817, + "loss": 5.1704, + "step": 10950 + }, + { + "epoch": 0.0946969696969697, + "grad_norm": 2.09375, + "learning_rate": 0.0001998532695569699, + "loss": 5.1635, + "step": 11000 + }, + { + "epoch": 0.09512741046831956, + "grad_norm": 2.109375, + "learning_rate": 0.00019985179880027646, + "loss": 5.5797, + "step": 11050 + }, + { + "epoch": 0.09555785123966942, + "grad_norm": 2.3125, + "learning_rate": 0.00019985032071467588, + "loss": 5.4581, + "step": 11100 + }, + { + "epoch": 0.09598829201101929, + "grad_norm": 3.015625, + "learning_rate": 0.00019984883530027654, + "loss": 5.4119, + "step": 11150 + }, + { + "epoch": 0.09641873278236915, + "grad_norm": 2.03125, + "learning_rate": 0.00019984734255718754, + "loss": 5.5263, + "step": 11200 + }, + { + "epoch": 0.09684917355371901, + "grad_norm": 1.78125, + "learning_rate": 0.00019984584248551842, + "loss": 5.3163, + "step": 11250 + }, + { + "epoch": 0.09727961432506887, + "grad_norm": 1.9453125, + "learning_rate": 0.0001998443350853793, + "loss": 5.3459, + "step": 11300 + }, + { + "epoch": 0.09771005509641874, + "grad_norm": 1.7109375, + "learning_rate": 0.00019984282035688078, + "loss": 5.01, + "step": 11350 + }, + { + "epoch": 0.0981404958677686, + "grad_norm": 1.84375, + "learning_rate": 0.0001998412983001341, + "loss": 5.1599, + "step": 11400 + }, + { + "epoch": 0.09857093663911845, + "grad_norm": 3.125, + "learning_rate": 0.0001998397689152509, + "loss": 5.3925, + "step": 11450 + }, + { + "epoch": 0.09900137741046831, + "grad_norm": 1.484375, + "learning_rate": 0.0001998382322023435, + "loss": 5.4049, + "step": 11500 + }, + { + "epoch": 0.09943181818181818, + "grad_norm": 2.578125, + "learning_rate": 0.00019983668816152462, + "loss": 5.404, + "step": 11550 + }, + { + "epoch": 0.09986225895316804, + "grad_norm": 1.8984375, + "learning_rate": 0.0001998351367929077, + "loss": 5.8699, + "step": 11600 + }, + { + "epoch": 0.1002926997245179, + "grad_norm": 1.921875, + "learning_rate": 0.00019983357809660648, + "loss": 5.0001, + "step": 11650 + }, + { + "epoch": 0.10072314049586777, + "grad_norm": 2.828125, + "learning_rate": 0.00019983201207273546, + "loss": 5.2185, + "step": 11700 + }, + { + "epoch": 0.10115358126721763, + "grad_norm": 2.859375, + "learning_rate": 0.0001998304387214095, + "loss": 5.5458, + "step": 11750 + }, + { + "epoch": 0.10158402203856749, + "grad_norm": 2.5625, + "learning_rate": 0.00019982885804274418, + "loss": 5.4759, + "step": 11800 + }, + { + "epoch": 0.10201446280991736, + "grad_norm": 2.09375, + "learning_rate": 0.00019982727003685548, + "loss": 4.8674, + "step": 11850 + }, + { + "epoch": 0.10244490358126722, + "grad_norm": 3.34375, + "learning_rate": 0.0001998256747038599, + "loss": 5.4233, + "step": 11900 + }, + { + "epoch": 0.10287534435261708, + "grad_norm": 1.7890625, + "learning_rate": 0.0001998240720438746, + "loss": 5.2329, + "step": 11950 + }, + { + "epoch": 0.10330578512396695, + "grad_norm": 1.953125, + "learning_rate": 0.00019982246205701719, + "loss": 5.3219, + "step": 12000 + }, + { + "epoch": 0.10330578512396695, + "eval_loss": 5.914431095123291, + "eval_runtime": 21.8067, + "eval_samples_per_second": 29.349, + "eval_steps_per_second": 14.674, + "eval_tts_loss": 6.132826582490758, + "step": 12000 + }, + { + "epoch": 0.10373622589531681, + "grad_norm": 1.8125, + "learning_rate": 0.00019982084474340584, + "loss": 5.1588, + "step": 12050 + }, + { + "epoch": 0.10416666666666667, + "grad_norm": 2.859375, + "learning_rate": 0.00019981922010315923, + "loss": 5.4076, + "step": 12100 + }, + { + "epoch": 0.10459710743801653, + "grad_norm": 2.125, + "learning_rate": 0.00019981758813639665, + "loss": 5.226, + "step": 12150 + }, + { + "epoch": 0.1050275482093664, + "grad_norm": 1.203125, + "learning_rate": 0.00019981594884323786, + "loss": 5.5601, + "step": 12200 + }, + { + "epoch": 0.10545798898071625, + "grad_norm": 2.015625, + "learning_rate": 0.00019981430222380318, + "loss": 5.1232, + "step": 12250 + }, + { + "epoch": 0.10588842975206611, + "grad_norm": 1.140625, + "learning_rate": 0.00019981264827821348, + "loss": 5.0244, + "step": 12300 + }, + { + "epoch": 0.10631887052341597, + "grad_norm": 2.703125, + "learning_rate": 0.00019981098700659015, + "loss": 5.4677, + "step": 12350 + }, + { + "epoch": 0.10674931129476584, + "grad_norm": 1.9375, + "learning_rate": 0.00019980931840905513, + "loss": 4.7182, + "step": 12400 + }, + { + "epoch": 0.1071797520661157, + "grad_norm": 3.234375, + "learning_rate": 0.00019980764248573088, + "loss": 5.2571, + "step": 12450 + }, + { + "epoch": 0.10761019283746556, + "grad_norm": 2.4375, + "learning_rate": 0.00019980595923674042, + "loss": 5.2264, + "step": 12500 + }, + { + "epoch": 0.10804063360881543, + "grad_norm": 1.3984375, + "learning_rate": 0.00019980426866220724, + "loss": 5.1868, + "step": 12550 + }, + { + "epoch": 0.10847107438016529, + "grad_norm": 2.390625, + "learning_rate": 0.00019980257076225553, + "loss": 5.0848, + "step": 12600 + }, + { + "epoch": 0.10890151515151515, + "grad_norm": 2.4375, + "learning_rate": 0.00019980086553700983, + "loss": 5.2408, + "step": 12650 + }, + { + "epoch": 0.10933195592286502, + "grad_norm": 3.46875, + "learning_rate": 0.00019979915298659533, + "loss": 5.0821, + "step": 12700 + }, + { + "epoch": 0.10976239669421488, + "grad_norm": 3.328125, + "learning_rate": 0.00019979743311113773, + "loss": 5.2788, + "step": 12750 + }, + { + "epoch": 0.11019283746556474, + "grad_norm": 1.4765625, + "learning_rate": 0.00019979570591076325, + "loss": 4.6787, + "step": 12800 + }, + { + "epoch": 0.1106232782369146, + "grad_norm": 1.9453125, + "learning_rate": 0.0001997939713855987, + "loss": 5.2985, + "step": 12850 + }, + { + "epoch": 0.11105371900826447, + "grad_norm": 1.6796875, + "learning_rate": 0.00019979222953577135, + "loss": 5.2763, + "step": 12900 + }, + { + "epoch": 0.11148415977961433, + "grad_norm": 2.0625, + "learning_rate": 0.00019979048036140905, + "loss": 4.944, + "step": 12950 + }, + { + "epoch": 0.1119146005509642, + "grad_norm": 2.796875, + "learning_rate": 0.00019978872386264022, + "loss": 5.0702, + "step": 13000 + }, + { + "epoch": 0.11234504132231404, + "grad_norm": 1.7734375, + "learning_rate": 0.00019978696003959372, + "loss": 4.8859, + "step": 13050 + }, + { + "epoch": 0.1127754820936639, + "grad_norm": 3.015625, + "learning_rate": 0.00019978518889239908, + "loss": 5.0737, + "step": 13100 + }, + { + "epoch": 0.11320592286501377, + "grad_norm": 1.9765625, + "learning_rate": 0.00019978341042118625, + "loss": 4.9093, + "step": 13150 + }, + { + "epoch": 0.11363636363636363, + "grad_norm": 1.921875, + "learning_rate": 0.00019978162462608578, + "loss": 5.3666, + "step": 13200 + }, + { + "epoch": 0.1140668044077135, + "grad_norm": 1.7890625, + "learning_rate": 0.0001997798315072288, + "loss": 5.7781, + "step": 13250 + }, + { + "epoch": 0.11449724517906336, + "grad_norm": 3.078125, + "learning_rate": 0.00019977803106474681, + "loss": 5.461, + "step": 13300 + }, + { + "epoch": 0.11492768595041322, + "grad_norm": 2.296875, + "learning_rate": 0.00019977622329877206, + "loss": 5.0734, + "step": 13350 + }, + { + "epoch": 0.11535812672176309, + "grad_norm": 1.96875, + "learning_rate": 0.00019977440820943715, + "loss": 5.5124, + "step": 13400 + }, + { + "epoch": 0.11578856749311295, + "grad_norm": 2.625, + "learning_rate": 0.0001997725857968754, + "loss": 5.1575, + "step": 13450 + }, + { + "epoch": 0.11621900826446281, + "grad_norm": 1.2734375, + "learning_rate": 0.00019977075606122046, + "loss": 4.9595, + "step": 13500 + }, + { + "epoch": 0.11664944903581267, + "grad_norm": 2.578125, + "learning_rate": 0.00019976891900260672, + "loss": 5.2753, + "step": 13550 + }, + { + "epoch": 0.11707988980716254, + "grad_norm": 2.234375, + "learning_rate": 0.00019976707462116897, + "loss": 5.3432, + "step": 13600 + }, + { + "epoch": 0.1175103305785124, + "grad_norm": 2.921875, + "learning_rate": 0.00019976522291704262, + "loss": 5.3526, + "step": 13650 + }, + { + "epoch": 0.11794077134986226, + "grad_norm": 2.03125, + "learning_rate": 0.00019976336389036355, + "loss": 5.0664, + "step": 13700 + }, + { + "epoch": 0.11837121212121213, + "grad_norm": 2.703125, + "learning_rate": 0.00019976149754126824, + "loss": 5.2789, + "step": 13750 + }, + { + "epoch": 0.11880165289256199, + "grad_norm": 1.671875, + "learning_rate": 0.0001997596238698936, + "loss": 4.8826, + "step": 13800 + }, + { + "epoch": 0.11923209366391184, + "grad_norm": 1.9375, + "learning_rate": 0.00019975774287637723, + "loss": 5.2363, + "step": 13850 + }, + { + "epoch": 0.1196625344352617, + "grad_norm": 1.7734375, + "learning_rate": 0.00019975585456085716, + "loss": 5.1902, + "step": 13900 + }, + { + "epoch": 0.12009297520661157, + "grad_norm": 1.921875, + "learning_rate": 0.000199753958923472, + "loss": 5.099, + "step": 13950 + }, + { + "epoch": 0.12052341597796143, + "grad_norm": 1.375, + "learning_rate": 0.00019975205596436092, + "loss": 5.3852, + "step": 14000 + }, + { + "epoch": 0.12095385674931129, + "grad_norm": 3.0625, + "learning_rate": 0.00019975014568366351, + "loss": 4.9896, + "step": 14050 + }, + { + "epoch": 0.12138429752066116, + "grad_norm": 2.625, + "learning_rate": 0.00019974822808152003, + "loss": 5.0135, + "step": 14100 + }, + { + "epoch": 0.12181473829201102, + "grad_norm": 2.484375, + "learning_rate": 0.0001997463031580712, + "loss": 5.3912, + "step": 14150 + }, + { + "epoch": 0.12224517906336088, + "grad_norm": 0.609375, + "learning_rate": 0.00019974437091345836, + "loss": 5.245, + "step": 14200 + }, + { + "epoch": 0.12267561983471074, + "grad_norm": 1.4140625, + "learning_rate": 0.0001997424313478233, + "loss": 4.9216, + "step": 14250 + }, + { + "epoch": 0.12310606060606061, + "grad_norm": 1.3671875, + "learning_rate": 0.0001997404844613084, + "loss": 4.7488, + "step": 14300 + }, + { + "epoch": 0.12353650137741047, + "grad_norm": 2.171875, + "learning_rate": 0.00019973853025405652, + "loss": 4.8481, + "step": 14350 + }, + { + "epoch": 0.12396694214876033, + "grad_norm": 2.578125, + "learning_rate": 0.0001997365687262111, + "loss": 4.7889, + "step": 14400 + }, + { + "epoch": 0.1243973829201102, + "grad_norm": 0.51953125, + "learning_rate": 0.00019973459987791614, + "loss": 5.1318, + "step": 14450 + }, + { + "epoch": 0.12482782369146006, + "grad_norm": 1.7734375, + "learning_rate": 0.00019973262370931612, + "loss": 5.2832, + "step": 14500 + }, + { + "epoch": 0.1252582644628099, + "grad_norm": 1.9921875, + "learning_rate": 0.0001997306402205561, + "loss": 5.3014, + "step": 14550 + }, + { + "epoch": 0.1256887052341598, + "grad_norm": 1.9765625, + "learning_rate": 0.00019972864941178166, + "loss": 5.1253, + "step": 14600 + }, + { + "epoch": 0.12611914600550964, + "grad_norm": 1.90625, + "learning_rate": 0.00019972665128313892, + "loss": 5.3287, + "step": 14650 + }, + { + "epoch": 0.1265495867768595, + "grad_norm": 1.578125, + "learning_rate": 0.00019972464583477458, + "loss": 5.1724, + "step": 14700 + }, + { + "epoch": 0.12698002754820936, + "grad_norm": 2.59375, + "learning_rate": 0.00019972263306683577, + "loss": 5.1616, + "step": 14750 + }, + { + "epoch": 0.12741046831955924, + "grad_norm": 2.375, + "learning_rate": 0.00019972061297947023, + "loss": 5.0238, + "step": 14800 + }, + { + "epoch": 0.1278409090909091, + "grad_norm": 1.1015625, + "learning_rate": 0.0001997185855728263, + "loss": 5.0968, + "step": 14850 + }, + { + "epoch": 0.12827134986225897, + "grad_norm": 1.4765625, + "learning_rate": 0.00019971655084705271, + "loss": 5.3728, + "step": 14900 + }, + { + "epoch": 0.12870179063360881, + "grad_norm": 2.53125, + "learning_rate": 0.00019971450880229886, + "loss": 4.845, + "step": 14950 + }, + { + "epoch": 0.12913223140495866, + "grad_norm": 1.8984375, + "learning_rate": 0.00019971245943871456, + "loss": 4.943, + "step": 15000 + }, + { + "epoch": 0.12913223140495866, + "eval_loss": 5.828057765960693, + "eval_runtime": 22.6847, + "eval_samples_per_second": 28.213, + "eval_steps_per_second": 14.106, + "eval_tts_loss": 6.258981663526243, + "step": 15000 + }, + { + "epoch": 0.12956267217630854, + "grad_norm": 0.8359375, + "learning_rate": 0.0001997104027564503, + "loss": 4.6706, + "step": 15050 + }, + { + "epoch": 0.1299931129476584, + "grad_norm": 1.984375, + "learning_rate": 0.00019970833875565704, + "loss": 5.4813, + "step": 15100 + }, + { + "epoch": 0.13042355371900827, + "grad_norm": 1.59375, + "learning_rate": 0.0001997062674364862, + "loss": 4.9743, + "step": 15150 + }, + { + "epoch": 0.13085399449035812, + "grad_norm": 3.53125, + "learning_rate": 0.00019970418879908987, + "loss": 5.1961, + "step": 15200 + }, + { + "epoch": 0.131284435261708, + "grad_norm": 1.5390625, + "learning_rate": 0.00019970210284362062, + "loss": 4.988, + "step": 15250 + }, + { + "epoch": 0.13171487603305784, + "grad_norm": 1.1875, + "learning_rate": 0.0001997000095702315, + "loss": 5.1799, + "step": 15300 + }, + { + "epoch": 0.13214531680440772, + "grad_norm": 1.5390625, + "learning_rate": 0.0001996979089790762, + "loss": 5.1497, + "step": 15350 + }, + { + "epoch": 0.13257575757575757, + "grad_norm": 1.2734375, + "learning_rate": 0.0001996958010703089, + "loss": 5.6535, + "step": 15400 + }, + { + "epoch": 0.13300619834710745, + "grad_norm": 1.5546875, + "learning_rate": 0.00019969368584408427, + "loss": 5.2685, + "step": 15450 + }, + { + "epoch": 0.1334366391184573, + "grad_norm": 1.7734375, + "learning_rate": 0.00019969156330055762, + "loss": 5.1259, + "step": 15500 + }, + { + "epoch": 0.13386707988980717, + "grad_norm": 2.03125, + "learning_rate": 0.0001996894334398847, + "loss": 5.5331, + "step": 15550 + }, + { + "epoch": 0.13429752066115702, + "grad_norm": 1.03125, + "learning_rate": 0.00019968729626222185, + "loss": 5.2926, + "step": 15600 + }, + { + "epoch": 0.1347279614325069, + "grad_norm": 2.125, + "learning_rate": 0.00019968515176772597, + "loss": 5.5779, + "step": 15650 + }, + { + "epoch": 0.13515840220385675, + "grad_norm": 1.1640625, + "learning_rate": 0.00019968299995655443, + "loss": 4.4885, + "step": 15700 + }, + { + "epoch": 0.13558884297520662, + "grad_norm": 3.109375, + "learning_rate": 0.00019968084082886515, + "loss": 5.2712, + "step": 15750 + }, + { + "epoch": 0.13601928374655647, + "grad_norm": 2.875, + "learning_rate": 0.0001996786743848166, + "loss": 5.0487, + "step": 15800 + }, + { + "epoch": 0.13644972451790632, + "grad_norm": 1.984375, + "learning_rate": 0.00019967650062456787, + "loss": 5.0838, + "step": 15850 + }, + { + "epoch": 0.1368801652892562, + "grad_norm": 2.171875, + "learning_rate": 0.0001996743195482784, + "loss": 5.327, + "step": 15900 + }, + { + "epoch": 0.13731060606060605, + "grad_norm": 2.765625, + "learning_rate": 0.00019967213115610835, + "loss": 4.9537, + "step": 15950 + }, + { + "epoch": 0.13774104683195593, + "grad_norm": 2.109375, + "learning_rate": 0.00019966993544821832, + "loss": 5.4254, + "step": 16000 + }, + { + "epoch": 0.13817148760330578, + "grad_norm": 1.984375, + "learning_rate": 0.00019966773242476946, + "loss": 5.1223, + "step": 16050 + }, + { + "epoch": 0.13860192837465565, + "grad_norm": 1.6328125, + "learning_rate": 0.0001996655220859235, + "loss": 5.2873, + "step": 16100 + }, + { + "epoch": 0.1390323691460055, + "grad_norm": 2.609375, + "learning_rate": 0.00019966330443184265, + "loss": 5.3693, + "step": 16150 + }, + { + "epoch": 0.13946280991735538, + "grad_norm": 1.8828125, + "learning_rate": 0.0001996610794626897, + "loss": 5.4051, + "step": 16200 + }, + { + "epoch": 0.13989325068870523, + "grad_norm": 3.390625, + "learning_rate": 0.00019965884717862794, + "loss": 5.1706, + "step": 16250 + }, + { + "epoch": 0.1403236914600551, + "grad_norm": 3.8125, + "learning_rate": 0.00019965660757982119, + "loss": 4.7145, + "step": 16300 + }, + { + "epoch": 0.14075413223140495, + "grad_norm": 2.25, + "learning_rate": 0.00019965436066643388, + "loss": 5.057, + "step": 16350 + }, + { + "epoch": 0.14118457300275483, + "grad_norm": 2.625, + "learning_rate": 0.0001996521064386309, + "loss": 5.2056, + "step": 16400 + }, + { + "epoch": 0.14161501377410468, + "grad_norm": 1.9375, + "learning_rate": 0.00019964984489657773, + "loss": 5.1436, + "step": 16450 + }, + { + "epoch": 0.14204545454545456, + "grad_norm": 1.46875, + "learning_rate": 0.00019964757604044034, + "loss": 5.2476, + "step": 16500 + }, + { + "epoch": 0.1424758953168044, + "grad_norm": 2.40625, + "learning_rate": 0.00019964529987038528, + "loss": 5.2183, + "step": 16550 + }, + { + "epoch": 0.14290633608815426, + "grad_norm": 2.125, + "learning_rate": 0.0001996430163865796, + "loss": 5.3169, + "step": 16600 + }, + { + "epoch": 0.14333677685950413, + "grad_norm": 2.8125, + "learning_rate": 0.00019964072558919087, + "loss": 5.2249, + "step": 16650 + }, + { + "epoch": 0.14376721763085398, + "grad_norm": 2.40625, + "learning_rate": 0.00019963842747838727, + "loss": 5.4004, + "step": 16700 + }, + { + "epoch": 0.14419765840220386, + "grad_norm": 1.2578125, + "learning_rate": 0.0001996361220543375, + "loss": 5.2227, + "step": 16750 + }, + { + "epoch": 0.1446280991735537, + "grad_norm": 1.9453125, + "learning_rate": 0.00019963380931721072, + "loss": 5.0179, + "step": 16800 + }, + { + "epoch": 0.1450585399449036, + "grad_norm": 2.0625, + "learning_rate": 0.00019963148926717671, + "loss": 4.8251, + "step": 16850 + }, + { + "epoch": 0.14548898071625344, + "grad_norm": 3.546875, + "learning_rate": 0.00019962916190440576, + "loss": 5.2592, + "step": 16900 + }, + { + "epoch": 0.1459194214876033, + "grad_norm": 2.296875, + "learning_rate": 0.00019962682722906867, + "loss": 4.9115, + "step": 16950 + }, + { + "epoch": 0.14634986225895316, + "grad_norm": 2.15625, + "learning_rate": 0.00019962448524133683, + "loss": 5.3176, + "step": 17000 + }, + { + "epoch": 0.14678030303030304, + "grad_norm": 2.0625, + "learning_rate": 0.00019962213594138211, + "loss": 5.3436, + "step": 17050 + }, + { + "epoch": 0.1472107438016529, + "grad_norm": 1.1640625, + "learning_rate": 0.00019961977932937697, + "loss": 4.6579, + "step": 17100 + }, + { + "epoch": 0.14764118457300276, + "grad_norm": 2.109375, + "learning_rate": 0.00019961741540549434, + "loss": 4.5179, + "step": 17150 + }, + { + "epoch": 0.14807162534435261, + "grad_norm": 1.8125, + "learning_rate": 0.00019961504416990777, + "loss": 5.3218, + "step": 17200 + }, + { + "epoch": 0.1485020661157025, + "grad_norm": 2.5625, + "learning_rate": 0.00019961266562279129, + "loss": 5.0783, + "step": 17250 + }, + { + "epoch": 0.14893250688705234, + "grad_norm": 2.15625, + "learning_rate": 0.00019961027976431945, + "loss": 5.2808, + "step": 17300 + }, + { + "epoch": 0.14936294765840222, + "grad_norm": 2.09375, + "learning_rate": 0.00019960788659466742, + "loss": 4.6248, + "step": 17350 + }, + { + "epoch": 0.14979338842975207, + "grad_norm": 1.71875, + "learning_rate": 0.00019960548611401083, + "loss": 5.1576, + "step": 17400 + }, + { + "epoch": 0.15022382920110192, + "grad_norm": 1.53125, + "learning_rate": 0.00019960307832252584, + "loss": 5.4072, + "step": 17450 + }, + { + "epoch": 0.1506542699724518, + "grad_norm": 2.65625, + "learning_rate": 0.0001996006632203892, + "loss": 5.401, + "step": 17500 + }, + { + "epoch": 0.15108471074380164, + "grad_norm": 0.7890625, + "learning_rate": 0.00019959824080777817, + "loss": 5.081, + "step": 17550 + }, + { + "epoch": 0.15151515151515152, + "grad_norm": 1.890625, + "learning_rate": 0.00019959581108487057, + "loss": 5.4486, + "step": 17600 + }, + { + "epoch": 0.15194559228650137, + "grad_norm": 1.96875, + "learning_rate": 0.00019959337405184475, + "loss": 5.3831, + "step": 17650 + }, + { + "epoch": 0.15237603305785125, + "grad_norm": 1.71875, + "learning_rate": 0.00019959092970887952, + "loss": 5.0445, + "step": 17700 + }, + { + "epoch": 0.1528064738292011, + "grad_norm": 1.578125, + "learning_rate": 0.00019958847805615432, + "loss": 5.1443, + "step": 17750 + }, + { + "epoch": 0.15323691460055097, + "grad_norm": 1.453125, + "learning_rate": 0.00019958601909384914, + "loss": 5.1912, + "step": 17800 + }, + { + "epoch": 0.15366735537190082, + "grad_norm": 2.203125, + "learning_rate": 0.0001995835528221444, + "loss": 5.4637, + "step": 17850 + }, + { + "epoch": 0.1540977961432507, + "grad_norm": 2.21875, + "learning_rate": 0.00019958107924122114, + "loss": 5.22, + "step": 17900 + }, + { + "epoch": 0.15452823691460055, + "grad_norm": 1.2109375, + "learning_rate": 0.0001995785983512609, + "loss": 4.6821, + "step": 17950 + }, + { + "epoch": 0.15495867768595042, + "grad_norm": 2.40625, + "learning_rate": 0.00019957611015244581, + "loss": 5.3205, + "step": 18000 + }, + { + "epoch": 0.15495867768595042, + "eval_loss": 5.764292240142822, + "eval_runtime": 51.9481, + "eval_samples_per_second": 12.32, + "eval_steps_per_second": 6.16, + "eval_tts_loss": 6.31840074806535, + "step": 18000 + }, + { + "epoch": 0.15538911845730027, + "grad_norm": 2.0625, + "learning_rate": 0.0001995736146449585, + "loss": 5.047, + "step": 18050 + }, + { + "epoch": 0.15581955922865015, + "grad_norm": 3.890625, + "learning_rate": 0.0001995711118289821, + "loss": 5.2397, + "step": 18100 + }, + { + "epoch": 0.15625, + "grad_norm": 1.9296875, + "learning_rate": 0.00019956860170470032, + "loss": 5.2703, + "step": 18150 + }, + { + "epoch": 0.15668044077134985, + "grad_norm": 1.78125, + "learning_rate": 0.00019956608427229737, + "loss": 5.3346, + "step": 18200 + }, + { + "epoch": 0.15711088154269973, + "grad_norm": 2.734375, + "learning_rate": 0.00019956355953195808, + "loss": 5.038, + "step": 18250 + }, + { + "epoch": 0.15754132231404958, + "grad_norm": 3.453125, + "learning_rate": 0.00019956102748386776, + "loss": 4.848, + "step": 18300 + }, + { + "epoch": 0.15797176308539945, + "grad_norm": 2.109375, + "learning_rate": 0.0001995584881282122, + "loss": 5.0021, + "step": 18350 + }, + { + "epoch": 0.1584022038567493, + "grad_norm": 1.4609375, + "learning_rate": 0.00019955594146517784, + "loss": 5.301, + "step": 18400 + }, + { + "epoch": 0.15883264462809918, + "grad_norm": 1.8515625, + "learning_rate": 0.0001995533874949516, + "loss": 5.2125, + "step": 18450 + }, + { + "epoch": 0.15926308539944903, + "grad_norm": 2.65625, + "learning_rate": 0.00019955082621772087, + "loss": 4.8894, + "step": 18500 + }, + { + "epoch": 0.1596935261707989, + "grad_norm": 1.0625, + "learning_rate": 0.0001995482576336737, + "loss": 4.9082, + "step": 18550 + }, + { + "epoch": 0.16012396694214875, + "grad_norm": 1.875, + "learning_rate": 0.0001995456817429986, + "loss": 5.1639, + "step": 18600 + }, + { + "epoch": 0.16055440771349863, + "grad_norm": 3.125, + "learning_rate": 0.0001995430985458846, + "loss": 4.9199, + "step": 18650 + }, + { + "epoch": 0.16098484848484848, + "grad_norm": 1.34375, + "learning_rate": 0.0001995405080425214, + "loss": 5.023, + "step": 18700 + }, + { + "epoch": 0.16141528925619836, + "grad_norm": 1.2734375, + "learning_rate": 0.00019953791023309906, + "loss": 5.451, + "step": 18750 + }, + { + "epoch": 0.1618457300275482, + "grad_norm": 1.65625, + "learning_rate": 0.00019953530511780825, + "loss": 5.2853, + "step": 18800 + }, + { + "epoch": 0.16227617079889808, + "grad_norm": 1.9140625, + "learning_rate": 0.00019953269269684022, + "loss": 5.2157, + "step": 18850 + }, + { + "epoch": 0.16270661157024793, + "grad_norm": 2.28125, + "learning_rate": 0.00019953007297038671, + "loss": 5.4332, + "step": 18900 + }, + { + "epoch": 0.16313705234159778, + "grad_norm": 1.7578125, + "learning_rate": 0.00019952744593863995, + "loss": 5.0494, + "step": 18950 + }, + { + "epoch": 0.16356749311294766, + "grad_norm": 1.3125, + "learning_rate": 0.00019952481160179282, + "loss": 5.0298, + "step": 19000 + }, + { + "epoch": 0.1639979338842975, + "grad_norm": 2.34375, + "learning_rate": 0.00019952216996003866, + "loss": 4.9961, + "step": 19050 + }, + { + "epoch": 0.16442837465564739, + "grad_norm": 1.5859375, + "learning_rate": 0.00019951952101357131, + "loss": 5.2907, + "step": 19100 + }, + { + "epoch": 0.16485881542699724, + "grad_norm": 1.0234375, + "learning_rate": 0.00019951686476258528, + "loss": 5.0851, + "step": 19150 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 1.65625, + "learning_rate": 0.00019951420120727548, + "loss": 4.832, + "step": 19200 + }, + { + "epoch": 0.16571969696969696, + "grad_norm": 1.625, + "learning_rate": 0.00019951153034783743, + "loss": 5.2386, + "step": 19250 + }, + { + "epoch": 0.16615013774104684, + "grad_norm": 1.34375, + "learning_rate": 0.00019950885218446716, + "loss": 4.9833, + "step": 19300 + }, + { + "epoch": 0.1665805785123967, + "grad_norm": 2.375, + "learning_rate": 0.00019950616671736123, + "loss": 5.1157, + "step": 19350 + }, + { + "epoch": 0.16701101928374656, + "grad_norm": 2.0625, + "learning_rate": 0.00019950347394671677, + "loss": 5.1355, + "step": 19400 + }, + { + "epoch": 0.1674414600550964, + "grad_norm": 0.8203125, + "learning_rate": 0.00019950077387273136, + "loss": 4.8928, + "step": 19450 + }, + { + "epoch": 0.1678719008264463, + "grad_norm": 2.421875, + "learning_rate": 0.00019949806649560326, + "loss": 4.9164, + "step": 19500 + }, + { + "epoch": 0.16830234159779614, + "grad_norm": 2.90625, + "learning_rate": 0.00019949535181553116, + "loss": 4.7298, + "step": 19550 + }, + { + "epoch": 0.16873278236914602, + "grad_norm": 2.015625, + "learning_rate": 0.0001994926298327143, + "loss": 5.0695, + "step": 19600 + }, + { + "epoch": 0.16916322314049587, + "grad_norm": 3.0625, + "learning_rate": 0.0001994899005473525, + "loss": 4.9886, + "step": 19650 + }, + { + "epoch": 0.16959366391184574, + "grad_norm": 1.8046875, + "learning_rate": 0.00019948716395964603, + "loss": 4.9026, + "step": 19700 + }, + { + "epoch": 0.1700241046831956, + "grad_norm": 2.75, + "learning_rate": 0.0001994844200697958, + "loss": 4.9833, + "step": 19750 + }, + { + "epoch": 0.17045454545454544, + "grad_norm": 2.09375, + "learning_rate": 0.00019948166887800314, + "loss": 4.8935, + "step": 19800 + }, + { + "epoch": 0.17088498622589532, + "grad_norm": 1.765625, + "learning_rate": 0.00019947891038447007, + "loss": 5.2164, + "step": 19850 + }, + { + "epoch": 0.17131542699724517, + "grad_norm": 0.953125, + "learning_rate": 0.000199476144589399, + "loss": 5.0647, + "step": 19900 + }, + { + "epoch": 0.17174586776859505, + "grad_norm": 1.453125, + "learning_rate": 0.00019947337149299292, + "loss": 4.9461, + "step": 19950 + }, + { + "epoch": 0.1721763085399449, + "grad_norm": 1.234375, + "learning_rate": 0.00019947059109545542, + "loss": 5.3985, + "step": 20000 + }, + { + "epoch": 0.17260674931129477, + "grad_norm": 1.9140625, + "learning_rate": 0.00019946780339699056, + "loss": 4.8297, + "step": 20050 + }, + { + "epoch": 0.17303719008264462, + "grad_norm": 1.625, + "learning_rate": 0.00019946500839780292, + "loss": 5.1055, + "step": 20100 + }, + { + "epoch": 0.1734676308539945, + "grad_norm": 0.64453125, + "learning_rate": 0.00019946220609809765, + "loss": 5.3082, + "step": 20150 + }, + { + "epoch": 0.17389807162534435, + "grad_norm": 2.578125, + "learning_rate": 0.00019945939649808046, + "loss": 5.178, + "step": 20200 + }, + { + "epoch": 0.17432851239669422, + "grad_norm": 2.671875, + "learning_rate": 0.0001994565795979576, + "loss": 5.0136, + "step": 20250 + }, + { + "epoch": 0.17475895316804407, + "grad_norm": 1.484375, + "learning_rate": 0.00019945375539793576, + "loss": 5.0176, + "step": 20300 + }, + { + "epoch": 0.17518939393939395, + "grad_norm": 1.046875, + "learning_rate": 0.00019945092389822225, + "loss": 4.9571, + "step": 20350 + }, + { + "epoch": 0.1756198347107438, + "grad_norm": 2.46875, + "learning_rate": 0.0001994480850990249, + "loss": 5.1597, + "step": 20400 + }, + { + "epoch": 0.17605027548209368, + "grad_norm": 1.5703125, + "learning_rate": 0.00019944523900055207, + "loss": 5.1464, + "step": 20450 + }, + { + "epoch": 0.17648071625344353, + "grad_norm": 3.125, + "learning_rate": 0.00019944238560301266, + "loss": 4.6427, + "step": 20500 + }, + { + "epoch": 0.17691115702479338, + "grad_norm": 3.46875, + "learning_rate": 0.0001994395249066161, + "loss": 5.2441, + "step": 20550 + }, + { + "epoch": 0.17734159779614325, + "grad_norm": 2.59375, + "learning_rate": 0.00019943665691157239, + "loss": 4.9263, + "step": 20600 + }, + { + "epoch": 0.1777720385674931, + "grad_norm": 1.8203125, + "learning_rate": 0.000199433781618092, + "loss": 5.0064, + "step": 20650 + }, + { + "epoch": 0.17820247933884298, + "grad_norm": 1.7109375, + "learning_rate": 0.00019943089902638595, + "loss": 5.1541, + "step": 20700 + }, + { + "epoch": 0.17863292011019283, + "grad_norm": 1.828125, + "learning_rate": 0.00019942800913666587, + "loss": 5.2126, + "step": 20750 + }, + { + "epoch": 0.1790633608815427, + "grad_norm": 1.8359375, + "learning_rate": 0.00019942511194914378, + "loss": 5.073, + "step": 20800 + }, + { + "epoch": 0.17949380165289255, + "grad_norm": 2.5625, + "learning_rate": 0.00019942220746403246, + "loss": 5.1939, + "step": 20850 + }, + { + "epoch": 0.17992424242424243, + "grad_norm": 1.7109375, + "learning_rate": 0.000199419295681545, + "loss": 4.937, + "step": 20900 + }, + { + "epoch": 0.18035468319559228, + "grad_norm": 1.09375, + "learning_rate": 0.00019941637660189513, + "loss": 5.2756, + "step": 20950 + }, + { + "epoch": 0.18078512396694216, + "grad_norm": 0.93359375, + "learning_rate": 0.00019941345022529716, + "loss": 4.8813, + "step": 21000 + }, + { + "epoch": 0.18078512396694216, + "eval_loss": 5.706298828125, + "eval_runtime": 21.9517, + "eval_samples_per_second": 29.155, + "eval_steps_per_second": 14.577, + "eval_tts_loss": 6.426198953232276, + "step": 21000 + }, + { + "epoch": 0.181215564738292, + "grad_norm": 1.4375, + "learning_rate": 0.0001994105165519658, + "loss": 4.9124, + "step": 21050 + }, + { + "epoch": 0.18164600550964188, + "grad_norm": 1.7890625, + "learning_rate": 0.00019940757558211642, + "loss": 4.8005, + "step": 21100 + }, + { + "epoch": 0.18207644628099173, + "grad_norm": 2.515625, + "learning_rate": 0.00019940462731596488, + "loss": 5.1707, + "step": 21150 + }, + { + "epoch": 0.1825068870523416, + "grad_norm": 1.3046875, + "learning_rate": 0.00019940167175372758, + "loss": 5.1448, + "step": 21200 + }, + { + "epoch": 0.18293732782369146, + "grad_norm": 3.21875, + "learning_rate": 0.00019939870889562142, + "loss": 4.6402, + "step": 21250 + }, + { + "epoch": 0.18336776859504134, + "grad_norm": 2.5, + "learning_rate": 0.0001993957387418639, + "loss": 4.7229, + "step": 21300 + }, + { + "epoch": 0.18379820936639119, + "grad_norm": 1.90625, + "learning_rate": 0.000199392761292673, + "loss": 5.0102, + "step": 21350 + }, + { + "epoch": 0.18422865013774103, + "grad_norm": 1.0390625, + "learning_rate": 0.0001993897765482673, + "loss": 5.0873, + "step": 21400 + }, + { + "epoch": 0.1846590909090909, + "grad_norm": 1.9609375, + "learning_rate": 0.00019938678450886582, + "loss": 4.853, + "step": 21450 + }, + { + "epoch": 0.18508953168044076, + "grad_norm": 1.3359375, + "learning_rate": 0.00019938378517468818, + "loss": 4.9831, + "step": 21500 + }, + { + "epoch": 0.18551997245179064, + "grad_norm": 2.15625, + "learning_rate": 0.00019938077854595454, + "loss": 4.881, + "step": 21550 + }, + { + "epoch": 0.1859504132231405, + "grad_norm": 2.828125, + "learning_rate": 0.0001993777646228856, + "loss": 5.1419, + "step": 21600 + }, + { + "epoch": 0.18638085399449036, + "grad_norm": 1.5078125, + "learning_rate": 0.00019937474340570255, + "loss": 5.1254, + "step": 21650 + }, + { + "epoch": 0.1868112947658402, + "grad_norm": 1.609375, + "learning_rate": 0.00019937171489462714, + "loss": 4.7089, + "step": 21700 + }, + { + "epoch": 0.1872417355371901, + "grad_norm": 1.0234375, + "learning_rate": 0.00019936867908988166, + "loss": 5.0335, + "step": 21750 + }, + { + "epoch": 0.18767217630853994, + "grad_norm": 2.328125, + "learning_rate": 0.00019936563599168893, + "loss": 4.7765, + "step": 21800 + }, + { + "epoch": 0.18810261707988982, + "grad_norm": 2.59375, + "learning_rate": 0.0001993625856002723, + "loss": 4.9271, + "step": 21850 + }, + { + "epoch": 0.18853305785123967, + "grad_norm": 1.421875, + "learning_rate": 0.00019935952791585567, + "loss": 5.09, + "step": 21900 + }, + { + "epoch": 0.18896349862258954, + "grad_norm": 2.0625, + "learning_rate": 0.0001993564629386635, + "loss": 5.1261, + "step": 21950 + }, + { + "epoch": 0.1893939393939394, + "grad_norm": 1.875, + "learning_rate": 0.0001993533906689207, + "loss": 5.0418, + "step": 22000 + }, + { + "epoch": 0.18982438016528927, + "grad_norm": 2.484375, + "learning_rate": 0.00019935031110685278, + "loss": 5.4802, + "step": 22050 + }, + { + "epoch": 0.19025482093663912, + "grad_norm": 2.109375, + "learning_rate": 0.00019934722425268578, + "loss": 5.5118, + "step": 22100 + }, + { + "epoch": 0.19068526170798897, + "grad_norm": 1.7578125, + "learning_rate": 0.0001993441301066463, + "loss": 5.4153, + "step": 22150 + }, + { + "epoch": 0.19111570247933884, + "grad_norm": 1.8359375, + "learning_rate": 0.0001993410286689614, + "loss": 4.8482, + "step": 22200 + }, + { + "epoch": 0.1915461432506887, + "grad_norm": 1.9296875, + "learning_rate": 0.00019933791993985873, + "loss": 5.3068, + "step": 22250 + }, + { + "epoch": 0.19197658402203857, + "grad_norm": 1.2265625, + "learning_rate": 0.00019933480391956648, + "loss": 4.8431, + "step": 22300 + }, + { + "epoch": 0.19240702479338842, + "grad_norm": 2.046875, + "learning_rate": 0.00019933168060831333, + "loss": 5.0834, + "step": 22350 + }, + { + "epoch": 0.1928374655647383, + "grad_norm": 3.125, + "learning_rate": 0.00019932855000632854, + "loss": 5.0882, + "step": 22400 + }, + { + "epoch": 0.19326790633608815, + "grad_norm": 2.890625, + "learning_rate": 0.0001993254121138419, + "loss": 4.9763, + "step": 22450 + }, + { + "epoch": 0.19369834710743802, + "grad_norm": 2.8125, + "learning_rate": 0.00019932226693108372, + "loss": 5.2509, + "step": 22500 + }, + { + "epoch": 0.19412878787878787, + "grad_norm": 0.87890625, + "learning_rate": 0.00019931911445828483, + "loss": 5.0617, + "step": 22550 + }, + { + "epoch": 0.19455922865013775, + "grad_norm": 1.8984375, + "learning_rate": 0.00019931595469567664, + "loss": 5.1761, + "step": 22600 + }, + { + "epoch": 0.1949896694214876, + "grad_norm": 2.484375, + "learning_rate": 0.00019931278764349103, + "loss": 4.7677, + "step": 22650 + }, + { + "epoch": 0.19542011019283748, + "grad_norm": 2.078125, + "learning_rate": 0.0001993096133019605, + "loss": 4.954, + "step": 22700 + }, + { + "epoch": 0.19585055096418733, + "grad_norm": 1.8203125, + "learning_rate": 0.00019930643167131805, + "loss": 5.566, + "step": 22750 + }, + { + "epoch": 0.1962809917355372, + "grad_norm": 1.9453125, + "learning_rate": 0.00019930324275179716, + "loss": 4.9606, + "step": 22800 + }, + { + "epoch": 0.19671143250688705, + "grad_norm": 1.7109375, + "learning_rate": 0.00019930004654363192, + "loss": 5.078, + "step": 22850 + }, + { + "epoch": 0.1971418732782369, + "grad_norm": 1.125, + "learning_rate": 0.00019929684304705693, + "loss": 4.8986, + "step": 22900 + }, + { + "epoch": 0.19757231404958678, + "grad_norm": 1.9609375, + "learning_rate": 0.00019929363226230725, + "loss": 4.6425, + "step": 22950 + }, + { + "epoch": 0.19800275482093663, + "grad_norm": 2.46875, + "learning_rate": 0.00019929041418961864, + "loss": 4.8875, + "step": 23000 + }, + { + "epoch": 0.1984331955922865, + "grad_norm": 1.78125, + "learning_rate": 0.00019928718882922724, + "loss": 4.9262, + "step": 23050 + }, + { + "epoch": 0.19886363636363635, + "grad_norm": 1.8984375, + "learning_rate": 0.00019928395618136978, + "loss": 4.9604, + "step": 23100 + }, + { + "epoch": 0.19929407713498623, + "grad_norm": 1.375, + "learning_rate": 0.00019928071624628359, + "loss": 4.9066, + "step": 23150 + }, + { + "epoch": 0.19972451790633608, + "grad_norm": 2.0, + "learning_rate": 0.00019927746902420642, + "loss": 4.7944, + "step": 23200 + }, + { + "epoch": 0.20015495867768596, + "grad_norm": 2.046875, + "learning_rate": 0.00019927421451537662, + "loss": 5.0945, + "step": 23250 + }, + { + "epoch": 0.2005853994490358, + "grad_norm": 3.703125, + "learning_rate": 0.00019927095272003305, + "loss": 5.3205, + "step": 23300 + }, + { + "epoch": 0.20101584022038568, + "grad_norm": 2.25, + "learning_rate": 0.00019926768363841518, + "loss": 5.0747, + "step": 23350 + }, + { + "epoch": 0.20144628099173553, + "grad_norm": 3.421875, + "learning_rate": 0.0001992644072707629, + "loss": 4.8941, + "step": 23400 + }, + { + "epoch": 0.2018767217630854, + "grad_norm": 1.734375, + "learning_rate": 0.00019926112361731672, + "loss": 4.9982, + "step": 23450 + }, + { + "epoch": 0.20230716253443526, + "grad_norm": 2.390625, + "learning_rate": 0.00019925783267831757, + "loss": 5.2764, + "step": 23500 + }, + { + "epoch": 0.20273760330578514, + "grad_norm": 1.7109375, + "learning_rate": 0.0001992545344540071, + "loss": 4.7052, + "step": 23550 + }, + { + "epoch": 0.20316804407713498, + "grad_norm": 1.578125, + "learning_rate": 0.00019925122894462734, + "loss": 4.958, + "step": 23600 + }, + { + "epoch": 0.20359848484848486, + "grad_norm": 1.421875, + "learning_rate": 0.0001992479161504209, + "loss": 5.0593, + "step": 23650 + }, + { + "epoch": 0.2040289256198347, + "grad_norm": 2.25, + "learning_rate": 0.00019924459607163102, + "loss": 5.4678, + "step": 23700 + }, + { + "epoch": 0.20445936639118456, + "grad_norm": 2.03125, + "learning_rate": 0.00019924126870850126, + "loss": 4.9233, + "step": 23750 + }, + { + "epoch": 0.20488980716253444, + "grad_norm": 2.921875, + "learning_rate": 0.00019923793406127592, + "loss": 5.0045, + "step": 23800 + }, + { + "epoch": 0.2053202479338843, + "grad_norm": 1.6171875, + "learning_rate": 0.00019923459213019973, + "loss": 5.1675, + "step": 23850 + }, + { + "epoch": 0.20575068870523416, + "grad_norm": 2.484375, + "learning_rate": 0.000199231242915518, + "loss": 5.1394, + "step": 23900 + }, + { + "epoch": 0.206181129476584, + "grad_norm": 1.9921875, + "learning_rate": 0.0001992278864174765, + "loss": 5.2315, + "step": 23950 + }, + { + "epoch": 0.2066115702479339, + "grad_norm": 2.15625, + "learning_rate": 0.0001992245226363217, + "loss": 5.1691, + "step": 24000 + }, + { + "epoch": 0.2066115702479339, + "eval_loss": 5.665122032165527, + "eval_runtime": 21.7307, + "eval_samples_per_second": 29.451, + "eval_steps_per_second": 14.726, + "eval_tts_loss": 6.368378734417094, + "step": 24000 + }, + { + "epoch": 0.20704201101928374, + "grad_norm": 2.875, + "learning_rate": 0.00019922115157230036, + "loss": 4.621, + "step": 24050 + }, + { + "epoch": 0.20747245179063362, + "grad_norm": 1.03125, + "learning_rate": 0.00019921777322566003, + "loss": 4.8669, + "step": 24100 + }, + { + "epoch": 0.20790289256198347, + "grad_norm": 2.046875, + "learning_rate": 0.00019921438759664856, + "loss": 5.0948, + "step": 24150 + }, + { + "epoch": 0.20833333333333334, + "grad_norm": 3.015625, + "learning_rate": 0.00019921099468551455, + "loss": 5.3568, + "step": 24200 + }, + { + "epoch": 0.2087637741046832, + "grad_norm": 1.7734375, + "learning_rate": 0.00019920759449250697, + "loss": 5.0607, + "step": 24250 + }, + { + "epoch": 0.20919421487603307, + "grad_norm": 2.109375, + "learning_rate": 0.0001992041870178754, + "loss": 4.8101, + "step": 24300 + }, + { + "epoch": 0.20962465564738292, + "grad_norm": 1.25, + "learning_rate": 0.00019920077226186995, + "loss": 5.199, + "step": 24350 + }, + { + "epoch": 0.2100550964187328, + "grad_norm": 2.390625, + "learning_rate": 0.00019919735022474124, + "loss": 5.0901, + "step": 24400 + }, + { + "epoch": 0.21048553719008264, + "grad_norm": 2.140625, + "learning_rate": 0.00019919392090674047, + "loss": 5.2075, + "step": 24450 + }, + { + "epoch": 0.2109159779614325, + "grad_norm": 2.375, + "learning_rate": 0.00019919048430811932, + "loss": 4.9512, + "step": 24500 + }, + { + "epoch": 0.21134641873278237, + "grad_norm": 1.734375, + "learning_rate": 0.00019918704042913004, + "loss": 4.9328, + "step": 24550 + }, + { + "epoch": 0.21177685950413222, + "grad_norm": 2.046875, + "learning_rate": 0.00019918358927002543, + "loss": 5.1121, + "step": 24600 + }, + { + "epoch": 0.2122073002754821, + "grad_norm": 2.171875, + "learning_rate": 0.00019918013083105872, + "loss": 5.1803, + "step": 24650 + }, + { + "epoch": 0.21263774104683195, + "grad_norm": 2.546875, + "learning_rate": 0.00019917666511248382, + "loss": 4.8848, + "step": 24700 + }, + { + "epoch": 0.21306818181818182, + "grad_norm": 1.734375, + "learning_rate": 0.00019917319211455508, + "loss": 5.0794, + "step": 24750 + }, + { + "epoch": 0.21349862258953167, + "grad_norm": 2.515625, + "learning_rate": 0.00019916971183752742, + "loss": 5.141, + "step": 24800 + }, + { + "epoch": 0.21392906336088155, + "grad_norm": 2.109375, + "learning_rate": 0.00019916622428165629, + "loss": 5.0965, + "step": 24850 + }, + { + "epoch": 0.2143595041322314, + "grad_norm": 2.796875, + "learning_rate": 0.0001991627294471976, + "loss": 5.1589, + "step": 24900 + }, + { + "epoch": 0.21478994490358128, + "grad_norm": 1.171875, + "learning_rate": 0.00019915922733440797, + "loss": 4.7798, + "step": 24950 + }, + { + "epoch": 0.21522038567493113, + "grad_norm": 2.671875, + "learning_rate": 0.00019915571794354442, + "loss": 5.0545, + "step": 25000 + }, + { + "epoch": 0.215650826446281, + "grad_norm": 2.078125, + "learning_rate": 0.00019915220127486447, + "loss": 4.9558, + "step": 25050 + }, + { + "epoch": 0.21608126721763085, + "grad_norm": 1.59375, + "learning_rate": 0.00019914867732862633, + "loss": 4.7492, + "step": 25100 + }, + { + "epoch": 0.21651170798898073, + "grad_norm": 1.625, + "learning_rate": 0.00019914514610508855, + "loss": 4.8764, + "step": 25150 + }, + { + "epoch": 0.21694214876033058, + "grad_norm": 1.265625, + "learning_rate": 0.00019914160760451036, + "loss": 4.8477, + "step": 25200 + }, + { + "epoch": 0.21737258953168045, + "grad_norm": 2.515625, + "learning_rate": 0.00019913806182715152, + "loss": 5.0519, + "step": 25250 + }, + { + "epoch": 0.2178030303030303, + "grad_norm": 2.015625, + "learning_rate": 0.0001991345087732722, + "loss": 5.1269, + "step": 25300 + }, + { + "epoch": 0.21823347107438015, + "grad_norm": 2.796875, + "learning_rate": 0.00019913094844313327, + "loss": 4.9829, + "step": 25350 + }, + { + "epoch": 0.21866391184573003, + "grad_norm": 2.265625, + "learning_rate": 0.00019912738083699598, + "loss": 5.1944, + "step": 25400 + }, + { + "epoch": 0.21909435261707988, + "grad_norm": 1.796875, + "learning_rate": 0.00019912380595512225, + "loss": 4.7664, + "step": 25450 + }, + { + "epoch": 0.21952479338842976, + "grad_norm": 3.03125, + "learning_rate": 0.00019912022379777437, + "loss": 5.0875, + "step": 25500 + }, + { + "epoch": 0.2199552341597796, + "grad_norm": 3.359375, + "learning_rate": 0.00019911663436521538, + "loss": 4.8201, + "step": 25550 + }, + { + "epoch": 0.22038567493112948, + "grad_norm": 1.984375, + "learning_rate": 0.00019911303765770866, + "loss": 5.0903, + "step": 25600 + }, + { + "epoch": 0.22081611570247933, + "grad_norm": 2.1875, + "learning_rate": 0.00019910943367551823, + "loss": 4.6082, + "step": 25650 + }, + { + "epoch": 0.2212465564738292, + "grad_norm": 1.46875, + "learning_rate": 0.0001991058224189086, + "loss": 5.0438, + "step": 25700 + }, + { + "epoch": 0.22167699724517906, + "grad_norm": 2.015625, + "learning_rate": 0.00019910220388814486, + "loss": 5.5352, + "step": 25750 + }, + { + "epoch": 0.22210743801652894, + "grad_norm": 1.1484375, + "learning_rate": 0.00019909857808349254, + "loss": 4.7568, + "step": 25800 + }, + { + "epoch": 0.22253787878787878, + "grad_norm": 1.765625, + "learning_rate": 0.00019909494500521782, + "loss": 5.0173, + "step": 25850 + }, + { + "epoch": 0.22296831955922866, + "grad_norm": 2.25, + "learning_rate": 0.0001990913046535874, + "loss": 4.6996, + "step": 25900 + }, + { + "epoch": 0.2233987603305785, + "grad_norm": 2.25, + "learning_rate": 0.00019908765702886832, + "loss": 5.0819, + "step": 25950 + }, + { + "epoch": 0.2238292011019284, + "grad_norm": 0.8125, + "learning_rate": 0.00019908400213132849, + "loss": 5.0021, + "step": 26000 + }, + { + "epoch": 0.22425964187327824, + "grad_norm": 1.5078125, + "learning_rate": 0.00019908033996123603, + "loss": 4.7302, + "step": 26050 + }, + { + "epoch": 0.2246900826446281, + "grad_norm": 1.8828125, + "learning_rate": 0.00019907667051885983, + "loss": 5.0459, + "step": 26100 + }, + { + "epoch": 0.22512052341597796, + "grad_norm": 2.0625, + "learning_rate": 0.00019907299380446917, + "loss": 5.4258, + "step": 26150 + }, + { + "epoch": 0.2255509641873278, + "grad_norm": 2.046875, + "learning_rate": 0.00019906930981833393, + "loss": 4.7663, + "step": 26200 + }, + { + "epoch": 0.2259814049586777, + "grad_norm": 2.234375, + "learning_rate": 0.0001990656185607245, + "loss": 4.9105, + "step": 26250 + }, + { + "epoch": 0.22641184573002754, + "grad_norm": 2.5625, + "learning_rate": 0.0001990619200319118, + "loss": 4.9766, + "step": 26300 + }, + { + "epoch": 0.22684228650137742, + "grad_norm": 1.4375, + "learning_rate": 0.00019905821423216733, + "loss": 4.5605, + "step": 26350 + }, + { + "epoch": 0.22727272727272727, + "grad_norm": 2.78125, + "learning_rate": 0.00019905450116176304, + "loss": 5.0108, + "step": 26400 + }, + { + "epoch": 0.22770316804407714, + "grad_norm": 2.015625, + "learning_rate": 0.00019905078082097153, + "loss": 5.0003, + "step": 26450 + }, + { + "epoch": 0.228133608815427, + "grad_norm": 2.171875, + "learning_rate": 0.00019904705321006577, + "loss": 5.2441, + "step": 26500 + }, + { + "epoch": 0.22856404958677687, + "grad_norm": 1.5625, + "learning_rate": 0.00019904331832931943, + "loss": 4.8511, + "step": 26550 + }, + { + "epoch": 0.22899449035812672, + "grad_norm": 1.796875, + "learning_rate": 0.0001990395761790066, + "loss": 5.0509, + "step": 26600 + }, + { + "epoch": 0.2294249311294766, + "grad_norm": 2.359375, + "learning_rate": 0.000199035826759402, + "loss": 5.0985, + "step": 26650 + }, + { + "epoch": 0.22985537190082644, + "grad_norm": 2.046875, + "learning_rate": 0.0001990320700707808, + "loss": 4.9686, + "step": 26700 + }, + { + "epoch": 0.23028581267217632, + "grad_norm": 1.7890625, + "learning_rate": 0.00019902830611341873, + "loss": 5.3253, + "step": 26750 + }, + { + "epoch": 0.23071625344352617, + "grad_norm": 2.6875, + "learning_rate": 0.00019902453488759205, + "loss": 5.1206, + "step": 26800 + }, + { + "epoch": 0.23114669421487602, + "grad_norm": 4.15625, + "learning_rate": 0.00019902075639357752, + "loss": 4.9917, + "step": 26850 + }, + { + "epoch": 0.2315771349862259, + "grad_norm": 3.5, + "learning_rate": 0.0001990169706316526, + "loss": 4.4503, + "step": 26900 + }, + { + "epoch": 0.23200757575757575, + "grad_norm": 2.28125, + "learning_rate": 0.000199013177602095, + "loss": 5.2462, + "step": 26950 + }, + { + "epoch": 0.23243801652892562, + "grad_norm": 2.1875, + "learning_rate": 0.00019900937730518326, + "loss": 4.8377, + "step": 27000 + }, + { + "epoch": 0.23243801652892562, + "eval_loss": 5.625140190124512, + "eval_runtime": 21.8035, + "eval_samples_per_second": 29.353, + "eval_steps_per_second": 14.677, + "eval_tts_loss": 6.439033366911451, + "step": 27000 + }, + { + "epoch": 0.23286845730027547, + "grad_norm": 2.75, + "learning_rate": 0.0001990055697411962, + "loss": 5.2129, + "step": 27050 + }, + { + "epoch": 0.23329889807162535, + "grad_norm": 1.6640625, + "learning_rate": 0.00019900175491041337, + "loss": 5.4218, + "step": 27100 + }, + { + "epoch": 0.2337293388429752, + "grad_norm": 2.65625, + "learning_rate": 0.00019899793281311472, + "loss": 4.7001, + "step": 27150 + }, + { + "epoch": 0.23415977961432508, + "grad_norm": 2.53125, + "learning_rate": 0.0001989941034495808, + "loss": 5.4653, + "step": 27200 + }, + { + "epoch": 0.23459022038567492, + "grad_norm": 1.9921875, + "learning_rate": 0.0001989902668200927, + "loss": 4.8957, + "step": 27250 + }, + { + "epoch": 0.2350206611570248, + "grad_norm": 1.875, + "learning_rate": 0.00019898642292493198, + "loss": 5.0856, + "step": 27300 + }, + { + "epoch": 0.23545110192837465, + "grad_norm": 2.71875, + "learning_rate": 0.00019898257176438077, + "loss": 5.1142, + "step": 27350 + }, + { + "epoch": 0.23588154269972453, + "grad_norm": 2.6875, + "learning_rate": 0.0001989787133387218, + "loss": 5.0284, + "step": 27400 + }, + { + "epoch": 0.23631198347107438, + "grad_norm": 1.3359375, + "learning_rate": 0.0001989748476482382, + "loss": 5.4126, + "step": 27450 + }, + { + "epoch": 0.23674242424242425, + "grad_norm": 2.765625, + "learning_rate": 0.00019897097469321375, + "loss": 5.0992, + "step": 27500 + }, + { + "epoch": 0.2371728650137741, + "grad_norm": 2.703125, + "learning_rate": 0.00019896709447393273, + "loss": 5.2379, + "step": 27550 + }, + { + "epoch": 0.23760330578512398, + "grad_norm": 1.1328125, + "learning_rate": 0.00019896320699067986, + "loss": 4.486, + "step": 27600 + }, + { + "epoch": 0.23803374655647383, + "grad_norm": 2.328125, + "learning_rate": 0.00019895931224374054, + "loss": 4.7783, + "step": 27650 + }, + { + "epoch": 0.23846418732782368, + "grad_norm": 1.796875, + "learning_rate": 0.0001989554102334006, + "loss": 4.9106, + "step": 27700 + }, + { + "epoch": 0.23889462809917356, + "grad_norm": 3.296875, + "learning_rate": 0.00019895150095994649, + "loss": 5.1797, + "step": 27750 + }, + { + "epoch": 0.2393250688705234, + "grad_norm": 1.703125, + "learning_rate": 0.00019894758442366505, + "loss": 5.3304, + "step": 27800 + }, + { + "epoch": 0.23975550964187328, + "grad_norm": 1.84375, + "learning_rate": 0.00019894366062484387, + "loss": 5.0072, + "step": 27850 + }, + { + "epoch": 0.24018595041322313, + "grad_norm": 2.1875, + "learning_rate": 0.00019893972956377086, + "loss": 5.0721, + "step": 27900 + }, + { + "epoch": 0.240616391184573, + "grad_norm": 2.21875, + "learning_rate": 0.00019893579124073457, + "loss": 5.1727, + "step": 27950 + }, + { + "epoch": 0.24104683195592286, + "grad_norm": 2.671875, + "learning_rate": 0.00019893184565602409, + "loss": 4.9496, + "step": 28000 + }, + { + "epoch": 0.24147727272727273, + "grad_norm": 3.09375, + "learning_rate": 0.00019892789280992896, + "loss": 5.1087, + "step": 28050 + }, + { + "epoch": 0.24190771349862258, + "grad_norm": 1.671875, + "learning_rate": 0.00019892393270273934, + "loss": 4.8786, + "step": 28100 + }, + { + "epoch": 0.24233815426997246, + "grad_norm": 2.03125, + "learning_rate": 0.00019891996533474593, + "loss": 4.4389, + "step": 28150 + }, + { + "epoch": 0.2427685950413223, + "grad_norm": 2.21875, + "learning_rate": 0.0001989159907062399, + "loss": 4.5667, + "step": 28200 + }, + { + "epoch": 0.2431990358126722, + "grad_norm": 2.0, + "learning_rate": 0.00019891200881751295, + "loss": 4.7674, + "step": 28250 + }, + { + "epoch": 0.24362947658402204, + "grad_norm": 2.953125, + "learning_rate": 0.0001989080196688574, + "loss": 4.8112, + "step": 28300 + }, + { + "epoch": 0.2440599173553719, + "grad_norm": 2.21875, + "learning_rate": 0.000198904023260566, + "loss": 5.1912, + "step": 28350 + }, + { + "epoch": 0.24449035812672176, + "grad_norm": 2.796875, + "learning_rate": 0.00019890001959293208, + "loss": 4.9896, + "step": 28400 + }, + { + "epoch": 0.2449207988980716, + "grad_norm": 3.09375, + "learning_rate": 0.00019889600866624952, + "loss": 4.7702, + "step": 28450 + }, + { + "epoch": 0.2453512396694215, + "grad_norm": 2.796875, + "learning_rate": 0.0001988919904808127, + "loss": 4.751, + "step": 28500 + }, + { + "epoch": 0.24578168044077134, + "grad_norm": 2.609375, + "learning_rate": 0.0001988879650369165, + "loss": 5.1838, + "step": 28550 + }, + { + "epoch": 0.24621212121212122, + "grad_norm": 2.046875, + "learning_rate": 0.0001988839323348565, + "loss": 5.2764, + "step": 28600 + }, + { + "epoch": 0.24664256198347106, + "grad_norm": 1.421875, + "learning_rate": 0.00019887989237492862, + "loss": 5.0927, + "step": 28650 + }, + { + "epoch": 0.24707300275482094, + "grad_norm": 2.921875, + "learning_rate": 0.00019887584515742933, + "loss": 4.6307, + "step": 28700 + }, + { + "epoch": 0.2475034435261708, + "grad_norm": 1.078125, + "learning_rate": 0.00019887179068265578, + "loss": 5.0863, + "step": 28750 + }, + { + "epoch": 0.24793388429752067, + "grad_norm": 3.3125, + "learning_rate": 0.00019886772895090553, + "loss": 4.9633, + "step": 28800 + }, + { + "epoch": 0.24836432506887052, + "grad_norm": 3.25, + "learning_rate": 0.0001988636599624767, + "loss": 5.2618, + "step": 28850 + }, + { + "epoch": 0.2487947658402204, + "grad_norm": 3.046875, + "learning_rate": 0.0001988595837176679, + "loss": 4.9674, + "step": 28900 + }, + { + "epoch": 0.24922520661157024, + "grad_norm": 1.8125, + "learning_rate": 0.00019885550021677838, + "loss": 4.7908, + "step": 28950 + }, + { + "epoch": 0.24965564738292012, + "grad_norm": 2.3125, + "learning_rate": 0.00019885140946010785, + "loss": 5.018, + "step": 29000 + }, + { + "epoch": 0.25008608815426997, + "grad_norm": 0.703125, + "learning_rate": 0.00019884731144795655, + "loss": 5.1767, + "step": 29050 + }, + { + "epoch": 0.2505165289256198, + "grad_norm": 2.375, + "learning_rate": 0.0001988432061806252, + "loss": 4.9314, + "step": 29100 + }, + { + "epoch": 0.2509469696969697, + "grad_norm": 2.234375, + "learning_rate": 0.00019883909365841528, + "loss": 4.8966, + "step": 29150 + }, + { + "epoch": 0.2513774104683196, + "grad_norm": 2.484375, + "learning_rate": 0.00019883497388162847, + "loss": 4.9233, + "step": 29200 + }, + { + "epoch": 0.2518078512396694, + "grad_norm": 2.078125, + "learning_rate": 0.00019883084685056728, + "loss": 5.1622, + "step": 29250 + }, + { + "epoch": 0.25223829201101927, + "grad_norm": 2.265625, + "learning_rate": 0.00019882671256553453, + "loss": 5.2549, + "step": 29300 + }, + { + "epoch": 0.2526687327823691, + "grad_norm": 2.171875, + "learning_rate": 0.00019882257102683376, + "loss": 4.8635, + "step": 29350 + }, + { + "epoch": 0.253099173553719, + "grad_norm": 2.546875, + "learning_rate": 0.00019881842223476884, + "loss": 4.8018, + "step": 29400 + }, + { + "epoch": 0.2535296143250689, + "grad_norm": 1.8671875, + "learning_rate": 0.00019881426618964437, + "loss": 5.2167, + "step": 29450 + }, + { + "epoch": 0.2539600550964187, + "grad_norm": 2.234375, + "learning_rate": 0.00019881010289176537, + "loss": 4.8911, + "step": 29500 + }, + { + "epoch": 0.2543904958677686, + "grad_norm": 1.5390625, + "learning_rate": 0.00019880593234143738, + "loss": 5.5701, + "step": 29550 + }, + { + "epoch": 0.2548209366391185, + "grad_norm": 2.46875, + "learning_rate": 0.00019880175453896657, + "loss": 4.6705, + "step": 29600 + }, + { + "epoch": 0.2552513774104683, + "grad_norm": 1.0859375, + "learning_rate": 0.00019879756948465956, + "loss": 4.6333, + "step": 29650 + }, + { + "epoch": 0.2556818181818182, + "grad_norm": 2.390625, + "learning_rate": 0.00019879337717882353, + "loss": 4.9866, + "step": 29700 + }, + { + "epoch": 0.256112258953168, + "grad_norm": 1.8125, + "learning_rate": 0.00019878917762176612, + "loss": 5.3702, + "step": 29750 + }, + { + "epoch": 0.25654269972451793, + "grad_norm": 2.171875, + "learning_rate": 0.00019878497081379565, + "loss": 4.6589, + "step": 29800 + }, + { + "epoch": 0.2569731404958678, + "grad_norm": 2.03125, + "learning_rate": 0.00019878075675522087, + "loss": 4.8626, + "step": 29850 + }, + { + "epoch": 0.25740358126721763, + "grad_norm": 2.109375, + "learning_rate": 0.00019877653544635109, + "loss": 5.1007, + "step": 29900 + }, + { + "epoch": 0.2578340220385675, + "grad_norm": 1.765625, + "learning_rate": 0.00019877230688749613, + "loss": 4.7402, + "step": 29950 + }, + { + "epoch": 0.25826446280991733, + "grad_norm": 2.484375, + "learning_rate": 0.00019876807107896633, + "loss": 4.6641, + "step": 30000 + }, + { + "epoch": 0.25826446280991733, + "eval_loss": 5.591824531555176, + "eval_runtime": 22.0839, + "eval_samples_per_second": 28.98, + "eval_steps_per_second": 14.49, + "eval_tts_loss": 6.428915743431203, + "step": 30000 + }, + { + "epoch": 0.25869490358126723, + "grad_norm": 1.3984375, + "learning_rate": 0.00019876382802107263, + "loss": 4.726, + "step": 30050 + }, + { + "epoch": 0.2591253443526171, + "grad_norm": 2.125, + "learning_rate": 0.00019875957771412644, + "loss": 5.2665, + "step": 30100 + }, + { + "epoch": 0.25955578512396693, + "grad_norm": 4.28125, + "learning_rate": 0.00019875532015843978, + "loss": 4.9599, + "step": 30150 + }, + { + "epoch": 0.2599862258953168, + "grad_norm": 2.21875, + "learning_rate": 0.00019875105535432507, + "loss": 4.8781, + "step": 30200 + }, + { + "epoch": 0.2604166666666667, + "grad_norm": 2.484375, + "learning_rate": 0.00019874678330209533, + "loss": 5.1677, + "step": 30250 + }, + { + "epoch": 0.26084710743801653, + "grad_norm": 2.90625, + "learning_rate": 0.0001987425040020642, + "loss": 5.4665, + "step": 30300 + }, + { + "epoch": 0.2612775482093664, + "grad_norm": 2.25, + "learning_rate": 0.0001987382174545457, + "loss": 5.0117, + "step": 30350 + }, + { + "epoch": 0.26170798898071623, + "grad_norm": 2.671875, + "learning_rate": 0.00019873392365985448, + "loss": 4.5659, + "step": 30400 + }, + { + "epoch": 0.26213842975206614, + "grad_norm": 1.5234375, + "learning_rate": 0.0001987296226183057, + "loss": 4.8937, + "step": 30450 + }, + { + "epoch": 0.262568870523416, + "grad_norm": 1.3671875, + "learning_rate": 0.00019872531433021506, + "loss": 5.4825, + "step": 30500 + }, + { + "epoch": 0.26299931129476584, + "grad_norm": 1.953125, + "learning_rate": 0.00019872099879589877, + "loss": 5.0801, + "step": 30550 + }, + { + "epoch": 0.2634297520661157, + "grad_norm": 2.96875, + "learning_rate": 0.00019871667601567352, + "loss": 5.1016, + "step": 30600 + }, + { + "epoch": 0.2638601928374656, + "grad_norm": 1.1796875, + "learning_rate": 0.0001987123459898567, + "loss": 5.0152, + "step": 30650 + }, + { + "epoch": 0.26429063360881544, + "grad_norm": 3.984375, + "learning_rate": 0.00019870800871876605, + "loss": 4.9308, + "step": 30700 + }, + { + "epoch": 0.2647210743801653, + "grad_norm": 0.7734375, + "learning_rate": 0.0001987036642027199, + "loss": 4.6529, + "step": 30750 + }, + { + "epoch": 0.26515151515151514, + "grad_norm": 1.8984375, + "learning_rate": 0.0001986993124420372, + "loss": 4.8347, + "step": 30800 + }, + { + "epoch": 0.265581955922865, + "grad_norm": 1.9921875, + "learning_rate": 0.00019869495343703732, + "loss": 4.3844, + "step": 30850 + }, + { + "epoch": 0.2660123966942149, + "grad_norm": 3.03125, + "learning_rate": 0.0001986905871880402, + "loss": 4.7447, + "step": 30900 + }, + { + "epoch": 0.26644283746556474, + "grad_norm": 2.140625, + "learning_rate": 0.00019868621369536634, + "loss": 4.6668, + "step": 30950 + }, + { + "epoch": 0.2668732782369146, + "grad_norm": 2.140625, + "learning_rate": 0.00019868183295933668, + "loss": 4.811, + "step": 31000 + }, + { + "epoch": 0.26730371900826444, + "grad_norm": 3.125, + "learning_rate": 0.00019867744498027287, + "loss": 5.2151, + "step": 31050 + }, + { + "epoch": 0.26773415977961434, + "grad_norm": 1.9921875, + "learning_rate": 0.00019867304975849684, + "loss": 5.2058, + "step": 31100 + }, + { + "epoch": 0.2681646005509642, + "grad_norm": 4.34375, + "learning_rate": 0.00019866864729433127, + "loss": 5.1636, + "step": 31150 + }, + { + "epoch": 0.26859504132231404, + "grad_norm": 1.6796875, + "learning_rate": 0.0001986642375880993, + "loss": 5.0151, + "step": 31200 + }, + { + "epoch": 0.2690254820936639, + "grad_norm": 1.328125, + "learning_rate": 0.00019865982064012455, + "loss": 4.6903, + "step": 31250 + }, + { + "epoch": 0.2694559228650138, + "grad_norm": 2.140625, + "learning_rate": 0.00019865539645073127, + "loss": 5.1308, + "step": 31300 + }, + { + "epoch": 0.26988636363636365, + "grad_norm": 1.7890625, + "learning_rate": 0.0001986509650202441, + "loss": 4.9051, + "step": 31350 + }, + { + "epoch": 0.2703168044077135, + "grad_norm": 1.5703125, + "learning_rate": 0.00019864652634898842, + "loss": 4.8119, + "step": 31400 + }, + { + "epoch": 0.27074724517906334, + "grad_norm": 2.8125, + "learning_rate": 0.0001986420804372899, + "loss": 5.0894, + "step": 31450 + }, + { + "epoch": 0.27117768595041325, + "grad_norm": 2.09375, + "learning_rate": 0.00019863762728547494, + "loss": 5.0927, + "step": 31500 + }, + { + "epoch": 0.2716081267217631, + "grad_norm": 2.140625, + "learning_rate": 0.00019863316689387034, + "loss": 5.1256, + "step": 31550 + }, + { + "epoch": 0.27203856749311295, + "grad_norm": 2.9375, + "learning_rate": 0.00019862869926280351, + "loss": 4.9328, + "step": 31600 + }, + { + "epoch": 0.2724690082644628, + "grad_norm": 1.921875, + "learning_rate": 0.00019862422439260237, + "loss": 4.9261, + "step": 31650 + }, + { + "epoch": 0.27289944903581265, + "grad_norm": 2.203125, + "learning_rate": 0.00019861974228359535, + "loss": 4.8875, + "step": 31700 + }, + { + "epoch": 0.27332988980716255, + "grad_norm": 3.125, + "learning_rate": 0.00019861525293611142, + "loss": 4.849, + "step": 31750 + }, + { + "epoch": 0.2737603305785124, + "grad_norm": 1.8203125, + "learning_rate": 0.00019861075635048013, + "loss": 5.115, + "step": 31800 + }, + { + "epoch": 0.27419077134986225, + "grad_norm": 1.9140625, + "learning_rate": 0.00019860625252703145, + "loss": 4.9396, + "step": 31850 + }, + { + "epoch": 0.2746212121212121, + "grad_norm": 1.5703125, + "learning_rate": 0.000198601741466096, + "loss": 4.9381, + "step": 31900 + }, + { + "epoch": 0.275051652892562, + "grad_norm": 1.4609375, + "learning_rate": 0.0001985972231680049, + "loss": 4.8725, + "step": 31950 + }, + { + "epoch": 0.27548209366391185, + "grad_norm": 3.4375, + "learning_rate": 0.0001985926976330897, + "loss": 4.9087, + "step": 32000 + }, + { + "epoch": 0.2759125344352617, + "grad_norm": 1.2890625, + "learning_rate": 0.0001985881648616827, + "loss": 4.9087, + "step": 32050 + }, + { + "epoch": 0.27634297520661155, + "grad_norm": 2.328125, + "learning_rate": 0.00019858362485411645, + "loss": 5.0181, + "step": 32100 + }, + { + "epoch": 0.27677341597796146, + "grad_norm": 2.125, + "learning_rate": 0.0001985790776107243, + "loss": 5.1998, + "step": 32150 + }, + { + "epoch": 0.2772038567493113, + "grad_norm": 1.3203125, + "learning_rate": 0.00019857452313183993, + "loss": 4.4733, + "step": 32200 + }, + { + "epoch": 0.27763429752066116, + "grad_norm": 1.140625, + "learning_rate": 0.00019856996141779764, + "loss": 5.2766, + "step": 32250 + }, + { + "epoch": 0.278064738292011, + "grad_norm": 2.203125, + "learning_rate": 0.00019856539246893227, + "loss": 5.408, + "step": 32300 + }, + { + "epoch": 0.2784951790633609, + "grad_norm": 1.734375, + "learning_rate": 0.00019856081628557914, + "loss": 4.8507, + "step": 32350 + }, + { + "epoch": 0.27892561983471076, + "grad_norm": 2.5625, + "learning_rate": 0.00019855623286807418, + "loss": 5.0983, + "step": 32400 + }, + { + "epoch": 0.2793560606060606, + "grad_norm": 1.828125, + "learning_rate": 0.00019855164221675375, + "loss": 4.8606, + "step": 32450 + }, + { + "epoch": 0.27978650137741046, + "grad_norm": 2.390625, + "learning_rate": 0.00019854704433195486, + "loss": 4.9416, + "step": 32500 + }, + { + "epoch": 0.2802169421487603, + "grad_norm": 2.203125, + "learning_rate": 0.00019854243921401491, + "loss": 4.8396, + "step": 32550 + }, + { + "epoch": 0.2806473829201102, + "grad_norm": 1.9765625, + "learning_rate": 0.00019853782686327196, + "loss": 5.062, + "step": 32600 + }, + { + "epoch": 0.28107782369146006, + "grad_norm": 2.265625, + "learning_rate": 0.00019853320728006454, + "loss": 4.5996, + "step": 32650 + }, + { + "epoch": 0.2815082644628099, + "grad_norm": 3.296875, + "learning_rate": 0.00019852858046473165, + "loss": 5.3079, + "step": 32700 + }, + { + "epoch": 0.28193870523415976, + "grad_norm": 2.546875, + "learning_rate": 0.00019852394641761302, + "loss": 4.8414, + "step": 32750 + }, + { + "epoch": 0.28236914600550966, + "grad_norm": 1.4140625, + "learning_rate": 0.00019851930513904863, + "loss": 5.0199, + "step": 32800 + }, + { + "epoch": 0.2827995867768595, + "grad_norm": 1.3515625, + "learning_rate": 0.00019851465662937926, + "loss": 4.7016, + "step": 32850 + }, + { + "epoch": 0.28323002754820936, + "grad_norm": 1.21875, + "learning_rate": 0.00019851000088894606, + "loss": 5.0567, + "step": 32900 + }, + { + "epoch": 0.2836604683195592, + "grad_norm": 1.125, + "learning_rate": 0.0001985053379180907, + "loss": 4.6057, + "step": 32950 + }, + { + "epoch": 0.2840909090909091, + "grad_norm": 2.234375, + "learning_rate": 0.00019850066771715552, + "loss": 5.2001, + "step": 33000 + }, + { + "epoch": 0.2840909090909091, + "eval_loss": 5.571471214294434, + "eval_runtime": 21.7382, + "eval_samples_per_second": 29.441, + "eval_steps_per_second": 14.721, + "eval_tts_loss": 6.407271942785936, + "step": 33000 + }, + { + "epoch": 0.28452134986225897, + "grad_norm": 2.109375, + "learning_rate": 0.00019849599028648322, + "loss": 4.811, + "step": 33050 + }, + { + "epoch": 0.2849517906336088, + "grad_norm": 2.46875, + "learning_rate": 0.00019849130562641718, + "loss": 4.6642, + "step": 33100 + }, + { + "epoch": 0.28538223140495866, + "grad_norm": 1.8359375, + "learning_rate": 0.0001984866137373012, + "loss": 4.4969, + "step": 33150 + }, + { + "epoch": 0.2858126721763085, + "grad_norm": 2.1875, + "learning_rate": 0.00019848191461947968, + "loss": 4.6382, + "step": 33200 + }, + { + "epoch": 0.2862431129476584, + "grad_norm": 2.125, + "learning_rate": 0.00019847720827329752, + "loss": 5.1926, + "step": 33250 + }, + { + "epoch": 0.28667355371900827, + "grad_norm": 1.6796875, + "learning_rate": 0.00019847249469910017, + "loss": 5.0532, + "step": 33300 + }, + { + "epoch": 0.2871039944903581, + "grad_norm": 1.9765625, + "learning_rate": 0.00019846777389723354, + "loss": 4.789, + "step": 33350 + }, + { + "epoch": 0.28753443526170797, + "grad_norm": 1.0390625, + "learning_rate": 0.00019846304586804419, + "loss": 4.7021, + "step": 33400 + }, + { + "epoch": 0.28796487603305787, + "grad_norm": 1.6796875, + "learning_rate": 0.00019845831061187912, + "loss": 4.7884, + "step": 33450 + }, + { + "epoch": 0.2883953168044077, + "grad_norm": 3.015625, + "learning_rate": 0.00019845356812908587, + "loss": 4.9174, + "step": 33500 + }, + { + "epoch": 0.28882575757575757, + "grad_norm": 1.1640625, + "learning_rate": 0.00019844881842001258, + "loss": 5.1589, + "step": 33550 + }, + { + "epoch": 0.2892561983471074, + "grad_norm": 1.9140625, + "learning_rate": 0.0001984440614850078, + "loss": 4.7724, + "step": 33600 + }, + { + "epoch": 0.2896866391184573, + "grad_norm": 1.6875, + "learning_rate": 0.00019843929732442071, + "loss": 5.1374, + "step": 33650 + }, + { + "epoch": 0.2901170798898072, + "grad_norm": 2.703125, + "learning_rate": 0.00019843452593860104, + "loss": 4.3952, + "step": 33700 + }, + { + "epoch": 0.290547520661157, + "grad_norm": 2.109375, + "learning_rate": 0.0001984297473278989, + "loss": 5.0098, + "step": 33750 + }, + { + "epoch": 0.29097796143250687, + "grad_norm": 2.140625, + "learning_rate": 0.00019842496149266513, + "loss": 4.8159, + "step": 33800 + }, + { + "epoch": 0.2914084022038568, + "grad_norm": 1.9765625, + "learning_rate": 0.00019842016843325095, + "loss": 5.1525, + "step": 33850 + }, + { + "epoch": 0.2918388429752066, + "grad_norm": 1.921875, + "learning_rate": 0.00019841536815000814, + "loss": 4.7047, + "step": 33900 + }, + { + "epoch": 0.2922692837465565, + "grad_norm": 1.3515625, + "learning_rate": 0.00019841056064328906, + "loss": 5.241, + "step": 33950 + }, + { + "epoch": 0.2926997245179063, + "grad_norm": 2.234375, + "learning_rate": 0.0001984057459134466, + "loss": 5.0182, + "step": 34000 + }, + { + "epoch": 0.2931301652892562, + "grad_norm": 1.6171875, + "learning_rate": 0.00019840092396083408, + "loss": 4.9981, + "step": 34050 + }, + { + "epoch": 0.2935606060606061, + "grad_norm": 2.09375, + "learning_rate": 0.00019839609478580544, + "loss": 5.21, + "step": 34100 + }, + { + "epoch": 0.2939910468319559, + "grad_norm": 2.21875, + "learning_rate": 0.00019839125838871517, + "loss": 4.8529, + "step": 34150 + }, + { + "epoch": 0.2944214876033058, + "grad_norm": 1.203125, + "learning_rate": 0.00019838641476991825, + "loss": 4.7993, + "step": 34200 + }, + { + "epoch": 0.2948519283746556, + "grad_norm": 2.203125, + "learning_rate": 0.00019838156392977012, + "loss": 4.9721, + "step": 34250 + }, + { + "epoch": 0.29528236914600553, + "grad_norm": 1.7109375, + "learning_rate": 0.00019837670586862692, + "loss": 4.8963, + "step": 34300 + }, + { + "epoch": 0.2957128099173554, + "grad_norm": 1.578125, + "learning_rate": 0.0001983718405868451, + "loss": 5.0663, + "step": 34350 + }, + { + "epoch": 0.29614325068870523, + "grad_norm": 1.203125, + "learning_rate": 0.0001983669680847819, + "loss": 4.7316, + "step": 34400 + }, + { + "epoch": 0.2965736914600551, + "grad_norm": 2.0625, + "learning_rate": 0.0001983620883627949, + "loss": 5.0639, + "step": 34450 + }, + { + "epoch": 0.297004132231405, + "grad_norm": 2.546875, + "learning_rate": 0.00019835720142124218, + "loss": 4.6894, + "step": 34500 + }, + { + "epoch": 0.29743457300275483, + "grad_norm": 0.828125, + "learning_rate": 0.00019835230726048252, + "loss": 4.9307, + "step": 34550 + }, + { + "epoch": 0.2978650137741047, + "grad_norm": 2.25, + "learning_rate": 0.00019834740588087514, + "loss": 4.8107, + "step": 34600 + }, + { + "epoch": 0.29829545454545453, + "grad_norm": 3.40625, + "learning_rate": 0.00019834249728277974, + "loss": 4.5763, + "step": 34650 + }, + { + "epoch": 0.29872589531680444, + "grad_norm": 2.53125, + "learning_rate": 0.00019833758146655666, + "loss": 5.0326, + "step": 34700 + }, + { + "epoch": 0.2991563360881543, + "grad_norm": 2.859375, + "learning_rate": 0.00019833265843256665, + "loss": 4.803, + "step": 34750 + }, + { + "epoch": 0.29958677685950413, + "grad_norm": 1.984375, + "learning_rate": 0.0001983277281811711, + "loss": 4.6619, + "step": 34800 + }, + { + "epoch": 0.300017217630854, + "grad_norm": 1.25, + "learning_rate": 0.00019832279071273185, + "loss": 4.7168, + "step": 34850 + }, + { + "epoch": 0.30044765840220383, + "grad_norm": 3.734375, + "learning_rate": 0.0001983178460276113, + "loss": 4.6914, + "step": 34900 + }, + { + "epoch": 0.30087809917355374, + "grad_norm": 1.2890625, + "learning_rate": 0.0001983128941261724, + "loss": 5.0874, + "step": 34950 + }, + { + "epoch": 0.3013085399449036, + "grad_norm": 2.28125, + "learning_rate": 0.0001983079350087786, + "loss": 5.0653, + "step": 35000 + }, + { + "epoch": 0.30173898071625344, + "grad_norm": 1.8671875, + "learning_rate": 0.00019830296867579387, + "loss": 5.085, + "step": 35050 + }, + { + "epoch": 0.3021694214876033, + "grad_norm": 3.53125, + "learning_rate": 0.00019829799512758277, + "loss": 5.2666, + "step": 35100 + }, + { + "epoch": 0.3025998622589532, + "grad_norm": 2.046875, + "learning_rate": 0.0001982930143645103, + "loss": 5.2022, + "step": 35150 + }, + { + "epoch": 0.30303030303030304, + "grad_norm": 3.578125, + "learning_rate": 0.00019828802638694205, + "loss": 4.8083, + "step": 35200 + }, + { + "epoch": 0.3034607438016529, + "grad_norm": 1.859375, + "learning_rate": 0.00019828303119524417, + "loss": 4.9576, + "step": 35250 + }, + { + "epoch": 0.30389118457300274, + "grad_norm": 1.4765625, + "learning_rate": 0.00019827802878978322, + "loss": 4.984, + "step": 35300 + }, + { + "epoch": 0.30432162534435264, + "grad_norm": 2.21875, + "learning_rate": 0.00019827301917092643, + "loss": 4.5902, + "step": 35350 + }, + { + "epoch": 0.3047520661157025, + "grad_norm": 3.9375, + "learning_rate": 0.00019826800233904148, + "loss": 4.8497, + "step": 35400 + }, + { + "epoch": 0.30518250688705234, + "grad_norm": 1.2734375, + "learning_rate": 0.00019826297829449657, + "loss": 5.0224, + "step": 35450 + }, + { + "epoch": 0.3056129476584022, + "grad_norm": 3.21875, + "learning_rate": 0.00019825794703766047, + "loss": 5.0513, + "step": 35500 + }, + { + "epoch": 0.30604338842975204, + "grad_norm": 1.171875, + "learning_rate": 0.00019825290856890249, + "loss": 4.9505, + "step": 35550 + }, + { + "epoch": 0.30647382920110194, + "grad_norm": 2.796875, + "learning_rate": 0.0001982478628885924, + "loss": 4.6933, + "step": 35600 + }, + { + "epoch": 0.3069042699724518, + "grad_norm": 3.21875, + "learning_rate": 0.00019824280999710054, + "loss": 4.8053, + "step": 35650 + }, + { + "epoch": 0.30733471074380164, + "grad_norm": 3.265625, + "learning_rate": 0.00019823774989479782, + "loss": 4.5851, + "step": 35700 + }, + { + "epoch": 0.3077651515151515, + "grad_norm": 1.4609375, + "learning_rate": 0.00019823268258205564, + "loss": 4.8727, + "step": 35750 + }, + { + "epoch": 0.3081955922865014, + "grad_norm": 0.61328125, + "learning_rate": 0.0001982276080592459, + "loss": 4.9059, + "step": 35800 + }, + { + "epoch": 0.30862603305785125, + "grad_norm": 1.9140625, + "learning_rate": 0.00019822252632674105, + "loss": 5.0789, + "step": 35850 + }, + { + "epoch": 0.3090564738292011, + "grad_norm": 2.90625, + "learning_rate": 0.0001982174373849141, + "loss": 5.0924, + "step": 35900 + }, + { + "epoch": 0.30948691460055094, + "grad_norm": 2.5, + "learning_rate": 0.00019821234123413862, + "loss": 5.1402, + "step": 35950 + }, + { + "epoch": 0.30991735537190085, + "grad_norm": 2.03125, + "learning_rate": 0.00019820723787478855, + "loss": 5.0153, + "step": 36000 + }, + { + "epoch": 0.30991735537190085, + "eval_loss": 5.54570198059082, + "eval_runtime": 21.7411, + "eval_samples_per_second": 29.437, + "eval_steps_per_second": 14.719, + "eval_tts_loss": 6.579320323344557, + "step": 36000 + }, + { + "epoch": 0.3103477961432507, + "grad_norm": 2.03125, + "learning_rate": 0.00019820212730723853, + "loss": 4.8887, + "step": 36050 + }, + { + "epoch": 0.31077823691460055, + "grad_norm": 0.98828125, + "learning_rate": 0.00019819700953186366, + "loss": 5.1352, + "step": 36100 + }, + { + "epoch": 0.3112086776859504, + "grad_norm": 3.484375, + "learning_rate": 0.00019819188454903955, + "loss": 5.1846, + "step": 36150 + }, + { + "epoch": 0.3116391184573003, + "grad_norm": 0.94921875, + "learning_rate": 0.0001981867523591424, + "loss": 5.0842, + "step": 36200 + }, + { + "epoch": 0.31206955922865015, + "grad_norm": 1.671875, + "learning_rate": 0.00019818161296254887, + "loss": 4.758, + "step": 36250 + }, + { + "epoch": 0.3125, + "grad_norm": 2.640625, + "learning_rate": 0.0001981764663596362, + "loss": 4.8793, + "step": 36300 + }, + { + "epoch": 0.31293044077134985, + "grad_norm": 2.203125, + "learning_rate": 0.00019817131255078213, + "loss": 4.7948, + "step": 36350 + }, + { + "epoch": 0.3133608815426997, + "grad_norm": 3.40625, + "learning_rate": 0.00019816615153636492, + "loss": 4.7607, + "step": 36400 + }, + { + "epoch": 0.3137913223140496, + "grad_norm": 0.9375, + "learning_rate": 0.0001981609833167634, + "loss": 4.9049, + "step": 36450 + }, + { + "epoch": 0.31422176308539945, + "grad_norm": 2.03125, + "learning_rate": 0.00019815580789235692, + "loss": 4.8472, + "step": 36500 + }, + { + "epoch": 0.3146522038567493, + "grad_norm": 2.109375, + "learning_rate": 0.00019815062526352533, + "loss": 4.8835, + "step": 36550 + }, + { + "epoch": 0.31508264462809915, + "grad_norm": 2.0, + "learning_rate": 0.000198145435430649, + "loss": 4.9046, + "step": 36600 + }, + { + "epoch": 0.31551308539944906, + "grad_norm": 3.40625, + "learning_rate": 0.0001981402383941089, + "loss": 4.6263, + "step": 36650 + }, + { + "epoch": 0.3159435261707989, + "grad_norm": 2.84375, + "learning_rate": 0.00019813503415428644, + "loss": 5.2997, + "step": 36700 + }, + { + "epoch": 0.31637396694214875, + "grad_norm": 2.375, + "learning_rate": 0.00019812982271156363, + "loss": 5.2157, + "step": 36750 + }, + { + "epoch": 0.3168044077134986, + "grad_norm": 1.7734375, + "learning_rate": 0.00019812460406632297, + "loss": 5.0089, + "step": 36800 + }, + { + "epoch": 0.3172348484848485, + "grad_norm": 2.3125, + "learning_rate": 0.00019811937821894747, + "loss": 4.4919, + "step": 36850 + }, + { + "epoch": 0.31766528925619836, + "grad_norm": 1.828125, + "learning_rate": 0.0001981141451698207, + "loss": 4.6964, + "step": 36900 + }, + { + "epoch": 0.3180957300275482, + "grad_norm": 1.53125, + "learning_rate": 0.00019810890491932682, + "loss": 4.7849, + "step": 36950 + }, + { + "epoch": 0.31852617079889806, + "grad_norm": 1.7109375, + "learning_rate": 0.0001981036574678504, + "loss": 4.8493, + "step": 37000 + }, + { + "epoch": 0.31895661157024796, + "grad_norm": 2.1875, + "learning_rate": 0.0001980984028157766, + "loss": 5.0103, + "step": 37050 + }, + { + "epoch": 0.3193870523415978, + "grad_norm": 2.34375, + "learning_rate": 0.00019809314096349106, + "loss": 4.7065, + "step": 37100 + }, + { + "epoch": 0.31981749311294766, + "grad_norm": 2.171875, + "learning_rate": 0.00019808787191138004, + "loss": 4.8871, + "step": 37150 + }, + { + "epoch": 0.3202479338842975, + "grad_norm": 2.859375, + "learning_rate": 0.00019808259565983028, + "loss": 4.7011, + "step": 37200 + }, + { + "epoch": 0.32067837465564736, + "grad_norm": 1.171875, + "learning_rate": 0.00019807731220922902, + "loss": 5.0205, + "step": 37250 + }, + { + "epoch": 0.32110881542699726, + "grad_norm": 2.078125, + "learning_rate": 0.00019807202155996406, + "loss": 5.2195, + "step": 37300 + }, + { + "epoch": 0.3215392561983471, + "grad_norm": 2.640625, + "learning_rate": 0.00019806672371242373, + "loss": 5.2518, + "step": 37350 + }, + { + "epoch": 0.32196969696969696, + "grad_norm": 2.921875, + "learning_rate": 0.00019806141866699685, + "loss": 4.9155, + "step": 37400 + }, + { + "epoch": 0.3224001377410468, + "grad_norm": 1.1953125, + "learning_rate": 0.00019805610642407287, + "loss": 4.7224, + "step": 37450 + }, + { + "epoch": 0.3228305785123967, + "grad_norm": 0.984375, + "learning_rate": 0.0001980507869840416, + "loss": 4.8169, + "step": 37500 + }, + { + "epoch": 0.32326101928374656, + "grad_norm": 1.4921875, + "learning_rate": 0.00019804546034729356, + "loss": 4.7099, + "step": 37550 + }, + { + "epoch": 0.3236914600550964, + "grad_norm": 2.6875, + "learning_rate": 0.0001980401265142197, + "loss": 5.2715, + "step": 37600 + }, + { + "epoch": 0.32412190082644626, + "grad_norm": 1.2109375, + "learning_rate": 0.00019803478548521147, + "loss": 4.791, + "step": 37650 + }, + { + "epoch": 0.32455234159779617, + "grad_norm": 2.078125, + "learning_rate": 0.0001980294372606609, + "loss": 4.7433, + "step": 37700 + }, + { + "epoch": 0.324982782369146, + "grad_norm": 2.734375, + "learning_rate": 0.00019802408184096056, + "loss": 4.679, + "step": 37750 + }, + { + "epoch": 0.32541322314049587, + "grad_norm": 2.3125, + "learning_rate": 0.0001980187192265035, + "loss": 4.6584, + "step": 37800 + }, + { + "epoch": 0.3258436639118457, + "grad_norm": 2.359375, + "learning_rate": 0.00019801334941768338, + "loss": 4.847, + "step": 37850 + }, + { + "epoch": 0.32627410468319556, + "grad_norm": 1.4453125, + "learning_rate": 0.00019800797241489426, + "loss": 4.9452, + "step": 37900 + }, + { + "epoch": 0.32670454545454547, + "grad_norm": 4.1875, + "learning_rate": 0.00019800258821853084, + "loss": 5.0311, + "step": 37950 + }, + { + "epoch": 0.3271349862258953, + "grad_norm": 2.125, + "learning_rate": 0.00019799719682898832, + "loss": 5.1609, + "step": 38000 + }, + { + "epoch": 0.32756542699724517, + "grad_norm": 1.96875, + "learning_rate": 0.00019799179824666238, + "loss": 4.9125, + "step": 38050 + }, + { + "epoch": 0.327995867768595, + "grad_norm": 1.5859375, + "learning_rate": 0.0001979863924719493, + "loss": 5.2746, + "step": 38100 + }, + { + "epoch": 0.3284263085399449, + "grad_norm": 1.15625, + "learning_rate": 0.00019798097950524585, + "loss": 4.5547, + "step": 38150 + }, + { + "epoch": 0.32885674931129477, + "grad_norm": 2.609375, + "learning_rate": 0.00019797555934694927, + "loss": 5.2883, + "step": 38200 + }, + { + "epoch": 0.3292871900826446, + "grad_norm": 1.4296875, + "learning_rate": 0.00019797013199745746, + "loss": 5.2175, + "step": 38250 + }, + { + "epoch": 0.32971763085399447, + "grad_norm": 1.9765625, + "learning_rate": 0.00019796469745716878, + "loss": 4.9056, + "step": 38300 + }, + { + "epoch": 0.3301480716253444, + "grad_norm": 2.265625, + "learning_rate": 0.00019795925572648203, + "loss": 5.3339, + "step": 38350 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 1.3515625, + "learning_rate": 0.00019795380680579668, + "loss": 4.4741, + "step": 38400 + }, + { + "epoch": 0.3310089531680441, + "grad_norm": 1.984375, + "learning_rate": 0.0001979483506955127, + "loss": 4.9175, + "step": 38450 + }, + { + "epoch": 0.3314393939393939, + "grad_norm": 0.71875, + "learning_rate": 0.0001979428873960305, + "loss": 4.7652, + "step": 38500 + }, + { + "epoch": 0.3318698347107438, + "grad_norm": 3.015625, + "learning_rate": 0.00019793741690775108, + "loss": 5.0055, + "step": 38550 + }, + { + "epoch": 0.3323002754820937, + "grad_norm": 2.015625, + "learning_rate": 0.00019793193923107598, + "loss": 5.0159, + "step": 38600 + }, + { + "epoch": 0.3327307162534435, + "grad_norm": 2.125, + "learning_rate": 0.00019792645436640725, + "loss": 4.5909, + "step": 38650 + }, + { + "epoch": 0.3331611570247934, + "grad_norm": 1.6796875, + "learning_rate": 0.0001979209623141475, + "loss": 4.8598, + "step": 38700 + }, + { + "epoch": 0.3335915977961432, + "grad_norm": 2.515625, + "learning_rate": 0.00019791546307469977, + "loss": 4.8634, + "step": 38750 + }, + { + "epoch": 0.33402203856749313, + "grad_norm": 1.6640625, + "learning_rate": 0.0001979099566484677, + "loss": 5.0617, + "step": 38800 + }, + { + "epoch": 0.334452479338843, + "grad_norm": 2.0, + "learning_rate": 0.0001979044430358555, + "loss": 4.7291, + "step": 38850 + }, + { + "epoch": 0.3348829201101928, + "grad_norm": 2.53125, + "learning_rate": 0.00019789892223726782, + "loss": 4.8745, + "step": 38900 + }, + { + "epoch": 0.3353133608815427, + "grad_norm": 1.5546875, + "learning_rate": 0.00019789339425310988, + "loss": 4.8053, + "step": 38950 + }, + { + "epoch": 0.3357438016528926, + "grad_norm": 1.7734375, + "learning_rate": 0.00019788785908378745, + "loss": 4.6575, + "step": 39000 + }, + { + "epoch": 0.3357438016528926, + "eval_loss": 5.525368690490723, + "eval_runtime": 22.016, + "eval_samples_per_second": 29.07, + "eval_steps_per_second": 14.535, + "eval_tts_loss": 6.599256973241367, + "step": 39000 + }, + { + "epoch": 0.33617424242424243, + "grad_norm": 2.234375, + "learning_rate": 0.00019788231672970675, + "loss": 5.2571, + "step": 39050 + }, + { + "epoch": 0.3366046831955923, + "grad_norm": 2.84375, + "learning_rate": 0.00019787676719127458, + "loss": 5.0282, + "step": 39100 + }, + { + "epoch": 0.33703512396694213, + "grad_norm": 1.6953125, + "learning_rate": 0.00019787121046889832, + "loss": 4.8208, + "step": 39150 + }, + { + "epoch": 0.33746556473829203, + "grad_norm": 3.203125, + "learning_rate": 0.00019786564656298577, + "loss": 5.3306, + "step": 39200 + }, + { + "epoch": 0.3378960055096419, + "grad_norm": 0.83984375, + "learning_rate": 0.00019786007547394535, + "loss": 5.0133, + "step": 39250 + }, + { + "epoch": 0.33832644628099173, + "grad_norm": 2.09375, + "learning_rate": 0.00019785449720218588, + "loss": 4.7902, + "step": 39300 + }, + { + "epoch": 0.3387568870523416, + "grad_norm": 2.8125, + "learning_rate": 0.00019784891174811693, + "loss": 4.8296, + "step": 39350 + }, + { + "epoch": 0.3391873278236915, + "grad_norm": 2.28125, + "learning_rate": 0.00019784331911214836, + "loss": 4.6619, + "step": 39400 + }, + { + "epoch": 0.33961776859504134, + "grad_norm": 1.53125, + "learning_rate": 0.00019783771929469065, + "loss": 4.2706, + "step": 39450 + }, + { + "epoch": 0.3400482093663912, + "grad_norm": 2.65625, + "learning_rate": 0.00019783211229615488, + "loss": 4.7978, + "step": 39500 + }, + { + "epoch": 0.34047865013774103, + "grad_norm": 1.703125, + "learning_rate": 0.00019782649811695256, + "loss": 4.92, + "step": 39550 + }, + { + "epoch": 0.3409090909090909, + "grad_norm": 2.546875, + "learning_rate": 0.00019782087675749573, + "loss": 5.1624, + "step": 39600 + }, + { + "epoch": 0.3413395316804408, + "grad_norm": 1.53125, + "learning_rate": 0.00019781524821819706, + "loss": 4.7182, + "step": 39650 + }, + { + "epoch": 0.34176997245179064, + "grad_norm": 2.09375, + "learning_rate": 0.0001978096124994696, + "loss": 5.1676, + "step": 39700 + }, + { + "epoch": 0.3422004132231405, + "grad_norm": 2.53125, + "learning_rate": 0.000197803969601727, + "loss": 4.8049, + "step": 39750 + }, + { + "epoch": 0.34263085399449034, + "grad_norm": 3.578125, + "learning_rate": 0.0001977983195253835, + "loss": 4.7238, + "step": 39800 + }, + { + "epoch": 0.34306129476584024, + "grad_norm": 2.109375, + "learning_rate": 0.00019779266227085374, + "loss": 4.5067, + "step": 39850 + }, + { + "epoch": 0.3434917355371901, + "grad_norm": 2.109375, + "learning_rate": 0.00019778699783855298, + "loss": 5.2104, + "step": 39900 + }, + { + "epoch": 0.34392217630853994, + "grad_norm": 2.359375, + "learning_rate": 0.000197781326228897, + "loss": 4.7217, + "step": 39950 + }, + { + "epoch": 0.3443526170798898, + "grad_norm": 2.953125, + "learning_rate": 0.00019777564744230202, + "loss": 5.0633, + "step": 40000 + }, + { + "epoch": 0.3447830578512397, + "grad_norm": 1.1640625, + "learning_rate": 0.0001977699614791849, + "loss": 4.6669, + "step": 40050 + }, + { + "epoch": 0.34521349862258954, + "grad_norm": 2.46875, + "learning_rate": 0.00019776426833996297, + "loss": 5.3334, + "step": 40100 + }, + { + "epoch": 0.3456439393939394, + "grad_norm": 2.390625, + "learning_rate": 0.00019775856802505408, + "loss": 4.8324, + "step": 40150 + }, + { + "epoch": 0.34607438016528924, + "grad_norm": 1.1875, + "learning_rate": 0.00019775286053487663, + "loss": 5.0114, + "step": 40200 + }, + { + "epoch": 0.3465048209366391, + "grad_norm": 1.96875, + "learning_rate": 0.00019774714586984955, + "loss": 5.1082, + "step": 40250 + }, + { + "epoch": 0.346935261707989, + "grad_norm": 2.328125, + "learning_rate": 0.00019774142403039226, + "loss": 4.7672, + "step": 40300 + }, + { + "epoch": 0.34736570247933884, + "grad_norm": 2.09375, + "learning_rate": 0.00019773569501692475, + "loss": 4.8024, + "step": 40350 + }, + { + "epoch": 0.3477961432506887, + "grad_norm": 2.40625, + "learning_rate": 0.0001977299588298675, + "loss": 4.7896, + "step": 40400 + }, + { + "epoch": 0.34822658402203854, + "grad_norm": 1.5234375, + "learning_rate": 0.00019772421546964157, + "loss": 4.5769, + "step": 40450 + }, + { + "epoch": 0.34865702479338845, + "grad_norm": 1.1953125, + "learning_rate": 0.00019771846493666842, + "loss": 4.7507, + "step": 40500 + }, + { + "epoch": 0.3490874655647383, + "grad_norm": 1.3046875, + "learning_rate": 0.00019771270723137026, + "loss": 4.4713, + "step": 40550 + }, + { + "epoch": 0.34951790633608815, + "grad_norm": 0.98828125, + "learning_rate": 0.0001977069423541696, + "loss": 5.0715, + "step": 40600 + }, + { + "epoch": 0.349948347107438, + "grad_norm": 3.328125, + "learning_rate": 0.00019770117030548957, + "loss": 5.2178, + "step": 40650 + }, + { + "epoch": 0.3503787878787879, + "grad_norm": 2.34375, + "learning_rate": 0.00019769539108575388, + "loss": 5.0733, + "step": 40700 + }, + { + "epoch": 0.35080922865013775, + "grad_norm": 2.375, + "learning_rate": 0.00019768960469538667, + "loss": 4.7981, + "step": 40750 + }, + { + "epoch": 0.3512396694214876, + "grad_norm": 1.640625, + "learning_rate": 0.00019768381113481268, + "loss": 5.2429, + "step": 40800 + }, + { + "epoch": 0.35167011019283745, + "grad_norm": 2.484375, + "learning_rate": 0.0001976780104044571, + "loss": 5.2152, + "step": 40850 + }, + { + "epoch": 0.35210055096418735, + "grad_norm": 1.0625, + "learning_rate": 0.00019767220250474572, + "loss": 5.3687, + "step": 40900 + }, + { + "epoch": 0.3525309917355372, + "grad_norm": 1.1015625, + "learning_rate": 0.00019766638743610483, + "loss": 4.721, + "step": 40950 + }, + { + "epoch": 0.35296143250688705, + "grad_norm": 2.0625, + "learning_rate": 0.00019766056519896123, + "loss": 4.9641, + "step": 41000 + }, + { + "epoch": 0.3533918732782369, + "grad_norm": 2.71875, + "learning_rate": 0.00019765473579374225, + "loss": 4.9655, + "step": 41050 + }, + { + "epoch": 0.35382231404958675, + "grad_norm": 1.921875, + "learning_rate": 0.00019764889922087582, + "loss": 4.7365, + "step": 41100 + }, + { + "epoch": 0.35425275482093666, + "grad_norm": 2.734375, + "learning_rate": 0.00019764305548079024, + "loss": 4.5452, + "step": 41150 + }, + { + "epoch": 0.3546831955922865, + "grad_norm": 1.75, + "learning_rate": 0.0001976372045739145, + "loss": 4.9724, + "step": 41200 + }, + { + "epoch": 0.35511363636363635, + "grad_norm": 1.90625, + "learning_rate": 0.000197631346500678, + "loss": 5.1451, + "step": 41250 + }, + { + "epoch": 0.3555440771349862, + "grad_norm": 2.8125, + "learning_rate": 0.00019762548126151072, + "loss": 4.6637, + "step": 41300 + }, + { + "epoch": 0.3559745179063361, + "grad_norm": 5.1875, + "learning_rate": 0.00019761960885684315, + "loss": 4.7258, + "step": 41350 + }, + { + "epoch": 0.35640495867768596, + "grad_norm": 2.234375, + "learning_rate": 0.00019761372928710637, + "loss": 4.8053, + "step": 41400 + }, + { + "epoch": 0.3568353994490358, + "grad_norm": 1.828125, + "learning_rate": 0.00019760784255273185, + "loss": 4.8886, + "step": 41450 + }, + { + "epoch": 0.35726584022038566, + "grad_norm": 2.46875, + "learning_rate": 0.0001976019486541517, + "loss": 4.6134, + "step": 41500 + }, + { + "epoch": 0.35769628099173556, + "grad_norm": 1.3828125, + "learning_rate": 0.00019759604759179853, + "loss": 5.0279, + "step": 41550 + }, + { + "epoch": 0.3581267217630854, + "grad_norm": 0.9609375, + "learning_rate": 0.00019759013936610545, + "loss": 4.3883, + "step": 41600 + }, + { + "epoch": 0.35855716253443526, + "grad_norm": 3.84375, + "learning_rate": 0.00019758422397750608, + "loss": 4.7281, + "step": 41650 + }, + { + "epoch": 0.3589876033057851, + "grad_norm": 1.421875, + "learning_rate": 0.00019757830142643466, + "loss": 4.8342, + "step": 41700 + }, + { + "epoch": 0.359418044077135, + "grad_norm": 2.09375, + "learning_rate": 0.00019757237171332584, + "loss": 4.6784, + "step": 41750 + }, + { + "epoch": 0.35984848484848486, + "grad_norm": 2.25, + "learning_rate": 0.00019756643483861486, + "loss": 5.204, + "step": 41800 + }, + { + "epoch": 0.3602789256198347, + "grad_norm": 1.421875, + "learning_rate": 0.0001975604908027375, + "loss": 4.9922, + "step": 41850 + }, + { + "epoch": 0.36070936639118456, + "grad_norm": 1.84375, + "learning_rate": 0.00019755453960613003, + "loss": 5.0035, + "step": 41900 + }, + { + "epoch": 0.3611398071625344, + "grad_norm": 2.125, + "learning_rate": 0.00019754858124922925, + "loss": 4.8395, + "step": 41950 + }, + { + "epoch": 0.3615702479338843, + "grad_norm": 1.8828125, + "learning_rate": 0.00019754261573247246, + "loss": 4.9879, + "step": 42000 + }, + { + "epoch": 0.3615702479338843, + "eval_loss": 5.509988784790039, + "eval_runtime": 21.6827, + "eval_samples_per_second": 29.517, + "eval_steps_per_second": 14.758, + "eval_tts_loss": 6.5986155310177885, + "step": 42000 + }, + { + "epoch": 0.36200068870523416, + "grad_norm": 1.6484375, + "learning_rate": 0.00019753664305629756, + "loss": 4.7334, + "step": 42050 + }, + { + "epoch": 0.362431129476584, + "grad_norm": 2.1875, + "learning_rate": 0.0001975306632211429, + "loss": 4.9475, + "step": 42100 + }, + { + "epoch": 0.36286157024793386, + "grad_norm": 2.171875, + "learning_rate": 0.0001975246762274474, + "loss": 4.9682, + "step": 42150 + }, + { + "epoch": 0.36329201101928377, + "grad_norm": 2.140625, + "learning_rate": 0.00019751868207565053, + "loss": 4.6935, + "step": 42200 + }, + { + "epoch": 0.3637224517906336, + "grad_norm": 1.046875, + "learning_rate": 0.00019751268076619217, + "loss": 4.8626, + "step": 42250 + }, + { + "epoch": 0.36415289256198347, + "grad_norm": 1.8359375, + "learning_rate": 0.0001975066722995129, + "loss": 5.2555, + "step": 42300 + }, + { + "epoch": 0.3645833333333333, + "grad_norm": 0.98046875, + "learning_rate": 0.00019750065667605365, + "loss": 4.9685, + "step": 42350 + }, + { + "epoch": 0.3650137741046832, + "grad_norm": 1.984375, + "learning_rate": 0.00019749463389625595, + "loss": 5.1185, + "step": 42400 + }, + { + "epoch": 0.36544421487603307, + "grad_norm": 2.0, + "learning_rate": 0.0001974886039605619, + "loss": 4.749, + "step": 42450 + }, + { + "epoch": 0.3658746556473829, + "grad_norm": 2.53125, + "learning_rate": 0.00019748256686941408, + "loss": 4.8788, + "step": 42500 + }, + { + "epoch": 0.36630509641873277, + "grad_norm": 2.65625, + "learning_rate": 0.0001974765226232556, + "loss": 5.0896, + "step": 42550 + }, + { + "epoch": 0.36673553719008267, + "grad_norm": 3.078125, + "learning_rate": 0.00019747047122253006, + "loss": 4.7231, + "step": 42600 + }, + { + "epoch": 0.3671659779614325, + "grad_norm": 3.1875, + "learning_rate": 0.0001974644126676817, + "loss": 4.7672, + "step": 42650 + }, + { + "epoch": 0.36759641873278237, + "grad_norm": 3.75, + "learning_rate": 0.0001974583469591551, + "loss": 5.1181, + "step": 42700 + }, + { + "epoch": 0.3680268595041322, + "grad_norm": 1.7265625, + "learning_rate": 0.0001974522740973955, + "loss": 5.2168, + "step": 42750 + }, + { + "epoch": 0.36845730027548207, + "grad_norm": 2.671875, + "learning_rate": 0.0001974461940828487, + "loss": 4.9432, + "step": 42800 + }, + { + "epoch": 0.368887741046832, + "grad_norm": 2.71875, + "learning_rate": 0.0001974401069159609, + "loss": 4.9133, + "step": 42850 + }, + { + "epoch": 0.3693181818181818, + "grad_norm": 2.671875, + "learning_rate": 0.0001974340125971789, + "loss": 4.5893, + "step": 42900 + }, + { + "epoch": 0.3697486225895317, + "grad_norm": 2.390625, + "learning_rate": 0.00019742791112695003, + "loss": 5.0533, + "step": 42950 + }, + { + "epoch": 0.3701790633608815, + "grad_norm": 1.3359375, + "learning_rate": 0.0001974218025057221, + "loss": 4.8669, + "step": 43000 + }, + { + "epoch": 0.3706095041322314, + "grad_norm": 2.3125, + "learning_rate": 0.00019741568673394344, + "loss": 4.7203, + "step": 43050 + }, + { + "epoch": 0.3710399449035813, + "grad_norm": 1.8125, + "learning_rate": 0.000197409563812063, + "loss": 4.8914, + "step": 43100 + }, + { + "epoch": 0.3714703856749311, + "grad_norm": 1.7734375, + "learning_rate": 0.00019740343374053014, + "loss": 4.6111, + "step": 43150 + }, + { + "epoch": 0.371900826446281, + "grad_norm": 1.6796875, + "learning_rate": 0.0001973972965197948, + "loss": 4.9909, + "step": 43200 + }, + { + "epoch": 0.3723312672176309, + "grad_norm": 1.9765625, + "learning_rate": 0.00019739115215030748, + "loss": 4.5076, + "step": 43250 + }, + { + "epoch": 0.37276170798898073, + "grad_norm": 3.296875, + "learning_rate": 0.0001973850006325191, + "loss": 5.1815, + "step": 43300 + }, + { + "epoch": 0.3731921487603306, + "grad_norm": 2.3125, + "learning_rate": 0.00019737884196688126, + "loss": 4.8593, + "step": 43350 + }, + { + "epoch": 0.3736225895316804, + "grad_norm": 0.765625, + "learning_rate": 0.0001973726761538459, + "loss": 5.1336, + "step": 43400 + }, + { + "epoch": 0.3740530303030303, + "grad_norm": 1.6015625, + "learning_rate": 0.00019736650319386562, + "loss": 4.9215, + "step": 43450 + }, + { + "epoch": 0.3744834710743802, + "grad_norm": 1.734375, + "learning_rate": 0.0001973603230873935, + "loss": 4.9942, + "step": 43500 + }, + { + "epoch": 0.37491391184573003, + "grad_norm": 1.859375, + "learning_rate": 0.00019735413583488314, + "loss": 4.55, + "step": 43550 + }, + { + "epoch": 0.3753443526170799, + "grad_norm": 3.5625, + "learning_rate": 0.00019734794143678864, + "loss": 4.8435, + "step": 43600 + }, + { + "epoch": 0.37577479338842973, + "grad_norm": 1.5859375, + "learning_rate": 0.00019734173989356473, + "loss": 4.8101, + "step": 43650 + }, + { + "epoch": 0.37620523415977963, + "grad_norm": 1.921875, + "learning_rate": 0.00019733553120566654, + "loss": 4.5615, + "step": 43700 + }, + { + "epoch": 0.3766356749311295, + "grad_norm": 1.9765625, + "learning_rate": 0.00019732931537354978, + "loss": 4.6809, + "step": 43750 + }, + { + "epoch": 0.37706611570247933, + "grad_norm": 3.125, + "learning_rate": 0.00019732309239767069, + "loss": 5.0443, + "step": 43800 + }, + { + "epoch": 0.3774965564738292, + "grad_norm": 2.109375, + "learning_rate": 0.000197316862278486, + "loss": 4.5044, + "step": 43850 + }, + { + "epoch": 0.3779269972451791, + "grad_norm": 1.9921875, + "learning_rate": 0.000197310625016453, + "loss": 4.8331, + "step": 43900 + }, + { + "epoch": 0.37835743801652894, + "grad_norm": 1.90625, + "learning_rate": 0.00019730438061202948, + "loss": 5.0114, + "step": 43950 + }, + { + "epoch": 0.3787878787878788, + "grad_norm": 1.1015625, + "learning_rate": 0.0001972981290656738, + "loss": 5.1022, + "step": 44000 + }, + { + "epoch": 0.37921831955922863, + "grad_norm": 0.76953125, + "learning_rate": 0.00019729187037784477, + "loss": 4.5445, + "step": 44050 + }, + { + "epoch": 0.37964876033057854, + "grad_norm": 2.671875, + "learning_rate": 0.0001972856045490018, + "loss": 4.8187, + "step": 44100 + }, + { + "epoch": 0.3800792011019284, + "grad_norm": 2.421875, + "learning_rate": 0.00019727933157960478, + "loss": 5.0647, + "step": 44150 + }, + { + "epoch": 0.38050964187327824, + "grad_norm": 3.09375, + "learning_rate": 0.0001972730514701141, + "loss": 5.378, + "step": 44200 + }, + { + "epoch": 0.3809400826446281, + "grad_norm": 2.34375, + "learning_rate": 0.00019726676422099075, + "loss": 5.1012, + "step": 44250 + }, + { + "epoch": 0.38137052341597794, + "grad_norm": 2.4375, + "learning_rate": 0.00019726046983269618, + "loss": 4.9323, + "step": 44300 + }, + { + "epoch": 0.38180096418732784, + "grad_norm": 1.765625, + "learning_rate": 0.00019725416830569236, + "loss": 5.0225, + "step": 44350 + }, + { + "epoch": 0.3822314049586777, + "grad_norm": 2.15625, + "learning_rate": 0.00019724785964044187, + "loss": 4.6229, + "step": 44400 + }, + { + "epoch": 0.38266184573002754, + "grad_norm": 0.98046875, + "learning_rate": 0.0001972415438374077, + "loss": 5.0021, + "step": 44450 + }, + { + "epoch": 0.3830922865013774, + "grad_norm": 1.40625, + "learning_rate": 0.00019723522089705342, + "loss": 4.5124, + "step": 44500 + }, + { + "epoch": 0.3835227272727273, + "grad_norm": 1.125, + "learning_rate": 0.00019722889081984316, + "loss": 4.9181, + "step": 44550 + }, + { + "epoch": 0.38395316804407714, + "grad_norm": 2.203125, + "learning_rate": 0.0001972225536062415, + "loss": 4.9511, + "step": 44600 + }, + { + "epoch": 0.384383608815427, + "grad_norm": 4.0, + "learning_rate": 0.00019721620925671355, + "loss": 4.7302, + "step": 44650 + }, + { + "epoch": 0.38481404958677684, + "grad_norm": 2.09375, + "learning_rate": 0.00019720985777172504, + "loss": 5.0943, + "step": 44700 + }, + { + "epoch": 0.38524449035812675, + "grad_norm": 3.375, + "learning_rate": 0.0001972034991517421, + "loss": 4.7611, + "step": 44750 + }, + { + "epoch": 0.3856749311294766, + "grad_norm": 2.6875, + "learning_rate": 0.0001971971333972315, + "loss": 4.7436, + "step": 44800 + }, + { + "epoch": 0.38610537190082644, + "grad_norm": 2.28125, + "learning_rate": 0.0001971907605086604, + "loss": 4.4675, + "step": 44850 + }, + { + "epoch": 0.3865358126721763, + "grad_norm": 1.8359375, + "learning_rate": 0.0001971843804864966, + "loss": 4.7749, + "step": 44900 + }, + { + "epoch": 0.3869662534435262, + "grad_norm": 2.5, + "learning_rate": 0.0001971779933312084, + "loss": 5.3325, + "step": 44950 + }, + { + "epoch": 0.38739669421487605, + "grad_norm": 1.328125, + "learning_rate": 0.0001971715990432645, + "loss": 4.9475, + "step": 45000 + }, + { + "epoch": 0.38739669421487605, + "eval_loss": 5.486639976501465, + "eval_runtime": 22.0151, + "eval_samples_per_second": 29.071, + "eval_steps_per_second": 14.536, + "eval_tts_loss": 6.651565814050026, + "step": 45000 + }, + { + "epoch": 0.3878271349862259, + "grad_norm": 3.03125, + "learning_rate": 0.00019716519762313434, + "loss": 5.0611, + "step": 45050 + }, + { + "epoch": 0.38825757575757575, + "grad_norm": 2.296875, + "learning_rate": 0.00019715878907128776, + "loss": 5.0095, + "step": 45100 + }, + { + "epoch": 0.3886880165289256, + "grad_norm": 1.0546875, + "learning_rate": 0.0001971523733881951, + "loss": 4.7267, + "step": 45150 + }, + { + "epoch": 0.3891184573002755, + "grad_norm": 1.421875, + "learning_rate": 0.0001971459505743272, + "loss": 4.7917, + "step": 45200 + }, + { + "epoch": 0.38954889807162535, + "grad_norm": 2.046875, + "learning_rate": 0.0001971395206301556, + "loss": 4.4387, + "step": 45250 + }, + { + "epoch": 0.3899793388429752, + "grad_norm": 2.203125, + "learning_rate": 0.00019713308355615218, + "loss": 5.0246, + "step": 45300 + }, + { + "epoch": 0.39040977961432505, + "grad_norm": 1.203125, + "learning_rate": 0.00019712663935278938, + "loss": 4.9227, + "step": 45350 + }, + { + "epoch": 0.39084022038567495, + "grad_norm": 2.0625, + "learning_rate": 0.00019712018802054026, + "loss": 4.914, + "step": 45400 + }, + { + "epoch": 0.3912706611570248, + "grad_norm": 1.75, + "learning_rate": 0.0001971137295598783, + "loss": 5.1644, + "step": 45450 + }, + { + "epoch": 0.39170110192837465, + "grad_norm": 3.796875, + "learning_rate": 0.00019710726397127753, + "loss": 4.3398, + "step": 45500 + }, + { + "epoch": 0.3921315426997245, + "grad_norm": 3.046875, + "learning_rate": 0.0001971007912552125, + "loss": 4.9143, + "step": 45550 + }, + { + "epoch": 0.3925619834710744, + "grad_norm": 2.3125, + "learning_rate": 0.0001970943114121583, + "loss": 4.5839, + "step": 45600 + }, + { + "epoch": 0.39299242424242425, + "grad_norm": 2.5, + "learning_rate": 0.00019708782444259056, + "loss": 5.107, + "step": 45650 + }, + { + "epoch": 0.3934228650137741, + "grad_norm": 2.359375, + "learning_rate": 0.00019708133034698544, + "loss": 4.8543, + "step": 45700 + }, + { + "epoch": 0.39385330578512395, + "grad_norm": 1.8984375, + "learning_rate": 0.0001970748291258195, + "loss": 5.2229, + "step": 45750 + }, + { + "epoch": 0.3942837465564738, + "grad_norm": 2.203125, + "learning_rate": 0.00019706832077956996, + "loss": 4.8235, + "step": 45800 + }, + { + "epoch": 0.3947141873278237, + "grad_norm": 2.75, + "learning_rate": 0.00019706180530871455, + "loss": 4.9943, + "step": 45850 + }, + { + "epoch": 0.39514462809917356, + "grad_norm": 2.0625, + "learning_rate": 0.00019705528271373146, + "loss": 4.9746, + "step": 45900 + }, + { + "epoch": 0.3955750688705234, + "grad_norm": 2.46875, + "learning_rate": 0.00019704875299509942, + "loss": 5.3528, + "step": 45950 + }, + { + "epoch": 0.39600550964187325, + "grad_norm": 1.6484375, + "learning_rate": 0.00019704221615329774, + "loss": 5.0962, + "step": 46000 + }, + { + "epoch": 0.39643595041322316, + "grad_norm": 1.9765625, + "learning_rate": 0.00019703567218880616, + "loss": 5.0475, + "step": 46050 + }, + { + "epoch": 0.396866391184573, + "grad_norm": 2.03125, + "learning_rate": 0.00019702912110210503, + "loss": 4.435, + "step": 46100 + }, + { + "epoch": 0.39729683195592286, + "grad_norm": 1.7265625, + "learning_rate": 0.00019702256289367518, + "loss": 5.1928, + "step": 46150 + }, + { + "epoch": 0.3977272727272727, + "grad_norm": 3.578125, + "learning_rate": 0.00019701599756399797, + "loss": 4.7587, + "step": 46200 + }, + { + "epoch": 0.3981577134986226, + "grad_norm": 2.75, + "learning_rate": 0.00019700942511355525, + "loss": 4.5542, + "step": 46250 + }, + { + "epoch": 0.39858815426997246, + "grad_norm": 2.734375, + "learning_rate": 0.00019700284554282946, + "loss": 5.2381, + "step": 46300 + }, + { + "epoch": 0.3990185950413223, + "grad_norm": 2.09375, + "learning_rate": 0.0001969962588523035, + "loss": 5.0197, + "step": 46350 + }, + { + "epoch": 0.39944903581267216, + "grad_norm": 2.90625, + "learning_rate": 0.0001969896650424609, + "loss": 4.9794, + "step": 46400 + }, + { + "epoch": 0.39987947658402206, + "grad_norm": 1.3828125, + "learning_rate": 0.00019698306411378548, + "loss": 5.0362, + "step": 46450 + }, + { + "epoch": 0.4003099173553719, + "grad_norm": 1.921875, + "learning_rate": 0.00019697645606676185, + "loss": 4.4054, + "step": 46500 + }, + { + "epoch": 0.40074035812672176, + "grad_norm": 2.96875, + "learning_rate": 0.00019696984090187498, + "loss": 5.0158, + "step": 46550 + }, + { + "epoch": 0.4011707988980716, + "grad_norm": 1.984375, + "learning_rate": 0.00019696321861961046, + "loss": 4.9003, + "step": 46600 + }, + { + "epoch": 0.40160123966942146, + "grad_norm": 1.7109375, + "learning_rate": 0.0001969565892204543, + "loss": 4.7631, + "step": 46650 + }, + { + "epoch": 0.40203168044077137, + "grad_norm": 1.5078125, + "learning_rate": 0.00019694995270489306, + "loss": 4.7591, + "step": 46700 + }, + { + "epoch": 0.4024621212121212, + "grad_norm": 2.09375, + "learning_rate": 0.00019694330907341393, + "loss": 4.4943, + "step": 46750 + }, + { + "epoch": 0.40289256198347106, + "grad_norm": 2.03125, + "learning_rate": 0.0001969366583265045, + "loss": 4.812, + "step": 46800 + }, + { + "epoch": 0.4033230027548209, + "grad_norm": 1.0, + "learning_rate": 0.0001969300004646529, + "loss": 4.7527, + "step": 46850 + }, + { + "epoch": 0.4037534435261708, + "grad_norm": 1.5390625, + "learning_rate": 0.0001969233354883478, + "loss": 4.9423, + "step": 46900 + }, + { + "epoch": 0.40418388429752067, + "grad_norm": 2.3125, + "learning_rate": 0.0001969166633980784, + "loss": 4.8899, + "step": 46950 + }, + { + "epoch": 0.4046143250688705, + "grad_norm": 2.90625, + "learning_rate": 0.00019690998419433448, + "loss": 4.7378, + "step": 47000 + }, + { + "epoch": 0.40504476584022037, + "grad_norm": 1.8359375, + "learning_rate": 0.0001969032978776062, + "loss": 4.3138, + "step": 47050 + }, + { + "epoch": 0.40547520661157027, + "grad_norm": 2.203125, + "learning_rate": 0.00019689660444838434, + "loss": 4.676, + "step": 47100 + }, + { + "epoch": 0.4059056473829201, + "grad_norm": 2.328125, + "learning_rate": 0.0001968899039071602, + "loss": 4.9377, + "step": 47150 + }, + { + "epoch": 0.40633608815426997, + "grad_norm": 2.71875, + "learning_rate": 0.00019688319625442557, + "loss": 4.7513, + "step": 47200 + }, + { + "epoch": 0.4067665289256198, + "grad_norm": 1.7265625, + "learning_rate": 0.0001968764814906728, + "loss": 4.5865, + "step": 47250 + }, + { + "epoch": 0.4071969696969697, + "grad_norm": 4.15625, + "learning_rate": 0.00019686975961639468, + "loss": 4.9636, + "step": 47300 + }, + { + "epoch": 0.4076274104683196, + "grad_norm": 2.28125, + "learning_rate": 0.00019686303063208465, + "loss": 4.5476, + "step": 47350 + }, + { + "epoch": 0.4080578512396694, + "grad_norm": 2.640625, + "learning_rate": 0.00019685629453823663, + "loss": 5.0127, + "step": 47400 + }, + { + "epoch": 0.40848829201101927, + "grad_norm": 1.5625, + "learning_rate": 0.00019684955133534492, + "loss": 4.731, + "step": 47450 + }, + { + "epoch": 0.4089187327823691, + "grad_norm": 2.515625, + "learning_rate": 0.00019684280102390457, + "loss": 4.8993, + "step": 47500 + }, + { + "epoch": 0.409349173553719, + "grad_norm": 3.40625, + "learning_rate": 0.00019683604360441092, + "loss": 5.4113, + "step": 47550 + }, + { + "epoch": 0.4097796143250689, + "grad_norm": 4.5625, + "learning_rate": 0.00019682927907736005, + "loss": 4.563, + "step": 47600 + }, + { + "epoch": 0.4102100550964187, + "grad_norm": 3.59375, + "learning_rate": 0.00019682250744324843, + "loss": 4.8675, + "step": 47650 + }, + { + "epoch": 0.4106404958677686, + "grad_norm": 1.84375, + "learning_rate": 0.0001968157287025731, + "loss": 4.8817, + "step": 47700 + }, + { + "epoch": 0.4110709366391185, + "grad_norm": 1.2265625, + "learning_rate": 0.00019680894285583155, + "loss": 5.0027, + "step": 47750 + }, + { + "epoch": 0.4115013774104683, + "grad_norm": 2.21875, + "learning_rate": 0.00019680214990352193, + "loss": 4.8271, + "step": 47800 + }, + { + "epoch": 0.4119318181818182, + "grad_norm": 2.09375, + "learning_rate": 0.00019679534984614274, + "loss": 4.8948, + "step": 47850 + }, + { + "epoch": 0.412362258953168, + "grad_norm": 2.296875, + "learning_rate": 0.00019678854268419314, + "loss": 4.8333, + "step": 47900 + }, + { + "epoch": 0.41279269972451793, + "grad_norm": 2.609375, + "learning_rate": 0.0001967817284181728, + "loss": 4.8425, + "step": 47950 + }, + { + "epoch": 0.4132231404958678, + "grad_norm": 2.453125, + "learning_rate": 0.00019677490704858176, + "loss": 4.9557, + "step": 48000 + }, + { + "epoch": 0.4132231404958678, + "eval_loss": 5.4774956703186035, + "eval_runtime": 21.8718, + "eval_samples_per_second": 29.261, + "eval_steps_per_second": 14.631, + "eval_tts_loss": 6.593891539484386, + "step": 48000 + }, + { + "epoch": 0.41365358126721763, + "grad_norm": 3.765625, + "learning_rate": 0.0001967680785759208, + "loss": 4.9363, + "step": 48050 + }, + { + "epoch": 0.4140840220385675, + "grad_norm": 1.3359375, + "learning_rate": 0.00019676124300069104, + "loss": 4.9799, + "step": 48100 + }, + { + "epoch": 0.41451446280991733, + "grad_norm": 1.4296875, + "learning_rate": 0.00019675440032339427, + "loss": 4.9791, + "step": 48150 + }, + { + "epoch": 0.41494490358126723, + "grad_norm": 1.1015625, + "learning_rate": 0.0001967475505445327, + "loss": 4.8335, + "step": 48200 + }, + { + "epoch": 0.4153753443526171, + "grad_norm": 1.7890625, + "learning_rate": 0.00019674069366460903, + "loss": 4.8282, + "step": 48250 + }, + { + "epoch": 0.41580578512396693, + "grad_norm": 1.7265625, + "learning_rate": 0.00019673382968412662, + "loss": 4.7353, + "step": 48300 + }, + { + "epoch": 0.4162362258953168, + "grad_norm": 1.8515625, + "learning_rate": 0.00019672695860358923, + "loss": 4.3679, + "step": 48350 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 2.640625, + "learning_rate": 0.0001967200804235012, + "loss": 4.8819, + "step": 48400 + }, + { + "epoch": 0.41709710743801653, + "grad_norm": 1.8828125, + "learning_rate": 0.00019671319514436735, + "loss": 4.6303, + "step": 48450 + }, + { + "epoch": 0.4175275482093664, + "grad_norm": 2.0625, + "learning_rate": 0.00019670630276669306, + "loss": 4.6477, + "step": 48500 + }, + { + "epoch": 0.41795798898071623, + "grad_norm": 2.4375, + "learning_rate": 0.0001966994032909842, + "loss": 4.6415, + "step": 48550 + }, + { + "epoch": 0.41838842975206614, + "grad_norm": 3.625, + "learning_rate": 0.00019669249671774722, + "loss": 5.1527, + "step": 48600 + }, + { + "epoch": 0.418818870523416, + "grad_norm": 2.875, + "learning_rate": 0.000196685583047489, + "loss": 4.9671, + "step": 48650 + }, + { + "epoch": 0.41924931129476584, + "grad_norm": 1.453125, + "learning_rate": 0.00019667866228071702, + "loss": 4.906, + "step": 48700 + }, + { + "epoch": 0.4196797520661157, + "grad_norm": 0.73828125, + "learning_rate": 0.00019667173441793927, + "loss": 4.6206, + "step": 48750 + }, + { + "epoch": 0.4201101928374656, + "grad_norm": 2.078125, + "learning_rate": 0.00019666479945966417, + "loss": 4.8596, + "step": 48800 + }, + { + "epoch": 0.42054063360881544, + "grad_norm": 2.1875, + "learning_rate": 0.00019665785740640077, + "loss": 4.8986, + "step": 48850 + }, + { + "epoch": 0.4209710743801653, + "grad_norm": 1.3046875, + "learning_rate": 0.0001966509082586586, + "loss": 4.6868, + "step": 48900 + }, + { + "epoch": 0.42140151515151514, + "grad_norm": 2.28125, + "learning_rate": 0.00019664395201694773, + "loss": 5.3235, + "step": 48950 + }, + { + "epoch": 0.421831955922865, + "grad_norm": 2.6875, + "learning_rate": 0.0001966369886817787, + "loss": 4.627, + "step": 49000 + }, + { + "epoch": 0.4222623966942149, + "grad_norm": 0.890625, + "learning_rate": 0.00019663001825366265, + "loss": 4.6956, + "step": 49050 + }, + { + "epoch": 0.42269283746556474, + "grad_norm": 2.53125, + "learning_rate": 0.00019662304073311116, + "loss": 4.9834, + "step": 49100 + }, + { + "epoch": 0.4231232782369146, + "grad_norm": 1.7265625, + "learning_rate": 0.00019661605612063634, + "loss": 5.1055, + "step": 49150 + }, + { + "epoch": 0.42355371900826444, + "grad_norm": 3.203125, + "learning_rate": 0.0001966090644167509, + "loss": 4.7372, + "step": 49200 + }, + { + "epoch": 0.42398415977961434, + "grad_norm": 1.703125, + "learning_rate": 0.000196602065621968, + "loss": 4.866, + "step": 49250 + }, + { + "epoch": 0.4244146005509642, + "grad_norm": 0.8671875, + "learning_rate": 0.0001965950597368013, + "loss": 4.77, + "step": 49300 + }, + { + "epoch": 0.42484504132231404, + "grad_norm": 1.53125, + "learning_rate": 0.00019658804676176504, + "loss": 5.0861, + "step": 49350 + }, + { + "epoch": 0.4252754820936639, + "grad_norm": 1.265625, + "learning_rate": 0.000196581026697374, + "loss": 4.5885, + "step": 49400 + }, + { + "epoch": 0.4257059228650138, + "grad_norm": 1.96875, + "learning_rate": 0.00019657399954414335, + "loss": 5.1443, + "step": 49450 + }, + { + "epoch": 0.42613636363636365, + "grad_norm": 1.5078125, + "learning_rate": 0.00019656696530258896, + "loss": 4.2604, + "step": 49500 + }, + { + "epoch": 0.4265668044077135, + "grad_norm": 2.375, + "learning_rate": 0.00019655992397322707, + "loss": 4.8881, + "step": 49550 + }, + { + "epoch": 0.42699724517906334, + "grad_norm": 2.21875, + "learning_rate": 0.00019655287555657452, + "loss": 4.8634, + "step": 49600 + }, + { + "epoch": 0.42742768595041325, + "grad_norm": 1.5234375, + "learning_rate": 0.00019654582005314863, + "loss": 5.0439, + "step": 49650 + }, + { + "epoch": 0.4278581267217631, + "grad_norm": 3.015625, + "learning_rate": 0.00019653875746346728, + "loss": 5.151, + "step": 49700 + }, + { + "epoch": 0.42828856749311295, + "grad_norm": 1.2109375, + "learning_rate": 0.00019653168778804885, + "loss": 4.8142, + "step": 49750 + }, + { + "epoch": 0.4287190082644628, + "grad_norm": 1.171875, + "learning_rate": 0.0001965246110274122, + "loss": 5.2469, + "step": 49800 + }, + { + "epoch": 0.42914944903581265, + "grad_norm": 1.640625, + "learning_rate": 0.0001965175271820768, + "loss": 4.9468, + "step": 49850 + }, + { + "epoch": 0.42957988980716255, + "grad_norm": 3.828125, + "learning_rate": 0.00019651043625256259, + "loss": 5.138, + "step": 49900 + }, + { + "epoch": 0.4300103305785124, + "grad_norm": 1.5078125, + "learning_rate": 0.00019650333823938997, + "loss": 4.9163, + "step": 49950 + }, + { + "epoch": 0.43044077134986225, + "grad_norm": 2.078125, + "learning_rate": 0.00019649623314307995, + "loss": 4.7608, + "step": 50000 + }, + { + "epoch": 0.4308712121212121, + "grad_norm": 2.46875, + "learning_rate": 0.00019648912096415408, + "loss": 5.1712, + "step": 50050 + }, + { + "epoch": 0.431301652892562, + "grad_norm": 2.171875, + "learning_rate": 0.00019648200170313432, + "loss": 5.0902, + "step": 50100 + }, + { + "epoch": 0.43173209366391185, + "grad_norm": 3.484375, + "learning_rate": 0.0001964748753605432, + "loss": 5.0191, + "step": 50150 + }, + { + "epoch": 0.4321625344352617, + "grad_norm": 2.40625, + "learning_rate": 0.0001964677419369038, + "loss": 4.9103, + "step": 50200 + }, + { + "epoch": 0.43259297520661155, + "grad_norm": 3.03125, + "learning_rate": 0.0001964606014327397, + "loss": 4.9049, + "step": 50250 + }, + { + "epoch": 0.43302341597796146, + "grad_norm": 3.359375, + "learning_rate": 0.000196453453848575, + "loss": 4.5552, + "step": 50300 + }, + { + "epoch": 0.4334538567493113, + "grad_norm": 2.734375, + "learning_rate": 0.0001964462991849343, + "loss": 4.4175, + "step": 50350 + }, + { + "epoch": 0.43388429752066116, + "grad_norm": 2.046875, + "learning_rate": 0.0001964391374423428, + "loss": 5.0406, + "step": 50400 + }, + { + "epoch": 0.434314738292011, + "grad_norm": 1.71875, + "learning_rate": 0.00019643196862132605, + "loss": 4.4845, + "step": 50450 + }, + { + "epoch": 0.4347451790633609, + "grad_norm": 3.28125, + "learning_rate": 0.0001964247927224103, + "loss": 4.7683, + "step": 50500 + }, + { + "epoch": 0.43517561983471076, + "grad_norm": 1.640625, + "learning_rate": 0.00019641760974612222, + "loss": 4.9496, + "step": 50550 + }, + { + "epoch": 0.4356060606060606, + "grad_norm": 2.640625, + "learning_rate": 0.00019641041969298905, + "loss": 4.7391, + "step": 50600 + }, + { + "epoch": 0.43603650137741046, + "grad_norm": 2.78125, + "learning_rate": 0.0001964032225635385, + "loss": 5.0805, + "step": 50650 + }, + { + "epoch": 0.4364669421487603, + "grad_norm": 2.46875, + "learning_rate": 0.00019639601835829882, + "loss": 4.4855, + "step": 50700 + }, + { + "epoch": 0.4368973829201102, + "grad_norm": 2.890625, + "learning_rate": 0.00019638880707779878, + "loss": 5.1064, + "step": 50750 + }, + { + "epoch": 0.43732782369146006, + "grad_norm": 1.9296875, + "learning_rate": 0.00019638158872256774, + "loss": 4.5543, + "step": 50800 + }, + { + "epoch": 0.4377582644628099, + "grad_norm": 2.234375, + "learning_rate": 0.0001963743632931354, + "loss": 4.9067, + "step": 50850 + }, + { + "epoch": 0.43818870523415976, + "grad_norm": 1.7265625, + "learning_rate": 0.0001963671307900322, + "loss": 4.9135, + "step": 50900 + }, + { + "epoch": 0.43861914600550966, + "grad_norm": 0.734375, + "learning_rate": 0.0001963598912137889, + "loss": 5.1316, + "step": 50950 + }, + { + "epoch": 0.4390495867768595, + "grad_norm": 2.359375, + "learning_rate": 0.00019635264456493694, + "loss": 5.1975, + "step": 51000 + }, + { + "epoch": 0.4390495867768595, + "eval_loss": 5.461007595062256, + "eval_runtime": 21.9788, + "eval_samples_per_second": 29.119, + "eval_steps_per_second": 14.559, + "eval_tts_loss": 6.650621942901377, + "step": 51000 + }, + { + "epoch": 0.43948002754820936, + "grad_norm": 2.109375, + "learning_rate": 0.00019634539084400815, + "loss": 4.9518, + "step": 51050 + }, + { + "epoch": 0.4399104683195592, + "grad_norm": 1.7265625, + "learning_rate": 0.00019633813005153497, + "loss": 4.6406, + "step": 51100 + }, + { + "epoch": 0.4403409090909091, + "grad_norm": 2.859375, + "learning_rate": 0.00019633086218805032, + "loss": 4.8793, + "step": 51150 + }, + { + "epoch": 0.44077134986225897, + "grad_norm": 1.296875, + "learning_rate": 0.00019632358725408765, + "loss": 5.1101, + "step": 51200 + }, + { + "epoch": 0.4412017906336088, + "grad_norm": 3.328125, + "learning_rate": 0.00019631630525018094, + "loss": 5.1925, + "step": 51250 + }, + { + "epoch": 0.44163223140495866, + "grad_norm": 2.78125, + "learning_rate": 0.00019630901617686462, + "loss": 5.0485, + "step": 51300 + }, + { + "epoch": 0.4420626721763085, + "grad_norm": 2.6875, + "learning_rate": 0.00019630172003467375, + "loss": 4.6684, + "step": 51350 + }, + { + "epoch": 0.4424931129476584, + "grad_norm": 2.3125, + "learning_rate": 0.00019629441682414382, + "loss": 5.0334, + "step": 51400 + }, + { + "epoch": 0.44292355371900827, + "grad_norm": 2.375, + "learning_rate": 0.0001962871065458109, + "loss": 4.949, + "step": 51450 + }, + { + "epoch": 0.4433539944903581, + "grad_norm": 2.484375, + "learning_rate": 0.00019627978920021147, + "loss": 4.9946, + "step": 51500 + }, + { + "epoch": 0.44378443526170797, + "grad_norm": 2.3125, + "learning_rate": 0.00019627246478788274, + "loss": 4.7547, + "step": 51550 + }, + { + "epoch": 0.44421487603305787, + "grad_norm": 2.28125, + "learning_rate": 0.00019626513330936218, + "loss": 4.5459, + "step": 51600 + }, + { + "epoch": 0.4446453168044077, + "grad_norm": 1.9453125, + "learning_rate": 0.00019625779476518796, + "loss": 5.0765, + "step": 51650 + }, + { + "epoch": 0.44507575757575757, + "grad_norm": 2.1875, + "learning_rate": 0.0001962504491558987, + "loss": 4.6943, + "step": 51700 + }, + { + "epoch": 0.4455061983471074, + "grad_norm": 1.703125, + "learning_rate": 0.0001962430964820336, + "loss": 4.7206, + "step": 51750 + }, + { + "epoch": 0.4459366391184573, + "grad_norm": 2.390625, + "learning_rate": 0.00019623573674413223, + "loss": 4.7757, + "step": 51800 + }, + { + "epoch": 0.4463670798898072, + "grad_norm": 1.734375, + "learning_rate": 0.00019622836994273487, + "loss": 4.5886, + "step": 51850 + }, + { + "epoch": 0.446797520661157, + "grad_norm": 3.296875, + "learning_rate": 0.00019622099607838223, + "loss": 4.6718, + "step": 51900 + }, + { + "epoch": 0.44722796143250687, + "grad_norm": 1.421875, + "learning_rate": 0.00019621361515161548, + "loss": 4.576, + "step": 51950 + }, + { + "epoch": 0.4476584022038568, + "grad_norm": 1.296875, + "learning_rate": 0.00019620622716297638, + "loss": 4.7298, + "step": 52000 + }, + { + "epoch": 0.4480888429752066, + "grad_norm": 2.484375, + "learning_rate": 0.00019619883211300718, + "loss": 4.7639, + "step": 52050 + }, + { + "epoch": 0.4485192837465565, + "grad_norm": 2.390625, + "learning_rate": 0.0001961914300022507, + "loss": 4.7986, + "step": 52100 + }, + { + "epoch": 0.4489497245179063, + "grad_norm": 1.6484375, + "learning_rate": 0.00019618402083125024, + "loss": 4.9084, + "step": 52150 + }, + { + "epoch": 0.4493801652892562, + "grad_norm": 1.75, + "learning_rate": 0.00019617660460054956, + "loss": 5.2089, + "step": 52200 + }, + { + "epoch": 0.4498106060606061, + "grad_norm": 1.9609375, + "learning_rate": 0.00019616918131069305, + "loss": 5.0402, + "step": 52250 + }, + { + "epoch": 0.4502410468319559, + "grad_norm": 3.515625, + "learning_rate": 0.00019616175096222554, + "loss": 5.2387, + "step": 52300 + }, + { + "epoch": 0.4506714876033058, + "grad_norm": 2.484375, + "learning_rate": 0.0001961543135556924, + "loss": 5.2524, + "step": 52350 + }, + { + "epoch": 0.4511019283746556, + "grad_norm": 2.75, + "learning_rate": 0.00019614686909163952, + "loss": 4.9647, + "step": 52400 + }, + { + "epoch": 0.45153236914600553, + "grad_norm": 1.4765625, + "learning_rate": 0.00019613941757061335, + "loss": 4.9352, + "step": 52450 + }, + { + "epoch": 0.4519628099173554, + "grad_norm": 2.25, + "learning_rate": 0.00019613195899316076, + "loss": 4.6792, + "step": 52500 + }, + { + "epoch": 0.45239325068870523, + "grad_norm": 2.359375, + "learning_rate": 0.00019612449335982922, + "loss": 4.7962, + "step": 52550 + }, + { + "epoch": 0.4528236914600551, + "grad_norm": 2.375, + "learning_rate": 0.00019611702067116668, + "loss": 4.75, + "step": 52600 + }, + { + "epoch": 0.453254132231405, + "grad_norm": 2.3125, + "learning_rate": 0.0001961095409277216, + "loss": 4.6786, + "step": 52650 + }, + { + "epoch": 0.45368457300275483, + "grad_norm": 1.25, + "learning_rate": 0.00019610205413004303, + "loss": 4.2868, + "step": 52700 + }, + { + "epoch": 0.4541150137741047, + "grad_norm": 2.203125, + "learning_rate": 0.00019609456027868045, + "loss": 4.8512, + "step": 52750 + }, + { + "epoch": 0.45454545454545453, + "grad_norm": 0.9140625, + "learning_rate": 0.00019608705937418388, + "loss": 5.1612, + "step": 52800 + }, + { + "epoch": 0.45497589531680444, + "grad_norm": 0.70703125, + "learning_rate": 0.00019607955141710392, + "loss": 4.6844, + "step": 52850 + }, + { + "epoch": 0.4554063360881543, + "grad_norm": 1.7890625, + "learning_rate": 0.0001960720364079916, + "loss": 4.8468, + "step": 52900 + }, + { + "epoch": 0.45583677685950413, + "grad_norm": 0.462890625, + "learning_rate": 0.00019606451434739847, + "loss": 5.0051, + "step": 52950 + }, + { + "epoch": 0.456267217630854, + "grad_norm": 1.328125, + "learning_rate": 0.00019605698523587672, + "loss": 4.7107, + "step": 53000 + }, + { + "epoch": 0.45669765840220383, + "grad_norm": 2.484375, + "learning_rate": 0.0001960494490739789, + "loss": 4.8353, + "step": 53050 + }, + { + "epoch": 0.45712809917355374, + "grad_norm": 2.359375, + "learning_rate": 0.00019604190586225819, + "loss": 4.9876, + "step": 53100 + }, + { + "epoch": 0.4575585399449036, + "grad_norm": 2.59375, + "learning_rate": 0.00019603435560126823, + "loss": 4.9235, + "step": 53150 + }, + { + "epoch": 0.45798898071625344, + "grad_norm": 1.375, + "learning_rate": 0.00019602679829156322, + "loss": 4.6953, + "step": 53200 + }, + { + "epoch": 0.4584194214876033, + "grad_norm": 2.1875, + "learning_rate": 0.00019601923393369777, + "loss": 5.0293, + "step": 53250 + }, + { + "epoch": 0.4588498622589532, + "grad_norm": 2.703125, + "learning_rate": 0.00019601166252822716, + "loss": 4.8473, + "step": 53300 + }, + { + "epoch": 0.45928030303030304, + "grad_norm": 2.609375, + "learning_rate": 0.0001960040840757071, + "loss": 4.9234, + "step": 53350 + }, + { + "epoch": 0.4597107438016529, + "grad_norm": 1.59375, + "learning_rate": 0.00019599649857669383, + "loss": 4.9429, + "step": 53400 + }, + { + "epoch": 0.46014118457300274, + "grad_norm": 1.8671875, + "learning_rate": 0.0001959889060317441, + "loss": 4.5721, + "step": 53450 + }, + { + "epoch": 0.46057162534435264, + "grad_norm": 1.9765625, + "learning_rate": 0.00019598130644141518, + "loss": 4.5661, + "step": 53500 + }, + { + "epoch": 0.4610020661157025, + "grad_norm": 2.015625, + "learning_rate": 0.0001959736998062649, + "loss": 4.886, + "step": 53550 + }, + { + "epoch": 0.46143250688705234, + "grad_norm": 2.375, + "learning_rate": 0.00019596608612685153, + "loss": 4.9356, + "step": 53600 + }, + { + "epoch": 0.4618629476584022, + "grad_norm": 1.5859375, + "learning_rate": 0.0001959584654037339, + "loss": 4.7174, + "step": 53650 + }, + { + "epoch": 0.46229338842975204, + "grad_norm": 4.03125, + "learning_rate": 0.0001959508376374714, + "loss": 4.8117, + "step": 53700 + }, + { + "epoch": 0.46272382920110194, + "grad_norm": 2.5, + "learning_rate": 0.00019594320282862384, + "loss": 4.8152, + "step": 53750 + }, + { + "epoch": 0.4631542699724518, + "grad_norm": 2.109375, + "learning_rate": 0.00019593556097775164, + "loss": 4.7632, + "step": 53800 + }, + { + "epoch": 0.46358471074380164, + "grad_norm": 2.703125, + "learning_rate": 0.00019592791208541567, + "loss": 4.5068, + "step": 53850 + }, + { + "epoch": 0.4640151515151515, + "grad_norm": 2.015625, + "learning_rate": 0.00019592025615217733, + "loss": 4.7496, + "step": 53900 + }, + { + "epoch": 0.4644455922865014, + "grad_norm": 1.96875, + "learning_rate": 0.00019591259317859857, + "loss": 4.8606, + "step": 53950 + }, + { + "epoch": 0.46487603305785125, + "grad_norm": 3.46875, + "learning_rate": 0.00019590492316524184, + "loss": 4.969, + "step": 54000 + }, + { + "epoch": 0.46487603305785125, + "eval_loss": 5.447500705718994, + "eval_runtime": 21.7174, + "eval_samples_per_second": 29.469, + "eval_steps_per_second": 14.735, + "eval_tts_loss": 6.713471357580835, + "step": 54000 + }, + { + "epoch": 0.4653064738292011, + "grad_norm": 1.875, + "learning_rate": 0.00019589724611267012, + "loss": 4.9705, + "step": 54050 + }, + { + "epoch": 0.46573691460055094, + "grad_norm": 0.671875, + "learning_rate": 0.0001958895620214468, + "loss": 4.7252, + "step": 54100 + }, + { + "epoch": 0.46616735537190085, + "grad_norm": 2.5, + "learning_rate": 0.00019588187089213598, + "loss": 4.8425, + "step": 54150 + }, + { + "epoch": 0.4665977961432507, + "grad_norm": 1.8046875, + "learning_rate": 0.00019587417272530212, + "loss": 4.8882, + "step": 54200 + }, + { + "epoch": 0.46702823691460055, + "grad_norm": 3.78125, + "learning_rate": 0.00019586646752151027, + "loss": 4.8315, + "step": 54250 + }, + { + "epoch": 0.4674586776859504, + "grad_norm": 1.109375, + "learning_rate": 0.000195858755281326, + "loss": 4.743, + "step": 54300 + }, + { + "epoch": 0.4678891184573003, + "grad_norm": 1.3046875, + "learning_rate": 0.00019585103600531527, + "loss": 4.571, + "step": 54350 + }, + { + "epoch": 0.46831955922865015, + "grad_norm": 3.328125, + "learning_rate": 0.00019584330969404476, + "loss": 5.2712, + "step": 54400 + }, + { + "epoch": 0.46875, + "grad_norm": 1.875, + "learning_rate": 0.00019583557634808154, + "loss": 4.2545, + "step": 54450 + }, + { + "epoch": 0.46918044077134985, + "grad_norm": 2.890625, + "learning_rate": 0.00019582783596799321, + "loss": 4.5995, + "step": 54500 + }, + { + "epoch": 0.4696108815426997, + "grad_norm": 1.3125, + "learning_rate": 0.0001958200885543479, + "loss": 4.4244, + "step": 54550 + }, + { + "epoch": 0.4700413223140496, + "grad_norm": 2.390625, + "learning_rate": 0.00019581233410771422, + "loss": 4.6487, + "step": 54600 + }, + { + "epoch": 0.47047176308539945, + "grad_norm": 3.3125, + "learning_rate": 0.0001958045726286614, + "loss": 4.7865, + "step": 54650 + }, + { + "epoch": 0.4709022038567493, + "grad_norm": 1.84375, + "learning_rate": 0.00019579680411775904, + "loss": 4.8535, + "step": 54700 + }, + { + "epoch": 0.47133264462809915, + "grad_norm": 2.234375, + "learning_rate": 0.0001957890285755774, + "loss": 4.7888, + "step": 54750 + }, + { + "epoch": 0.47176308539944906, + "grad_norm": 2.546875, + "learning_rate": 0.00019578124600268715, + "loss": 4.4223, + "step": 54800 + }, + { + "epoch": 0.4721935261707989, + "grad_norm": 3.390625, + "learning_rate": 0.00019577345639965952, + "loss": 4.9533, + "step": 54850 + }, + { + "epoch": 0.47262396694214875, + "grad_norm": 2.03125, + "learning_rate": 0.00019576565976706624, + "loss": 5.1331, + "step": 54900 + }, + { + "epoch": 0.4730544077134986, + "grad_norm": 0.953125, + "learning_rate": 0.00019575785610547957, + "loss": 5.0388, + "step": 54950 + }, + { + "epoch": 0.4734848484848485, + "grad_norm": 1.953125, + "learning_rate": 0.00019575004541547234, + "loss": 4.9406, + "step": 55000 + }, + { + "epoch": 0.47391528925619836, + "grad_norm": 2.78125, + "learning_rate": 0.00019574222769761776, + "loss": 4.7529, + "step": 55050 + }, + { + "epoch": 0.4743457300275482, + "grad_norm": 2.296875, + "learning_rate": 0.0001957344029524897, + "loss": 4.5646, + "step": 55100 + }, + { + "epoch": 0.47477617079889806, + "grad_norm": 2.09375, + "learning_rate": 0.00019572657118066235, + "loss": 4.8436, + "step": 55150 + }, + { + "epoch": 0.47520661157024796, + "grad_norm": 1.46875, + "learning_rate": 0.0001957187323827107, + "loss": 4.5989, + "step": 55200 + }, + { + "epoch": 0.4756370523415978, + "grad_norm": 2.390625, + "learning_rate": 0.00019571088655921003, + "loss": 4.8347, + "step": 55250 + }, + { + "epoch": 0.47606749311294766, + "grad_norm": 1.9765625, + "learning_rate": 0.00019570303371073622, + "loss": 4.9279, + "step": 55300 + }, + { + "epoch": 0.4764979338842975, + "grad_norm": 2.796875, + "learning_rate": 0.00019569517383786561, + "loss": 4.7051, + "step": 55350 + }, + { + "epoch": 0.47692837465564736, + "grad_norm": 3.78125, + "learning_rate": 0.0001956873069411752, + "loss": 5.0896, + "step": 55400 + }, + { + "epoch": 0.47735881542699726, + "grad_norm": 3.359375, + "learning_rate": 0.00019567943302124227, + "loss": 4.6047, + "step": 55450 + }, + { + "epoch": 0.4777892561983471, + "grad_norm": 0.7421875, + "learning_rate": 0.00019567155207864486, + "loss": 4.6413, + "step": 55500 + }, + { + "epoch": 0.47821969696969696, + "grad_norm": 2.328125, + "learning_rate": 0.00019566366411396136, + "loss": 4.5122, + "step": 55550 + }, + { + "epoch": 0.4786501377410468, + "grad_norm": 2.015625, + "learning_rate": 0.00019565576912777073, + "loss": 4.851, + "step": 55600 + }, + { + "epoch": 0.4790805785123967, + "grad_norm": 2.859375, + "learning_rate": 0.00019564786712065243, + "loss": 4.8688, + "step": 55650 + }, + { + "epoch": 0.47951101928374656, + "grad_norm": 2.671875, + "learning_rate": 0.00019563995809318652, + "loss": 4.8937, + "step": 55700 + }, + { + "epoch": 0.4799414600550964, + "grad_norm": 3.078125, + "learning_rate": 0.00019563204204595344, + "loss": 4.8586, + "step": 55750 + }, + { + "epoch": 0.48037190082644626, + "grad_norm": 1.6796875, + "learning_rate": 0.00019562411897953424, + "loss": 4.4797, + "step": 55800 + }, + { + "epoch": 0.48080234159779617, + "grad_norm": 2.28125, + "learning_rate": 0.00019561618889451046, + "loss": 4.846, + "step": 55850 + }, + { + "epoch": 0.481232782369146, + "grad_norm": 1.34375, + "learning_rate": 0.0001956082517914641, + "loss": 4.6205, + "step": 55900 + }, + { + "epoch": 0.48166322314049587, + "grad_norm": 3.5, + "learning_rate": 0.0001956003076709778, + "loss": 4.8843, + "step": 55950 + }, + { + "epoch": 0.4820936639118457, + "grad_norm": 1.65625, + "learning_rate": 0.00019559235653363456, + "loss": 5.009, + "step": 56000 + }, + { + "epoch": 0.48252410468319556, + "grad_norm": 1.15625, + "learning_rate": 0.00019558439838001808, + "loss": 4.5382, + "step": 56050 + }, + { + "epoch": 0.48295454545454547, + "grad_norm": 2.203125, + "learning_rate": 0.0001955764332107124, + "loss": 4.1105, + "step": 56100 + }, + { + "epoch": 0.4833849862258953, + "grad_norm": 3.5625, + "learning_rate": 0.00019556846102630212, + "loss": 4.8291, + "step": 56150 + }, + { + "epoch": 0.48381542699724517, + "grad_norm": 1.9296875, + "learning_rate": 0.00019556048182737247, + "loss": 4.5713, + "step": 56200 + }, + { + "epoch": 0.484245867768595, + "grad_norm": 1.7890625, + "learning_rate": 0.00019555249561450903, + "loss": 5.1687, + "step": 56250 + }, + { + "epoch": 0.4846763085399449, + "grad_norm": 0.984375, + "learning_rate": 0.00019554450238829803, + "loss": 4.69, + "step": 56300 + }, + { + "epoch": 0.48510674931129477, + "grad_norm": 2.25, + "learning_rate": 0.0001955365021493261, + "loss": 4.8855, + "step": 56350 + }, + { + "epoch": 0.4855371900826446, + "grad_norm": 3.375, + "learning_rate": 0.00019552849489818045, + "loss": 5.0094, + "step": 56400 + }, + { + "epoch": 0.48596763085399447, + "grad_norm": 2.875, + "learning_rate": 0.00019552048063544884, + "loss": 4.6932, + "step": 56450 + }, + { + "epoch": 0.4863980716253444, + "grad_norm": 2.5625, + "learning_rate": 0.00019551245936171943, + "loss": 4.9196, + "step": 56500 + }, + { + "epoch": 0.4868285123966942, + "grad_norm": 1.8984375, + "learning_rate": 0.00019550443107758104, + "loss": 5.2379, + "step": 56550 + }, + { + "epoch": 0.4872589531680441, + "grad_norm": 2.765625, + "learning_rate": 0.0001954963957836229, + "loss": 4.4327, + "step": 56600 + }, + { + "epoch": 0.4876893939393939, + "grad_norm": 2.796875, + "learning_rate": 0.00019548835348043474, + "loss": 5.1293, + "step": 56650 + }, + { + "epoch": 0.4881198347107438, + "grad_norm": 2.15625, + "learning_rate": 0.0001954803041686069, + "loss": 4.3975, + "step": 56700 + }, + { + "epoch": 0.4885502754820937, + "grad_norm": 2.078125, + "learning_rate": 0.00019547224784873015, + "loss": 4.7219, + "step": 56750 + }, + { + "epoch": 0.4889807162534435, + "grad_norm": 2.0625, + "learning_rate": 0.00019546418452139584, + "loss": 5.1161, + "step": 56800 + }, + { + "epoch": 0.4894111570247934, + "grad_norm": 1.828125, + "learning_rate": 0.00019545611418719577, + "loss": 5.2622, + "step": 56850 + }, + { + "epoch": 0.4898415977961432, + "grad_norm": 1.4375, + "learning_rate": 0.0001954480368467223, + "loss": 5.0309, + "step": 56900 + }, + { + "epoch": 0.49027203856749313, + "grad_norm": 1.953125, + "learning_rate": 0.0001954399525005683, + "loss": 5.0479, + "step": 56950 + }, + { + "epoch": 0.490702479338843, + "grad_norm": 2.625, + "learning_rate": 0.0001954318611493271, + "loss": 4.9872, + "step": 57000 + }, + { + "epoch": 0.490702479338843, + "eval_loss": 5.437960147857666, + "eval_runtime": 21.7735, + "eval_samples_per_second": 29.394, + "eval_steps_per_second": 14.697, + "eval_tts_loss": 6.678905297852088, + "step": 57000 + }, + { + "epoch": 0.4911329201101928, + "grad_norm": 2.640625, + "learning_rate": 0.00019542376279359266, + "loss": 5.0722, + "step": 57050 + }, + { + "epoch": 0.4915633608815427, + "grad_norm": 1.8046875, + "learning_rate": 0.00019541565743395932, + "loss": 4.6575, + "step": 57100 + }, + { + "epoch": 0.4919938016528926, + "grad_norm": 2.4375, + "learning_rate": 0.00019540754507102202, + "loss": 5.1254, + "step": 57150 + }, + { + "epoch": 0.49242424242424243, + "grad_norm": 1.2265625, + "learning_rate": 0.00019539942570537616, + "loss": 4.8798, + "step": 57200 + }, + { + "epoch": 0.4928546831955923, + "grad_norm": 3.265625, + "learning_rate": 0.00019539129933761776, + "loss": 5.0643, + "step": 57250 + }, + { + "epoch": 0.49328512396694213, + "grad_norm": 2.6875, + "learning_rate": 0.00019538316596834322, + "loss": 4.7154, + "step": 57300 + }, + { + "epoch": 0.49371556473829203, + "grad_norm": 2.0625, + "learning_rate": 0.00019537502559814946, + "loss": 4.5703, + "step": 57350 + }, + { + "epoch": 0.4941460055096419, + "grad_norm": 2.96875, + "learning_rate": 0.00019536687822763407, + "loss": 5.1492, + "step": 57400 + }, + { + "epoch": 0.49457644628099173, + "grad_norm": 2.375, + "learning_rate": 0.00019535872385739503, + "loss": 4.771, + "step": 57450 + }, + { + "epoch": 0.4950068870523416, + "grad_norm": 2.59375, + "learning_rate": 0.0001953505624880308, + "loss": 4.6707, + "step": 57500 + }, + { + "epoch": 0.4954373278236915, + "grad_norm": 0.9140625, + "learning_rate": 0.00019534239412014043, + "loss": 5.1132, + "step": 57550 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 2.734375, + "learning_rate": 0.00019533421875432346, + "loss": 4.7795, + "step": 57600 + }, + { + "epoch": 0.4962982093663912, + "grad_norm": 2.46875, + "learning_rate": 0.00019532603639117998, + "loss": 5.437, + "step": 57650 + }, + { + "epoch": 0.49672865013774103, + "grad_norm": 2.140625, + "learning_rate": 0.00019531784703131054, + "loss": 4.8702, + "step": 57700 + }, + { + "epoch": 0.4971590909090909, + "grad_norm": 2.34375, + "learning_rate": 0.00019530965067531618, + "loss": 4.9499, + "step": 57750 + }, + { + "epoch": 0.4975895316804408, + "grad_norm": 1.2421875, + "learning_rate": 0.0001953014473237985, + "loss": 4.9733, + "step": 57800 + }, + { + "epoch": 0.49801997245179064, + "grad_norm": 2.09375, + "learning_rate": 0.00019529323697735967, + "loss": 4.667, + "step": 57850 + }, + { + "epoch": 0.4984504132231405, + "grad_norm": 2.4375, + "learning_rate": 0.00019528501963660225, + "loss": 4.7631, + "step": 57900 + }, + { + "epoch": 0.49888085399449034, + "grad_norm": 3.09375, + "learning_rate": 0.00019527679530212942, + "loss": 4.8465, + "step": 57950 + }, + { + "epoch": 0.49931129476584024, + "grad_norm": 1.9453125, + "learning_rate": 0.00019526856397454481, + "loss": 4.6451, + "step": 58000 + }, + { + "epoch": 0.4997417355371901, + "grad_norm": 3.515625, + "learning_rate": 0.00019526032565445257, + "loss": 4.7026, + "step": 58050 + }, + { + "epoch": 0.5001721763085399, + "grad_norm": 1.8359375, + "learning_rate": 0.00019525208034245738, + "loss": 4.8911, + "step": 58100 + }, + { + "epoch": 0.5006026170798898, + "grad_norm": 1.953125, + "learning_rate": 0.00019524382803916444, + "loss": 4.4846, + "step": 58150 + }, + { + "epoch": 0.5010330578512396, + "grad_norm": 2.796875, + "learning_rate": 0.00019523556874517946, + "loss": 4.8332, + "step": 58200 + }, + { + "epoch": 0.5014634986225895, + "grad_norm": 1.921875, + "learning_rate": 0.00019522730246110861, + "loss": 4.3156, + "step": 58250 + }, + { + "epoch": 0.5018939393939394, + "grad_norm": 2.953125, + "learning_rate": 0.00019521902918755865, + "loss": 5.1603, + "step": 58300 + }, + { + "epoch": 0.5023243801652892, + "grad_norm": 2.875, + "learning_rate": 0.00019521074892513683, + "loss": 5.1468, + "step": 58350 + }, + { + "epoch": 0.5027548209366391, + "grad_norm": 1.609375, + "learning_rate": 0.0001952024616744509, + "loss": 4.638, + "step": 58400 + }, + { + "epoch": 0.5031852617079889, + "grad_norm": 1.375, + "learning_rate": 0.00019519416743610907, + "loss": 4.8574, + "step": 58450 + }, + { + "epoch": 0.5036157024793388, + "grad_norm": 2.5, + "learning_rate": 0.0001951858662107202, + "loss": 4.7201, + "step": 58500 + }, + { + "epoch": 0.5040461432506887, + "grad_norm": 1.2890625, + "learning_rate": 0.00019517755799889356, + "loss": 4.8031, + "step": 58550 + }, + { + "epoch": 0.5044765840220385, + "grad_norm": 2.484375, + "learning_rate": 0.00019516924280123891, + "loss": 5.17, + "step": 58600 + }, + { + "epoch": 0.5049070247933884, + "grad_norm": 2.203125, + "learning_rate": 0.00019516092061836664, + "loss": 5.0124, + "step": 58650 + }, + { + "epoch": 0.5053374655647382, + "grad_norm": 2.078125, + "learning_rate": 0.0001951525914508875, + "loss": 4.7327, + "step": 58700 + }, + { + "epoch": 0.5057679063360881, + "grad_norm": 1.4453125, + "learning_rate": 0.0001951442552994129, + "loss": 4.8515, + "step": 58750 + }, + { + "epoch": 0.506198347107438, + "grad_norm": 1.0390625, + "learning_rate": 0.00019513591216455467, + "loss": 4.973, + "step": 58800 + }, + { + "epoch": 0.5066287878787878, + "grad_norm": 2.78125, + "learning_rate": 0.00019512756204692516, + "loss": 4.7377, + "step": 58850 + }, + { + "epoch": 0.5070592286501377, + "grad_norm": 2.21875, + "learning_rate": 0.00019511920494713728, + "loss": 4.7408, + "step": 58900 + }, + { + "epoch": 0.5074896694214877, + "grad_norm": 2.3125, + "learning_rate": 0.0001951108408658044, + "loss": 5.0065, + "step": 58950 + }, + { + "epoch": 0.5079201101928374, + "grad_norm": 1.390625, + "learning_rate": 0.00019510246980354045, + "loss": 4.4652, + "step": 59000 + }, + { + "epoch": 0.5083505509641874, + "grad_norm": 2.21875, + "learning_rate": 0.00019509409176095982, + "loss": 4.676, + "step": 59050 + }, + { + "epoch": 0.5087809917355371, + "grad_norm": 2.671875, + "learning_rate": 0.0001950857067386775, + "loss": 4.7497, + "step": 59100 + }, + { + "epoch": 0.509211432506887, + "grad_norm": 2.53125, + "learning_rate": 0.00019507731473730882, + "loss": 4.8371, + "step": 59150 + }, + { + "epoch": 0.509641873278237, + "grad_norm": 2.0625, + "learning_rate": 0.00019506891575746984, + "loss": 4.4019, + "step": 59200 + }, + { + "epoch": 0.5100723140495868, + "grad_norm": 2.046875, + "learning_rate": 0.00019506050979977698, + "loss": 4.7386, + "step": 59250 + }, + { + "epoch": 0.5105027548209367, + "grad_norm": 3.9375, + "learning_rate": 0.00019505209686484725, + "loss": 4.9919, + "step": 59300 + }, + { + "epoch": 0.5109331955922864, + "grad_norm": 2.15625, + "learning_rate": 0.00019504367695329807, + "loss": 4.4062, + "step": 59350 + }, + { + "epoch": 0.5113636363636364, + "grad_norm": 2.28125, + "learning_rate": 0.00019503525006574757, + "loss": 4.5798, + "step": 59400 + }, + { + "epoch": 0.5117940771349863, + "grad_norm": 2.953125, + "learning_rate": 0.0001950268162028141, + "loss": 4.8806, + "step": 59450 + }, + { + "epoch": 0.512224517906336, + "grad_norm": 2.5625, + "learning_rate": 0.00019501837536511685, + "loss": 4.6123, + "step": 59500 + }, + { + "epoch": 0.512654958677686, + "grad_norm": 1.296875, + "learning_rate": 0.00019500992755327524, + "loss": 4.9083, + "step": 59550 + }, + { + "epoch": 0.5130853994490359, + "grad_norm": 2.3125, + "learning_rate": 0.0001950014727679094, + "loss": 5.2236, + "step": 59600 + }, + { + "epoch": 0.5135158402203857, + "grad_norm": 1.546875, + "learning_rate": 0.00019499301100963984, + "loss": 4.8901, + "step": 59650 + }, + { + "epoch": 0.5139462809917356, + "grad_norm": 2.703125, + "learning_rate": 0.00019498454227908764, + "loss": 5.1904, + "step": 59700 + }, + { + "epoch": 0.5143767217630854, + "grad_norm": 2.28125, + "learning_rate": 0.0001949760665768744, + "loss": 5.1363, + "step": 59750 + }, + { + "epoch": 0.5148071625344353, + "grad_norm": 3.28125, + "learning_rate": 0.00019496758390362228, + "loss": 4.4957, + "step": 59800 + }, + { + "epoch": 0.5152376033057852, + "grad_norm": 2.0, + "learning_rate": 0.00019495909425995377, + "loss": 4.9785, + "step": 59850 + }, + { + "epoch": 0.515668044077135, + "grad_norm": 2.71875, + "learning_rate": 0.00019495059764649208, + "loss": 4.8777, + "step": 59900 + }, + { + "epoch": 0.5160984848484849, + "grad_norm": 2.90625, + "learning_rate": 0.0001949420940638608, + "loss": 4.6085, + "step": 59950 + }, + { + "epoch": 0.5165289256198347, + "grad_norm": 3.1875, + "learning_rate": 0.00019493358351268408, + "loss": 4.7669, + "step": 60000 + }, + { + "epoch": 0.5165289256198347, + "eval_loss": 5.428589820861816, + "eval_runtime": 22.0407, + "eval_samples_per_second": 29.037, + "eval_steps_per_second": 14.519, + "eval_tts_loss": 6.65994587151647, + "step": 60000 + }, + { + "epoch": 0.5169593663911846, + "grad_norm": 2.703125, + "learning_rate": 0.0001949250659935866, + "loss": 4.7544, + "step": 60050 + }, + { + "epoch": 0.5173898071625345, + "grad_norm": 2.375, + "learning_rate": 0.00019491654150719353, + "loss": 5.1902, + "step": 60100 + }, + { + "epoch": 0.5178202479338843, + "grad_norm": 2.40625, + "learning_rate": 0.0001949080100541305, + "loss": 4.8204, + "step": 60150 + }, + { + "epoch": 0.5182506887052342, + "grad_norm": 2.265625, + "learning_rate": 0.00019489947163502374, + "loss": 4.2814, + "step": 60200 + }, + { + "epoch": 0.5186811294765841, + "grad_norm": 3.65625, + "learning_rate": 0.00019489092625049996, + "loss": 4.8777, + "step": 60250 + }, + { + "epoch": 0.5191115702479339, + "grad_norm": 2.578125, + "learning_rate": 0.00019488237390118633, + "loss": 5.1736, + "step": 60300 + }, + { + "epoch": 0.5195420110192838, + "grad_norm": 3.25, + "learning_rate": 0.0001948738145877106, + "loss": 4.9632, + "step": 60350 + }, + { + "epoch": 0.5199724517906336, + "grad_norm": 1.2265625, + "learning_rate": 0.00019486524831070103, + "loss": 4.6269, + "step": 60400 + }, + { + "epoch": 0.5204028925619835, + "grad_norm": 1.21875, + "learning_rate": 0.00019485667507078633, + "loss": 5.2265, + "step": 60450 + }, + { + "epoch": 0.5208333333333334, + "grad_norm": 2.34375, + "learning_rate": 0.0001948480948685958, + "loss": 5.2691, + "step": 60500 + }, + { + "epoch": 0.5212637741046832, + "grad_norm": 1.296875, + "learning_rate": 0.00019483950770475913, + "loss": 4.9249, + "step": 60550 + }, + { + "epoch": 0.5216942148760331, + "grad_norm": 2.953125, + "learning_rate": 0.00019483091357990666, + "loss": 4.324, + "step": 60600 + }, + { + "epoch": 0.522124655647383, + "grad_norm": 2.390625, + "learning_rate": 0.00019482231249466918, + "loss": 4.9368, + "step": 60650 + }, + { + "epoch": 0.5225550964187328, + "grad_norm": 3.515625, + "learning_rate": 0.00019481370444967796, + "loss": 4.5046, + "step": 60700 + }, + { + "epoch": 0.5229855371900827, + "grad_norm": 2.515625, + "learning_rate": 0.00019480508944556485, + "loss": 4.9599, + "step": 60750 + }, + { + "epoch": 0.5234159779614325, + "grad_norm": 2.109375, + "learning_rate": 0.00019479646748296215, + "loss": 5.1871, + "step": 60800 + }, + { + "epoch": 0.5238464187327824, + "grad_norm": 1.1796875, + "learning_rate": 0.00019478783856250267, + "loss": 4.8656, + "step": 60850 + }, + { + "epoch": 0.5242768595041323, + "grad_norm": 1.6328125, + "learning_rate": 0.00019477920268481981, + "loss": 4.9236, + "step": 60900 + }, + { + "epoch": 0.5247073002754821, + "grad_norm": 1.9765625, + "learning_rate": 0.00019477055985054738, + "loss": 4.9892, + "step": 60950 + }, + { + "epoch": 0.525137741046832, + "grad_norm": 1.4375, + "learning_rate": 0.00019476191006031977, + "loss": 4.7424, + "step": 61000 + }, + { + "epoch": 0.5255681818181818, + "grad_norm": 1.96875, + "learning_rate": 0.00019475325331477184, + "loss": 5.054, + "step": 61050 + }, + { + "epoch": 0.5259986225895317, + "grad_norm": 2.15625, + "learning_rate": 0.00019474458961453897, + "loss": 4.6265, + "step": 61100 + }, + { + "epoch": 0.5264290633608816, + "grad_norm": 1.953125, + "learning_rate": 0.00019473591896025712, + "loss": 5.1348, + "step": 61150 + }, + { + "epoch": 0.5268595041322314, + "grad_norm": 2.1875, + "learning_rate": 0.00019472724135256262, + "loss": 4.3568, + "step": 61200 + }, + { + "epoch": 0.5272899449035813, + "grad_norm": 2.828125, + "learning_rate": 0.0001947185567920924, + "loss": 4.5678, + "step": 61250 + }, + { + "epoch": 0.5277203856749312, + "grad_norm": 2.265625, + "learning_rate": 0.0001947098652794839, + "loss": 4.8169, + "step": 61300 + }, + { + "epoch": 0.528150826446281, + "grad_norm": 2.125, + "learning_rate": 0.0001947011668153751, + "loss": 4.7715, + "step": 61350 + }, + { + "epoch": 0.5285812672176309, + "grad_norm": 2.078125, + "learning_rate": 0.00019469246140040437, + "loss": 5.2144, + "step": 61400 + }, + { + "epoch": 0.5290117079889807, + "grad_norm": 1.015625, + "learning_rate": 0.00019468374903521075, + "loss": 4.3464, + "step": 61450 + }, + { + "epoch": 0.5294421487603306, + "grad_norm": 1.84375, + "learning_rate": 0.00019467502972043364, + "loss": 4.5928, + "step": 61500 + }, + { + "epoch": 0.5298725895316805, + "grad_norm": 2.375, + "learning_rate": 0.00019466630345671304, + "loss": 5.1056, + "step": 61550 + }, + { + "epoch": 0.5303030303030303, + "grad_norm": 2.109375, + "learning_rate": 0.00019465757024468946, + "loss": 4.2716, + "step": 61600 + }, + { + "epoch": 0.5307334710743802, + "grad_norm": 3.140625, + "learning_rate": 0.00019464883008500386, + "loss": 4.6047, + "step": 61650 + }, + { + "epoch": 0.53116391184573, + "grad_norm": 1.7421875, + "learning_rate": 0.0001946400829782978, + "loss": 5.0897, + "step": 61700 + }, + { + "epoch": 0.5315943526170799, + "grad_norm": 2.03125, + "learning_rate": 0.0001946313289252133, + "loss": 4.6044, + "step": 61750 + }, + { + "epoch": 0.5320247933884298, + "grad_norm": 2.0, + "learning_rate": 0.00019462256792639285, + "loss": 5.1399, + "step": 61800 + }, + { + "epoch": 0.5324552341597796, + "grad_norm": 2.5625, + "learning_rate": 0.00019461379998247946, + "loss": 5.0757, + "step": 61850 + }, + { + "epoch": 0.5328856749311295, + "grad_norm": 2.15625, + "learning_rate": 0.00019460502509411676, + "loss": 4.7783, + "step": 61900 + }, + { + "epoch": 0.5333161157024794, + "grad_norm": 2.5625, + "learning_rate": 0.00019459624326194873, + "loss": 4.5562, + "step": 61950 + }, + { + "epoch": 0.5337465564738292, + "grad_norm": 1.734375, + "learning_rate": 0.00019458745448662, + "loss": 4.5624, + "step": 62000 + }, + { + "epoch": 0.5341769972451791, + "grad_norm": 1.2421875, + "learning_rate": 0.00019457865876877558, + "loss": 4.9627, + "step": 62050 + }, + { + "epoch": 0.5346074380165289, + "grad_norm": 1.78125, + "learning_rate": 0.00019456985610906115, + "loss": 4.5935, + "step": 62100 + }, + { + "epoch": 0.5350378787878788, + "grad_norm": 2.078125, + "learning_rate": 0.00019456104650812273, + "loss": 4.8025, + "step": 62150 + }, + { + "epoch": 0.5354683195592287, + "grad_norm": 2.609375, + "learning_rate": 0.00019455222996660695, + "loss": 5.0809, + "step": 62200 + }, + { + "epoch": 0.5358987603305785, + "grad_norm": 1.8984375, + "learning_rate": 0.00019454340648516092, + "loss": 4.8076, + "step": 62250 + }, + { + "epoch": 0.5363292011019284, + "grad_norm": 1.3125, + "learning_rate": 0.00019453457606443227, + "loss": 4.9953, + "step": 62300 + }, + { + "epoch": 0.5367596418732782, + "grad_norm": 1.8046875, + "learning_rate": 0.00019452573870506915, + "loss": 4.9248, + "step": 62350 + }, + { + "epoch": 0.5371900826446281, + "grad_norm": 2.140625, + "learning_rate": 0.00019451689440772016, + "loss": 4.9168, + "step": 62400 + }, + { + "epoch": 0.537620523415978, + "grad_norm": 2.09375, + "learning_rate": 0.00019450804317303454, + "loss": 4.3827, + "step": 62450 + }, + { + "epoch": 0.5380509641873278, + "grad_norm": 3.5625, + "learning_rate": 0.00019449918500166183, + "loss": 5.1639, + "step": 62500 + }, + { + "epoch": 0.5384814049586777, + "grad_norm": 3.1875, + "learning_rate": 0.00019449031989425227, + "loss": 4.911, + "step": 62550 + }, + { + "epoch": 0.5389118457300276, + "grad_norm": 2.078125, + "learning_rate": 0.00019448144785145653, + "loss": 5.0495, + "step": 62600 + }, + { + "epoch": 0.5393422865013774, + "grad_norm": 3.125, + "learning_rate": 0.0001944725688739258, + "loss": 4.989, + "step": 62650 + }, + { + "epoch": 0.5397727272727273, + "grad_norm": 4.90625, + "learning_rate": 0.00019446368296231178, + "loss": 4.8458, + "step": 62700 + }, + { + "epoch": 0.5402031680440771, + "grad_norm": 2.203125, + "learning_rate": 0.0001944547901172667, + "loss": 5.0329, + "step": 62750 + }, + { + "epoch": 0.540633608815427, + "grad_norm": 3.40625, + "learning_rate": 0.00019444589033944322, + "loss": 4.6332, + "step": 62800 + }, + { + "epoch": 0.5410640495867769, + "grad_norm": 2.25, + "learning_rate": 0.0001944369836294946, + "loss": 5.0479, + "step": 62850 + }, + { + "epoch": 0.5414944903581267, + "grad_norm": 1.5234375, + "learning_rate": 0.0001944280699880746, + "loss": 4.6948, + "step": 62900 + }, + { + "epoch": 0.5419249311294766, + "grad_norm": 1.578125, + "learning_rate": 0.00019441914941583738, + "loss": 4.8837, + "step": 62950 + }, + { + "epoch": 0.5423553719008265, + "grad_norm": 1.6015625, + "learning_rate": 0.00019441022191343777, + "loss": 4.8535, + "step": 63000 + }, + { + "epoch": 0.5423553719008265, + "eval_loss": 5.41285514831543, + "eval_runtime": 21.855, + "eval_samples_per_second": 29.284, + "eval_steps_per_second": 14.642, + "eval_tts_loss": 6.7312361713261595, + "step": 63000 + }, + { + "epoch": 0.5427858126721763, + "grad_norm": 2.671875, + "learning_rate": 0.00019440128748153098, + "loss": 4.6159, + "step": 63050 + }, + { + "epoch": 0.5432162534435262, + "grad_norm": 2.21875, + "learning_rate": 0.00019439234612077282, + "loss": 5.0116, + "step": 63100 + }, + { + "epoch": 0.543646694214876, + "grad_norm": 2.1875, + "learning_rate": 0.00019438339783181953, + "loss": 5.0751, + "step": 63150 + }, + { + "epoch": 0.5440771349862259, + "grad_norm": 3.0625, + "learning_rate": 0.0001943744426153279, + "loss": 4.9378, + "step": 63200 + }, + { + "epoch": 0.5445075757575758, + "grad_norm": 1.28125, + "learning_rate": 0.0001943654804719553, + "loss": 4.4809, + "step": 63250 + }, + { + "epoch": 0.5449380165289256, + "grad_norm": 1.3671875, + "learning_rate": 0.00019435651140235936, + "loss": 4.7089, + "step": 63300 + }, + { + "epoch": 0.5453684573002755, + "grad_norm": 0.8046875, + "learning_rate": 0.0001943475354071986, + "loss": 4.6986, + "step": 63350 + }, + { + "epoch": 0.5457988980716253, + "grad_norm": 3.671875, + "learning_rate": 0.00019433855248713166, + "loss": 4.7862, + "step": 63400 + }, + { + "epoch": 0.5462293388429752, + "grad_norm": 1.2890625, + "learning_rate": 0.00019432956264281797, + "loss": 4.6603, + "step": 63450 + }, + { + "epoch": 0.5466597796143251, + "grad_norm": 1.875, + "learning_rate": 0.00019432056587491734, + "loss": 5.0196, + "step": 63500 + }, + { + "epoch": 0.5470902203856749, + "grad_norm": 2.796875, + "learning_rate": 0.00019431156218409012, + "loss": 4.8541, + "step": 63550 + }, + { + "epoch": 0.5475206611570248, + "grad_norm": 3.03125, + "learning_rate": 0.00019430255157099714, + "loss": 5.0201, + "step": 63600 + }, + { + "epoch": 0.5479511019283747, + "grad_norm": 3.3125, + "learning_rate": 0.00019429353403629978, + "loss": 4.8323, + "step": 63650 + }, + { + "epoch": 0.5483815426997245, + "grad_norm": 1.046875, + "learning_rate": 0.0001942845095806599, + "loss": 4.8278, + "step": 63700 + }, + { + "epoch": 0.5488119834710744, + "grad_norm": 0.6640625, + "learning_rate": 0.00019427547820473982, + "loss": 4.5837, + "step": 63750 + }, + { + "epoch": 0.5492424242424242, + "grad_norm": 2.703125, + "learning_rate": 0.00019426643990920256, + "loss": 4.949, + "step": 63800 + }, + { + "epoch": 0.5496728650137741, + "grad_norm": 2.171875, + "learning_rate": 0.0001942573946947114, + "loss": 4.9298, + "step": 63850 + }, + { + "epoch": 0.550103305785124, + "grad_norm": 2.140625, + "learning_rate": 0.00019424834256193024, + "loss": 5.0459, + "step": 63900 + }, + { + "epoch": 0.5505337465564738, + "grad_norm": 2.421875, + "learning_rate": 0.00019423928351152355, + "loss": 5.2147, + "step": 63950 + }, + { + "epoch": 0.5509641873278237, + "grad_norm": 2.046875, + "learning_rate": 0.0001942302175441562, + "loss": 4.8054, + "step": 64000 + }, + { + "epoch": 0.5513946280991735, + "grad_norm": 2.59375, + "learning_rate": 0.00019422114466049364, + "loss": 4.829, + "step": 64050 + }, + { + "epoch": 0.5518250688705234, + "grad_norm": 1.671875, + "learning_rate": 0.00019421206486120175, + "loss": 4.9328, + "step": 64100 + }, + { + "epoch": 0.5522555096418733, + "grad_norm": 1.5, + "learning_rate": 0.00019420297814694705, + "loss": 4.6441, + "step": 64150 + }, + { + "epoch": 0.5526859504132231, + "grad_norm": 1.9765625, + "learning_rate": 0.0001941938845183964, + "loss": 4.9526, + "step": 64200 + }, + { + "epoch": 0.553116391184573, + "grad_norm": 2.53125, + "learning_rate": 0.0001941847839762173, + "loss": 5.1159, + "step": 64250 + }, + { + "epoch": 0.5535468319559229, + "grad_norm": 2.484375, + "learning_rate": 0.0001941756765210777, + "loss": 4.7393, + "step": 64300 + }, + { + "epoch": 0.5539772727272727, + "grad_norm": 2.359375, + "learning_rate": 0.00019416656215364605, + "loss": 4.9843, + "step": 64350 + }, + { + "epoch": 0.5544077134986226, + "grad_norm": 2.546875, + "learning_rate": 0.00019415744087459137, + "loss": 4.694, + "step": 64400 + }, + { + "epoch": 0.5548381542699724, + "grad_norm": 1.90625, + "learning_rate": 0.0001941483126845831, + "loss": 4.915, + "step": 64450 + }, + { + "epoch": 0.5552685950413223, + "grad_norm": 2.734375, + "learning_rate": 0.00019413917758429126, + "loss": 4.4675, + "step": 64500 + }, + { + "epoch": 0.5556990358126722, + "grad_norm": 2.328125, + "learning_rate": 0.0001941300355743863, + "loss": 4.8105, + "step": 64550 + }, + { + "epoch": 0.556129476584022, + "grad_norm": 1.9765625, + "learning_rate": 0.0001941208866555393, + "loss": 4.9246, + "step": 64600 + }, + { + "epoch": 0.5565599173553719, + "grad_norm": 1.2578125, + "learning_rate": 0.0001941117308284217, + "loss": 5.0171, + "step": 64650 + }, + { + "epoch": 0.5569903581267218, + "grad_norm": 2.046875, + "learning_rate": 0.00019410256809370552, + "loss": 4.8394, + "step": 64700 + }, + { + "epoch": 0.5574207988980716, + "grad_norm": 3.015625, + "learning_rate": 0.00019409339845206334, + "loss": 4.8064, + "step": 64750 + }, + { + "epoch": 0.5578512396694215, + "grad_norm": 2.046875, + "learning_rate": 0.00019408422190416818, + "loss": 4.8562, + "step": 64800 + }, + { + "epoch": 0.5582816804407713, + "grad_norm": 1.625, + "learning_rate": 0.00019407503845069352, + "loss": 5.2035, + "step": 64850 + }, + { + "epoch": 0.5587121212121212, + "grad_norm": 1.9765625, + "learning_rate": 0.00019406584809231346, + "loss": 4.5825, + "step": 64900 + }, + { + "epoch": 0.5591425619834711, + "grad_norm": 4.15625, + "learning_rate": 0.00019405665082970254, + "loss": 5.1706, + "step": 64950 + }, + { + "epoch": 0.5595730027548209, + "grad_norm": 2.203125, + "learning_rate": 0.0001940474466635358, + "loss": 4.7249, + "step": 65000 + }, + { + "epoch": 0.5600034435261708, + "grad_norm": 4.1875, + "learning_rate": 0.00019403823559448882, + "loss": 4.4334, + "step": 65050 + }, + { + "epoch": 0.5604338842975206, + "grad_norm": 2.3125, + "learning_rate": 0.00019402901762323772, + "loss": 5.2081, + "step": 65100 + }, + { + "epoch": 0.5608643250688705, + "grad_norm": 1.453125, + "learning_rate": 0.000194019792750459, + "loss": 4.5441, + "step": 65150 + }, + { + "epoch": 0.5612947658402204, + "grad_norm": 1.7265625, + "learning_rate": 0.00019401056097682976, + "loss": 4.7629, + "step": 65200 + }, + { + "epoch": 0.5617252066115702, + "grad_norm": 1.3203125, + "learning_rate": 0.00019400132230302765, + "loss": 4.8572, + "step": 65250 + }, + { + "epoch": 0.5621556473829201, + "grad_norm": 1.2265625, + "learning_rate": 0.00019399207672973073, + "loss": 4.9189, + "step": 65300 + }, + { + "epoch": 0.56258608815427, + "grad_norm": 2.21875, + "learning_rate": 0.0001939828242576176, + "loss": 4.6595, + "step": 65350 + }, + { + "epoch": 0.5630165289256198, + "grad_norm": 1.8359375, + "learning_rate": 0.00019397356488736739, + "loss": 4.8636, + "step": 65400 + }, + { + "epoch": 0.5634469696969697, + "grad_norm": 2.859375, + "learning_rate": 0.0001939642986196597, + "loss": 4.6871, + "step": 65450 + }, + { + "epoch": 0.5638774104683195, + "grad_norm": 2.078125, + "learning_rate": 0.00019395502545517467, + "loss": 4.6867, + "step": 65500 + }, + { + "epoch": 0.5643078512396694, + "grad_norm": 0.6328125, + "learning_rate": 0.0001939457453945929, + "loss": 4.7733, + "step": 65550 + }, + { + "epoch": 0.5647382920110193, + "grad_norm": 2.15625, + "learning_rate": 0.00019393645843859556, + "loss": 4.6338, + "step": 65600 + }, + { + "epoch": 0.5651687327823691, + "grad_norm": 2.671875, + "learning_rate": 0.00019392716458786427, + "loss": 4.6369, + "step": 65650 + }, + { + "epoch": 0.565599173553719, + "grad_norm": 2.359375, + "learning_rate": 0.00019391786384308118, + "loss": 4.8525, + "step": 65700 + }, + { + "epoch": 0.5660296143250688, + "grad_norm": 2.328125, + "learning_rate": 0.00019390855620492897, + "loss": 4.3963, + "step": 65750 + }, + { + "epoch": 0.5664600550964187, + "grad_norm": 1.4375, + "learning_rate": 0.0001938992416740908, + "loss": 4.6911, + "step": 65800 + }, + { + "epoch": 0.5668904958677686, + "grad_norm": 1.3359375, + "learning_rate": 0.0001938899202512503, + "loss": 4.6394, + "step": 65850 + }, + { + "epoch": 0.5673209366391184, + "grad_norm": 2.328125, + "learning_rate": 0.00019388059193709166, + "loss": 4.9945, + "step": 65900 + }, + { + "epoch": 0.5677513774104683, + "grad_norm": 2.453125, + "learning_rate": 0.00019387125673229956, + "loss": 4.6664, + "step": 65950 + }, + { + "epoch": 0.5681818181818182, + "grad_norm": 2.84375, + "learning_rate": 0.0001938619146375592, + "loss": 4.8697, + "step": 66000 + }, + { + "epoch": 0.5681818181818182, + "eval_loss": 5.408427715301514, + "eval_runtime": 22.0911, + "eval_samples_per_second": 28.971, + "eval_steps_per_second": 14.485, + "eval_tts_loss": 6.665151977341963, + "step": 66000 + }, + { + "epoch": 0.568612258953168, + "grad_norm": 3.359375, + "learning_rate": 0.00019385256565355622, + "loss": 5.2537, + "step": 66050 + }, + { + "epoch": 0.5690426997245179, + "grad_norm": 2.5, + "learning_rate": 0.0001938432097809769, + "loss": 5.1396, + "step": 66100 + }, + { + "epoch": 0.5694731404958677, + "grad_norm": 2.265625, + "learning_rate": 0.00019383384702050788, + "loss": 4.7616, + "step": 66150 + }, + { + "epoch": 0.5699035812672176, + "grad_norm": 2.125, + "learning_rate": 0.00019382447737283632, + "loss": 4.814, + "step": 66200 + }, + { + "epoch": 0.5703340220385675, + "grad_norm": 2.359375, + "learning_rate": 0.00019381510083865005, + "loss": 5.0777, + "step": 66250 + }, + { + "epoch": 0.5707644628099173, + "grad_norm": 2.84375, + "learning_rate": 0.0001938057174186372, + "loss": 4.7864, + "step": 66300 + }, + { + "epoch": 0.5711949035812672, + "grad_norm": 2.875, + "learning_rate": 0.00019379632711348657, + "loss": 4.9105, + "step": 66350 + }, + { + "epoch": 0.571625344352617, + "grad_norm": 2.140625, + "learning_rate": 0.0001937869299238873, + "loss": 4.7395, + "step": 66400 + }, + { + "epoch": 0.5720557851239669, + "grad_norm": 2.328125, + "learning_rate": 0.00019377752585052912, + "loss": 4.8076, + "step": 66450 + }, + { + "epoch": 0.5724862258953168, + "grad_norm": 2.625, + "learning_rate": 0.00019376811489410235, + "loss": 4.6751, + "step": 66500 + }, + { + "epoch": 0.5729166666666666, + "grad_norm": 2.125, + "learning_rate": 0.00019375869705529772, + "loss": 5.0308, + "step": 66550 + }, + { + "epoch": 0.5733471074380165, + "grad_norm": 2.015625, + "learning_rate": 0.00019374927233480643, + "loss": 4.6561, + "step": 66600 + }, + { + "epoch": 0.5737775482093664, + "grad_norm": 2.421875, + "learning_rate": 0.00019373984073332024, + "loss": 4.8788, + "step": 66650 + }, + { + "epoch": 0.5742079889807162, + "grad_norm": 4.0, + "learning_rate": 0.00019373040225153146, + "loss": 4.4974, + "step": 66700 + }, + { + "epoch": 0.5746384297520661, + "grad_norm": 2.265625, + "learning_rate": 0.00019372095689013279, + "loss": 4.7771, + "step": 66750 + }, + { + "epoch": 0.5750688705234159, + "grad_norm": 2.9375, + "learning_rate": 0.00019371150464981754, + "loss": 4.8839, + "step": 66800 + }, + { + "epoch": 0.5754993112947658, + "grad_norm": 2.203125, + "learning_rate": 0.00019370204553127948, + "loss": 4.8058, + "step": 66850 + }, + { + "epoch": 0.5759297520661157, + "grad_norm": 2.421875, + "learning_rate": 0.00019369257953521287, + "loss": 4.8367, + "step": 66900 + }, + { + "epoch": 0.5763601928374655, + "grad_norm": 2.34375, + "learning_rate": 0.00019368310666231252, + "loss": 4.6082, + "step": 66950 + }, + { + "epoch": 0.5767906336088154, + "grad_norm": 1.421875, + "learning_rate": 0.0001936736269132737, + "loss": 5.2713, + "step": 67000 + }, + { + "epoch": 0.5772210743801653, + "grad_norm": 4.375, + "learning_rate": 0.00019366414028879221, + "loss": 5.1557, + "step": 67050 + }, + { + "epoch": 0.5776515151515151, + "grad_norm": 1.453125, + "learning_rate": 0.00019365464678956434, + "loss": 4.841, + "step": 67100 + }, + { + "epoch": 0.578081955922865, + "grad_norm": 2.34375, + "learning_rate": 0.00019364514641628694, + "loss": 4.9736, + "step": 67150 + }, + { + "epoch": 0.5785123966942148, + "grad_norm": 4.03125, + "learning_rate": 0.00019363563916965722, + "loss": 4.921, + "step": 67200 + }, + { + "epoch": 0.5789428374655647, + "grad_norm": 3.078125, + "learning_rate": 0.00019362612505037307, + "loss": 5.083, + "step": 67250 + }, + { + "epoch": 0.5793732782369146, + "grad_norm": 2.765625, + "learning_rate": 0.0001936166040591328, + "loss": 5.2489, + "step": 67300 + }, + { + "epoch": 0.5798037190082644, + "grad_norm": 2.1875, + "learning_rate": 0.00019360707619663517, + "loss": 4.8065, + "step": 67350 + }, + { + "epoch": 0.5802341597796143, + "grad_norm": 3.0625, + "learning_rate": 0.0001935975414635796, + "loss": 4.9339, + "step": 67400 + }, + { + "epoch": 0.5806646005509641, + "grad_norm": 2.046875, + "learning_rate": 0.00019358799986066584, + "loss": 4.7002, + "step": 67450 + }, + { + "epoch": 0.581095041322314, + "grad_norm": 2.015625, + "learning_rate": 0.00019357845138859422, + "loss": 4.8721, + "step": 67500 + }, + { + "epoch": 0.581525482093664, + "grad_norm": 2.0625, + "learning_rate": 0.00019356889604806566, + "loss": 4.8496, + "step": 67550 + }, + { + "epoch": 0.5819559228650137, + "grad_norm": 2.953125, + "learning_rate": 0.00019355933383978142, + "loss": 4.9558, + "step": 67600 + }, + { + "epoch": 0.5823863636363636, + "grad_norm": 2.96875, + "learning_rate": 0.00019354976476444337, + "loss": 5.0111, + "step": 67650 + }, + { + "epoch": 0.5828168044077136, + "grad_norm": 2.3125, + "learning_rate": 0.0001935401888227539, + "loss": 4.5638, + "step": 67700 + }, + { + "epoch": 0.5832472451790633, + "grad_norm": 0.765625, + "learning_rate": 0.0001935306060154158, + "loss": 4.6319, + "step": 67750 + }, + { + "epoch": 0.5836776859504132, + "grad_norm": 2.59375, + "learning_rate": 0.00019352101634313243, + "loss": 4.7251, + "step": 67800 + }, + { + "epoch": 0.584108126721763, + "grad_norm": 3.5625, + "learning_rate": 0.0001935114198066077, + "loss": 4.4962, + "step": 67850 + }, + { + "epoch": 0.584538567493113, + "grad_norm": 3.796875, + "learning_rate": 0.00019350181640654597, + "loss": 4.8778, + "step": 67900 + }, + { + "epoch": 0.5849690082644629, + "grad_norm": 2.75, + "learning_rate": 0.00019349220614365206, + "loss": 4.6719, + "step": 67950 + }, + { + "epoch": 0.5853994490358126, + "grad_norm": 3.296875, + "learning_rate": 0.0001934825890186314, + "loss": 4.5005, + "step": 68000 + }, + { + "epoch": 0.5858298898071626, + "grad_norm": 1.515625, + "learning_rate": 0.0001934729650321898, + "loss": 5.1956, + "step": 68050 + }, + { + "epoch": 0.5862603305785123, + "grad_norm": 1.7421875, + "learning_rate": 0.00019346333418503373, + "loss": 4.8757, + "step": 68100 + }, + { + "epoch": 0.5866907713498623, + "grad_norm": 2.203125, + "learning_rate": 0.00019345369647786998, + "loss": 4.4, + "step": 68150 + }, + { + "epoch": 0.5871212121212122, + "grad_norm": 3.375, + "learning_rate": 0.00019344405191140603, + "loss": 4.8859, + "step": 68200 + }, + { + "epoch": 0.587551652892562, + "grad_norm": 2.875, + "learning_rate": 0.00019343440048634965, + "loss": 4.8281, + "step": 68250 + }, + { + "epoch": 0.5879820936639119, + "grad_norm": 2.359375, + "learning_rate": 0.00019342474220340936, + "loss": 5.076, + "step": 68300 + }, + { + "epoch": 0.5884125344352618, + "grad_norm": 1.078125, + "learning_rate": 0.00019341507706329396, + "loss": 4.6841, + "step": 68350 + }, + { + "epoch": 0.5888429752066116, + "grad_norm": 1.625, + "learning_rate": 0.00019340540506671293, + "loss": 4.8212, + "step": 68400 + }, + { + "epoch": 0.5892734159779615, + "grad_norm": 1.90625, + "learning_rate": 0.00019339572621437608, + "loss": 4.6378, + "step": 68450 + }, + { + "epoch": 0.5897038567493113, + "grad_norm": 4.03125, + "learning_rate": 0.00019338604050699393, + "loss": 4.9896, + "step": 68500 + }, + { + "epoch": 0.5901342975206612, + "grad_norm": 2.15625, + "learning_rate": 0.0001933763479452773, + "loss": 5.0799, + "step": 68550 + }, + { + "epoch": 0.5905647382920111, + "grad_norm": 3.5625, + "learning_rate": 0.00019336664852993767, + "loss": 4.815, + "step": 68600 + }, + { + "epoch": 0.5909951790633609, + "grad_norm": 2.3125, + "learning_rate": 0.0001933569422616869, + "loss": 4.7069, + "step": 68650 + }, + { + "epoch": 0.5914256198347108, + "grad_norm": 2.8125, + "learning_rate": 0.00019334722914123741, + "loss": 4.6069, + "step": 68700 + }, + { + "epoch": 0.5918560606060606, + "grad_norm": 2.609375, + "learning_rate": 0.00019333750916930215, + "loss": 4.9758, + "step": 68750 + }, + { + "epoch": 0.5922865013774105, + "grad_norm": 2.15625, + "learning_rate": 0.00019332778234659457, + "loss": 4.4766, + "step": 68800 + }, + { + "epoch": 0.5927169421487604, + "grad_norm": 1.8828125, + "learning_rate": 0.00019331804867382855, + "loss": 4.9831, + "step": 68850 + }, + { + "epoch": 0.5931473829201102, + "grad_norm": 1.15625, + "learning_rate": 0.00019330830815171852, + "loss": 4.6332, + "step": 68900 + }, + { + "epoch": 0.5935778236914601, + "grad_norm": 1.9296875, + "learning_rate": 0.00019329856078097946, + "loss": 4.5474, + "step": 68950 + }, + { + "epoch": 0.59400826446281, + "grad_norm": 2.4375, + "learning_rate": 0.00019328880656232676, + "loss": 4.7471, + "step": 69000 + }, + { + "epoch": 0.59400826446281, + "eval_loss": 5.3918561935424805, + "eval_runtime": 21.9308, + "eval_samples_per_second": 29.183, + "eval_steps_per_second": 14.591, + "eval_tts_loss": 6.719209646573399, + "step": 69000 + }, + { + "epoch": 0.5944387052341598, + "grad_norm": 1.2421875, + "learning_rate": 0.00019327904549647635, + "loss": 4.8058, + "step": 69050 + }, + { + "epoch": 0.5948691460055097, + "grad_norm": 2.328125, + "learning_rate": 0.0001932692775841447, + "loss": 4.8438, + "step": 69100 + }, + { + "epoch": 0.5952995867768595, + "grad_norm": 2.40625, + "learning_rate": 0.0001932595028260488, + "loss": 4.9355, + "step": 69150 + }, + { + "epoch": 0.5957300275482094, + "grad_norm": 2.40625, + "learning_rate": 0.000193249721222906, + "loss": 4.7622, + "step": 69200 + }, + { + "epoch": 0.5961604683195593, + "grad_norm": 0.62109375, + "learning_rate": 0.0001932399327754343, + "loss": 4.7971, + "step": 69250 + }, + { + "epoch": 0.5965909090909091, + "grad_norm": 1.5390625, + "learning_rate": 0.00019323013748435215, + "loss": 4.6563, + "step": 69300 + }, + { + "epoch": 0.597021349862259, + "grad_norm": 2.640625, + "learning_rate": 0.00019322033535037849, + "loss": 4.7125, + "step": 69350 + }, + { + "epoch": 0.5974517906336089, + "grad_norm": 2.296875, + "learning_rate": 0.0001932105263742328, + "loss": 4.9237, + "step": 69400 + }, + { + "epoch": 0.5978822314049587, + "grad_norm": 1.390625, + "learning_rate": 0.000193200710556635, + "loss": 4.6762, + "step": 69450 + }, + { + "epoch": 0.5983126721763086, + "grad_norm": 1.8828125, + "learning_rate": 0.0001931908878983056, + "loss": 5.0598, + "step": 69500 + }, + { + "epoch": 0.5987431129476584, + "grad_norm": 2.015625, + "learning_rate": 0.0001931810583999655, + "loss": 4.9928, + "step": 69550 + }, + { + "epoch": 0.5991735537190083, + "grad_norm": 2.59375, + "learning_rate": 0.00019317122206233625, + "loss": 4.9787, + "step": 69600 + }, + { + "epoch": 0.5996039944903582, + "grad_norm": 3.0625, + "learning_rate": 0.0001931613788861397, + "loss": 4.8721, + "step": 69650 + }, + { + "epoch": 0.600034435261708, + "grad_norm": 3.296875, + "learning_rate": 0.00019315152887209843, + "loss": 4.7348, + "step": 69700 + }, + { + "epoch": 0.6004648760330579, + "grad_norm": 3.703125, + "learning_rate": 0.00019314167202093535, + "loss": 5.0721, + "step": 69750 + }, + { + "epoch": 0.6008953168044077, + "grad_norm": 1.421875, + "learning_rate": 0.00019313180833337393, + "loss": 4.7295, + "step": 69800 + }, + { + "epoch": 0.6013257575757576, + "grad_norm": 2.359375, + "learning_rate": 0.00019312193781013815, + "loss": 4.2876, + "step": 69850 + }, + { + "epoch": 0.6017561983471075, + "grad_norm": 2.046875, + "learning_rate": 0.0001931120604519525, + "loss": 5.0536, + "step": 69900 + }, + { + "epoch": 0.6021866391184573, + "grad_norm": 2.09375, + "learning_rate": 0.00019310217625954196, + "loss": 4.6664, + "step": 69950 + }, + { + "epoch": 0.6026170798898072, + "grad_norm": 1.078125, + "learning_rate": 0.000193092285233632, + "loss": 4.6545, + "step": 70000 + }, + { + "epoch": 0.6030475206611571, + "grad_norm": 2.078125, + "learning_rate": 0.0001930823873749486, + "loss": 5.2324, + "step": 70050 + }, + { + "epoch": 0.6034779614325069, + "grad_norm": 2.15625, + "learning_rate": 0.0001930724826842182, + "loss": 4.2899, + "step": 70100 + }, + { + "epoch": 0.6039084022038568, + "grad_norm": 2.234375, + "learning_rate": 0.00019306257116216786, + "loss": 5.1086, + "step": 70150 + }, + { + "epoch": 0.6043388429752066, + "grad_norm": 3.625, + "learning_rate": 0.00019305265280952503, + "loss": 4.5098, + "step": 70200 + }, + { + "epoch": 0.6047692837465565, + "grad_norm": 1.4609375, + "learning_rate": 0.00019304272762701765, + "loss": 4.8431, + "step": 70250 + }, + { + "epoch": 0.6051997245179064, + "grad_norm": 1.84375, + "learning_rate": 0.00019303279561537427, + "loss": 4.9329, + "step": 70300 + }, + { + "epoch": 0.6056301652892562, + "grad_norm": 2.640625, + "learning_rate": 0.0001930228567753239, + "loss": 4.8364, + "step": 70350 + }, + { + "epoch": 0.6060606060606061, + "grad_norm": 3.078125, + "learning_rate": 0.00019301291110759592, + "loss": 4.9719, + "step": 70400 + }, + { + "epoch": 0.6064910468319559, + "grad_norm": 1.578125, + "learning_rate": 0.0001930029586129204, + "loss": 4.6531, + "step": 70450 + }, + { + "epoch": 0.6069214876033058, + "grad_norm": 2.0, + "learning_rate": 0.00019299299929202785, + "loss": 4.7328, + "step": 70500 + }, + { + "epoch": 0.6073519283746557, + "grad_norm": 4.0, + "learning_rate": 0.0001929830331456492, + "loss": 4.7255, + "step": 70550 + }, + { + "epoch": 0.6077823691460055, + "grad_norm": 2.609375, + "learning_rate": 0.00019297306017451602, + "loss": 4.7601, + "step": 70600 + }, + { + "epoch": 0.6082128099173554, + "grad_norm": 1.03125, + "learning_rate": 0.00019296308037936025, + "loss": 4.934, + "step": 70650 + }, + { + "epoch": 0.6086432506887053, + "grad_norm": 3.453125, + "learning_rate": 0.00019295309376091434, + "loss": 4.3747, + "step": 70700 + }, + { + "epoch": 0.6090736914600551, + "grad_norm": 2.53125, + "learning_rate": 0.0001929431003199114, + "loss": 4.5453, + "step": 70750 + }, + { + "epoch": 0.609504132231405, + "grad_norm": 2.328125, + "learning_rate": 0.00019293310005708483, + "loss": 4.6482, + "step": 70800 + }, + { + "epoch": 0.6099345730027548, + "grad_norm": 2.875, + "learning_rate": 0.0001929230929731687, + "loss": 5.0405, + "step": 70850 + }, + { + "epoch": 0.6103650137741047, + "grad_norm": 2.125, + "learning_rate": 0.00019291307906889747, + "loss": 4.8022, + "step": 70900 + }, + { + "epoch": 0.6107954545454546, + "grad_norm": 3.265625, + "learning_rate": 0.0001929030583450061, + "loss": 4.6024, + "step": 70950 + }, + { + "epoch": 0.6112258953168044, + "grad_norm": 2.25, + "learning_rate": 0.00019289303080223018, + "loss": 4.329, + "step": 71000 + }, + { + "epoch": 0.6116563360881543, + "grad_norm": 1.75, + "learning_rate": 0.00019288299644130565, + "loss": 4.7542, + "step": 71050 + }, + { + "epoch": 0.6120867768595041, + "grad_norm": 1.953125, + "learning_rate": 0.00019287295526296903, + "loss": 4.7364, + "step": 71100 + }, + { + "epoch": 0.612517217630854, + "grad_norm": 2.015625, + "learning_rate": 0.0001928629072679573, + "loss": 5.1028, + "step": 71150 + }, + { + "epoch": 0.6129476584022039, + "grad_norm": 2.859375, + "learning_rate": 0.00019285285245700798, + "loss": 4.7884, + "step": 71200 + }, + { + "epoch": 0.6133780991735537, + "grad_norm": 1.8203125, + "learning_rate": 0.00019284279083085909, + "loss": 5.159, + "step": 71250 + }, + { + "epoch": 0.6138085399449036, + "grad_norm": 1.5859375, + "learning_rate": 0.0001928327223902491, + "loss": 4.3514, + "step": 71300 + }, + { + "epoch": 0.6142389807162535, + "grad_norm": 2.59375, + "learning_rate": 0.000192822647135917, + "loss": 5.1681, + "step": 71350 + }, + { + "epoch": 0.6146694214876033, + "grad_norm": 1.8515625, + "learning_rate": 0.0001928125650686023, + "loss": 4.4608, + "step": 71400 + }, + { + "epoch": 0.6150998622589532, + "grad_norm": 2.203125, + "learning_rate": 0.00019280247618904507, + "loss": 4.9095, + "step": 71450 + }, + { + "epoch": 0.615530303030303, + "grad_norm": 2.515625, + "learning_rate": 0.00019279238049798572, + "loss": 4.8419, + "step": 71500 + }, + { + "epoch": 0.6159607438016529, + "grad_norm": 2.734375, + "learning_rate": 0.0001927822779961653, + "loss": 4.6071, + "step": 71550 + }, + { + "epoch": 0.6163911845730028, + "grad_norm": 2.8125, + "learning_rate": 0.00019277216868432531, + "loss": 4.6845, + "step": 71600 + }, + { + "epoch": 0.6168216253443526, + "grad_norm": 2.25, + "learning_rate": 0.00019276205256320771, + "loss": 5.2177, + "step": 71650 + }, + { + "epoch": 0.6172520661157025, + "grad_norm": 2.515625, + "learning_rate": 0.00019275192963355507, + "loss": 4.3372, + "step": 71700 + }, + { + "epoch": 0.6176825068870524, + "grad_norm": 0.98046875, + "learning_rate": 0.00019274179989611033, + "loss": 4.8568, + "step": 71750 + }, + { + "epoch": 0.6181129476584022, + "grad_norm": 2.15625, + "learning_rate": 0.00019273166335161707, + "loss": 4.987, + "step": 71800 + }, + { + "epoch": 0.6185433884297521, + "grad_norm": 2.578125, + "learning_rate": 0.00019272152000081918, + "loss": 4.6778, + "step": 71850 + }, + { + "epoch": 0.6189738292011019, + "grad_norm": 1.5234375, + "learning_rate": 0.00019271136984446124, + "loss": 4.6203, + "step": 71900 + }, + { + "epoch": 0.6194042699724518, + "grad_norm": 2.078125, + "learning_rate": 0.0001927012128832882, + "loss": 4.6888, + "step": 71950 + }, + { + "epoch": 0.6198347107438017, + "grad_norm": 2.21875, + "learning_rate": 0.00019269104911804564, + "loss": 4.9668, + "step": 72000 + }, + { + "epoch": 0.6198347107438017, + "eval_loss": 5.390753746032715, + "eval_runtime": 22.0586, + "eval_samples_per_second": 29.014, + "eval_steps_per_second": 14.507, + "eval_tts_loss": 6.741954032634387, + "step": 72000 + }, + { + "epoch": 0.6202651515151515, + "grad_norm": 2.0625, + "learning_rate": 0.00019268087854947945, + "loss": 5.1815, + "step": 72050 + }, + { + "epoch": 0.6206955922865014, + "grad_norm": 2.140625, + "learning_rate": 0.0001926707011783362, + "loss": 4.65, + "step": 72100 + }, + { + "epoch": 0.6211260330578512, + "grad_norm": 2.953125, + "learning_rate": 0.0001926605170053629, + "loss": 4.8605, + "step": 72150 + }, + { + "epoch": 0.6215564738292011, + "grad_norm": 1.2109375, + "learning_rate": 0.00019265032603130698, + "loss": 4.9785, + "step": 72200 + }, + { + "epoch": 0.621986914600551, + "grad_norm": 2.015625, + "learning_rate": 0.00019264012825691648, + "loss": 4.4861, + "step": 72250 + }, + { + "epoch": 0.6224173553719008, + "grad_norm": 3.265625, + "learning_rate": 0.00019262992368293988, + "loss": 4.8425, + "step": 72300 + }, + { + "epoch": 0.6228477961432507, + "grad_norm": 2.15625, + "learning_rate": 0.0001926197123101262, + "loss": 5.143, + "step": 72350 + }, + { + "epoch": 0.6232782369146006, + "grad_norm": 2.671875, + "learning_rate": 0.0001926094941392249, + "loss": 5.0026, + "step": 72400 + }, + { + "epoch": 0.6237086776859504, + "grad_norm": 1.859375, + "learning_rate": 0.00019259926917098596, + "loss": 4.5678, + "step": 72450 + }, + { + "epoch": 0.6241391184573003, + "grad_norm": 2.484375, + "learning_rate": 0.00019258903740615994, + "loss": 4.6358, + "step": 72500 + }, + { + "epoch": 0.6245695592286501, + "grad_norm": 1.2578125, + "learning_rate": 0.00019257879884549775, + "loss": 4.7183, + "step": 72550 + }, + { + "epoch": 0.625, + "grad_norm": 2.296875, + "learning_rate": 0.00019256855348975097, + "loss": 4.7642, + "step": 72600 + }, + { + "epoch": 0.6254304407713499, + "grad_norm": 2.421875, + "learning_rate": 0.00019255830133967146, + "loss": 4.8014, + "step": 72650 + }, + { + "epoch": 0.6258608815426997, + "grad_norm": 1.9296875, + "learning_rate": 0.00019254804239601182, + "loss": 4.5654, + "step": 72700 + }, + { + "epoch": 0.6262913223140496, + "grad_norm": 2.203125, + "learning_rate": 0.00019253777665952497, + "loss": 4.8976, + "step": 72750 + }, + { + "epoch": 0.6267217630853994, + "grad_norm": 1.4375, + "learning_rate": 0.00019252750413096444, + "loss": 5.0384, + "step": 72800 + }, + { + "epoch": 0.6271522038567493, + "grad_norm": 3.1875, + "learning_rate": 0.00019251722481108415, + "loss": 4.4505, + "step": 72850 + }, + { + "epoch": 0.6275826446280992, + "grad_norm": 2.796875, + "learning_rate": 0.00019250693870063863, + "loss": 4.5738, + "step": 72900 + }, + { + "epoch": 0.628013085399449, + "grad_norm": 1.765625, + "learning_rate": 0.00019249664580038282, + "loss": 4.5589, + "step": 72950 + }, + { + "epoch": 0.6284435261707989, + "grad_norm": 2.40625, + "learning_rate": 0.00019248634611107227, + "loss": 4.9839, + "step": 73000 + }, + { + "epoch": 0.6288739669421488, + "grad_norm": 1.640625, + "learning_rate": 0.00019247603963346287, + "loss": 4.5437, + "step": 73050 + }, + { + "epoch": 0.6293044077134986, + "grad_norm": 2.734375, + "learning_rate": 0.00019246572636831113, + "loss": 5.2066, + "step": 73100 + }, + { + "epoch": 0.6297348484848485, + "grad_norm": 3.8125, + "learning_rate": 0.00019245540631637403, + "loss": 4.955, + "step": 73150 + }, + { + "epoch": 0.6301652892561983, + "grad_norm": 1.578125, + "learning_rate": 0.00019244507947840903, + "loss": 4.8249, + "step": 73200 + }, + { + "epoch": 0.6305957300275482, + "grad_norm": 1.3203125, + "learning_rate": 0.0001924347458551741, + "loss": 5.0265, + "step": 73250 + }, + { + "epoch": 0.6310261707988981, + "grad_norm": 1.71875, + "learning_rate": 0.00019242440544742768, + "loss": 4.9371, + "step": 73300 + }, + { + "epoch": 0.6314566115702479, + "grad_norm": 2.265625, + "learning_rate": 0.00019241405825592877, + "loss": 4.8961, + "step": 73350 + }, + { + "epoch": 0.6318870523415978, + "grad_norm": 2.265625, + "learning_rate": 0.00019240370428143683, + "loss": 4.7428, + "step": 73400 + }, + { + "epoch": 0.6323174931129476, + "grad_norm": 2.40625, + "learning_rate": 0.00019239334352471178, + "loss": 4.5569, + "step": 73450 + }, + { + "epoch": 0.6327479338842975, + "grad_norm": 1.25, + "learning_rate": 0.0001923829759865141, + "loss": 4.952, + "step": 73500 + }, + { + "epoch": 0.6331783746556474, + "grad_norm": 2.09375, + "learning_rate": 0.0001923726016676048, + "loss": 4.6965, + "step": 73550 + }, + { + "epoch": 0.6336088154269972, + "grad_norm": 2.265625, + "learning_rate": 0.00019236222056874525, + "loss": 4.6287, + "step": 73600 + }, + { + "epoch": 0.6340392561983471, + "grad_norm": 1.6796875, + "learning_rate": 0.00019235183269069746, + "loss": 5.0443, + "step": 73650 + }, + { + "epoch": 0.634469696969697, + "grad_norm": 2.609375, + "learning_rate": 0.0001923414380342238, + "loss": 4.6662, + "step": 73700 + }, + { + "epoch": 0.6349001377410468, + "grad_norm": 3.0, + "learning_rate": 0.0001923310366000873, + "loss": 4.4377, + "step": 73750 + }, + { + "epoch": 0.6353305785123967, + "grad_norm": 2.40625, + "learning_rate": 0.00019232062838905135, + "loss": 4.8102, + "step": 73800 + }, + { + "epoch": 0.6357610192837465, + "grad_norm": 2.953125, + "learning_rate": 0.00019231021340187994, + "loss": 4.8413, + "step": 73850 + }, + { + "epoch": 0.6361914600550964, + "grad_norm": 2.46875, + "learning_rate": 0.00019229979163933744, + "loss": 4.8162, + "step": 73900 + }, + { + "epoch": 0.6366219008264463, + "grad_norm": 1.3984375, + "learning_rate": 0.00019228936310218883, + "loss": 4.8704, + "step": 73950 + }, + { + "epoch": 0.6370523415977961, + "grad_norm": 1.8515625, + "learning_rate": 0.00019227892779119957, + "loss": 4.5698, + "step": 74000 + }, + { + "epoch": 0.637482782369146, + "grad_norm": 1.828125, + "learning_rate": 0.0001922684857071355, + "loss": 5.0687, + "step": 74050 + }, + { + "epoch": 0.6379132231404959, + "grad_norm": 2.0625, + "learning_rate": 0.00019225803685076316, + "loss": 4.6932, + "step": 74100 + }, + { + "epoch": 0.6383436639118457, + "grad_norm": 1.515625, + "learning_rate": 0.00019224758122284938, + "loss": 4.6711, + "step": 74150 + }, + { + "epoch": 0.6387741046831956, + "grad_norm": 1.8359375, + "learning_rate": 0.0001922371188241616, + "loss": 4.8663, + "step": 74200 + }, + { + "epoch": 0.6392045454545454, + "grad_norm": 2.328125, + "learning_rate": 0.00019222664965546775, + "loss": 4.992, + "step": 74250 + }, + { + "epoch": 0.6396349862258953, + "grad_norm": 1.390625, + "learning_rate": 0.00019221617371753625, + "loss": 4.7318, + "step": 74300 + }, + { + "epoch": 0.6400654269972452, + "grad_norm": 2.328125, + "learning_rate": 0.00019220569101113607, + "loss": 5.1225, + "step": 74350 + }, + { + "epoch": 0.640495867768595, + "grad_norm": 4.625, + "learning_rate": 0.00019219520153703647, + "loss": 4.9056, + "step": 74400 + }, + { + "epoch": 0.6409263085399449, + "grad_norm": 2.609375, + "learning_rate": 0.00019218470529600747, + "loss": 4.8433, + "step": 74450 + }, + { + "epoch": 0.6413567493112947, + "grad_norm": 2.125, + "learning_rate": 0.00019217420228881946, + "loss": 4.6725, + "step": 74500 + }, + { + "epoch": 0.6417871900826446, + "grad_norm": 2.15625, + "learning_rate": 0.00019216369251624332, + "loss": 4.8702, + "step": 74550 + }, + { + "epoch": 0.6422176308539945, + "grad_norm": 2.515625, + "learning_rate": 0.00019215317597905043, + "loss": 4.7003, + "step": 74600 + }, + { + "epoch": 0.6426480716253443, + "grad_norm": 2.640625, + "learning_rate": 0.00019214265267801267, + "loss": 4.9039, + "step": 74650 + }, + { + "epoch": 0.6430785123966942, + "grad_norm": 3.96875, + "learning_rate": 0.0001921321226139025, + "loss": 4.6823, + "step": 74700 + }, + { + "epoch": 0.6435089531680441, + "grad_norm": 1.84375, + "learning_rate": 0.00019212158578749272, + "loss": 4.9247, + "step": 74750 + }, + { + "epoch": 0.6439393939393939, + "grad_norm": 2.078125, + "learning_rate": 0.00019211104219955677, + "loss": 4.6949, + "step": 74800 + }, + { + "epoch": 0.6443698347107438, + "grad_norm": 2.359375, + "learning_rate": 0.00019210049185086853, + "loss": 4.7005, + "step": 74850 + }, + { + "epoch": 0.6448002754820936, + "grad_norm": 1.890625, + "learning_rate": 0.00019208993474220228, + "loss": 4.8432, + "step": 74900 + }, + { + "epoch": 0.6452307162534435, + "grad_norm": 3.171875, + "learning_rate": 0.000192079370874333, + "loss": 4.7056, + "step": 74950 + }, + { + "epoch": 0.6456611570247934, + "grad_norm": 2.75, + "learning_rate": 0.00019206880024803601, + "loss": 4.8833, + "step": 75000 + }, + { + "epoch": 0.6456611570247934, + "eval_loss": 5.383493900299072, + "eval_runtime": 21.7227, + "eval_samples_per_second": 29.462, + "eval_steps_per_second": 14.731, + "eval_tts_loss": 6.751945592916089, + "step": 75000 + }, + { + "epoch": 0.6460915977961432, + "grad_norm": 1.21875, + "learning_rate": 0.00019205822286408718, + "loss": 4.8866, + "step": 75050 + }, + { + "epoch": 0.6465220385674931, + "grad_norm": 2.34375, + "learning_rate": 0.00019204763872326285, + "loss": 4.9559, + "step": 75100 + }, + { + "epoch": 0.6469524793388429, + "grad_norm": 1.6328125, + "learning_rate": 0.00019203704782633988, + "loss": 4.8879, + "step": 75150 + }, + { + "epoch": 0.6473829201101928, + "grad_norm": 1.78125, + "learning_rate": 0.00019202645017409563, + "loss": 4.1499, + "step": 75200 + }, + { + "epoch": 0.6478133608815427, + "grad_norm": 2.46875, + "learning_rate": 0.00019201584576730794, + "loss": 4.6582, + "step": 75250 + }, + { + "epoch": 0.6482438016528925, + "grad_norm": 1.6171875, + "learning_rate": 0.0001920052346067551, + "loss": 5.1874, + "step": 75300 + }, + { + "epoch": 0.6486742424242424, + "grad_norm": 2.546875, + "learning_rate": 0.00019199461669321604, + "loss": 5.3677, + "step": 75350 + }, + { + "epoch": 0.6491046831955923, + "grad_norm": 2.765625, + "learning_rate": 0.00019198399202747005, + "loss": 4.675, + "step": 75400 + }, + { + "epoch": 0.6495351239669421, + "grad_norm": 2.671875, + "learning_rate": 0.00019197336061029695, + "loss": 4.7404, + "step": 75450 + }, + { + "epoch": 0.649965564738292, + "grad_norm": 2.703125, + "learning_rate": 0.00019196272244247707, + "loss": 4.6372, + "step": 75500 + }, + { + "epoch": 0.6503960055096418, + "grad_norm": 1.921875, + "learning_rate": 0.0001919520775247912, + "loss": 4.7628, + "step": 75550 + }, + { + "epoch": 0.6508264462809917, + "grad_norm": 2.859375, + "learning_rate": 0.00019194142585802072, + "loss": 4.6766, + "step": 75600 + }, + { + "epoch": 0.6512568870523416, + "grad_norm": 2.296875, + "learning_rate": 0.00019193076744294737, + "loss": 4.5604, + "step": 75650 + }, + { + "epoch": 0.6516873278236914, + "grad_norm": 2.296875, + "learning_rate": 0.00019192010228035348, + "loss": 5.1918, + "step": 75700 + }, + { + "epoch": 0.6521177685950413, + "grad_norm": 2.734375, + "learning_rate": 0.00019190943037102187, + "loss": 4.7353, + "step": 75750 + }, + { + "epoch": 0.6525482093663911, + "grad_norm": 1.921875, + "learning_rate": 0.00019189875171573583, + "loss": 4.7499, + "step": 75800 + }, + { + "epoch": 0.652978650137741, + "grad_norm": 2.671875, + "learning_rate": 0.00019188806631527912, + "loss": 4.61, + "step": 75850 + }, + { + "epoch": 0.6534090909090909, + "grad_norm": 2.6875, + "learning_rate": 0.00019187737417043603, + "loss": 4.6194, + "step": 75900 + }, + { + "epoch": 0.6538395316804407, + "grad_norm": 2.125, + "learning_rate": 0.00019186667528199136, + "loss": 4.4737, + "step": 75950 + }, + { + "epoch": 0.6542699724517906, + "grad_norm": 1.0390625, + "learning_rate": 0.0001918559696507304, + "loss": 4.7515, + "step": 76000 + }, + { + "epoch": 0.6547004132231405, + "grad_norm": 3.203125, + "learning_rate": 0.00019184525727743889, + "loss": 4.6163, + "step": 76050 + }, + { + "epoch": 0.6551308539944903, + "grad_norm": 4.375, + "learning_rate": 0.00019183453816290312, + "loss": 4.987, + "step": 76100 + }, + { + "epoch": 0.6555612947658402, + "grad_norm": 1.21875, + "learning_rate": 0.00019182381230790983, + "loss": 4.328, + "step": 76150 + }, + { + "epoch": 0.65599173553719, + "grad_norm": 2.453125, + "learning_rate": 0.00019181307971324625, + "loss": 4.7604, + "step": 76200 + }, + { + "epoch": 0.6564221763085399, + "grad_norm": 3.609375, + "learning_rate": 0.0001918023403797002, + "loss": 4.6351, + "step": 76250 + }, + { + "epoch": 0.6568526170798898, + "grad_norm": 2.640625, + "learning_rate": 0.00019179159430805987, + "loss": 4.8329, + "step": 76300 + }, + { + "epoch": 0.6572830578512396, + "grad_norm": 1.0234375, + "learning_rate": 0.00019178084149911403, + "loss": 4.6761, + "step": 76350 + }, + { + "epoch": 0.6577134986225895, + "grad_norm": 2.484375, + "learning_rate": 0.00019177008195365186, + "loss": 4.7454, + "step": 76400 + }, + { + "epoch": 0.6581439393939394, + "grad_norm": 2.078125, + "learning_rate": 0.00019175931567246314, + "loss": 4.5504, + "step": 76450 + }, + { + "epoch": 0.6585743801652892, + "grad_norm": 2.671875, + "learning_rate": 0.00019174854265633807, + "loss": 4.7031, + "step": 76500 + }, + { + "epoch": 0.6590048209366391, + "grad_norm": 2.765625, + "learning_rate": 0.00019173776290606738, + "loss": 4.6054, + "step": 76550 + }, + { + "epoch": 0.6594352617079889, + "grad_norm": 2.265625, + "learning_rate": 0.00019172697642244225, + "loss": 4.5167, + "step": 76600 + }, + { + "epoch": 0.6598657024793388, + "grad_norm": 2.765625, + "learning_rate": 0.00019171618320625445, + "loss": 5.1212, + "step": 76650 + }, + { + "epoch": 0.6602961432506887, + "grad_norm": 2.3125, + "learning_rate": 0.0001917053832582961, + "loss": 4.7065, + "step": 76700 + }, + { + "epoch": 0.6607265840220385, + "grad_norm": 1.71875, + "learning_rate": 0.00019169457657935995, + "loss": 4.8365, + "step": 76750 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 2.609375, + "learning_rate": 0.00019168376317023916, + "loss": 4.9541, + "step": 76800 + }, + { + "epoch": 0.6615874655647382, + "grad_norm": 1.3984375, + "learning_rate": 0.0001916729430317274, + "loss": 4.766, + "step": 76850 + }, + { + "epoch": 0.6620179063360881, + "grad_norm": 1.25, + "learning_rate": 0.00019166211616461885, + "loss": 4.9038, + "step": 76900 + }, + { + "epoch": 0.662448347107438, + "grad_norm": 2.390625, + "learning_rate": 0.00019165128256970823, + "loss": 5.0386, + "step": 76950 + }, + { + "epoch": 0.6628787878787878, + "grad_norm": 1.046875, + "learning_rate": 0.00019164044224779064, + "loss": 4.7015, + "step": 77000 + }, + { + "epoch": 0.6633092286501377, + "grad_norm": 3.5625, + "learning_rate": 0.00019162959519966174, + "loss": 4.6228, + "step": 77050 + }, + { + "epoch": 0.6637396694214877, + "grad_norm": 2.71875, + "learning_rate": 0.00019161874142611772, + "loss": 4.5877, + "step": 77100 + }, + { + "epoch": 0.6641701101928374, + "grad_norm": 2.03125, + "learning_rate": 0.0001916078809279552, + "loss": 4.9524, + "step": 77150 + }, + { + "epoch": 0.6646005509641874, + "grad_norm": 1.4140625, + "learning_rate": 0.0001915970137059713, + "loss": 4.42, + "step": 77200 + }, + { + "epoch": 0.6650309917355371, + "grad_norm": 1.9921875, + "learning_rate": 0.00019158613976096373, + "loss": 4.789, + "step": 77250 + }, + { + "epoch": 0.665461432506887, + "grad_norm": 1.78125, + "learning_rate": 0.0001915752590937305, + "loss": 4.9101, + "step": 77300 + }, + { + "epoch": 0.665891873278237, + "grad_norm": 3.421875, + "learning_rate": 0.0001915643717050703, + "loss": 4.7548, + "step": 77350 + }, + { + "epoch": 0.6663223140495868, + "grad_norm": 2.828125, + "learning_rate": 0.00019155347759578222, + "loss": 4.7368, + "step": 77400 + }, + { + "epoch": 0.6667527548209367, + "grad_norm": 2.234375, + "learning_rate": 0.00019154257676666587, + "loss": 4.8501, + "step": 77450 + }, + { + "epoch": 0.6671831955922864, + "grad_norm": 2.78125, + "learning_rate": 0.00019153166921852136, + "loss": 4.6698, + "step": 77500 + }, + { + "epoch": 0.6676136363636364, + "grad_norm": 3.4375, + "learning_rate": 0.0001915207549521492, + "loss": 4.8337, + "step": 77550 + }, + { + "epoch": 0.6680440771349863, + "grad_norm": 2.03125, + "learning_rate": 0.0001915098339683506, + "loss": 4.7476, + "step": 77600 + }, + { + "epoch": 0.668474517906336, + "grad_norm": 2.625, + "learning_rate": 0.00019149890626792707, + "loss": 4.4777, + "step": 77650 + }, + { + "epoch": 0.668904958677686, + "grad_norm": 1.7109375, + "learning_rate": 0.00019148797185168065, + "loss": 4.7891, + "step": 77700 + }, + { + "epoch": 0.6693353994490359, + "grad_norm": 1.3203125, + "learning_rate": 0.000191477030720414, + "loss": 4.6878, + "step": 77750 + }, + { + "epoch": 0.6697658402203857, + "grad_norm": 1.71875, + "learning_rate": 0.00019146608287493004, + "loss": 4.7881, + "step": 77800 + }, + { + "epoch": 0.6701962809917356, + "grad_norm": 1.328125, + "learning_rate": 0.00019145512831603243, + "loss": 4.6572, + "step": 77850 + }, + { + "epoch": 0.6706267217630854, + "grad_norm": 2.890625, + "learning_rate": 0.00019144416704452516, + "loss": 4.8722, + "step": 77900 + }, + { + "epoch": 0.6710571625344353, + "grad_norm": 1.4453125, + "learning_rate": 0.00019143319906121277, + "loss": 4.7615, + "step": 77950 + }, + { + "epoch": 0.6714876033057852, + "grad_norm": 1.59375, + "learning_rate": 0.00019142222436690034, + "loss": 4.8712, + "step": 78000 + }, + { + "epoch": 0.6714876033057852, + "eval_loss": 5.375321865081787, + "eval_runtime": 22.0388, + "eval_samples_per_second": 29.04, + "eval_steps_per_second": 14.52, + "eval_tts_loss": 6.785668295288904, + "step": 78000 + }, + { + "epoch": 0.671918044077135, + "grad_norm": 2.609375, + "learning_rate": 0.0001914112429623933, + "loss": 4.644, + "step": 78050 + }, + { + "epoch": 0.6723484848484849, + "grad_norm": 3.4375, + "learning_rate": 0.0001914002548484977, + "loss": 4.6583, + "step": 78100 + }, + { + "epoch": 0.6727789256198347, + "grad_norm": 1.1953125, + "learning_rate": 0.00019138926002602004, + "loss": 4.907, + "step": 78150 + }, + { + "epoch": 0.6732093663911846, + "grad_norm": 1.9609375, + "learning_rate": 0.00019137825849576735, + "loss": 4.6658, + "step": 78200 + }, + { + "epoch": 0.6736398071625345, + "grad_norm": 1.203125, + "learning_rate": 0.00019136725025854708, + "loss": 4.7018, + "step": 78250 + }, + { + "epoch": 0.6740702479338843, + "grad_norm": 2.328125, + "learning_rate": 0.00019135623531516723, + "loss": 4.685, + "step": 78300 + }, + { + "epoch": 0.6745006887052342, + "grad_norm": 2.65625, + "learning_rate": 0.00019134521366643626, + "loss": 4.8097, + "step": 78350 + }, + { + "epoch": 0.6749311294765841, + "grad_norm": 1.4609375, + "learning_rate": 0.00019133418531316315, + "loss": 4.8626, + "step": 78400 + }, + { + "epoch": 0.6753615702479339, + "grad_norm": 3.015625, + "learning_rate": 0.00019132315025615734, + "loss": 4.5555, + "step": 78450 + }, + { + "epoch": 0.6757920110192838, + "grad_norm": 6.40625, + "learning_rate": 0.00019131210849622878, + "loss": 4.5359, + "step": 78500 + }, + { + "epoch": 0.6762224517906336, + "grad_norm": 3.15625, + "learning_rate": 0.00019130106003418794, + "loss": 4.9026, + "step": 78550 + }, + { + "epoch": 0.6766528925619835, + "grad_norm": 2.515625, + "learning_rate": 0.00019129000487084572, + "loss": 4.3526, + "step": 78600 + }, + { + "epoch": 0.6770833333333334, + "grad_norm": 2.328125, + "learning_rate": 0.00019127894300701356, + "loss": 4.7109, + "step": 78650 + }, + { + "epoch": 0.6775137741046832, + "grad_norm": 2.984375, + "learning_rate": 0.00019126787444350338, + "loss": 4.8723, + "step": 78700 + }, + { + "epoch": 0.6779442148760331, + "grad_norm": 0.98828125, + "learning_rate": 0.00019125679918112755, + "loss": 4.4127, + "step": 78750 + }, + { + "epoch": 0.678374655647383, + "grad_norm": 2.375, + "learning_rate": 0.00019124571722069903, + "loss": 5.064, + "step": 78800 + }, + { + "epoch": 0.6788050964187328, + "grad_norm": 2.265625, + "learning_rate": 0.00019123462856303122, + "loss": 4.74, + "step": 78850 + }, + { + "epoch": 0.6792355371900827, + "grad_norm": 2.09375, + "learning_rate": 0.0001912235332089379, + "loss": 4.6177, + "step": 78900 + }, + { + "epoch": 0.6796659779614325, + "grad_norm": 2.15625, + "learning_rate": 0.00019121243115923357, + "loss": 4.7881, + "step": 78950 + }, + { + "epoch": 0.6800964187327824, + "grad_norm": 3.09375, + "learning_rate": 0.00019120132241473304, + "loss": 4.9935, + "step": 79000 + }, + { + "epoch": 0.6805268595041323, + "grad_norm": 1.5859375, + "learning_rate": 0.00019119020697625164, + "loss": 4.9343, + "step": 79050 + }, + { + "epoch": 0.6809573002754821, + "grad_norm": 2.90625, + "learning_rate": 0.00019117908484460528, + "loss": 4.6888, + "step": 79100 + }, + { + "epoch": 0.681387741046832, + "grad_norm": 1.8046875, + "learning_rate": 0.00019116795602061023, + "loss": 4.8264, + "step": 79150 + }, + { + "epoch": 0.6818181818181818, + "grad_norm": 2.890625, + "learning_rate": 0.0001911568205050834, + "loss": 4.7404, + "step": 79200 + }, + { + "epoch": 0.6822486225895317, + "grad_norm": 3.640625, + "learning_rate": 0.00019114567829884205, + "loss": 4.8228, + "step": 79250 + }, + { + "epoch": 0.6826790633608816, + "grad_norm": 2.1875, + "learning_rate": 0.00019113452940270402, + "loss": 4.9754, + "step": 79300 + }, + { + "epoch": 0.6831095041322314, + "grad_norm": 2.875, + "learning_rate": 0.00019112337381748762, + "loss": 4.9272, + "step": 79350 + }, + { + "epoch": 0.6835399449035813, + "grad_norm": 0.78515625, + "learning_rate": 0.00019111221154401165, + "loss": 4.8657, + "step": 79400 + }, + { + "epoch": 0.6839703856749312, + "grad_norm": 3.046875, + "learning_rate": 0.00019110104258309536, + "loss": 4.6926, + "step": 79450 + }, + { + "epoch": 0.684400826446281, + "grad_norm": 2.4375, + "learning_rate": 0.00019108986693555858, + "loss": 4.8111, + "step": 79500 + }, + { + "epoch": 0.6848312672176309, + "grad_norm": 2.015625, + "learning_rate": 0.00019107868460222153, + "loss": 4.7785, + "step": 79550 + }, + { + "epoch": 0.6852617079889807, + "grad_norm": 1.2265625, + "learning_rate": 0.000191067495583905, + "loss": 4.86, + "step": 79600 + }, + { + "epoch": 0.6856921487603306, + "grad_norm": 2.25, + "learning_rate": 0.0001910562998814302, + "loss": 4.6237, + "step": 79650 + }, + { + "epoch": 0.6861225895316805, + "grad_norm": 2.046875, + "learning_rate": 0.00019104509749561896, + "loss": 4.7586, + "step": 79700 + }, + { + "epoch": 0.6865530303030303, + "grad_norm": 3.328125, + "learning_rate": 0.00019103388842729342, + "loss": 5.1268, + "step": 79750 + }, + { + "epoch": 0.6869834710743802, + "grad_norm": 2.921875, + "learning_rate": 0.00019102267267727632, + "loss": 5.0966, + "step": 79800 + }, + { + "epoch": 0.68741391184573, + "grad_norm": 2.53125, + "learning_rate": 0.00019101145024639092, + "loss": 4.7019, + "step": 79850 + }, + { + "epoch": 0.6878443526170799, + "grad_norm": 3.25, + "learning_rate": 0.00019100022113546086, + "loss": 4.4957, + "step": 79900 + }, + { + "epoch": 0.6882747933884298, + "grad_norm": 2.46875, + "learning_rate": 0.00019098898534531034, + "loss": 5.061, + "step": 79950 + }, + { + "epoch": 0.6887052341597796, + "grad_norm": 2.46875, + "learning_rate": 0.00019097774287676414, + "loss": 4.7823, + "step": 80000 + }, + { + "epoch": 0.6891356749311295, + "grad_norm": 2.984375, + "learning_rate": 0.0001909664937306473, + "loss": 4.8135, + "step": 80050 + }, + { + "epoch": 0.6895661157024794, + "grad_norm": 2.03125, + "learning_rate": 0.00019095523790778554, + "loss": 4.794, + "step": 80100 + }, + { + "epoch": 0.6899965564738292, + "grad_norm": 2.03125, + "learning_rate": 0.00019094397540900504, + "loss": 4.8347, + "step": 80150 + }, + { + "epoch": 0.6904269972451791, + "grad_norm": 3.265625, + "learning_rate": 0.0001909327062351324, + "loss": 4.5087, + "step": 80200 + }, + { + "epoch": 0.6908574380165289, + "grad_norm": 1.453125, + "learning_rate": 0.00019092143038699474, + "loss": 4.6743, + "step": 80250 + }, + { + "epoch": 0.6912878787878788, + "grad_norm": 0.8828125, + "learning_rate": 0.00019091014786541975, + "loss": 4.8636, + "step": 80300 + }, + { + "epoch": 0.6917183195592287, + "grad_norm": 2.921875, + "learning_rate": 0.00019089885867123552, + "loss": 4.6687, + "step": 80350 + }, + { + "epoch": 0.6921487603305785, + "grad_norm": 2.0625, + "learning_rate": 0.0001908875628052706, + "loss": 4.7632, + "step": 80400 + }, + { + "epoch": 0.6925792011019284, + "grad_norm": 2.921875, + "learning_rate": 0.00019087626026835413, + "loss": 4.6997, + "step": 80450 + }, + { + "epoch": 0.6930096418732782, + "grad_norm": 3.4375, + "learning_rate": 0.00019086495106131567, + "loss": 4.7043, + "step": 80500 + }, + { + "epoch": 0.6934400826446281, + "grad_norm": 3.3125, + "learning_rate": 0.00019085363518498534, + "loss": 4.4696, + "step": 80550 + }, + { + "epoch": 0.693870523415978, + "grad_norm": 1.625, + "learning_rate": 0.00019084231264019364, + "loss": 5.0989, + "step": 80600 + }, + { + "epoch": 0.6943009641873278, + "grad_norm": 2.75, + "learning_rate": 0.00019083098342777163, + "loss": 4.6036, + "step": 80650 + }, + { + "epoch": 0.6947314049586777, + "grad_norm": 1.625, + "learning_rate": 0.00019081964754855092, + "loss": 4.8058, + "step": 80700 + }, + { + "epoch": 0.6951618457300276, + "grad_norm": 1.8828125, + "learning_rate": 0.0001908083050033634, + "loss": 4.4008, + "step": 80750 + }, + { + "epoch": 0.6955922865013774, + "grad_norm": 2.96875, + "learning_rate": 0.00019079695579304174, + "loss": 4.7096, + "step": 80800 + }, + { + "epoch": 0.6960227272727273, + "grad_norm": 1.2890625, + "learning_rate": 0.00019078559991841885, + "loss": 4.8174, + "step": 80850 + }, + { + "epoch": 0.6964531680440771, + "grad_norm": 2.71875, + "learning_rate": 0.00019077423738032826, + "loss": 4.9544, + "step": 80900 + }, + { + "epoch": 0.696883608815427, + "grad_norm": 3.515625, + "learning_rate": 0.00019076286817960395, + "loss": 4.8073, + "step": 80950 + }, + { + "epoch": 0.6973140495867769, + "grad_norm": 1.984375, + "learning_rate": 0.0001907514923170804, + "loss": 5.1697, + "step": 81000 + }, + { + "epoch": 0.6973140495867769, + "eval_loss": 5.368470191955566, + "eval_runtime": 21.8057, + "eval_samples_per_second": 29.35, + "eval_steps_per_second": 14.675, + "eval_tts_loss": 6.716216654162234, + "step": 81000 + }, + { + "epoch": 0.6977444903581267, + "grad_norm": 2.65625, + "learning_rate": 0.00019074010979359257, + "loss": 4.5545, + "step": 81050 + }, + { + "epoch": 0.6981749311294766, + "grad_norm": 1.203125, + "learning_rate": 0.00019072872060997594, + "loss": 4.9125, + "step": 81100 + }, + { + "epoch": 0.6986053719008265, + "grad_norm": 2.65625, + "learning_rate": 0.00019071732476706637, + "loss": 4.8031, + "step": 81150 + }, + { + "epoch": 0.6990358126721763, + "grad_norm": 4.78125, + "learning_rate": 0.00019070592226570038, + "loss": 4.9183, + "step": 81200 + }, + { + "epoch": 0.6994662534435262, + "grad_norm": 1.5703125, + "learning_rate": 0.00019069451310671487, + "loss": 4.7762, + "step": 81250 + }, + { + "epoch": 0.699896694214876, + "grad_norm": 2.375, + "learning_rate": 0.0001906830972909472, + "loss": 4.8935, + "step": 81300 + }, + { + "epoch": 0.7003271349862259, + "grad_norm": 2.390625, + "learning_rate": 0.00019067167481923533, + "loss": 4.9102, + "step": 81350 + }, + { + "epoch": 0.7007575757575758, + "grad_norm": 2.359375, + "learning_rate": 0.00019066024569241758, + "loss": 4.6657, + "step": 81400 + }, + { + "epoch": 0.7011880165289256, + "grad_norm": 2.359375, + "learning_rate": 0.00019064880991133287, + "loss": 4.589, + "step": 81450 + }, + { + "epoch": 0.7016184573002755, + "grad_norm": 2.984375, + "learning_rate": 0.00019063736747682055, + "loss": 4.8951, + "step": 81500 + }, + { + "epoch": 0.7020488980716253, + "grad_norm": 3.046875, + "learning_rate": 0.00019062591838972046, + "loss": 4.8286, + "step": 81550 + }, + { + "epoch": 0.7024793388429752, + "grad_norm": 2.078125, + "learning_rate": 0.00019061446265087296, + "loss": 5.2785, + "step": 81600 + }, + { + "epoch": 0.7029097796143251, + "grad_norm": 2.765625, + "learning_rate": 0.00019060300026111886, + "loss": 4.6883, + "step": 81650 + }, + { + "epoch": 0.7033402203856749, + "grad_norm": 2.796875, + "learning_rate": 0.00019059153122129947, + "loss": 4.4538, + "step": 81700 + }, + { + "epoch": 0.7037706611570248, + "grad_norm": 3.171875, + "learning_rate": 0.0001905800555322566, + "loss": 4.5534, + "step": 81750 + }, + { + "epoch": 0.7042011019283747, + "grad_norm": 2.515625, + "learning_rate": 0.00019056857319483256, + "loss": 4.9203, + "step": 81800 + }, + { + "epoch": 0.7046315426997245, + "grad_norm": 2.359375, + "learning_rate": 0.00019055708420987011, + "loss": 4.862, + "step": 81850 + }, + { + "epoch": 0.7050619834710744, + "grad_norm": 2.125, + "learning_rate": 0.0001905455885782125, + "loss": 4.6505, + "step": 81900 + }, + { + "epoch": 0.7054924242424242, + "grad_norm": 3.90625, + "learning_rate": 0.0001905340863007035, + "loss": 4.6438, + "step": 81950 + }, + { + "epoch": 0.7059228650137741, + "grad_norm": 2.796875, + "learning_rate": 0.00019052257737818738, + "loss": 4.8286, + "step": 82000 + }, + { + "epoch": 0.706353305785124, + "grad_norm": 2.71875, + "learning_rate": 0.00019051106181150883, + "loss": 4.4646, + "step": 82050 + }, + { + "epoch": 0.7067837465564738, + "grad_norm": 2.125, + "learning_rate": 0.00019049953960151308, + "loss": 4.5731, + "step": 82100 + }, + { + "epoch": 0.7072141873278237, + "grad_norm": 2.53125, + "learning_rate": 0.00019048801074904583, + "loss": 4.9183, + "step": 82150 + }, + { + "epoch": 0.7076446280991735, + "grad_norm": 0.8984375, + "learning_rate": 0.00019047647525495328, + "loss": 4.328, + "step": 82200 + }, + { + "epoch": 0.7080750688705234, + "grad_norm": 0.91015625, + "learning_rate": 0.00019046493312008212, + "loss": 4.5565, + "step": 82250 + }, + { + "epoch": 0.7085055096418733, + "grad_norm": 2.546875, + "learning_rate": 0.00019045338434527952, + "loss": 4.7026, + "step": 82300 + }, + { + "epoch": 0.7089359504132231, + "grad_norm": 2.53125, + "learning_rate": 0.00019044182893139313, + "loss": 4.4458, + "step": 82350 + }, + { + "epoch": 0.709366391184573, + "grad_norm": 2.578125, + "learning_rate": 0.00019043026687927105, + "loss": 4.9716, + "step": 82400 + }, + { + "epoch": 0.7097968319559229, + "grad_norm": 1.703125, + "learning_rate": 0.00019041869818976198, + "loss": 4.6185, + "step": 82450 + }, + { + "epoch": 0.7102272727272727, + "grad_norm": 2.59375, + "learning_rate": 0.000190407122863715, + "loss": 4.4424, + "step": 82500 + }, + { + "epoch": 0.7106577134986226, + "grad_norm": 2.71875, + "learning_rate": 0.0001903955409019797, + "loss": 4.8352, + "step": 82550 + }, + { + "epoch": 0.7110881542699724, + "grad_norm": 2.78125, + "learning_rate": 0.0001903839523054062, + "loss": 4.9915, + "step": 82600 + }, + { + "epoch": 0.7115185950413223, + "grad_norm": 1.2890625, + "learning_rate": 0.00019037235707484507, + "loss": 4.9429, + "step": 82650 + }, + { + "epoch": 0.7119490358126722, + "grad_norm": 3.25, + "learning_rate": 0.00019036075521114738, + "loss": 4.8566, + "step": 82700 + }, + { + "epoch": 0.712379476584022, + "grad_norm": 2.125, + "learning_rate": 0.00019034914671516465, + "loss": 5.0509, + "step": 82750 + }, + { + "epoch": 0.7128099173553719, + "grad_norm": 1.4765625, + "learning_rate": 0.00019033753158774898, + "loss": 4.3636, + "step": 82800 + }, + { + "epoch": 0.7132403581267218, + "grad_norm": 2.453125, + "learning_rate": 0.00019032590982975283, + "loss": 4.8871, + "step": 82850 + }, + { + "epoch": 0.7136707988980716, + "grad_norm": 2.4375, + "learning_rate": 0.00019031428144202926, + "loss": 4.775, + "step": 82900 + }, + { + "epoch": 0.7141012396694215, + "grad_norm": 3.515625, + "learning_rate": 0.0001903026464254317, + "loss": 4.8247, + "step": 82950 + }, + { + "epoch": 0.7145316804407713, + "grad_norm": 2.265625, + "learning_rate": 0.00019029100478081424, + "loss": 4.5673, + "step": 83000 + }, + { + "epoch": 0.7149621212121212, + "grad_norm": 2.328125, + "learning_rate": 0.00019027935650903124, + "loss": 4.3138, + "step": 83050 + }, + { + "epoch": 0.7153925619834711, + "grad_norm": 2.109375, + "learning_rate": 0.00019026770161093776, + "loss": 4.7773, + "step": 83100 + }, + { + "epoch": 0.7158230027548209, + "grad_norm": 2.1875, + "learning_rate": 0.00019025604008738917, + "loss": 4.9081, + "step": 83150 + }, + { + "epoch": 0.7162534435261708, + "grad_norm": 1.234375, + "learning_rate": 0.00019024437193924145, + "loss": 4.5025, + "step": 83200 + }, + { + "epoch": 0.7166838842975206, + "grad_norm": 2.34375, + "learning_rate": 0.00019023269716735098, + "loss": 4.8034, + "step": 83250 + }, + { + "epoch": 0.7171143250688705, + "grad_norm": 4.5, + "learning_rate": 0.0001902210157725747, + "loss": 5.0936, + "step": 83300 + }, + { + "epoch": 0.7175447658402204, + "grad_norm": 1.4296875, + "learning_rate": 0.00019020932775576995, + "loss": 4.7048, + "step": 83350 + }, + { + "epoch": 0.7179752066115702, + "grad_norm": 1.765625, + "learning_rate": 0.00019019763311779465, + "loss": 4.576, + "step": 83400 + }, + { + "epoch": 0.7184056473829201, + "grad_norm": 1.25, + "learning_rate": 0.00019018593185950712, + "loss": 5.0109, + "step": 83450 + }, + { + "epoch": 0.71883608815427, + "grad_norm": 2.46875, + "learning_rate": 0.00019017422398176625, + "loss": 4.8809, + "step": 83500 + }, + { + "epoch": 0.7192665289256198, + "grad_norm": 2.5625, + "learning_rate": 0.00019016250948543134, + "loss": 4.7406, + "step": 83550 + }, + { + "epoch": 0.7196969696969697, + "grad_norm": 2.359375, + "learning_rate": 0.00019015078837136227, + "loss": 4.3789, + "step": 83600 + }, + { + "epoch": 0.7201274104683195, + "grad_norm": 2.15625, + "learning_rate": 0.00019013906064041923, + "loss": 4.5741, + "step": 83650 + }, + { + "epoch": 0.7205578512396694, + "grad_norm": 2.25, + "learning_rate": 0.00019012732629346312, + "loss": 5.1825, + "step": 83700 + }, + { + "epoch": 0.7209882920110193, + "grad_norm": 2.046875, + "learning_rate": 0.0001901155853313552, + "loss": 4.5367, + "step": 83750 + }, + { + "epoch": 0.7214187327823691, + "grad_norm": 3.84375, + "learning_rate": 0.00019010383775495715, + "loss": 4.3739, + "step": 83800 + }, + { + "epoch": 0.721849173553719, + "grad_norm": 3.59375, + "learning_rate": 0.00019009208356513132, + "loss": 4.621, + "step": 83850 + }, + { + "epoch": 0.7222796143250688, + "grad_norm": 1.4453125, + "learning_rate": 0.00019008032276274036, + "loss": 4.3862, + "step": 83900 + }, + { + "epoch": 0.7227100550964187, + "grad_norm": 3.28125, + "learning_rate": 0.00019006855534864752, + "loss": 4.5218, + "step": 83950 + }, + { + "epoch": 0.7231404958677686, + "grad_norm": 2.046875, + "learning_rate": 0.00019005678132371653, + "loss": 5.0559, + "step": 84000 + }, + { + "epoch": 0.7231404958677686, + "eval_loss": 5.360166549682617, + "eval_runtime": 22.1767, + "eval_samples_per_second": 28.859, + "eval_steps_per_second": 14.43, + "eval_tts_loss": 6.821026823562931, + "step": 84000 + }, + { + "epoch": 0.7235709366391184, + "grad_norm": 1.8515625, + "learning_rate": 0.00019004500068881152, + "loss": 4.8812, + "step": 84050 + }, + { + "epoch": 0.7240013774104683, + "grad_norm": 3.140625, + "learning_rate": 0.00019003321344479723, + "loss": 4.5547, + "step": 84100 + }, + { + "epoch": 0.7244318181818182, + "grad_norm": 3.171875, + "learning_rate": 0.00019002141959253876, + "loss": 4.4966, + "step": 84150 + }, + { + "epoch": 0.724862258953168, + "grad_norm": 1.953125, + "learning_rate": 0.0001900096191329018, + "loss": 4.7886, + "step": 84200 + }, + { + "epoch": 0.7252926997245179, + "grad_norm": 3.546875, + "learning_rate": 0.00018999781206675246, + "loss": 4.5249, + "step": 84250 + }, + { + "epoch": 0.7257231404958677, + "grad_norm": 2.546875, + "learning_rate": 0.00018998599839495733, + "loss": 4.7126, + "step": 84300 + }, + { + "epoch": 0.7261535812672176, + "grad_norm": 2.734375, + "learning_rate": 0.00018997417811838352, + "loss": 4.8049, + "step": 84350 + }, + { + "epoch": 0.7265840220385675, + "grad_norm": 3.0625, + "learning_rate": 0.00018996235123789864, + "loss": 4.6832, + "step": 84400 + }, + { + "epoch": 0.7270144628099173, + "grad_norm": 3.28125, + "learning_rate": 0.00018995051775437074, + "loss": 4.3974, + "step": 84450 + }, + { + "epoch": 0.7274449035812672, + "grad_norm": 2.1875, + "learning_rate": 0.00018993867766866834, + "loss": 4.8823, + "step": 84500 + }, + { + "epoch": 0.727875344352617, + "grad_norm": 2.703125, + "learning_rate": 0.00018992683098166055, + "loss": 4.6495, + "step": 84550 + }, + { + "epoch": 0.7283057851239669, + "grad_norm": 2.046875, + "learning_rate": 0.00018991497769421683, + "loss": 4.7448, + "step": 84600 + }, + { + "epoch": 0.7287362258953168, + "grad_norm": 3.28125, + "learning_rate": 0.0001899031178072072, + "loss": 4.7228, + "step": 84650 + }, + { + "epoch": 0.7291666666666666, + "grad_norm": 2.25, + "learning_rate": 0.00018989125132150213, + "loss": 4.7001, + "step": 84700 + }, + { + "epoch": 0.7295971074380165, + "grad_norm": 2.75, + "learning_rate": 0.00018987937823797266, + "loss": 5.123, + "step": 84750 + }, + { + "epoch": 0.7300275482093664, + "grad_norm": 2.875, + "learning_rate": 0.0001898674985574902, + "loss": 4.9325, + "step": 84800 + }, + { + "epoch": 0.7304579889807162, + "grad_norm": 2.453125, + "learning_rate": 0.0001898556122809267, + "loss": 5.0725, + "step": 84850 + }, + { + "epoch": 0.7308884297520661, + "grad_norm": 2.5, + "learning_rate": 0.00018984371940915457, + "loss": 4.6041, + "step": 84900 + }, + { + "epoch": 0.7313188705234159, + "grad_norm": 3.25, + "learning_rate": 0.00018983181994304676, + "loss": 4.5216, + "step": 84950 + }, + { + "epoch": 0.7317493112947658, + "grad_norm": 2.671875, + "learning_rate": 0.00018981991388347663, + "loss": 4.3788, + "step": 85000 + }, + { + "epoch": 0.7321797520661157, + "grad_norm": 1.6796875, + "learning_rate": 0.00018980800123131808, + "loss": 4.8238, + "step": 85050 + }, + { + "epoch": 0.7326101928374655, + "grad_norm": 2.203125, + "learning_rate": 0.00018979608198744545, + "loss": 5.1704, + "step": 85100 + }, + { + "epoch": 0.7330406336088154, + "grad_norm": 2.671875, + "learning_rate": 0.00018978415615273364, + "loss": 4.882, + "step": 85150 + }, + { + "epoch": 0.7334710743801653, + "grad_norm": 2.171875, + "learning_rate": 0.00018977222372805792, + "loss": 4.5257, + "step": 85200 + }, + { + "epoch": 0.7339015151515151, + "grad_norm": 3.171875, + "learning_rate": 0.00018976028471429413, + "loss": 4.7512, + "step": 85250 + }, + { + "epoch": 0.734331955922865, + "grad_norm": 2.59375, + "learning_rate": 0.00018974833911231858, + "loss": 4.7346, + "step": 85300 + }, + { + "epoch": 0.7347623966942148, + "grad_norm": 2.9375, + "learning_rate": 0.00018973638692300798, + "loss": 4.6258, + "step": 85350 + }, + { + "epoch": 0.7351928374655647, + "grad_norm": 1.53125, + "learning_rate": 0.00018972442814723972, + "loss": 4.9166, + "step": 85400 + }, + { + "epoch": 0.7356232782369146, + "grad_norm": 4.09375, + "learning_rate": 0.00018971246278589145, + "loss": 4.5802, + "step": 85450 + }, + { + "epoch": 0.7360537190082644, + "grad_norm": 2.90625, + "learning_rate": 0.00018970049083984146, + "loss": 4.6552, + "step": 85500 + }, + { + "epoch": 0.7364841597796143, + "grad_norm": 2.515625, + "learning_rate": 0.0001896885123099684, + "loss": 4.9737, + "step": 85550 + }, + { + "epoch": 0.7369146005509641, + "grad_norm": 2.328125, + "learning_rate": 0.00018967652719715155, + "loss": 4.7488, + "step": 85600 + }, + { + "epoch": 0.737345041322314, + "grad_norm": 2.9375, + "learning_rate": 0.0001896645355022705, + "loss": 5.1506, + "step": 85650 + }, + { + "epoch": 0.737775482093664, + "grad_norm": 3.015625, + "learning_rate": 0.0001896525372262055, + "loss": 4.7965, + "step": 85700 + }, + { + "epoch": 0.7382059228650137, + "grad_norm": 1.359375, + "learning_rate": 0.00018964053236983713, + "loss": 4.5038, + "step": 85750 + }, + { + "epoch": 0.7386363636363636, + "grad_norm": 1.984375, + "learning_rate": 0.00018962852093404656, + "loss": 4.8172, + "step": 85800 + }, + { + "epoch": 0.7390668044077136, + "grad_norm": 2.015625, + "learning_rate": 0.00018961650291971543, + "loss": 4.5635, + "step": 85850 + }, + { + "epoch": 0.7394972451790633, + "grad_norm": 2.0625, + "learning_rate": 0.00018960447832772577, + "loss": 4.7879, + "step": 85900 + }, + { + "epoch": 0.7399276859504132, + "grad_norm": 4.15625, + "learning_rate": 0.00018959244715896015, + "loss": 4.6641, + "step": 85950 + }, + { + "epoch": 0.740358126721763, + "grad_norm": 2.90625, + "learning_rate": 0.00018958040941430176, + "loss": 4.4173, + "step": 86000 + }, + { + "epoch": 0.740788567493113, + "grad_norm": 1.5, + "learning_rate": 0.000189568365094634, + "loss": 4.8023, + "step": 86050 + }, + { + "epoch": 0.7412190082644629, + "grad_norm": 3.71875, + "learning_rate": 0.00018955631420084098, + "loss": 4.9974, + "step": 86100 + }, + { + "epoch": 0.7416494490358126, + "grad_norm": 3.0625, + "learning_rate": 0.00018954425673380713, + "loss": 4.6843, + "step": 86150 + }, + { + "epoch": 0.7420798898071626, + "grad_norm": 3.671875, + "learning_rate": 0.00018953219269441754, + "loss": 4.5927, + "step": 86200 + }, + { + "epoch": 0.7425103305785123, + "grad_norm": 3.140625, + "learning_rate": 0.00018952012208355763, + "loss": 5.0064, + "step": 86250 + }, + { + "epoch": 0.7429407713498623, + "grad_norm": 1.9609375, + "learning_rate": 0.00018950804490211335, + "loss": 4.7797, + "step": 86300 + }, + { + "epoch": 0.7433712121212122, + "grad_norm": 3.015625, + "learning_rate": 0.00018949596115097118, + "loss": 4.8961, + "step": 86350 + }, + { + "epoch": 0.743801652892562, + "grad_norm": 2.75, + "learning_rate": 0.00018948387083101803, + "loss": 4.9575, + "step": 86400 + }, + { + "epoch": 0.7442320936639119, + "grad_norm": 1.796875, + "learning_rate": 0.00018947177394314127, + "loss": 4.7509, + "step": 86450 + }, + { + "epoch": 0.7446625344352618, + "grad_norm": 1.2734375, + "learning_rate": 0.0001894596704882288, + "loss": 4.542, + "step": 86500 + }, + { + "epoch": 0.7450929752066116, + "grad_norm": 2.46875, + "learning_rate": 0.00018944756046716902, + "loss": 4.4015, + "step": 86550 + }, + { + "epoch": 0.7455234159779615, + "grad_norm": 2.171875, + "learning_rate": 0.0001894354438808507, + "loss": 4.4901, + "step": 86600 + }, + { + "epoch": 0.7459538567493113, + "grad_norm": 1.578125, + "learning_rate": 0.00018942332073016327, + "loss": 4.6675, + "step": 86650 + }, + { + "epoch": 0.7463842975206612, + "grad_norm": 3.765625, + "learning_rate": 0.00018941119101599646, + "loss": 4.9081, + "step": 86700 + }, + { + "epoch": 0.7468147382920111, + "grad_norm": 1.09375, + "learning_rate": 0.00018939905473924065, + "loss": 4.8476, + "step": 86750 + }, + { + "epoch": 0.7472451790633609, + "grad_norm": 1.5703125, + "learning_rate": 0.00018938691190078654, + "loss": 4.4907, + "step": 86800 + }, + { + "epoch": 0.7476756198347108, + "grad_norm": 1.109375, + "learning_rate": 0.00018937476250152543, + "loss": 3.9543, + "step": 86850 + }, + { + "epoch": 0.7481060606060606, + "grad_norm": 2.546875, + "learning_rate": 0.00018936260654234903, + "loss": 4.5492, + "step": 86900 + }, + { + "epoch": 0.7485365013774105, + "grad_norm": 4.625, + "learning_rate": 0.00018935044402414958, + "loss": 4.6187, + "step": 86950 + }, + { + "epoch": 0.7489669421487604, + "grad_norm": 3.671875, + "learning_rate": 0.00018933827494781982, + "loss": 4.553, + "step": 87000 + }, + { + "epoch": 0.7489669421487604, + "eval_loss": 5.351483345031738, + "eval_runtime": 22.7065, + "eval_samples_per_second": 28.186, + "eval_steps_per_second": 14.093, + "eval_tts_loss": 6.789242874380564, + "step": 87000 + }, + { + "epoch": 0.7493973829201102, + "grad_norm": 1.90625, + "learning_rate": 0.00018932609931425284, + "loss": 4.4619, + "step": 87050 + }, + { + "epoch": 0.7498278236914601, + "grad_norm": 2.90625, + "learning_rate": 0.00018931391712434238, + "loss": 4.9802, + "step": 87100 + }, + { + "epoch": 0.75025826446281, + "grad_norm": 3.453125, + "learning_rate": 0.0001893017283789826, + "loss": 4.4043, + "step": 87150 + }, + { + "epoch": 0.7506887052341598, + "grad_norm": 1.6953125, + "learning_rate": 0.00018928953307906804, + "loss": 4.5111, + "step": 87200 + }, + { + "epoch": 0.7511191460055097, + "grad_norm": 3.59375, + "learning_rate": 0.0001892773312254939, + "loss": 4.7247, + "step": 87250 + }, + { + "epoch": 0.7515495867768595, + "grad_norm": 2.4375, + "learning_rate": 0.0001892651228191557, + "loss": 4.7363, + "step": 87300 + }, + { + "epoch": 0.7519800275482094, + "grad_norm": 1.9296875, + "learning_rate": 0.0001892529078609496, + "loss": 4.6117, + "step": 87350 + }, + { + "epoch": 0.7524104683195593, + "grad_norm": 2.921875, + "learning_rate": 0.00018924068635177205, + "loss": 4.7823, + "step": 87400 + }, + { + "epoch": 0.7528409090909091, + "grad_norm": 2.765625, + "learning_rate": 0.00018922845829252017, + "loss": 4.9833, + "step": 87450 + }, + { + "epoch": 0.753271349862259, + "grad_norm": 2.890625, + "learning_rate": 0.00018921622368409138, + "loss": 4.5446, + "step": 87500 + }, + { + "epoch": 0.7537017906336089, + "grad_norm": 1.8984375, + "learning_rate": 0.00018920398252738374, + "loss": 4.7755, + "step": 87550 + }, + { + "epoch": 0.7541322314049587, + "grad_norm": 2.609375, + "learning_rate": 0.00018919173482329573, + "loss": 4.3477, + "step": 87600 + }, + { + "epoch": 0.7545626721763086, + "grad_norm": 1.3359375, + "learning_rate": 0.00018917948057272627, + "loss": 4.348, + "step": 87650 + }, + { + "epoch": 0.7549931129476584, + "grad_norm": 2.21875, + "learning_rate": 0.0001891672197765748, + "loss": 4.7291, + "step": 87700 + }, + { + "epoch": 0.7554235537190083, + "grad_norm": 2.0, + "learning_rate": 0.00018915495243574125, + "loss": 4.8584, + "step": 87750 + }, + { + "epoch": 0.7558539944903582, + "grad_norm": 1.40625, + "learning_rate": 0.00018914267855112604, + "loss": 4.7376, + "step": 87800 + }, + { + "epoch": 0.756284435261708, + "grad_norm": 2.078125, + "learning_rate": 0.00018913039812363, + "loss": 4.7964, + "step": 87850 + }, + { + "epoch": 0.7567148760330579, + "grad_norm": 1.921875, + "learning_rate": 0.0001891181111541545, + "loss": 4.977, + "step": 87900 + }, + { + "epoch": 0.7571453168044077, + "grad_norm": 0.66015625, + "learning_rate": 0.0001891058176436014, + "loss": 4.6962, + "step": 87950 + }, + { + "epoch": 0.7575757575757576, + "grad_norm": 1.796875, + "learning_rate": 0.000189093517592873, + "loss": 4.6742, + "step": 88000 + }, + { + "epoch": 0.7580061983471075, + "grad_norm": 3.078125, + "learning_rate": 0.0001890812110028721, + "loss": 4.6078, + "step": 88050 + }, + { + "epoch": 0.7584366391184573, + "grad_norm": 3.0, + "learning_rate": 0.00018906889787450196, + "loss": 5.1462, + "step": 88100 + }, + { + "epoch": 0.7588670798898072, + "grad_norm": 2.359375, + "learning_rate": 0.00018905657820866636, + "loss": 4.8582, + "step": 88150 + }, + { + "epoch": 0.7592975206611571, + "grad_norm": 2.546875, + "learning_rate": 0.00018904425200626953, + "loss": 4.7233, + "step": 88200 + }, + { + "epoch": 0.7597279614325069, + "grad_norm": 1.8515625, + "learning_rate": 0.0001890319192682162, + "loss": 4.5037, + "step": 88250 + }, + { + "epoch": 0.7601584022038568, + "grad_norm": 1.90625, + "learning_rate": 0.00018901957999541152, + "loss": 4.9128, + "step": 88300 + }, + { + "epoch": 0.7605888429752066, + "grad_norm": 2.484375, + "learning_rate": 0.00018900723418876125, + "loss": 5.3711, + "step": 88350 + }, + { + "epoch": 0.7610192837465565, + "grad_norm": 2.390625, + "learning_rate": 0.00018899488184917146, + "loss": 4.6903, + "step": 88400 + }, + { + "epoch": 0.7614497245179064, + "grad_norm": 2.171875, + "learning_rate": 0.00018898252297754883, + "loss": 5.1068, + "step": 88450 + }, + { + "epoch": 0.7618801652892562, + "grad_norm": 2.1875, + "learning_rate": 0.00018897015757480046, + "loss": 4.8245, + "step": 88500 + }, + { + "epoch": 0.7623106060606061, + "grad_norm": 4.125, + "learning_rate": 0.00018895778564183394, + "loss": 4.9601, + "step": 88550 + }, + { + "epoch": 0.7627410468319559, + "grad_norm": 1.6171875, + "learning_rate": 0.00018894540717955734, + "loss": 4.9908, + "step": 88600 + }, + { + "epoch": 0.7631714876033058, + "grad_norm": 1.5, + "learning_rate": 0.00018893302218887925, + "loss": 4.387, + "step": 88650 + }, + { + "epoch": 0.7636019283746557, + "grad_norm": 2.84375, + "learning_rate": 0.00018892063067070866, + "loss": 4.7879, + "step": 88700 + }, + { + "epoch": 0.7640323691460055, + "grad_norm": 1.4375, + "learning_rate": 0.00018890823262595512, + "loss": 4.1621, + "step": 88750 + }, + { + "epoch": 0.7644628099173554, + "grad_norm": 1.4375, + "learning_rate": 0.00018889582805552856, + "loss": 4.7262, + "step": 88800 + }, + { + "epoch": 0.7648932506887053, + "grad_norm": 2.875, + "learning_rate": 0.0001888834169603395, + "loss": 4.8107, + "step": 88850 + }, + { + "epoch": 0.7653236914600551, + "grad_norm": 2.140625, + "learning_rate": 0.0001888709993412989, + "loss": 4.5427, + "step": 88900 + }, + { + "epoch": 0.765754132231405, + "grad_norm": 3.09375, + "learning_rate": 0.0001888585751993181, + "loss": 4.7586, + "step": 88950 + }, + { + "epoch": 0.7661845730027548, + "grad_norm": 1.46875, + "learning_rate": 0.00018884614453530912, + "loss": 4.5743, + "step": 89000 + }, + { + "epoch": 0.7666150137741047, + "grad_norm": 2.46875, + "learning_rate": 0.0001888337073501843, + "loss": 4.961, + "step": 89050 + }, + { + "epoch": 0.7670454545454546, + "grad_norm": 3.0, + "learning_rate": 0.00018882126364485645, + "loss": 4.7848, + "step": 89100 + }, + { + "epoch": 0.7674758953168044, + "grad_norm": 3.109375, + "learning_rate": 0.00018880881342023896, + "loss": 4.4636, + "step": 89150 + }, + { + "epoch": 0.7679063360881543, + "grad_norm": 3.671875, + "learning_rate": 0.00018879635667724563, + "loss": 4.5567, + "step": 89200 + }, + { + "epoch": 0.7683367768595041, + "grad_norm": 3.375, + "learning_rate": 0.00018878389341679078, + "loss": 4.6852, + "step": 89250 + }, + { + "epoch": 0.768767217630854, + "grad_norm": 1.5, + "learning_rate": 0.0001887714236397892, + "loss": 4.824, + "step": 89300 + }, + { + "epoch": 0.7691976584022039, + "grad_norm": 1.4296875, + "learning_rate": 0.0001887589473471561, + "loss": 4.7838, + "step": 89350 + }, + { + "epoch": 0.7696280991735537, + "grad_norm": 1.859375, + "learning_rate": 0.00018874646453980725, + "loss": 4.5263, + "step": 89400 + }, + { + "epoch": 0.7700585399449036, + "grad_norm": 3.125, + "learning_rate": 0.00018873397521865882, + "loss": 4.9256, + "step": 89450 + }, + { + "epoch": 0.7704889807162535, + "grad_norm": 1.3359375, + "learning_rate": 0.00018872147938462752, + "loss": 4.6703, + "step": 89500 + }, + { + "epoch": 0.7709194214876033, + "grad_norm": 2.3125, + "learning_rate": 0.00018870897703863055, + "loss": 4.8892, + "step": 89550 + }, + { + "epoch": 0.7713498622589532, + "grad_norm": 1.859375, + "learning_rate": 0.00018869646818158554, + "loss": 5.0574, + "step": 89600 + }, + { + "epoch": 0.771780303030303, + "grad_norm": 2.78125, + "learning_rate": 0.00018868395281441055, + "loss": 5.0371, + "step": 89650 + }, + { + "epoch": 0.7722107438016529, + "grad_norm": 2.03125, + "learning_rate": 0.0001886714309380243, + "loss": 4.862, + "step": 89700 + }, + { + "epoch": 0.7726411845730028, + "grad_norm": 1.171875, + "learning_rate": 0.00018865890255334572, + "loss": 4.4832, + "step": 89750 + }, + { + "epoch": 0.7730716253443526, + "grad_norm": 1.3671875, + "learning_rate": 0.0001886463676612945, + "loss": 4.6223, + "step": 89800 + }, + { + "epoch": 0.7735020661157025, + "grad_norm": 1.4609375, + "learning_rate": 0.0001886338262627906, + "loss": 4.6485, + "step": 89850 + }, + { + "epoch": 0.7739325068870524, + "grad_norm": 2.71875, + "learning_rate": 0.00018862127835875457, + "loss": 4.7803, + "step": 89900 + }, + { + "epoch": 0.7743629476584022, + "grad_norm": 2.609375, + "learning_rate": 0.00018860872395010738, + "loss": 4.8075, + "step": 89950 + }, + { + "epoch": 0.7747933884297521, + "grad_norm": 2.609375, + "learning_rate": 0.00018859616303777048, + "loss": 4.6491, + "step": 90000 + }, + { + "epoch": 0.7747933884297521, + "eval_loss": 5.349773406982422, + "eval_runtime": 21.7904, + "eval_samples_per_second": 29.371, + "eval_steps_per_second": 14.685, + "eval_tts_loss": 6.816362142143376, + "step": 90000 + }, + { + "epoch": 0.7752238292011019, + "grad_norm": 3.125, + "learning_rate": 0.00018858359562266586, + "loss": 4.8141, + "step": 90050 + }, + { + "epoch": 0.7756542699724518, + "grad_norm": 1.125, + "learning_rate": 0.0001885710217057159, + "loss": 4.6386, + "step": 90100 + }, + { + "epoch": 0.7760847107438017, + "grad_norm": 3.21875, + "learning_rate": 0.00018855844128784353, + "loss": 4.6615, + "step": 90150 + }, + { + "epoch": 0.7765151515151515, + "grad_norm": 2.140625, + "learning_rate": 0.00018854585436997207, + "loss": 4.7206, + "step": 90200 + }, + { + "epoch": 0.7769455922865014, + "grad_norm": 2.46875, + "learning_rate": 0.00018853326095302544, + "loss": 4.9931, + "step": 90250 + }, + { + "epoch": 0.7773760330578512, + "grad_norm": 2.421875, + "learning_rate": 0.00018852066103792795, + "loss": 4.7669, + "step": 90300 + }, + { + "epoch": 0.7778064738292011, + "grad_norm": 1.7109375, + "learning_rate": 0.00018850805462560435, + "loss": 4.9046, + "step": 90350 + }, + { + "epoch": 0.778236914600551, + "grad_norm": 1.265625, + "learning_rate": 0.00018849544171698003, + "loss": 4.4504, + "step": 90400 + }, + { + "epoch": 0.7786673553719008, + "grad_norm": 1.4453125, + "learning_rate": 0.00018848282231298064, + "loss": 4.3823, + "step": 90450 + }, + { + "epoch": 0.7790977961432507, + "grad_norm": 2.640625, + "learning_rate": 0.00018847019641453247, + "loss": 4.8484, + "step": 90500 + }, + { + "epoch": 0.7795282369146006, + "grad_norm": 2.625, + "learning_rate": 0.00018845756402256227, + "loss": 4.7266, + "step": 90550 + }, + { + "epoch": 0.7799586776859504, + "grad_norm": 2.9375, + "learning_rate": 0.00018844492513799718, + "loss": 4.9104, + "step": 90600 + }, + { + "epoch": 0.7803891184573003, + "grad_norm": 1.9453125, + "learning_rate": 0.00018843227976176485, + "loss": 4.8199, + "step": 90650 + }, + { + "epoch": 0.7808195592286501, + "grad_norm": 2.140625, + "learning_rate": 0.00018841962789479348, + "loss": 4.5509, + "step": 90700 + }, + { + "epoch": 0.78125, + "grad_norm": 2.734375, + "learning_rate": 0.00018840696953801165, + "loss": 5.1658, + "step": 90750 + }, + { + "epoch": 0.7816804407713499, + "grad_norm": 2.21875, + "learning_rate": 0.00018839430469234846, + "loss": 4.6426, + "step": 90800 + }, + { + "epoch": 0.7821108815426997, + "grad_norm": 1.7109375, + "learning_rate": 0.0001883816333587335, + "loss": 4.529, + "step": 90850 + }, + { + "epoch": 0.7825413223140496, + "grad_norm": 2.59375, + "learning_rate": 0.00018836895553809685, + "loss": 4.9304, + "step": 90900 + }, + { + "epoch": 0.7829717630853994, + "grad_norm": 2.921875, + "learning_rate": 0.00018835627123136895, + "loss": 4.5608, + "step": 90950 + }, + { + "epoch": 0.7834022038567493, + "grad_norm": 3.15625, + "learning_rate": 0.00018834358043948087, + "loss": 4.2306, + "step": 91000 + }, + { + "epoch": 0.7838326446280992, + "grad_norm": 2.40625, + "learning_rate": 0.00018833088316336404, + "loss": 4.4561, + "step": 91050 + }, + { + "epoch": 0.784263085399449, + "grad_norm": 2.375, + "learning_rate": 0.0001883181794039504, + "loss": 4.9889, + "step": 91100 + }, + { + "epoch": 0.7846935261707989, + "grad_norm": 3.0, + "learning_rate": 0.00018830546916217246, + "loss": 5.0307, + "step": 91150 + }, + { + "epoch": 0.7851239669421488, + "grad_norm": 2.0, + "learning_rate": 0.00018829275243896308, + "loss": 4.514, + "step": 91200 + }, + { + "epoch": 0.7855544077134986, + "grad_norm": 2.375, + "learning_rate": 0.00018828002923525563, + "loss": 4.6886, + "step": 91250 + }, + { + "epoch": 0.7859848484848485, + "grad_norm": 2.75, + "learning_rate": 0.00018826729955198396, + "loss": 5.1398, + "step": 91300 + }, + { + "epoch": 0.7864152892561983, + "grad_norm": 1.7578125, + "learning_rate": 0.00018825456339008242, + "loss": 4.6828, + "step": 91350 + }, + { + "epoch": 0.7868457300275482, + "grad_norm": 4.0, + "learning_rate": 0.00018824182075048582, + "loss": 4.8537, + "step": 91400 + }, + { + "epoch": 0.7872761707988981, + "grad_norm": 3.046875, + "learning_rate": 0.00018822907163412947, + "loss": 4.3437, + "step": 91450 + }, + { + "epoch": 0.7877066115702479, + "grad_norm": 1.78125, + "learning_rate": 0.00018821631604194904, + "loss": 4.4522, + "step": 91500 + }, + { + "epoch": 0.7881370523415978, + "grad_norm": 2.53125, + "learning_rate": 0.00018820355397488082, + "loss": 4.9165, + "step": 91550 + }, + { + "epoch": 0.7885674931129476, + "grad_norm": 3.203125, + "learning_rate": 0.0001881907854338615, + "loss": 4.5211, + "step": 91600 + }, + { + "epoch": 0.7889979338842975, + "grad_norm": 2.453125, + "learning_rate": 0.00018817801041982833, + "loss": 4.9461, + "step": 91650 + }, + { + "epoch": 0.7894283746556474, + "grad_norm": 2.5625, + "learning_rate": 0.0001881652289337189, + "loss": 4.9684, + "step": 91700 + }, + { + "epoch": 0.7898588154269972, + "grad_norm": 2.21875, + "learning_rate": 0.00018815244097647132, + "loss": 4.8717, + "step": 91750 + }, + { + "epoch": 0.7902892561983471, + "grad_norm": 1.9375, + "learning_rate": 0.00018813964654902426, + "loss": 4.6948, + "step": 91800 + }, + { + "epoch": 0.790719696969697, + "grad_norm": 3.40625, + "learning_rate": 0.00018812684565231678, + "loss": 4.9747, + "step": 91850 + }, + { + "epoch": 0.7911501377410468, + "grad_norm": 2.40625, + "learning_rate": 0.00018811403828728843, + "loss": 4.1586, + "step": 91900 + }, + { + "epoch": 0.7915805785123967, + "grad_norm": 2.0625, + "learning_rate": 0.00018810122445487925, + "loss": 4.6596, + "step": 91950 + }, + { + "epoch": 0.7920110192837465, + "grad_norm": 2.296875, + "learning_rate": 0.00018808840415602976, + "loss": 4.6828, + "step": 92000 + }, + { + "epoch": 0.7924414600550964, + "grad_norm": 2.703125, + "learning_rate": 0.00018807557739168097, + "loss": 4.9194, + "step": 92050 + }, + { + "epoch": 0.7928719008264463, + "grad_norm": 1.5, + "learning_rate": 0.00018806274416277425, + "loss": 4.586, + "step": 92100 + }, + { + "epoch": 0.7933023415977961, + "grad_norm": 2.53125, + "learning_rate": 0.0001880499044702516, + "loss": 4.4376, + "step": 92150 + }, + { + "epoch": 0.793732782369146, + "grad_norm": 2.28125, + "learning_rate": 0.00018803705831505537, + "loss": 4.6227, + "step": 92200 + }, + { + "epoch": 0.7941632231404959, + "grad_norm": 1.875, + "learning_rate": 0.00018802420569812852, + "loss": 4.7545, + "step": 92250 + }, + { + "epoch": 0.7945936639118457, + "grad_norm": 1.7265625, + "learning_rate": 0.00018801134662041436, + "loss": 4.5774, + "step": 92300 + }, + { + "epoch": 0.7950241046831956, + "grad_norm": 3.5, + "learning_rate": 0.0001879984810828567, + "loss": 4.6669, + "step": 92350 + }, + { + "epoch": 0.7954545454545454, + "grad_norm": 2.015625, + "learning_rate": 0.00018798560908639983, + "loss": 4.9228, + "step": 92400 + }, + { + "epoch": 0.7958849862258953, + "grad_norm": 1.71875, + "learning_rate": 0.00018797273063198862, + "loss": 4.5457, + "step": 92450 + }, + { + "epoch": 0.7963154269972452, + "grad_norm": 1.7421875, + "learning_rate": 0.00018795984572056825, + "loss": 5.0458, + "step": 92500 + }, + { + "epoch": 0.796745867768595, + "grad_norm": 2.171875, + "learning_rate": 0.0001879469543530844, + "loss": 4.5707, + "step": 92550 + }, + { + "epoch": 0.7971763085399449, + "grad_norm": 4.25, + "learning_rate": 0.00018793405653048337, + "loss": 4.8589, + "step": 92600 + }, + { + "epoch": 0.7976067493112947, + "grad_norm": 3.328125, + "learning_rate": 0.00018792115225371177, + "loss": 4.4774, + "step": 92650 + }, + { + "epoch": 0.7980371900826446, + "grad_norm": 3.03125, + "learning_rate": 0.0001879082415237168, + "loss": 4.7471, + "step": 92700 + }, + { + "epoch": 0.7984676308539945, + "grad_norm": 1.984375, + "learning_rate": 0.00018789532434144602, + "loss": 4.5363, + "step": 92750 + }, + { + "epoch": 0.7988980716253443, + "grad_norm": 3.203125, + "learning_rate": 0.00018788240070784752, + "loss": 4.6892, + "step": 92800 + }, + { + "epoch": 0.7993285123966942, + "grad_norm": 4.5, + "learning_rate": 0.0001878694706238699, + "loss": 4.4629, + "step": 92850 + }, + { + "epoch": 0.7997589531680441, + "grad_norm": 2.53125, + "learning_rate": 0.0001878565340904622, + "loss": 3.75, + "step": 92900 + }, + { + "epoch": 0.8001893939393939, + "grad_norm": 2.0, + "learning_rate": 0.00018784359110857394, + "loss": 4.4475, + "step": 92950 + }, + { + "epoch": 0.8006198347107438, + "grad_norm": 2.328125, + "learning_rate": 0.0001878306416791551, + "loss": 4.9981, + "step": 93000 + }, + { + "epoch": 0.8006198347107438, + "eval_loss": 5.340872764587402, + "eval_runtime": 21.7484, + "eval_samples_per_second": 29.428, + "eval_steps_per_second": 14.714, + "eval_tts_loss": 6.779757897928123, + "step": 93000 + }, + { + "epoch": 0.8010502754820936, + "grad_norm": 2.0625, + "learning_rate": 0.00018781768580315608, + "loss": 4.7714, + "step": 93050 + }, + { + "epoch": 0.8014807162534435, + "grad_norm": 1.875, + "learning_rate": 0.0001878047234815279, + "loss": 4.4299, + "step": 93100 + }, + { + "epoch": 0.8019111570247934, + "grad_norm": 3.421875, + "learning_rate": 0.00018779175471522194, + "loss": 4.516, + "step": 93150 + }, + { + "epoch": 0.8023415977961432, + "grad_norm": 2.8125, + "learning_rate": 0.00018777877950519007, + "loss": 4.9042, + "step": 93200 + }, + { + "epoch": 0.8027720385674931, + "grad_norm": 2.6875, + "learning_rate": 0.00018776579785238467, + "loss": 4.8926, + "step": 93250 + }, + { + "epoch": 0.8032024793388429, + "grad_norm": 1.78125, + "learning_rate": 0.0001877528097577585, + "loss": 4.9889, + "step": 93300 + }, + { + "epoch": 0.8036329201101928, + "grad_norm": 2.3125, + "learning_rate": 0.0001877398152222649, + "loss": 4.6047, + "step": 93350 + }, + { + "epoch": 0.8040633608815427, + "grad_norm": 3.40625, + "learning_rate": 0.00018772681424685764, + "loss": 4.7777, + "step": 93400 + }, + { + "epoch": 0.8044938016528925, + "grad_norm": 0.671875, + "learning_rate": 0.00018771380683249098, + "loss": 4.6952, + "step": 93450 + }, + { + "epoch": 0.8049242424242424, + "grad_norm": 2.0, + "learning_rate": 0.00018770079298011962, + "loss": 4.8907, + "step": 93500 + }, + { + "epoch": 0.8053546831955923, + "grad_norm": 2.765625, + "learning_rate": 0.00018768777269069875, + "loss": 4.865, + "step": 93550 + }, + { + "epoch": 0.8057851239669421, + "grad_norm": 1.28125, + "learning_rate": 0.00018767474596518403, + "loss": 4.768, + "step": 93600 + }, + { + "epoch": 0.806215564738292, + "grad_norm": 2.484375, + "learning_rate": 0.00018766171280453156, + "loss": 4.7857, + "step": 93650 + }, + { + "epoch": 0.8066460055096418, + "grad_norm": 2.171875, + "learning_rate": 0.000187648673209698, + "loss": 5.0161, + "step": 93700 + }, + { + "epoch": 0.8070764462809917, + "grad_norm": 2.09375, + "learning_rate": 0.00018763562718164043, + "loss": 4.7486, + "step": 93750 + }, + { + "epoch": 0.8075068870523416, + "grad_norm": 1.6640625, + "learning_rate": 0.00018762257472131638, + "loss": 4.986, + "step": 93800 + }, + { + "epoch": 0.8079373278236914, + "grad_norm": 2.6875, + "learning_rate": 0.00018760951582968383, + "loss": 5.0439, + "step": 93850 + }, + { + "epoch": 0.8083677685950413, + "grad_norm": 1.203125, + "learning_rate": 0.00018759645050770135, + "loss": 4.2721, + "step": 93900 + }, + { + "epoch": 0.8087982093663911, + "grad_norm": 3.296875, + "learning_rate": 0.00018758337875632782, + "loss": 4.8746, + "step": 93950 + }, + { + "epoch": 0.809228650137741, + "grad_norm": 3.59375, + "learning_rate": 0.00018757030057652279, + "loss": 4.8101, + "step": 94000 + }, + { + "epoch": 0.8096590909090909, + "grad_norm": 4.09375, + "learning_rate": 0.00018755721596924607, + "loss": 4.863, + "step": 94050 + }, + { + "epoch": 0.8100895316804407, + "grad_norm": 3.1875, + "learning_rate": 0.0001875441249354581, + "loss": 4.6973, + "step": 94100 + }, + { + "epoch": 0.8105199724517906, + "grad_norm": 4.53125, + "learning_rate": 0.00018753102747611972, + "loss": 4.6561, + "step": 94150 + }, + { + "epoch": 0.8109504132231405, + "grad_norm": 1.8203125, + "learning_rate": 0.00018751792359219226, + "loss": 4.9213, + "step": 94200 + }, + { + "epoch": 0.8113808539944903, + "grad_norm": 3.078125, + "learning_rate": 0.00018750481328463748, + "loss": 4.6864, + "step": 94250 + }, + { + "epoch": 0.8118112947658402, + "grad_norm": 2.453125, + "learning_rate": 0.00018749169655441769, + "loss": 5.0092, + "step": 94300 + }, + { + "epoch": 0.81224173553719, + "grad_norm": 2.703125, + "learning_rate": 0.0001874785734024956, + "loss": 4.976, + "step": 94350 + }, + { + "epoch": 0.8126721763085399, + "grad_norm": 2.640625, + "learning_rate": 0.00018746544382983445, + "loss": 4.7885, + "step": 94400 + }, + { + "epoch": 0.8131026170798898, + "grad_norm": 3.578125, + "learning_rate": 0.00018745230783739794, + "loss": 4.9978, + "step": 94450 + }, + { + "epoch": 0.8135330578512396, + "grad_norm": 3.4375, + "learning_rate": 0.00018743916542615015, + "loss": 4.7951, + "step": 94500 + }, + { + "epoch": 0.8139634986225895, + "grad_norm": 2.203125, + "learning_rate": 0.00018742601659705577, + "loss": 4.7587, + "step": 94550 + }, + { + "epoch": 0.8143939393939394, + "grad_norm": 2.71875, + "learning_rate": 0.00018741286135107984, + "loss": 4.5513, + "step": 94600 + }, + { + "epoch": 0.8148243801652892, + "grad_norm": 2.28125, + "learning_rate": 0.00018739969968918803, + "loss": 4.7726, + "step": 94650 + }, + { + "epoch": 0.8152548209366391, + "grad_norm": 2.640625, + "learning_rate": 0.00018738653161234624, + "loss": 4.5777, + "step": 94700 + }, + { + "epoch": 0.8156852617079889, + "grad_norm": 1.609375, + "learning_rate": 0.00018737335712152109, + "loss": 4.7884, + "step": 94750 + }, + { + "epoch": 0.8161157024793388, + "grad_norm": 1.203125, + "learning_rate": 0.0001873601762176795, + "loss": 5.1044, + "step": 94800 + }, + { + "epoch": 0.8165461432506887, + "grad_norm": 2.578125, + "learning_rate": 0.00018734698890178893, + "loss": 5.3587, + "step": 94850 + }, + { + "epoch": 0.8169765840220385, + "grad_norm": 2.328125, + "learning_rate": 0.0001873337951748173, + "loss": 4.839, + "step": 94900 + }, + { + "epoch": 0.8174070247933884, + "grad_norm": 1.296875, + "learning_rate": 0.00018732059503773302, + "loss": 4.2681, + "step": 94950 + }, + { + "epoch": 0.8178374655647382, + "grad_norm": 4.34375, + "learning_rate": 0.00018730738849150494, + "loss": 5.0213, + "step": 95000 + }, + { + "epoch": 0.8182679063360881, + "grad_norm": 2.78125, + "learning_rate": 0.0001872941755371024, + "loss": 4.5184, + "step": 95050 + }, + { + "epoch": 0.818698347107438, + "grad_norm": 1.8046875, + "learning_rate": 0.00018728095617549518, + "loss": 4.903, + "step": 95100 + }, + { + "epoch": 0.8191287878787878, + "grad_norm": 1.203125, + "learning_rate": 0.00018726773040765354, + "loss": 4.7532, + "step": 95150 + }, + { + "epoch": 0.8195592286501377, + "grad_norm": 2.390625, + "learning_rate": 0.00018725449823454827, + "loss": 4.7824, + "step": 95200 + }, + { + "epoch": 0.8199896694214877, + "grad_norm": 1.4765625, + "learning_rate": 0.0001872412596571506, + "loss": 4.8679, + "step": 95250 + }, + { + "epoch": 0.8204201101928374, + "grad_norm": 2.46875, + "learning_rate": 0.00018722801467643214, + "loss": 5.0606, + "step": 95300 + }, + { + "epoch": 0.8208505509641874, + "grad_norm": 2.46875, + "learning_rate": 0.0001872147632933651, + "loss": 4.7142, + "step": 95350 + }, + { + "epoch": 0.8212809917355371, + "grad_norm": 2.765625, + "learning_rate": 0.00018720150550892206, + "loss": 5.1643, + "step": 95400 + }, + { + "epoch": 0.821711432506887, + "grad_norm": 2.28125, + "learning_rate": 0.00018718824132407612, + "loss": 4.6024, + "step": 95450 + }, + { + "epoch": 0.822141873278237, + "grad_norm": 3.546875, + "learning_rate": 0.0001871749707398009, + "loss": 4.7573, + "step": 95500 + }, + { + "epoch": 0.8225723140495868, + "grad_norm": 1.4921875, + "learning_rate": 0.00018716169375707038, + "loss": 4.4401, + "step": 95550 + }, + { + "epoch": 0.8230027548209367, + "grad_norm": 2.65625, + "learning_rate": 0.00018714841037685903, + "loss": 4.3197, + "step": 95600 + }, + { + "epoch": 0.8234331955922864, + "grad_norm": 3.203125, + "learning_rate": 0.0001871351206001419, + "loss": 4.851, + "step": 95650 + }, + { + "epoch": 0.8238636363636364, + "grad_norm": 2.890625, + "learning_rate": 0.0001871218244278944, + "loss": 4.9826, + "step": 95700 + }, + { + "epoch": 0.8242940771349863, + "grad_norm": 2.8125, + "learning_rate": 0.0001871085218610924, + "loss": 5.0037, + "step": 95750 + }, + { + "epoch": 0.824724517906336, + "grad_norm": 1.625, + "learning_rate": 0.00018709521290071236, + "loss": 4.2328, + "step": 95800 + }, + { + "epoch": 0.825154958677686, + "grad_norm": 1.9453125, + "learning_rate": 0.00018708189754773104, + "loss": 4.2897, + "step": 95850 + }, + { + "epoch": 0.8255853994490359, + "grad_norm": 2.0, + "learning_rate": 0.00018706857580312583, + "loss": 4.7004, + "step": 95900 + }, + { + "epoch": 0.8260158402203857, + "grad_norm": 2.359375, + "learning_rate": 0.00018705524766787448, + "loss": 4.8402, + "step": 95950 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 2.296875, + "learning_rate": 0.00018704191314295525, + "loss": 5.0452, + "step": 96000 + }, + { + "epoch": 0.8264462809917356, + "eval_loss": 5.334881782531738, + "eval_runtime": 22.0398, + "eval_samples_per_second": 29.038, + "eval_steps_per_second": 14.519, + "eval_tts_loss": 6.7898586608838105, + "step": 96000 + }, + { + "epoch": 0.8268767217630854, + "grad_norm": 2.09375, + "learning_rate": 0.00018702857222934686, + "loss": 4.9609, + "step": 96050 + }, + { + "epoch": 0.8273071625344353, + "grad_norm": 1.140625, + "learning_rate": 0.00018701522492802855, + "loss": 4.6768, + "step": 96100 + }, + { + "epoch": 0.8277376033057852, + "grad_norm": 2.1875, + "learning_rate": 0.0001870018712399799, + "loss": 4.9732, + "step": 96150 + }, + { + "epoch": 0.828168044077135, + "grad_norm": 1.8359375, + "learning_rate": 0.00018698851116618115, + "loss": 5.2723, + "step": 96200 + }, + { + "epoch": 0.8285984848484849, + "grad_norm": 1.5546875, + "learning_rate": 0.0001869751447076128, + "loss": 4.7225, + "step": 96250 + }, + { + "epoch": 0.8290289256198347, + "grad_norm": 2.28125, + "learning_rate": 0.00018696177186525596, + "loss": 4.5981, + "step": 96300 + }, + { + "epoch": 0.8294593663911846, + "grad_norm": 1.765625, + "learning_rate": 0.00018694839264009222, + "loss": 4.6406, + "step": 96350 + }, + { + "epoch": 0.8298898071625345, + "grad_norm": 3.34375, + "learning_rate": 0.00018693500703310348, + "loss": 4.1615, + "step": 96400 + }, + { + "epoch": 0.8303202479338843, + "grad_norm": 1.5703125, + "learning_rate": 0.00018692161504527225, + "loss": 4.7821, + "step": 96450 + }, + { + "epoch": 0.8307506887052342, + "grad_norm": 2.515625, + "learning_rate": 0.00018690821667758154, + "loss": 4.5311, + "step": 96500 + }, + { + "epoch": 0.8311811294765841, + "grad_norm": 0.8984375, + "learning_rate": 0.0001868948119310147, + "loss": 4.3184, + "step": 96550 + }, + { + "epoch": 0.8316115702479339, + "grad_norm": 1.0703125, + "learning_rate": 0.00018688140080655563, + "loss": 4.4898, + "step": 96600 + }, + { + "epoch": 0.8320420110192838, + "grad_norm": 1.40625, + "learning_rate": 0.00018686798330518864, + "loss": 4.9426, + "step": 96650 + }, + { + "epoch": 0.8324724517906336, + "grad_norm": 1.9375, + "learning_rate": 0.00018685455942789861, + "loss": 4.8363, + "step": 96700 + }, + { + "epoch": 0.8329028925619835, + "grad_norm": 2.78125, + "learning_rate": 0.0001868411291756708, + "loss": 4.52, + "step": 96750 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 3.046875, + "learning_rate": 0.0001868276925494909, + "loss": 4.7537, + "step": 96800 + }, + { + "epoch": 0.8337637741046832, + "grad_norm": 2.421875, + "learning_rate": 0.00018681424955034522, + "loss": 4.7974, + "step": 96850 + }, + { + "epoch": 0.8341942148760331, + "grad_norm": 2.09375, + "learning_rate": 0.00018680080017922042, + "loss": 4.9911, + "step": 96900 + }, + { + "epoch": 0.834624655647383, + "grad_norm": 2.109375, + "learning_rate": 0.00018678734443710362, + "loss": 4.3628, + "step": 96950 + }, + { + "epoch": 0.8350550964187328, + "grad_norm": 0.6328125, + "learning_rate": 0.00018677388232498247, + "loss": 4.537, + "step": 97000 + }, + { + "epoch": 0.8354855371900827, + "grad_norm": 2.53125, + "learning_rate": 0.00018676041384384505, + "loss": 4.675, + "step": 97050 + }, + { + "epoch": 0.8359159779614325, + "grad_norm": 3.0625, + "learning_rate": 0.00018674693899467995, + "loss": 4.6626, + "step": 97100 + }, + { + "epoch": 0.8363464187327824, + "grad_norm": 2.0625, + "learning_rate": 0.00018673345777847614, + "loss": 4.7018, + "step": 97150 + }, + { + "epoch": 0.8367768595041323, + "grad_norm": 3.203125, + "learning_rate": 0.00018671997019622315, + "loss": 5.0525, + "step": 97200 + }, + { + "epoch": 0.8372073002754821, + "grad_norm": 2.09375, + "learning_rate": 0.00018670647624891094, + "loss": 4.7487, + "step": 97250 + }, + { + "epoch": 0.837637741046832, + "grad_norm": 1.703125, + "learning_rate": 0.0001866929759375299, + "loss": 4.6557, + "step": 97300 + }, + { + "epoch": 0.8380681818181818, + "grad_norm": 2.28125, + "learning_rate": 0.00018667946926307098, + "loss": 4.5398, + "step": 97350 + }, + { + "epoch": 0.8384986225895317, + "grad_norm": 3.09375, + "learning_rate": 0.0001866659562265255, + "loss": 4.7489, + "step": 97400 + }, + { + "epoch": 0.8389290633608816, + "grad_norm": 2.359375, + "learning_rate": 0.00018665243682888528, + "loss": 4.9353, + "step": 97450 + }, + { + "epoch": 0.8393595041322314, + "grad_norm": 2.28125, + "learning_rate": 0.00018663891107114267, + "loss": 4.6758, + "step": 97500 + }, + { + "epoch": 0.8397899449035813, + "grad_norm": 2.5, + "learning_rate": 0.0001866253789542904, + "loss": 4.5842, + "step": 97550 + }, + { + "epoch": 0.8402203856749312, + "grad_norm": 1.4921875, + "learning_rate": 0.00018661184047932166, + "loss": 4.4563, + "step": 97600 + }, + { + "epoch": 0.840650826446281, + "grad_norm": 1.703125, + "learning_rate": 0.0001865982956472302, + "loss": 4.583, + "step": 97650 + }, + { + "epoch": 0.8410812672176309, + "grad_norm": 3.109375, + "learning_rate": 0.00018658474445901015, + "loss": 4.5257, + "step": 97700 + }, + { + "epoch": 0.8415117079889807, + "grad_norm": 2.84375, + "learning_rate": 0.00018657118691565617, + "loss": 4.6481, + "step": 97750 + }, + { + "epoch": 0.8419421487603306, + "grad_norm": 1.7265625, + "learning_rate": 0.0001865576230181633, + "loss": 4.539, + "step": 97800 + }, + { + "epoch": 0.8423725895316805, + "grad_norm": 2.28125, + "learning_rate": 0.00018654405276752716, + "loss": 4.7829, + "step": 97850 + }, + { + "epoch": 0.8428030303030303, + "grad_norm": 1.625, + "learning_rate": 0.00018653047616474372, + "loss": 4.5292, + "step": 97900 + }, + { + "epoch": 0.8432334710743802, + "grad_norm": 1.4453125, + "learning_rate": 0.00018651689321080953, + "loss": 4.7529, + "step": 97950 + }, + { + "epoch": 0.84366391184573, + "grad_norm": 1.7890625, + "learning_rate": 0.00018650330390672154, + "loss": 4.9715, + "step": 98000 + }, + { + "epoch": 0.8440943526170799, + "grad_norm": 2.296875, + "learning_rate": 0.00018648970825347713, + "loss": 4.2531, + "step": 98050 + }, + { + "epoch": 0.8445247933884298, + "grad_norm": 2.90625, + "learning_rate": 0.00018647610625207423, + "loss": 5.0176, + "step": 98100 + }, + { + "epoch": 0.8449552341597796, + "grad_norm": 1.4453125, + "learning_rate": 0.00018646249790351118, + "loss": 4.4414, + "step": 98150 + }, + { + "epoch": 0.8453856749311295, + "grad_norm": 2.375, + "learning_rate": 0.00018644888320878684, + "loss": 4.302, + "step": 98200 + }, + { + "epoch": 0.8458161157024794, + "grad_norm": 2.890625, + "learning_rate": 0.00018643526216890045, + "loss": 4.5189, + "step": 98250 + }, + { + "epoch": 0.8462465564738292, + "grad_norm": 2.6875, + "learning_rate": 0.0001864216347848518, + "loss": 4.624, + "step": 98300 + }, + { + "epoch": 0.8466769972451791, + "grad_norm": 3.703125, + "learning_rate": 0.00018640800105764108, + "loss": 4.6469, + "step": 98350 + }, + { + "epoch": 0.8471074380165289, + "grad_norm": 3.625, + "learning_rate": 0.000186394360988269, + "loss": 4.6857, + "step": 98400 + }, + { + "epoch": 0.8475378787878788, + "grad_norm": 2.625, + "learning_rate": 0.0001863807145777367, + "loss": 4.8966, + "step": 98450 + }, + { + "epoch": 0.8479683195592287, + "grad_norm": 2.53125, + "learning_rate": 0.00018636706182704584, + "loss": 4.5156, + "step": 98500 + }, + { + "epoch": 0.8483987603305785, + "grad_norm": 1.6796875, + "learning_rate": 0.00018635340273719843, + "loss": 4.7085, + "step": 98550 + }, + { + "epoch": 0.8488292011019284, + "grad_norm": 4.40625, + "learning_rate": 0.00018633973730919706, + "loss": 4.7127, + "step": 98600 + }, + { + "epoch": 0.8492596418732782, + "grad_norm": 2.625, + "learning_rate": 0.00018632606554404475, + "loss": 4.4613, + "step": 98650 + }, + { + "epoch": 0.8496900826446281, + "grad_norm": 2.15625, + "learning_rate": 0.00018631238744274496, + "loss": 5.0779, + "step": 98700 + }, + { + "epoch": 0.850120523415978, + "grad_norm": 3.53125, + "learning_rate": 0.0001862987030063016, + "loss": 4.7219, + "step": 98750 + }, + { + "epoch": 0.8505509641873278, + "grad_norm": 1.859375, + "learning_rate": 0.00018628501223571917, + "loss": 4.5248, + "step": 98800 + }, + { + "epoch": 0.8509814049586777, + "grad_norm": 2.328125, + "learning_rate": 0.00018627131513200245, + "loss": 4.5576, + "step": 98850 + }, + { + "epoch": 0.8514118457300276, + "grad_norm": 2.609375, + "learning_rate": 0.00018625761169615684, + "loss": 4.5564, + "step": 98900 + }, + { + "epoch": 0.8518422865013774, + "grad_norm": 2.015625, + "learning_rate": 0.00018624390192918813, + "loss": 4.5104, + "step": 98950 + }, + { + "epoch": 0.8522727272727273, + "grad_norm": 1.3515625, + "learning_rate": 0.00018623018583210254, + "loss": 5.0488, + "step": 99000 + }, + { + "epoch": 0.8522727272727273, + "eval_loss": 5.325535774230957, + "eval_runtime": 21.8544, + "eval_samples_per_second": 29.285, + "eval_steps_per_second": 14.642, + "eval_tts_loss": 6.811281706750041, + "step": 99000 + }, + { + "epoch": 0.8527031680440771, + "grad_norm": 2.390625, + "learning_rate": 0.00018621646340590684, + "loss": 5.008, + "step": 99050 + }, + { + "epoch": 0.853133608815427, + "grad_norm": 2.640625, + "learning_rate": 0.00018620273465160824, + "loss": 4.7662, + "step": 99100 + }, + { + "epoch": 0.8535640495867769, + "grad_norm": 3.984375, + "learning_rate": 0.00018618899957021441, + "loss": 4.8999, + "step": 99150 + }, + { + "epoch": 0.8539944903581267, + "grad_norm": 1.6015625, + "learning_rate": 0.00018617525816273342, + "loss": 4.7202, + "step": 99200 + }, + { + "epoch": 0.8544249311294766, + "grad_norm": 2.6875, + "learning_rate": 0.0001861615104301739, + "loss": 4.3764, + "step": 99250 + }, + { + "epoch": 0.8548553719008265, + "grad_norm": 4.25, + "learning_rate": 0.0001861477563735449, + "loss": 4.6332, + "step": 99300 + }, + { + "epoch": 0.8552858126721763, + "grad_norm": 2.34375, + "learning_rate": 0.00018613399599385594, + "loss": 4.6435, + "step": 99350 + }, + { + "epoch": 0.8557162534435262, + "grad_norm": 2.625, + "learning_rate": 0.000186120229292117, + "loss": 5.0863, + "step": 99400 + }, + { + "epoch": 0.856146694214876, + "grad_norm": 1.328125, + "learning_rate": 0.0001861064562693385, + "loss": 4.4357, + "step": 99450 + }, + { + "epoch": 0.8565771349862259, + "grad_norm": 1.15625, + "learning_rate": 0.00018609267692653138, + "loss": 4.6186, + "step": 99500 + }, + { + "epoch": 0.8570075757575758, + "grad_norm": 2.34375, + "learning_rate": 0.000186078891264707, + "loss": 4.4482, + "step": 99550 + }, + { + "epoch": 0.8574380165289256, + "grad_norm": 2.21875, + "learning_rate": 0.00018606509928487724, + "loss": 4.771, + "step": 99600 + }, + { + "epoch": 0.8578684573002755, + "grad_norm": 3.625, + "learning_rate": 0.00018605130098805432, + "loss": 4.4148, + "step": 99650 + }, + { + "epoch": 0.8582988980716253, + "grad_norm": 2.015625, + "learning_rate": 0.0001860374963752511, + "loss": 4.5078, + "step": 99700 + }, + { + "epoch": 0.8587293388429752, + "grad_norm": 0.8203125, + "learning_rate": 0.00018602368544748073, + "loss": 4.7904, + "step": 99750 + }, + { + "epoch": 0.8591597796143251, + "grad_norm": 1.65625, + "learning_rate": 0.0001860098682057569, + "loss": 4.6289, + "step": 99800 + }, + { + "epoch": 0.8595902203856749, + "grad_norm": 1.9453125, + "learning_rate": 0.00018599604465109387, + "loss": 4.9563, + "step": 99850 + }, + { + "epoch": 0.8600206611570248, + "grad_norm": 3.046875, + "learning_rate": 0.00018598221478450614, + "loss": 4.7274, + "step": 99900 + }, + { + "epoch": 0.8604511019283747, + "grad_norm": 2.0, + "learning_rate": 0.00018596837860700887, + "loss": 4.5151, + "step": 99950 + }, + { + "epoch": 0.8608815426997245, + "grad_norm": 2.75, + "learning_rate": 0.00018595453611961756, + "loss": 4.7865, + "step": 100000 + }, + { + "epoch": 0.8613119834710744, + "grad_norm": 2.4375, + "learning_rate": 0.00018594068732334822, + "loss": 4.7826, + "step": 100050 + }, + { + "epoch": 0.8617424242424242, + "grad_norm": 1.765625, + "learning_rate": 0.00018592683221921736, + "loss": 4.6698, + "step": 100100 + }, + { + "epoch": 0.8621728650137741, + "grad_norm": 2.1875, + "learning_rate": 0.0001859129708082419, + "loss": 4.7061, + "step": 100150 + }, + { + "epoch": 0.862603305785124, + "grad_norm": 2.8125, + "learning_rate": 0.0001858991030914392, + "loss": 4.3987, + "step": 100200 + }, + { + "epoch": 0.8630337465564738, + "grad_norm": 1.65625, + "learning_rate": 0.00018588522906982715, + "loss": 4.8183, + "step": 100250 + }, + { + "epoch": 0.8634641873278237, + "grad_norm": 5.0, + "learning_rate": 0.0001858713487444241, + "loss": 4.6674, + "step": 100300 + }, + { + "epoch": 0.8638946280991735, + "grad_norm": 2.875, + "learning_rate": 0.00018585746211624877, + "loss": 4.9843, + "step": 100350 + }, + { + "epoch": 0.8643250688705234, + "grad_norm": 1.296875, + "learning_rate": 0.00018584356918632046, + "loss": 4.2949, + "step": 100400 + }, + { + "epoch": 0.8647555096418733, + "grad_norm": 3.109375, + "learning_rate": 0.0001858296699556589, + "loss": 4.3828, + "step": 100450 + }, + { + "epoch": 0.8651859504132231, + "grad_norm": 3.296875, + "learning_rate": 0.0001858157644252842, + "loss": 5.1659, + "step": 100500 + }, + { + "epoch": 0.865616391184573, + "grad_norm": 2.640625, + "learning_rate": 0.00018580185259621702, + "loss": 4.993, + "step": 100550 + }, + { + "epoch": 0.8660468319559229, + "grad_norm": 2.84375, + "learning_rate": 0.00018578793446947852, + "loss": 4.7621, + "step": 100600 + }, + { + "epoch": 0.8664772727272727, + "grad_norm": 1.765625, + "learning_rate": 0.00018577401004609017, + "loss": 4.7898, + "step": 100650 + }, + { + "epoch": 0.8669077134986226, + "grad_norm": 3.53125, + "learning_rate": 0.00018576007932707405, + "loss": 4.0649, + "step": 100700 + }, + { + "epoch": 0.8673381542699724, + "grad_norm": 1.3046875, + "learning_rate": 0.0001857461423134526, + "loss": 4.8609, + "step": 100750 + }, + { + "epoch": 0.8677685950413223, + "grad_norm": 2.359375, + "learning_rate": 0.0001857321990062488, + "loss": 5.0485, + "step": 100800 + }, + { + "epoch": 0.8681990358126722, + "grad_norm": 3.328125, + "learning_rate": 0.00018571824940648605, + "loss": 4.8174, + "step": 100850 + }, + { + "epoch": 0.868629476584022, + "grad_norm": 2.3125, + "learning_rate": 0.00018570429351518822, + "loss": 4.5981, + "step": 100900 + }, + { + "epoch": 0.8690599173553719, + "grad_norm": 2.859375, + "learning_rate": 0.00018569033133337964, + "loss": 4.8208, + "step": 100950 + }, + { + "epoch": 0.8694903581267218, + "grad_norm": 3.078125, + "learning_rate": 0.00018567636286208515, + "loss": 4.4019, + "step": 101000 + }, + { + "epoch": 0.8699207988980716, + "grad_norm": 1.3359375, + "learning_rate": 0.0001856623881023299, + "loss": 4.6871, + "step": 101050 + }, + { + "epoch": 0.8703512396694215, + "grad_norm": 2.046875, + "learning_rate": 0.0001856484070551397, + "loss": 4.8664, + "step": 101100 + }, + { + "epoch": 0.8707816804407713, + "grad_norm": 1.3046875, + "learning_rate": 0.00018563441972154073, + "loss": 4.7675, + "step": 101150 + }, + { + "epoch": 0.8712121212121212, + "grad_norm": 2.46875, + "learning_rate": 0.00018562042610255954, + "loss": 4.828, + "step": 101200 + }, + { + "epoch": 0.8716425619834711, + "grad_norm": 1.8984375, + "learning_rate": 0.00018560642619922332, + "loss": 4.5816, + "step": 101250 + }, + { + "epoch": 0.8720730027548209, + "grad_norm": 1.703125, + "learning_rate": 0.0001855924200125596, + "loss": 4.7028, + "step": 101300 + }, + { + "epoch": 0.8725034435261708, + "grad_norm": 1.5, + "learning_rate": 0.0001855784075435964, + "loss": 4.6827, + "step": 101350 + }, + { + "epoch": 0.8729338842975206, + "grad_norm": 3.21875, + "learning_rate": 0.00018556438879336224, + "loss": 4.8132, + "step": 101400 + }, + { + "epoch": 0.8733643250688705, + "grad_norm": 4.84375, + "learning_rate": 0.000185550363762886, + "loss": 4.915, + "step": 101450 + }, + { + "epoch": 0.8737947658402204, + "grad_norm": 1.6875, + "learning_rate": 0.00018553633245319717, + "loss": 4.6117, + "step": 101500 + }, + { + "epoch": 0.8742252066115702, + "grad_norm": 2.234375, + "learning_rate": 0.00018552229486532555, + "loss": 4.8032, + "step": 101550 + }, + { + "epoch": 0.8746556473829201, + "grad_norm": 2.140625, + "learning_rate": 0.0001855082510003015, + "loss": 4.4768, + "step": 101600 + }, + { + "epoch": 0.87508608815427, + "grad_norm": 1.78125, + "learning_rate": 0.0001854942008591558, + "loss": 4.6589, + "step": 101650 + }, + { + "epoch": 0.8755165289256198, + "grad_norm": 3.546875, + "learning_rate": 0.0001854801444429197, + "loss": 4.6911, + "step": 101700 + }, + { + "epoch": 0.8759469696969697, + "grad_norm": 2.25, + "learning_rate": 0.00018546608175262492, + "loss": 4.6856, + "step": 101750 + }, + { + "epoch": 0.8763774104683195, + "grad_norm": 2.65625, + "learning_rate": 0.00018545201278930362, + "loss": 4.6395, + "step": 101800 + }, + { + "epoch": 0.8768078512396694, + "grad_norm": 2.890625, + "learning_rate": 0.00018543793755398843, + "loss": 4.8894, + "step": 101850 + }, + { + "epoch": 0.8772382920110193, + "grad_norm": 2.9375, + "learning_rate": 0.00018542385604771244, + "loss": 4.9716, + "step": 101900 + }, + { + "epoch": 0.8776687327823691, + "grad_norm": 2.125, + "learning_rate": 0.00018540976827150925, + "loss": 4.4848, + "step": 101950 + }, + { + "epoch": 0.878099173553719, + "grad_norm": 2.453125, + "learning_rate": 0.0001853956742264128, + "loss": 4.6957, + "step": 102000 + }, + { + "epoch": 0.878099173553719, + "eval_loss": 5.325944423675537, + "eval_runtime": 21.7557, + "eval_samples_per_second": 29.418, + "eval_steps_per_second": 14.709, + "eval_tts_loss": 6.826836223579188, + "step": 102000 + }, + { + "epoch": 0.8785296143250688, + "grad_norm": 2.46875, + "learning_rate": 0.0001853815739134576, + "loss": 4.9113, + "step": 102050 + }, + { + "epoch": 0.8789600550964187, + "grad_norm": 2.90625, + "learning_rate": 0.00018536746733367857, + "loss": 4.8703, + "step": 102100 + }, + { + "epoch": 0.8793904958677686, + "grad_norm": 2.21875, + "learning_rate": 0.00018535335448811112, + "loss": 4.7105, + "step": 102150 + }, + { + "epoch": 0.8798209366391184, + "grad_norm": 1.3359375, + "learning_rate": 0.0001853392353777911, + "loss": 4.1233, + "step": 102200 + }, + { + "epoch": 0.8802513774104683, + "grad_norm": 2.53125, + "learning_rate": 0.00018532511000375482, + "loss": 4.4242, + "step": 102250 + }, + { + "epoch": 0.8806818181818182, + "grad_norm": 1.125, + "learning_rate": 0.00018531097836703902, + "loss": 4.6817, + "step": 102300 + }, + { + "epoch": 0.881112258953168, + "grad_norm": 2.265625, + "learning_rate": 0.00018529684046868098, + "loss": 4.3535, + "step": 102350 + }, + { + "epoch": 0.8815426997245179, + "grad_norm": 2.359375, + "learning_rate": 0.00018528269630971837, + "loss": 4.6127, + "step": 102400 + }, + { + "epoch": 0.8819731404958677, + "grad_norm": 2.53125, + "learning_rate": 0.0001852685458911893, + "loss": 4.9314, + "step": 102450 + }, + { + "epoch": 0.8824035812672176, + "grad_norm": 0.94140625, + "learning_rate": 0.0001852543892141325, + "loss": 4.7783, + "step": 102500 + }, + { + "epoch": 0.8828340220385675, + "grad_norm": 3.09375, + "learning_rate": 0.0001852402262795869, + "loss": 4.746, + "step": 102550 + }, + { + "epoch": 0.8832644628099173, + "grad_norm": 2.390625, + "learning_rate": 0.0001852260570885921, + "loss": 4.5602, + "step": 102600 + }, + { + "epoch": 0.8836949035812672, + "grad_norm": 2.234375, + "learning_rate": 0.0001852118816421881, + "loss": 4.7384, + "step": 102650 + }, + { + "epoch": 0.884125344352617, + "grad_norm": 2.15625, + "learning_rate": 0.0001851976999414153, + "loss": 4.5998, + "step": 102700 + }, + { + "epoch": 0.8845557851239669, + "grad_norm": 2.75, + "learning_rate": 0.00018518351198731464, + "loss": 4.8648, + "step": 102750 + }, + { + "epoch": 0.8849862258953168, + "grad_norm": 2.484375, + "learning_rate": 0.0001851693177809275, + "loss": 4.5639, + "step": 102800 + }, + { + "epoch": 0.8854166666666666, + "grad_norm": 1.265625, + "learning_rate": 0.00018515511732329566, + "loss": 4.6472, + "step": 102850 + }, + { + "epoch": 0.8858471074380165, + "grad_norm": 2.8125, + "learning_rate": 0.00018514091061546143, + "loss": 4.4335, + "step": 102900 + }, + { + "epoch": 0.8862775482093664, + "grad_norm": 1.2265625, + "learning_rate": 0.00018512669765846754, + "loss": 4.8827, + "step": 102950 + }, + { + "epoch": 0.8867079889807162, + "grad_norm": 1.4921875, + "learning_rate": 0.0001851124784533572, + "loss": 4.7631, + "step": 103000 + }, + { + "epoch": 0.8871384297520661, + "grad_norm": 2.890625, + "learning_rate": 0.0001850982530011741, + "loss": 4.5358, + "step": 103050 + }, + { + "epoch": 0.8875688705234159, + "grad_norm": 2.625, + "learning_rate": 0.00018508402130296226, + "loss": 5.1086, + "step": 103100 + }, + { + "epoch": 0.8879993112947658, + "grad_norm": 2.734375, + "learning_rate": 0.00018506978335976638, + "loss": 4.6903, + "step": 103150 + }, + { + "epoch": 0.8884297520661157, + "grad_norm": 4.8125, + "learning_rate": 0.0001850555391726314, + "loss": 4.7302, + "step": 103200 + }, + { + "epoch": 0.8888601928374655, + "grad_norm": 2.421875, + "learning_rate": 0.00018504128874260282, + "loss": 4.8309, + "step": 103250 + }, + { + "epoch": 0.8892906336088154, + "grad_norm": 3.1875, + "learning_rate": 0.00018502703207072668, + "loss": 4.5214, + "step": 103300 + }, + { + "epoch": 0.8897210743801653, + "grad_norm": 3.0625, + "learning_rate": 0.00018501276915804926, + "loss": 4.7718, + "step": 103350 + }, + { + "epoch": 0.8901515151515151, + "grad_norm": 1.6953125, + "learning_rate": 0.00018499850000561752, + "loss": 4.9352, + "step": 103400 + }, + { + "epoch": 0.890581955922865, + "grad_norm": 0.65625, + "learning_rate": 0.0001849842246144788, + "loss": 4.6532, + "step": 103450 + }, + { + "epoch": 0.8910123966942148, + "grad_norm": 2.703125, + "learning_rate": 0.00018496994298568078, + "loss": 4.7838, + "step": 103500 + }, + { + "epoch": 0.8914428374655647, + "grad_norm": 2.5625, + "learning_rate": 0.0001849556551202718, + "loss": 4.6994, + "step": 103550 + }, + { + "epoch": 0.8918732782369146, + "grad_norm": 1.1328125, + "learning_rate": 0.00018494136101930052, + "loss": 4.7826, + "step": 103600 + }, + { + "epoch": 0.8923037190082644, + "grad_norm": 1.890625, + "learning_rate": 0.00018492706068381609, + "loss": 4.5258, + "step": 103650 + }, + { + "epoch": 0.8927341597796143, + "grad_norm": 2.9375, + "learning_rate": 0.00018491275411486812, + "loss": 5.2615, + "step": 103700 + }, + { + "epoch": 0.8931646005509641, + "grad_norm": 2.25, + "learning_rate": 0.0001848984413135067, + "loss": 4.8224, + "step": 103750 + }, + { + "epoch": 0.893595041322314, + "grad_norm": 1.953125, + "learning_rate": 0.00018488412228078236, + "loss": 4.4506, + "step": 103800 + }, + { + "epoch": 0.894025482093664, + "grad_norm": 2.328125, + "learning_rate": 0.00018486979701774607, + "loss": 4.6331, + "step": 103850 + }, + { + "epoch": 0.8944559228650137, + "grad_norm": 4.03125, + "learning_rate": 0.0001848554655254493, + "loss": 4.556, + "step": 103900 + }, + { + "epoch": 0.8948863636363636, + "grad_norm": 2.96875, + "learning_rate": 0.0001848411278049439, + "loss": 4.9729, + "step": 103950 + }, + { + "epoch": 0.8953168044077136, + "grad_norm": 1.796875, + "learning_rate": 0.00018482678385728227, + "loss": 4.4746, + "step": 104000 + }, + { + "epoch": 0.8957472451790633, + "grad_norm": 2.78125, + "learning_rate": 0.0001848124336835172, + "loss": 4.6459, + "step": 104050 + }, + { + "epoch": 0.8961776859504132, + "grad_norm": 2.421875, + "learning_rate": 0.000184798077284702, + "loss": 5.2198, + "step": 104100 + }, + { + "epoch": 0.896608126721763, + "grad_norm": 2.421875, + "learning_rate": 0.00018478371466189034, + "loss": 4.6399, + "step": 104150 + }, + { + "epoch": 0.897038567493113, + "grad_norm": 2.140625, + "learning_rate": 0.0001847693458161365, + "loss": 4.7582, + "step": 104200 + }, + { + "epoch": 0.8974690082644629, + "grad_norm": 2.6875, + "learning_rate": 0.00018475497074849502, + "loss": 4.8269, + "step": 104250 + }, + { + "epoch": 0.8978994490358126, + "grad_norm": 1.71875, + "learning_rate": 0.00018474058946002107, + "loss": 4.8294, + "step": 104300 + }, + { + "epoch": 0.8983298898071626, + "grad_norm": 1.7890625, + "learning_rate": 0.00018472620195177016, + "loss": 4.8011, + "step": 104350 + }, + { + "epoch": 0.8987603305785123, + "grad_norm": 3.203125, + "learning_rate": 0.0001847118082247983, + "loss": 4.9976, + "step": 104400 + }, + { + "epoch": 0.8991907713498623, + "grad_norm": 0.5859375, + "learning_rate": 0.000184697408280162, + "loss": 4.667, + "step": 104450 + }, + { + "epoch": 0.8996212121212122, + "grad_norm": 1.1796875, + "learning_rate": 0.00018468300211891818, + "loss": 4.4345, + "step": 104500 + }, + { + "epoch": 0.900051652892562, + "grad_norm": 3.609375, + "learning_rate": 0.00018466858974212417, + "loss": 4.7446, + "step": 104550 + }, + { + "epoch": 0.9004820936639119, + "grad_norm": 3.03125, + "learning_rate": 0.00018465417115083788, + "loss": 4.4933, + "step": 104600 + }, + { + "epoch": 0.9009125344352618, + "grad_norm": 2.65625, + "learning_rate": 0.00018463974634611754, + "loss": 4.6429, + "step": 104650 + }, + { + "epoch": 0.9013429752066116, + "grad_norm": 2.53125, + "learning_rate": 0.00018462531532902193, + "loss": 4.6575, + "step": 104700 + }, + { + "epoch": 0.9017734159779615, + "grad_norm": 2.34375, + "learning_rate": 0.00018461087810061027, + "loss": 4.8047, + "step": 104750 + }, + { + "epoch": 0.9022038567493113, + "grad_norm": 2.65625, + "learning_rate": 0.00018459643466194216, + "loss": 4.6825, + "step": 104800 + }, + { + "epoch": 0.9026342975206612, + "grad_norm": 1.953125, + "learning_rate": 0.0001845819850140778, + "loss": 4.1799, + "step": 104850 + }, + { + "epoch": 0.9030647382920111, + "grad_norm": 2.625, + "learning_rate": 0.0001845675291580777, + "loss": 4.3134, + "step": 104900 + }, + { + "epoch": 0.9034951790633609, + "grad_norm": 2.109375, + "learning_rate": 0.0001845530670950029, + "loss": 4.5509, + "step": 104950 + }, + { + "epoch": 0.9039256198347108, + "grad_norm": 2.65625, + "learning_rate": 0.00018453859882591492, + "loss": 4.6409, + "step": 105000 + }, + { + "epoch": 0.9039256198347108, + "eval_loss": 5.319069862365723, + "eval_runtime": 21.8425, + "eval_samples_per_second": 29.301, + "eval_steps_per_second": 14.65, + "eval_tts_loss": 6.835954681379342, + "step": 105000 + }, + { + "epoch": 0.9043560606060606, + "grad_norm": 1.96875, + "learning_rate": 0.0001845241243518757, + "loss": 4.8657, + "step": 105050 + }, + { + "epoch": 0.9047865013774105, + "grad_norm": 2.046875, + "learning_rate": 0.00018450964367394758, + "loss": 4.9152, + "step": 105100 + }, + { + "epoch": 0.9052169421487604, + "grad_norm": 3.984375, + "learning_rate": 0.00018449515679319343, + "loss": 4.8435, + "step": 105150 + }, + { + "epoch": 0.9056473829201102, + "grad_norm": 2.484375, + "learning_rate": 0.00018448066371067658, + "loss": 4.9524, + "step": 105200 + }, + { + "epoch": 0.9060778236914601, + "grad_norm": 2.921875, + "learning_rate": 0.0001844661644274608, + "loss": 5.0342, + "step": 105250 + }, + { + "epoch": 0.90650826446281, + "grad_norm": 3.625, + "learning_rate": 0.00018445165894461022, + "loss": 4.576, + "step": 105300 + }, + { + "epoch": 0.9069387052341598, + "grad_norm": 2.203125, + "learning_rate": 0.00018443714726318965, + "loss": 4.5225, + "step": 105350 + }, + { + "epoch": 0.9073691460055097, + "grad_norm": 2.234375, + "learning_rate": 0.00018442262938426408, + "loss": 4.7826, + "step": 105400 + }, + { + "epoch": 0.9077995867768595, + "grad_norm": 1.359375, + "learning_rate": 0.00018440810530889922, + "loss": 4.2423, + "step": 105450 + }, + { + "epoch": 0.9082300275482094, + "grad_norm": 1.734375, + "learning_rate": 0.000184393575038161, + "loss": 4.6326, + "step": 105500 + }, + { + "epoch": 0.9086604683195593, + "grad_norm": 1.953125, + "learning_rate": 0.00018437903857311592, + "loss": 4.9193, + "step": 105550 + }, + { + "epoch": 0.9090909090909091, + "grad_norm": 2.171875, + "learning_rate": 0.000184364495914831, + "loss": 4.759, + "step": 105600 + }, + { + "epoch": 0.909521349862259, + "grad_norm": 1.9453125, + "learning_rate": 0.00018434994706437359, + "loss": 4.8403, + "step": 105650 + }, + { + "epoch": 0.9099517906336089, + "grad_norm": 2.84375, + "learning_rate": 0.00018433539202281153, + "loss": 4.8115, + "step": 105700 + }, + { + "epoch": 0.9103822314049587, + "grad_norm": 3.8125, + "learning_rate": 0.00018432083079121317, + "loss": 5.1318, + "step": 105750 + }, + { + "epoch": 0.9108126721763086, + "grad_norm": 1.953125, + "learning_rate": 0.00018430626337064725, + "loss": 4.4289, + "step": 105800 + }, + { + "epoch": 0.9112431129476584, + "grad_norm": 2.078125, + "learning_rate": 0.00018429168976218297, + "loss": 4.851, + "step": 105850 + }, + { + "epoch": 0.9116735537190083, + "grad_norm": 2.46875, + "learning_rate": 0.00018427710996689003, + "loss": 4.0955, + "step": 105900 + }, + { + "epoch": 0.9121039944903582, + "grad_norm": 2.625, + "learning_rate": 0.00018426252398583853, + "loss": 4.7545, + "step": 105950 + }, + { + "epoch": 0.912534435261708, + "grad_norm": 1.390625, + "learning_rate": 0.0001842479318200991, + "loss": 4.7372, + "step": 106000 + }, + { + "epoch": 0.9129648760330579, + "grad_norm": 0.83984375, + "learning_rate": 0.00018423333347074273, + "loss": 4.5969, + "step": 106050 + }, + { + "epoch": 0.9133953168044077, + "grad_norm": 3.90625, + "learning_rate": 0.0001842187289388409, + "loss": 4.7007, + "step": 106100 + }, + { + "epoch": 0.9138257575757576, + "grad_norm": 3.34375, + "learning_rate": 0.00018420411822546554, + "loss": 4.5481, + "step": 106150 + }, + { + "epoch": 0.9142561983471075, + "grad_norm": 2.296875, + "learning_rate": 0.0001841895013316891, + "loss": 4.8891, + "step": 106200 + }, + { + "epoch": 0.9146866391184573, + "grad_norm": 3.578125, + "learning_rate": 0.0001841748782585844, + "loss": 4.6893, + "step": 106250 + }, + { + "epoch": 0.9151170798898072, + "grad_norm": 3.0, + "learning_rate": 0.00018416024900722478, + "loss": 4.3488, + "step": 106300 + }, + { + "epoch": 0.9155475206611571, + "grad_norm": 2.4375, + "learning_rate": 0.0001841456135786839, + "loss": 4.8745, + "step": 106350 + }, + { + "epoch": 0.9159779614325069, + "grad_norm": 1.875, + "learning_rate": 0.00018413097197403605, + "loss": 4.272, + "step": 106400 + }, + { + "epoch": 0.9164084022038568, + "grad_norm": 3.578125, + "learning_rate": 0.00018411632419435586, + "loss": 5.0796, + "step": 106450 + }, + { + "epoch": 0.9168388429752066, + "grad_norm": 1.15625, + "learning_rate": 0.00018410167024071847, + "loss": 4.5094, + "step": 106500 + }, + { + "epoch": 0.9172692837465565, + "grad_norm": 1.96875, + "learning_rate": 0.00018408701011419945, + "loss": 4.217, + "step": 106550 + }, + { + "epoch": 0.9176997245179064, + "grad_norm": 1.0859375, + "learning_rate": 0.00018407234381587477, + "loss": 4.8997, + "step": 106600 + }, + { + "epoch": 0.9181301652892562, + "grad_norm": 2.296875, + "learning_rate": 0.00018405767134682094, + "loss": 4.6582, + "step": 106650 + }, + { + "epoch": 0.9185606060606061, + "grad_norm": 2.5625, + "learning_rate": 0.0001840429927081149, + "loss": 4.8529, + "step": 106700 + }, + { + "epoch": 0.9189910468319559, + "grad_norm": 2.25, + "learning_rate": 0.00018402830790083404, + "loss": 4.7468, + "step": 106750 + }, + { + "epoch": 0.9194214876033058, + "grad_norm": 0.9453125, + "learning_rate": 0.00018401361692605615, + "loss": 4.274, + "step": 106800 + }, + { + "epoch": 0.9198519283746557, + "grad_norm": 2.59375, + "learning_rate": 0.00018399891978485955, + "loss": 4.9797, + "step": 106850 + }, + { + "epoch": 0.9202823691460055, + "grad_norm": 3.578125, + "learning_rate": 0.00018398421647832294, + "loss": 4.5853, + "step": 106900 + }, + { + "epoch": 0.9207128099173554, + "grad_norm": 3.921875, + "learning_rate": 0.0001839695070075256, + "loss": 4.5254, + "step": 106950 + }, + { + "epoch": 0.9211432506887053, + "grad_norm": 2.375, + "learning_rate": 0.00018395479137354702, + "loss": 4.9453, + "step": 107000 + }, + { + "epoch": 0.9215736914600551, + "grad_norm": 1.609375, + "learning_rate": 0.00018394006957746744, + "loss": 4.334, + "step": 107050 + }, + { + "epoch": 0.922004132231405, + "grad_norm": 2.109375, + "learning_rate": 0.00018392534162036734, + "loss": 4.6963, + "step": 107100 + }, + { + "epoch": 0.9224345730027548, + "grad_norm": 2.3125, + "learning_rate": 0.00018391060750332775, + "loss": 4.6619, + "step": 107150 + }, + { + "epoch": 0.9228650137741047, + "grad_norm": 3.390625, + "learning_rate": 0.00018389586722743011, + "loss": 4.6646, + "step": 107200 + }, + { + "epoch": 0.9232954545454546, + "grad_norm": 2.546875, + "learning_rate": 0.0001838811207937563, + "loss": 4.9815, + "step": 107250 + }, + { + "epoch": 0.9237258953168044, + "grad_norm": 2.84375, + "learning_rate": 0.0001838663682033887, + "loss": 4.5707, + "step": 107300 + }, + { + "epoch": 0.9241563360881543, + "grad_norm": 2.453125, + "learning_rate": 0.00018385160945741014, + "loss": 5.164, + "step": 107350 + }, + { + "epoch": 0.9245867768595041, + "grad_norm": 1.984375, + "learning_rate": 0.00018383684455690384, + "loss": 3.9016, + "step": 107400 + }, + { + "epoch": 0.925017217630854, + "grad_norm": 3.578125, + "learning_rate": 0.00018382207350295352, + "loss": 4.6926, + "step": 107450 + }, + { + "epoch": 0.9254476584022039, + "grad_norm": 3.953125, + "learning_rate": 0.00018380729629664336, + "loss": 4.6408, + "step": 107500 + }, + { + "epoch": 0.9258780991735537, + "grad_norm": 3.984375, + "learning_rate": 0.000183792512939058, + "loss": 4.909, + "step": 107550 + }, + { + "epoch": 0.9263085399449036, + "grad_norm": 2.203125, + "learning_rate": 0.00018377772343128242, + "loss": 4.9433, + "step": 107600 + }, + { + "epoch": 0.9267389807162535, + "grad_norm": 3.015625, + "learning_rate": 0.00018376292777440222, + "loss": 4.8536, + "step": 107650 + }, + { + "epoch": 0.9271694214876033, + "grad_norm": 1.640625, + "learning_rate": 0.00018374812596950335, + "loss": 4.5243, + "step": 107700 + }, + { + "epoch": 0.9275998622589532, + "grad_norm": 1.671875, + "learning_rate": 0.00018373331801767223, + "loss": 4.8739, + "step": 107750 + }, + { + "epoch": 0.928030303030303, + "grad_norm": 0.796875, + "learning_rate": 0.0001837185039199957, + "loss": 4.587, + "step": 107800 + }, + { + "epoch": 0.9284607438016529, + "grad_norm": 2.765625, + "learning_rate": 0.0001837036836775611, + "loss": 4.3264, + "step": 107850 + }, + { + "epoch": 0.9288911845730028, + "grad_norm": 1.8671875, + "learning_rate": 0.00018368885729145627, + "loss": 4.7946, + "step": 107900 + }, + { + "epoch": 0.9293216253443526, + "grad_norm": 2.171875, + "learning_rate": 0.00018367402476276933, + "loss": 4.7081, + "step": 107950 + }, + { + "epoch": 0.9297520661157025, + "grad_norm": 1.078125, + "learning_rate": 0.00018365918609258898, + "loss": 4.5729, + "step": 108000 + }, + { + "epoch": 0.9297520661157025, + "eval_loss": 5.310000419616699, + "eval_runtime": 22.3217, + "eval_samples_per_second": 28.672, + "eval_steps_per_second": 14.336, + "eval_tts_loss": 6.829048421016426, + "step": 108000 + }, + { + "epoch": 0.9301825068870524, + "grad_norm": 1.5078125, + "learning_rate": 0.0001836443412820044, + "loss": 4.8927, + "step": 108050 + }, + { + "epoch": 0.9306129476584022, + "grad_norm": 2.09375, + "learning_rate": 0.00018362949033210515, + "loss": 4.6305, + "step": 108100 + }, + { + "epoch": 0.9310433884297521, + "grad_norm": 2.375, + "learning_rate": 0.0001836146332439812, + "loss": 5.0496, + "step": 108150 + }, + { + "epoch": 0.9314738292011019, + "grad_norm": 3.1875, + "learning_rate": 0.00018359977001872308, + "loss": 4.814, + "step": 108200 + }, + { + "epoch": 0.9319042699724518, + "grad_norm": 1.0390625, + "learning_rate": 0.00018358490065742173, + "loss": 4.9844, + "step": 108250 + }, + { + "epoch": 0.9323347107438017, + "grad_norm": 1.9375, + "learning_rate": 0.0001835700251611685, + "loss": 4.6898, + "step": 108300 + }, + { + "epoch": 0.9327651515151515, + "grad_norm": 2.421875, + "learning_rate": 0.00018355514353105525, + "loss": 4.841, + "step": 108350 + }, + { + "epoch": 0.9331955922865014, + "grad_norm": 2.5, + "learning_rate": 0.0001835402557681742, + "loss": 4.7741, + "step": 108400 + }, + { + "epoch": 0.9336260330578512, + "grad_norm": 1.9921875, + "learning_rate": 0.00018352536187361815, + "loss": 4.7507, + "step": 108450 + }, + { + "epoch": 0.9340564738292011, + "grad_norm": 2.859375, + "learning_rate": 0.00018351046184848023, + "loss": 4.6393, + "step": 108500 + }, + { + "epoch": 0.934486914600551, + "grad_norm": 2.796875, + "learning_rate": 0.0001834955556938541, + "loss": 4.8567, + "step": 108550 + }, + { + "epoch": 0.9349173553719008, + "grad_norm": 4.625, + "learning_rate": 0.0001834806434108338, + "loss": 4.4327, + "step": 108600 + }, + { + "epoch": 0.9353477961432507, + "grad_norm": 2.453125, + "learning_rate": 0.00018346572500051392, + "loss": 4.7422, + "step": 108650 + }, + { + "epoch": 0.9357782369146006, + "grad_norm": 2.484375, + "learning_rate": 0.0001834508004639894, + "loss": 4.1282, + "step": 108700 + }, + { + "epoch": 0.9362086776859504, + "grad_norm": 1.8359375, + "learning_rate": 0.00018343586980235568, + "loss": 4.7681, + "step": 108750 + }, + { + "epoch": 0.9366391184573003, + "grad_norm": 1.3984375, + "learning_rate": 0.00018342093301670863, + "loss": 4.829, + "step": 108800 + }, + { + "epoch": 0.9370695592286501, + "grad_norm": 2.375, + "learning_rate": 0.00018340599010814457, + "loss": 5.1187, + "step": 108850 + }, + { + "epoch": 0.9375, + "grad_norm": 2.3125, + "learning_rate": 0.00018339104107776029, + "loss": 4.7694, + "step": 108900 + }, + { + "epoch": 0.9379304407713499, + "grad_norm": 1.78125, + "learning_rate": 0.00018337608592665304, + "loss": 4.8252, + "step": 108950 + }, + { + "epoch": 0.9383608815426997, + "grad_norm": 1.4140625, + "learning_rate": 0.0001833611246559204, + "loss": 4.726, + "step": 109000 + }, + { + "epoch": 0.9387913223140496, + "grad_norm": 2.53125, + "learning_rate": 0.00018334615726666063, + "loss": 4.6152, + "step": 109050 + }, + { + "epoch": 0.9392217630853994, + "grad_norm": 1.9296875, + "learning_rate": 0.00018333118375997223, + "loss": 4.592, + "step": 109100 + }, + { + "epoch": 0.9396522038567493, + "grad_norm": 2.484375, + "learning_rate": 0.00018331620413695424, + "loss": 4.6736, + "step": 109150 + }, + { + "epoch": 0.9400826446280992, + "grad_norm": 3.125, + "learning_rate": 0.0001833012183987061, + "loss": 4.689, + "step": 109200 + }, + { + "epoch": 0.940513085399449, + "grad_norm": 2.109375, + "learning_rate": 0.00018328622654632777, + "loss": 4.704, + "step": 109250 + }, + { + "epoch": 0.9409435261707989, + "grad_norm": 2.5625, + "learning_rate": 0.00018327122858091964, + "loss": 4.7188, + "step": 109300 + }, + { + "epoch": 0.9413739669421488, + "grad_norm": 1.84375, + "learning_rate": 0.00018325622450358244, + "loss": 4.9638, + "step": 109350 + }, + { + "epoch": 0.9418044077134986, + "grad_norm": 3.1875, + "learning_rate": 0.00018324121431541752, + "loss": 4.8302, + "step": 109400 + }, + { + "epoch": 0.9422348484848485, + "grad_norm": 3.125, + "learning_rate": 0.00018322619801752657, + "loss": 4.6308, + "step": 109450 + }, + { + "epoch": 0.9426652892561983, + "grad_norm": 2.828125, + "learning_rate": 0.00018321117561101175, + "loss": 4.7341, + "step": 109500 + }, + { + "epoch": 0.9430957300275482, + "grad_norm": 1.6015625, + "learning_rate": 0.00018319614709697566, + "loss": 5.0833, + "step": 109550 + }, + { + "epoch": 0.9435261707988981, + "grad_norm": 1.4453125, + "learning_rate": 0.00018318111247652138, + "loss": 4.6262, + "step": 109600 + }, + { + "epoch": 0.9439566115702479, + "grad_norm": 1.4296875, + "learning_rate": 0.0001831660717507524, + "loss": 4.7921, + "step": 109650 + }, + { + "epoch": 0.9443870523415978, + "grad_norm": 2.34375, + "learning_rate": 0.00018315102492077272, + "loss": 4.4389, + "step": 109700 + }, + { + "epoch": 0.9448174931129476, + "grad_norm": 0.9609375, + "learning_rate": 0.0001831359719876867, + "loss": 4.8367, + "step": 109750 + }, + { + "epoch": 0.9452479338842975, + "grad_norm": 3.765625, + "learning_rate": 0.0001831209129525992, + "loss": 4.8597, + "step": 109800 + }, + { + "epoch": 0.9456783746556474, + "grad_norm": 3.234375, + "learning_rate": 0.00018310584781661553, + "loss": 4.7801, + "step": 109850 + }, + { + "epoch": 0.9461088154269972, + "grad_norm": 3.125, + "learning_rate": 0.00018309077658084145, + "loss": 4.9747, + "step": 109900 + }, + { + "epoch": 0.9465392561983471, + "grad_norm": 2.953125, + "learning_rate": 0.00018307569924638312, + "loss": 4.5881, + "step": 109950 + }, + { + "epoch": 0.946969696969697, + "grad_norm": 2.203125, + "learning_rate": 0.00018306061581434724, + "loss": 5.0307, + "step": 110000 + }, + { + "epoch": 0.9474001377410468, + "grad_norm": 2.359375, + "learning_rate": 0.00018304552628584087, + "loss": 4.7241, + "step": 110050 + }, + { + "epoch": 0.9478305785123967, + "grad_norm": 3.1875, + "learning_rate": 0.0001830304306619715, + "loss": 4.8156, + "step": 110100 + }, + { + "epoch": 0.9482610192837465, + "grad_norm": 2.40625, + "learning_rate": 0.0001830153289438472, + "loss": 4.7013, + "step": 110150 + }, + { + "epoch": 0.9486914600550964, + "grad_norm": 1.203125, + "learning_rate": 0.00018300022113257636, + "loss": 4.6494, + "step": 110200 + }, + { + "epoch": 0.9491219008264463, + "grad_norm": 2.90625, + "learning_rate": 0.00018298510722926787, + "loss": 4.476, + "step": 110250 + }, + { + "epoch": 0.9495523415977961, + "grad_norm": 1.3125, + "learning_rate": 0.00018296998723503106, + "loss": 4.2874, + "step": 110300 + }, + { + "epoch": 0.949982782369146, + "grad_norm": 2.859375, + "learning_rate": 0.0001829548611509757, + "loss": 4.5065, + "step": 110350 + }, + { + "epoch": 0.9504132231404959, + "grad_norm": 2.890625, + "learning_rate": 0.00018293972897821204, + "loss": 4.6838, + "step": 110400 + }, + { + "epoch": 0.9508436639118457, + "grad_norm": 1.9453125, + "learning_rate": 0.0001829245907178507, + "loss": 4.4963, + "step": 110450 + }, + { + "epoch": 0.9512741046831956, + "grad_norm": 1.796875, + "learning_rate": 0.00018290944637100278, + "loss": 4.9266, + "step": 110500 + }, + { + "epoch": 0.9517045454545454, + "grad_norm": 2.359375, + "learning_rate": 0.00018289429593877993, + "loss": 4.4374, + "step": 110550 + }, + { + "epoch": 0.9521349862258953, + "grad_norm": 2.734375, + "learning_rate": 0.0001828791394222941, + "loss": 5.0353, + "step": 110600 + }, + { + "epoch": 0.9525654269972452, + "grad_norm": 2.984375, + "learning_rate": 0.00018286397682265774, + "loss": 4.4734, + "step": 110650 + }, + { + "epoch": 0.952995867768595, + "grad_norm": 2.171875, + "learning_rate": 0.00018284880814098376, + "loss": 4.7412, + "step": 110700 + }, + { + "epoch": 0.9534263085399449, + "grad_norm": 2.109375, + "learning_rate": 0.00018283363337838555, + "loss": 4.5173, + "step": 110750 + }, + { + "epoch": 0.9538567493112947, + "grad_norm": 2.03125, + "learning_rate": 0.00018281845253597686, + "loss": 4.4637, + "step": 110800 + }, + { + "epoch": 0.9542871900826446, + "grad_norm": 3.03125, + "learning_rate": 0.00018280326561487194, + "loss": 4.6908, + "step": 110850 + }, + { + "epoch": 0.9547176308539945, + "grad_norm": 2.40625, + "learning_rate": 0.00018278807261618544, + "loss": 4.3467, + "step": 110900 + }, + { + "epoch": 0.9551480716253443, + "grad_norm": 1.46875, + "learning_rate": 0.00018277287354103255, + "loss": 5.0616, + "step": 110950 + }, + { + "epoch": 0.9555785123966942, + "grad_norm": 2.90625, + "learning_rate": 0.00018275766839052888, + "loss": 4.7608, + "step": 111000 + }, + { + "epoch": 0.9555785123966942, + "eval_loss": 5.300333023071289, + "eval_runtime": 21.9596, + "eval_samples_per_second": 29.144, + "eval_steps_per_second": 14.572, + "eval_tts_loss": 6.812118898581518, + "step": 111000 + }, + { + "epoch": 0.9560089531680441, + "grad_norm": 3.140625, + "learning_rate": 0.0001827424571657904, + "loss": 4.9825, + "step": 111050 + }, + { + "epoch": 0.9564393939393939, + "grad_norm": 2.25, + "learning_rate": 0.00018272723986793355, + "loss": 4.8704, + "step": 111100 + }, + { + "epoch": 0.9568698347107438, + "grad_norm": 1.625, + "learning_rate": 0.0001827120164980753, + "loss": 4.8763, + "step": 111150 + }, + { + "epoch": 0.9573002754820936, + "grad_norm": 2.703125, + "learning_rate": 0.000182696787057333, + "loss": 4.807, + "step": 111200 + }, + { + "epoch": 0.9577307162534435, + "grad_norm": 2.796875, + "learning_rate": 0.00018268155154682446, + "loss": 4.9365, + "step": 111250 + }, + { + "epoch": 0.9581611570247934, + "grad_norm": 2.125, + "learning_rate": 0.00018266630996766794, + "loss": 4.5768, + "step": 111300 + }, + { + "epoch": 0.9585915977961432, + "grad_norm": 1.46875, + "learning_rate": 0.0001826510623209821, + "loss": 4.7168, + "step": 111350 + }, + { + "epoch": 0.9590220385674931, + "grad_norm": 3.515625, + "learning_rate": 0.00018263580860788614, + "loss": 4.6275, + "step": 111400 + }, + { + "epoch": 0.9594524793388429, + "grad_norm": 1.7734375, + "learning_rate": 0.00018262054882949962, + "loss": 4.9334, + "step": 111450 + }, + { + "epoch": 0.9598829201101928, + "grad_norm": 2.390625, + "learning_rate": 0.0001826052829869426, + "loss": 4.7279, + "step": 111500 + }, + { + "epoch": 0.9603133608815427, + "grad_norm": 2.625, + "learning_rate": 0.0001825900110813355, + "loss": 4.8701, + "step": 111550 + }, + { + "epoch": 0.9607438016528925, + "grad_norm": 2.515625, + "learning_rate": 0.00018257473311379929, + "loss": 4.9257, + "step": 111600 + }, + { + "epoch": 0.9611742424242424, + "grad_norm": 0.8125, + "learning_rate": 0.00018255944908545532, + "loss": 4.71, + "step": 111650 + }, + { + "epoch": 0.9616046831955923, + "grad_norm": 3.015625, + "learning_rate": 0.00018254415899742545, + "loss": 4.9018, + "step": 111700 + }, + { + "epoch": 0.9620351239669421, + "grad_norm": 1.703125, + "learning_rate": 0.0001825288628508319, + "loss": 4.6962, + "step": 111750 + }, + { + "epoch": 0.962465564738292, + "grad_norm": 2.40625, + "learning_rate": 0.00018251356064679738, + "loss": 4.4214, + "step": 111800 + }, + { + "epoch": 0.9628960055096418, + "grad_norm": 2.21875, + "learning_rate": 0.00018249825238644504, + "loss": 5.217, + "step": 111850 + }, + { + "epoch": 0.9633264462809917, + "grad_norm": 2.328125, + "learning_rate": 0.00018248293807089847, + "loss": 4.8202, + "step": 111900 + }, + { + "epoch": 0.9637568870523416, + "grad_norm": 3.75, + "learning_rate": 0.0001824676177012817, + "loss": 4.3443, + "step": 111950 + }, + { + "epoch": 0.9641873278236914, + "grad_norm": 3.671875, + "learning_rate": 0.00018245229127871926, + "loss": 4.6238, + "step": 112000 + }, + { + "epoch": 0.9646177685950413, + "grad_norm": 2.40625, + "learning_rate": 0.00018243695880433602, + "loss": 4.5155, + "step": 112050 + }, + { + "epoch": 0.9650482093663911, + "grad_norm": 2.59375, + "learning_rate": 0.00018242162027925737, + "loss": 5.1373, + "step": 112100 + }, + { + "epoch": 0.965478650137741, + "grad_norm": 2.390625, + "learning_rate": 0.00018240627570460913, + "loss": 4.6647, + "step": 112150 + }, + { + "epoch": 0.9659090909090909, + "grad_norm": 2.015625, + "learning_rate": 0.0001823909250815176, + "loss": 4.6351, + "step": 112200 + }, + { + "epoch": 0.9663395316804407, + "grad_norm": 2.6875, + "learning_rate": 0.00018237556841110936, + "loss": 4.6715, + "step": 112250 + }, + { + "epoch": 0.9667699724517906, + "grad_norm": 3.515625, + "learning_rate": 0.00018236020569451167, + "loss": 4.8067, + "step": 112300 + }, + { + "epoch": 0.9672004132231405, + "grad_norm": 3.40625, + "learning_rate": 0.00018234483693285207, + "loss": 4.8991, + "step": 112350 + }, + { + "epoch": 0.9676308539944903, + "grad_norm": 1.6875, + "learning_rate": 0.00018232946212725864, + "loss": 4.2828, + "step": 112400 + }, + { + "epoch": 0.9680612947658402, + "grad_norm": 2.0625, + "learning_rate": 0.0001823140812788598, + "loss": 4.6571, + "step": 112450 + }, + { + "epoch": 0.96849173553719, + "grad_norm": 1.21875, + "learning_rate": 0.00018229869438878452, + "loss": 4.5357, + "step": 112500 + }, + { + "epoch": 0.9689221763085399, + "grad_norm": 2.21875, + "learning_rate": 0.0001822833014581621, + "loss": 4.9415, + "step": 112550 + }, + { + "epoch": 0.9693526170798898, + "grad_norm": 3.28125, + "learning_rate": 0.0001822679024881224, + "loss": 4.646, + "step": 112600 + }, + { + "epoch": 0.9697830578512396, + "grad_norm": 3.171875, + "learning_rate": 0.0001822524974797957, + "loss": 4.6626, + "step": 112650 + }, + { + "epoch": 0.9702134986225895, + "grad_norm": 2.640625, + "learning_rate": 0.00018223708643431264, + "loss": 4.8407, + "step": 112700 + }, + { + "epoch": 0.9706439393939394, + "grad_norm": 2.1875, + "learning_rate": 0.00018222166935280435, + "loss": 4.2004, + "step": 112750 + }, + { + "epoch": 0.9710743801652892, + "grad_norm": 2.375, + "learning_rate": 0.00018220624623640247, + "loss": 4.9094, + "step": 112800 + }, + { + "epoch": 0.9715048209366391, + "grad_norm": 2.703125, + "learning_rate": 0.00018219081708623896, + "loss": 4.444, + "step": 112850 + }, + { + "epoch": 0.9719352617079889, + "grad_norm": 3.3125, + "learning_rate": 0.0001821753819034463, + "loss": 4.8196, + "step": 112900 + }, + { + "epoch": 0.9723657024793388, + "grad_norm": 2.59375, + "learning_rate": 0.00018215994068915744, + "loss": 4.358, + "step": 112950 + }, + { + "epoch": 0.9727961432506887, + "grad_norm": 1.9609375, + "learning_rate": 0.0001821444934445057, + "loss": 4.9661, + "step": 113000 + }, + { + "epoch": 0.9732265840220385, + "grad_norm": 3.125, + "learning_rate": 0.00018212904017062491, + "loss": 4.8289, + "step": 113050 + }, + { + "epoch": 0.9736570247933884, + "grad_norm": 2.578125, + "learning_rate": 0.00018211358086864924, + "loss": 4.964, + "step": 113100 + }, + { + "epoch": 0.9740874655647382, + "grad_norm": 2.40625, + "learning_rate": 0.0001820981155397134, + "loss": 4.5162, + "step": 113150 + }, + { + "epoch": 0.9745179063360881, + "grad_norm": 3.8125, + "learning_rate": 0.00018208264418495253, + "loss": 4.5643, + "step": 113200 + }, + { + "epoch": 0.974948347107438, + "grad_norm": 1.765625, + "learning_rate": 0.0001820671668055022, + "loss": 4.6342, + "step": 113250 + }, + { + "epoch": 0.9753787878787878, + "grad_norm": 2.9375, + "learning_rate": 0.00018205168340249838, + "loss": 4.9864, + "step": 113300 + }, + { + "epoch": 0.9758092286501377, + "grad_norm": 3.90625, + "learning_rate": 0.00018203619397707751, + "loss": 4.7318, + "step": 113350 + }, + { + "epoch": 0.9762396694214877, + "grad_norm": 1.6171875, + "learning_rate": 0.00018202069853037655, + "loss": 4.8014, + "step": 113400 + }, + { + "epoch": 0.9766701101928374, + "grad_norm": 2.640625, + "learning_rate": 0.00018200519706353276, + "loss": 5.043, + "step": 113450 + }, + { + "epoch": 0.9771005509641874, + "grad_norm": 3.171875, + "learning_rate": 0.00018198968957768398, + "loss": 5.0049, + "step": 113500 + }, + { + "epoch": 0.9775309917355371, + "grad_norm": 2.625, + "learning_rate": 0.0001819741760739684, + "loss": 4.7359, + "step": 113550 + }, + { + "epoch": 0.977961432506887, + "grad_norm": 2.84375, + "learning_rate": 0.0001819586565535246, + "loss": 4.519, + "step": 113600 + }, + { + "epoch": 0.978391873278237, + "grad_norm": 1.6953125, + "learning_rate": 0.0001819431310174918, + "loss": 4.6648, + "step": 113650 + }, + { + "epoch": 0.9788223140495868, + "grad_norm": 1.6484375, + "learning_rate": 0.00018192759946700945, + "loss": 4.6725, + "step": 113700 + }, + { + "epoch": 0.9792527548209367, + "grad_norm": 2.390625, + "learning_rate": 0.0001819120619032176, + "loss": 4.8179, + "step": 113750 + }, + { + "epoch": 0.9796831955922864, + "grad_norm": 0.76953125, + "learning_rate": 0.00018189651832725665, + "loss": 4.6351, + "step": 113800 + }, + { + "epoch": 0.9801136363636364, + "grad_norm": 1.578125, + "learning_rate": 0.00018188096874026746, + "loss": 4.5547, + "step": 113850 + }, + { + "epoch": 0.9805440771349863, + "grad_norm": 3.4375, + "learning_rate": 0.00018186541314339132, + "loss": 4.9056, + "step": 113900 + }, + { + "epoch": 0.980974517906336, + "grad_norm": 2.1875, + "learning_rate": 0.00018184985153777, + "loss": 4.4375, + "step": 113950 + }, + { + "epoch": 0.981404958677686, + "grad_norm": 2.53125, + "learning_rate": 0.00018183428392454568, + "loss": 4.8116, + "step": 114000 + }, + { + "epoch": 0.981404958677686, + "eval_loss": 5.294703006744385, + "eval_runtime": 21.9006, + "eval_samples_per_second": 29.223, + "eval_steps_per_second": 14.611, + "eval_tts_loss": 6.886583222421584, + "step": 114000 + }, + { + "epoch": 0.9818353994490359, + "grad_norm": 2.3125, + "learning_rate": 0.000181818710304861, + "loss": 4.935, + "step": 114050 + }, + { + "epoch": 0.9822658402203857, + "grad_norm": 3.265625, + "learning_rate": 0.00018180313067985902, + "loss": 4.5252, + "step": 114100 + }, + { + "epoch": 0.9826962809917356, + "grad_norm": 2.5625, + "learning_rate": 0.00018178754505068323, + "loss": 4.5541, + "step": 114150 + }, + { + "epoch": 0.9831267217630854, + "grad_norm": 1.9765625, + "learning_rate": 0.0001817719534184776, + "loss": 4.732, + "step": 114200 + }, + { + "epoch": 0.9835571625344353, + "grad_norm": 2.96875, + "learning_rate": 0.00018175635578438656, + "loss": 4.6602, + "step": 114250 + }, + { + "epoch": 0.9839876033057852, + "grad_norm": 1.890625, + "learning_rate": 0.00018174075214955488, + "loss": 4.8676, + "step": 114300 + }, + { + "epoch": 0.984418044077135, + "grad_norm": 2.15625, + "learning_rate": 0.00018172514251512789, + "loss": 4.43, + "step": 114350 + }, + { + "epoch": 0.9848484848484849, + "grad_norm": 1.8984375, + "learning_rate": 0.00018170952688225124, + "loss": 4.8476, + "step": 114400 + }, + { + "epoch": 0.9852789256198347, + "grad_norm": 2.203125, + "learning_rate": 0.00018169390525207116, + "loss": 4.5011, + "step": 114450 + }, + { + "epoch": 0.9857093663911846, + "grad_norm": 1.9609375, + "learning_rate": 0.0001816782776257342, + "loss": 4.4709, + "step": 114500 + }, + { + "epoch": 0.9861398071625345, + "grad_norm": 2.625, + "learning_rate": 0.00018166264400438738, + "loss": 5.0821, + "step": 114550 + }, + { + "epoch": 0.9865702479338843, + "grad_norm": 0.57421875, + "learning_rate": 0.00018164700438917815, + "loss": 5.1266, + "step": 114600 + }, + { + "epoch": 0.9870006887052342, + "grad_norm": 1.0625, + "learning_rate": 0.00018163135878125454, + "loss": 4.5975, + "step": 114650 + }, + { + "epoch": 0.9874311294765841, + "grad_norm": 3.453125, + "learning_rate": 0.0001816157071817648, + "loss": 4.861, + "step": 114700 + }, + { + "epoch": 0.9878615702479339, + "grad_norm": 1.921875, + "learning_rate": 0.00018160004959185777, + "loss": 4.4884, + "step": 114750 + }, + { + "epoch": 0.9882920110192838, + "grad_norm": 2.0625, + "learning_rate": 0.00018158438601268264, + "loss": 4.5202, + "step": 114800 + }, + { + "epoch": 0.9887224517906336, + "grad_norm": 1.796875, + "learning_rate": 0.0001815687164453891, + "loss": 4.0955, + "step": 114850 + }, + { + "epoch": 0.9891528925619835, + "grad_norm": 3.828125, + "learning_rate": 0.0001815530408911273, + "loss": 4.4795, + "step": 114900 + }, + { + "epoch": 0.9895833333333334, + "grad_norm": 2.859375, + "learning_rate": 0.00018153735935104776, + "loss": 5.0941, + "step": 114950 + }, + { + "epoch": 0.9900137741046832, + "grad_norm": 1.515625, + "learning_rate": 0.00018152167182630146, + "loss": 4.6825, + "step": 115000 + }, + { + "epoch": 0.9904442148760331, + "grad_norm": 2.046875, + "learning_rate": 0.00018150597831803985, + "loss": 4.8007, + "step": 115050 + }, + { + "epoch": 0.990874655647383, + "grad_norm": 2.515625, + "learning_rate": 0.00018149027882741482, + "loss": 4.787, + "step": 115100 + }, + { + "epoch": 0.9913050964187328, + "grad_norm": 1.3359375, + "learning_rate": 0.00018147457335557864, + "loss": 4.6287, + "step": 115150 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 3.75, + "learning_rate": 0.00018145886190368407, + "loss": 4.7001, + "step": 115200 + }, + { + "epoch": 0.9921659779614325, + "grad_norm": 2.34375, + "learning_rate": 0.0001814431444728843, + "loss": 4.709, + "step": 115250 + }, + { + "epoch": 0.9925964187327824, + "grad_norm": 2.515625, + "learning_rate": 0.00018142742106433296, + "loss": 4.9991, + "step": 115300 + }, + { + "epoch": 0.9930268595041323, + "grad_norm": 2.921875, + "learning_rate": 0.0001814116916791841, + "loss": 4.2271, + "step": 115350 + }, + { + "epoch": 0.9934573002754821, + "grad_norm": 1.3359375, + "learning_rate": 0.00018139595631859226, + "loss": 4.7682, + "step": 115400 + }, + { + "epoch": 0.993887741046832, + "grad_norm": 3.5, + "learning_rate": 0.0001813802149837123, + "loss": 4.4572, + "step": 115450 + }, + { + "epoch": 0.9943181818181818, + "grad_norm": 2.375, + "learning_rate": 0.00018136446767569973, + "loss": 4.6668, + "step": 115500 + }, + { + "epoch": 0.9947486225895317, + "grad_norm": 2.6875, + "learning_rate": 0.00018134871439571024, + "loss": 4.6613, + "step": 115550 + }, + { + "epoch": 0.9951790633608816, + "grad_norm": 2.34375, + "learning_rate": 0.00018133295514490013, + "loss": 4.4614, + "step": 115600 + }, + { + "epoch": 0.9956095041322314, + "grad_norm": 3.4375, + "learning_rate": 0.00018131718992442613, + "loss": 4.9357, + "step": 115650 + }, + { + "epoch": 0.9960399449035813, + "grad_norm": 3.5, + "learning_rate": 0.00018130141873544536, + "loss": 4.2927, + "step": 115700 + }, + { + "epoch": 0.9964703856749312, + "grad_norm": 2.796875, + "learning_rate": 0.00018128564157911535, + "loss": 4.3704, + "step": 115750 + }, + { + "epoch": 0.996900826446281, + "grad_norm": 2.71875, + "learning_rate": 0.00018126985845659416, + "loss": 4.3264, + "step": 115800 + }, + { + "epoch": 0.9973312672176309, + "grad_norm": 1.921875, + "learning_rate": 0.00018125406936904018, + "loss": 5.1619, + "step": 115850 + }, + { + "epoch": 0.9977617079889807, + "grad_norm": 1.0625, + "learning_rate": 0.00018123827431761237, + "loss": 4.6115, + "step": 115900 + }, + { + "epoch": 0.9981921487603306, + "grad_norm": 4.0625, + "learning_rate": 0.00018122247330347002, + "loss": 4.5484, + "step": 115950 + }, + { + "epoch": 0.9986225895316805, + "grad_norm": 1.4609375, + "learning_rate": 0.00018120666632777285, + "loss": 4.9301, + "step": 116000 + }, + { + "epoch": 0.9990530303030303, + "grad_norm": 2.765625, + "learning_rate": 0.00018119085339168112, + "loss": 4.8622, + "step": 116050 + }, + { + "epoch": 0.9994834710743802, + "grad_norm": 2.671875, + "learning_rate": 0.00018117503449635542, + "loss": 5.0018, + "step": 116100 + }, + { + "epoch": 0.99991391184573, + "grad_norm": 1.2578125, + "learning_rate": 0.00018115920964295685, + "loss": 4.6004, + "step": 116150 + }, + { + "epoch": 1.0003443526170799, + "grad_norm": 2.53125, + "learning_rate": 0.0001811433788326469, + "loss": 4.6662, + "step": 116200 + }, + { + "epoch": 1.0007747933884297, + "grad_norm": 2.546875, + "learning_rate": 0.00018112754206658753, + "loss": 4.9016, + "step": 116250 + }, + { + "epoch": 1.0012052341597797, + "grad_norm": 2.25, + "learning_rate": 0.00018111169934594115, + "loss": 4.6042, + "step": 116300 + }, + { + "epoch": 1.0016356749311295, + "grad_norm": 2.015625, + "learning_rate": 0.00018109585067187052, + "loss": 4.9825, + "step": 116350 + }, + { + "epoch": 1.0020661157024793, + "grad_norm": 4.34375, + "learning_rate": 0.00018107999604553894, + "loss": 4.514, + "step": 116400 + }, + { + "epoch": 1.0024965564738293, + "grad_norm": 0.875, + "learning_rate": 0.0001810641354681101, + "loss": 4.6825, + "step": 116450 + }, + { + "epoch": 1.002926997245179, + "grad_norm": 2.921875, + "learning_rate": 0.00018104826894074812, + "loss": 4.6185, + "step": 116500 + }, + { + "epoch": 1.0033574380165289, + "grad_norm": 3.171875, + "learning_rate": 0.00018103239646461762, + "loss": 4.6123, + "step": 116550 + }, + { + "epoch": 1.003787878787879, + "grad_norm": 2.96875, + "learning_rate": 0.0001810165180408835, + "loss": 4.8008, + "step": 116600 + }, + { + "epoch": 1.0042183195592287, + "grad_norm": 2.53125, + "learning_rate": 0.00018100063367071134, + "loss": 4.9084, + "step": 116650 + }, + { + "epoch": 1.0046487603305785, + "grad_norm": 2.140625, + "learning_rate": 0.00018098474335526688, + "loss": 4.4487, + "step": 116700 + }, + { + "epoch": 1.0050792011019283, + "grad_norm": 3.828125, + "learning_rate": 0.0001809688470957166, + "loss": 4.6371, + "step": 116750 + }, + { + "epoch": 1.0055096418732783, + "grad_norm": 3.390625, + "learning_rate": 0.00018095294489322704, + "loss": 4.7429, + "step": 116800 + }, + { + "epoch": 1.005940082644628, + "grad_norm": 1.8359375, + "learning_rate": 0.00018093703674896557, + "loss": 4.2184, + "step": 116850 + }, + { + "epoch": 1.0063705234159779, + "grad_norm": 2.359375, + "learning_rate": 0.0001809211226640997, + "loss": 4.8171, + "step": 116900 + }, + { + "epoch": 1.006800964187328, + "grad_norm": 4.78125, + "learning_rate": 0.00018090520263979752, + "loss": 4.2607, + "step": 116950 + }, + { + "epoch": 1.0072314049586777, + "grad_norm": 3.71875, + "learning_rate": 0.0001808892766772276, + "loss": 4.5191, + "step": 117000 + }, + { + "epoch": 1.0072314049586777, + "eval_loss": 5.293093204498291, + "eval_runtime": 21.7119, + "eval_samples_per_second": 29.477, + "eval_steps_per_second": 14.738, + "eval_tts_loss": 6.988506331182041, + "step": 117000 + }, + { + "epoch": 1.0076618457300275, + "grad_norm": 3.5, + "learning_rate": 0.00018087334477755879, + "loss": 4.5956, + "step": 117050 + }, + { + "epoch": 1.0080922865013775, + "grad_norm": 2.75, + "learning_rate": 0.00018085740694196044, + "loss": 4.5534, + "step": 117100 + }, + { + "epoch": 1.0085227272727273, + "grad_norm": 3.6875, + "learning_rate": 0.00018084146317160242, + "loss": 4.6093, + "step": 117150 + }, + { + "epoch": 1.008953168044077, + "grad_norm": 1.828125, + "learning_rate": 0.00018082551346765494, + "loss": 4.579, + "step": 117200 + }, + { + "epoch": 1.009383608815427, + "grad_norm": 1.453125, + "learning_rate": 0.00018080955783128866, + "loss": 4.7456, + "step": 117250 + }, + { + "epoch": 1.009814049586777, + "grad_norm": 5.59375, + "learning_rate": 0.0001807935962636747, + "loss": 4.3437, + "step": 117300 + }, + { + "epoch": 1.0102444903581267, + "grad_norm": 2.203125, + "learning_rate": 0.00018077762876598464, + "loss": 4.7591, + "step": 117350 + }, + { + "epoch": 1.0106749311294765, + "grad_norm": 2.359375, + "learning_rate": 0.00018076165533939038, + "loss": 4.666, + "step": 117400 + }, + { + "epoch": 1.0111053719008265, + "grad_norm": 2.515625, + "learning_rate": 0.0001807456759850644, + "loss": 4.5435, + "step": 117450 + }, + { + "epoch": 1.0115358126721763, + "grad_norm": 1.203125, + "learning_rate": 0.00018072969070417953, + "loss": 4.6361, + "step": 117500 + }, + { + "epoch": 1.011966253443526, + "grad_norm": 2.3125, + "learning_rate": 0.00018071369949790907, + "loss": 4.9383, + "step": 117550 + }, + { + "epoch": 1.012396694214876, + "grad_norm": 3.859375, + "learning_rate": 0.00018069770236742668, + "loss": 4.5216, + "step": 117600 + }, + { + "epoch": 1.012827134986226, + "grad_norm": 1.6015625, + "learning_rate": 0.00018068169931390661, + "loss": 4.8076, + "step": 117650 + }, + { + "epoch": 1.0132575757575757, + "grad_norm": 2.828125, + "learning_rate": 0.0001806656903385234, + "loss": 4.3783, + "step": 117700 + }, + { + "epoch": 1.0136880165289257, + "grad_norm": 2.203125, + "learning_rate": 0.00018064967544245206, + "loss": 4.2773, + "step": 117750 + }, + { + "epoch": 1.0141184573002755, + "grad_norm": 1.359375, + "learning_rate": 0.00018063365462686804, + "loss": 4.6129, + "step": 117800 + }, + { + "epoch": 1.0145488980716253, + "grad_norm": 2.71875, + "learning_rate": 0.00018061762789294732, + "loss": 4.8391, + "step": 117850 + }, + { + "epoch": 1.0149793388429753, + "grad_norm": 1.2734375, + "learning_rate": 0.00018060159524186612, + "loss": 4.5374, + "step": 117900 + }, + { + "epoch": 1.015409779614325, + "grad_norm": 2.375, + "learning_rate": 0.0001805855566748013, + "loss": 4.0976, + "step": 117950 + }, + { + "epoch": 1.015840220385675, + "grad_norm": 2.671875, + "learning_rate": 0.0001805695121929299, + "loss": 4.9281, + "step": 118000 + }, + { + "epoch": 1.0162706611570247, + "grad_norm": 1.53125, + "learning_rate": 0.00018055346179742977, + "loss": 4.6582, + "step": 118050 + }, + { + "epoch": 1.0167011019283747, + "grad_norm": 2.71875, + "learning_rate": 0.00018053740548947878, + "loss": 4.776, + "step": 118100 + }, + { + "epoch": 1.0171315426997245, + "grad_norm": 1.6328125, + "learning_rate": 0.00018052134327025554, + "loss": 4.6826, + "step": 118150 + }, + { + "epoch": 1.0175619834710743, + "grad_norm": 1.1328125, + "learning_rate": 0.00018050527514093894, + "loss": 4.5394, + "step": 118200 + }, + { + "epoch": 1.0179924242424243, + "grad_norm": 0.9921875, + "learning_rate": 0.0001804892011027084, + "loss": 4.7749, + "step": 118250 + }, + { + "epoch": 1.018422865013774, + "grad_norm": 2.484375, + "learning_rate": 0.00018047312115674363, + "loss": 4.6088, + "step": 118300 + }, + { + "epoch": 1.018853305785124, + "grad_norm": 1.171875, + "learning_rate": 0.0001804570353042249, + "loss": 5.0518, + "step": 118350 + }, + { + "epoch": 1.019283746556474, + "grad_norm": 3.96875, + "learning_rate": 0.00018044094354633292, + "loss": 4.9322, + "step": 118400 + }, + { + "epoch": 1.0197141873278237, + "grad_norm": 1.84375, + "learning_rate": 0.00018042484588424876, + "loss": 4.6429, + "step": 118450 + }, + { + "epoch": 1.0201446280991735, + "grad_norm": 2.296875, + "learning_rate": 0.0001804087423191539, + "loss": 4.4183, + "step": 118500 + }, + { + "epoch": 1.0205750688705235, + "grad_norm": 3.6875, + "learning_rate": 0.0001803926328522304, + "loss": 4.5771, + "step": 118550 + }, + { + "epoch": 1.0210055096418733, + "grad_norm": 2.0625, + "learning_rate": 0.00018037651748466061, + "loss": 4.3772, + "step": 118600 + }, + { + "epoch": 1.021435950413223, + "grad_norm": 2.046875, + "learning_rate": 0.00018036039621762738, + "loss": 4.8166, + "step": 118650 + }, + { + "epoch": 1.021866391184573, + "grad_norm": 2.1875, + "learning_rate": 0.00018034426905231396, + "loss": 4.7993, + "step": 118700 + }, + { + "epoch": 1.022296831955923, + "grad_norm": 1.671875, + "learning_rate": 0.00018032813598990407, + "loss": 4.9895, + "step": 118750 + }, + { + "epoch": 1.0227272727272727, + "grad_norm": 5.375, + "learning_rate": 0.00018031199703158178, + "loss": 4.2557, + "step": 118800 + }, + { + "epoch": 1.0231577134986225, + "grad_norm": 2.40625, + "learning_rate": 0.00018029585217853175, + "loss": 4.3125, + "step": 118850 + }, + { + "epoch": 1.0235881542699725, + "grad_norm": 3.609375, + "learning_rate": 0.0001802797014319389, + "loss": 4.3647, + "step": 118900 + }, + { + "epoch": 1.0240185950413223, + "grad_norm": 4.09375, + "learning_rate": 0.0001802635447929887, + "loss": 4.7287, + "step": 118950 + }, + { + "epoch": 1.024449035812672, + "grad_norm": 2.4375, + "learning_rate": 0.00018024738226286703, + "loss": 4.3629, + "step": 119000 + }, + { + "epoch": 1.0248794765840221, + "grad_norm": 1.484375, + "learning_rate": 0.00018023121384276014, + "loss": 5.0144, + "step": 119050 + }, + { + "epoch": 1.025309917355372, + "grad_norm": 2.03125, + "learning_rate": 0.0001802150395338548, + "loss": 4.2512, + "step": 119100 + }, + { + "epoch": 1.0257403581267217, + "grad_norm": 1.8828125, + "learning_rate": 0.0001801988593373381, + "loss": 4.4129, + "step": 119150 + }, + { + "epoch": 1.0261707988980717, + "grad_norm": 0.796875, + "learning_rate": 0.0001801826732543977, + "loss": 4.3623, + "step": 119200 + }, + { + "epoch": 1.0266012396694215, + "grad_norm": 2.5625, + "learning_rate": 0.0001801664812862216, + "loss": 4.8789, + "step": 119250 + }, + { + "epoch": 1.0270316804407713, + "grad_norm": 3.796875, + "learning_rate": 0.00018015028343399826, + "loss": 4.4223, + "step": 119300 + }, + { + "epoch": 1.027462121212121, + "grad_norm": 2.859375, + "learning_rate": 0.00018013407969891655, + "loss": 4.5086, + "step": 119350 + }, + { + "epoch": 1.0278925619834711, + "grad_norm": 3.90625, + "learning_rate": 0.00018011787008216583, + "loss": 4.6112, + "step": 119400 + }, + { + "epoch": 1.028323002754821, + "grad_norm": 1.28125, + "learning_rate": 0.0001801016545849358, + "loss": 4.4403, + "step": 119450 + }, + { + "epoch": 1.0287534435261707, + "grad_norm": 3.125, + "learning_rate": 0.00018008543320841667, + "loss": 4.5919, + "step": 119500 + }, + { + "epoch": 1.0291838842975207, + "grad_norm": 2.25, + "learning_rate": 0.00018006920595379903, + "loss": 4.5554, + "step": 119550 + }, + { + "epoch": 1.0296143250688705, + "grad_norm": 2.78125, + "learning_rate": 0.000180052972822274, + "loss": 4.7866, + "step": 119600 + }, + { + "epoch": 1.0300447658402203, + "grad_norm": 2.546875, + "learning_rate": 0.00018003673381503297, + "loss": 4.6108, + "step": 119650 + }, + { + "epoch": 1.0304752066115703, + "grad_norm": 2.984375, + "learning_rate": 0.00018002048893326787, + "loss": 4.3531, + "step": 119700 + }, + { + "epoch": 1.0309056473829201, + "grad_norm": 2.78125, + "learning_rate": 0.00018000423817817108, + "loss": 4.5188, + "step": 119750 + }, + { + "epoch": 1.03133608815427, + "grad_norm": 4.09375, + "learning_rate": 0.00017998798155093533, + "loss": 4.8692, + "step": 119800 + }, + { + "epoch": 1.03176652892562, + "grad_norm": 2.8125, + "learning_rate": 0.00017997171905275382, + "loss": 4.794, + "step": 119850 + }, + { + "epoch": 1.0321969696969697, + "grad_norm": 2.765625, + "learning_rate": 0.0001799554506848202, + "loss": 4.1908, + "step": 119900 + }, + { + "epoch": 1.0326274104683195, + "grad_norm": 1.265625, + "learning_rate": 0.00017993917644832855, + "loss": 4.6614, + "step": 119950 + }, + { + "epoch": 1.0330578512396693, + "grad_norm": 2.421875, + "learning_rate": 0.0001799228963444733, + "loss": 4.9469, + "step": 120000 + }, + { + "epoch": 1.0330578512396693, + "eval_loss": 5.277187824249268, + "eval_runtime": 21.8386, + "eval_samples_per_second": 29.306, + "eval_steps_per_second": 14.653, + "eval_tts_loss": 6.978889931368823, + "step": 120000 + }, + { + "epoch": 1.0334882920110193, + "grad_norm": 2.453125, + "learning_rate": 0.00017990661037444946, + "loss": 4.5529, + "step": 120050 + }, + { + "epoch": 1.0339187327823691, + "grad_norm": 1.578125, + "learning_rate": 0.00017989031853945232, + "loss": 4.6823, + "step": 120100 + }, + { + "epoch": 1.034349173553719, + "grad_norm": 3.21875, + "learning_rate": 0.0001798740208406777, + "loss": 4.581, + "step": 120150 + }, + { + "epoch": 1.034779614325069, + "grad_norm": 2.5, + "learning_rate": 0.0001798577172793218, + "loss": 4.252, + "step": 120200 + }, + { + "epoch": 1.0352100550964187, + "grad_norm": 1.8125, + "learning_rate": 0.00017984140785658125, + "loss": 4.7233, + "step": 120250 + }, + { + "epoch": 1.0356404958677685, + "grad_norm": 2.375, + "learning_rate": 0.00017982509257365316, + "loss": 4.4834, + "step": 120300 + }, + { + "epoch": 1.0360709366391185, + "grad_norm": 1.4609375, + "learning_rate": 0.00017980877143173506, + "loss": 4.3067, + "step": 120350 + }, + { + "epoch": 1.0365013774104683, + "grad_norm": 2.390625, + "learning_rate": 0.00017979244443202478, + "loss": 4.4205, + "step": 120400 + }, + { + "epoch": 1.0369318181818181, + "grad_norm": 1.7109375, + "learning_rate": 0.00017977611157572078, + "loss": 4.7334, + "step": 120450 + }, + { + "epoch": 1.0373622589531681, + "grad_norm": 2.359375, + "learning_rate": 0.00017975977286402187, + "loss": 4.6972, + "step": 120500 + }, + { + "epoch": 1.037792699724518, + "grad_norm": 2.96875, + "learning_rate": 0.00017974342829812722, + "loss": 4.6294, + "step": 120550 + }, + { + "epoch": 1.0382231404958677, + "grad_norm": 2.234375, + "learning_rate": 0.0001797270778792365, + "loss": 4.2679, + "step": 120600 + }, + { + "epoch": 1.0386535812672175, + "grad_norm": 1.9921875, + "learning_rate": 0.0001797107216085498, + "loss": 4.788, + "step": 120650 + }, + { + "epoch": 1.0390840220385675, + "grad_norm": 1.3671875, + "learning_rate": 0.00017969435948726764, + "loss": 4.6148, + "step": 120700 + }, + { + "epoch": 1.0395144628099173, + "grad_norm": 3.15625, + "learning_rate": 0.00017967799151659094, + "loss": 4.5744, + "step": 120750 + }, + { + "epoch": 1.0399449035812671, + "grad_norm": 2.5, + "learning_rate": 0.0001796616176977211, + "loss": 4.8067, + "step": 120800 + }, + { + "epoch": 1.0403753443526171, + "grad_norm": 3.46875, + "learning_rate": 0.00017964523803185993, + "loss": 4.392, + "step": 120850 + }, + { + "epoch": 1.040805785123967, + "grad_norm": 2.28125, + "learning_rate": 0.00017962885252020965, + "loss": 4.6037, + "step": 120900 + }, + { + "epoch": 1.0412362258953167, + "grad_norm": 2.65625, + "learning_rate": 0.00017961246116397295, + "loss": 4.5907, + "step": 120950 + }, + { + "epoch": 1.0416666666666667, + "grad_norm": 3.90625, + "learning_rate": 0.00017959606396435285, + "loss": 4.7166, + "step": 121000 + }, + { + "epoch": 1.0420971074380165, + "grad_norm": 4.59375, + "learning_rate": 0.0001795796609225529, + "loss": 4.5639, + "step": 121050 + }, + { + "epoch": 1.0425275482093663, + "grad_norm": 1.375, + "learning_rate": 0.0001795632520397771, + "loss": 4.7798, + "step": 121100 + }, + { + "epoch": 1.0429579889807163, + "grad_norm": 1.296875, + "learning_rate": 0.00017954683731722976, + "loss": 4.6755, + "step": 121150 + }, + { + "epoch": 1.0433884297520661, + "grad_norm": 1.5859375, + "learning_rate": 0.00017953041675611574, + "loss": 4.4991, + "step": 121200 + }, + { + "epoch": 1.043818870523416, + "grad_norm": 2.5, + "learning_rate": 0.0001795139903576402, + "loss": 5.0583, + "step": 121250 + }, + { + "epoch": 1.044249311294766, + "grad_norm": 3.453125, + "learning_rate": 0.00017949755812300886, + "loss": 4.678, + "step": 121300 + }, + { + "epoch": 1.0446797520661157, + "grad_norm": 3.125, + "learning_rate": 0.00017948112005342778, + "loss": 4.6192, + "step": 121350 + }, + { + "epoch": 1.0451101928374655, + "grad_norm": 3.03125, + "learning_rate": 0.00017946467615010355, + "loss": 4.9013, + "step": 121400 + }, + { + "epoch": 1.0455406336088153, + "grad_norm": 2.671875, + "learning_rate": 0.000179448226414243, + "loss": 4.86, + "step": 121450 + }, + { + "epoch": 1.0459710743801653, + "grad_norm": 1.7578125, + "learning_rate": 0.0001794317708470536, + "loss": 4.3527, + "step": 121500 + }, + { + "epoch": 1.0464015151515151, + "grad_norm": 1.09375, + "learning_rate": 0.00017941530944974314, + "loss": 4.6214, + "step": 121550 + }, + { + "epoch": 1.046831955922865, + "grad_norm": 2.15625, + "learning_rate": 0.0001793988422235198, + "loss": 4.7467, + "step": 121600 + }, + { + "epoch": 1.047262396694215, + "grad_norm": 1.7578125, + "learning_rate": 0.00017938236916959231, + "loss": 4.5602, + "step": 121650 + }, + { + "epoch": 1.0476928374655647, + "grad_norm": 1.6796875, + "learning_rate": 0.00017936589028916968, + "loss": 4.2249, + "step": 121700 + }, + { + "epoch": 1.0481232782369145, + "grad_norm": 3.46875, + "learning_rate": 0.00017934940558346146, + "loss": 4.1235, + "step": 121750 + }, + { + "epoch": 1.0485537190082646, + "grad_norm": 4.46875, + "learning_rate": 0.00017933291505367764, + "loss": 4.6966, + "step": 121800 + }, + { + "epoch": 1.0489841597796143, + "grad_norm": 2.40625, + "learning_rate": 0.00017931641870102852, + "loss": 4.5515, + "step": 121850 + }, + { + "epoch": 1.0494146005509641, + "grad_norm": 1.40625, + "learning_rate": 0.00017929991652672494, + "loss": 4.4808, + "step": 121900 + }, + { + "epoch": 1.0498450413223142, + "grad_norm": 3.8125, + "learning_rate": 0.00017928340853197807, + "loss": 4.7231, + "step": 121950 + }, + { + "epoch": 1.050275482093664, + "grad_norm": 5.375, + "learning_rate": 0.0001792668947179996, + "loss": 4.7616, + "step": 122000 + }, + { + "epoch": 1.0507059228650137, + "grad_norm": 4.03125, + "learning_rate": 0.00017925037508600159, + "loss": 4.3451, + "step": 122050 + }, + { + "epoch": 1.0511363636363635, + "grad_norm": 1.46875, + "learning_rate": 0.00017923384963719661, + "loss": 4.5132, + "step": 122100 + }, + { + "epoch": 1.0515668044077136, + "grad_norm": 2.078125, + "learning_rate": 0.00017921731837279751, + "loss": 4.5463, + "step": 122150 + }, + { + "epoch": 1.0519972451790633, + "grad_norm": 5.375, + "learning_rate": 0.0001792007812940177, + "loss": 5.1975, + "step": 122200 + }, + { + "epoch": 1.0524276859504131, + "grad_norm": 2.40625, + "learning_rate": 0.00017918423840207094, + "loss": 4.6246, + "step": 122250 + }, + { + "epoch": 1.0528581267217632, + "grad_norm": 2.015625, + "learning_rate": 0.00017916768969817145, + "loss": 4.584, + "step": 122300 + }, + { + "epoch": 1.053288567493113, + "grad_norm": 3.609375, + "learning_rate": 0.00017915113518353386, + "loss": 4.6767, + "step": 122350 + }, + { + "epoch": 1.0537190082644627, + "grad_norm": 0.6484375, + "learning_rate": 0.00017913457485937325, + "loss": 4.7067, + "step": 122400 + }, + { + "epoch": 1.0541494490358128, + "grad_norm": 2.9375, + "learning_rate": 0.0001791180087269051, + "loss": 4.7701, + "step": 122450 + }, + { + "epoch": 1.0545798898071626, + "grad_norm": 1.484375, + "learning_rate": 0.00017910143678734537, + "loss": 4.8197, + "step": 122500 + }, + { + "epoch": 1.0550103305785123, + "grad_norm": 2.25, + "learning_rate": 0.00017908485904191034, + "loss": 4.3686, + "step": 122550 + }, + { + "epoch": 1.0554407713498624, + "grad_norm": 3.828125, + "learning_rate": 0.0001790682754918168, + "loss": 4.9852, + "step": 122600 + }, + { + "epoch": 1.0558712121212122, + "grad_norm": 2.96875, + "learning_rate": 0.000179051686138282, + "loss": 4.6628, + "step": 122650 + }, + { + "epoch": 1.056301652892562, + "grad_norm": 2.96875, + "learning_rate": 0.00017903509098252349, + "loss": 4.3822, + "step": 122700 + }, + { + "epoch": 1.0567320936639117, + "grad_norm": 1.46875, + "learning_rate": 0.00017901849002575936, + "loss": 4.7333, + "step": 122750 + }, + { + "epoch": 1.0571625344352618, + "grad_norm": 2.484375, + "learning_rate": 0.00017900188326920808, + "loss": 4.8762, + "step": 122800 + }, + { + "epoch": 1.0575929752066116, + "grad_norm": 2.453125, + "learning_rate": 0.00017898527071408854, + "loss": 5.1889, + "step": 122850 + }, + { + "epoch": 1.0580234159779613, + "grad_norm": 1.625, + "learning_rate": 0.00017896865236162007, + "loss": 4.7234, + "step": 122900 + }, + { + "epoch": 1.0584538567493114, + "grad_norm": 1.4296875, + "learning_rate": 0.0001789520282130224, + "loss": 4.2801, + "step": 122950 + }, + { + "epoch": 1.0588842975206612, + "grad_norm": 3.0625, + "learning_rate": 0.00017893539826951578, + "loss": 4.4105, + "step": 123000 + }, + { + "epoch": 1.0588842975206612, + "eval_loss": 5.272480010986328, + "eval_runtime": 21.8098, + "eval_samples_per_second": 29.345, + "eval_steps_per_second": 14.672, + "eval_tts_loss": 6.962516161429058, + "step": 123000 + }, + { + "epoch": 1.059314738292011, + "grad_norm": 4.8125, + "learning_rate": 0.0001789187625323207, + "loss": 5.0393, + "step": 123050 + }, + { + "epoch": 1.059745179063361, + "grad_norm": 1.0078125, + "learning_rate": 0.0001789021210026583, + "loss": 4.7434, + "step": 123100 + }, + { + "epoch": 1.0601756198347108, + "grad_norm": 3.703125, + "learning_rate": 0.00017888547368174996, + "loss": 4.9379, + "step": 123150 + }, + { + "epoch": 1.0606060606060606, + "grad_norm": 2.390625, + "learning_rate": 0.0001788688205708176, + "loss": 4.8474, + "step": 123200 + }, + { + "epoch": 1.0610365013774106, + "grad_norm": 1.96875, + "learning_rate": 0.00017885216167108346, + "loss": 4.621, + "step": 123250 + }, + { + "epoch": 1.0614669421487604, + "grad_norm": 1.828125, + "learning_rate": 0.00017883549698377034, + "loss": 4.2299, + "step": 123300 + }, + { + "epoch": 1.0618973829201102, + "grad_norm": 3.015625, + "learning_rate": 0.00017881882651010135, + "loss": 4.7655, + "step": 123350 + }, + { + "epoch": 1.06232782369146, + "grad_norm": 2.453125, + "learning_rate": 0.00017880215025130008, + "loss": 5.0066, + "step": 123400 + }, + { + "epoch": 1.06275826446281, + "grad_norm": 3.203125, + "learning_rate": 0.0001787854682085905, + "loss": 4.9436, + "step": 123450 + }, + { + "epoch": 1.0631887052341598, + "grad_norm": 1.3046875, + "learning_rate": 0.0001787687803831971, + "loss": 4.3164, + "step": 123500 + }, + { + "epoch": 1.0636191460055096, + "grad_norm": 2.75, + "learning_rate": 0.0001787520867763447, + "loss": 4.3095, + "step": 123550 + }, + { + "epoch": 1.0640495867768596, + "grad_norm": 2.21875, + "learning_rate": 0.00017873538738925855, + "loss": 3.9903, + "step": 123600 + }, + { + "epoch": 1.0644800275482094, + "grad_norm": 3.453125, + "learning_rate": 0.00017871868222316437, + "loss": 4.5933, + "step": 123650 + }, + { + "epoch": 1.0649104683195592, + "grad_norm": 2.34375, + "learning_rate": 0.0001787019712792883, + "loss": 4.5854, + "step": 123700 + }, + { + "epoch": 1.0653409090909092, + "grad_norm": 4.9375, + "learning_rate": 0.00017868525455885686, + "loss": 5.0299, + "step": 123750 + }, + { + "epoch": 1.065771349862259, + "grad_norm": 2.828125, + "learning_rate": 0.00017866853206309702, + "loss": 4.4883, + "step": 123800 + }, + { + "epoch": 1.0662017906336088, + "grad_norm": 1.5546875, + "learning_rate": 0.00017865180379323617, + "loss": 4.3379, + "step": 123850 + }, + { + "epoch": 1.0666322314049588, + "grad_norm": 2.234375, + "learning_rate": 0.00017863506975050218, + "loss": 4.3761, + "step": 123900 + }, + { + "epoch": 1.0670626721763086, + "grad_norm": 0.9765625, + "learning_rate": 0.00017861832993612325, + "loss": 4.4775, + "step": 123950 + }, + { + "epoch": 1.0674931129476584, + "grad_norm": 2.59375, + "learning_rate": 0.00017860158435132806, + "loss": 4.8059, + "step": 124000 + }, + { + "epoch": 1.0679235537190084, + "grad_norm": 2.875, + "learning_rate": 0.00017858483299734569, + "loss": 4.6028, + "step": 124050 + }, + { + "epoch": 1.0683539944903582, + "grad_norm": 3.390625, + "learning_rate": 0.00017856807587540564, + "loss": 4.6802, + "step": 124100 + }, + { + "epoch": 1.068784435261708, + "grad_norm": 1.859375, + "learning_rate": 0.0001785513129867379, + "loss": 4.3151, + "step": 124150 + }, + { + "epoch": 1.0692148760330578, + "grad_norm": 2.515625, + "learning_rate": 0.00017853454433257277, + "loss": 4.4292, + "step": 124200 + }, + { + "epoch": 1.0696453168044078, + "grad_norm": 1.625, + "learning_rate": 0.00017851776991414104, + "loss": 4.2712, + "step": 124250 + }, + { + "epoch": 1.0700757575757576, + "grad_norm": 4.71875, + "learning_rate": 0.00017850098973267397, + "loss": 4.4969, + "step": 124300 + }, + { + "epoch": 1.0705061983471074, + "grad_norm": 1.6171875, + "learning_rate": 0.00017848420378940313, + "loss": 4.9214, + "step": 124350 + }, + { + "epoch": 1.0709366391184574, + "grad_norm": 5.40625, + "learning_rate": 0.0001784674120855606, + "loss": 4.6779, + "step": 124400 + }, + { + "epoch": 1.0713670798898072, + "grad_norm": 1.7890625, + "learning_rate": 0.00017845061462237887, + "loss": 4.3168, + "step": 124450 + }, + { + "epoch": 1.071797520661157, + "grad_norm": 2.546875, + "learning_rate": 0.0001784338114010908, + "loss": 4.7684, + "step": 124500 + }, + { + "epoch": 1.072227961432507, + "grad_norm": 2.46875, + "learning_rate": 0.0001784170024229297, + "loss": 4.7823, + "step": 124550 + }, + { + "epoch": 1.0726584022038568, + "grad_norm": 3.3125, + "learning_rate": 0.00017840018768912937, + "loss": 4.8714, + "step": 124600 + }, + { + "epoch": 1.0730888429752066, + "grad_norm": 1.875, + "learning_rate": 0.00017838336720092394, + "loss": 4.8894, + "step": 124650 + }, + { + "epoch": 1.0735192837465566, + "grad_norm": 1.78125, + "learning_rate": 0.00017836654095954802, + "loss": 4.6314, + "step": 124700 + }, + { + "epoch": 1.0739497245179064, + "grad_norm": 2.828125, + "learning_rate": 0.00017834970896623658, + "loss": 5.0466, + "step": 124750 + }, + { + "epoch": 1.0743801652892562, + "grad_norm": 2.515625, + "learning_rate": 0.0001783328712222251, + "loss": 4.6268, + "step": 124800 + }, + { + "epoch": 1.074810606060606, + "grad_norm": 1.6328125, + "learning_rate": 0.00017831602772874937, + "loss": 4.2365, + "step": 124850 + }, + { + "epoch": 1.075241046831956, + "grad_norm": 1.859375, + "learning_rate": 0.00017829917848704574, + "loss": 4.7067, + "step": 124900 + }, + { + "epoch": 1.0756714876033058, + "grad_norm": 4.21875, + "learning_rate": 0.00017828232349835087, + "loss": 4.6713, + "step": 124950 + }, + { + "epoch": 1.0761019283746556, + "grad_norm": 2.140625, + "learning_rate": 0.0001782654627639019, + "loss": 4.8225, + "step": 125000 + }, + { + "epoch": 1.0765323691460056, + "grad_norm": 3.125, + "learning_rate": 0.00017824859628493632, + "loss": 4.6114, + "step": 125050 + }, + { + "epoch": 1.0769628099173554, + "grad_norm": 3.75, + "learning_rate": 0.00017823172406269215, + "loss": 4.5789, + "step": 125100 + }, + { + "epoch": 1.0773932506887052, + "grad_norm": 2.96875, + "learning_rate": 0.00017821484609840775, + "loss": 4.5315, + "step": 125150 + }, + { + "epoch": 1.0778236914600552, + "grad_norm": 1.65625, + "learning_rate": 0.0001781979623933219, + "loss": 4.6303, + "step": 125200 + }, + { + "epoch": 1.078254132231405, + "grad_norm": 2.34375, + "learning_rate": 0.0001781810729486739, + "loss": 4.2937, + "step": 125250 + }, + { + "epoch": 1.0786845730027548, + "grad_norm": 2.921875, + "learning_rate": 0.00017816417776570336, + "loss": 4.7388, + "step": 125300 + }, + { + "epoch": 1.0791150137741048, + "grad_norm": 2.1875, + "learning_rate": 0.0001781472768456503, + "loss": 4.4114, + "step": 125350 + }, + { + "epoch": 1.0795454545454546, + "grad_norm": 2.9375, + "learning_rate": 0.0001781303701897553, + "loss": 4.5467, + "step": 125400 + }, + { + "epoch": 1.0799758953168044, + "grad_norm": 3.453125, + "learning_rate": 0.0001781134577992592, + "loss": 4.4847, + "step": 125450 + }, + { + "epoch": 1.0804063360881542, + "grad_norm": 3.875, + "learning_rate": 0.0001780965396754034, + "loss": 4.4528, + "step": 125500 + }, + { + "epoch": 1.0808367768595042, + "grad_norm": 2.875, + "learning_rate": 0.0001780796158194296, + "loss": 4.5023, + "step": 125550 + }, + { + "epoch": 1.081267217630854, + "grad_norm": 2.15625, + "learning_rate": 0.00017806268623257997, + "loss": 4.565, + "step": 125600 + }, + { + "epoch": 1.0816976584022038, + "grad_norm": 3.609375, + "learning_rate": 0.00017804575091609715, + "loss": 4.2734, + "step": 125650 + }, + { + "epoch": 1.0821280991735538, + "grad_norm": 2.1875, + "learning_rate": 0.00017802880987122412, + "loss": 4.6331, + "step": 125700 + }, + { + "epoch": 1.0825585399449036, + "grad_norm": 2.015625, + "learning_rate": 0.00017801186309920436, + "loss": 4.8683, + "step": 125750 + }, + { + "epoch": 1.0829889807162534, + "grad_norm": 3.171875, + "learning_rate": 0.00017799491060128167, + "loss": 4.5946, + "step": 125800 + }, + { + "epoch": 1.0834194214876034, + "grad_norm": 2.390625, + "learning_rate": 0.00017797795237870035, + "loss": 4.7209, + "step": 125850 + }, + { + "epoch": 1.0838498622589532, + "grad_norm": 1.8125, + "learning_rate": 0.0001779609884327051, + "loss": 5.0445, + "step": 125900 + }, + { + "epoch": 1.084280303030303, + "grad_norm": 2.671875, + "learning_rate": 0.00017794401876454103, + "loss": 4.6825, + "step": 125950 + }, + { + "epoch": 1.084710743801653, + "grad_norm": 1.9609375, + "learning_rate": 0.0001779270433754537, + "loss": 4.7229, + "step": 126000 + }, + { + "epoch": 1.084710743801653, + "eval_loss": 5.262632846832275, + "eval_runtime": 21.9137, + "eval_samples_per_second": 29.205, + "eval_steps_per_second": 14.603, + "eval_tts_loss": 6.942831247074225, + "step": 126000 + }, + { + "epoch": 1.0851411845730028, + "grad_norm": 2.34375, + "learning_rate": 0.00017791006226668902, + "loss": 4.9246, + "step": 126050 + }, + { + "epoch": 1.0855716253443526, + "grad_norm": 2.625, + "learning_rate": 0.00017789307543949345, + "loss": 4.3588, + "step": 126100 + }, + { + "epoch": 1.0860020661157024, + "grad_norm": 2.90625, + "learning_rate": 0.0001778760828951137, + "loss": 4.8105, + "step": 126150 + }, + { + "epoch": 1.0864325068870524, + "grad_norm": 3.65625, + "learning_rate": 0.00017785908463479702, + "loss": 4.7491, + "step": 126200 + }, + { + "epoch": 1.0868629476584022, + "grad_norm": 2.84375, + "learning_rate": 0.00017784208065979104, + "loss": 4.7102, + "step": 126250 + }, + { + "epoch": 1.087293388429752, + "grad_norm": 2.859375, + "learning_rate": 0.00017782507097134382, + "loss": 4.8335, + "step": 126300 + }, + { + "epoch": 1.087723829201102, + "grad_norm": 6.40625, + "learning_rate": 0.00017780805557070388, + "loss": 5.1412, + "step": 126350 + }, + { + "epoch": 1.0881542699724518, + "grad_norm": 2.75, + "learning_rate": 0.00017779103445912003, + "loss": 4.5067, + "step": 126400 + }, + { + "epoch": 1.0885847107438016, + "grad_norm": 3.734375, + "learning_rate": 0.00017777400763784162, + "loss": 4.3604, + "step": 126450 + }, + { + "epoch": 1.0890151515151516, + "grad_norm": 2.5625, + "learning_rate": 0.0001777569751081184, + "loss": 4.4491, + "step": 126500 + }, + { + "epoch": 1.0894455922865014, + "grad_norm": 2.625, + "learning_rate": 0.0001777399368712005, + "loss": 4.5945, + "step": 126550 + }, + { + "epoch": 1.0898760330578512, + "grad_norm": 1.75, + "learning_rate": 0.0001777228929283385, + "loss": 4.5258, + "step": 126600 + }, + { + "epoch": 1.0903064738292012, + "grad_norm": 2.6875, + "learning_rate": 0.00017770584328078337, + "loss": 4.9029, + "step": 126650 + }, + { + "epoch": 1.090736914600551, + "grad_norm": 1.9921875, + "learning_rate": 0.00017768878792978653, + "loss": 5.0176, + "step": 126700 + }, + { + "epoch": 1.0911673553719008, + "grad_norm": 4.6875, + "learning_rate": 0.0001776717268765998, + "loss": 4.3295, + "step": 126750 + }, + { + "epoch": 1.0915977961432506, + "grad_norm": 2.109375, + "learning_rate": 0.00017765466012247547, + "loss": 4.6499, + "step": 126800 + }, + { + "epoch": 1.0920282369146006, + "grad_norm": 1.875, + "learning_rate": 0.00017763758766866614, + "loss": 4.8432, + "step": 126850 + }, + { + "epoch": 1.0924586776859504, + "grad_norm": 2.625, + "learning_rate": 0.00017762050951642495, + "loss": 4.4676, + "step": 126900 + }, + { + "epoch": 1.0928891184573002, + "grad_norm": 2.25, + "learning_rate": 0.0001776034256670053, + "loss": 4.7084, + "step": 126950 + }, + { + "epoch": 1.0933195592286502, + "grad_norm": 2.640625, + "learning_rate": 0.00017758633612166124, + "loss": 4.8007, + "step": 127000 + }, + { + "epoch": 1.09375, + "grad_norm": 2.0625, + "learning_rate": 0.00017756924088164702, + "loss": 4.1653, + "step": 127050 + }, + { + "epoch": 1.0941804407713498, + "grad_norm": 1.8984375, + "learning_rate": 0.0001775521399482174, + "loss": 4.2092, + "step": 127100 + }, + { + "epoch": 1.0946108815426998, + "grad_norm": 1.9375, + "learning_rate": 0.00017753503332262755, + "loss": 4.2951, + "step": 127150 + }, + { + "epoch": 1.0950413223140496, + "grad_norm": 2.578125, + "learning_rate": 0.00017751792100613312, + "loss": 4.758, + "step": 127200 + }, + { + "epoch": 1.0954717630853994, + "grad_norm": 3.5, + "learning_rate": 0.00017750080299999005, + "loss": 4.7231, + "step": 127250 + }, + { + "epoch": 1.0959022038567494, + "grad_norm": 1.796875, + "learning_rate": 0.0001774836793054548, + "loss": 4.3676, + "step": 127300 + }, + { + "epoch": 1.0963326446280992, + "grad_norm": 1.4765625, + "learning_rate": 0.00017746654992378415, + "loss": 4.0514, + "step": 127350 + }, + { + "epoch": 1.096763085399449, + "grad_norm": 2.453125, + "learning_rate": 0.00017744941485623548, + "loss": 4.7052, + "step": 127400 + }, + { + "epoch": 1.0971935261707988, + "grad_norm": 6.28125, + "learning_rate": 0.00017743227410406634, + "loss": 4.974, + "step": 127450 + }, + { + "epoch": 1.0976239669421488, + "grad_norm": 2.109375, + "learning_rate": 0.00017741512766853492, + "loss": 4.4805, + "step": 127500 + }, + { + "epoch": 1.0980544077134986, + "grad_norm": 1.8359375, + "learning_rate": 0.00017739797555089965, + "loss": 4.9846, + "step": 127550 + }, + { + "epoch": 1.0984848484848484, + "grad_norm": 0.93359375, + "learning_rate": 0.00017738081775241953, + "loss": 4.2951, + "step": 127600 + }, + { + "epoch": 1.0989152892561984, + "grad_norm": 2.515625, + "learning_rate": 0.0001773636542743539, + "loss": 5.0558, + "step": 127650 + }, + { + "epoch": 1.0993457300275482, + "grad_norm": 3.390625, + "learning_rate": 0.00017734648511796242, + "loss": 4.5819, + "step": 127700 + }, + { + "epoch": 1.099776170798898, + "grad_norm": 3.8125, + "learning_rate": 0.00017732931028450541, + "loss": 4.9886, + "step": 127750 + }, + { + "epoch": 1.100206611570248, + "grad_norm": 2.65625, + "learning_rate": 0.00017731212977524342, + "loss": 4.7401, + "step": 127800 + }, + { + "epoch": 1.1006370523415978, + "grad_norm": 3.28125, + "learning_rate": 0.00017729494359143742, + "loss": 4.6309, + "step": 127850 + }, + { + "epoch": 1.1010674931129476, + "grad_norm": 3.765625, + "learning_rate": 0.00017727775173434883, + "loss": 4.3819, + "step": 127900 + }, + { + "epoch": 1.1014979338842976, + "grad_norm": 2.34375, + "learning_rate": 0.00017726055420523958, + "loss": 4.9363, + "step": 127950 + }, + { + "epoch": 1.1019283746556474, + "grad_norm": 2.390625, + "learning_rate": 0.00017724335100537187, + "loss": 4.8072, + "step": 128000 + }, + { + "epoch": 1.1023588154269972, + "grad_norm": 3.890625, + "learning_rate": 0.00017722614213600837, + "loss": 4.7794, + "step": 128050 + }, + { + "epoch": 1.102789256198347, + "grad_norm": 2.15625, + "learning_rate": 0.0001772089275984122, + "loss": 4.6653, + "step": 128100 + }, + { + "epoch": 1.103219696969697, + "grad_norm": 2.234375, + "learning_rate": 0.00017719170739384686, + "loss": 4.5847, + "step": 128150 + }, + { + "epoch": 1.1036501377410468, + "grad_norm": 2.96875, + "learning_rate": 0.0001771744815235763, + "loss": 4.84, + "step": 128200 + }, + { + "epoch": 1.1040805785123966, + "grad_norm": 2.84375, + "learning_rate": 0.00017715724998886484, + "loss": 4.6217, + "step": 128250 + }, + { + "epoch": 1.1045110192837466, + "grad_norm": 3.109375, + "learning_rate": 0.00017714001279097724, + "loss": 4.6454, + "step": 128300 + }, + { + "epoch": 1.1049414600550964, + "grad_norm": 2.671875, + "learning_rate": 0.00017712276993117864, + "loss": 4.3933, + "step": 128350 + }, + { + "epoch": 1.1053719008264462, + "grad_norm": 2.515625, + "learning_rate": 0.00017710552141073468, + "loss": 4.8818, + "step": 128400 + }, + { + "epoch": 1.1058023415977962, + "grad_norm": 2.625, + "learning_rate": 0.00017708826723091136, + "loss": 4.6885, + "step": 128450 + }, + { + "epoch": 1.106232782369146, + "grad_norm": 3.140625, + "learning_rate": 0.00017707100739297507, + "loss": 4.5041, + "step": 128500 + }, + { + "epoch": 1.1066632231404958, + "grad_norm": 2.203125, + "learning_rate": 0.00017705374189819266, + "loss": 4.8692, + "step": 128550 + }, + { + "epoch": 1.1070936639118458, + "grad_norm": 1.59375, + "learning_rate": 0.00017703647074783139, + "loss": 4.4058, + "step": 128600 + }, + { + "epoch": 1.1075241046831956, + "grad_norm": 2.53125, + "learning_rate": 0.00017701919394315892, + "loss": 5.008, + "step": 128650 + }, + { + "epoch": 1.1079545454545454, + "grad_norm": 1.3515625, + "learning_rate": 0.00017700191148544328, + "loss": 4.3127, + "step": 128700 + }, + { + "epoch": 1.1083849862258952, + "grad_norm": 2.75, + "learning_rate": 0.00017698462337595308, + "loss": 4.3472, + "step": 128750 + }, + { + "epoch": 1.1088154269972452, + "grad_norm": 3.40625, + "learning_rate": 0.0001769673296159571, + "loss": 4.6588, + "step": 128800 + }, + { + "epoch": 1.109245867768595, + "grad_norm": 5.53125, + "learning_rate": 0.00017695003020672475, + "loss": 4.8604, + "step": 128850 + }, + { + "epoch": 1.1096763085399448, + "grad_norm": 2.5, + "learning_rate": 0.00017693272514952575, + "loss": 4.7601, + "step": 128900 + }, + { + "epoch": 1.1101067493112948, + "grad_norm": 3.859375, + "learning_rate": 0.00017691541444563025, + "loss": 4.7327, + "step": 128950 + }, + { + "epoch": 1.1105371900826446, + "grad_norm": 1.9296875, + "learning_rate": 0.0001768980980963088, + "loss": 4.8305, + "step": 129000 + }, + { + "epoch": 1.1105371900826446, + "eval_loss": 5.248120307922363, + "eval_runtime": 21.7745, + "eval_samples_per_second": 29.392, + "eval_steps_per_second": 14.696, + "eval_tts_loss": 6.979110696898764, + "step": 129000 + }, + { + "epoch": 1.1109676308539944, + "grad_norm": 4.21875, + "learning_rate": 0.00017688077610283242, + "loss": 4.9047, + "step": 129050 + }, + { + "epoch": 1.1113980716253444, + "grad_norm": 2.28125, + "learning_rate": 0.00017686344846647249, + "loss": 4.9494, + "step": 129100 + }, + { + "epoch": 1.1118285123966942, + "grad_norm": 3.375, + "learning_rate": 0.0001768461151885008, + "loss": 4.5499, + "step": 129150 + }, + { + "epoch": 1.112258953168044, + "grad_norm": 1.8125, + "learning_rate": 0.0001768287762701896, + "loss": 4.6235, + "step": 129200 + }, + { + "epoch": 1.112689393939394, + "grad_norm": 3.265625, + "learning_rate": 0.00017681143171281152, + "loss": 4.6963, + "step": 129250 + }, + { + "epoch": 1.1131198347107438, + "grad_norm": 1.8203125, + "learning_rate": 0.00017679408151763962, + "loss": 4.2662, + "step": 129300 + }, + { + "epoch": 1.1135502754820936, + "grad_norm": 2.6875, + "learning_rate": 0.00017677672568594739, + "loss": 4.3946, + "step": 129350 + }, + { + "epoch": 1.1139807162534434, + "grad_norm": 1.8359375, + "learning_rate": 0.00017675936421900862, + "loss": 4.6193, + "step": 129400 + }, + { + "epoch": 1.1144111570247934, + "grad_norm": 1.546875, + "learning_rate": 0.00017674199711809773, + "loss": 4.847, + "step": 129450 + }, + { + "epoch": 1.1148415977961432, + "grad_norm": 2.984375, + "learning_rate": 0.00017672462438448933, + "loss": 4.8154, + "step": 129500 + }, + { + "epoch": 1.115272038567493, + "grad_norm": 2.71875, + "learning_rate": 0.00017670724601945862, + "loss": 4.6874, + "step": 129550 + }, + { + "epoch": 1.115702479338843, + "grad_norm": 2.671875, + "learning_rate": 0.00017668986202428106, + "loss": 4.685, + "step": 129600 + }, + { + "epoch": 1.1161329201101928, + "grad_norm": 2.828125, + "learning_rate": 0.00017667247240023263, + "loss": 4.1753, + "step": 129650 + }, + { + "epoch": 1.1165633608815426, + "grad_norm": 3.375, + "learning_rate": 0.0001766550771485897, + "loss": 4.4835, + "step": 129700 + }, + { + "epoch": 1.1169938016528926, + "grad_norm": 2.078125, + "learning_rate": 0.000176637676270629, + "loss": 4.5487, + "step": 129750 + }, + { + "epoch": 1.1174242424242424, + "grad_norm": 3.328125, + "learning_rate": 0.0001766202697676278, + "loss": 4.5791, + "step": 129800 + }, + { + "epoch": 1.1178546831955922, + "grad_norm": 2.34375, + "learning_rate": 0.00017660285764086364, + "loss": 4.5032, + "step": 129850 + }, + { + "epoch": 1.1182851239669422, + "grad_norm": 2.75, + "learning_rate": 0.00017658543989161453, + "loss": 4.3983, + "step": 129900 + }, + { + "epoch": 1.118715564738292, + "grad_norm": 3.140625, + "learning_rate": 0.00017656801652115892, + "loss": 4.5268, + "step": 129950 + }, + { + "epoch": 1.1191460055096418, + "grad_norm": 2.5, + "learning_rate": 0.0001765505875307756, + "loss": 4.8744, + "step": 130000 + }, + { + "epoch": 1.1195764462809916, + "grad_norm": 1.7109375, + "learning_rate": 0.00017653315292174394, + "loss": 4.5245, + "step": 130050 + }, + { + "epoch": 1.1200068870523416, + "grad_norm": 2.203125, + "learning_rate": 0.00017651571269534347, + "loss": 4.7077, + "step": 130100 + }, + { + "epoch": 1.1204373278236914, + "grad_norm": 2.796875, + "learning_rate": 0.0001764982668528543, + "loss": 4.8312, + "step": 130150 + }, + { + "epoch": 1.1208677685950412, + "grad_norm": 2.5625, + "learning_rate": 0.00017648081539555697, + "loss": 4.7503, + "step": 130200 + }, + { + "epoch": 1.1212982093663912, + "grad_norm": 1.53125, + "learning_rate": 0.00017646335832473232, + "loss": 4.768, + "step": 130250 + }, + { + "epoch": 1.121728650137741, + "grad_norm": 2.3125, + "learning_rate": 0.0001764458956416617, + "loss": 4.7932, + "step": 130300 + }, + { + "epoch": 1.1221590909090908, + "grad_norm": 3.8125, + "learning_rate": 0.00017642842734762678, + "loss": 4.5183, + "step": 130350 + }, + { + "epoch": 1.1225895316804408, + "grad_norm": 2.265625, + "learning_rate": 0.0001764109534439098, + "loss": 4.7769, + "step": 130400 + }, + { + "epoch": 1.1230199724517906, + "grad_norm": 0.349609375, + "learning_rate": 0.00017639347393179317, + "loss": 4.6036, + "step": 130450 + }, + { + "epoch": 1.1234504132231404, + "grad_norm": 2.828125, + "learning_rate": 0.00017637598881255995, + "loss": 4.1616, + "step": 130500 + }, + { + "epoch": 1.1238808539944904, + "grad_norm": 1.7421875, + "learning_rate": 0.0001763584980874935, + "loss": 4.3915, + "step": 130550 + }, + { + "epoch": 1.1243112947658402, + "grad_norm": 2.59375, + "learning_rate": 0.00017634100175787753, + "loss": 4.3013, + "step": 130600 + }, + { + "epoch": 1.12474173553719, + "grad_norm": 2.875, + "learning_rate": 0.00017632349982499632, + "loss": 4.6347, + "step": 130650 + }, + { + "epoch": 1.1251721763085398, + "grad_norm": 3.1875, + "learning_rate": 0.00017630599229013442, + "loss": 4.547, + "step": 130700 + }, + { + "epoch": 1.1256026170798898, + "grad_norm": 2.53125, + "learning_rate": 0.00017628847915457686, + "loss": 4.2865, + "step": 130750 + }, + { + "epoch": 1.1260330578512396, + "grad_norm": 1.7421875, + "learning_rate": 0.0001762709604196091, + "loss": 4.6399, + "step": 130800 + }, + { + "epoch": 1.1264634986225897, + "grad_norm": 1.109375, + "learning_rate": 0.0001762534360865169, + "loss": 4.5703, + "step": 130850 + }, + { + "epoch": 1.1268939393939394, + "grad_norm": 4.09375, + "learning_rate": 0.00017623590615658657, + "loss": 4.4677, + "step": 130900 + }, + { + "epoch": 1.1273243801652892, + "grad_norm": 4.625, + "learning_rate": 0.00017621837063110479, + "loss": 4.5387, + "step": 130950 + }, + { + "epoch": 1.127754820936639, + "grad_norm": 2.515625, + "learning_rate": 0.00017620082951135856, + "loss": 4.4496, + "step": 131000 + }, + { + "epoch": 1.128185261707989, + "grad_norm": 2.015625, + "learning_rate": 0.0001761832827986354, + "loss": 4.598, + "step": 131050 + }, + { + "epoch": 1.1286157024793388, + "grad_norm": 2.1875, + "learning_rate": 0.0001761657304942232, + "loss": 4.7248, + "step": 131100 + }, + { + "epoch": 1.1290461432506886, + "grad_norm": 2.40625, + "learning_rate": 0.00017614817259941026, + "loss": 4.7915, + "step": 131150 + }, + { + "epoch": 1.1294765840220387, + "grad_norm": 1.859375, + "learning_rate": 0.00017613060911548527, + "loss": 4.6542, + "step": 131200 + }, + { + "epoch": 1.1299070247933884, + "grad_norm": 2.84375, + "learning_rate": 0.00017611304004373735, + "loss": 4.7307, + "step": 131250 + }, + { + "epoch": 1.1303374655647382, + "grad_norm": 1.90625, + "learning_rate": 0.0001760954653854561, + "loss": 4.5429, + "step": 131300 + }, + { + "epoch": 1.130767906336088, + "grad_norm": 3.578125, + "learning_rate": 0.0001760778851419314, + "loss": 4.9145, + "step": 131350 + }, + { + "epoch": 1.131198347107438, + "grad_norm": 1.640625, + "learning_rate": 0.0001760602993144536, + "loss": 4.5879, + "step": 131400 + }, + { + "epoch": 1.1316287878787878, + "grad_norm": 1.8515625, + "learning_rate": 0.00017604270790431352, + "loss": 4.6708, + "step": 131450 + }, + { + "epoch": 1.1320592286501379, + "grad_norm": 2.4375, + "learning_rate": 0.00017602511091280225, + "loss": 4.6827, + "step": 131500 + }, + { + "epoch": 1.1324896694214877, + "grad_norm": 2.046875, + "learning_rate": 0.00017600750834121143, + "loss": 4.3763, + "step": 131550 + }, + { + "epoch": 1.1329201101928374, + "grad_norm": 3.84375, + "learning_rate": 0.00017598990019083305, + "loss": 4.5403, + "step": 131600 + }, + { + "epoch": 1.1333505509641872, + "grad_norm": 3.890625, + "learning_rate": 0.00017597228646295943, + "loss": 4.704, + "step": 131650 + }, + { + "epoch": 1.1337809917355373, + "grad_norm": 3.625, + "learning_rate": 0.00017595466715888348, + "loss": 4.7661, + "step": 131700 + }, + { + "epoch": 1.134211432506887, + "grad_norm": 2.25, + "learning_rate": 0.0001759370422798984, + "loss": 4.697, + "step": 131750 + }, + { + "epoch": 1.1346418732782368, + "grad_norm": 3.578125, + "learning_rate": 0.00017591941182729776, + "loss": 4.6169, + "step": 131800 + }, + { + "epoch": 1.1350723140495869, + "grad_norm": 1.40625, + "learning_rate": 0.0001759017758023757, + "loss": 4.504, + "step": 131850 + }, + { + "epoch": 1.1355027548209367, + "grad_norm": 2.28125, + "learning_rate": 0.00017588413420642653, + "loss": 4.6758, + "step": 131900 + }, + { + "epoch": 1.1359331955922864, + "grad_norm": 3.453125, + "learning_rate": 0.0001758664870407452, + "loss": 5.0743, + "step": 131950 + }, + { + "epoch": 1.1363636363636362, + "grad_norm": 2.75, + "learning_rate": 0.00017584883430662697, + "loss": 4.6312, + "step": 132000 + }, + { + "epoch": 1.1363636363636362, + "eval_loss": 5.238390922546387, + "eval_runtime": 21.7258, + "eval_samples_per_second": 29.458, + "eval_steps_per_second": 14.729, + "eval_tts_loss": 6.946057393402399, + "step": 132000 + }, + { + "epoch": 1.1367940771349863, + "grad_norm": 3.109375, + "learning_rate": 0.00017583117600536747, + "loss": 4.5312, + "step": 132050 + }, + { + "epoch": 1.137224517906336, + "grad_norm": 1.734375, + "learning_rate": 0.00017581351213826282, + "loss": 4.5015, + "step": 132100 + }, + { + "epoch": 1.137654958677686, + "grad_norm": 3.515625, + "learning_rate": 0.00017579584270660948, + "loss": 4.7982, + "step": 132150 + }, + { + "epoch": 1.1380853994490359, + "grad_norm": 2.0625, + "learning_rate": 0.0001757781677117044, + "loss": 4.7285, + "step": 132200 + }, + { + "epoch": 1.1385158402203857, + "grad_norm": 3.5625, + "learning_rate": 0.00017576048715484482, + "loss": 4.53, + "step": 132250 + }, + { + "epoch": 1.1389462809917354, + "grad_norm": 0.9375, + "learning_rate": 0.00017574280103732854, + "loss": 4.7411, + "step": 132300 + }, + { + "epoch": 1.1393767217630855, + "grad_norm": 2.671875, + "learning_rate": 0.00017572510936045358, + "loss": 4.7267, + "step": 132350 + }, + { + "epoch": 1.1398071625344353, + "grad_norm": 2.40625, + "learning_rate": 0.00017570741212551854, + "loss": 4.8826, + "step": 132400 + }, + { + "epoch": 1.140237603305785, + "grad_norm": 1.5390625, + "learning_rate": 0.0001756897093338223, + "loss": 4.0393, + "step": 132450 + }, + { + "epoch": 1.140668044077135, + "grad_norm": 2.484375, + "learning_rate": 0.00017567200098666426, + "loss": 4.3133, + "step": 132500 + }, + { + "epoch": 1.1410984848484849, + "grad_norm": 2.34375, + "learning_rate": 0.00017565428708534418, + "loss": 4.5982, + "step": 132550 + }, + { + "epoch": 1.1415289256198347, + "grad_norm": 2.46875, + "learning_rate": 0.00017563656763116222, + "loss": 4.4531, + "step": 132600 + }, + { + "epoch": 1.1419593663911844, + "grad_norm": 0.62890625, + "learning_rate": 0.00017561884262541887, + "loss": 4.7224, + "step": 132650 + }, + { + "epoch": 1.1423898071625345, + "grad_norm": 2.53125, + "learning_rate": 0.00017560111206941522, + "loss": 4.2587, + "step": 132700 + }, + { + "epoch": 1.1428202479338843, + "grad_norm": 2.78125, + "learning_rate": 0.0001755833759644526, + "loss": 4.9991, + "step": 132750 + }, + { + "epoch": 1.1432506887052343, + "grad_norm": 2.9375, + "learning_rate": 0.00017556563431183277, + "loss": 4.6288, + "step": 132800 + }, + { + "epoch": 1.143681129476584, + "grad_norm": 2.90625, + "learning_rate": 0.00017554788711285802, + "loss": 5.0109, + "step": 132850 + }, + { + "epoch": 1.1441115702479339, + "grad_norm": 4.9375, + "learning_rate": 0.00017553013436883086, + "loss": 4.6113, + "step": 132900 + }, + { + "epoch": 1.1445420110192837, + "grad_norm": 2.03125, + "learning_rate": 0.00017551237608105438, + "loss": 4.7877, + "step": 132950 + }, + { + "epoch": 1.1449724517906337, + "grad_norm": 1.8125, + "learning_rate": 0.0001754946122508319, + "loss": 4.4095, + "step": 133000 + }, + { + "epoch": 1.1454028925619835, + "grad_norm": 1.6953125, + "learning_rate": 0.00017547684287946738, + "loss": 4.3929, + "step": 133050 + }, + { + "epoch": 1.1458333333333333, + "grad_norm": 3.734375, + "learning_rate": 0.00017545906796826496, + "loss": 4.9235, + "step": 133100 + }, + { + "epoch": 1.1462637741046833, + "grad_norm": 1.5546875, + "learning_rate": 0.0001754412875185293, + "loss": 4.7911, + "step": 133150 + }, + { + "epoch": 1.146694214876033, + "grad_norm": 1.140625, + "learning_rate": 0.00017542350153156545, + "loss": 4.792, + "step": 133200 + }, + { + "epoch": 1.1471246556473829, + "grad_norm": 2.203125, + "learning_rate": 0.00017540571000867886, + "loss": 4.572, + "step": 133250 + }, + { + "epoch": 1.1475550964187329, + "grad_norm": 2.078125, + "learning_rate": 0.00017538791295117543, + "loss": 4.9435, + "step": 133300 + }, + { + "epoch": 1.1479855371900827, + "grad_norm": 2.890625, + "learning_rate": 0.00017537011036036136, + "loss": 4.7983, + "step": 133350 + }, + { + "epoch": 1.1484159779614325, + "grad_norm": 3.640625, + "learning_rate": 0.00017535230223754335, + "loss": 4.3793, + "step": 133400 + }, + { + "epoch": 1.1488464187327825, + "grad_norm": 2.28125, + "learning_rate": 0.0001753344885840285, + "loss": 4.278, + "step": 133450 + }, + { + "epoch": 1.1492768595041323, + "grad_norm": 1.390625, + "learning_rate": 0.00017531666940112425, + "loss": 4.6926, + "step": 133500 + }, + { + "epoch": 1.149707300275482, + "grad_norm": 2.578125, + "learning_rate": 0.00017529884469013853, + "loss": 4.5613, + "step": 133550 + }, + { + "epoch": 1.1501377410468319, + "grad_norm": 0.70703125, + "learning_rate": 0.00017528101445237965, + "loss": 4.2171, + "step": 133600 + }, + { + "epoch": 1.1505681818181819, + "grad_norm": 1.7421875, + "learning_rate": 0.00017526317868915624, + "loss": 4.646, + "step": 133650 + }, + { + "epoch": 1.1509986225895317, + "grad_norm": 2.78125, + "learning_rate": 0.00017524533740177742, + "loss": 4.8612, + "step": 133700 + }, + { + "epoch": 1.1514290633608815, + "grad_norm": 3.265625, + "learning_rate": 0.00017522749059155278, + "loss": 4.2613, + "step": 133750 + }, + { + "epoch": 1.1518595041322315, + "grad_norm": 2.3125, + "learning_rate": 0.00017520963825979217, + "loss": 4.434, + "step": 133800 + }, + { + "epoch": 1.1522899449035813, + "grad_norm": 4.21875, + "learning_rate": 0.00017519178040780592, + "loss": 4.3308, + "step": 133850 + }, + { + "epoch": 1.152720385674931, + "grad_norm": 2.375, + "learning_rate": 0.00017517391703690478, + "loss": 4.4181, + "step": 133900 + }, + { + "epoch": 1.153150826446281, + "grad_norm": 2.28125, + "learning_rate": 0.00017515604814839986, + "loss": 4.7665, + "step": 133950 + }, + { + "epoch": 1.1535812672176309, + "grad_norm": 2.96875, + "learning_rate": 0.00017513817374360273, + "loss": 4.6749, + "step": 134000 + }, + { + "epoch": 1.1540117079889807, + "grad_norm": 3.8125, + "learning_rate": 0.00017512029382382531, + "loss": 4.1754, + "step": 134050 + }, + { + "epoch": 1.1544421487603307, + "grad_norm": 3.546875, + "learning_rate": 0.00017510240839037992, + "loss": 4.4877, + "step": 134100 + }, + { + "epoch": 1.1548725895316805, + "grad_norm": 4.46875, + "learning_rate": 0.00017508451744457934, + "loss": 4.5622, + "step": 134150 + }, + { + "epoch": 1.1553030303030303, + "grad_norm": 3.671875, + "learning_rate": 0.00017506662098773677, + "loss": 4.7159, + "step": 134200 + }, + { + "epoch": 1.15573347107438, + "grad_norm": 1.921875, + "learning_rate": 0.0001750487190211657, + "loss": 4.1376, + "step": 134250 + }, + { + "epoch": 1.15616391184573, + "grad_norm": 3.03125, + "learning_rate": 0.00017503081154618012, + "loss": 4.8649, + "step": 134300 + }, + { + "epoch": 1.1565943526170799, + "grad_norm": 3.234375, + "learning_rate": 0.0001750128985640944, + "loss": 4.6983, + "step": 134350 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 3.109375, + "learning_rate": 0.00017499498007622337, + "loss": 4.9357, + "step": 134400 + }, + { + "epoch": 1.1574552341597797, + "grad_norm": 3.265625, + "learning_rate": 0.0001749770560838821, + "loss": 4.5523, + "step": 134450 + }, + { + "epoch": 1.1578856749311295, + "grad_norm": 3.890625, + "learning_rate": 0.00017495912658838626, + "loss": 4.8399, + "step": 134500 + }, + { + "epoch": 1.1583161157024793, + "grad_norm": 3.875, + "learning_rate": 0.0001749411915910518, + "loss": 4.736, + "step": 134550 + }, + { + "epoch": 1.1587465564738293, + "grad_norm": 2.828125, + "learning_rate": 0.00017492325109319512, + "loss": 4.4008, + "step": 134600 + }, + { + "epoch": 1.159176997245179, + "grad_norm": 0.9921875, + "learning_rate": 0.00017490530509613303, + "loss": 4.2166, + "step": 134650 + }, + { + "epoch": 1.1596074380165289, + "grad_norm": 1.625, + "learning_rate": 0.00017488735360118266, + "loss": 5.058, + "step": 134700 + }, + { + "epoch": 1.160037878787879, + "grad_norm": 4.1875, + "learning_rate": 0.00017486939660966172, + "loss": 4.625, + "step": 134750 + }, + { + "epoch": 1.1604683195592287, + "grad_norm": 1.140625, + "learning_rate": 0.00017485143412288812, + "loss": 4.2593, + "step": 134800 + }, + { + "epoch": 1.1608987603305785, + "grad_norm": 2.265625, + "learning_rate": 0.0001748334661421803, + "loss": 4.8134, + "step": 134850 + }, + { + "epoch": 1.1613292011019283, + "grad_norm": 2.703125, + "learning_rate": 0.00017481549266885709, + "loss": 4.8117, + "step": 134900 + }, + { + "epoch": 1.1617596418732783, + "grad_norm": 1.296875, + "learning_rate": 0.00017479751370423768, + "loss": 4.5121, + "step": 134950 + }, + { + "epoch": 1.162190082644628, + "grad_norm": 2.171875, + "learning_rate": 0.00017477952924964169, + "loss": 4.7107, + "step": 135000 + }, + { + "epoch": 1.162190082644628, + "eval_loss": 5.229957580566406, + "eval_runtime": 21.9034, + "eval_samples_per_second": 29.219, + "eval_steps_per_second": 14.61, + "eval_tts_loss": 7.000764441175345, + "step": 135000 + }, + { + "epoch": 1.1626205234159779, + "grad_norm": 1.203125, + "learning_rate": 0.00017476153930638915, + "loss": 4.3452, + "step": 135050 + }, + { + "epoch": 1.163050964187328, + "grad_norm": 3.359375, + "learning_rate": 0.00017474354387580045, + "loss": 4.7001, + "step": 135100 + }, + { + "epoch": 1.1634814049586777, + "grad_norm": 5.71875, + "learning_rate": 0.0001747255429591965, + "loss": 4.404, + "step": 135150 + }, + { + "epoch": 1.1639118457300275, + "grad_norm": 3.34375, + "learning_rate": 0.00017470753655789842, + "loss": 4.6584, + "step": 135200 + }, + { + "epoch": 1.1643422865013775, + "grad_norm": 2.265625, + "learning_rate": 0.00017468952467322794, + "loss": 4.3545, + "step": 135250 + }, + { + "epoch": 1.1647727272727273, + "grad_norm": 2.859375, + "learning_rate": 0.00017467150730650702, + "loss": 4.4773, + "step": 135300 + }, + { + "epoch": 1.165203168044077, + "grad_norm": 3.3125, + "learning_rate": 0.0001746534844590581, + "loss": 4.7949, + "step": 135350 + }, + { + "epoch": 1.165633608815427, + "grad_norm": 2.34375, + "learning_rate": 0.00017463545613220408, + "loss": 4.1822, + "step": 135400 + }, + { + "epoch": 1.166064049586777, + "grad_norm": 3.8125, + "learning_rate": 0.00017461742232726817, + "loss": 4.853, + "step": 135450 + }, + { + "epoch": 1.1664944903581267, + "grad_norm": 2.375, + "learning_rate": 0.00017459938304557396, + "loss": 4.443, + "step": 135500 + }, + { + "epoch": 1.1669249311294765, + "grad_norm": 3.5, + "learning_rate": 0.00017458133828844558, + "loss": 4.301, + "step": 135550 + }, + { + "epoch": 1.1673553719008265, + "grad_norm": 2.8125, + "learning_rate": 0.00017456328805720746, + "loss": 4.9344, + "step": 135600 + }, + { + "epoch": 1.1677858126721763, + "grad_norm": 1.875, + "learning_rate": 0.00017454523235318437, + "loss": 4.4283, + "step": 135650 + }, + { + "epoch": 1.168216253443526, + "grad_norm": 1.8203125, + "learning_rate": 0.00017452717117770163, + "loss": 4.5411, + "step": 135700 + }, + { + "epoch": 1.168646694214876, + "grad_norm": 3.40625, + "learning_rate": 0.00017450910453208483, + "loss": 4.6708, + "step": 135750 + }, + { + "epoch": 1.169077134986226, + "grad_norm": 2.234375, + "learning_rate": 0.00017449103241766017, + "loss": 5.1022, + "step": 135800 + }, + { + "epoch": 1.1695075757575757, + "grad_norm": 3.0625, + "learning_rate": 0.00017447295483575393, + "loss": 4.0141, + "step": 135850 + }, + { + "epoch": 1.1699380165289257, + "grad_norm": 2.171875, + "learning_rate": 0.00017445487178769305, + "loss": 4.846, + "step": 135900 + }, + { + "epoch": 1.1703684573002755, + "grad_norm": 3.671875, + "learning_rate": 0.00017443678327480478, + "loss": 4.5868, + "step": 135950 + }, + { + "epoch": 1.1707988980716253, + "grad_norm": 1.2890625, + "learning_rate": 0.00017441868929841676, + "loss": 4.2212, + "step": 136000 + }, + { + "epoch": 1.1712293388429753, + "grad_norm": 3.234375, + "learning_rate": 0.00017440058985985707, + "loss": 5.0071, + "step": 136050 + }, + { + "epoch": 1.171659779614325, + "grad_norm": 3.203125, + "learning_rate": 0.00017438248496045414, + "loss": 4.5556, + "step": 136100 + }, + { + "epoch": 1.172090220385675, + "grad_norm": 1.25, + "learning_rate": 0.0001743643746015369, + "loss": 4.8886, + "step": 136150 + }, + { + "epoch": 1.1725206611570247, + "grad_norm": 3.0, + "learning_rate": 0.00017434625878443453, + "loss": 4.6341, + "step": 136200 + }, + { + "epoch": 1.1729511019283747, + "grad_norm": 5.65625, + "learning_rate": 0.00017432813751047674, + "loss": 4.278, + "step": 136250 + }, + { + "epoch": 1.1733815426997245, + "grad_norm": 3.59375, + "learning_rate": 0.0001743100107809936, + "loss": 4.5555, + "step": 136300 + }, + { + "epoch": 1.1738119834710743, + "grad_norm": 5.15625, + "learning_rate": 0.0001742918785973155, + "loss": 4.6431, + "step": 136350 + }, + { + "epoch": 1.1742424242424243, + "grad_norm": 4.34375, + "learning_rate": 0.00017427374096077336, + "loss": 4.5562, + "step": 136400 + }, + { + "epoch": 1.174672865013774, + "grad_norm": 1.1796875, + "learning_rate": 0.00017425559787269848, + "loss": 4.4798, + "step": 136450 + }, + { + "epoch": 1.175103305785124, + "grad_norm": 3.8125, + "learning_rate": 0.00017423744933442242, + "loss": 4.4478, + "step": 136500 + }, + { + "epoch": 1.175533746556474, + "grad_norm": 2.828125, + "learning_rate": 0.00017421929534727735, + "loss": 4.6886, + "step": 136550 + }, + { + "epoch": 1.1759641873278237, + "grad_norm": 1.9609375, + "learning_rate": 0.00017420113591259568, + "loss": 4.5264, + "step": 136600 + }, + { + "epoch": 1.1763946280991735, + "grad_norm": 1.8046875, + "learning_rate": 0.00017418297103171027, + "loss": 4.8383, + "step": 136650 + }, + { + "epoch": 1.1768250688705235, + "grad_norm": 3.171875, + "learning_rate": 0.00017416480070595443, + "loss": 4.7409, + "step": 136700 + }, + { + "epoch": 1.1772555096418733, + "grad_norm": 1.8359375, + "learning_rate": 0.00017414662493666174, + "loss": 4.5214, + "step": 136750 + }, + { + "epoch": 1.177685950413223, + "grad_norm": 3.5625, + "learning_rate": 0.00017412844372516632, + "loss": 4.6205, + "step": 136800 + }, + { + "epoch": 1.178116391184573, + "grad_norm": 3.109375, + "learning_rate": 0.00017411025707280263, + "loss": 4.6916, + "step": 136850 + }, + { + "epoch": 1.178546831955923, + "grad_norm": 2.609375, + "learning_rate": 0.00017409206498090554, + "loss": 4.3895, + "step": 136900 + }, + { + "epoch": 1.1789772727272727, + "grad_norm": 2.203125, + "learning_rate": 0.00017407386745081028, + "loss": 4.3466, + "step": 136950 + }, + { + "epoch": 1.1794077134986225, + "grad_norm": 2.703125, + "learning_rate": 0.00017405566448385254, + "loss": 4.6823, + "step": 137000 + }, + { + "epoch": 1.1798381542699725, + "grad_norm": 4.28125, + "learning_rate": 0.00017403745608136835, + "loss": 4.1412, + "step": 137050 + }, + { + "epoch": 1.1802685950413223, + "grad_norm": 2.328125, + "learning_rate": 0.00017401924224469414, + "loss": 4.5917, + "step": 137100 + }, + { + "epoch": 1.180699035812672, + "grad_norm": 2.46875, + "learning_rate": 0.00017400102297516687, + "loss": 4.5987, + "step": 137150 + }, + { + "epoch": 1.1811294765840221, + "grad_norm": 3.546875, + "learning_rate": 0.00017398279827412369, + "loss": 4.1835, + "step": 137200 + }, + { + "epoch": 1.181559917355372, + "grad_norm": 2.34375, + "learning_rate": 0.00017396456814290233, + "loss": 4.5742, + "step": 137250 + }, + { + "epoch": 1.1819903581267217, + "grad_norm": 3.671875, + "learning_rate": 0.00017394633258284076, + "loss": 4.5937, + "step": 137300 + }, + { + "epoch": 1.1824207988980717, + "grad_norm": 3.890625, + "learning_rate": 0.0001739280915952775, + "loss": 4.6926, + "step": 137350 + }, + { + "epoch": 1.1828512396694215, + "grad_norm": 3.46875, + "learning_rate": 0.00017390984518155138, + "loss": 4.3917, + "step": 137400 + }, + { + "epoch": 1.1832816804407713, + "grad_norm": 1.296875, + "learning_rate": 0.00017389159334300165, + "loss": 4.4966, + "step": 137450 + }, + { + "epoch": 1.183712121212121, + "grad_norm": 2.9375, + "learning_rate": 0.00017387333608096794, + "loss": 4.4538, + "step": 137500 + }, + { + "epoch": 1.1841425619834711, + "grad_norm": 4.09375, + "learning_rate": 0.0001738550733967903, + "loss": 4.8217, + "step": 137550 + }, + { + "epoch": 1.184573002754821, + "grad_norm": 1.5234375, + "learning_rate": 0.00017383680529180922, + "loss": 4.5733, + "step": 137600 + }, + { + "epoch": 1.1850034435261707, + "grad_norm": 2.359375, + "learning_rate": 0.00017381853176736545, + "loss": 4.3104, + "step": 137650 + }, + { + "epoch": 1.1854338842975207, + "grad_norm": 1.6953125, + "learning_rate": 0.0001738002528248003, + "loss": 4.766, + "step": 137700 + }, + { + "epoch": 1.1858643250688705, + "grad_norm": 2.59375, + "learning_rate": 0.00017378196846545534, + "loss": 4.4348, + "step": 137750 + }, + { + "epoch": 1.1862947658402203, + "grad_norm": 2.328125, + "learning_rate": 0.00017376367869067268, + "loss": 4.7157, + "step": 137800 + }, + { + "epoch": 1.1867252066115703, + "grad_norm": 2.65625, + "learning_rate": 0.0001737453835017947, + "loss": 4.4045, + "step": 137850 + }, + { + "epoch": 1.1871556473829201, + "grad_norm": 3.15625, + "learning_rate": 0.00017372708290016423, + "loss": 4.7365, + "step": 137900 + }, + { + "epoch": 1.18758608815427, + "grad_norm": 1.4140625, + "learning_rate": 0.00017370877688712447, + "loss": 4.3317, + "step": 137950 + }, + { + "epoch": 1.18801652892562, + "grad_norm": 3.125, + "learning_rate": 0.0001736904654640191, + "loss": 4.591, + "step": 138000 + }, + { + "epoch": 1.18801652892562, + "eval_loss": 5.221219062805176, + "eval_runtime": 21.6774, + "eval_samples_per_second": 29.524, + "eval_steps_per_second": 14.762, + "eval_tts_loss": 7.074069197268985, + "step": 138000 + }, + { + "epoch": 1.1884469696969697, + "grad_norm": 1.2421875, + "learning_rate": 0.00017367214863219215, + "loss": 4.481, + "step": 138050 + }, + { + "epoch": 1.1888774104683195, + "grad_norm": 2.453125, + "learning_rate": 0.00017365382639298796, + "loss": 4.6663, + "step": 138100 + }, + { + "epoch": 1.1893078512396693, + "grad_norm": 1.1875, + "learning_rate": 0.0001736354987477514, + "loss": 4.6344, + "step": 138150 + }, + { + "epoch": 1.1897382920110193, + "grad_norm": 2.640625, + "learning_rate": 0.00017361716569782765, + "loss": 4.5625, + "step": 138200 + }, + { + "epoch": 1.1901687327823691, + "grad_norm": 2.84375, + "learning_rate": 0.00017359882724456233, + "loss": 4.4223, + "step": 138250 + }, + { + "epoch": 1.190599173553719, + "grad_norm": 2.171875, + "learning_rate": 0.00017358048338930146, + "loss": 4.7565, + "step": 138300 + }, + { + "epoch": 1.191029614325069, + "grad_norm": 2.71875, + "learning_rate": 0.00017356213413339142, + "loss": 4.3504, + "step": 138350 + }, + { + "epoch": 1.1914600550964187, + "grad_norm": 2.015625, + "learning_rate": 0.00017354377947817899, + "loss": 4.7485, + "step": 138400 + }, + { + "epoch": 1.1918904958677685, + "grad_norm": 2.25, + "learning_rate": 0.0001735254194250114, + "loss": 4.3816, + "step": 138450 + }, + { + "epoch": 1.1923209366391185, + "grad_norm": 3.28125, + "learning_rate": 0.00017350705397523622, + "loss": 4.7629, + "step": 138500 + }, + { + "epoch": 1.1927513774104683, + "grad_norm": 2.0625, + "learning_rate": 0.0001734886831302014, + "loss": 4.496, + "step": 138550 + }, + { + "epoch": 1.1931818181818181, + "grad_norm": 4.625, + "learning_rate": 0.0001734703068912554, + "loss": 4.7535, + "step": 138600 + }, + { + "epoch": 1.1936122589531681, + "grad_norm": 1.7265625, + "learning_rate": 0.0001734519252597469, + "loss": 4.6305, + "step": 138650 + }, + { + "epoch": 1.194042699724518, + "grad_norm": 2.0625, + "learning_rate": 0.00017343353823702514, + "loss": 4.7616, + "step": 138700 + }, + { + "epoch": 1.1944731404958677, + "grad_norm": 2.828125, + "learning_rate": 0.0001734151458244397, + "loss": 4.5813, + "step": 138750 + }, + { + "epoch": 1.1949035812672175, + "grad_norm": 2.046875, + "learning_rate": 0.00017339674802334048, + "loss": 4.5628, + "step": 138800 + }, + { + "epoch": 1.1953340220385675, + "grad_norm": 2.59375, + "learning_rate": 0.00017337834483507786, + "loss": 4.5174, + "step": 138850 + }, + { + "epoch": 1.1957644628099173, + "grad_norm": 2.03125, + "learning_rate": 0.00017335993626100264, + "loss": 4.704, + "step": 138900 + }, + { + "epoch": 1.1961949035812671, + "grad_norm": 3.28125, + "learning_rate": 0.00017334152230246593, + "loss": 4.5928, + "step": 138950 + }, + { + "epoch": 1.1966253443526171, + "grad_norm": 2.46875, + "learning_rate": 0.00017332310296081928, + "loss": 4.8787, + "step": 139000 + }, + { + "epoch": 1.197055785123967, + "grad_norm": 2.5625, + "learning_rate": 0.00017330467823741463, + "loss": 4.4581, + "step": 139050 + }, + { + "epoch": 1.1974862258953167, + "grad_norm": 3.234375, + "learning_rate": 0.0001732862481336043, + "loss": 4.8128, + "step": 139100 + }, + { + "epoch": 1.1979166666666667, + "grad_norm": 3.296875, + "learning_rate": 0.00017326781265074103, + "loss": 4.6923, + "step": 139150 + }, + { + "epoch": 1.1983471074380165, + "grad_norm": 2.671875, + "learning_rate": 0.00017324937179017798, + "loss": 4.732, + "step": 139200 + }, + { + "epoch": 1.1987775482093663, + "grad_norm": 1.6484375, + "learning_rate": 0.00017323092555326865, + "loss": 4.7558, + "step": 139250 + }, + { + "epoch": 1.1992079889807163, + "grad_norm": 2.625, + "learning_rate": 0.00017321247394136693, + "loss": 4.7072, + "step": 139300 + }, + { + "epoch": 1.1996384297520661, + "grad_norm": 3.90625, + "learning_rate": 0.00017319401695582712, + "loss": 4.7033, + "step": 139350 + }, + { + "epoch": 1.200068870523416, + "grad_norm": 3.1875, + "learning_rate": 0.00017317555459800396, + "loss": 4.3473, + "step": 139400 + }, + { + "epoch": 1.2004993112947657, + "grad_norm": 4.25, + "learning_rate": 0.00017315708686925255, + "loss": 4.7289, + "step": 139450 + }, + { + "epoch": 1.2009297520661157, + "grad_norm": 1.1875, + "learning_rate": 0.00017313861377092838, + "loss": 4.594, + "step": 139500 + }, + { + "epoch": 1.2013601928374655, + "grad_norm": 3.078125, + "learning_rate": 0.0001731201353043873, + "loss": 4.549, + "step": 139550 + }, + { + "epoch": 1.2017906336088156, + "grad_norm": 3.140625, + "learning_rate": 0.00017310165147098558, + "loss": 4.599, + "step": 139600 + }, + { + "epoch": 1.2022210743801653, + "grad_norm": 4.15625, + "learning_rate": 0.00017308316227207998, + "loss": 4.7083, + "step": 139650 + }, + { + "epoch": 1.2026515151515151, + "grad_norm": 2.984375, + "learning_rate": 0.00017306466770902745, + "loss": 3.994, + "step": 139700 + }, + { + "epoch": 1.203081955922865, + "grad_norm": 2.625, + "learning_rate": 0.00017304616778318556, + "loss": 4.6629, + "step": 139750 + }, + { + "epoch": 1.203512396694215, + "grad_norm": 2.765625, + "learning_rate": 0.0001730276624959121, + "loss": 5.0117, + "step": 139800 + }, + { + "epoch": 1.2039428374655647, + "grad_norm": 3.109375, + "learning_rate": 0.00017300915184856533, + "loss": 4.5509, + "step": 139850 + }, + { + "epoch": 1.2043732782369145, + "grad_norm": 1.6796875, + "learning_rate": 0.00017299063584250393, + "loss": 4.5971, + "step": 139900 + }, + { + "epoch": 1.2048037190082646, + "grad_norm": 2.046875, + "learning_rate": 0.00017297211447908685, + "loss": 4.5729, + "step": 139950 + }, + { + "epoch": 1.2052341597796143, + "grad_norm": 3.25, + "learning_rate": 0.0001729535877596736, + "loss": 4.608, + "step": 140000 + }, + { + "epoch": 1.2056646005509641, + "grad_norm": 2.8125, + "learning_rate": 0.00017293505568562398, + "loss": 4.6049, + "step": 140050 + }, + { + "epoch": 1.206095041322314, + "grad_norm": 4.75, + "learning_rate": 0.00017291651825829818, + "loss": 4.6043, + "step": 140100 + }, + { + "epoch": 1.206525482093664, + "grad_norm": 3.5, + "learning_rate": 0.00017289797547905686, + "loss": 4.3925, + "step": 140150 + }, + { + "epoch": 1.2069559228650137, + "grad_norm": 3.078125, + "learning_rate": 0.00017287942734926093, + "loss": 4.3748, + "step": 140200 + }, + { + "epoch": 1.2073863636363638, + "grad_norm": 3.609375, + "learning_rate": 0.00017286087387027185, + "loss": 4.7009, + "step": 140250 + }, + { + "epoch": 1.2078168044077136, + "grad_norm": 1.4609375, + "learning_rate": 0.0001728423150434514, + "loss": 4.4022, + "step": 140300 + }, + { + "epoch": 1.2082472451790633, + "grad_norm": 3.1875, + "learning_rate": 0.0001728237508701618, + "loss": 4.6236, + "step": 140350 + }, + { + "epoch": 1.2086776859504131, + "grad_norm": 2.859375, + "learning_rate": 0.0001728051813517655, + "loss": 4.4696, + "step": 140400 + }, + { + "epoch": 1.2091081267217632, + "grad_norm": 3.921875, + "learning_rate": 0.00017278660648962558, + "loss": 4.4388, + "step": 140450 + }, + { + "epoch": 1.209538567493113, + "grad_norm": 2.75, + "learning_rate": 0.00017276802628510531, + "loss": 4.1941, + "step": 140500 + }, + { + "epoch": 1.2099690082644627, + "grad_norm": 2.515625, + "learning_rate": 0.0001727494407395685, + "loss": 4.5451, + "step": 140550 + }, + { + "epoch": 1.2103994490358128, + "grad_norm": 3.40625, + "learning_rate": 0.00017273084985437923, + "loss": 4.5245, + "step": 140600 + }, + { + "epoch": 1.2108298898071626, + "grad_norm": 4.40625, + "learning_rate": 0.0001727122536309021, + "loss": 4.3755, + "step": 140650 + }, + { + "epoch": 1.2112603305785123, + "grad_norm": 1.1328125, + "learning_rate": 0.000172693652070502, + "loss": 4.4781, + "step": 140700 + }, + { + "epoch": 1.2116907713498621, + "grad_norm": 3.328125, + "learning_rate": 0.00017267504517454424, + "loss": 4.1627, + "step": 140750 + }, + { + "epoch": 1.2121212121212122, + "grad_norm": 2.03125, + "learning_rate": 0.0001726564329443945, + "loss": 4.6936, + "step": 140800 + }, + { + "epoch": 1.212551652892562, + "grad_norm": 2.03125, + "learning_rate": 0.00017263781538141894, + "loss": 4.4489, + "step": 140850 + }, + { + "epoch": 1.212982093663912, + "grad_norm": 2.015625, + "learning_rate": 0.000172619192486984, + "loss": 4.6993, + "step": 140900 + }, + { + "epoch": 1.2134125344352618, + "grad_norm": 2.546875, + "learning_rate": 0.00017260056426245658, + "loss": 4.441, + "step": 140950 + }, + { + "epoch": 1.2138429752066116, + "grad_norm": 2.3125, + "learning_rate": 0.00017258193070920395, + "loss": 4.4734, + "step": 141000 + }, + { + "epoch": 1.2138429752066116, + "eval_loss": 5.210572719573975, + "eval_runtime": 22.0704, + "eval_samples_per_second": 28.998, + "eval_steps_per_second": 14.499, + "eval_tts_loss": 7.0870365647599645, + "step": 141000 + }, + { + "epoch": 1.2142734159779613, + "grad_norm": 2.0625, + "learning_rate": 0.00017256329182859377, + "loss": 4.6075, + "step": 141050 + }, + { + "epoch": 1.2147038567493114, + "grad_norm": 2.90625, + "learning_rate": 0.00017254464762199407, + "loss": 4.1875, + "step": 141100 + }, + { + "epoch": 1.2151342975206612, + "grad_norm": 2.6875, + "learning_rate": 0.0001725259980907733, + "loss": 4.7463, + "step": 141150 + }, + { + "epoch": 1.215564738292011, + "grad_norm": 0.81640625, + "learning_rate": 0.00017250734323630038, + "loss": 4.467, + "step": 141200 + }, + { + "epoch": 1.215995179063361, + "grad_norm": 2.125, + "learning_rate": 0.00017248868305994445, + "loss": 4.8383, + "step": 141250 + }, + { + "epoch": 1.2164256198347108, + "grad_norm": 3.078125, + "learning_rate": 0.00017247001756307516, + "loss": 4.849, + "step": 141300 + }, + { + "epoch": 1.2168560606060606, + "grad_norm": 2.390625, + "learning_rate": 0.00017245134674706244, + "loss": 4.5783, + "step": 141350 + }, + { + "epoch": 1.2172865013774103, + "grad_norm": 2.765625, + "learning_rate": 0.0001724326706132768, + "loss": 4.8766, + "step": 141400 + }, + { + "epoch": 1.2177169421487604, + "grad_norm": 2.859375, + "learning_rate": 0.000172413989163089, + "loss": 4.3604, + "step": 141450 + }, + { + "epoch": 1.2181473829201102, + "grad_norm": 2.21875, + "learning_rate": 0.00017239530239787016, + "loss": 4.611, + "step": 141500 + }, + { + "epoch": 1.2185778236914602, + "grad_norm": 2.375, + "learning_rate": 0.0001723766103189919, + "loss": 4.4779, + "step": 141550 + }, + { + "epoch": 1.21900826446281, + "grad_norm": 3.546875, + "learning_rate": 0.00017235791292782615, + "loss": 4.6852, + "step": 141600 + }, + { + "epoch": 1.2194387052341598, + "grad_norm": 2.5, + "learning_rate": 0.0001723392102257453, + "loss": 4.564, + "step": 141650 + }, + { + "epoch": 1.2198691460055096, + "grad_norm": 2.359375, + "learning_rate": 0.00017232050221412207, + "loss": 4.9894, + "step": 141700 + }, + { + "epoch": 1.2202995867768596, + "grad_norm": 3.890625, + "learning_rate": 0.00017230178889432953, + "loss": 5.0421, + "step": 141750 + }, + { + "epoch": 1.2207300275482094, + "grad_norm": 1.546875, + "learning_rate": 0.0001722830702677413, + "loss": 4.5726, + "step": 141800 + }, + { + "epoch": 1.2211604683195592, + "grad_norm": 2.109375, + "learning_rate": 0.0001722643463357312, + "loss": 4.3865, + "step": 141850 + }, + { + "epoch": 1.2215909090909092, + "grad_norm": 2.765625, + "learning_rate": 0.00017224561709967354, + "loss": 4.5016, + "step": 141900 + }, + { + "epoch": 1.222021349862259, + "grad_norm": 2.84375, + "learning_rate": 0.00017222688256094307, + "loss": 4.5605, + "step": 141950 + }, + { + "epoch": 1.2224517906336088, + "grad_norm": 2.109375, + "learning_rate": 0.0001722081427209148, + "loss": 4.6273, + "step": 142000 + }, + { + "epoch": 1.2228822314049586, + "grad_norm": 2.59375, + "learning_rate": 0.00017218939758096422, + "loss": 4.294, + "step": 142050 + }, + { + "epoch": 1.2233126721763086, + "grad_norm": 3.8125, + "learning_rate": 0.00017217064714246716, + "loss": 4.6043, + "step": 142100 + }, + { + "epoch": 1.2237431129476584, + "grad_norm": 2.125, + "learning_rate": 0.00017215189140679988, + "loss": 4.548, + "step": 142150 + }, + { + "epoch": 1.2241735537190084, + "grad_norm": 2.0, + "learning_rate": 0.00017213313037533904, + "loss": 4.503, + "step": 142200 + }, + { + "epoch": 1.2246039944903582, + "grad_norm": 2.6875, + "learning_rate": 0.0001721143640494616, + "loss": 4.1412, + "step": 142250 + }, + { + "epoch": 1.225034435261708, + "grad_norm": 2.578125, + "learning_rate": 0.000172095592430545, + "loss": 4.5492, + "step": 142300 + }, + { + "epoch": 1.2254648760330578, + "grad_norm": 2.703125, + "learning_rate": 0.00017207681551996705, + "loss": 4.9064, + "step": 142350 + }, + { + "epoch": 1.2258953168044078, + "grad_norm": 2.96875, + "learning_rate": 0.0001720580333191059, + "loss": 4.6188, + "step": 142400 + }, + { + "epoch": 1.2263257575757576, + "grad_norm": 3.28125, + "learning_rate": 0.00017203924582934014, + "loss": 4.3496, + "step": 142450 + }, + { + "epoch": 1.2267561983471074, + "grad_norm": 2.984375, + "learning_rate": 0.00017202045305204875, + "loss": 5.1003, + "step": 142500 + }, + { + "epoch": 1.2271866391184574, + "grad_norm": 3.859375, + "learning_rate": 0.00017200165498861106, + "loss": 4.4041, + "step": 142550 + }, + { + "epoch": 1.2276170798898072, + "grad_norm": 3.6875, + "learning_rate": 0.00017198285164040682, + "loss": 4.5644, + "step": 142600 + }, + { + "epoch": 1.228047520661157, + "grad_norm": 2.828125, + "learning_rate": 0.00017196404300881614, + "loss": 4.4952, + "step": 142650 + }, + { + "epoch": 1.228477961432507, + "grad_norm": 3.46875, + "learning_rate": 0.00017194522909521947, + "loss": 4.7108, + "step": 142700 + }, + { + "epoch": 1.2289084022038568, + "grad_norm": 3.84375, + "learning_rate": 0.00017192640990099786, + "loss": 4.4127, + "step": 142750 + }, + { + "epoch": 1.2293388429752066, + "grad_norm": 2.390625, + "learning_rate": 0.0001719075854275325, + "loss": 4.2333, + "step": 142800 + }, + { + "epoch": 1.2297692837465566, + "grad_norm": 2.84375, + "learning_rate": 0.00017188875567620507, + "loss": 4.5475, + "step": 142850 + }, + { + "epoch": 1.2301997245179064, + "grad_norm": 3.15625, + "learning_rate": 0.00017186992064839766, + "loss": 4.4541, + "step": 142900 + }, + { + "epoch": 1.2306301652892562, + "grad_norm": 1.1328125, + "learning_rate": 0.00017185108034549267, + "loss": 4.2658, + "step": 142950 + }, + { + "epoch": 1.231060606060606, + "grad_norm": 0.9765625, + "learning_rate": 0.000171832234768873, + "loss": 4.6635, + "step": 143000 + }, + { + "epoch": 1.231491046831956, + "grad_norm": 1.7265625, + "learning_rate": 0.00017181338391992186, + "loss": 4.5951, + "step": 143050 + }, + { + "epoch": 1.2319214876033058, + "grad_norm": 3.53125, + "learning_rate": 0.00017179452780002286, + "loss": 4.8548, + "step": 143100 + }, + { + "epoch": 1.2323519283746556, + "grad_norm": 2.984375, + "learning_rate": 0.00017177566641055994, + "loss": 5.0237, + "step": 143150 + }, + { + "epoch": 1.2327823691460056, + "grad_norm": 2.5, + "learning_rate": 0.00017175679975291755, + "loss": 4.4822, + "step": 143200 + }, + { + "epoch": 1.2332128099173554, + "grad_norm": 3.734375, + "learning_rate": 0.00017173792782848048, + "loss": 4.8718, + "step": 143250 + }, + { + "epoch": 1.2336432506887052, + "grad_norm": 2.953125, + "learning_rate": 0.00017171905063863386, + "loss": 4.4284, + "step": 143300 + }, + { + "epoch": 1.2340736914600552, + "grad_norm": 3.265625, + "learning_rate": 0.0001717001681847632, + "loss": 4.2758, + "step": 143350 + }, + { + "epoch": 1.234504132231405, + "grad_norm": 3.359375, + "learning_rate": 0.00017168128046825447, + "loss": 4.6864, + "step": 143400 + }, + { + "epoch": 1.2349345730027548, + "grad_norm": 2.453125, + "learning_rate": 0.00017166238749049398, + "loss": 4.1353, + "step": 143450 + }, + { + "epoch": 1.2353650137741048, + "grad_norm": 2.859375, + "learning_rate": 0.00017164348925286844, + "loss": 4.3424, + "step": 143500 + }, + { + "epoch": 1.2357954545454546, + "grad_norm": 2.546875, + "learning_rate": 0.00017162458575676492, + "loss": 4.51, + "step": 143550 + }, + { + "epoch": 1.2362258953168044, + "grad_norm": 2.453125, + "learning_rate": 0.0001716056770035709, + "loss": 4.2697, + "step": 143600 + }, + { + "epoch": 1.2366563360881542, + "grad_norm": 2.78125, + "learning_rate": 0.00017158676299467427, + "loss": 4.4364, + "step": 143650 + }, + { + "epoch": 1.2370867768595042, + "grad_norm": 4.25, + "learning_rate": 0.00017156784373146327, + "loss": 4.6218, + "step": 143700 + }, + { + "epoch": 1.237517217630854, + "grad_norm": 3.140625, + "learning_rate": 0.00017154891921532651, + "loss": 4.691, + "step": 143750 + }, + { + "epoch": 1.2379476584022038, + "grad_norm": 2.765625, + "learning_rate": 0.000171529989447653, + "loss": 4.5416, + "step": 143800 + }, + { + "epoch": 1.2383780991735538, + "grad_norm": 3.109375, + "learning_rate": 0.00017151105442983223, + "loss": 4.651, + "step": 143850 + }, + { + "epoch": 1.2388085399449036, + "grad_norm": 1.4453125, + "learning_rate": 0.00017149211416325386, + "loss": 4.6083, + "step": 143900 + }, + { + "epoch": 1.2392389807162534, + "grad_norm": 2.65625, + "learning_rate": 0.00017147316864930814, + "loss": 4.3779, + "step": 143950 + }, + { + "epoch": 1.2396694214876034, + "grad_norm": 1.1796875, + "learning_rate": 0.00017145421788938565, + "loss": 4.4656, + "step": 144000 + }, + { + "epoch": 1.2396694214876034, + "eval_loss": 5.19620418548584, + "eval_runtime": 21.8281, + "eval_samples_per_second": 29.32, + "eval_steps_per_second": 14.66, + "eval_tts_loss": 7.050007847587131, + "step": 144000 + }, + { + "epoch": 1.2400998622589532, + "grad_norm": 0.984375, + "learning_rate": 0.00017143526188487726, + "loss": 4.7263, + "step": 144050 + }, + { + "epoch": 1.240530303030303, + "grad_norm": 4.0625, + "learning_rate": 0.0001714163006371744, + "loss": 4.5929, + "step": 144100 + }, + { + "epoch": 1.240960743801653, + "grad_norm": 4.34375, + "learning_rate": 0.00017139733414766868, + "loss": 4.6241, + "step": 144150 + }, + { + "epoch": 1.2413911845730028, + "grad_norm": 2.46875, + "learning_rate": 0.0001713783624177523, + "loss": 4.5753, + "step": 144200 + }, + { + "epoch": 1.2418216253443526, + "grad_norm": 2.90625, + "learning_rate": 0.0001713593854488176, + "loss": 4.59, + "step": 144250 + }, + { + "epoch": 1.2422520661157024, + "grad_norm": 2.859375, + "learning_rate": 0.00017134040324225764, + "loss": 4.6879, + "step": 144300 + }, + { + "epoch": 1.2426825068870524, + "grad_norm": 2.46875, + "learning_rate": 0.00017132141579946555, + "loss": 4.5068, + "step": 144350 + }, + { + "epoch": 1.2431129476584022, + "grad_norm": 2.859375, + "learning_rate": 0.00017130242312183498, + "loss": 4.7414, + "step": 144400 + }, + { + "epoch": 1.243543388429752, + "grad_norm": 1.734375, + "learning_rate": 0.00017128342521075996, + "loss": 4.7805, + "step": 144450 + }, + { + "epoch": 1.243973829201102, + "grad_norm": 4.53125, + "learning_rate": 0.00017126442206763488, + "loss": 4.8175, + "step": 144500 + }, + { + "epoch": 1.2444042699724518, + "grad_norm": 1.65625, + "learning_rate": 0.00017124541369385458, + "loss": 4.2131, + "step": 144550 + }, + { + "epoch": 1.2448347107438016, + "grad_norm": 1.8984375, + "learning_rate": 0.00017122640009081417, + "loss": 4.4931, + "step": 144600 + }, + { + "epoch": 1.2452651515151516, + "grad_norm": 2.765625, + "learning_rate": 0.00017120738125990932, + "loss": 4.7294, + "step": 144650 + }, + { + "epoch": 1.2456955922865014, + "grad_norm": 3.5625, + "learning_rate": 0.00017118835720253582, + "loss": 4.7757, + "step": 144700 + }, + { + "epoch": 1.2461260330578512, + "grad_norm": 1.796875, + "learning_rate": 0.00017116932792009008, + "loss": 4.3835, + "step": 144750 + }, + { + "epoch": 1.2465564738292012, + "grad_norm": 4.5, + "learning_rate": 0.0001711502934139688, + "loss": 4.8918, + "step": 144800 + }, + { + "epoch": 1.246986914600551, + "grad_norm": 3.46875, + "learning_rate": 0.0001711312536855691, + "loss": 4.6512, + "step": 144850 + }, + { + "epoch": 1.2474173553719008, + "grad_norm": 1.6796875, + "learning_rate": 0.00017111220873628838, + "loss": 4.7372, + "step": 144900 + }, + { + "epoch": 1.2478477961432506, + "grad_norm": 3.75, + "learning_rate": 0.00017109315856752455, + "loss": 4.6873, + "step": 144950 + }, + { + "epoch": 1.2482782369146006, + "grad_norm": 4.65625, + "learning_rate": 0.00017107410318067582, + "loss": 4.3637, + "step": 145000 + }, + { + "epoch": 1.2487086776859504, + "grad_norm": 3.265625, + "learning_rate": 0.00017105504257714088, + "loss": 4.6875, + "step": 145050 + }, + { + "epoch": 1.2491391184573002, + "grad_norm": 3.578125, + "learning_rate": 0.0001710359767583187, + "loss": 4.4667, + "step": 145100 + }, + { + "epoch": 1.2495695592286502, + "grad_norm": 2.40625, + "learning_rate": 0.00017101690572560863, + "loss": 4.5576, + "step": 145150 + }, + { + "epoch": 1.25, + "grad_norm": 4.0, + "learning_rate": 0.00017099782948041048, + "loss": 4.7459, + "step": 145200 + }, + { + "epoch": 1.2504304407713498, + "grad_norm": 2.71875, + "learning_rate": 0.0001709787480241244, + "loss": 4.8452, + "step": 145250 + }, + { + "epoch": 1.2508608815426996, + "grad_norm": 2.046875, + "learning_rate": 0.00017095966135815093, + "loss": 5.022, + "step": 145300 + }, + { + "epoch": 1.2512913223140496, + "grad_norm": 2.34375, + "learning_rate": 0.000170940569483891, + "loss": 4.8232, + "step": 145350 + }, + { + "epoch": 1.2517217630853994, + "grad_norm": 3.390625, + "learning_rate": 0.0001709214724027459, + "loss": 4.9928, + "step": 145400 + }, + { + "epoch": 1.2521522038567494, + "grad_norm": 0.828125, + "learning_rate": 0.0001709023701161173, + "loss": 4.7853, + "step": 145450 + }, + { + "epoch": 1.2525826446280992, + "grad_norm": 1.859375, + "learning_rate": 0.00017088326262540733, + "loss": 4.1903, + "step": 145500 + }, + { + "epoch": 1.253013085399449, + "grad_norm": 2.0, + "learning_rate": 0.00017086414993201836, + "loss": 4.553, + "step": 145550 + }, + { + "epoch": 1.2534435261707988, + "grad_norm": 2.359375, + "learning_rate": 0.00017084503203735322, + "loss": 4.5055, + "step": 145600 + }, + { + "epoch": 1.2538739669421488, + "grad_norm": 2.3125, + "learning_rate": 0.0001708259089428152, + "loss": 4.3498, + "step": 145650 + }, + { + "epoch": 1.2543044077134986, + "grad_norm": 4.625, + "learning_rate": 0.00017080678064980784, + "loss": 4.765, + "step": 145700 + }, + { + "epoch": 1.2547348484848486, + "grad_norm": 4.40625, + "learning_rate": 0.00017078764715973513, + "loss": 4.595, + "step": 145750 + }, + { + "epoch": 1.2551652892561984, + "grad_norm": 3.515625, + "learning_rate": 0.00017076850847400144, + "loss": 4.4891, + "step": 145800 + }, + { + "epoch": 1.2555957300275482, + "grad_norm": 1.46875, + "learning_rate": 0.00017074936459401146, + "loss": 4.5071, + "step": 145850 + }, + { + "epoch": 1.256026170798898, + "grad_norm": 1.0546875, + "learning_rate": 0.00017073021552117034, + "loss": 4.5827, + "step": 145900 + }, + { + "epoch": 1.256456611570248, + "grad_norm": 2.4375, + "learning_rate": 0.00017071106125688358, + "loss": 4.9186, + "step": 145950 + }, + { + "epoch": 1.2568870523415978, + "grad_norm": 2.5, + "learning_rate": 0.0001706919018025571, + "loss": 4.7654, + "step": 146000 + }, + { + "epoch": 1.2573174931129476, + "grad_norm": 2.171875, + "learning_rate": 0.00017067273715959709, + "loss": 4.5248, + "step": 146050 + }, + { + "epoch": 1.2577479338842976, + "grad_norm": 1.515625, + "learning_rate": 0.00017065356732941022, + "loss": 4.8473, + "step": 146100 + }, + { + "epoch": 1.2581783746556474, + "grad_norm": 0.69140625, + "learning_rate": 0.00017063439231340354, + "loss": 3.9998, + "step": 146150 + }, + { + "epoch": 1.2586088154269972, + "grad_norm": 1.953125, + "learning_rate": 0.00017061521211298442, + "loss": 4.1946, + "step": 146200 + }, + { + "epoch": 1.259039256198347, + "grad_norm": 3.265625, + "learning_rate": 0.0001705960267295607, + "loss": 4.3768, + "step": 146250 + }, + { + "epoch": 1.259469696969697, + "grad_norm": 1.9921875, + "learning_rate": 0.00017057683616454047, + "loss": 4.7711, + "step": 146300 + }, + { + "epoch": 1.2599001377410468, + "grad_norm": 3.3125, + "learning_rate": 0.00017055764041933237, + "loss": 4.4223, + "step": 146350 + }, + { + "epoch": 1.2603305785123968, + "grad_norm": 2.609375, + "learning_rate": 0.00017053843949534523, + "loss": 4.1182, + "step": 146400 + }, + { + "epoch": 1.2607610192837466, + "grad_norm": 1.546875, + "learning_rate": 0.00017051923339398838, + "loss": 4.5495, + "step": 146450 + }, + { + "epoch": 1.2611914600550964, + "grad_norm": 3.78125, + "learning_rate": 0.00017050002211667158, + "loss": 4.3128, + "step": 146500 + }, + { + "epoch": 1.2616219008264462, + "grad_norm": 2.734375, + "learning_rate": 0.0001704808056648048, + "loss": 4.417, + "step": 146550 + }, + { + "epoch": 1.2620523415977962, + "grad_norm": 2.21875, + "learning_rate": 0.00017046158403979855, + "loss": 4.7308, + "step": 146600 + }, + { + "epoch": 1.262482782369146, + "grad_norm": 1.8828125, + "learning_rate": 0.00017044235724306361, + "loss": 4.5017, + "step": 146650 + }, + { + "epoch": 1.2629132231404958, + "grad_norm": 2.125, + "learning_rate": 0.00017042312527601122, + "loss": 4.491, + "step": 146700 + }, + { + "epoch": 1.2633436639118458, + "grad_norm": 1.2578125, + "learning_rate": 0.00017040388814005294, + "loss": 4.6107, + "step": 146750 + }, + { + "epoch": 1.2637741046831956, + "grad_norm": 1.3359375, + "learning_rate": 0.00017038464583660077, + "loss": 4.4281, + "step": 146800 + }, + { + "epoch": 1.2642045454545454, + "grad_norm": 3.625, + "learning_rate": 0.00017036539836706705, + "loss": 4.4565, + "step": 146850 + }, + { + "epoch": 1.2646349862258952, + "grad_norm": 3.640625, + "learning_rate": 0.00017034614573286443, + "loss": 4.3151, + "step": 146900 + }, + { + "epoch": 1.2650654269972452, + "grad_norm": 2.859375, + "learning_rate": 0.0001703268879354061, + "loss": 4.4134, + "step": 146950 + }, + { + "epoch": 1.265495867768595, + "grad_norm": 1.8125, + "learning_rate": 0.00017030762497610553, + "loss": 4.6642, + "step": 147000 + }, + { + "epoch": 1.265495867768595, + "eval_loss": 5.188155651092529, + "eval_runtime": 21.8393, + "eval_samples_per_second": 29.305, + "eval_steps_per_second": 14.653, + "eval_tts_loss": 7.0855276917543994, + "step": 147000 + }, + { + "epoch": 1.265926308539945, + "grad_norm": 3.015625, + "learning_rate": 0.00017028835685637653, + "loss": 4.7554, + "step": 147050 + }, + { + "epoch": 1.2663567493112948, + "grad_norm": 1.265625, + "learning_rate": 0.00017026908357763336, + "loss": 4.7875, + "step": 147100 + }, + { + "epoch": 1.2667871900826446, + "grad_norm": 1.984375, + "learning_rate": 0.00017024980514129064, + "loss": 4.5771, + "step": 147150 + }, + { + "epoch": 1.2672176308539944, + "grad_norm": 1.84375, + "learning_rate": 0.0001702305215487634, + "loss": 4.3038, + "step": 147200 + }, + { + "epoch": 1.2676480716253444, + "grad_norm": 4.96875, + "learning_rate": 0.00017021123280146696, + "loss": 4.801, + "step": 147250 + }, + { + "epoch": 1.2680785123966942, + "grad_norm": 3.65625, + "learning_rate": 0.00017019193890081708, + "loss": 4.3845, + "step": 147300 + }, + { + "epoch": 1.268508953168044, + "grad_norm": 2.71875, + "learning_rate": 0.00017017263984822994, + "loss": 4.2734, + "step": 147350 + }, + { + "epoch": 1.268939393939394, + "grad_norm": 1.5625, + "learning_rate": 0.000170153335645122, + "loss": 4.4577, + "step": 147400 + }, + { + "epoch": 1.2693698347107438, + "grad_norm": 2.125, + "learning_rate": 0.00017013402629291019, + "loss": 3.9716, + "step": 147450 + }, + { + "epoch": 1.2698002754820936, + "grad_norm": 3.78125, + "learning_rate": 0.0001701147117930117, + "loss": 4.5053, + "step": 147500 + }, + { + "epoch": 1.2702307162534434, + "grad_norm": 1.5703125, + "learning_rate": 0.00017009539214684423, + "loss": 4.2726, + "step": 147550 + }, + { + "epoch": 1.2706611570247934, + "grad_norm": 4.125, + "learning_rate": 0.0001700760673558258, + "loss": 4.6494, + "step": 147600 + }, + { + "epoch": 1.2710915977961432, + "grad_norm": 5.65625, + "learning_rate": 0.0001700567374213748, + "loss": 4.7959, + "step": 147650 + }, + { + "epoch": 1.2715220385674932, + "grad_norm": 2.234375, + "learning_rate": 0.00017003740234491002, + "loss": 4.5812, + "step": 147700 + }, + { + "epoch": 1.271952479338843, + "grad_norm": 3.625, + "learning_rate": 0.00017001806212785058, + "loss": 4.5615, + "step": 147750 + }, + { + "epoch": 1.2723829201101928, + "grad_norm": 2.140625, + "learning_rate": 0.00016999871677161603, + "loss": 4.6125, + "step": 147800 + }, + { + "epoch": 1.2728133608815426, + "grad_norm": 3.5625, + "learning_rate": 0.00016997936627762626, + "loss": 4.3393, + "step": 147850 + }, + { + "epoch": 1.2732438016528926, + "grad_norm": 3.296875, + "learning_rate": 0.0001699600106473016, + "loss": 4.7559, + "step": 147900 + }, + { + "epoch": 1.2736742424242424, + "grad_norm": 1.0625, + "learning_rate": 0.00016994064988206268, + "loss": 4.6604, + "step": 147950 + }, + { + "epoch": 1.2741046831955922, + "grad_norm": 1.703125, + "learning_rate": 0.0001699212839833305, + "loss": 4.7125, + "step": 148000 + }, + { + "epoch": 1.2745351239669422, + "grad_norm": 3.25, + "learning_rate": 0.00016990191295252656, + "loss": 4.5814, + "step": 148050 + }, + { + "epoch": 1.274965564738292, + "grad_norm": 2.34375, + "learning_rate": 0.00016988253679107258, + "loss": 4.4965, + "step": 148100 + }, + { + "epoch": 1.2753960055096418, + "grad_norm": 3.046875, + "learning_rate": 0.00016986315550039074, + "loss": 4.9009, + "step": 148150 + }, + { + "epoch": 1.2758264462809916, + "grad_norm": 2.734375, + "learning_rate": 0.00016984376908190364, + "loss": 4.6258, + "step": 148200 + }, + { + "epoch": 1.2762568870523416, + "grad_norm": 3.671875, + "learning_rate": 0.00016982437753703414, + "loss": 4.5483, + "step": 148250 + }, + { + "epoch": 1.2766873278236914, + "grad_norm": 3.875, + "learning_rate": 0.0001698049808672056, + "loss": 4.4415, + "step": 148300 + }, + { + "epoch": 1.2771177685950414, + "grad_norm": 2.421875, + "learning_rate": 0.00016978557907384157, + "loss": 4.4254, + "step": 148350 + }, + { + "epoch": 1.2775482093663912, + "grad_norm": 3.203125, + "learning_rate": 0.00016976617215836627, + "loss": 4.495, + "step": 148400 + }, + { + "epoch": 1.277978650137741, + "grad_norm": 3.234375, + "learning_rate": 0.00016974676012220397, + "loss": 4.2674, + "step": 148450 + }, + { + "epoch": 1.2784090909090908, + "grad_norm": 2.703125, + "learning_rate": 0.00016972734296677962, + "loss": 4.5779, + "step": 148500 + }, + { + "epoch": 1.2788395316804408, + "grad_norm": 1.8984375, + "learning_rate": 0.00016970792069351823, + "loss": 4.622, + "step": 148550 + }, + { + "epoch": 1.2792699724517906, + "grad_norm": 3.953125, + "learning_rate": 0.0001696884933038455, + "loss": 4.37, + "step": 148600 + }, + { + "epoch": 1.2797004132231404, + "grad_norm": 2.75, + "learning_rate": 0.00016966906079918733, + "loss": 3.8526, + "step": 148650 + }, + { + "epoch": 1.2801308539944904, + "grad_norm": 2.203125, + "learning_rate": 0.0001696496231809699, + "loss": 4.3262, + "step": 148700 + }, + { + "epoch": 1.2805612947658402, + "grad_norm": 2.09375, + "learning_rate": 0.00016963018045062004, + "loss": 4.5249, + "step": 148750 + }, + { + "epoch": 1.28099173553719, + "grad_norm": 3.4375, + "learning_rate": 0.00016961073260956475, + "loss": 4.7711, + "step": 148800 + }, + { + "epoch": 1.2814221763085398, + "grad_norm": 3.3125, + "learning_rate": 0.00016959127965923142, + "loss": 4.559, + "step": 148850 + }, + { + "epoch": 1.2818526170798898, + "grad_norm": 1.6796875, + "learning_rate": 0.00016957182160104794, + "loss": 4.3972, + "step": 148900 + }, + { + "epoch": 1.2822830578512396, + "grad_norm": 2.78125, + "learning_rate": 0.00016955235843644242, + "loss": 4.5368, + "step": 148950 + }, + { + "epoch": 1.2827134986225897, + "grad_norm": 4.25, + "learning_rate": 0.00016953289016684344, + "loss": 4.6124, + "step": 149000 + }, + { + "epoch": 1.2831439393939394, + "grad_norm": 4.375, + "learning_rate": 0.00016951341679367993, + "loss": 4.8719, + "step": 149050 + }, + { + "epoch": 1.2835743801652892, + "grad_norm": 1.6328125, + "learning_rate": 0.0001694939383183812, + "loss": 4.663, + "step": 149100 + }, + { + "epoch": 1.284004820936639, + "grad_norm": 2.203125, + "learning_rate": 0.0001694744547423769, + "loss": 4.923, + "step": 149150 + }, + { + "epoch": 1.284435261707989, + "grad_norm": 6.5, + "learning_rate": 0.00016945496606709707, + "loss": 4.7873, + "step": 149200 + }, + { + "epoch": 1.2848657024793388, + "grad_norm": 3.234375, + "learning_rate": 0.00016943547229397222, + "loss": 4.4368, + "step": 149250 + }, + { + "epoch": 1.2852961432506886, + "grad_norm": 2.484375, + "learning_rate": 0.00016941597342443306, + "loss": 4.3052, + "step": 149300 + }, + { + "epoch": 1.2857265840220387, + "grad_norm": 5.53125, + "learning_rate": 0.00016939646945991082, + "loss": 4.6331, + "step": 149350 + }, + { + "epoch": 1.2861570247933884, + "grad_norm": 3.453125, + "learning_rate": 0.00016937696040183705, + "loss": 4.2599, + "step": 149400 + }, + { + "epoch": 1.2865874655647382, + "grad_norm": 3.078125, + "learning_rate": 0.00016935744625164363, + "loss": 4.6169, + "step": 149450 + }, + { + "epoch": 1.287017906336088, + "grad_norm": 2.0625, + "learning_rate": 0.0001693379270107629, + "loss": 4.684, + "step": 149500 + }, + { + "epoch": 1.287448347107438, + "grad_norm": 2.53125, + "learning_rate": 0.0001693184026806275, + "loss": 4.5671, + "step": 149550 + }, + { + "epoch": 1.2878787878787878, + "grad_norm": 3.40625, + "learning_rate": 0.0001692988732626705, + "loss": 4.5103, + "step": 149600 + }, + { + "epoch": 1.2883092286501379, + "grad_norm": 3.671875, + "learning_rate": 0.00016927933875832525, + "loss": 4.4864, + "step": 149650 + }, + { + "epoch": 1.2887396694214877, + "grad_norm": 1.5546875, + "learning_rate": 0.00016925979916902563, + "loss": 4.4726, + "step": 149700 + }, + { + "epoch": 1.2891701101928374, + "grad_norm": 4.46875, + "learning_rate": 0.00016924025449620577, + "loss": 4.6283, + "step": 149750 + }, + { + "epoch": 1.2896005509641872, + "grad_norm": 2.53125, + "learning_rate": 0.0001692207047413002, + "loss": 4.6297, + "step": 149800 + }, + { + "epoch": 1.2900309917355373, + "grad_norm": 2.03125, + "learning_rate": 0.00016920114990574386, + "loss": 4.6217, + "step": 149850 + }, + { + "epoch": 1.290461432506887, + "grad_norm": 2.65625, + "learning_rate": 0.00016918158999097198, + "loss": 4.4836, + "step": 149900 + }, + { + "epoch": 1.2908918732782368, + "grad_norm": 4.3125, + "learning_rate": 0.00016916202499842026, + "loss": 4.7021, + "step": 149950 + }, + { + "epoch": 1.2913223140495869, + "grad_norm": 2.765625, + "learning_rate": 0.00016914245492952466, + "loss": 4.7028, + "step": 150000 + }, + { + "epoch": 1.2913223140495869, + "eval_loss": 5.180142402648926, + "eval_runtime": 21.7269, + "eval_samples_per_second": 29.457, + "eval_steps_per_second": 14.728, + "eval_tts_loss": 7.046059679570362, + "step": 150000 + }, + { + "epoch": 1.2917527548209367, + "grad_norm": 3.546875, + "learning_rate": 0.0001691228797857217, + "loss": 4.5666, + "step": 150050 + }, + { + "epoch": 1.2921831955922864, + "grad_norm": 3.765625, + "learning_rate": 0.00016910329956844805, + "loss": 4.4699, + "step": 150100 + }, + { + "epoch": 1.2926136363636362, + "grad_norm": 2.765625, + "learning_rate": 0.0001690837142791409, + "loss": 4.5514, + "step": 150150 + }, + { + "epoch": 1.2930440771349863, + "grad_norm": 1.984375, + "learning_rate": 0.00016906412391923777, + "loss": 4.2519, + "step": 150200 + }, + { + "epoch": 1.293474517906336, + "grad_norm": 2.6875, + "learning_rate": 0.00016904452849017648, + "loss": 4.67, + "step": 150250 + }, + { + "epoch": 1.293904958677686, + "grad_norm": 3.0, + "learning_rate": 0.0001690249279933954, + "loss": 4.4038, + "step": 150300 + }, + { + "epoch": 1.2943353994490359, + "grad_norm": 2.8125, + "learning_rate": 0.00016900532243033312, + "loss": 4.9448, + "step": 150350 + }, + { + "epoch": 1.2947658402203857, + "grad_norm": 3.46875, + "learning_rate": 0.00016898571180242863, + "loss": 4.4284, + "step": 150400 + }, + { + "epoch": 1.2951962809917354, + "grad_norm": 2.84375, + "learning_rate": 0.00016896609611112136, + "loss": 4.2791, + "step": 150450 + }, + { + "epoch": 1.2956267217630855, + "grad_norm": 3.71875, + "learning_rate": 0.000168946475357851, + "loss": 4.3141, + "step": 150500 + }, + { + "epoch": 1.2960571625344353, + "grad_norm": 3.609375, + "learning_rate": 0.00016892684954405765, + "loss": 4.5364, + "step": 150550 + }, + { + "epoch": 1.296487603305785, + "grad_norm": 2.015625, + "learning_rate": 0.0001689072186711819, + "loss": 4.5861, + "step": 150600 + }, + { + "epoch": 1.296918044077135, + "grad_norm": 2.828125, + "learning_rate": 0.0001688875827406645, + "loss": 4.4741, + "step": 150650 + }, + { + "epoch": 1.2973484848484849, + "grad_norm": 5.0625, + "learning_rate": 0.00016886794175394675, + "loss": 4.5311, + "step": 150700 + }, + { + "epoch": 1.2977789256198347, + "grad_norm": 3.421875, + "learning_rate": 0.0001688482957124703, + "loss": 4.314, + "step": 150750 + }, + { + "epoch": 1.2982093663911844, + "grad_norm": 2.046875, + "learning_rate": 0.00016882864461767704, + "loss": 4.67, + "step": 150800 + }, + { + "epoch": 1.2986398071625345, + "grad_norm": 3.171875, + "learning_rate": 0.00016880898847100934, + "loss": 4.6059, + "step": 150850 + }, + { + "epoch": 1.2990702479338843, + "grad_norm": 2.765625, + "learning_rate": 0.00016878932727390996, + "loss": 4.5343, + "step": 150900 + }, + { + "epoch": 1.2995006887052343, + "grad_norm": 2.09375, + "learning_rate": 0.00016876966102782195, + "loss": 4.6155, + "step": 150950 + }, + { + "epoch": 1.299931129476584, + "grad_norm": 3.0625, + "learning_rate": 0.00016874998973418878, + "loss": 4.4232, + "step": 151000 + }, + { + "epoch": 1.3003615702479339, + "grad_norm": 2.875, + "learning_rate": 0.00016873031339445431, + "loss": 4.6204, + "step": 151050 + }, + { + "epoch": 1.3007920110192837, + "grad_norm": 2.765625, + "learning_rate": 0.0001687106320100627, + "loss": 4.7969, + "step": 151100 + }, + { + "epoch": 1.3012224517906337, + "grad_norm": 3.25, + "learning_rate": 0.00016869094558245854, + "loss": 4.5595, + "step": 151150 + }, + { + "epoch": 1.3016528925619835, + "grad_norm": 2.078125, + "learning_rate": 0.00016867125411308673, + "loss": 4.7172, + "step": 151200 + }, + { + "epoch": 1.3020833333333333, + "grad_norm": 1.78125, + "learning_rate": 0.00016865155760339265, + "loss": 4.6102, + "step": 151250 + }, + { + "epoch": 1.3025137741046833, + "grad_norm": 2.859375, + "learning_rate": 0.00016863185605482195, + "loss": 4.7415, + "step": 151300 + }, + { + "epoch": 1.302944214876033, + "grad_norm": 1.625, + "learning_rate": 0.00016861214946882065, + "loss": 4.3747, + "step": 151350 + }, + { + "epoch": 1.3033746556473829, + "grad_norm": 0.85546875, + "learning_rate": 0.00016859243784683522, + "loss": 4.0872, + "step": 151400 + }, + { + "epoch": 1.3038050964187327, + "grad_norm": 1.2265625, + "learning_rate": 0.00016857272119031244, + "loss": 4.4674, + "step": 151450 + }, + { + "epoch": 1.3042355371900827, + "grad_norm": 2.625, + "learning_rate": 0.00016855299950069947, + "loss": 4.5789, + "step": 151500 + }, + { + "epoch": 1.3046659779614325, + "grad_norm": 3.8125, + "learning_rate": 0.00016853327277944382, + "loss": 4.633, + "step": 151550 + }, + { + "epoch": 1.3050964187327825, + "grad_norm": 1.4140625, + "learning_rate": 0.0001685135410279934, + "loss": 4.5294, + "step": 151600 + }, + { + "epoch": 1.3055268595041323, + "grad_norm": 1.578125, + "learning_rate": 0.00016849380424779648, + "loss": 4.2456, + "step": 151650 + }, + { + "epoch": 1.305957300275482, + "grad_norm": 3.0625, + "learning_rate": 0.0001684740624403017, + "loss": 4.777, + "step": 151700 + }, + { + "epoch": 1.3063877410468319, + "grad_norm": 2.5625, + "learning_rate": 0.00016845431560695806, + "loss": 4.3336, + "step": 151750 + }, + { + "epoch": 1.3068181818181819, + "grad_norm": 1.96875, + "learning_rate": 0.00016843456374921495, + "loss": 4.4481, + "step": 151800 + }, + { + "epoch": 1.3072486225895317, + "grad_norm": 1.65625, + "learning_rate": 0.00016841480686852206, + "loss": 4.4452, + "step": 151850 + }, + { + "epoch": 1.3076790633608815, + "grad_norm": 3.453125, + "learning_rate": 0.00016839504496632957, + "loss": 4.8353, + "step": 151900 + }, + { + "epoch": 1.3081095041322315, + "grad_norm": 2.875, + "learning_rate": 0.00016837527804408796, + "loss": 4.546, + "step": 151950 + }, + { + "epoch": 1.3085399449035813, + "grad_norm": 4.0625, + "learning_rate": 0.00016835550610324807, + "loss": 4.1746, + "step": 152000 + }, + { + "epoch": 1.308970385674931, + "grad_norm": 4.125, + "learning_rate": 0.00016833572914526102, + "loss": 4.6171, + "step": 152050 + }, + { + "epoch": 1.3094008264462809, + "grad_norm": 2.0625, + "learning_rate": 0.00016831594717157855, + "loss": 4.8406, + "step": 152100 + }, + { + "epoch": 1.3098312672176309, + "grad_norm": 3.015625, + "learning_rate": 0.00016829616018365252, + "loss": 4.6286, + "step": 152150 + }, + { + "epoch": 1.3102617079889807, + "grad_norm": 2.8125, + "learning_rate": 0.00016827636818293526, + "loss": 4.79, + "step": 152200 + }, + { + "epoch": 1.3106921487603307, + "grad_norm": 3.265625, + "learning_rate": 0.00016825657117087948, + "loss": 4.7157, + "step": 152250 + }, + { + "epoch": 1.3111225895316805, + "grad_norm": 1.0859375, + "learning_rate": 0.00016823676914893825, + "loss": 4.2032, + "step": 152300 + }, + { + "epoch": 1.3115530303030303, + "grad_norm": 3.40625, + "learning_rate": 0.00016821696211856497, + "loss": 4.2972, + "step": 152350 + }, + { + "epoch": 1.31198347107438, + "grad_norm": 1.8046875, + "learning_rate": 0.00016819715008121346, + "loss": 4.2005, + "step": 152400 + }, + { + "epoch": 1.31241391184573, + "grad_norm": 3.21875, + "learning_rate": 0.00016817733303833786, + "loss": 4.6816, + "step": 152450 + }, + { + "epoch": 1.3128443526170799, + "grad_norm": 3.078125, + "learning_rate": 0.00016815751099139267, + "loss": 4.6942, + "step": 152500 + }, + { + "epoch": 1.3132747933884297, + "grad_norm": 2.34375, + "learning_rate": 0.00016813768394183284, + "loss": 4.3114, + "step": 152550 + }, + { + "epoch": 1.3137052341597797, + "grad_norm": 2.859375, + "learning_rate": 0.00016811785189111363, + "loss": 4.4654, + "step": 152600 + }, + { + "epoch": 1.3141356749311295, + "grad_norm": 2.453125, + "learning_rate": 0.00016809801484069062, + "loss": 4.6403, + "step": 152650 + }, + { + "epoch": 1.3145661157024793, + "grad_norm": 3.171875, + "learning_rate": 0.00016807817279201984, + "loss": 4.6006, + "step": 152700 + }, + { + "epoch": 1.314996556473829, + "grad_norm": 4.3125, + "learning_rate": 0.0001680583257465577, + "loss": 4.5661, + "step": 152750 + }, + { + "epoch": 1.315426997245179, + "grad_norm": 1.3671875, + "learning_rate": 0.00016803847370576084, + "loss": 4.1854, + "step": 152800 + }, + { + "epoch": 1.3158574380165289, + "grad_norm": 3.0625, + "learning_rate": 0.00016801861667108638, + "loss": 4.5186, + "step": 152850 + }, + { + "epoch": 1.316287878787879, + "grad_norm": 3.796875, + "learning_rate": 0.00016799875464399184, + "loss": 5.1423, + "step": 152900 + }, + { + "epoch": 1.3167183195592287, + "grad_norm": 2.84375, + "learning_rate": 0.000167978887625935, + "loss": 4.4366, + "step": 152950 + }, + { + "epoch": 1.3171487603305785, + "grad_norm": 2.53125, + "learning_rate": 0.00016795901561837405, + "loss": 4.3613, + "step": 153000 + }, + { + "epoch": 1.3171487603305785, + "eval_loss": 5.173136234283447, + "eval_runtime": 21.8333, + "eval_samples_per_second": 29.313, + "eval_steps_per_second": 14.657, + "eval_tts_loss": 7.084487522601173, + "step": 153000 + }, + { + "epoch": 1.3175792011019283, + "grad_norm": 1.953125, + "learning_rate": 0.00016793913862276757, + "loss": 4.6094, + "step": 153050 + }, + { + "epoch": 1.3180096418732783, + "grad_norm": 3.328125, + "learning_rate": 0.0001679192566405745, + "loss": 4.3604, + "step": 153100 + }, + { + "epoch": 1.318440082644628, + "grad_norm": 5.09375, + "learning_rate": 0.00016789936967325414, + "loss": 4.5244, + "step": 153150 + }, + { + "epoch": 1.3188705234159779, + "grad_norm": 4.40625, + "learning_rate": 0.00016787947772226613, + "loss": 4.3624, + "step": 153200 + }, + { + "epoch": 1.319300964187328, + "grad_norm": 2.0625, + "learning_rate": 0.0001678595807890705, + "loss": 4.4661, + "step": 153250 + }, + { + "epoch": 1.3197314049586777, + "grad_norm": 2.953125, + "learning_rate": 0.00016783967887512764, + "loss": 4.1379, + "step": 153300 + }, + { + "epoch": 1.3201618457300275, + "grad_norm": 2.171875, + "learning_rate": 0.00016781977198189832, + "loss": 4.6748, + "step": 153350 + }, + { + "epoch": 1.3205922865013773, + "grad_norm": 2.796875, + "learning_rate": 0.0001677998601108436, + "loss": 4.9102, + "step": 153400 + }, + { + "epoch": 1.3210227272727273, + "grad_norm": 3.234375, + "learning_rate": 0.0001677799432634251, + "loss": 4.4248, + "step": 153450 + }, + { + "epoch": 1.321453168044077, + "grad_norm": 2.4375, + "learning_rate": 0.00016776002144110457, + "loss": 4.1825, + "step": 153500 + }, + { + "epoch": 1.321883608815427, + "grad_norm": 2.25, + "learning_rate": 0.00016774009464534426, + "loss": 4.1231, + "step": 153550 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 2.171875, + "learning_rate": 0.00016772016287760672, + "loss": 4.5703, + "step": 153600 + }, + { + "epoch": 1.3227444903581267, + "grad_norm": 2.40625, + "learning_rate": 0.00016770022613935495, + "loss": 4.2335, + "step": 153650 + }, + { + "epoch": 1.3231749311294765, + "grad_norm": 2.296875, + "learning_rate": 0.00016768028443205226, + "loss": 4.7468, + "step": 153700 + }, + { + "epoch": 1.3236053719008265, + "grad_norm": 1.515625, + "learning_rate": 0.00016766033775716228, + "loss": 4.4976, + "step": 153750 + }, + { + "epoch": 1.3240358126721763, + "grad_norm": 2.78125, + "learning_rate": 0.0001676403861161491, + "loss": 4.8121, + "step": 153800 + }, + { + "epoch": 1.3244662534435263, + "grad_norm": 3.03125, + "learning_rate": 0.00016762042951047712, + "loss": 4.4706, + "step": 153850 + }, + { + "epoch": 1.324896694214876, + "grad_norm": 2.4375, + "learning_rate": 0.00016760046794161109, + "loss": 4.5775, + "step": 153900 + }, + { + "epoch": 1.325327134986226, + "grad_norm": 2.90625, + "learning_rate": 0.00016758050141101616, + "loss": 5.0554, + "step": 153950 + }, + { + "epoch": 1.3257575757575757, + "grad_norm": 2.671875, + "learning_rate": 0.00016756052992015783, + "loss": 4.445, + "step": 154000 + }, + { + "epoch": 1.3261880165289255, + "grad_norm": 2.515625, + "learning_rate": 0.00016754055347050194, + "loss": 4.3357, + "step": 154050 + }, + { + "epoch": 1.3266184573002755, + "grad_norm": 3.921875, + "learning_rate": 0.00016752057206351477, + "loss": 4.5034, + "step": 154100 + }, + { + "epoch": 1.3270488980716253, + "grad_norm": 3.171875, + "learning_rate": 0.0001675005857006629, + "loss": 4.7499, + "step": 154150 + }, + { + "epoch": 1.3274793388429753, + "grad_norm": 2.734375, + "learning_rate": 0.00016748059438341325, + "loss": 4.1199, + "step": 154200 + }, + { + "epoch": 1.327909779614325, + "grad_norm": 2.15625, + "learning_rate": 0.00016746059811323315, + "loss": 4.5631, + "step": 154250 + }, + { + "epoch": 1.328340220385675, + "grad_norm": 4.15625, + "learning_rate": 0.00016744059689159028, + "loss": 4.1595, + "step": 154300 + }, + { + "epoch": 1.3287706611570247, + "grad_norm": 2.109375, + "learning_rate": 0.00016742059071995276, + "loss": 4.6359, + "step": 154350 + }, + { + "epoch": 1.3292011019283747, + "grad_norm": 3.1875, + "learning_rate": 0.0001674005795997889, + "loss": 4.7987, + "step": 154400 + }, + { + "epoch": 1.3296315426997245, + "grad_norm": 5.0, + "learning_rate": 0.0001673805635325675, + "loss": 4.8978, + "step": 154450 + }, + { + "epoch": 1.3300619834710745, + "grad_norm": 4.5, + "learning_rate": 0.00016736054251975774, + "loss": 4.4571, + "step": 154500 + }, + { + "epoch": 1.3304924242424243, + "grad_norm": 5.53125, + "learning_rate": 0.00016734051656282909, + "loss": 4.4233, + "step": 154550 + }, + { + "epoch": 1.330922865013774, + "grad_norm": 2.921875, + "learning_rate": 0.0001673204856632514, + "loss": 4.5482, + "step": 154600 + }, + { + "epoch": 1.331353305785124, + "grad_norm": 3.0, + "learning_rate": 0.0001673004498224949, + "loss": 4.7337, + "step": 154650 + }, + { + "epoch": 1.3317837465564737, + "grad_norm": 2.359375, + "learning_rate": 0.00016728040904203018, + "loss": 4.5273, + "step": 154700 + }, + { + "epoch": 1.3322141873278237, + "grad_norm": 3.265625, + "learning_rate": 0.0001672603633233282, + "loss": 4.7702, + "step": 154750 + }, + { + "epoch": 1.3326446280991735, + "grad_norm": 2.453125, + "learning_rate": 0.00016724031266786028, + "loss": 4.463, + "step": 154800 + }, + { + "epoch": 1.3330750688705235, + "grad_norm": 4.5625, + "learning_rate": 0.00016722025707709807, + "loss": 4.1421, + "step": 154850 + }, + { + "epoch": 1.3335055096418733, + "grad_norm": 3.140625, + "learning_rate": 0.00016720019655251363, + "loss": 4.437, + "step": 154900 + }, + { + "epoch": 1.333935950413223, + "grad_norm": 3.203125, + "learning_rate": 0.00016718013109557933, + "loss": 4.3212, + "step": 154950 + }, + { + "epoch": 1.334366391184573, + "grad_norm": 2.796875, + "learning_rate": 0.00016716006070776796, + "loss": 4.8673, + "step": 155000 + }, + { + "epoch": 1.334796831955923, + "grad_norm": 3.3125, + "learning_rate": 0.0001671399853905526, + "loss": 4.5034, + "step": 155050 + }, + { + "epoch": 1.3352272727272727, + "grad_norm": 2.390625, + "learning_rate": 0.00016711990514540684, + "loss": 4.3151, + "step": 155100 + }, + { + "epoch": 1.3356577134986227, + "grad_norm": 2.84375, + "learning_rate": 0.00016709981997380438, + "loss": 4.4263, + "step": 155150 + }, + { + "epoch": 1.3360881542699725, + "grad_norm": 3.03125, + "learning_rate": 0.00016707972987721953, + "loss": 4.665, + "step": 155200 + }, + { + "epoch": 1.3365185950413223, + "grad_norm": 4.28125, + "learning_rate": 0.00016705963485712686, + "loss": 4.498, + "step": 155250 + }, + { + "epoch": 1.336949035812672, + "grad_norm": 3.140625, + "learning_rate": 0.00016703953491500125, + "loss": 4.5109, + "step": 155300 + }, + { + "epoch": 1.3373794765840221, + "grad_norm": 2.890625, + "learning_rate": 0.00016701943005231802, + "loss": 4.4373, + "step": 155350 + }, + { + "epoch": 1.337809917355372, + "grad_norm": 1.671875, + "learning_rate": 0.0001669993202705528, + "loss": 4.6884, + "step": 155400 + }, + { + "epoch": 1.3382403581267217, + "grad_norm": 3.5, + "learning_rate": 0.00016697920557118165, + "loss": 4.3216, + "step": 155450 + }, + { + "epoch": 1.3386707988980717, + "grad_norm": 2.953125, + "learning_rate": 0.00016695908595568094, + "loss": 4.6211, + "step": 155500 + }, + { + "epoch": 1.3391012396694215, + "grad_norm": 1.953125, + "learning_rate": 0.00016693896142552735, + "loss": 4.4004, + "step": 155550 + }, + { + "epoch": 1.3395316804407713, + "grad_norm": 1.625, + "learning_rate": 0.000166918831982198, + "loss": 4.7398, + "step": 155600 + }, + { + "epoch": 1.339962121212121, + "grad_norm": 2.9375, + "learning_rate": 0.00016689869762717037, + "loss": 4.7281, + "step": 155650 + }, + { + "epoch": 1.3403925619834711, + "grad_norm": 4.0625, + "learning_rate": 0.0001668785583619223, + "loss": 4.6139, + "step": 155700 + }, + { + "epoch": 1.340823002754821, + "grad_norm": 3.0, + "learning_rate": 0.00016685841418793195, + "loss": 4.6512, + "step": 155750 + }, + { + "epoch": 1.341253443526171, + "grad_norm": 1.375, + "learning_rate": 0.0001668382651066778, + "loss": 4.4863, + "step": 155800 + }, + { + "epoch": 1.3416838842975207, + "grad_norm": 1.4375, + "learning_rate": 0.00016681811111963883, + "loss": 4.6242, + "step": 155850 + }, + { + "epoch": 1.3421143250688705, + "grad_norm": 2.4375, + "learning_rate": 0.00016679795222829426, + "loss": 4.7162, + "step": 155900 + }, + { + "epoch": 1.3425447658402203, + "grad_norm": 2.546875, + "learning_rate": 0.00016677778843412374, + "loss": 4.5976, + "step": 155950 + }, + { + "epoch": 1.3429752066115703, + "grad_norm": 2.125, + "learning_rate": 0.00016675761973860718, + "loss": 4.4855, + "step": 156000 + }, + { + "epoch": 1.3429752066115703, + "eval_loss": 5.165329456329346, + "eval_runtime": 21.8131, + "eval_samples_per_second": 29.34, + "eval_steps_per_second": 14.67, + "eval_tts_loss": 7.059406952023572, + "step": 156000 + }, + { + "epoch": 1.3434056473829201, + "grad_norm": 3.03125, + "learning_rate": 0.00016673744614322498, + "loss": 4.548, + "step": 156050 + }, + { + "epoch": 1.34383608815427, + "grad_norm": 1.4609375, + "learning_rate": 0.00016671726764945784, + "loss": 4.543, + "step": 156100 + }, + { + "epoch": 1.34426652892562, + "grad_norm": 2.4375, + "learning_rate": 0.0001666970842587868, + "loss": 4.6498, + "step": 156150 + }, + { + "epoch": 1.3446969696969697, + "grad_norm": 4.4375, + "learning_rate": 0.00016667689597269324, + "loss": 4.3392, + "step": 156200 + }, + { + "epoch": 1.3451274104683195, + "grad_norm": 1.9765625, + "learning_rate": 0.000166656702792659, + "loss": 4.6564, + "step": 156250 + }, + { + "epoch": 1.3455578512396693, + "grad_norm": 2.4375, + "learning_rate": 0.0001666365047201662, + "loss": 4.4692, + "step": 156300 + }, + { + "epoch": 1.3459882920110193, + "grad_norm": 1.9921875, + "learning_rate": 0.0001666163017566973, + "loss": 4.6336, + "step": 156350 + }, + { + "epoch": 1.3464187327823691, + "grad_norm": 3.484375, + "learning_rate": 0.0001665960939037352, + "loss": 4.5028, + "step": 156400 + }, + { + "epoch": 1.3468491735537191, + "grad_norm": 2.328125, + "learning_rate": 0.0001665758811627631, + "loss": 4.7692, + "step": 156450 + }, + { + "epoch": 1.347279614325069, + "grad_norm": 1.546875, + "learning_rate": 0.00016655566353526456, + "loss": 4.4678, + "step": 156500 + }, + { + "epoch": 1.3477100550964187, + "grad_norm": 5.21875, + "learning_rate": 0.0001665354410227235, + "loss": 4.7771, + "step": 156550 + }, + { + "epoch": 1.3481404958677685, + "grad_norm": 0.96875, + "learning_rate": 0.00016651521362662423, + "loss": 4.8281, + "step": 156600 + }, + { + "epoch": 1.3485709366391185, + "grad_norm": 1.125, + "learning_rate": 0.00016649498134845142, + "loss": 4.6899, + "step": 156650 + }, + { + "epoch": 1.3490013774104683, + "grad_norm": 2.703125, + "learning_rate": 0.00016647474418968997, + "loss": 4.3369, + "step": 156700 + }, + { + "epoch": 1.3494318181818181, + "grad_norm": 2.453125, + "learning_rate": 0.0001664545021518254, + "loss": 4.3659, + "step": 156750 + }, + { + "epoch": 1.3498622589531681, + "grad_norm": 3.046875, + "learning_rate": 0.00016643425523634332, + "loss": 4.5338, + "step": 156800 + }, + { + "epoch": 1.350292699724518, + "grad_norm": 1.390625, + "learning_rate": 0.0001664140034447298, + "loss": 4.1846, + "step": 156850 + }, + { + "epoch": 1.3507231404958677, + "grad_norm": 1.671875, + "learning_rate": 0.0001663937467784714, + "loss": 4.5744, + "step": 156900 + }, + { + "epoch": 1.3511535812672175, + "grad_norm": 3.015625, + "learning_rate": 0.00016637348523905482, + "loss": 4.6909, + "step": 156950 + }, + { + "epoch": 1.3515840220385675, + "grad_norm": 2.828125, + "learning_rate": 0.0001663532188279672, + "loss": 4.5887, + "step": 157000 + }, + { + "epoch": 1.3520144628099173, + "grad_norm": 3.28125, + "learning_rate": 0.00016633294754669607, + "loss": 4.2306, + "step": 157050 + }, + { + "epoch": 1.3524449035812673, + "grad_norm": 2.953125, + "learning_rate": 0.00016631267139672934, + "loss": 4.4633, + "step": 157100 + }, + { + "epoch": 1.3528753443526171, + "grad_norm": 2.828125, + "learning_rate": 0.0001662923903795552, + "loss": 4.6043, + "step": 157150 + }, + { + "epoch": 1.353305785123967, + "grad_norm": 2.921875, + "learning_rate": 0.00016627210449666224, + "loss": 4.4723, + "step": 157200 + }, + { + "epoch": 1.3537362258953167, + "grad_norm": 1.4140625, + "learning_rate": 0.00016625181374953937, + "loss": 4.8493, + "step": 157250 + }, + { + "epoch": 1.3541666666666667, + "grad_norm": 3.203125, + "learning_rate": 0.00016623151813967594, + "loss": 4.3919, + "step": 157300 + }, + { + "epoch": 1.3545971074380165, + "grad_norm": 3.265625, + "learning_rate": 0.00016621121766856157, + "loss": 4.9115, + "step": 157350 + }, + { + "epoch": 1.3550275482093663, + "grad_norm": 3.3125, + "learning_rate": 0.0001661909123376863, + "loss": 4.4, + "step": 157400 + }, + { + "epoch": 1.3554579889807163, + "grad_norm": 2.640625, + "learning_rate": 0.00016617060214854046, + "loss": 4.796, + "step": 157450 + }, + { + "epoch": 1.3558884297520661, + "grad_norm": 6.125, + "learning_rate": 0.00016615028710261477, + "loss": 4.6368, + "step": 157500 + }, + { + "epoch": 1.356318870523416, + "grad_norm": 2.5, + "learning_rate": 0.00016612996720140037, + "loss": 4.7879, + "step": 157550 + }, + { + "epoch": 1.3567493112947657, + "grad_norm": 3.5, + "learning_rate": 0.00016610964244638865, + "loss": 4.2282, + "step": 157600 + }, + { + "epoch": 1.3571797520661157, + "grad_norm": 3.765625, + "learning_rate": 0.00016608931283907142, + "loss": 4.4556, + "step": 157650 + }, + { + "epoch": 1.3576101928374655, + "grad_norm": 3.34375, + "learning_rate": 0.0001660689783809408, + "loss": 4.8659, + "step": 157700 + }, + { + "epoch": 1.3580406336088156, + "grad_norm": 3.0, + "learning_rate": 0.00016604863907348935, + "loss": 4.2695, + "step": 157750 + }, + { + "epoch": 1.3584710743801653, + "grad_norm": 3.03125, + "learning_rate": 0.0001660282949182099, + "loss": 4.4837, + "step": 157800 + }, + { + "epoch": 1.3589015151515151, + "grad_norm": 1.734375, + "learning_rate": 0.00016600794591659562, + "loss": 4.2264, + "step": 157850 + }, + { + "epoch": 1.359331955922865, + "grad_norm": 1.8515625, + "learning_rate": 0.00016598759207014018, + "loss": 4.5096, + "step": 157900 + }, + { + "epoch": 1.359762396694215, + "grad_norm": 1.7890625, + "learning_rate": 0.00016596723338033743, + "loss": 4.637, + "step": 157950 + }, + { + "epoch": 1.3601928374655647, + "grad_norm": 2.59375, + "learning_rate": 0.00016594686984868168, + "loss": 4.5702, + "step": 158000 + }, + { + "epoch": 1.3606232782369145, + "grad_norm": 2.34375, + "learning_rate": 0.0001659265014766676, + "loss": 4.687, + "step": 158050 + }, + { + "epoch": 1.3610537190082646, + "grad_norm": 3.203125, + "learning_rate": 0.0001659061282657901, + "loss": 4.3978, + "step": 158100 + }, + { + "epoch": 1.3614841597796143, + "grad_norm": 3.46875, + "learning_rate": 0.00016588575021754464, + "loss": 4.4582, + "step": 158150 + }, + { + "epoch": 1.3619146005509641, + "grad_norm": 4.15625, + "learning_rate": 0.00016586536733342684, + "loss": 4.3739, + "step": 158200 + }, + { + "epoch": 1.362345041322314, + "grad_norm": 3.265625, + "learning_rate": 0.0001658449796149328, + "loss": 4.4914, + "step": 158250 + }, + { + "epoch": 1.362775482093664, + "grad_norm": 1.8046875, + "learning_rate": 0.00016582458706355894, + "loss": 4.7275, + "step": 158300 + }, + { + "epoch": 1.3632059228650137, + "grad_norm": 2.9375, + "learning_rate": 0.000165804189680802, + "loss": 4.3543, + "step": 158350 + }, + { + "epoch": 1.3636363636363638, + "grad_norm": 4.15625, + "learning_rate": 0.0001657837874681591, + "loss": 4.5953, + "step": 158400 + }, + { + "epoch": 1.3640668044077136, + "grad_norm": 1.203125, + "learning_rate": 0.00016576338042712777, + "loss": 4.5376, + "step": 158450 + }, + { + "epoch": 1.3644972451790633, + "grad_norm": 2.90625, + "learning_rate": 0.0001657429685592058, + "loss": 4.7171, + "step": 158500 + }, + { + "epoch": 1.3649276859504131, + "grad_norm": 2.390625, + "learning_rate": 0.0001657225518658914, + "loss": 4.9503, + "step": 158550 + }, + { + "epoch": 1.3653581267217632, + "grad_norm": 1.9296875, + "learning_rate": 0.00016570213034868307, + "loss": 4.6662, + "step": 158600 + }, + { + "epoch": 1.365788567493113, + "grad_norm": 2.171875, + "learning_rate": 0.00016568170400907975, + "loss": 4.7016, + "step": 158650 + }, + { + "epoch": 1.3662190082644627, + "grad_norm": 2.703125, + "learning_rate": 0.0001656612728485807, + "loss": 4.5563, + "step": 158700 + }, + { + "epoch": 1.3666494490358128, + "grad_norm": 3.15625, + "learning_rate": 0.00016564083686868543, + "loss": 4.8444, + "step": 158750 + }, + { + "epoch": 1.3670798898071626, + "grad_norm": 2.640625, + "learning_rate": 0.00016562039607089402, + "loss": 4.3974, + "step": 158800 + }, + { + "epoch": 1.3675103305785123, + "grad_norm": 2.703125, + "learning_rate": 0.00016559995045670672, + "loss": 5.0154, + "step": 158850 + }, + { + "epoch": 1.3679407713498621, + "grad_norm": 2.46875, + "learning_rate": 0.00016557950002762416, + "loss": 4.4174, + "step": 158900 + }, + { + "epoch": 1.3683712121212122, + "grad_norm": 3.609375, + "learning_rate": 0.00016555904478514742, + "loss": 4.7853, + "step": 158950 + }, + { + "epoch": 1.368801652892562, + "grad_norm": 2.28125, + "learning_rate": 0.00016553858473077786, + "loss": 3.9965, + "step": 159000 + }, + { + "epoch": 1.368801652892562, + "eval_loss": 5.159815311431885, + "eval_runtime": 21.7663, + "eval_samples_per_second": 29.403, + "eval_steps_per_second": 14.702, + "eval_tts_loss": 7.143070609614872, + "step": 159000 + }, + { + "epoch": 1.369232093663912, + "grad_norm": 3.71875, + "learning_rate": 0.00016551811986601717, + "loss": 4.6505, + "step": 159050 + }, + { + "epoch": 1.3696625344352618, + "grad_norm": 3.734375, + "learning_rate": 0.00016549765019236747, + "loss": 4.5226, + "step": 159100 + }, + { + "epoch": 1.3700929752066116, + "grad_norm": 2.203125, + "learning_rate": 0.00016547717571133115, + "loss": 4.4056, + "step": 159150 + }, + { + "epoch": 1.3705234159779613, + "grad_norm": 2.84375, + "learning_rate": 0.00016545669642441103, + "loss": 4.6952, + "step": 159200 + }, + { + "epoch": 1.3709538567493114, + "grad_norm": 3.65625, + "learning_rate": 0.0001654362123331102, + "loss": 4.3651, + "step": 159250 + }, + { + "epoch": 1.3713842975206612, + "grad_norm": 3.296875, + "learning_rate": 0.0001654157234389322, + "loss": 4.5984, + "step": 159300 + }, + { + "epoch": 1.371814738292011, + "grad_norm": 1.9921875, + "learning_rate": 0.00016539522974338083, + "loss": 4.7625, + "step": 159350 + }, + { + "epoch": 1.372245179063361, + "grad_norm": 2.34375, + "learning_rate": 0.00016537473124796027, + "loss": 4.357, + "step": 159400 + }, + { + "epoch": 1.3726756198347108, + "grad_norm": 1.4296875, + "learning_rate": 0.00016535422795417514, + "loss": 4.0961, + "step": 159450 + }, + { + "epoch": 1.3731060606060606, + "grad_norm": 2.515625, + "learning_rate": 0.0001653337198635303, + "loss": 4.3652, + "step": 159500 + }, + { + "epoch": 1.3735365013774103, + "grad_norm": 1.4609375, + "learning_rate": 0.00016531320697753095, + "loss": 4.4528, + "step": 159550 + }, + { + "epoch": 1.3739669421487604, + "grad_norm": 2.8125, + "learning_rate": 0.00016529268929768272, + "loss": 4.686, + "step": 159600 + }, + { + "epoch": 1.3743973829201102, + "grad_norm": 1.6484375, + "learning_rate": 0.00016527216682549163, + "loss": 4.4016, + "step": 159650 + }, + { + "epoch": 1.3748278236914602, + "grad_norm": 3.84375, + "learning_rate": 0.0001652516395624639, + "loss": 4.4997, + "step": 159700 + }, + { + "epoch": 1.37525826446281, + "grad_norm": 3.0, + "learning_rate": 0.0001652311075101062, + "loss": 4.4726, + "step": 159750 + }, + { + "epoch": 1.3756887052341598, + "grad_norm": 3.65625, + "learning_rate": 0.0001652105706699256, + "loss": 4.888, + "step": 159800 + }, + { + "epoch": 1.3761191460055096, + "grad_norm": 4.59375, + "learning_rate": 0.0001651900290434294, + "loss": 4.5492, + "step": 159850 + }, + { + "epoch": 1.3765495867768596, + "grad_norm": 2.046875, + "learning_rate": 0.00016516948263212532, + "loss": 4.7991, + "step": 159900 + }, + { + "epoch": 1.3769800275482094, + "grad_norm": 3.84375, + "learning_rate": 0.00016514893143752146, + "loss": 4.5327, + "step": 159950 + }, + { + "epoch": 1.3774104683195592, + "grad_norm": 1.8671875, + "learning_rate": 0.00016512837546112617, + "loss": 4.3377, + "step": 160000 + }, + { + "epoch": 1.3778409090909092, + "grad_norm": 3.796875, + "learning_rate": 0.00016510781470444825, + "loss": 4.4729, + "step": 160050 + }, + { + "epoch": 1.378271349862259, + "grad_norm": 2.09375, + "learning_rate": 0.00016508724916899684, + "loss": 4.6799, + "step": 160100 + }, + { + "epoch": 1.3787017906336088, + "grad_norm": 2.984375, + "learning_rate": 0.00016506667885628136, + "loss": 4.7163, + "step": 160150 + }, + { + "epoch": 1.3791322314049586, + "grad_norm": 3.6875, + "learning_rate": 0.00016504610376781166, + "loss": 4.5325, + "step": 160200 + }, + { + "epoch": 1.3795626721763086, + "grad_norm": 2.25, + "learning_rate": 0.00016502552390509787, + "loss": 4.7286, + "step": 160250 + }, + { + "epoch": 1.3799931129476584, + "grad_norm": 2.5625, + "learning_rate": 0.00016500493926965054, + "loss": 4.6325, + "step": 160300 + }, + { + "epoch": 1.3804235537190084, + "grad_norm": 2.84375, + "learning_rate": 0.00016498434986298056, + "loss": 4.7649, + "step": 160350 + }, + { + "epoch": 1.3808539944903582, + "grad_norm": 1.328125, + "learning_rate": 0.00016496375568659908, + "loss": 4.7286, + "step": 160400 + }, + { + "epoch": 1.381284435261708, + "grad_norm": 3.28125, + "learning_rate": 0.0001649431567420177, + "loss": 4.6121, + "step": 160450 + }, + { + "epoch": 1.3817148760330578, + "grad_norm": 1.5078125, + "learning_rate": 0.00016492255303074838, + "loss": 4.1601, + "step": 160500 + }, + { + "epoch": 1.3821453168044078, + "grad_norm": 2.765625, + "learning_rate": 0.00016490194455430333, + "loss": 4.2581, + "step": 160550 + }, + { + "epoch": 1.3825757575757576, + "grad_norm": 2.578125, + "learning_rate": 0.0001648813313141952, + "loss": 4.8742, + "step": 160600 + }, + { + "epoch": 1.3830061983471074, + "grad_norm": 1.7578125, + "learning_rate": 0.00016486071331193696, + "loss": 4.6559, + "step": 160650 + }, + { + "epoch": 1.3834366391184574, + "grad_norm": 2.640625, + "learning_rate": 0.00016484009054904187, + "loss": 4.6533, + "step": 160700 + }, + { + "epoch": 1.3838670798898072, + "grad_norm": 5.65625, + "learning_rate": 0.00016481946302702367, + "loss": 4.5375, + "step": 160750 + }, + { + "epoch": 1.384297520661157, + "grad_norm": 2.453125, + "learning_rate": 0.00016479883074739635, + "loss": 4.5037, + "step": 160800 + }, + { + "epoch": 1.3847279614325068, + "grad_norm": 3.640625, + "learning_rate": 0.00016477819371167428, + "loss": 4.5175, + "step": 160850 + }, + { + "epoch": 1.3851584022038568, + "grad_norm": 1.546875, + "learning_rate": 0.00016475755192137213, + "loss": 4.6932, + "step": 160900 + }, + { + "epoch": 1.3855888429752066, + "grad_norm": 2.875, + "learning_rate": 0.000164736905378005, + "loss": 4.2476, + "step": 160950 + }, + { + "epoch": 1.3860192837465566, + "grad_norm": 1.796875, + "learning_rate": 0.00016471625408308836, + "loss": 4.5071, + "step": 161000 + }, + { + "epoch": 1.3864497245179064, + "grad_norm": 3.421875, + "learning_rate": 0.00016469559803813786, + "loss": 4.4198, + "step": 161050 + }, + { + "epoch": 1.3868801652892562, + "grad_norm": 3.375, + "learning_rate": 0.00016467493724466964, + "loss": 4.6224, + "step": 161100 + }, + { + "epoch": 1.387310606060606, + "grad_norm": 3.9375, + "learning_rate": 0.00016465427170420024, + "loss": 5.0826, + "step": 161150 + }, + { + "epoch": 1.387741046831956, + "grad_norm": 3.921875, + "learning_rate": 0.00016463360141824636, + "loss": 4.6965, + "step": 161200 + }, + { + "epoch": 1.3881714876033058, + "grad_norm": 2.640625, + "learning_rate": 0.0001646129263883252, + "loss": 5.0396, + "step": 161250 + }, + { + "epoch": 1.3886019283746556, + "grad_norm": 2.203125, + "learning_rate": 0.00016459224661595427, + "loss": 4.8041, + "step": 161300 + }, + { + "epoch": 1.3890323691460056, + "grad_norm": 1.8828125, + "learning_rate": 0.00016457156210265142, + "loss": 4.7975, + "step": 161350 + }, + { + "epoch": 1.3894628099173554, + "grad_norm": 2.578125, + "learning_rate": 0.00016455087284993484, + "loss": 4.6357, + "step": 161400 + }, + { + "epoch": 1.3898932506887052, + "grad_norm": 2.90625, + "learning_rate": 0.00016453017885932308, + "loss": 4.7784, + "step": 161450 + }, + { + "epoch": 1.390323691460055, + "grad_norm": 1.828125, + "learning_rate": 0.00016450948013233503, + "loss": 4.7899, + "step": 161500 + }, + { + "epoch": 1.390754132231405, + "grad_norm": 2.90625, + "learning_rate": 0.0001644887766704899, + "loss": 4.9918, + "step": 161550 + }, + { + "epoch": 1.3911845730027548, + "grad_norm": 1.5078125, + "learning_rate": 0.00016446806847530735, + "loss": 4.4822, + "step": 161600 + }, + { + "epoch": 1.3916150137741048, + "grad_norm": 3.5, + "learning_rate": 0.0001644473555483073, + "loss": 4.664, + "step": 161650 + }, + { + "epoch": 1.3920454545454546, + "grad_norm": 3.65625, + "learning_rate": 0.00016442663789100997, + "loss": 4.8967, + "step": 161700 + }, + { + "epoch": 1.3924758953168044, + "grad_norm": 2.390625, + "learning_rate": 0.00016440591550493606, + "loss": 4.2254, + "step": 161750 + }, + { + "epoch": 1.3929063360881542, + "grad_norm": 2.75, + "learning_rate": 0.0001643851883916065, + "loss": 4.8994, + "step": 161800 + }, + { + "epoch": 1.3933367768595042, + "grad_norm": 2.0625, + "learning_rate": 0.00016436445655254267, + "loss": 4.4175, + "step": 161850 + }, + { + "epoch": 1.393767217630854, + "grad_norm": 3.578125, + "learning_rate": 0.0001643437199892662, + "loss": 4.4351, + "step": 161900 + }, + { + "epoch": 1.3941976584022038, + "grad_norm": 4.8125, + "learning_rate": 0.00016432297870329913, + "loss": 4.7114, + "step": 161950 + }, + { + "epoch": 1.3946280991735538, + "grad_norm": 3.5625, + "learning_rate": 0.0001643022326961638, + "loss": 4.4985, + "step": 162000 + }, + { + "epoch": 1.3946280991735538, + "eval_loss": 5.1462507247924805, + "eval_runtime": 22.1152, + "eval_samples_per_second": 28.939, + "eval_steps_per_second": 14.47, + "eval_tts_loss": 7.109295402450957, + "step": 162000 + }, + { + "epoch": 1.3950585399449036, + "grad_norm": 2.484375, + "learning_rate": 0.00016428148196938295, + "loss": 3.7045, + "step": 162050 + }, + { + "epoch": 1.3954889807162534, + "grad_norm": 3.34375, + "learning_rate": 0.00016426072652447963, + "loss": 4.4363, + "step": 162100 + }, + { + "epoch": 1.3959194214876032, + "grad_norm": 2.828125, + "learning_rate": 0.00016423996636297724, + "loss": 4.4125, + "step": 162150 + }, + { + "epoch": 1.3963498622589532, + "grad_norm": 3.046875, + "learning_rate": 0.00016421920148639957, + "loss": 4.6629, + "step": 162200 + }, + { + "epoch": 1.396780303030303, + "grad_norm": 1.8515625, + "learning_rate": 0.0001641984318962706, + "loss": 4.8781, + "step": 162250 + }, + { + "epoch": 1.397210743801653, + "grad_norm": 3.828125, + "learning_rate": 0.00016417765759411493, + "loss": 4.6714, + "step": 162300 + }, + { + "epoch": 1.3976411845730028, + "grad_norm": 2.890625, + "learning_rate": 0.00016415687858145725, + "loss": 4.8539, + "step": 162350 + }, + { + "epoch": 1.3980716253443526, + "grad_norm": 2.671875, + "learning_rate": 0.00016413609485982274, + "loss": 4.546, + "step": 162400 + }, + { + "epoch": 1.3985020661157024, + "grad_norm": 1.8984375, + "learning_rate": 0.0001641153064307368, + "loss": 4.3027, + "step": 162450 + }, + { + "epoch": 1.3989325068870524, + "grad_norm": 3.953125, + "learning_rate": 0.00016409451329572536, + "loss": 4.5593, + "step": 162500 + }, + { + "epoch": 1.3993629476584022, + "grad_norm": 1.8203125, + "learning_rate": 0.00016407371545631457, + "loss": 4.276, + "step": 162550 + }, + { + "epoch": 1.399793388429752, + "grad_norm": 2.71875, + "learning_rate": 0.00016405291291403086, + "loss": 4.5486, + "step": 162600 + }, + { + "epoch": 1.400223829201102, + "grad_norm": 2.875, + "learning_rate": 0.00016403210567040119, + "loss": 4.7962, + "step": 162650 + }, + { + "epoch": 1.4006542699724518, + "grad_norm": 4.34375, + "learning_rate": 0.00016401129372695277, + "loss": 4.9889, + "step": 162700 + }, + { + "epoch": 1.4010847107438016, + "grad_norm": 3.125, + "learning_rate": 0.00016399047708521306, + "loss": 4.482, + "step": 162750 + }, + { + "epoch": 1.4015151515151514, + "grad_norm": 3.4375, + "learning_rate": 0.00016396965574671003, + "loss": 4.6839, + "step": 162800 + }, + { + "epoch": 1.4019455922865014, + "grad_norm": 2.8125, + "learning_rate": 0.0001639488297129719, + "loss": 4.3398, + "step": 162850 + }, + { + "epoch": 1.4023760330578512, + "grad_norm": 1.1953125, + "learning_rate": 0.00016392799898552727, + "loss": 4.6088, + "step": 162900 + }, + { + "epoch": 1.4028064738292012, + "grad_norm": 1.671875, + "learning_rate": 0.00016390716356590504, + "loss": 4.1766, + "step": 162950 + }, + { + "epoch": 1.403236914600551, + "grad_norm": 2.109375, + "learning_rate": 0.0001638863234556345, + "loss": 4.2907, + "step": 163000 + }, + { + "epoch": 1.4036673553719008, + "grad_norm": 2.71875, + "learning_rate": 0.00016386547865624527, + "loss": 4.5716, + "step": 163050 + }, + { + "epoch": 1.4040977961432506, + "grad_norm": 2.59375, + "learning_rate": 0.00016384462916926734, + "loss": 4.4531, + "step": 163100 + }, + { + "epoch": 1.4045282369146006, + "grad_norm": 2.796875, + "learning_rate": 0.000163823774996231, + "loss": 4.5312, + "step": 163150 + }, + { + "epoch": 1.4049586776859504, + "grad_norm": 4.09375, + "learning_rate": 0.00016380291613866688, + "loss": 4.747, + "step": 163200 + }, + { + "epoch": 1.4053891184573004, + "grad_norm": 3.203125, + "learning_rate": 0.000163782052598106, + "loss": 4.3799, + "step": 163250 + }, + { + "epoch": 1.4058195592286502, + "grad_norm": 1.609375, + "learning_rate": 0.00016376118437607968, + "loss": 4.6006, + "step": 163300 + }, + { + "epoch": 1.40625, + "grad_norm": 3.40625, + "learning_rate": 0.0001637403114741196, + "loss": 4.7187, + "step": 163350 + }, + { + "epoch": 1.4066804407713498, + "grad_norm": 1.6796875, + "learning_rate": 0.0001637194338937578, + "loss": 4.2554, + "step": 163400 + }, + { + "epoch": 1.4071108815426996, + "grad_norm": 2.875, + "learning_rate": 0.0001636985516365267, + "loss": 4.7399, + "step": 163450 + }, + { + "epoch": 1.4075413223140496, + "grad_norm": 2.78125, + "learning_rate": 0.00016367766470395893, + "loss": 4.4203, + "step": 163500 + }, + { + "epoch": 1.4079717630853994, + "grad_norm": 1.9453125, + "learning_rate": 0.00016365677309758755, + "loss": 4.2266, + "step": 163550 + }, + { + "epoch": 1.4084022038567494, + "grad_norm": 6.75, + "learning_rate": 0.000163635876818946, + "loss": 4.1866, + "step": 163600 + }, + { + "epoch": 1.4088326446280992, + "grad_norm": 3.15625, + "learning_rate": 0.000163614975869568, + "loss": 4.3326, + "step": 163650 + }, + { + "epoch": 1.409263085399449, + "grad_norm": 1.859375, + "learning_rate": 0.00016359407025098768, + "loss": 4.5099, + "step": 163700 + }, + { + "epoch": 1.4096935261707988, + "grad_norm": 1.625, + "learning_rate": 0.0001635731599647394, + "loss": 4.6093, + "step": 163750 + }, + { + "epoch": 1.4101239669421488, + "grad_norm": 1.75, + "learning_rate": 0.00016355224501235797, + "loss": 4.1367, + "step": 163800 + }, + { + "epoch": 1.4105544077134986, + "grad_norm": 4.3125, + "learning_rate": 0.0001635313253953785, + "loss": 4.2216, + "step": 163850 + }, + { + "epoch": 1.4109848484848486, + "grad_norm": 2.65625, + "learning_rate": 0.00016351040111533643, + "loss": 4.6569, + "step": 163900 + }, + { + "epoch": 1.4114152892561984, + "grad_norm": 1.3359375, + "learning_rate": 0.00016348947217376757, + "loss": 4.4413, + "step": 163950 + }, + { + "epoch": 1.4118457300275482, + "grad_norm": 1.7734375, + "learning_rate": 0.00016346853857220802, + "loss": 4.5604, + "step": 164000 + }, + { + "epoch": 1.412276170798898, + "grad_norm": 2.765625, + "learning_rate": 0.0001634476003121943, + "loss": 3.991, + "step": 164050 + }, + { + "epoch": 1.412706611570248, + "grad_norm": 1.796875, + "learning_rate": 0.00016342665739526327, + "loss": 4.5542, + "step": 164100 + }, + { + "epoch": 1.4131370523415978, + "grad_norm": 3.65625, + "learning_rate": 0.000163405709822952, + "loss": 4.3613, + "step": 164150 + }, + { + "epoch": 1.4135674931129476, + "grad_norm": 3.125, + "learning_rate": 0.00016338475759679813, + "loss": 4.5651, + "step": 164200 + }, + { + "epoch": 1.4139979338842976, + "grad_norm": 2.703125, + "learning_rate": 0.00016336380071833934, + "loss": 4.1581, + "step": 164250 + }, + { + "epoch": 1.4144283746556474, + "grad_norm": 2.890625, + "learning_rate": 0.00016334283918911395, + "loss": 4.3491, + "step": 164300 + }, + { + "epoch": 1.4148588154269972, + "grad_norm": 1.78125, + "learning_rate": 0.00016332187301066044, + "loss": 4.4377, + "step": 164350 + }, + { + "epoch": 1.415289256198347, + "grad_norm": 1.9609375, + "learning_rate": 0.00016330090218451767, + "loss": 4.2134, + "step": 164400 + }, + { + "epoch": 1.415719696969697, + "grad_norm": 6.40625, + "learning_rate": 0.00016327992671222492, + "loss": 4.9712, + "step": 164450 + }, + { + "epoch": 1.4161501377410468, + "grad_norm": 3.65625, + "learning_rate": 0.0001632589465953217, + "loss": 4.3055, + "step": 164500 + }, + { + "epoch": 1.4165805785123968, + "grad_norm": 1.0625, + "learning_rate": 0.00016323796183534788, + "loss": 4.5056, + "step": 164550 + }, + { + "epoch": 1.4170110192837466, + "grad_norm": 3.484375, + "learning_rate": 0.00016321697243384373, + "loss": 4.2459, + "step": 164600 + }, + { + "epoch": 1.4174414600550964, + "grad_norm": 1.4375, + "learning_rate": 0.0001631959783923498, + "loss": 4.7942, + "step": 164650 + }, + { + "epoch": 1.4178719008264462, + "grad_norm": 2.65625, + "learning_rate": 0.00016317497971240705, + "loss": 4.9029, + "step": 164700 + }, + { + "epoch": 1.4183023415977962, + "grad_norm": 3.453125, + "learning_rate": 0.0001631539763955567, + "loss": 4.7152, + "step": 164750 + }, + { + "epoch": 1.418732782369146, + "grad_norm": 3.640625, + "learning_rate": 0.00016313296844334036, + "loss": 4.8096, + "step": 164800 + }, + { + "epoch": 1.4191632231404958, + "grad_norm": 2.828125, + "learning_rate": 0.00016311195585729999, + "loss": 4.4885, + "step": 164850 + }, + { + "epoch": 1.4195936639118458, + "grad_norm": 2.09375, + "learning_rate": 0.00016309093863897783, + "loss": 4.3724, + "step": 164900 + }, + { + "epoch": 1.4200241046831956, + "grad_norm": 1.296875, + "learning_rate": 0.00016306991678991653, + "loss": 4.7366, + "step": 164950 + }, + { + "epoch": 1.4204545454545454, + "grad_norm": 3.96875, + "learning_rate": 0.000163048890311659, + "loss": 4.6269, + "step": 165000 + }, + { + "epoch": 1.4204545454545454, + "eval_loss": 5.140933036804199, + "eval_runtime": 21.9888, + "eval_samples_per_second": 29.106, + "eval_steps_per_second": 14.553, + "eval_tts_loss": 7.1610827883010035, + "step": 165000 + }, + { + "epoch": 1.4208849862258952, + "grad_norm": 2.1875, + "learning_rate": 0.00016302785920574865, + "loss": 4.0005, + "step": 165050 + }, + { + "epoch": 1.4213154269972452, + "grad_norm": 2.21875, + "learning_rate": 0.00016300682347372898, + "loss": 5.0, + "step": 165100 + }, + { + "epoch": 1.421745867768595, + "grad_norm": 1.3984375, + "learning_rate": 0.00016298578311714403, + "loss": 4.4025, + "step": 165150 + }, + { + "epoch": 1.422176308539945, + "grad_norm": 2.96875, + "learning_rate": 0.00016296473813753813, + "loss": 4.3078, + "step": 165200 + }, + { + "epoch": 1.4226067493112948, + "grad_norm": 2.953125, + "learning_rate": 0.0001629436885364559, + "loss": 4.7616, + "step": 165250 + }, + { + "epoch": 1.4230371900826446, + "grad_norm": 4.09375, + "learning_rate": 0.00016292263431544243, + "loss": 4.6323, + "step": 165300 + }, + { + "epoch": 1.4234676308539944, + "grad_norm": 6.03125, + "learning_rate": 0.00016290157547604292, + "loss": 4.5392, + "step": 165350 + }, + { + "epoch": 1.4238980716253444, + "grad_norm": 2.46875, + "learning_rate": 0.00016288051201980311, + "loss": 4.6293, + "step": 165400 + }, + { + "epoch": 1.4243285123966942, + "grad_norm": 1.0859375, + "learning_rate": 0.00016285944394826905, + "loss": 4.4712, + "step": 165450 + }, + { + "epoch": 1.424758953168044, + "grad_norm": 3.640625, + "learning_rate": 0.000162838371262987, + "loss": 4.5395, + "step": 165500 + }, + { + "epoch": 1.425189393939394, + "grad_norm": 2.8125, + "learning_rate": 0.0001628172939655037, + "loss": 4.8306, + "step": 165550 + }, + { + "epoch": 1.4256198347107438, + "grad_norm": 3.609375, + "learning_rate": 0.0001627962120573662, + "loss": 4.7463, + "step": 165600 + }, + { + "epoch": 1.4260502754820936, + "grad_norm": 2.796875, + "learning_rate": 0.00016277512554012188, + "loss": 4.1847, + "step": 165650 + }, + { + "epoch": 1.4264807162534434, + "grad_norm": 2.953125, + "learning_rate": 0.00016275403441531837, + "loss": 4.5358, + "step": 165700 + }, + { + "epoch": 1.4269111570247934, + "grad_norm": 2.5, + "learning_rate": 0.00016273293868450376, + "loss": 4.8443, + "step": 165750 + }, + { + "epoch": 1.4273415977961432, + "grad_norm": 2.953125, + "learning_rate": 0.00016271183834922642, + "loss": 4.5064, + "step": 165800 + }, + { + "epoch": 1.4277720385674932, + "grad_norm": 3.359375, + "learning_rate": 0.00016269073341103503, + "loss": 3.9674, + "step": 165850 + }, + { + "epoch": 1.428202479338843, + "grad_norm": 1.984375, + "learning_rate": 0.00016266962387147875, + "loss": 4.3991, + "step": 165900 + }, + { + "epoch": 1.4286329201101928, + "grad_norm": 3.296875, + "learning_rate": 0.00016264850973210688, + "loss": 4.7838, + "step": 165950 + }, + { + "epoch": 1.4290633608815426, + "grad_norm": 3.203125, + "learning_rate": 0.0001626273909944692, + "loss": 4.3483, + "step": 166000 + }, + { + "epoch": 1.4294938016528926, + "grad_norm": 3.09375, + "learning_rate": 0.00016260626766011579, + "loss": 4.3884, + "step": 166050 + }, + { + "epoch": 1.4299242424242424, + "grad_norm": 3.625, + "learning_rate": 0.000162585139730597, + "loss": 4.5349, + "step": 166100 + }, + { + "epoch": 1.4303546831955922, + "grad_norm": 3.75, + "learning_rate": 0.00016256400720746363, + "loss": 4.3026, + "step": 166150 + }, + { + "epoch": 1.4307851239669422, + "grad_norm": 0.96875, + "learning_rate": 0.00016254287009226674, + "loss": 4.2588, + "step": 166200 + }, + { + "epoch": 1.431215564738292, + "grad_norm": 2.90625, + "learning_rate": 0.00016252172838655775, + "loss": 4.3752, + "step": 166250 + }, + { + "epoch": 1.4316460055096418, + "grad_norm": 4.4375, + "learning_rate": 0.0001625005820918884, + "loss": 4.5317, + "step": 166300 + }, + { + "epoch": 1.4320764462809916, + "grad_norm": 1.84375, + "learning_rate": 0.00016247943120981082, + "loss": 4.6431, + "step": 166350 + }, + { + "epoch": 1.4325068870523416, + "grad_norm": 1.2578125, + "learning_rate": 0.00016245827574187742, + "loss": 4.848, + "step": 166400 + }, + { + "epoch": 1.4329373278236914, + "grad_norm": 2.71875, + "learning_rate": 0.00016243711568964092, + "loss": 4.6549, + "step": 166450 + }, + { + "epoch": 1.4333677685950414, + "grad_norm": 1.4609375, + "learning_rate": 0.0001624159510546545, + "loss": 4.4376, + "step": 166500 + }, + { + "epoch": 1.4337982093663912, + "grad_norm": 3.0, + "learning_rate": 0.00016239478183847157, + "loss": 4.783, + "step": 166550 + }, + { + "epoch": 1.434228650137741, + "grad_norm": 4.03125, + "learning_rate": 0.0001623736080426459, + "loss": 4.6719, + "step": 166600 + }, + { + "epoch": 1.4346590909090908, + "grad_norm": 3.90625, + "learning_rate": 0.00016235242966873158, + "loss": 4.398, + "step": 166650 + }, + { + "epoch": 1.4350895316804408, + "grad_norm": 6.15625, + "learning_rate": 0.00016233124671828308, + "loss": 4.7081, + "step": 166700 + }, + { + "epoch": 1.4355199724517906, + "grad_norm": 4.46875, + "learning_rate": 0.0001623100591928552, + "loss": 4.3986, + "step": 166750 + }, + { + "epoch": 1.4359504132231404, + "grad_norm": 2.546875, + "learning_rate": 0.000162288867094003, + "loss": 4.6497, + "step": 166800 + }, + { + "epoch": 1.4363808539944904, + "grad_norm": 2.84375, + "learning_rate": 0.000162267670423282, + "loss": 4.7188, + "step": 166850 + }, + { + "epoch": 1.4368112947658402, + "grad_norm": 3.0, + "learning_rate": 0.00016224646918224796, + "loss": 4.4252, + "step": 166900 + }, + { + "epoch": 1.43724173553719, + "grad_norm": 2.4375, + "learning_rate": 0.00016222526337245704, + "loss": 4.4618, + "step": 166950 + }, + { + "epoch": 1.4376721763085398, + "grad_norm": 3.296875, + "learning_rate": 0.0001622040529954656, + "loss": 4.4996, + "step": 167000 + }, + { + "epoch": 1.4381026170798898, + "grad_norm": 2.484375, + "learning_rate": 0.00016218283805283056, + "loss": 4.1393, + "step": 167050 + }, + { + "epoch": 1.4385330578512396, + "grad_norm": 4.125, + "learning_rate": 0.00016216161854610897, + "loss": 4.6059, + "step": 167100 + }, + { + "epoch": 1.4389634986225897, + "grad_norm": 2.6875, + "learning_rate": 0.00016214039447685834, + "loss": 4.4537, + "step": 167150 + }, + { + "epoch": 1.4393939393939394, + "grad_norm": 2.734375, + "learning_rate": 0.00016211916584663646, + "loss": 4.6736, + "step": 167200 + }, + { + "epoch": 1.4398243801652892, + "grad_norm": 2.8125, + "learning_rate": 0.00016209793265700145, + "loss": 4.5261, + "step": 167250 + }, + { + "epoch": 1.440254820936639, + "grad_norm": 2.109375, + "learning_rate": 0.00016207669490951178, + "loss": 4.4991, + "step": 167300 + }, + { + "epoch": 1.440685261707989, + "grad_norm": 3.671875, + "learning_rate": 0.00016205545260572626, + "loss": 4.4003, + "step": 167350 + }, + { + "epoch": 1.4411157024793388, + "grad_norm": 1.9765625, + "learning_rate": 0.00016203420574720405, + "loss": 4.3696, + "step": 167400 + }, + { + "epoch": 1.4415461432506886, + "grad_norm": 1.9140625, + "learning_rate": 0.00016201295433550462, + "loss": 5.0686, + "step": 167450 + }, + { + "epoch": 1.4419765840220387, + "grad_norm": 2.0625, + "learning_rate": 0.00016199169837218772, + "loss": 4.3716, + "step": 167500 + }, + { + "epoch": 1.4424070247933884, + "grad_norm": 2.078125, + "learning_rate": 0.00016197043785881357, + "loss": 4.4272, + "step": 167550 + }, + { + "epoch": 1.4428374655647382, + "grad_norm": 3.0, + "learning_rate": 0.00016194917279694263, + "loss": 4.4247, + "step": 167600 + }, + { + "epoch": 1.443267906336088, + "grad_norm": 2.359375, + "learning_rate": 0.00016192790318813566, + "loss": 4.3105, + "step": 167650 + }, + { + "epoch": 1.443698347107438, + "grad_norm": 2.40625, + "learning_rate": 0.00016190662903395388, + "loss": 4.7667, + "step": 167700 + }, + { + "epoch": 1.4441287878787878, + "grad_norm": 3.8125, + "learning_rate": 0.00016188535033595866, + "loss": 4.5131, + "step": 167750 + }, + { + "epoch": 1.4445592286501379, + "grad_norm": 3.296875, + "learning_rate": 0.00016186406709571194, + "loss": 4.3132, + "step": 167800 + }, + { + "epoch": 1.4449896694214877, + "grad_norm": 2.921875, + "learning_rate": 0.00016184277931477578, + "loss": 4.5048, + "step": 167850 + }, + { + "epoch": 1.4454201101928374, + "grad_norm": 2.3125, + "learning_rate": 0.00016182148699471266, + "loss": 4.4808, + "step": 167900 + }, + { + "epoch": 1.4458505509641872, + "grad_norm": 3.265625, + "learning_rate": 0.00016180019013708541, + "loss": 4.6591, + "step": 167950 + }, + { + "epoch": 1.4462809917355373, + "grad_norm": 2.1875, + "learning_rate": 0.00016177888874345717, + "loss": 4.8858, + "step": 168000 + }, + { + "epoch": 1.4462809917355373, + "eval_loss": 5.131041526794434, + "eval_runtime": 21.9964, + "eval_samples_per_second": 29.096, + "eval_steps_per_second": 14.548, + "eval_tts_loss": 7.119232905988722, + "step": 168000 + }, + { + "epoch": 1.446711432506887, + "grad_norm": 2.3125, + "learning_rate": 0.00016175758281539144, + "loss": 4.8953, + "step": 168050 + }, + { + "epoch": 1.4471418732782368, + "grad_norm": 3.890625, + "learning_rate": 0.00016173627235445198, + "loss": 4.8951, + "step": 168100 + }, + { + "epoch": 1.4475723140495869, + "grad_norm": 2.515625, + "learning_rate": 0.00016171495736220297, + "loss": 4.7312, + "step": 168150 + }, + { + "epoch": 1.4480027548209367, + "grad_norm": 3.984375, + "learning_rate": 0.00016169363784020887, + "loss": 4.6748, + "step": 168200 + }, + { + "epoch": 1.4484331955922864, + "grad_norm": 4.25, + "learning_rate": 0.0001616723137900345, + "loss": 4.646, + "step": 168250 + }, + { + "epoch": 1.4488636363636362, + "grad_norm": 2.078125, + "learning_rate": 0.00016165098521324497, + "loss": 4.3641, + "step": 168300 + }, + { + "epoch": 1.4492940771349863, + "grad_norm": 1.625, + "learning_rate": 0.0001616296521114058, + "loss": 4.0898, + "step": 168350 + }, + { + "epoch": 1.449724517906336, + "grad_norm": 3.0, + "learning_rate": 0.00016160831448608273, + "loss": 4.9034, + "step": 168400 + }, + { + "epoch": 1.450154958677686, + "grad_norm": 2.71875, + "learning_rate": 0.00016158697233884195, + "loss": 4.6063, + "step": 168450 + }, + { + "epoch": 1.4505853994490359, + "grad_norm": 1.4921875, + "learning_rate": 0.00016156562567124988, + "loss": 4.2414, + "step": 168500 + }, + { + "epoch": 1.4510158402203857, + "grad_norm": 1.34375, + "learning_rate": 0.00016154427448487337, + "loss": 4.4975, + "step": 168550 + }, + { + "epoch": 1.4514462809917354, + "grad_norm": 2.859375, + "learning_rate": 0.00016152291878127954, + "loss": 4.7147, + "step": 168600 + }, + { + "epoch": 1.4518767217630855, + "grad_norm": 3.703125, + "learning_rate": 0.00016150155856203583, + "loss": 4.6297, + "step": 168650 + }, + { + "epoch": 1.4523071625344353, + "grad_norm": 2.15625, + "learning_rate": 0.00016148019382871006, + "loss": 4.464, + "step": 168700 + }, + { + "epoch": 1.452737603305785, + "grad_norm": 3.453125, + "learning_rate": 0.0001614588245828703, + "loss": 4.4556, + "step": 168750 + }, + { + "epoch": 1.453168044077135, + "grad_norm": 4.0625, + "learning_rate": 0.0001614374508260851, + "loss": 4.5968, + "step": 168800 + }, + { + "epoch": 1.4535984848484849, + "grad_norm": 2.625, + "learning_rate": 0.00016141607255992319, + "loss": 4.3659, + "step": 168850 + }, + { + "epoch": 1.4540289256198347, + "grad_norm": 4.71875, + "learning_rate": 0.00016139468978595365, + "loss": 4.696, + "step": 168900 + }, + { + "epoch": 1.4544593663911844, + "grad_norm": 3.015625, + "learning_rate": 0.000161373302505746, + "loss": 4.6857, + "step": 168950 + }, + { + "epoch": 1.4548898071625345, + "grad_norm": 2.484375, + "learning_rate": 0.00016135191072086998, + "loss": 4.5399, + "step": 169000 + }, + { + "epoch": 1.4553202479338843, + "grad_norm": 1.7109375, + "learning_rate": 0.00016133051443289575, + "loss": 4.0464, + "step": 169050 + }, + { + "epoch": 1.4557506887052343, + "grad_norm": 2.75, + "learning_rate": 0.00016130911364339368, + "loss": 4.4852, + "step": 169100 + }, + { + "epoch": 1.456181129476584, + "grad_norm": 3.65625, + "learning_rate": 0.00016128770835393462, + "loss": 4.5114, + "step": 169150 + }, + { + "epoch": 1.4566115702479339, + "grad_norm": 2.453125, + "learning_rate": 0.0001612662985660896, + "loss": 4.591, + "step": 169200 + }, + { + "epoch": 1.4570420110192837, + "grad_norm": 3.21875, + "learning_rate": 0.00016124488428143007, + "loss": 4.5943, + "step": 169250 + }, + { + "epoch": 1.4574724517906337, + "grad_norm": 3.015625, + "learning_rate": 0.00016122346550152784, + "loss": 4.6022, + "step": 169300 + }, + { + "epoch": 1.4579028925619835, + "grad_norm": 2.28125, + "learning_rate": 0.00016120204222795493, + "loss": 4.3769, + "step": 169350 + }, + { + "epoch": 1.4583333333333333, + "grad_norm": 3.578125, + "learning_rate": 0.00016118061446228384, + "loss": 4.8728, + "step": 169400 + }, + { + "epoch": 1.4587637741046833, + "grad_norm": 2.453125, + "learning_rate": 0.00016115918220608726, + "loss": 4.464, + "step": 169450 + }, + { + "epoch": 1.459194214876033, + "grad_norm": 3.875, + "learning_rate": 0.0001611377454609383, + "loss": 4.2808, + "step": 169500 + }, + { + "epoch": 1.4596246556473829, + "grad_norm": 2.859375, + "learning_rate": 0.00016111630422841036, + "loss": 4.5325, + "step": 169550 + }, + { + "epoch": 1.4600550964187327, + "grad_norm": 3.640625, + "learning_rate": 0.00016109485851007718, + "loss": 4.4474, + "step": 169600 + }, + { + "epoch": 1.4604855371900827, + "grad_norm": 1.671875, + "learning_rate": 0.00016107340830751285, + "loss": 4.1831, + "step": 169650 + }, + { + "epoch": 1.4609159779614325, + "grad_norm": 3.21875, + "learning_rate": 0.00016105195362229173, + "loss": 4.6718, + "step": 169700 + }, + { + "epoch": 1.4613464187327825, + "grad_norm": 1.515625, + "learning_rate": 0.0001610304944559886, + "loss": 4.6422, + "step": 169750 + }, + { + "epoch": 1.4617768595041323, + "grad_norm": 4.125, + "learning_rate": 0.00016100903081017845, + "loss": 4.7274, + "step": 169800 + }, + { + "epoch": 1.462207300275482, + "grad_norm": 3.296875, + "learning_rate": 0.00016098756268643672, + "loss": 4.6843, + "step": 169850 + }, + { + "epoch": 1.4626377410468319, + "grad_norm": 2.25, + "learning_rate": 0.00016096609008633915, + "loss": 4.7582, + "step": 169900 + }, + { + "epoch": 1.4630681818181819, + "grad_norm": 1.375, + "learning_rate": 0.00016094461301146166, + "loss": 4.4084, + "step": 169950 + }, + { + "epoch": 1.4634986225895317, + "grad_norm": 2.75, + "learning_rate": 0.00016092313146338075, + "loss": 4.3323, + "step": 170000 + }, + { + "epoch": 1.4639290633608815, + "grad_norm": 2.484375, + "learning_rate": 0.00016090164544367306, + "loss": 4.8541, + "step": 170050 + }, + { + "epoch": 1.4643595041322315, + "grad_norm": 1.1484375, + "learning_rate": 0.00016088015495391563, + "loss": 4.8426, + "step": 170100 + }, + { + "epoch": 1.4647899449035813, + "grad_norm": 3.609375, + "learning_rate": 0.00016085865999568584, + "loss": 4.6635, + "step": 170150 + }, + { + "epoch": 1.465220385674931, + "grad_norm": 4.34375, + "learning_rate": 0.0001608371605705613, + "loss": 4.4282, + "step": 170200 + }, + { + "epoch": 1.4656508264462809, + "grad_norm": 2.484375, + "learning_rate": 0.00016081565668012008, + "loss": 4.2122, + "step": 170250 + }, + { + "epoch": 1.4660812672176309, + "grad_norm": 2.78125, + "learning_rate": 0.00016079414832594053, + "loss": 4.5892, + "step": 170300 + }, + { + "epoch": 1.4665117079889807, + "grad_norm": 3.59375, + "learning_rate": 0.00016077263550960127, + "loss": 4.7288, + "step": 170350 + }, + { + "epoch": 1.4669421487603307, + "grad_norm": 2.15625, + "learning_rate": 0.00016075111823268134, + "loss": 4.1907, + "step": 170400 + }, + { + "epoch": 1.4673725895316805, + "grad_norm": 2.421875, + "learning_rate": 0.00016072959649676003, + "loss": 4.8959, + "step": 170450 + }, + { + "epoch": 1.4678030303030303, + "grad_norm": 2.984375, + "learning_rate": 0.000160708070303417, + "loss": 4.906, + "step": 170500 + }, + { + "epoch": 1.46823347107438, + "grad_norm": 3.75, + "learning_rate": 0.00016068653965423222, + "loss": 4.8136, + "step": 170550 + }, + { + "epoch": 1.46866391184573, + "grad_norm": 1.46875, + "learning_rate": 0.000160665004550786, + "loss": 4.5021, + "step": 170600 + }, + { + "epoch": 1.4690943526170799, + "grad_norm": 2.3125, + "learning_rate": 0.00016064346499465902, + "loss": 4.3826, + "step": 170650 + }, + { + "epoch": 1.4695247933884297, + "grad_norm": 2.671875, + "learning_rate": 0.00016062192098743213, + "loss": 4.61, + "step": 170700 + }, + { + "epoch": 1.4699552341597797, + "grad_norm": 2.078125, + "learning_rate": 0.0001606003725306867, + "loss": 4.7178, + "step": 170750 + }, + { + "epoch": 1.4703856749311295, + "grad_norm": 2.296875, + "learning_rate": 0.00016057881962600428, + "loss": 4.34, + "step": 170800 + }, + { + "epoch": 1.4708161157024793, + "grad_norm": 1.4453125, + "learning_rate": 0.00016055726227496683, + "loss": 4.5995, + "step": 170850 + }, + { + "epoch": 1.471246556473829, + "grad_norm": 1.7734375, + "learning_rate": 0.00016053570047915667, + "loss": 4.5458, + "step": 170900 + }, + { + "epoch": 1.471676997245179, + "grad_norm": 4.59375, + "learning_rate": 0.00016051413424015632, + "loss": 4.8168, + "step": 170950 + }, + { + "epoch": 1.4721074380165289, + "grad_norm": 2.921875, + "learning_rate": 0.0001604925635595487, + "loss": 4.6998, + "step": 171000 + }, + { + "epoch": 1.4721074380165289, + "eval_loss": 5.125603675842285, + "eval_runtime": 21.8112, + "eval_samples_per_second": 29.343, + "eval_steps_per_second": 14.671, + "eval_tts_loss": 7.1247039040745905, + "step": 171000 + }, + { + "epoch": 1.472537878787879, + "grad_norm": 2.546875, + "learning_rate": 0.0001604709884389171, + "loss": 4.6223, + "step": 171050 + }, + { + "epoch": 1.4729683195592287, + "grad_norm": 1.453125, + "learning_rate": 0.00016044940887984503, + "loss": 3.9704, + "step": 171100 + }, + { + "epoch": 1.4733987603305785, + "grad_norm": 3.65625, + "learning_rate": 0.00016042782488391642, + "loss": 4.463, + "step": 171150 + }, + { + "epoch": 1.4738292011019283, + "grad_norm": 2.65625, + "learning_rate": 0.00016040623645271546, + "loss": 4.5133, + "step": 171200 + }, + { + "epoch": 1.4742596418732783, + "grad_norm": 3.234375, + "learning_rate": 0.0001603846435878267, + "loss": 4.949, + "step": 171250 + }, + { + "epoch": 1.474690082644628, + "grad_norm": 1.8984375, + "learning_rate": 0.00016036304629083508, + "loss": 4.2462, + "step": 171300 + }, + { + "epoch": 1.4751205234159779, + "grad_norm": 2.40625, + "learning_rate": 0.0001603414445633257, + "loss": 4.3681, + "step": 171350 + }, + { + "epoch": 1.475550964187328, + "grad_norm": 1.609375, + "learning_rate": 0.00016031983840688411, + "loss": 4.4016, + "step": 171400 + }, + { + "epoch": 1.4759814049586777, + "grad_norm": 4.125, + "learning_rate": 0.00016029822782309618, + "loss": 4.4909, + "step": 171450 + }, + { + "epoch": 1.4764118457300275, + "grad_norm": 2.28125, + "learning_rate": 0.00016027661281354806, + "loss": 4.7323, + "step": 171500 + }, + { + "epoch": 1.4768422865013773, + "grad_norm": 3.453125, + "learning_rate": 0.00016025499337982626, + "loss": 4.4793, + "step": 171550 + }, + { + "epoch": 1.4772727272727273, + "grad_norm": 3.171875, + "learning_rate": 0.00016023336952351755, + "loss": 4.8037, + "step": 171600 + }, + { + "epoch": 1.477703168044077, + "grad_norm": 2.734375, + "learning_rate": 0.00016021174124620914, + "loss": 4.2385, + "step": 171650 + }, + { + "epoch": 1.478133608815427, + "grad_norm": 3.234375, + "learning_rate": 0.00016019010854948846, + "loss": 4.4692, + "step": 171700 + }, + { + "epoch": 1.478564049586777, + "grad_norm": 2.140625, + "learning_rate": 0.0001601684714349433, + "loss": 4.3944, + "step": 171750 + }, + { + "epoch": 1.4789944903581267, + "grad_norm": 1.921875, + "learning_rate": 0.00016014682990416184, + "loss": 4.5051, + "step": 171800 + }, + { + "epoch": 1.4794249311294765, + "grad_norm": 0.89453125, + "learning_rate": 0.00016012518395873245, + "loss": 4.4617, + "step": 171850 + }, + { + "epoch": 1.4798553719008265, + "grad_norm": 1.9140625, + "learning_rate": 0.0001601035336002439, + "loss": 3.916, + "step": 171900 + }, + { + "epoch": 1.4802858126721763, + "grad_norm": 1.828125, + "learning_rate": 0.0001600818788302853, + "loss": 4.1627, + "step": 171950 + }, + { + "epoch": 1.4807162534435263, + "grad_norm": 3.640625, + "learning_rate": 0.00016006021965044612, + "loss": 4.5495, + "step": 172000 + }, + { + "epoch": 1.481146694214876, + "grad_norm": 2.6875, + "learning_rate": 0.00016003855606231597, + "loss": 4.5397, + "step": 172050 + }, + { + "epoch": 1.481577134986226, + "grad_norm": 2.125, + "learning_rate": 0.000160016888067485, + "loss": 4.1693, + "step": 172100 + }, + { + "epoch": 1.4820075757575757, + "grad_norm": 2.765625, + "learning_rate": 0.0001599952156675436, + "loss": 4.7106, + "step": 172150 + }, + { + "epoch": 1.4824380165289255, + "grad_norm": 4.84375, + "learning_rate": 0.00015997353886408245, + "loss": 4.5839, + "step": 172200 + }, + { + "epoch": 1.4828684573002755, + "grad_norm": 2.15625, + "learning_rate": 0.00015995185765869257, + "loss": 4.1468, + "step": 172250 + }, + { + "epoch": 1.4832988980716253, + "grad_norm": 2.015625, + "learning_rate": 0.00015993017205296535, + "loss": 4.5094, + "step": 172300 + }, + { + "epoch": 1.4837293388429753, + "grad_norm": 2.8125, + "learning_rate": 0.0001599084820484924, + "loss": 4.4477, + "step": 172350 + }, + { + "epoch": 1.484159779614325, + "grad_norm": 2.40625, + "learning_rate": 0.0001598867876468658, + "loss": 4.5997, + "step": 172400 + }, + { + "epoch": 1.484590220385675, + "grad_norm": 0.94921875, + "learning_rate": 0.0001598650888496778, + "loss": 4.4454, + "step": 172450 + }, + { + "epoch": 1.4850206611570247, + "grad_norm": 4.1875, + "learning_rate": 0.00015984338565852113, + "loss": 4.3022, + "step": 172500 + }, + { + "epoch": 1.4854511019283747, + "grad_norm": 3.25, + "learning_rate": 0.0001598216780749887, + "loss": 4.7081, + "step": 172550 + }, + { + "epoch": 1.4858815426997245, + "grad_norm": 1.8359375, + "learning_rate": 0.0001597999661006738, + "loss": 4.5399, + "step": 172600 + }, + { + "epoch": 1.4863119834710745, + "grad_norm": 4.15625, + "learning_rate": 0.00015977824973717003, + "loss": 4.6684, + "step": 172650 + }, + { + "epoch": 1.4867424242424243, + "grad_norm": 2.8125, + "learning_rate": 0.00015975652898607136, + "loss": 4.6484, + "step": 172700 + }, + { + "epoch": 1.487172865013774, + "grad_norm": 2.984375, + "learning_rate": 0.00015973480384897206, + "loss": 4.4801, + "step": 172750 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 2.640625, + "learning_rate": 0.00015971307432746662, + "loss": 4.717, + "step": 172800 + }, + { + "epoch": 1.4880337465564737, + "grad_norm": 2.4375, + "learning_rate": 0.00015969134042315006, + "loss": 4.7288, + "step": 172850 + }, + { + "epoch": 1.4884641873278237, + "grad_norm": 2.59375, + "learning_rate": 0.00015966960213761752, + "loss": 4.8935, + "step": 172900 + }, + { + "epoch": 1.4888946280991735, + "grad_norm": 1.4453125, + "learning_rate": 0.00015964785947246454, + "loss": 4.7739, + "step": 172950 + }, + { + "epoch": 1.4893250688705235, + "grad_norm": 2.46875, + "learning_rate": 0.00015962611242928705, + "loss": 4.4058, + "step": 173000 + }, + { + "epoch": 1.4897555096418733, + "grad_norm": 1.6640625, + "learning_rate": 0.00015960436100968117, + "loss": 4.6265, + "step": 173050 + }, + { + "epoch": 1.490185950413223, + "grad_norm": 2.921875, + "learning_rate": 0.00015958260521524347, + "loss": 4.5567, + "step": 173100 + }, + { + "epoch": 1.490616391184573, + "grad_norm": 2.890625, + "learning_rate": 0.0001595608450475707, + "loss": 4.5831, + "step": 173150 + }, + { + "epoch": 1.491046831955923, + "grad_norm": 2.890625, + "learning_rate": 0.00015953908050826007, + "loss": 4.1565, + "step": 173200 + }, + { + "epoch": 1.4914772727272727, + "grad_norm": 3.0, + "learning_rate": 0.00015951731159890903, + "loss": 4.1816, + "step": 173250 + }, + { + "epoch": 1.4919077134986227, + "grad_norm": 3.296875, + "learning_rate": 0.00015949553832111536, + "loss": 4.4599, + "step": 173300 + }, + { + "epoch": 1.4923381542699725, + "grad_norm": 4.3125, + "learning_rate": 0.00015947376067647722, + "loss": 4.6852, + "step": 173350 + }, + { + "epoch": 1.4927685950413223, + "grad_norm": 2.71875, + "learning_rate": 0.00015945197866659299, + "loss": 4.3893, + "step": 173400 + }, + { + "epoch": 1.493199035812672, + "grad_norm": 4.125, + "learning_rate": 0.00015943019229306145, + "loss": 4.891, + "step": 173450 + }, + { + "epoch": 1.4936294765840221, + "grad_norm": 4.46875, + "learning_rate": 0.00015940840155748162, + "loss": 4.1655, + "step": 173500 + }, + { + "epoch": 1.494059917355372, + "grad_norm": 1.8671875, + "learning_rate": 0.00015938660646145296, + "loss": 4.6828, + "step": 173550 + }, + { + "epoch": 1.4944903581267217, + "grad_norm": 2.15625, + "learning_rate": 0.00015936480700657517, + "loss": 4.6513, + "step": 173600 + }, + { + "epoch": 1.4949207988980717, + "grad_norm": 3.53125, + "learning_rate": 0.00015934300319444828, + "loss": 4.2477, + "step": 173650 + }, + { + "epoch": 1.4953512396694215, + "grad_norm": 3.0, + "learning_rate": 0.0001593211950266726, + "loss": 4.7214, + "step": 173700 + }, + { + "epoch": 1.4957816804407713, + "grad_norm": 1.9453125, + "learning_rate": 0.00015929938250484885, + "loss": 4.2496, + "step": 173750 + }, + { + "epoch": 1.496212121212121, + "grad_norm": 2.53125, + "learning_rate": 0.00015927756563057806, + "loss": 4.233, + "step": 173800 + }, + { + "epoch": 1.4966425619834711, + "grad_norm": 4.59375, + "learning_rate": 0.00015925574440546142, + "loss": 4.3277, + "step": 173850 + }, + { + "epoch": 1.497073002754821, + "grad_norm": 4.15625, + "learning_rate": 0.0001592339188311007, + "loss": 4.2822, + "step": 173900 + }, + { + "epoch": 1.497503443526171, + "grad_norm": 2.8125, + "learning_rate": 0.00015921208890909774, + "loss": 4.3395, + "step": 173950 + }, + { + "epoch": 1.4979338842975207, + "grad_norm": 3.171875, + "learning_rate": 0.00015919025464105485, + "loss": 4.2793, + "step": 174000 + }, + { + "epoch": 1.4979338842975207, + "eval_loss": 5.116052150726318, + "eval_runtime": 21.9341, + "eval_samples_per_second": 29.178, + "eval_steps_per_second": 14.589, + "eval_tts_loss": 7.171269260336755, + "step": 174000 + }, + { + "epoch": 1.4983643250688705, + "grad_norm": 2.75, + "learning_rate": 0.00015916841602857463, + "loss": 4.4916, + "step": 174050 + }, + { + "epoch": 1.4987947658402203, + "grad_norm": 2.375, + "learning_rate": 0.00015914657307325997, + "loss": 4.2553, + "step": 174100 + }, + { + "epoch": 1.4992252066115703, + "grad_norm": 2.4375, + "learning_rate": 0.0001591247257767141, + "loss": 4.3339, + "step": 174150 + }, + { + "epoch": 1.4996556473829201, + "grad_norm": 1.828125, + "learning_rate": 0.0001591028741405406, + "loss": 4.4354, + "step": 174200 + }, + { + "epoch": 1.5000860881542701, + "grad_norm": 1.6640625, + "learning_rate": 0.00015908101816634328, + "loss": 4.3378, + "step": 174250 + }, + { + "epoch": 1.50051652892562, + "grad_norm": 2.359375, + "learning_rate": 0.00015905915785572634, + "loss": 4.7259, + "step": 174300 + }, + { + "epoch": 1.5009469696969697, + "grad_norm": 1.828125, + "learning_rate": 0.0001590372932102943, + "loss": 4.6156, + "step": 174350 + }, + { + "epoch": 1.5013774104683195, + "grad_norm": 3.625, + "learning_rate": 0.00015901542423165193, + "loss": 4.5779, + "step": 174400 + }, + { + "epoch": 1.5018078512396693, + "grad_norm": 3.109375, + "learning_rate": 0.00015899355092140443, + "loss": 4.6976, + "step": 174450 + }, + { + "epoch": 1.5022382920110193, + "grad_norm": 3.015625, + "learning_rate": 0.00015897167328115722, + "loss": 4.0949, + "step": 174500 + }, + { + "epoch": 1.5026687327823691, + "grad_norm": 2.5, + "learning_rate": 0.00015894979131251603, + "loss": 4.2544, + "step": 174550 + }, + { + "epoch": 1.5030991735537191, + "grad_norm": 2.65625, + "learning_rate": 0.00015892790501708702, + "loss": 4.5139, + "step": 174600 + }, + { + "epoch": 1.503529614325069, + "grad_norm": 6.53125, + "learning_rate": 0.00015890601439647658, + "loss": 4.9411, + "step": 174650 + }, + { + "epoch": 1.5039600550964187, + "grad_norm": 3.5, + "learning_rate": 0.00015888411945229138, + "loss": 4.7355, + "step": 174700 + }, + { + "epoch": 1.5043904958677685, + "grad_norm": 2.109375, + "learning_rate": 0.00015886222018613854, + "loss": 4.4482, + "step": 174750 + }, + { + "epoch": 1.5048209366391183, + "grad_norm": 2.171875, + "learning_rate": 0.00015884031659962536, + "loss": 4.0418, + "step": 174800 + }, + { + "epoch": 1.5052513774104683, + "grad_norm": 1.609375, + "learning_rate": 0.00015881840869435955, + "loss": 4.3027, + "step": 174850 + }, + { + "epoch": 1.5056818181818183, + "grad_norm": 3.125, + "learning_rate": 0.00015879649647194914, + "loss": 4.4568, + "step": 174900 + }, + { + "epoch": 1.5061122589531681, + "grad_norm": 2.953125, + "learning_rate": 0.00015877457993400233, + "loss": 4.4248, + "step": 174950 + }, + { + "epoch": 1.506542699724518, + "grad_norm": 1.96875, + "learning_rate": 0.00015875265908212784, + "loss": 4.1974, + "step": 175000 + }, + { + "epoch": 1.5069731404958677, + "grad_norm": 3.53125, + "learning_rate": 0.00015873073391793456, + "loss": 4.5189, + "step": 175050 + }, + { + "epoch": 1.5074035812672175, + "grad_norm": 3.375, + "learning_rate": 0.00015870880444303177, + "loss": 4.2854, + "step": 175100 + }, + { + "epoch": 1.5078340220385675, + "grad_norm": 4.5625, + "learning_rate": 0.00015868687065902912, + "loss": 4.2583, + "step": 175150 + }, + { + "epoch": 1.5082644628099173, + "grad_norm": 2.375, + "learning_rate": 0.00015866493256753636, + "loss": 4.3532, + "step": 175200 + }, + { + "epoch": 1.5086949035812673, + "grad_norm": 2.4375, + "learning_rate": 0.0001586429901701638, + "loss": 4.573, + "step": 175250 + }, + { + "epoch": 1.5091253443526171, + "grad_norm": 2.46875, + "learning_rate": 0.0001586210434685219, + "loss": 4.4158, + "step": 175300 + }, + { + "epoch": 1.509555785123967, + "grad_norm": 3.4375, + "learning_rate": 0.00015859909246422155, + "loss": 4.5303, + "step": 175350 + }, + { + "epoch": 1.5099862258953167, + "grad_norm": 3.515625, + "learning_rate": 0.00015857713715887394, + "loss": 4.5698, + "step": 175400 + }, + { + "epoch": 1.5104166666666665, + "grad_norm": 1.59375, + "learning_rate": 0.0001585551775540904, + "loss": 4.4585, + "step": 175450 + }, + { + "epoch": 1.5108471074380165, + "grad_norm": 3.25, + "learning_rate": 0.0001585332136514829, + "loss": 4.4231, + "step": 175500 + }, + { + "epoch": 1.5112775482093666, + "grad_norm": 4.84375, + "learning_rate": 0.00015851124545266342, + "loss": 4.2434, + "step": 175550 + }, + { + "epoch": 1.5117079889807163, + "grad_norm": 1.890625, + "learning_rate": 0.00015848927295924442, + "loss": 4.5051, + "step": 175600 + }, + { + "epoch": 1.5121384297520661, + "grad_norm": 3.28125, + "learning_rate": 0.0001584672961728386, + "loss": 4.2596, + "step": 175650 + }, + { + "epoch": 1.512568870523416, + "grad_norm": 2.796875, + "learning_rate": 0.0001584453150950591, + "loss": 4.4772, + "step": 175700 + }, + { + "epoch": 1.5129993112947657, + "grad_norm": 1.875, + "learning_rate": 0.0001584233297275192, + "loss": 4.2572, + "step": 175750 + }, + { + "epoch": 1.5134297520661157, + "grad_norm": 1.7734375, + "learning_rate": 0.00015840134007183255, + "loss": 4.6716, + "step": 175800 + }, + { + "epoch": 1.5138601928374655, + "grad_norm": 3.5, + "learning_rate": 0.00015837934612961322, + "loss": 4.8343, + "step": 175850 + }, + { + "epoch": 1.5142906336088156, + "grad_norm": 4.125, + "learning_rate": 0.00015835734790247552, + "loss": 4.4672, + "step": 175900 + }, + { + "epoch": 1.5147210743801653, + "grad_norm": 4.4375, + "learning_rate": 0.00015833534539203402, + "loss": 4.4801, + "step": 175950 + }, + { + "epoch": 1.5151515151515151, + "grad_norm": 2.3125, + "learning_rate": 0.00015831333859990364, + "loss": 4.3717, + "step": 176000 + }, + { + "epoch": 1.515581955922865, + "grad_norm": 2.34375, + "learning_rate": 0.00015829132752769972, + "loss": 4.2226, + "step": 176050 + }, + { + "epoch": 1.5160123966942147, + "grad_norm": 3.140625, + "learning_rate": 0.00015826931217703777, + "loss": 4.31, + "step": 176100 + }, + { + "epoch": 1.5164428374655647, + "grad_norm": 1.734375, + "learning_rate": 0.0001582472925495337, + "loss": 4.3418, + "step": 176150 + }, + { + "epoch": 1.5168732782369148, + "grad_norm": 4.0, + "learning_rate": 0.00015822526864680363, + "loss": 4.5728, + "step": 176200 + }, + { + "epoch": 1.5173037190082646, + "grad_norm": 6.0625, + "learning_rate": 0.00015820324047046414, + "loss": 4.3989, + "step": 176250 + }, + { + "epoch": 1.5177341597796143, + "grad_norm": 2.546875, + "learning_rate": 0.000158181208022132, + "loss": 4.9379, + "step": 176300 + }, + { + "epoch": 1.5181646005509641, + "grad_norm": 2.265625, + "learning_rate": 0.0001581591713034244, + "loss": 4.6618, + "step": 176350 + }, + { + "epoch": 1.518595041322314, + "grad_norm": 1.859375, + "learning_rate": 0.00015813713031595877, + "loss": 4.2438, + "step": 176400 + }, + { + "epoch": 1.519025482093664, + "grad_norm": 2.96875, + "learning_rate": 0.00015811508506135283, + "loss": 4.3404, + "step": 176450 + }, + { + "epoch": 1.5194559228650137, + "grad_norm": 2.046875, + "learning_rate": 0.0001580930355412247, + "loss": 4.5756, + "step": 176500 + }, + { + "epoch": 1.5198863636363638, + "grad_norm": 2.359375, + "learning_rate": 0.00015807098175719278, + "loss": 4.6958, + "step": 176550 + }, + { + "epoch": 1.5203168044077136, + "grad_norm": 5.625, + "learning_rate": 0.0001580489237108757, + "loss": 4.6835, + "step": 176600 + }, + { + "epoch": 1.5207472451790633, + "grad_norm": 2.859375, + "learning_rate": 0.00015802686140389252, + "loss": 4.9358, + "step": 176650 + }, + { + "epoch": 1.5211776859504131, + "grad_norm": 2.3125, + "learning_rate": 0.00015800479483786257, + "loss": 4.7549, + "step": 176700 + }, + { + "epoch": 1.521608126721763, + "grad_norm": 3.609375, + "learning_rate": 0.00015798272401440548, + "loss": 4.8873, + "step": 176750 + }, + { + "epoch": 1.522038567493113, + "grad_norm": 3.15625, + "learning_rate": 0.0001579606489351412, + "loss": 4.6265, + "step": 176800 + }, + { + "epoch": 1.522469008264463, + "grad_norm": 4.71875, + "learning_rate": 0.00015793856960168997, + "loss": 4.6135, + "step": 176850 + }, + { + "epoch": 1.5228994490358128, + "grad_norm": 3.4375, + "learning_rate": 0.00015791648601567242, + "loss": 4.9678, + "step": 176900 + }, + { + "epoch": 1.5233298898071626, + "grad_norm": 2.109375, + "learning_rate": 0.0001578943981787094, + "loss": 4.5739, + "step": 176950 + }, + { + "epoch": 1.5237603305785123, + "grad_norm": 2.40625, + "learning_rate": 0.0001578723060924221, + "loss": 4.6492, + "step": 177000 + }, + { + "epoch": 1.5237603305785123, + "eval_loss": 5.11269474029541, + "eval_runtime": 21.9227, + "eval_samples_per_second": 29.194, + "eval_steps_per_second": 14.597, + "eval_tts_loss": 7.1675677132981805, + "step": 177000 + }, + { + "epoch": 1.5241907713498621, + "grad_norm": 3.96875, + "learning_rate": 0.0001578502097584321, + "loss": 4.6664, + "step": 177050 + }, + { + "epoch": 1.5246212121212122, + "grad_norm": 4.875, + "learning_rate": 0.00015782810917836112, + "loss": 4.1409, + "step": 177100 + }, + { + "epoch": 1.525051652892562, + "grad_norm": 2.03125, + "learning_rate": 0.00015780600435383134, + "loss": 4.3525, + "step": 177150 + }, + { + "epoch": 1.525482093663912, + "grad_norm": 3.28125, + "learning_rate": 0.00015778389528646523, + "loss": 4.3918, + "step": 177200 + }, + { + "epoch": 1.5259125344352618, + "grad_norm": 1.125, + "learning_rate": 0.00015776178197788557, + "loss": 4.5024, + "step": 177250 + }, + { + "epoch": 1.5263429752066116, + "grad_norm": 3.4375, + "learning_rate": 0.00015773966442971532, + "loss": 4.3126, + "step": 177300 + }, + { + "epoch": 1.5267734159779613, + "grad_norm": 3.75, + "learning_rate": 0.00015771754264357798, + "loss": 4.6656, + "step": 177350 + }, + { + "epoch": 1.5272038567493111, + "grad_norm": 2.375, + "learning_rate": 0.0001576954166210972, + "loss": 4.4017, + "step": 177400 + }, + { + "epoch": 1.5276342975206612, + "grad_norm": 2.71875, + "learning_rate": 0.00015767328636389694, + "loss": 4.3906, + "step": 177450 + }, + { + "epoch": 1.5280647382920112, + "grad_norm": 1.5, + "learning_rate": 0.00015765115187360156, + "loss": 4.4006, + "step": 177500 + }, + { + "epoch": 1.528495179063361, + "grad_norm": 3.203125, + "learning_rate": 0.00015762901315183566, + "loss": 4.5229, + "step": 177550 + }, + { + "epoch": 1.5289256198347108, + "grad_norm": 1.578125, + "learning_rate": 0.0001576068702002242, + "loss": 4.4675, + "step": 177600 + }, + { + "epoch": 1.5293560606060606, + "grad_norm": 2.828125, + "learning_rate": 0.00015758472302039245, + "loss": 4.5464, + "step": 177650 + }, + { + "epoch": 1.5297865013774103, + "grad_norm": 3.46875, + "learning_rate": 0.00015756257161396588, + "loss": 4.4539, + "step": 177700 + }, + { + "epoch": 1.5302169421487604, + "grad_norm": 1.109375, + "learning_rate": 0.00015754041598257042, + "loss": 4.192, + "step": 177750 + }, + { + "epoch": 1.5306473829201102, + "grad_norm": 2.53125, + "learning_rate": 0.0001575182561278322, + "loss": 4.6458, + "step": 177800 + }, + { + "epoch": 1.5310778236914602, + "grad_norm": 2.078125, + "learning_rate": 0.00015749609205137778, + "loss": 4.2214, + "step": 177850 + }, + { + "epoch": 1.53150826446281, + "grad_norm": 4.0, + "learning_rate": 0.00015747392375483384, + "loss": 4.2799, + "step": 177900 + }, + { + "epoch": 1.5319387052341598, + "grad_norm": 4.3125, + "learning_rate": 0.00015745175123982764, + "loss": 4.77, + "step": 177950 + }, + { + "epoch": 1.5323691460055096, + "grad_norm": 3.65625, + "learning_rate": 0.00015742957450798646, + "loss": 4.6492, + "step": 178000 + }, + { + "epoch": 1.5327995867768593, + "grad_norm": 3.8125, + "learning_rate": 0.00015740739356093808, + "loss": 4.7453, + "step": 178050 + }, + { + "epoch": 1.5332300275482094, + "grad_norm": 2.59375, + "learning_rate": 0.00015738520840031056, + "loss": 4.8577, + "step": 178100 + }, + { + "epoch": 1.5336604683195594, + "grad_norm": 2.5625, + "learning_rate": 0.00015736301902773215, + "loss": 4.4629, + "step": 178150 + }, + { + "epoch": 1.5340909090909092, + "grad_norm": 3.703125, + "learning_rate": 0.0001573408254448316, + "loss": 4.6416, + "step": 178200 + }, + { + "epoch": 1.534521349862259, + "grad_norm": 2.015625, + "learning_rate": 0.00015731862765323783, + "loss": 4.3571, + "step": 178250 + }, + { + "epoch": 1.5349517906336088, + "grad_norm": 2.03125, + "learning_rate": 0.0001572964256545801, + "loss": 4.4044, + "step": 178300 + }, + { + "epoch": 1.5353822314049586, + "grad_norm": 2.59375, + "learning_rate": 0.00015727421945048803, + "loss": 4.3758, + "step": 178350 + }, + { + "epoch": 1.5358126721763086, + "grad_norm": 1.5, + "learning_rate": 0.00015725200904259147, + "loss": 4.239, + "step": 178400 + }, + { + "epoch": 1.5362431129476584, + "grad_norm": 5.46875, + "learning_rate": 0.0001572297944325206, + "loss": 4.2754, + "step": 178450 + }, + { + "epoch": 1.5366735537190084, + "grad_norm": 1.921875, + "learning_rate": 0.000157207575621906, + "loss": 4.4921, + "step": 178500 + }, + { + "epoch": 1.5371039944903582, + "grad_norm": 1.203125, + "learning_rate": 0.00015718535261237838, + "loss": 4.2033, + "step": 178550 + }, + { + "epoch": 1.537534435261708, + "grad_norm": 1.5625, + "learning_rate": 0.000157163125405569, + "loss": 4.2648, + "step": 178600 + }, + { + "epoch": 1.5379648760330578, + "grad_norm": 3.640625, + "learning_rate": 0.00015714089400310912, + "loss": 4.3139, + "step": 178650 + }, + { + "epoch": 1.5383953168044076, + "grad_norm": 3.5625, + "learning_rate": 0.00015711865840663058, + "loss": 4.0926, + "step": 178700 + }, + { + "epoch": 1.5388257575757576, + "grad_norm": 2.703125, + "learning_rate": 0.00015709641861776544, + "loss": 4.1119, + "step": 178750 + }, + { + "epoch": 1.5392561983471076, + "grad_norm": 3.125, + "learning_rate": 0.00015707417463814598, + "loss": 4.4247, + "step": 178800 + }, + { + "epoch": 1.5396866391184574, + "grad_norm": 2.96875, + "learning_rate": 0.00015705192646940495, + "loss": 3.8769, + "step": 178850 + }, + { + "epoch": 1.5401170798898072, + "grad_norm": 2.9375, + "learning_rate": 0.0001570296741131752, + "loss": 4.9352, + "step": 178900 + }, + { + "epoch": 1.540547520661157, + "grad_norm": 3.078125, + "learning_rate": 0.00015700741757109011, + "loss": 4.5678, + "step": 178950 + }, + { + "epoch": 1.5409779614325068, + "grad_norm": 2.375, + "learning_rate": 0.00015698515684478323, + "loss": 4.179, + "step": 179000 + }, + { + "epoch": 1.5414084022038568, + "grad_norm": 2.796875, + "learning_rate": 0.00015696289193588845, + "loss": 4.0854, + "step": 179050 + }, + { + "epoch": 1.5418388429752066, + "grad_norm": 1.5703125, + "learning_rate": 0.00015694062284603993, + "loss": 4.7211, + "step": 179100 + }, + { + "epoch": 1.5422692837465566, + "grad_norm": 2.46875, + "learning_rate": 0.00015691834957687223, + "loss": 4.9799, + "step": 179150 + }, + { + "epoch": 1.5426997245179064, + "grad_norm": 2.796875, + "learning_rate": 0.00015689607213002015, + "loss": 4.777, + "step": 179200 + }, + { + "epoch": 1.5431301652892562, + "grad_norm": 3.921875, + "learning_rate": 0.00015687379050711878, + "loss": 4.6938, + "step": 179250 + }, + { + "epoch": 1.543560606060606, + "grad_norm": 0.859375, + "learning_rate": 0.00015685150470980355, + "loss": 4.4715, + "step": 179300 + }, + { + "epoch": 1.5439910468319558, + "grad_norm": 1.5390625, + "learning_rate": 0.00015682921473971018, + "loss": 4.6943, + "step": 179350 + }, + { + "epoch": 1.5444214876033058, + "grad_norm": 2.515625, + "learning_rate": 0.0001568069205984747, + "loss": 4.0785, + "step": 179400 + }, + { + "epoch": 1.5448519283746558, + "grad_norm": 2.265625, + "learning_rate": 0.00015678462228773353, + "loss": 4.8702, + "step": 179450 + }, + { + "epoch": 1.5452823691460056, + "grad_norm": 3.09375, + "learning_rate": 0.0001567623198091232, + "loss": 4.5307, + "step": 179500 + }, + { + "epoch": 1.5457128099173554, + "grad_norm": 2.609375, + "learning_rate": 0.00015674001316428075, + "loss": 4.3296, + "step": 179550 + }, + { + "epoch": 1.5461432506887052, + "grad_norm": 2.109375, + "learning_rate": 0.0001567177023548434, + "loss": 4.3114, + "step": 179600 + }, + { + "epoch": 1.546573691460055, + "grad_norm": 3.21875, + "learning_rate": 0.00015669538738244875, + "loss": 4.4844, + "step": 179650 + }, + { + "epoch": 1.547004132231405, + "grad_norm": 3.53125, + "learning_rate": 0.00015667306824873465, + "loss": 4.9129, + "step": 179700 + }, + { + "epoch": 1.5474345730027548, + "grad_norm": 3.46875, + "learning_rate": 0.00015665074495533926, + "loss": 4.4515, + "step": 179750 + }, + { + "epoch": 1.5478650137741048, + "grad_norm": 2.796875, + "learning_rate": 0.0001566284175039011, + "loss": 4.6278, + "step": 179800 + }, + { + "epoch": 1.5482954545454546, + "grad_norm": 1.3515625, + "learning_rate": 0.00015660608589605893, + "loss": 4.3313, + "step": 179850 + }, + { + "epoch": 1.5487258953168044, + "grad_norm": 2.703125, + "learning_rate": 0.00015658375013345182, + "loss": 4.4498, + "step": 179900 + }, + { + "epoch": 1.5491563360881542, + "grad_norm": 3.140625, + "learning_rate": 0.0001565614102177192, + "loss": 4.3749, + "step": 179950 + }, + { + "epoch": 1.549586776859504, + "grad_norm": 3.453125, + "learning_rate": 0.00015653906615050077, + "loss": 4.1746, + "step": 180000 + }, + { + "epoch": 1.549586776859504, + "eval_loss": 5.1044020652771, + "eval_runtime": 21.9398, + "eval_samples_per_second": 29.171, + "eval_steps_per_second": 14.585, + "eval_tts_loss": 7.21328017280712, + "step": 180000 + }, + { + "epoch": 1.550017217630854, + "grad_norm": 2.296875, + "learning_rate": 0.00015651671793343654, + "loss": 4.3718, + "step": 180050 + }, + { + "epoch": 1.550447658402204, + "grad_norm": 2.734375, + "learning_rate": 0.0001564943655681668, + "loss": 4.4271, + "step": 180100 + }, + { + "epoch": 1.5508780991735538, + "grad_norm": 2.734375, + "learning_rate": 0.00015647200905633218, + "loss": 4.0395, + "step": 180150 + }, + { + "epoch": 1.5513085399449036, + "grad_norm": 2.078125, + "learning_rate": 0.0001564496483995736, + "loss": 4.6101, + "step": 180200 + }, + { + "epoch": 1.5517389807162534, + "grad_norm": 2.234375, + "learning_rate": 0.00015642728359953226, + "loss": 4.5181, + "step": 180250 + }, + { + "epoch": 1.5521694214876032, + "grad_norm": 2.59375, + "learning_rate": 0.00015640491465784977, + "loss": 4.7049, + "step": 180300 + }, + { + "epoch": 1.5525998622589532, + "grad_norm": 2.828125, + "learning_rate": 0.00015638254157616786, + "loss": 4.2113, + "step": 180350 + }, + { + "epoch": 1.553030303030303, + "grad_norm": 4.625, + "learning_rate": 0.00015636016435612872, + "loss": 4.4224, + "step": 180400 + }, + { + "epoch": 1.553460743801653, + "grad_norm": 1.921875, + "learning_rate": 0.00015633778299937475, + "loss": 4.4158, + "step": 180450 + }, + { + "epoch": 1.5538911845730028, + "grad_norm": 2.71875, + "learning_rate": 0.00015631539750754877, + "loss": 4.4066, + "step": 180500 + }, + { + "epoch": 1.5543216253443526, + "grad_norm": 1.34375, + "learning_rate": 0.00015629300788229378, + "loss": 4.6345, + "step": 180550 + }, + { + "epoch": 1.5547520661157024, + "grad_norm": 2.203125, + "learning_rate": 0.00015627061412525309, + "loss": 4.6628, + "step": 180600 + }, + { + "epoch": 1.5551825068870524, + "grad_norm": 2.703125, + "learning_rate": 0.00015624821623807042, + "loss": 4.3681, + "step": 180650 + }, + { + "epoch": 1.5556129476584022, + "grad_norm": 1.3125, + "learning_rate": 0.00015622581422238967, + "loss": 4.7182, + "step": 180700 + }, + { + "epoch": 1.5560433884297522, + "grad_norm": 5.125, + "learning_rate": 0.0001562034080798551, + "loss": 4.6946, + "step": 180750 + }, + { + "epoch": 1.556473829201102, + "grad_norm": 2.859375, + "learning_rate": 0.00015618099781211138, + "loss": 4.4063, + "step": 180800 + }, + { + "epoch": 1.5569042699724518, + "grad_norm": 3.234375, + "learning_rate": 0.00015615858342080325, + "loss": 4.4838, + "step": 180850 + }, + { + "epoch": 1.5573347107438016, + "grad_norm": 2.1875, + "learning_rate": 0.00015613616490757595, + "loss": 4.5482, + "step": 180900 + }, + { + "epoch": 1.5577651515151514, + "grad_norm": 1.625, + "learning_rate": 0.0001561137422740749, + "loss": 4.6375, + "step": 180950 + }, + { + "epoch": 1.5581955922865014, + "grad_norm": 3.734375, + "learning_rate": 0.00015609131552194594, + "loss": 4.6877, + "step": 181000 + }, + { + "epoch": 1.5586260330578512, + "grad_norm": 3.0, + "learning_rate": 0.00015606888465283507, + "loss": 4.3437, + "step": 181050 + }, + { + "epoch": 1.5590564738292012, + "grad_norm": 2.171875, + "learning_rate": 0.0001560464496683887, + "loss": 4.664, + "step": 181100 + }, + { + "epoch": 1.559486914600551, + "grad_norm": 3.25, + "learning_rate": 0.00015602401057025352, + "loss": 4.3147, + "step": 181150 + }, + { + "epoch": 1.5599173553719008, + "grad_norm": 2.75, + "learning_rate": 0.00015600156736007652, + "loss": 4.6034, + "step": 181200 + }, + { + "epoch": 1.5603477961432506, + "grad_norm": 1.2578125, + "learning_rate": 0.00015597912003950494, + "loss": 4.4191, + "step": 181250 + }, + { + "epoch": 1.5607782369146006, + "grad_norm": 2.6875, + "learning_rate": 0.0001559566686101864, + "loss": 4.4776, + "step": 181300 + }, + { + "epoch": 1.5612086776859504, + "grad_norm": 1.96875, + "learning_rate": 0.0001559342130737688, + "loss": 4.4152, + "step": 181350 + }, + { + "epoch": 1.5616391184573004, + "grad_norm": 2.015625, + "learning_rate": 0.00015591175343190026, + "loss": 4.5063, + "step": 181400 + }, + { + "epoch": 1.5620695592286502, + "grad_norm": 3.625, + "learning_rate": 0.00015588928968622936, + "loss": 4.3081, + "step": 181450 + }, + { + "epoch": 1.5625, + "grad_norm": 1.9453125, + "learning_rate": 0.0001558668218384048, + "loss": 4.6451, + "step": 181500 + }, + { + "epoch": 1.5629304407713498, + "grad_norm": 1.0703125, + "learning_rate": 0.00015584434989007577, + "loss": 4.6819, + "step": 181550 + }, + { + "epoch": 1.5633608815426996, + "grad_norm": 3.875, + "learning_rate": 0.00015582187384289157, + "loss": 4.5812, + "step": 181600 + }, + { + "epoch": 1.5637913223140496, + "grad_norm": 1.796875, + "learning_rate": 0.00015579939369850197, + "loss": 4.4379, + "step": 181650 + }, + { + "epoch": 1.5642217630853994, + "grad_norm": 2.609375, + "learning_rate": 0.00015577690945855688, + "loss": 4.6937, + "step": 181700 + }, + { + "epoch": 1.5646522038567494, + "grad_norm": 3.1875, + "learning_rate": 0.00015575442112470667, + "loss": 4.32, + "step": 181750 + }, + { + "epoch": 1.5650826446280992, + "grad_norm": 1.7421875, + "learning_rate": 0.00015573192869860192, + "loss": 4.0504, + "step": 181800 + }, + { + "epoch": 1.565513085399449, + "grad_norm": 1.203125, + "learning_rate": 0.00015570943218189349, + "loss": 4.3327, + "step": 181850 + }, + { + "epoch": 1.5659435261707988, + "grad_norm": 2.5625, + "learning_rate": 0.0001556869315762326, + "loss": 4.4529, + "step": 181900 + }, + { + "epoch": 1.5663739669421488, + "grad_norm": 3.21875, + "learning_rate": 0.00015566442688327078, + "loss": 4.5494, + "step": 181950 + }, + { + "epoch": 1.5668044077134986, + "grad_norm": 3.84375, + "learning_rate": 0.00015564191810465974, + "loss": 4.7399, + "step": 182000 + }, + { + "epoch": 1.5672348484848486, + "grad_norm": 3.15625, + "learning_rate": 0.00015561940524205166, + "loss": 4.5206, + "step": 182050 + }, + { + "epoch": 1.5676652892561984, + "grad_norm": 2.640625, + "learning_rate": 0.00015559688829709894, + "loss": 4.1723, + "step": 182100 + }, + { + "epoch": 1.5680957300275482, + "grad_norm": 2.5, + "learning_rate": 0.00015557436727145415, + "loss": 4.7992, + "step": 182150 + }, + { + "epoch": 1.568526170798898, + "grad_norm": 2.703125, + "learning_rate": 0.00015555184216677046, + "loss": 4.282, + "step": 182200 + }, + { + "epoch": 1.5689566115702478, + "grad_norm": 2.25, + "learning_rate": 0.00015552931298470102, + "loss": 4.5177, + "step": 182250 + }, + { + "epoch": 1.5693870523415978, + "grad_norm": 4.40625, + "learning_rate": 0.0001555067797268995, + "loss": 4.5946, + "step": 182300 + }, + { + "epoch": 1.5698174931129476, + "grad_norm": 2.828125, + "learning_rate": 0.0001554842423950198, + "loss": 4.8026, + "step": 182350 + }, + { + "epoch": 1.5702479338842976, + "grad_norm": 1.3359375, + "learning_rate": 0.00015546170099071604, + "loss": 4.9309, + "step": 182400 + }, + { + "epoch": 1.5706783746556474, + "grad_norm": 2.34375, + "learning_rate": 0.0001554391555156428, + "loss": 4.3014, + "step": 182450 + }, + { + "epoch": 1.5711088154269972, + "grad_norm": 2.6875, + "learning_rate": 0.00015541660597145478, + "loss": 4.5463, + "step": 182500 + }, + { + "epoch": 1.571539256198347, + "grad_norm": 3.546875, + "learning_rate": 0.00015539405235980713, + "loss": 4.2778, + "step": 182550 + }, + { + "epoch": 1.571969696969697, + "grad_norm": 4.8125, + "learning_rate": 0.00015537149468235523, + "loss": 4.1458, + "step": 182600 + }, + { + "epoch": 1.5724001377410468, + "grad_norm": 2.96875, + "learning_rate": 0.00015534893294075474, + "loss": 4.178, + "step": 182650 + }, + { + "epoch": 1.5728305785123968, + "grad_norm": 2.46875, + "learning_rate": 0.00015532636713666169, + "loss": 4.5607, + "step": 182700 + }, + { + "epoch": 1.5732610192837466, + "grad_norm": 2.4375, + "learning_rate": 0.0001553037972717323, + "loss": 4.3971, + "step": 182750 + }, + { + "epoch": 1.5736914600550964, + "grad_norm": 2.84375, + "learning_rate": 0.0001552812233476232, + "loss": 4.0441, + "step": 182800 + }, + { + "epoch": 1.5741219008264462, + "grad_norm": 6.8125, + "learning_rate": 0.00015525864536599122, + "loss": 4.2612, + "step": 182850 + }, + { + "epoch": 1.574552341597796, + "grad_norm": 2.75, + "learning_rate": 0.00015523606332849357, + "loss": 4.0635, + "step": 182900 + }, + { + "epoch": 1.574982782369146, + "grad_norm": 0.87109375, + "learning_rate": 0.00015521347723678773, + "loss": 4.5155, + "step": 182950 + }, + { + "epoch": 1.575413223140496, + "grad_norm": 2.0, + "learning_rate": 0.00015519088709253147, + "loss": 4.4464, + "step": 183000 + }, + { + "epoch": 1.575413223140496, + "eval_loss": 5.101893424987793, + "eval_runtime": 21.716, + "eval_samples_per_second": 29.471, + "eval_steps_per_second": 14.736, + "eval_tts_loss": 7.200182005268934, + "step": 183000 + }, + { + "epoch": 1.5758436639118458, + "grad_norm": 2.8125, + "learning_rate": 0.00015516829289738282, + "loss": 4.3117, + "step": 183050 + }, + { + "epoch": 1.5762741046831956, + "grad_norm": 1.34375, + "learning_rate": 0.00015514569465300017, + "loss": 4.495, + "step": 183100 + }, + { + "epoch": 1.5767045454545454, + "grad_norm": 2.53125, + "learning_rate": 0.0001551230923610422, + "loss": 4.8702, + "step": 183150 + }, + { + "epoch": 1.5771349862258952, + "grad_norm": 1.9140625, + "learning_rate": 0.0001551004860231678, + "loss": 4.6704, + "step": 183200 + }, + { + "epoch": 1.5775654269972452, + "grad_norm": 1.6640625, + "learning_rate": 0.00015507787564103632, + "loss": 4.45, + "step": 183250 + }, + { + "epoch": 1.577995867768595, + "grad_norm": 2.671875, + "learning_rate": 0.00015505526121630726, + "loss": 4.448, + "step": 183300 + }, + { + "epoch": 1.578426308539945, + "grad_norm": 2.75, + "learning_rate": 0.00015503264275064052, + "loss": 4.4778, + "step": 183350 + }, + { + "epoch": 1.5788567493112948, + "grad_norm": 2.515625, + "learning_rate": 0.00015501002024569615, + "loss": 4.317, + "step": 183400 + }, + { + "epoch": 1.5792871900826446, + "grad_norm": 2.84375, + "learning_rate": 0.00015498739370313467, + "loss": 4.7482, + "step": 183450 + }, + { + "epoch": 1.5797176308539944, + "grad_norm": 1.859375, + "learning_rate": 0.0001549647631246168, + "loss": 4.6417, + "step": 183500 + }, + { + "epoch": 1.5801480716253442, + "grad_norm": 4.46875, + "learning_rate": 0.00015494212851180358, + "loss": 4.3391, + "step": 183550 + }, + { + "epoch": 1.5805785123966942, + "grad_norm": 3.390625, + "learning_rate": 0.00015491948986635635, + "loss": 4.28, + "step": 183600 + }, + { + "epoch": 1.5810089531680442, + "grad_norm": 1.3984375, + "learning_rate": 0.0001548968471899367, + "loss": 4.6358, + "step": 183650 + }, + { + "epoch": 1.581439393939394, + "grad_norm": 4.0625, + "learning_rate": 0.00015487420048420657, + "loss": 4.6617, + "step": 183700 + }, + { + "epoch": 1.5818698347107438, + "grad_norm": 2.859375, + "learning_rate": 0.00015485154975082826, + "loss": 4.4271, + "step": 183750 + }, + { + "epoch": 1.5823002754820936, + "grad_norm": 3.03125, + "learning_rate": 0.00015482889499146416, + "loss": 4.4431, + "step": 183800 + }, + { + "epoch": 1.5827307162534434, + "grad_norm": 2.59375, + "learning_rate": 0.00015480623620777711, + "loss": 4.7784, + "step": 183850 + }, + { + "epoch": 1.5831611570247934, + "grad_norm": 3.875, + "learning_rate": 0.0001547835734014303, + "loss": 4.4549, + "step": 183900 + }, + { + "epoch": 1.5835915977961432, + "grad_norm": 0.98046875, + "learning_rate": 0.00015476090657408703, + "loss": 4.5275, + "step": 183950 + }, + { + "epoch": 1.5840220385674932, + "grad_norm": 1.46875, + "learning_rate": 0.00015473823572741108, + "loss": 4.622, + "step": 184000 + }, + { + "epoch": 1.584452479338843, + "grad_norm": 2.640625, + "learning_rate": 0.00015471556086306637, + "loss": 4.5452, + "step": 184050 + }, + { + "epoch": 1.5848829201101928, + "grad_norm": 2.734375, + "learning_rate": 0.0001546928819827172, + "loss": 4.5012, + "step": 184100 + }, + { + "epoch": 1.5853133608815426, + "grad_norm": 1.6171875, + "learning_rate": 0.0001546701990880282, + "loss": 4.6538, + "step": 184150 + }, + { + "epoch": 1.5857438016528924, + "grad_norm": 2.953125, + "learning_rate": 0.0001546475121806642, + "loss": 4.3889, + "step": 184200 + }, + { + "epoch": 1.5861742424242424, + "grad_norm": 1.3828125, + "learning_rate": 0.00015462482126229042, + "loss": 4.3978, + "step": 184250 + }, + { + "epoch": 1.5866046831955924, + "grad_norm": 2.109375, + "learning_rate": 0.00015460212633457222, + "loss": 4.4821, + "step": 184300 + }, + { + "epoch": 1.5870351239669422, + "grad_norm": 2.6875, + "learning_rate": 0.00015457942739917547, + "loss": 4.4233, + "step": 184350 + }, + { + "epoch": 1.587465564738292, + "grad_norm": 6.28125, + "learning_rate": 0.00015455672445776616, + "loss": 4.6526, + "step": 184400 + }, + { + "epoch": 1.5878960055096418, + "grad_norm": 4.84375, + "learning_rate": 0.00015453401751201068, + "loss": 4.3834, + "step": 184450 + }, + { + "epoch": 1.5883264462809916, + "grad_norm": 2.125, + "learning_rate": 0.00015451130656357563, + "loss": 4.5531, + "step": 184500 + }, + { + "epoch": 1.5887568870523416, + "grad_norm": 5.03125, + "learning_rate": 0.00015448859161412796, + "loss": 4.6788, + "step": 184550 + }, + { + "epoch": 1.5891873278236914, + "grad_norm": 1.96875, + "learning_rate": 0.0001544658726653349, + "loss": 4.2766, + "step": 184600 + }, + { + "epoch": 1.5896177685950414, + "grad_norm": 2.609375, + "learning_rate": 0.00015444314971886397, + "loss": 4.7039, + "step": 184650 + }, + { + "epoch": 1.5900482093663912, + "grad_norm": 2.5625, + "learning_rate": 0.000154420422776383, + "loss": 4.6241, + "step": 184700 + }, + { + "epoch": 1.590478650137741, + "grad_norm": 2.421875, + "learning_rate": 0.00015439769183956008, + "loss": 4.4409, + "step": 184750 + }, + { + "epoch": 1.5909090909090908, + "grad_norm": 3.328125, + "learning_rate": 0.00015437495691006362, + "loss": 4.2831, + "step": 184800 + }, + { + "epoch": 1.5913395316804406, + "grad_norm": 3.1875, + "learning_rate": 0.00015435221798956232, + "loss": 4.5333, + "step": 184850 + }, + { + "epoch": 1.5917699724517906, + "grad_norm": 1.796875, + "learning_rate": 0.00015432947507972516, + "loss": 4.4133, + "step": 184900 + }, + { + "epoch": 1.5922004132231407, + "grad_norm": 3.765625, + "learning_rate": 0.00015430672818222144, + "loss": 4.6597, + "step": 184950 + }, + { + "epoch": 1.5926308539944904, + "grad_norm": 2.984375, + "learning_rate": 0.00015428397729872067, + "loss": 4.7734, + "step": 185000 + }, + { + "epoch": 1.5930612947658402, + "grad_norm": 3.5625, + "learning_rate": 0.00015426122243089282, + "loss": 4.5071, + "step": 185050 + }, + { + "epoch": 1.59349173553719, + "grad_norm": 3.4375, + "learning_rate": 0.00015423846358040793, + "loss": 4.7066, + "step": 185100 + }, + { + "epoch": 1.5939221763085398, + "grad_norm": 3.546875, + "learning_rate": 0.00015421570074893655, + "loss": 4.4004, + "step": 185150 + }, + { + "epoch": 1.5943526170798898, + "grad_norm": 2.109375, + "learning_rate": 0.00015419293393814935, + "loss": 4.5699, + "step": 185200 + }, + { + "epoch": 1.5947830578512396, + "grad_norm": 1.59375, + "learning_rate": 0.00015417016314971742, + "loss": 4.4724, + "step": 185250 + }, + { + "epoch": 1.5952134986225897, + "grad_norm": 2.609375, + "learning_rate": 0.00015414738838531207, + "loss": 4.2948, + "step": 185300 + }, + { + "epoch": 1.5956439393939394, + "grad_norm": 1.265625, + "learning_rate": 0.00015412460964660488, + "loss": 4.5494, + "step": 185350 + }, + { + "epoch": 1.5960743801652892, + "grad_norm": 0.86328125, + "learning_rate": 0.00015410182693526781, + "loss": 4.0839, + "step": 185400 + }, + { + "epoch": 1.596504820936639, + "grad_norm": 2.21875, + "learning_rate": 0.00015407904025297307, + "loss": 4.5799, + "step": 185450 + }, + { + "epoch": 1.5969352617079888, + "grad_norm": 4.84375, + "learning_rate": 0.00015405624960139312, + "loss": 4.5788, + "step": 185500 + }, + { + "epoch": 1.5973657024793388, + "grad_norm": 2.6875, + "learning_rate": 0.00015403345498220077, + "loss": 4.4688, + "step": 185550 + }, + { + "epoch": 1.5977961432506889, + "grad_norm": 2.703125, + "learning_rate": 0.00015401065639706905, + "loss": 4.4344, + "step": 185600 + }, + { + "epoch": 1.5982265840220387, + "grad_norm": 3.484375, + "learning_rate": 0.00015398785384767135, + "loss": 4.6916, + "step": 185650 + }, + { + "epoch": 1.5986570247933884, + "grad_norm": 2.96875, + "learning_rate": 0.00015396504733568135, + "loss": 4.6307, + "step": 185700 + }, + { + "epoch": 1.5990874655647382, + "grad_norm": 2.859375, + "learning_rate": 0.00015394223686277298, + "loss": 4.7216, + "step": 185750 + }, + { + "epoch": 1.599517906336088, + "grad_norm": 3.9375, + "learning_rate": 0.00015391942243062051, + "loss": 4.4658, + "step": 185800 + }, + { + "epoch": 1.599948347107438, + "grad_norm": 3.203125, + "learning_rate": 0.0001538966040408984, + "loss": 4.5408, + "step": 185850 + }, + { + "epoch": 1.6003787878787878, + "grad_norm": 3.453125, + "learning_rate": 0.00015387378169528154, + "loss": 4.1321, + "step": 185900 + }, + { + "epoch": 1.6008092286501379, + "grad_norm": 3.921875, + "learning_rate": 0.00015385095539544502, + "loss": 4.4429, + "step": 185950 + }, + { + "epoch": 1.6012396694214877, + "grad_norm": 5.28125, + "learning_rate": 0.00015382812514306424, + "loss": 4.4706, + "step": 186000 + }, + { + "epoch": 1.6012396694214877, + "eval_loss": 5.098446846008301, + "eval_runtime": 22.2285, + "eval_samples_per_second": 28.792, + "eval_steps_per_second": 14.396, + "eval_tts_loss": 7.176906156351496, + "step": 186000 + }, + { + "epoch": 1.6016701101928374, + "grad_norm": 3.234375, + "learning_rate": 0.00015380529093981488, + "loss": 4.5768, + "step": 186050 + }, + { + "epoch": 1.6021005509641872, + "grad_norm": 2.46875, + "learning_rate": 0.00015378245278737293, + "loss": 4.2978, + "step": 186100 + }, + { + "epoch": 1.602530991735537, + "grad_norm": 2.421875, + "learning_rate": 0.00015375961068741468, + "loss": 4.6696, + "step": 186150 + }, + { + "epoch": 1.602961432506887, + "grad_norm": 2.015625, + "learning_rate": 0.00015373676464161667, + "loss": 4.2091, + "step": 186200 + }, + { + "epoch": 1.603391873278237, + "grad_norm": 1.859375, + "learning_rate": 0.00015371391465165572, + "loss": 4.1663, + "step": 186250 + }, + { + "epoch": 1.6038223140495869, + "grad_norm": 2.125, + "learning_rate": 0.00015369106071920903, + "loss": 4.6291, + "step": 186300 + }, + { + "epoch": 1.6042527548209367, + "grad_norm": 1.8515625, + "learning_rate": 0.000153668202845954, + "loss": 3.8614, + "step": 186350 + }, + { + "epoch": 1.6046831955922864, + "grad_norm": 2.140625, + "learning_rate": 0.00015364534103356833, + "loss": 4.5498, + "step": 186400 + }, + { + "epoch": 1.6051136363636362, + "grad_norm": 4.09375, + "learning_rate": 0.00015362247528373008, + "loss": 4.4633, + "step": 186450 + }, + { + "epoch": 1.6055440771349863, + "grad_norm": 3.546875, + "learning_rate": 0.00015359960559811747, + "loss": 4.5751, + "step": 186500 + }, + { + "epoch": 1.605974517906336, + "grad_norm": 2.84375, + "learning_rate": 0.00015357673197840917, + "loss": 4.6192, + "step": 186550 + }, + { + "epoch": 1.606404958677686, + "grad_norm": 4.21875, + "learning_rate": 0.000153553854426284, + "loss": 4.9957, + "step": 186600 + }, + { + "epoch": 1.6068353994490359, + "grad_norm": 2.5625, + "learning_rate": 0.00015353097294342117, + "loss": 4.5131, + "step": 186650 + }, + { + "epoch": 1.6072658402203857, + "grad_norm": 0.99609375, + "learning_rate": 0.00015350808753150007, + "loss": 4.3742, + "step": 186700 + }, + { + "epoch": 1.6076962809917354, + "grad_norm": 2.9375, + "learning_rate": 0.00015348519819220047, + "loss": 4.55, + "step": 186750 + }, + { + "epoch": 1.6081267217630852, + "grad_norm": 2.40625, + "learning_rate": 0.00015346230492720243, + "loss": 4.6985, + "step": 186800 + }, + { + "epoch": 1.6085571625344353, + "grad_norm": 4.25, + "learning_rate": 0.00015343940773818618, + "loss": 4.6359, + "step": 186850 + }, + { + "epoch": 1.6089876033057853, + "grad_norm": 2.34375, + "learning_rate": 0.00015341650662683241, + "loss": 4.5115, + "step": 186900 + }, + { + "epoch": 1.609418044077135, + "grad_norm": 0.9375, + "learning_rate": 0.000153393601594822, + "loss": 3.9881, + "step": 186950 + }, + { + "epoch": 1.6098484848484849, + "grad_norm": 2.671875, + "learning_rate": 0.0001533706926438361, + "loss": 4.2635, + "step": 187000 + }, + { + "epoch": 1.6102789256198347, + "grad_norm": 2.890625, + "learning_rate": 0.00015334777977555616, + "loss": 4.1501, + "step": 187050 + }, + { + "epoch": 1.6107093663911844, + "grad_norm": 1.609375, + "learning_rate": 0.000153324862991664, + "loss": 4.1152, + "step": 187100 + }, + { + "epoch": 1.6111398071625345, + "grad_norm": 2.390625, + "learning_rate": 0.00015330194229384162, + "loss": 3.8501, + "step": 187150 + }, + { + "epoch": 1.6115702479338843, + "grad_norm": 3.546875, + "learning_rate": 0.00015327901768377137, + "loss": 4.6216, + "step": 187200 + }, + { + "epoch": 1.6120006887052343, + "grad_norm": 2.59375, + "learning_rate": 0.00015325608916313583, + "loss": 4.8409, + "step": 187250 + }, + { + "epoch": 1.612431129476584, + "grad_norm": 3.09375, + "learning_rate": 0.00015323315673361796, + "loss": 4.0541, + "step": 187300 + }, + { + "epoch": 1.6128615702479339, + "grad_norm": 3.421875, + "learning_rate": 0.0001532102203969009, + "loss": 4.5262, + "step": 187350 + }, + { + "epoch": 1.6132920110192837, + "grad_norm": 2.125, + "learning_rate": 0.00015318728015466815, + "loss": 4.6309, + "step": 187400 + }, + { + "epoch": 1.6137224517906334, + "grad_norm": 2.875, + "learning_rate": 0.00015316433600860347, + "loss": 4.4653, + "step": 187450 + }, + { + "epoch": 1.6141528925619835, + "grad_norm": 2.59375, + "learning_rate": 0.00015314138796039092, + "loss": 4.3707, + "step": 187500 + }, + { + "epoch": 1.6145833333333335, + "grad_norm": 3.546875, + "learning_rate": 0.00015311843601171482, + "loss": 4.5553, + "step": 187550 + }, + { + "epoch": 1.6150137741046833, + "grad_norm": 3.375, + "learning_rate": 0.00015309548016425982, + "loss": 4.2366, + "step": 187600 + }, + { + "epoch": 1.615444214876033, + "grad_norm": 2.953125, + "learning_rate": 0.0001530725204197108, + "loss": 4.2739, + "step": 187650 + }, + { + "epoch": 1.6158746556473829, + "grad_norm": 2.6875, + "learning_rate": 0.00015304955677975296, + "loss": 4.6743, + "step": 187700 + }, + { + "epoch": 1.6163050964187327, + "grad_norm": 3.1875, + "learning_rate": 0.0001530265892460718, + "loss": 4.9168, + "step": 187750 + }, + { + "epoch": 1.6167355371900827, + "grad_norm": 1.5234375, + "learning_rate": 0.00015300361782035312, + "loss": 4.8937, + "step": 187800 + }, + { + "epoch": 1.6171659779614325, + "grad_norm": 3.171875, + "learning_rate": 0.00015298064250428287, + "loss": 4.6395, + "step": 187850 + }, + { + "epoch": 1.6175964187327825, + "grad_norm": 5.375, + "learning_rate": 0.00015295766329954748, + "loss": 4.5475, + "step": 187900 + }, + { + "epoch": 1.6180268595041323, + "grad_norm": 2.171875, + "learning_rate": 0.00015293468020783352, + "loss": 4.2386, + "step": 187950 + }, + { + "epoch": 1.618457300275482, + "grad_norm": 3.640625, + "learning_rate": 0.00015291169323082798, + "loss": 3.9456, + "step": 188000 + }, + { + "epoch": 1.6188877410468319, + "grad_norm": 1.3828125, + "learning_rate": 0.00015288870237021795, + "loss": 4.2349, + "step": 188050 + }, + { + "epoch": 1.6193181818181817, + "grad_norm": 1.890625, + "learning_rate": 0.00015286570762769098, + "loss": 4.4596, + "step": 188100 + }, + { + "epoch": 1.6197486225895317, + "grad_norm": 3.609375, + "learning_rate": 0.0001528427090049348, + "loss": 4.2438, + "step": 188150 + }, + { + "epoch": 1.6201790633608817, + "grad_norm": 1.28125, + "learning_rate": 0.00015281970650363747, + "loss": 4.5402, + "step": 188200 + }, + { + "epoch": 1.6206095041322315, + "grad_norm": 2.84375, + "learning_rate": 0.00015279670012548734, + "loss": 4.7755, + "step": 188250 + }, + { + "epoch": 1.6210399449035813, + "grad_norm": 3.953125, + "learning_rate": 0.00015277368987217301, + "loss": 4.5909, + "step": 188300 + }, + { + "epoch": 1.621470385674931, + "grad_norm": 1.0859375, + "learning_rate": 0.00015275067574538337, + "loss": 4.1169, + "step": 188350 + }, + { + "epoch": 1.6219008264462809, + "grad_norm": 2.734375, + "learning_rate": 0.00015272765774680767, + "loss": 4.47, + "step": 188400 + }, + { + "epoch": 1.6223312672176309, + "grad_norm": 2.4375, + "learning_rate": 0.0001527046358781353, + "loss": 4.5197, + "step": 188450 + }, + { + "epoch": 1.6227617079889807, + "grad_norm": 0.875, + "learning_rate": 0.0001526816101410561, + "loss": 4.3642, + "step": 188500 + }, + { + "epoch": 1.6231921487603307, + "grad_norm": 3.203125, + "learning_rate": 0.00015265858053726002, + "loss": 4.4812, + "step": 188550 + }, + { + "epoch": 1.6236225895316805, + "grad_norm": 2.625, + "learning_rate": 0.00015263554706843743, + "loss": 4.1564, + "step": 188600 + }, + { + "epoch": 1.6240530303030303, + "grad_norm": 2.90625, + "learning_rate": 0.00015261250973627895, + "loss": 4.4735, + "step": 188650 + }, + { + "epoch": 1.62448347107438, + "grad_norm": 3.90625, + "learning_rate": 0.00015258946854247545, + "loss": 4.3295, + "step": 188700 + }, + { + "epoch": 1.6249139118457299, + "grad_norm": 2.078125, + "learning_rate": 0.00015256642348871814, + "loss": 4.4475, + "step": 188750 + }, + { + "epoch": 1.6253443526170799, + "grad_norm": 2.40625, + "learning_rate": 0.00015254337457669837, + "loss": 4.5497, + "step": 188800 + }, + { + "epoch": 1.62577479338843, + "grad_norm": 2.765625, + "learning_rate": 0.000152520321808108, + "loss": 4.4441, + "step": 188850 + }, + { + "epoch": 1.6262052341597797, + "grad_norm": 4.4375, + "learning_rate": 0.00015249726518463903, + "loss": 4.3371, + "step": 188900 + }, + { + "epoch": 1.6266356749311295, + "grad_norm": 2.359375, + "learning_rate": 0.00015247420470798368, + "loss": 4.3851, + "step": 188950 + }, + { + "epoch": 1.6270661157024793, + "grad_norm": 3.046875, + "learning_rate": 0.0001524511403798347, + "loss": 4.4025, + "step": 189000 + }, + { + "epoch": 1.6270661157024793, + "eval_loss": 5.09128475189209, + "eval_runtime": 21.7228, + "eval_samples_per_second": 29.462, + "eval_steps_per_second": 14.731, + "eval_tts_loss": 7.2094475086891014, + "step": 189000 + }, + { + "epoch": 1.627496556473829, + "grad_norm": 4.90625, + "learning_rate": 0.0001524280722018848, + "loss": 4.5485, + "step": 189050 + }, + { + "epoch": 1.627926997245179, + "grad_norm": 2.640625, + "learning_rate": 0.0001524050001758272, + "loss": 4.8776, + "step": 189100 + }, + { + "epoch": 1.6283574380165289, + "grad_norm": 2.53125, + "learning_rate": 0.00015238192430335536, + "loss": 4.4951, + "step": 189150 + }, + { + "epoch": 1.628787878787879, + "grad_norm": 4.625, + "learning_rate": 0.00015235884458616298, + "loss": 4.064, + "step": 189200 + }, + { + "epoch": 1.6292183195592287, + "grad_norm": 2.890625, + "learning_rate": 0.00015233576102594406, + "loss": 4.4201, + "step": 189250 + }, + { + "epoch": 1.6296487603305785, + "grad_norm": 3.328125, + "learning_rate": 0.00015231267362439285, + "loss": 4.2791, + "step": 189300 + }, + { + "epoch": 1.6300792011019283, + "grad_norm": 2.546875, + "learning_rate": 0.000152289582383204, + "loss": 4.7889, + "step": 189350 + }, + { + "epoch": 1.630509641873278, + "grad_norm": 2.5625, + "learning_rate": 0.0001522664873040723, + "loss": 4.3743, + "step": 189400 + }, + { + "epoch": 1.630940082644628, + "grad_norm": 2.484375, + "learning_rate": 0.00015224338838869286, + "loss": 4.4437, + "step": 189450 + }, + { + "epoch": 1.631370523415978, + "grad_norm": 2.53125, + "learning_rate": 0.00015222028563876113, + "loss": 4.7092, + "step": 189500 + }, + { + "epoch": 1.631800964187328, + "grad_norm": 2.609375, + "learning_rate": 0.00015219717905597281, + "loss": 4.5256, + "step": 189550 + }, + { + "epoch": 1.6322314049586777, + "grad_norm": 5.09375, + "learning_rate": 0.00015217406864202382, + "loss": 4.5503, + "step": 189600 + }, + { + "epoch": 1.6326618457300275, + "grad_norm": 3.21875, + "learning_rate": 0.00015215095439861047, + "loss": 4.4078, + "step": 189650 + }, + { + "epoch": 1.6330922865013773, + "grad_norm": 2.640625, + "learning_rate": 0.00015212783632742927, + "loss": 4.8465, + "step": 189700 + }, + { + "epoch": 1.6335227272727273, + "grad_norm": 2.625, + "learning_rate": 0.000152104714430177, + "loss": 4.704, + "step": 189750 + }, + { + "epoch": 1.633953168044077, + "grad_norm": 2.484375, + "learning_rate": 0.00015208158870855086, + "loss": 4.7097, + "step": 189800 + }, + { + "epoch": 1.634383608815427, + "grad_norm": 2.59375, + "learning_rate": 0.00015205845916424813, + "loss": 4.1001, + "step": 189850 + }, + { + "epoch": 1.634814049586777, + "grad_norm": 2.4375, + "learning_rate": 0.0001520353257989665, + "loss": 4.47, + "step": 189900 + }, + { + "epoch": 1.6352444903581267, + "grad_norm": 1.6171875, + "learning_rate": 0.00015201218861440392, + "loss": 4.299, + "step": 189950 + }, + { + "epoch": 1.6356749311294765, + "grad_norm": 2.453125, + "learning_rate": 0.00015198904761225856, + "loss": 4.4968, + "step": 190000 + }, + { + "epoch": 1.6361053719008265, + "grad_norm": 2.8125, + "learning_rate": 0.00015196590279422905, + "loss": 4.8038, + "step": 190050 + }, + { + "epoch": 1.6365358126721763, + "grad_norm": 3.53125, + "learning_rate": 0.00015194275416201402, + "loss": 4.4972, + "step": 190100 + }, + { + "epoch": 1.6369662534435263, + "grad_norm": 3.671875, + "learning_rate": 0.00015191960171731262, + "loss": 4.456, + "step": 190150 + }, + { + "epoch": 1.637396694214876, + "grad_norm": 1.7890625, + "learning_rate": 0.00015189644546182416, + "loss": 4.8478, + "step": 190200 + }, + { + "epoch": 1.637827134986226, + "grad_norm": 2.78125, + "learning_rate": 0.00015187328539724822, + "loss": 4.5881, + "step": 190250 + }, + { + "epoch": 1.6382575757575757, + "grad_norm": 2.625, + "learning_rate": 0.0001518501215252848, + "loss": 4.3995, + "step": 190300 + }, + { + "epoch": 1.6386880165289255, + "grad_norm": 2.046875, + "learning_rate": 0.00015182695384763397, + "loss": 4.3681, + "step": 190350 + }, + { + "epoch": 1.6391184573002755, + "grad_norm": 2.828125, + "learning_rate": 0.00015180378236599626, + "loss": 3.9152, + "step": 190400 + }, + { + "epoch": 1.6395488980716253, + "grad_norm": 1.9375, + "learning_rate": 0.00015178060708207237, + "loss": 4.5084, + "step": 190450 + }, + { + "epoch": 1.6399793388429753, + "grad_norm": 3.875, + "learning_rate": 0.00015175742799756337, + "loss": 4.7896, + "step": 190500 + }, + { + "epoch": 1.640409779614325, + "grad_norm": 4.09375, + "learning_rate": 0.0001517342451141705, + "loss": 4.5116, + "step": 190550 + }, + { + "epoch": 1.640840220385675, + "grad_norm": 1.765625, + "learning_rate": 0.00015171105843359532, + "loss": 4.305, + "step": 190600 + }, + { + "epoch": 1.6412706611570247, + "grad_norm": 2.34375, + "learning_rate": 0.0001516878679575397, + "loss": 4.2678, + "step": 190650 + }, + { + "epoch": 1.6417011019283747, + "grad_norm": 3.140625, + "learning_rate": 0.0001516646736877058, + "loss": 4.5873, + "step": 190700 + }, + { + "epoch": 1.6421315426997245, + "grad_norm": 3.109375, + "learning_rate": 0.00015164147562579604, + "loss": 4.5, + "step": 190750 + }, + { + "epoch": 1.6425619834710745, + "grad_norm": 2.984375, + "learning_rate": 0.00015161827377351304, + "loss": 4.5827, + "step": 190800 + }, + { + "epoch": 1.6429924242424243, + "grad_norm": 2.234375, + "learning_rate": 0.00015159506813255982, + "loss": 4.2364, + "step": 190850 + }, + { + "epoch": 1.643422865013774, + "grad_norm": 2.828125, + "learning_rate": 0.00015157185870463962, + "loss": 4.5109, + "step": 190900 + }, + { + "epoch": 1.643853305785124, + "grad_norm": 1.3125, + "learning_rate": 0.0001515486454914559, + "loss": 4.3653, + "step": 190950 + }, + { + "epoch": 1.6442837465564737, + "grad_norm": 1.3984375, + "learning_rate": 0.00015152542849471257, + "loss": 4.5856, + "step": 191000 + }, + { + "epoch": 1.6447141873278237, + "grad_norm": 1.9140625, + "learning_rate": 0.00015150220771611362, + "loss": 4.163, + "step": 191050 + }, + { + "epoch": 1.6451446280991735, + "grad_norm": 2.609375, + "learning_rate": 0.00015147898315736344, + "loss": 4.4394, + "step": 191100 + }, + { + "epoch": 1.6455750688705235, + "grad_norm": 2.796875, + "learning_rate": 0.00015145575482016665, + "loss": 4.8728, + "step": 191150 + }, + { + "epoch": 1.6460055096418733, + "grad_norm": 1.78125, + "learning_rate": 0.0001514325227062282, + "loss": 4.4098, + "step": 191200 + }, + { + "epoch": 1.646435950413223, + "grad_norm": 1.9140625, + "learning_rate": 0.0001514092868172532, + "loss": 4.7922, + "step": 191250 + }, + { + "epoch": 1.646866391184573, + "grad_norm": 2.53125, + "learning_rate": 0.00015138604715494715, + "loss": 4.5203, + "step": 191300 + }, + { + "epoch": 1.647296831955923, + "grad_norm": 1.640625, + "learning_rate": 0.00015136280372101584, + "loss": 4.603, + "step": 191350 + }, + { + "epoch": 1.6477272727272727, + "grad_norm": 3.40625, + "learning_rate": 0.00015133955651716523, + "loss": 4.2557, + "step": 191400 + }, + { + "epoch": 1.6481577134986227, + "grad_norm": 2.140625, + "learning_rate": 0.00015131630554510164, + "loss": 4.4931, + "step": 191450 + }, + { + "epoch": 1.6485881542699725, + "grad_norm": 3.4375, + "learning_rate": 0.0001512930508065316, + "loss": 4.4463, + "step": 191500 + }, + { + "epoch": 1.6490185950413223, + "grad_norm": 1.9140625, + "learning_rate": 0.00015126979230316203, + "loss": 4.1648, + "step": 191550 + }, + { + "epoch": 1.649449035812672, + "grad_norm": 3.703125, + "learning_rate": 0.0001512465300367, + "loss": 4.6763, + "step": 191600 + }, + { + "epoch": 1.649879476584022, + "grad_norm": 2.8125, + "learning_rate": 0.0001512232640088529, + "loss": 4.342, + "step": 191650 + }, + { + "epoch": 1.650309917355372, + "grad_norm": 3.75, + "learning_rate": 0.00015119999422132848, + "loss": 4.6833, + "step": 191700 + }, + { + "epoch": 1.650740358126722, + "grad_norm": 3.609375, + "learning_rate": 0.0001511767206758346, + "loss": 4.6345, + "step": 191750 + }, + { + "epoch": 1.6511707988980717, + "grad_norm": 3.0625, + "learning_rate": 0.00015115344337407954, + "loss": 4.7899, + "step": 191800 + }, + { + "epoch": 1.6516012396694215, + "grad_norm": 2.859375, + "learning_rate": 0.00015113016231777182, + "loss": 4.3758, + "step": 191850 + }, + { + "epoch": 1.6520316804407713, + "grad_norm": 1.671875, + "learning_rate": 0.00015110687750862018, + "loss": 4.597, + "step": 191900 + }, + { + "epoch": 1.652462121212121, + "grad_norm": 3.140625, + "learning_rate": 0.0001510835889483337, + "loss": 4.2432, + "step": 191950 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 4.46875, + "learning_rate": 0.0001510602966386217, + "loss": 4.4862, + "step": 192000 + }, + { + "epoch": 1.6528925619834711, + "eval_loss": 5.084532260894775, + "eval_runtime": 21.8396, + "eval_samples_per_second": 29.305, + "eval_steps_per_second": 14.652, + "eval_tts_loss": 7.2402833804910385, + "step": 192000 + }, + { + "epoch": 1.653323002754821, + "grad_norm": 3.40625, + "learning_rate": 0.00015103700058119375, + "loss": 4.6847, + "step": 192050 + }, + { + "epoch": 1.653753443526171, + "grad_norm": 2.359375, + "learning_rate": 0.0001510137007777598, + "loss": 4.4774, + "step": 192100 + }, + { + "epoch": 1.6541838842975207, + "grad_norm": 3.671875, + "learning_rate": 0.00015099039723002997, + "loss": 4.2566, + "step": 192150 + }, + { + "epoch": 1.6546143250688705, + "grad_norm": 2.4375, + "learning_rate": 0.00015096708993971467, + "loss": 4.6564, + "step": 192200 + }, + { + "epoch": 1.6550447658402203, + "grad_norm": 3.078125, + "learning_rate": 0.00015094377890852466, + "loss": 4.8912, + "step": 192250 + }, + { + "epoch": 1.65547520661157, + "grad_norm": 2.8125, + "learning_rate": 0.00015092046413817087, + "loss": 4.1929, + "step": 192300 + }, + { + "epoch": 1.6559056473829201, + "grad_norm": 1.1328125, + "learning_rate": 0.00015089714563036458, + "loss": 4.3709, + "step": 192350 + }, + { + "epoch": 1.6563360881542701, + "grad_norm": 2.78125, + "learning_rate": 0.00015087382338681727, + "loss": 4.2319, + "step": 192400 + }, + { + "epoch": 1.65676652892562, + "grad_norm": 3.28125, + "learning_rate": 0.00015085049740924084, + "loss": 4.5131, + "step": 192450 + }, + { + "epoch": 1.6571969696969697, + "grad_norm": 4.1875, + "learning_rate": 0.00015082716769934727, + "loss": 4.3805, + "step": 192500 + }, + { + "epoch": 1.6576274104683195, + "grad_norm": 3.40625, + "learning_rate": 0.00015080383425884896, + "loss": 4.6315, + "step": 192550 + }, + { + "epoch": 1.6580578512396693, + "grad_norm": 6.5, + "learning_rate": 0.0001507804970894585, + "loss": 4.4115, + "step": 192600 + }, + { + "epoch": 1.6584882920110193, + "grad_norm": 4.65625, + "learning_rate": 0.00015075715619288884, + "loss": 4.2834, + "step": 192650 + }, + { + "epoch": 1.6589187327823691, + "grad_norm": 2.53125, + "learning_rate": 0.00015073381157085312, + "loss": 4.2, + "step": 192700 + }, + { + "epoch": 1.6593491735537191, + "grad_norm": 2.765625, + "learning_rate": 0.00015071046322506478, + "loss": 4.5527, + "step": 192750 + }, + { + "epoch": 1.659779614325069, + "grad_norm": 3.234375, + "learning_rate": 0.00015068711115723754, + "loss": 4.6736, + "step": 192800 + }, + { + "epoch": 1.6602100550964187, + "grad_norm": 3.03125, + "learning_rate": 0.00015066375536908541, + "loss": 4.5932, + "step": 192850 + }, + { + "epoch": 1.6606404958677685, + "grad_norm": 2.65625, + "learning_rate": 0.00015064039586232264, + "loss": 4.5725, + "step": 192900 + }, + { + "epoch": 1.6610709366391183, + "grad_norm": 3.484375, + "learning_rate": 0.00015061703263866377, + "loss": 4.4514, + "step": 192950 + }, + { + "epoch": 1.6615013774104683, + "grad_norm": 3.15625, + "learning_rate": 0.0001505936656998236, + "loss": 4.2902, + "step": 193000 + }, + { + "epoch": 1.6619318181818183, + "grad_norm": 3.921875, + "learning_rate": 0.00015057029504751722, + "loss": 4.3745, + "step": 193050 + }, + { + "epoch": 1.6623622589531681, + "grad_norm": 3.03125, + "learning_rate": 0.00015054692068346, + "loss": 4.1348, + "step": 193100 + }, + { + "epoch": 1.662792699724518, + "grad_norm": 2.71875, + "learning_rate": 0.00015052354260936755, + "loss": 4.3627, + "step": 193150 + }, + { + "epoch": 1.6632231404958677, + "grad_norm": 2.75, + "learning_rate": 0.00015050016082695578, + "loss": 4.3502, + "step": 193200 + }, + { + "epoch": 1.6636535812672175, + "grad_norm": 2.3125, + "learning_rate": 0.00015047677533794085, + "loss": 4.0829, + "step": 193250 + }, + { + "epoch": 1.6640840220385675, + "grad_norm": 3.25, + "learning_rate": 0.00015045338614403923, + "loss": 4.6117, + "step": 193300 + }, + { + "epoch": 1.6645144628099173, + "grad_norm": 2.15625, + "learning_rate": 0.00015042999324696758, + "loss": 4.1233, + "step": 193350 + }, + { + "epoch": 1.6649449035812673, + "grad_norm": 3.265625, + "learning_rate": 0.00015040659664844296, + "loss": 4.6023, + "step": 193400 + }, + { + "epoch": 1.6653753443526171, + "grad_norm": 3.328125, + "learning_rate": 0.00015038319635018264, + "loss": 4.8547, + "step": 193450 + }, + { + "epoch": 1.665805785123967, + "grad_norm": 4.46875, + "learning_rate": 0.00015035979235390406, + "loss": 4.1835, + "step": 193500 + }, + { + "epoch": 1.6662362258953167, + "grad_norm": 4.25, + "learning_rate": 0.00015033638466132508, + "loss": 4.5288, + "step": 193550 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 2.28125, + "learning_rate": 0.00015031297327416376, + "loss": 4.5139, + "step": 193600 + }, + { + "epoch": 1.6670971074380165, + "grad_norm": 3.15625, + "learning_rate": 0.00015028955819413846, + "loss": 4.1563, + "step": 193650 + }, + { + "epoch": 1.6675275482093666, + "grad_norm": 2.28125, + "learning_rate": 0.0001502661394229678, + "loss": 4.5309, + "step": 193700 + }, + { + "epoch": 1.6679579889807163, + "grad_norm": 3.453125, + "learning_rate": 0.00015024271696237063, + "loss": 4.2894, + "step": 193750 + }, + { + "epoch": 1.6683884297520661, + "grad_norm": 0.76171875, + "learning_rate": 0.00015021929081406618, + "loss": 4.2516, + "step": 193800 + }, + { + "epoch": 1.668818870523416, + "grad_norm": 3.859375, + "learning_rate": 0.00015019586097977377, + "loss": 4.7012, + "step": 193850 + }, + { + "epoch": 1.6692493112947657, + "grad_norm": 3.859375, + "learning_rate": 0.00015017242746121322, + "loss": 4.2681, + "step": 193900 + }, + { + "epoch": 1.6696797520661157, + "grad_norm": 2.296875, + "learning_rate": 0.0001501489902601044, + "loss": 4.5689, + "step": 193950 + }, + { + "epoch": 1.6701101928374655, + "grad_norm": 3.8125, + "learning_rate": 0.00015012554937816766, + "loss": 4.0734, + "step": 194000 + }, + { + "epoch": 1.6705406336088156, + "grad_norm": 2.4375, + "learning_rate": 0.0001501021048171234, + "loss": 4.3694, + "step": 194050 + }, + { + "epoch": 1.6709710743801653, + "grad_norm": 2.046875, + "learning_rate": 0.00015007865657869242, + "loss": 4.2628, + "step": 194100 + }, + { + "epoch": 1.6714015151515151, + "grad_norm": 4.34375, + "learning_rate": 0.00015005520466459583, + "loss": 4.424, + "step": 194150 + }, + { + "epoch": 1.671831955922865, + "grad_norm": 2.34375, + "learning_rate": 0.0001500317490765549, + "loss": 4.4128, + "step": 194200 + }, + { + "epoch": 1.6722623966942147, + "grad_norm": 2.515625, + "learning_rate": 0.0001500082898162912, + "loss": 4.1318, + "step": 194250 + }, + { + "epoch": 1.6726928374655647, + "grad_norm": 2.171875, + "learning_rate": 0.0001499848268855267, + "loss": 4.6032, + "step": 194300 + }, + { + "epoch": 1.6731232782369148, + "grad_norm": 2.875, + "learning_rate": 0.0001499613602859834, + "loss": 4.4496, + "step": 194350 + }, + { + "epoch": 1.6735537190082646, + "grad_norm": 2.3125, + "learning_rate": 0.00014993789001938377, + "loss": 4.098, + "step": 194400 + }, + { + "epoch": 1.6739841597796143, + "grad_norm": 3.34375, + "learning_rate": 0.00014991441608745043, + "loss": 4.5486, + "step": 194450 + }, + { + "epoch": 1.6744146005509641, + "grad_norm": 3.1875, + "learning_rate": 0.00014989093849190636, + "loss": 4.3258, + "step": 194500 + }, + { + "epoch": 1.674845041322314, + "grad_norm": 2.546875, + "learning_rate": 0.00014986745723447476, + "loss": 4.0024, + "step": 194550 + }, + { + "epoch": 1.675275482093664, + "grad_norm": 3.90625, + "learning_rate": 0.00014984397231687906, + "loss": 4.1615, + "step": 194600 + }, + { + "epoch": 1.6757059228650137, + "grad_norm": 2.703125, + "learning_rate": 0.00014982048374084306, + "loss": 4.5995, + "step": 194650 + }, + { + "epoch": 1.6761363636363638, + "grad_norm": 2.15625, + "learning_rate": 0.00014979699150809075, + "loss": 4.1943, + "step": 194700 + }, + { + "epoch": 1.6765668044077136, + "grad_norm": 1.1796875, + "learning_rate": 0.0001497734956203464, + "loss": 4.5542, + "step": 194750 + }, + { + "epoch": 1.6769972451790633, + "grad_norm": 1.15625, + "learning_rate": 0.00014974999607933454, + "loss": 4.0834, + "step": 194800 + }, + { + "epoch": 1.6774276859504131, + "grad_norm": 2.953125, + "learning_rate": 0.00014972649288678, + "loss": 4.4329, + "step": 194850 + }, + { + "epoch": 1.677858126721763, + "grad_norm": 3.578125, + "learning_rate": 0.00014970298604440792, + "loss": 4.3069, + "step": 194900 + }, + { + "epoch": 1.678288567493113, + "grad_norm": 1.8984375, + "learning_rate": 0.00014967947555394358, + "loss": 4.1882, + "step": 194950 + }, + { + "epoch": 1.678719008264463, + "grad_norm": 2.40625, + "learning_rate": 0.00014965596141711262, + "loss": 4.1222, + "step": 195000 + }, + { + "epoch": 1.678719008264463, + "eval_loss": 5.083550453186035, + "eval_runtime": 21.8258, + "eval_samples_per_second": 29.323, + "eval_steps_per_second": 14.662, + "eval_tts_loss": 7.167528697308984, + "step": 195000 + }, + { + "epoch": 1.6791494490358128, + "grad_norm": 3.6875, + "learning_rate": 0.00014963244363564094, + "loss": 4.3102, + "step": 195050 + }, + { + "epoch": 1.6795798898071626, + "grad_norm": 4.03125, + "learning_rate": 0.00014960892221125468, + "loss": 4.3747, + "step": 195100 + }, + { + "epoch": 1.6800103305785123, + "grad_norm": 3.0, + "learning_rate": 0.00014958539714568026, + "loss": 4.2361, + "step": 195150 + }, + { + "epoch": 1.6804407713498621, + "grad_norm": 3.640625, + "learning_rate": 0.00014956186844064433, + "loss": 4.5603, + "step": 195200 + }, + { + "epoch": 1.6808712121212122, + "grad_norm": 2.296875, + "learning_rate": 0.00014953833609787396, + "loss": 4.4756, + "step": 195250 + }, + { + "epoch": 1.681301652892562, + "grad_norm": 3.65625, + "learning_rate": 0.00014951480011909628, + "loss": 4.6928, + "step": 195300 + }, + { + "epoch": 1.681732093663912, + "grad_norm": 1.5546875, + "learning_rate": 0.0001494912605060388, + "loss": 4.3995, + "step": 195350 + }, + { + "epoch": 1.6821625344352618, + "grad_norm": 1.8671875, + "learning_rate": 0.0001494677172604293, + "loss": 4.5686, + "step": 195400 + }, + { + "epoch": 1.6825929752066116, + "grad_norm": 3.40625, + "learning_rate": 0.00014944417038399577, + "loss": 4.3506, + "step": 195450 + }, + { + "epoch": 1.6830234159779613, + "grad_norm": 2.875, + "learning_rate": 0.0001494206198784665, + "loss": 4.6538, + "step": 195500 + }, + { + "epoch": 1.6834538567493111, + "grad_norm": 2.625, + "learning_rate": 0.00014939706574557007, + "loss": 4.3817, + "step": 195550 + }, + { + "epoch": 1.6838842975206612, + "grad_norm": 3.203125, + "learning_rate": 0.00014937350798703534, + "loss": 4.6453, + "step": 195600 + }, + { + "epoch": 1.6843147382920112, + "grad_norm": 1.8828125, + "learning_rate": 0.0001493499466045913, + "loss": 4.3793, + "step": 195650 + }, + { + "epoch": 1.684745179063361, + "grad_norm": 6.46875, + "learning_rate": 0.0001493263815999674, + "loss": 4.252, + "step": 195700 + }, + { + "epoch": 1.6851756198347108, + "grad_norm": 2.671875, + "learning_rate": 0.0001493028129748932, + "loss": 4.5178, + "step": 195750 + }, + { + "epoch": 1.6856060606060606, + "grad_norm": 2.015625, + "learning_rate": 0.00014927924073109857, + "loss": 4.6309, + "step": 195800 + }, + { + "epoch": 1.6860365013774103, + "grad_norm": 3.203125, + "learning_rate": 0.00014925566487031375, + "loss": 4.4545, + "step": 195850 + }, + { + "epoch": 1.6864669421487604, + "grad_norm": 0.87890625, + "learning_rate": 0.00014923208539426906, + "loss": 4.5176, + "step": 195900 + }, + { + "epoch": 1.6868973829201102, + "grad_norm": 4.71875, + "learning_rate": 0.00014920850230469527, + "loss": 4.4551, + "step": 195950 + }, + { + "epoch": 1.6873278236914602, + "grad_norm": 2.953125, + "learning_rate": 0.00014918491560332326, + "loss": 4.4299, + "step": 196000 + }, + { + "epoch": 1.68775826446281, + "grad_norm": 6.03125, + "learning_rate": 0.00014916132529188428, + "loss": 4.6912, + "step": 196050 + }, + { + "epoch": 1.6881887052341598, + "grad_norm": 2.15625, + "learning_rate": 0.0001491377313721098, + "loss": 4.459, + "step": 196100 + }, + { + "epoch": 1.6886191460055096, + "grad_norm": 1.8984375, + "learning_rate": 0.00014911413384573153, + "loss": 4.695, + "step": 196150 + }, + { + "epoch": 1.6890495867768593, + "grad_norm": 2.359375, + "learning_rate": 0.00014909053271448155, + "loss": 4.691, + "step": 196200 + }, + { + "epoch": 1.6894800275482094, + "grad_norm": 4.375, + "learning_rate": 0.00014906692798009207, + "loss": 4.4259, + "step": 196250 + }, + { + "epoch": 1.6899104683195594, + "grad_norm": 2.59375, + "learning_rate": 0.00014904331964429564, + "loss": 4.3836, + "step": 196300 + }, + { + "epoch": 1.6903409090909092, + "grad_norm": 2.515625, + "learning_rate": 0.00014901970770882507, + "loss": 4.6083, + "step": 196350 + }, + { + "epoch": 1.690771349862259, + "grad_norm": 4.75, + "learning_rate": 0.00014899609217541344, + "loss": 4.3447, + "step": 196400 + }, + { + "epoch": 1.6912017906336088, + "grad_norm": 3.890625, + "learning_rate": 0.00014897247304579406, + "loss": 4.5977, + "step": 196450 + }, + { + "epoch": 1.6916322314049586, + "grad_norm": 3.4375, + "learning_rate": 0.00014894885032170051, + "loss": 4.5123, + "step": 196500 + }, + { + "epoch": 1.6920626721763086, + "grad_norm": 2.390625, + "learning_rate": 0.00014892522400486669, + "loss": 4.6142, + "step": 196550 + }, + { + "epoch": 1.6924931129476584, + "grad_norm": 3.375, + "learning_rate": 0.00014890159409702665, + "loss": 4.3423, + "step": 196600 + }, + { + "epoch": 1.6929235537190084, + "grad_norm": 2.328125, + "learning_rate": 0.00014887796059991483, + "loss": 4.5783, + "step": 196650 + }, + { + "epoch": 1.6933539944903582, + "grad_norm": 2.953125, + "learning_rate": 0.00014885432351526587, + "loss": 4.1651, + "step": 196700 + }, + { + "epoch": 1.693784435261708, + "grad_norm": 2.96875, + "learning_rate": 0.0001488306828448147, + "loss": 4.4109, + "step": 196750 + }, + { + "epoch": 1.6942148760330578, + "grad_norm": 3.03125, + "learning_rate": 0.00014880703859029645, + "loss": 4.3027, + "step": 196800 + }, + { + "epoch": 1.6946453168044076, + "grad_norm": 3.703125, + "learning_rate": 0.00014878339075344657, + "loss": 4.6697, + "step": 196850 + }, + { + "epoch": 1.6950757575757576, + "grad_norm": 2.34375, + "learning_rate": 0.00014875973933600078, + "loss": 4.5617, + "step": 196900 + }, + { + "epoch": 1.6955061983471076, + "grad_norm": 2.59375, + "learning_rate": 0.00014873608433969504, + "loss": 4.4808, + "step": 196950 + }, + { + "epoch": 1.6959366391184574, + "grad_norm": 1.8515625, + "learning_rate": 0.00014871242576626557, + "loss": 4.5233, + "step": 197000 + }, + { + "epoch": 1.6963670798898072, + "grad_norm": 5.03125, + "learning_rate": 0.00014868876361744882, + "loss": 4.802, + "step": 197050 + }, + { + "epoch": 1.696797520661157, + "grad_norm": 1.9375, + "learning_rate": 0.00014866509789498162, + "loss": 4.5043, + "step": 197100 + }, + { + "epoch": 1.6972279614325068, + "grad_norm": 2.25, + "learning_rate": 0.0001486414286006009, + "loss": 4.6736, + "step": 197150 + }, + { + "epoch": 1.6976584022038568, + "grad_norm": 1.921875, + "learning_rate": 0.000148617755736044, + "loss": 4.8234, + "step": 197200 + }, + { + "epoch": 1.6980888429752066, + "grad_norm": 2.515625, + "learning_rate": 0.00014859407930304844, + "loss": 4.7812, + "step": 197250 + }, + { + "epoch": 1.6985192837465566, + "grad_norm": 1.1171875, + "learning_rate": 0.000148570399303352, + "loss": 4.5574, + "step": 197300 + }, + { + "epoch": 1.6989497245179064, + "grad_norm": 3.0, + "learning_rate": 0.00014854671573869276, + "loss": 4.3152, + "step": 197350 + }, + { + "epoch": 1.6993801652892562, + "grad_norm": 4.125, + "learning_rate": 0.00014852302861080905, + "loss": 4.6025, + "step": 197400 + }, + { + "epoch": 1.699810606060606, + "grad_norm": 1.96875, + "learning_rate": 0.0001484993379214394, + "loss": 4.721, + "step": 197450 + }, + { + "epoch": 1.7002410468319558, + "grad_norm": 3.265625, + "learning_rate": 0.00014847564367232274, + "loss": 4.4474, + "step": 197500 + }, + { + "epoch": 1.7006714876033058, + "grad_norm": 2.28125, + "learning_rate": 0.0001484519458651981, + "loss": 4.3136, + "step": 197550 + }, + { + "epoch": 1.7011019283746558, + "grad_norm": 2.8125, + "learning_rate": 0.00014842824450180488, + "loss": 4.4143, + "step": 197600 + }, + { + "epoch": 1.7015323691460056, + "grad_norm": 2.078125, + "learning_rate": 0.0001484045395838827, + "loss": 4.3645, + "step": 197650 + }, + { + "epoch": 1.7019628099173554, + "grad_norm": 1.2421875, + "learning_rate": 0.0001483808311131715, + "loss": 4.5915, + "step": 197700 + }, + { + "epoch": 1.7023932506887052, + "grad_norm": 3.328125, + "learning_rate": 0.00014835711909141138, + "loss": 4.3323, + "step": 197750 + }, + { + "epoch": 1.702823691460055, + "grad_norm": 2.046875, + "learning_rate": 0.00014833340352034274, + "loss": 4.3499, + "step": 197800 + }, + { + "epoch": 1.703254132231405, + "grad_norm": 2.171875, + "learning_rate": 0.00014830968440170627, + "loss": 4.3072, + "step": 197850 + }, + { + "epoch": 1.7036845730027548, + "grad_norm": 3.859375, + "learning_rate": 0.00014828596173724294, + "loss": 4.3554, + "step": 197900 + }, + { + "epoch": 1.7041150137741048, + "grad_norm": 1.578125, + "learning_rate": 0.0001482622355286939, + "loss": 4.1934, + "step": 197950 + }, + { + "epoch": 1.7045454545454546, + "grad_norm": 2.265625, + "learning_rate": 0.0001482385057778006, + "loss": 4.1522, + "step": 198000 + }, + { + "epoch": 1.7045454545454546, + "eval_loss": 5.076416492462158, + "eval_runtime": 21.6723, + "eval_samples_per_second": 29.531, + "eval_steps_per_second": 14.765, + "eval_tts_loss": 7.241367056405855, + "step": 198000 + }, + { + "epoch": 1.7049758953168044, + "grad_norm": 3.0625, + "learning_rate": 0.00014821477248630474, + "loss": 4.1593, + "step": 198050 + }, + { + "epoch": 1.7054063360881542, + "grad_norm": 1.3359375, + "learning_rate": 0.00014819103565594837, + "loss": 4.7742, + "step": 198100 + }, + { + "epoch": 1.705836776859504, + "grad_norm": 2.484375, + "learning_rate": 0.00014816729528847365, + "loss": 4.3037, + "step": 198150 + }, + { + "epoch": 1.706267217630854, + "grad_norm": 2.46875, + "learning_rate": 0.00014814355138562305, + "loss": 4.3153, + "step": 198200 + }, + { + "epoch": 1.706697658402204, + "grad_norm": 2.46875, + "learning_rate": 0.0001481198039491394, + "loss": 4.4642, + "step": 198250 + }, + { + "epoch": 1.7071280991735538, + "grad_norm": 4.1875, + "learning_rate": 0.00014809605298076568, + "loss": 4.4042, + "step": 198300 + }, + { + "epoch": 1.7075585399449036, + "grad_norm": 2.71875, + "learning_rate": 0.00014807229848224512, + "loss": 4.6936, + "step": 198350 + }, + { + "epoch": 1.7079889807162534, + "grad_norm": 1.8046875, + "learning_rate": 0.0001480485404553213, + "loss": 4.2344, + "step": 198400 + }, + { + "epoch": 1.7084194214876032, + "grad_norm": 1.84375, + "learning_rate": 0.000148024778901738, + "loss": 4.0703, + "step": 198450 + }, + { + "epoch": 1.7088498622589532, + "grad_norm": 3.3125, + "learning_rate": 0.00014800101382323922, + "loss": 3.8407, + "step": 198500 + }, + { + "epoch": 1.709280303030303, + "grad_norm": 2.890625, + "learning_rate": 0.00014797724522156935, + "loss": 4.6688, + "step": 198550 + }, + { + "epoch": 1.709710743801653, + "grad_norm": 3.578125, + "learning_rate": 0.00014795347309847287, + "loss": 4.6164, + "step": 198600 + }, + { + "epoch": 1.7101411845730028, + "grad_norm": 3.171875, + "learning_rate": 0.00014792969745569467, + "loss": 4.4741, + "step": 198650 + }, + { + "epoch": 1.7105716253443526, + "grad_norm": 2.203125, + "learning_rate": 0.00014790591829497976, + "loss": 4.261, + "step": 198700 + }, + { + "epoch": 1.7110020661157024, + "grad_norm": 2.90625, + "learning_rate": 0.00014788213561807355, + "loss": 4.3749, + "step": 198750 + }, + { + "epoch": 1.7114325068870524, + "grad_norm": 2.765625, + "learning_rate": 0.0001478583494267216, + "loss": 4.3727, + "step": 198800 + }, + { + "epoch": 1.7118629476584022, + "grad_norm": 1.46875, + "learning_rate": 0.00014783455972266975, + "loss": 4.1389, + "step": 198850 + }, + { + "epoch": 1.7122933884297522, + "grad_norm": 3.390625, + "learning_rate": 0.00014781076650766418, + "loss": 4.4756, + "step": 198900 + }, + { + "epoch": 1.712723829201102, + "grad_norm": 2.875, + "learning_rate": 0.0001477869697834512, + "loss": 4.4756, + "step": 198950 + }, + { + "epoch": 1.7131542699724518, + "grad_norm": 1.6875, + "learning_rate": 0.00014776316955177743, + "loss": 4.7666, + "step": 199000 + }, + { + "epoch": 1.7135847107438016, + "grad_norm": 3.4375, + "learning_rate": 0.0001477393658143898, + "loss": 4.5628, + "step": 199050 + }, + { + "epoch": 1.7140151515151514, + "grad_norm": 1.5859375, + "learning_rate": 0.0001477155585730354, + "loss": 4.4252, + "step": 199100 + }, + { + "epoch": 1.7144455922865014, + "grad_norm": 2.78125, + "learning_rate": 0.00014769174782946175, + "loss": 4.5871, + "step": 199150 + }, + { + "epoch": 1.7148760330578512, + "grad_norm": 2.484375, + "learning_rate": 0.00014766793358541634, + "loss": 4.3632, + "step": 199200 + }, + { + "epoch": 1.7153064738292012, + "grad_norm": 2.265625, + "learning_rate": 0.00014764411584264716, + "loss": 4.3026, + "step": 199250 + }, + { + "epoch": 1.715736914600551, + "grad_norm": 2.875, + "learning_rate": 0.00014762029460290243, + "loss": 4.6209, + "step": 199300 + }, + { + "epoch": 1.7161673553719008, + "grad_norm": 0.84765625, + "learning_rate": 0.0001475964698679305, + "loss": 3.9673, + "step": 199350 + }, + { + "epoch": 1.7165977961432506, + "grad_norm": 3.328125, + "learning_rate": 0.00014757264163948013, + "loss": 4.2442, + "step": 199400 + }, + { + "epoch": 1.7170282369146006, + "grad_norm": 3.609375, + "learning_rate": 0.00014754880991930017, + "loss": 4.4071, + "step": 199450 + }, + { + "epoch": 1.7174586776859504, + "grad_norm": 2.65625, + "learning_rate": 0.0001475249747091399, + "loss": 4.3913, + "step": 199500 + }, + { + "epoch": 1.7178891184573004, + "grad_norm": 3.796875, + "learning_rate": 0.00014750113601074875, + "loss": 4.8984, + "step": 199550 + }, + { + "epoch": 1.7183195592286502, + "grad_norm": 2.328125, + "learning_rate": 0.0001474772938258764, + "loss": 4.3203, + "step": 199600 + }, + { + "epoch": 1.71875, + "grad_norm": 2.515625, + "learning_rate": 0.00014745344815627283, + "loss": 4.2185, + "step": 199650 + }, + { + "epoch": 1.7191804407713498, + "grad_norm": 3.0625, + "learning_rate": 0.00014742959900368826, + "loss": 4.3581, + "step": 199700 + }, + { + "epoch": 1.7196108815426996, + "grad_norm": 1.2890625, + "learning_rate": 0.00014740574636987316, + "loss": 4.3952, + "step": 199750 + }, + { + "epoch": 1.7200413223140496, + "grad_norm": 2.59375, + "learning_rate": 0.00014738189025657829, + "loss": 4.0192, + "step": 199800 + }, + { + "epoch": 1.7204717630853994, + "grad_norm": 2.625, + "learning_rate": 0.00014735803066555462, + "loss": 4.435, + "step": 199850 + }, + { + "epoch": 1.7209022038567494, + "grad_norm": 2.78125, + "learning_rate": 0.00014733416759855339, + "loss": 4.6761, + "step": 199900 + }, + { + "epoch": 1.7213326446280992, + "grad_norm": 4.09375, + "learning_rate": 0.0001473103010573261, + "loss": 4.3119, + "step": 199950 + }, + { + "epoch": 1.721763085399449, + "grad_norm": 2.78125, + "learning_rate": 0.0001472864310436245, + "loss": 4.5503, + "step": 200000 + }, + { + "epoch": 1.7221935261707988, + "grad_norm": 2.90625, + "learning_rate": 0.00014726255755920063, + "loss": 4.4613, + "step": 200050 + }, + { + "epoch": 1.7226239669421488, + "grad_norm": 3.734375, + "learning_rate": 0.0001472386806058067, + "loss": 4.5029, + "step": 200100 + }, + { + "epoch": 1.7230544077134986, + "grad_norm": 3.609375, + "learning_rate": 0.00014721480018519523, + "loss": 4.5508, + "step": 200150 + }, + { + "epoch": 1.7234848484848486, + "grad_norm": 2.25, + "learning_rate": 0.00014719091629911905, + "loss": 4.2141, + "step": 200200 + }, + { + "epoch": 1.7239152892561984, + "grad_norm": 1.046875, + "learning_rate": 0.00014716702894933113, + "loss": 4.1925, + "step": 200250 + }, + { + "epoch": 1.7243457300275482, + "grad_norm": 4.09375, + "learning_rate": 0.00014714313813758476, + "loss": 4.5075, + "step": 200300 + }, + { + "epoch": 1.724776170798898, + "grad_norm": 2.28125, + "learning_rate": 0.00014711924386563347, + "loss": 4.4054, + "step": 200350 + }, + { + "epoch": 1.7252066115702478, + "grad_norm": 1.5703125, + "learning_rate": 0.00014709534613523107, + "loss": 3.9823, + "step": 200400 + }, + { + "epoch": 1.7256370523415978, + "grad_norm": 3.4375, + "learning_rate": 0.00014707144494813162, + "loss": 4.1744, + "step": 200450 + }, + { + "epoch": 1.7260674931129476, + "grad_norm": 3.703125, + "learning_rate": 0.00014704754030608937, + "loss": 4.2115, + "step": 200500 + }, + { + "epoch": 1.7264979338842976, + "grad_norm": 5.9375, + "learning_rate": 0.00014702363221085886, + "loss": 4.6492, + "step": 200550 + }, + { + "epoch": 1.7269283746556474, + "grad_norm": 3.328125, + "learning_rate": 0.00014699972066419497, + "loss": 4.589, + "step": 200600 + }, + { + "epoch": 1.7273588154269972, + "grad_norm": 4.65625, + "learning_rate": 0.00014697580566785267, + "loss": 4.4187, + "step": 200650 + }, + { + "epoch": 1.727789256198347, + "grad_norm": 2.640625, + "learning_rate": 0.00014695188722358733, + "loss": 4.2957, + "step": 200700 + }, + { + "epoch": 1.728219696969697, + "grad_norm": 1.09375, + "learning_rate": 0.00014692796533315446, + "loss": 4.7696, + "step": 200750 + }, + { + "epoch": 1.7286501377410468, + "grad_norm": 3.71875, + "learning_rate": 0.0001469040399983099, + "loss": 4.7456, + "step": 200800 + }, + { + "epoch": 1.7290805785123968, + "grad_norm": 4.59375, + "learning_rate": 0.00014688011122080975, + "loss": 4.3568, + "step": 200850 + }, + { + "epoch": 1.7295110192837466, + "grad_norm": 1.265625, + "learning_rate": 0.00014685617900241026, + "loss": 4.2912, + "step": 200900 + }, + { + "epoch": 1.7299414600550964, + "grad_norm": 3.515625, + "learning_rate": 0.00014683224334486808, + "loss": 4.4503, + "step": 200950 + }, + { + "epoch": 1.7303719008264462, + "grad_norm": 3.46875, + "learning_rate": 0.00014680830424994, + "loss": 4.3639, + "step": 201000 + }, + { + "epoch": 1.7303719008264462, + "eval_loss": 5.069631576538086, + "eval_runtime": 21.9831, + "eval_samples_per_second": 29.113, + "eval_steps_per_second": 14.557, + "eval_tts_loss": 7.2692779077950265, + "step": 201000 + }, + { + "epoch": 1.730802341597796, + "grad_norm": 1.984375, + "learning_rate": 0.00014678436171938308, + "loss": 4.4839, + "step": 201050 + }, + { + "epoch": 1.731232782369146, + "grad_norm": 2.890625, + "learning_rate": 0.0001467604157549547, + "loss": 4.4284, + "step": 201100 + }, + { + "epoch": 1.731663223140496, + "grad_norm": 1.1484375, + "learning_rate": 0.00014673646635841235, + "loss": 4.491, + "step": 201150 + }, + { + "epoch": 1.7320936639118458, + "grad_norm": 2.578125, + "learning_rate": 0.00014671251353151398, + "loss": 4.5108, + "step": 201200 + }, + { + "epoch": 1.7325241046831956, + "grad_norm": 2.796875, + "learning_rate": 0.0001466885572760176, + "loss": 4.4842, + "step": 201250 + }, + { + "epoch": 1.7329545454545454, + "grad_norm": 2.90625, + "learning_rate": 0.0001466645975936816, + "loss": 4.6091, + "step": 201300 + }, + { + "epoch": 1.7333849862258952, + "grad_norm": 2.921875, + "learning_rate": 0.00014664063448626452, + "loss": 4.4633, + "step": 201350 + }, + { + "epoch": 1.7338154269972452, + "grad_norm": 3.21875, + "learning_rate": 0.0001466166679555252, + "loss": 4.3776, + "step": 201400 + }, + { + "epoch": 1.734245867768595, + "grad_norm": 3.328125, + "learning_rate": 0.00014659269800322276, + "loss": 4.281, + "step": 201450 + }, + { + "epoch": 1.734676308539945, + "grad_norm": 4.21875, + "learning_rate": 0.00014656872463111653, + "loss": 4.2784, + "step": 201500 + }, + { + "epoch": 1.7351067493112948, + "grad_norm": 2.53125, + "learning_rate": 0.00014654474784096613, + "loss": 4.501, + "step": 201550 + }, + { + "epoch": 1.7355371900826446, + "grad_norm": 2.90625, + "learning_rate": 0.00014652076763453138, + "loss": 4.1394, + "step": 201600 + }, + { + "epoch": 1.7359676308539944, + "grad_norm": 2.40625, + "learning_rate": 0.00014649678401357235, + "loss": 4.3451, + "step": 201650 + }, + { + "epoch": 1.7363980716253442, + "grad_norm": 3.265625, + "learning_rate": 0.00014647279697984944, + "loss": 4.2643, + "step": 201700 + }, + { + "epoch": 1.7368285123966942, + "grad_norm": 2.375, + "learning_rate": 0.0001464488065351232, + "loss": 4.2888, + "step": 201750 + }, + { + "epoch": 1.7372589531680442, + "grad_norm": 5.4375, + "learning_rate": 0.0001464248126811545, + "loss": 4.6217, + "step": 201800 + }, + { + "epoch": 1.737689393939394, + "grad_norm": 3.203125, + "learning_rate": 0.00014640081541970442, + "loss": 4.5291, + "step": 201850 + }, + { + "epoch": 1.7381198347107438, + "grad_norm": 2.90625, + "learning_rate": 0.00014637681475253438, + "loss": 4.3518, + "step": 201900 + }, + { + "epoch": 1.7385502754820936, + "grad_norm": 2.03125, + "learning_rate": 0.00014635281068140586, + "loss": 4.1754, + "step": 201950 + }, + { + "epoch": 1.7389807162534434, + "grad_norm": 3.875, + "learning_rate": 0.0001463288032080808, + "loss": 4.7852, + "step": 202000 + }, + { + "epoch": 1.7394111570247934, + "grad_norm": 1.6953125, + "learning_rate": 0.00014630479233432127, + "loss": 4.4412, + "step": 202050 + }, + { + "epoch": 1.7398415977961432, + "grad_norm": 1.34375, + "learning_rate": 0.00014628077806188956, + "loss": 4.3081, + "step": 202100 + }, + { + "epoch": 1.7402720385674932, + "grad_norm": 3.359375, + "learning_rate": 0.00014625676039254834, + "loss": 4.1681, + "step": 202150 + }, + { + "epoch": 1.740702479338843, + "grad_norm": 2.453125, + "learning_rate": 0.00014623273932806046, + "loss": 4.19, + "step": 202200 + }, + { + "epoch": 1.7411329201101928, + "grad_norm": 3.3125, + "learning_rate": 0.00014620871487018897, + "loss": 4.0179, + "step": 202250 + }, + { + "epoch": 1.7415633608815426, + "grad_norm": 2.859375, + "learning_rate": 0.00014618468702069722, + "loss": 4.6226, + "step": 202300 + }, + { + "epoch": 1.7419938016528924, + "grad_norm": 1.203125, + "learning_rate": 0.0001461606557813488, + "loss": 4.565, + "step": 202350 + }, + { + "epoch": 1.7424242424242424, + "grad_norm": 3.125, + "learning_rate": 0.0001461366211539076, + "loss": 4.8175, + "step": 202400 + }, + { + "epoch": 1.7428546831955924, + "grad_norm": 3.234375, + "learning_rate": 0.00014611258314013763, + "loss": 4.6324, + "step": 202450 + }, + { + "epoch": 1.7432851239669422, + "grad_norm": 2.546875, + "learning_rate": 0.0001460885417418033, + "loss": 4.2822, + "step": 202500 + }, + { + "epoch": 1.743715564738292, + "grad_norm": 2.8125, + "learning_rate": 0.00014606449696066918, + "loss": 4.1335, + "step": 202550 + }, + { + "epoch": 1.7441460055096418, + "grad_norm": 1.7890625, + "learning_rate": 0.00014604044879850005, + "loss": 4.3431, + "step": 202600 + }, + { + "epoch": 1.7445764462809916, + "grad_norm": 1.734375, + "learning_rate": 0.00014601639725706107, + "loss": 4.2001, + "step": 202650 + }, + { + "epoch": 1.7450068870523416, + "grad_norm": 1.984375, + "learning_rate": 0.00014599234233811752, + "loss": 4.0488, + "step": 202700 + }, + { + "epoch": 1.7454373278236914, + "grad_norm": 2.84375, + "learning_rate": 0.00014596828404343502, + "loss": 4.3412, + "step": 202750 + }, + { + "epoch": 1.7458677685950414, + "grad_norm": 2.4375, + "learning_rate": 0.00014594422237477935, + "loss": 4.5877, + "step": 202800 + }, + { + "epoch": 1.7462982093663912, + "grad_norm": 1.171875, + "learning_rate": 0.0001459201573339166, + "loss": 4.2314, + "step": 202850 + }, + { + "epoch": 1.746728650137741, + "grad_norm": 1.6171875, + "learning_rate": 0.00014589608892261316, + "loss": 4.6578, + "step": 202900 + }, + { + "epoch": 1.7471590909090908, + "grad_norm": 3.171875, + "learning_rate": 0.0001458720171426355, + "loss": 4.5862, + "step": 202950 + }, + { + "epoch": 1.7475895316804406, + "grad_norm": 4.8125, + "learning_rate": 0.00014584794199575045, + "loss": 4.347, + "step": 203000 + }, + { + "epoch": 1.7480199724517906, + "grad_norm": 6.15625, + "learning_rate": 0.00014582386348372514, + "loss": 4.2003, + "step": 203050 + }, + { + "epoch": 1.7484504132231407, + "grad_norm": 2.84375, + "learning_rate": 0.00014579978160832684, + "loss": 4.5226, + "step": 203100 + }, + { + "epoch": 1.7488808539944904, + "grad_norm": 1.703125, + "learning_rate": 0.0001457756963713231, + "loss": 4.3868, + "step": 203150 + }, + { + "epoch": 1.7493112947658402, + "grad_norm": 2.359375, + "learning_rate": 0.00014575160777448174, + "loss": 4.0611, + "step": 203200 + }, + { + "epoch": 1.74974173553719, + "grad_norm": 2.9375, + "learning_rate": 0.00014572751581957082, + "loss": 4.3162, + "step": 203250 + }, + { + "epoch": 1.7501721763085398, + "grad_norm": 3.125, + "learning_rate": 0.00014570342050835861, + "loss": 4.0677, + "step": 203300 + }, + { + "epoch": 1.7506026170798898, + "grad_norm": 3.046875, + "learning_rate": 0.00014567932184261366, + "loss": 4.6464, + "step": 203350 + }, + { + "epoch": 1.7510330578512396, + "grad_norm": 3.609375, + "learning_rate": 0.0001456552198241048, + "loss": 4.2298, + "step": 203400 + }, + { + "epoch": 1.7514634986225897, + "grad_norm": 3.109375, + "learning_rate": 0.000145631114454601, + "loss": 4.4299, + "step": 203450 + }, + { + "epoch": 1.7518939393939394, + "grad_norm": 3.609375, + "learning_rate": 0.0001456070057358716, + "loss": 4.6576, + "step": 203500 + }, + { + "epoch": 1.7523243801652892, + "grad_norm": 1.984375, + "learning_rate": 0.00014558289366968608, + "loss": 4.7713, + "step": 203550 + }, + { + "epoch": 1.752754820936639, + "grad_norm": 3.515625, + "learning_rate": 0.00014555877825781426, + "loss": 4.8008, + "step": 203600 + }, + { + "epoch": 1.7531852617079888, + "grad_norm": 1.828125, + "learning_rate": 0.00014553465950202613, + "loss": 4.3375, + "step": 203650 + }, + { + "epoch": 1.7536157024793388, + "grad_norm": 4.5625, + "learning_rate": 0.00014551053740409197, + "loss": 4.4108, + "step": 203700 + }, + { + "epoch": 1.7540461432506889, + "grad_norm": 2.421875, + "learning_rate": 0.00014548641196578227, + "loss": 4.8124, + "step": 203750 + }, + { + "epoch": 1.7544765840220387, + "grad_norm": 3.234375, + "learning_rate": 0.00014546228318886782, + "loss": 4.6107, + "step": 203800 + }, + { + "epoch": 1.7549070247933884, + "grad_norm": 3.28125, + "learning_rate": 0.00014543815107511958, + "loss": 4.3705, + "step": 203850 + }, + { + "epoch": 1.7553374655647382, + "grad_norm": 4.34375, + "learning_rate": 0.0001454140156263088, + "loss": 4.3269, + "step": 203900 + }, + { + "epoch": 1.755767906336088, + "grad_norm": 1.9453125, + "learning_rate": 0.00014538987684420702, + "loss": 4.5044, + "step": 203950 + }, + { + "epoch": 1.756198347107438, + "grad_norm": 2.546875, + "learning_rate": 0.00014536573473058592, + "loss": 4.1559, + "step": 204000 + }, + { + "epoch": 1.756198347107438, + "eval_loss": 5.066303253173828, + "eval_runtime": 21.8339, + "eval_samples_per_second": 29.312, + "eval_steps_per_second": 14.656, + "eval_tts_loss": 7.257139390627936, + "step": 204000 + }, + { + "epoch": 1.7566287878787878, + "grad_norm": 1.9453125, + "learning_rate": 0.0001453415892872175, + "loss": 3.9364, + "step": 204050 + }, + { + "epoch": 1.7570592286501379, + "grad_norm": 3.078125, + "learning_rate": 0.000145317440515874, + "loss": 4.7497, + "step": 204100 + }, + { + "epoch": 1.7574896694214877, + "grad_norm": 3.421875, + "learning_rate": 0.00014529328841832783, + "loss": 3.8812, + "step": 204150 + }, + { + "epoch": 1.7579201101928374, + "grad_norm": 3.4375, + "learning_rate": 0.00014526913299635176, + "loss": 4.3653, + "step": 204200 + }, + { + "epoch": 1.7583505509641872, + "grad_norm": 1.7421875, + "learning_rate": 0.00014524497425171874, + "loss": 4.3812, + "step": 204250 + }, + { + "epoch": 1.758780991735537, + "grad_norm": 5.8125, + "learning_rate": 0.00014522081218620194, + "loss": 4.8898, + "step": 204300 + }, + { + "epoch": 1.759211432506887, + "grad_norm": 2.421875, + "learning_rate": 0.0001451966468015748, + "loss": 4.2977, + "step": 204350 + }, + { + "epoch": 1.759641873278237, + "grad_norm": 3.546875, + "learning_rate": 0.00014517247809961106, + "loss": 4.728, + "step": 204400 + }, + { + "epoch": 1.7600723140495869, + "grad_norm": 3.109375, + "learning_rate": 0.00014514830608208462, + "loss": 4.5724, + "step": 204450 + }, + { + "epoch": 1.7605027548209367, + "grad_norm": 4.03125, + "learning_rate": 0.00014512413075076962, + "loss": 4.3678, + "step": 204500 + }, + { + "epoch": 1.7609331955922864, + "grad_norm": 3.3125, + "learning_rate": 0.00014509995210744056, + "loss": 4.2542, + "step": 204550 + }, + { + "epoch": 1.7613636363636362, + "grad_norm": 5.6875, + "learning_rate": 0.00014507577015387203, + "loss": 4.3262, + "step": 204600 + }, + { + "epoch": 1.7617940771349863, + "grad_norm": 3.125, + "learning_rate": 0.00014505158489183895, + "loss": 4.7296, + "step": 204650 + }, + { + "epoch": 1.762224517906336, + "grad_norm": 3.375, + "learning_rate": 0.00014502739632311646, + "loss": 4.7251, + "step": 204700 + }, + { + "epoch": 1.762654958677686, + "grad_norm": 4.8125, + "learning_rate": 0.00014500320444947994, + "loss": 4.4522, + "step": 204750 + }, + { + "epoch": 1.7630853994490359, + "grad_norm": 2.140625, + "learning_rate": 0.0001449790092727051, + "loss": 4.8276, + "step": 204800 + }, + { + "epoch": 1.7635158402203857, + "grad_norm": 1.4921875, + "learning_rate": 0.00014495481079456772, + "loss": 4.3861, + "step": 204850 + }, + { + "epoch": 1.7639462809917354, + "grad_norm": 2.84375, + "learning_rate": 0.00014493060901684394, + "loss": 4.711, + "step": 204900 + }, + { + "epoch": 1.7643767217630852, + "grad_norm": 2.453125, + "learning_rate": 0.00014490640394131015, + "loss": 4.2437, + "step": 204950 + }, + { + "epoch": 1.7648071625344353, + "grad_norm": 2.859375, + "learning_rate": 0.00014488219556974292, + "loss": 4.2778, + "step": 205000 + }, + { + "epoch": 1.7652376033057853, + "grad_norm": 2.921875, + "learning_rate": 0.00014485798390391906, + "loss": 4.7075, + "step": 205050 + }, + { + "epoch": 1.765668044077135, + "grad_norm": 3.828125, + "learning_rate": 0.00014483376894561576, + "loss": 4.5107, + "step": 205100 + }, + { + "epoch": 1.7660984848484849, + "grad_norm": 5.1875, + "learning_rate": 0.00014480955069661028, + "loss": 4.893, + "step": 205150 + }, + { + "epoch": 1.7665289256198347, + "grad_norm": 3.1875, + "learning_rate": 0.00014478532915868016, + "loss": 4.4953, + "step": 205200 + }, + { + "epoch": 1.7669593663911844, + "grad_norm": 1.9296875, + "learning_rate": 0.00014476110433360323, + "loss": 4.2584, + "step": 205250 + }, + { + "epoch": 1.7673898071625345, + "grad_norm": 2.4375, + "learning_rate": 0.00014473687622315758, + "loss": 4.6173, + "step": 205300 + }, + { + "epoch": 1.7678202479338843, + "grad_norm": 2.921875, + "learning_rate": 0.00014471264482912146, + "loss": 4.5259, + "step": 205350 + }, + { + "epoch": 1.7682506887052343, + "grad_norm": 4.1875, + "learning_rate": 0.0001446884101532734, + "loss": 4.5646, + "step": 205400 + }, + { + "epoch": 1.768681129476584, + "grad_norm": 2.015625, + "learning_rate": 0.0001446641721973922, + "loss": 4.5003, + "step": 205450 + }, + { + "epoch": 1.7691115702479339, + "grad_norm": 1.3203125, + "learning_rate": 0.00014463993096325682, + "loss": 4.6141, + "step": 205500 + }, + { + "epoch": 1.7695420110192837, + "grad_norm": 2.9375, + "learning_rate": 0.00014461568645264658, + "loss": 4.5447, + "step": 205550 + }, + { + "epoch": 1.7699724517906334, + "grad_norm": 2.5, + "learning_rate": 0.00014459143866734096, + "loss": 4.262, + "step": 205600 + }, + { + "epoch": 1.7704028925619835, + "grad_norm": 3.28125, + "learning_rate": 0.00014456718760911966, + "loss": 4.6289, + "step": 205650 + }, + { + "epoch": 1.7708333333333335, + "grad_norm": 1.6328125, + "learning_rate": 0.0001445429332797627, + "loss": 4.2575, + "step": 205700 + }, + { + "epoch": 1.7712637741046833, + "grad_norm": 2.390625, + "learning_rate": 0.00014451867568105026, + "loss": 4.4797, + "step": 205750 + }, + { + "epoch": 1.771694214876033, + "grad_norm": 3.734375, + "learning_rate": 0.0001444944148147628, + "loss": 4.5051, + "step": 205800 + }, + { + "epoch": 1.7721246556473829, + "grad_norm": 2.984375, + "learning_rate": 0.00014447015068268107, + "loss": 4.4409, + "step": 205850 + }, + { + "epoch": 1.7725550964187327, + "grad_norm": 3.9375, + "learning_rate": 0.0001444458832865859, + "loss": 4.6527, + "step": 205900 + }, + { + "epoch": 1.7729855371900827, + "grad_norm": 2.546875, + "learning_rate": 0.00014442161262825853, + "loss": 4.8302, + "step": 205950 + }, + { + "epoch": 1.7734159779614325, + "grad_norm": 3.1875, + "learning_rate": 0.00014439733870948043, + "loss": 4.2343, + "step": 206000 + }, + { + "epoch": 1.7738464187327825, + "grad_norm": 2.0625, + "learning_rate": 0.00014437306153203313, + "loss": 4.4547, + "step": 206050 + }, + { + "epoch": 1.7742768595041323, + "grad_norm": 2.75, + "learning_rate": 0.0001443487810976986, + "loss": 4.5386, + "step": 206100 + }, + { + "epoch": 1.774707300275482, + "grad_norm": 4.71875, + "learning_rate": 0.000144324497408259, + "loss": 4.71, + "step": 206150 + }, + { + "epoch": 1.7751377410468319, + "grad_norm": 4.34375, + "learning_rate": 0.00014430021046549658, + "loss": 4.3663, + "step": 206200 + }, + { + "epoch": 1.7755681818181817, + "grad_norm": 2.109375, + "learning_rate": 0.00014427592027119407, + "loss": 4.1685, + "step": 206250 + }, + { + "epoch": 1.7759986225895317, + "grad_norm": 3.203125, + "learning_rate": 0.00014425162682713428, + "loss": 4.1937, + "step": 206300 + }, + { + "epoch": 1.7764290633608817, + "grad_norm": 3.734375, + "learning_rate": 0.0001442273301351003, + "loss": 4.6254, + "step": 206350 + }, + { + "epoch": 1.7768595041322315, + "grad_norm": 2.328125, + "learning_rate": 0.0001442030301968754, + "loss": 4.7547, + "step": 206400 + }, + { + "epoch": 1.7772899449035813, + "grad_norm": 2.515625, + "learning_rate": 0.0001441787270142432, + "loss": 4.548, + "step": 206450 + }, + { + "epoch": 1.777720385674931, + "grad_norm": 5.03125, + "learning_rate": 0.00014415442058898754, + "loss": 4.6682, + "step": 206500 + }, + { + "epoch": 1.7781508264462809, + "grad_norm": 5.09375, + "learning_rate": 0.00014413011092289234, + "loss": 4.4968, + "step": 206550 + }, + { + "epoch": 1.7785812672176309, + "grad_norm": 1.9609375, + "learning_rate": 0.00014410579801774198, + "loss": 4.5569, + "step": 206600 + }, + { + "epoch": 1.7790117079889807, + "grad_norm": 3.109375, + "learning_rate": 0.00014408148187532096, + "loss": 4.5855, + "step": 206650 + }, + { + "epoch": 1.7794421487603307, + "grad_norm": 2.828125, + "learning_rate": 0.000144057162497414, + "loss": 4.4967, + "step": 206700 + }, + { + "epoch": 1.7798725895316805, + "grad_norm": 2.59375, + "learning_rate": 0.0001440328398858061, + "loss": 4.2051, + "step": 206750 + }, + { + "epoch": 1.7803030303030303, + "grad_norm": 2.6875, + "learning_rate": 0.00014400851404228248, + "loss": 4.5489, + "step": 206800 + }, + { + "epoch": 1.78073347107438, + "grad_norm": 2.90625, + "learning_rate": 0.00014398418496862863, + "loss": 4.4709, + "step": 206850 + }, + { + "epoch": 1.7811639118457299, + "grad_norm": 2.71875, + "learning_rate": 0.00014395985266663024, + "loss": 4.6817, + "step": 206900 + }, + { + "epoch": 1.7815943526170799, + "grad_norm": 1.5234375, + "learning_rate": 0.00014393551713807323, + "loss": 3.7956, + "step": 206950 + }, + { + "epoch": 1.78202479338843, + "grad_norm": 2.171875, + "learning_rate": 0.0001439111783847438, + "loss": 4.3871, + "step": 207000 + }, + { + "epoch": 1.78202479338843, + "eval_loss": 5.065958499908447, + "eval_runtime": 21.7359, + "eval_samples_per_second": 29.444, + "eval_steps_per_second": 14.722, + "eval_tts_loss": 7.261567631698336, + "step": 207000 + }, + { + "epoch": 1.7824552341597797, + "grad_norm": 4.34375, + "learning_rate": 0.00014388683640842838, + "loss": 4.305, + "step": 207050 + }, + { + "epoch": 1.7828856749311295, + "grad_norm": 4.625, + "learning_rate": 0.00014386249121091355, + "loss": 4.181, + "step": 207100 + }, + { + "epoch": 1.7833161157024793, + "grad_norm": 3.078125, + "learning_rate": 0.00014383814279398627, + "loss": 4.5487, + "step": 207150 + }, + { + "epoch": 1.783746556473829, + "grad_norm": 3.3125, + "learning_rate": 0.00014381379115943363, + "loss": 4.2781, + "step": 207200 + }, + { + "epoch": 1.784176997245179, + "grad_norm": 4.78125, + "learning_rate": 0.00014378943630904297, + "loss": 4.693, + "step": 207250 + }, + { + "epoch": 1.7846074380165289, + "grad_norm": 2.296875, + "learning_rate": 0.0001437650782446019, + "loss": 4.6197, + "step": 207300 + }, + { + "epoch": 1.785037878787879, + "grad_norm": 3.203125, + "learning_rate": 0.00014374071696789825, + "loss": 4.8426, + "step": 207350 + }, + { + "epoch": 1.7854683195592287, + "grad_norm": 1.5546875, + "learning_rate": 0.0001437163524807201, + "loss": 4.4313, + "step": 207400 + }, + { + "epoch": 1.7858987603305785, + "grad_norm": 2.75, + "learning_rate": 0.00014369198478485573, + "loss": 4.4223, + "step": 207450 + }, + { + "epoch": 1.7863292011019283, + "grad_norm": 2.546875, + "learning_rate": 0.00014366761388209367, + "loss": 4.5474, + "step": 207500 + }, + { + "epoch": 1.786759641873278, + "grad_norm": 2.78125, + "learning_rate": 0.00014364323977422271, + "loss": 4.8404, + "step": 207550 + }, + { + "epoch": 1.787190082644628, + "grad_norm": 3.90625, + "learning_rate": 0.00014361886246303185, + "loss": 4.3914, + "step": 207600 + }, + { + "epoch": 1.787620523415978, + "grad_norm": 2.96875, + "learning_rate": 0.00014359448195031034, + "loss": 4.5706, + "step": 207650 + }, + { + "epoch": 1.788050964187328, + "grad_norm": 2.890625, + "learning_rate": 0.00014357009823784766, + "loss": 4.4997, + "step": 207700 + }, + { + "epoch": 1.7884814049586777, + "grad_norm": 3.296875, + "learning_rate": 0.00014354571132743347, + "loss": 4.336, + "step": 207750 + }, + { + "epoch": 1.7889118457300275, + "grad_norm": 3.5625, + "learning_rate": 0.0001435213212208578, + "loss": 4.7328, + "step": 207800 + }, + { + "epoch": 1.7893422865013773, + "grad_norm": 1.5546875, + "learning_rate": 0.00014349692791991081, + "loss": 4.3584, + "step": 207850 + }, + { + "epoch": 1.7897727272727273, + "grad_norm": 3.703125, + "learning_rate": 0.00014347253142638286, + "loss": 4.5277, + "step": 207900 + }, + { + "epoch": 1.790203168044077, + "grad_norm": 3.125, + "learning_rate": 0.00014344813174206464, + "loss": 4.695, + "step": 207950 + }, + { + "epoch": 1.790633608815427, + "grad_norm": 2.828125, + "learning_rate": 0.000143423728868747, + "loss": 4.5818, + "step": 208000 + }, + { + "epoch": 1.791064049586777, + "grad_norm": 1.9375, + "learning_rate": 0.00014339932280822118, + "loss": 4.3562, + "step": 208050 + }, + { + "epoch": 1.7914944903581267, + "grad_norm": 2.875, + "learning_rate": 0.00014337491356227836, + "loss": 4.7021, + "step": 208100 + }, + { + "epoch": 1.7919249311294765, + "grad_norm": 3.703125, + "learning_rate": 0.00014335050113271024, + "loss": 3.7902, + "step": 208150 + }, + { + "epoch": 1.7923553719008265, + "grad_norm": 3.140625, + "learning_rate": 0.00014332608552130858, + "loss": 4.5522, + "step": 208200 + }, + { + "epoch": 1.7927858126721763, + "grad_norm": 3.109375, + "learning_rate": 0.0001433016667298655, + "loss": 4.377, + "step": 208250 + }, + { + "epoch": 1.7932162534435263, + "grad_norm": 3.25, + "learning_rate": 0.00014327724476017326, + "loss": 4.3705, + "step": 208300 + }, + { + "epoch": 1.793646694214876, + "grad_norm": 1.828125, + "learning_rate": 0.0001432528196140243, + "loss": 4.4624, + "step": 208350 + }, + { + "epoch": 1.794077134986226, + "grad_norm": 2.03125, + "learning_rate": 0.0001432283912932115, + "loss": 4.3584, + "step": 208400 + }, + { + "epoch": 1.7945075757575757, + "grad_norm": 1.203125, + "learning_rate": 0.00014320395979952778, + "loss": 4.2264, + "step": 208450 + }, + { + "epoch": 1.7949380165289255, + "grad_norm": 2.734375, + "learning_rate": 0.00014317952513476633, + "loss": 4.3237, + "step": 208500 + }, + { + "epoch": 1.7953684573002755, + "grad_norm": 2.984375, + "learning_rate": 0.00014315508730072067, + "loss": 4.3732, + "step": 208550 + }, + { + "epoch": 1.7957988980716253, + "grad_norm": 2.15625, + "learning_rate": 0.00014313064629918447, + "loss": 4.4379, + "step": 208600 + }, + { + "epoch": 1.7962293388429753, + "grad_norm": 4.65625, + "learning_rate": 0.00014310620213195158, + "loss": 4.611, + "step": 208650 + }, + { + "epoch": 1.796659779614325, + "grad_norm": 3.3125, + "learning_rate": 0.00014308175480081623, + "loss": 3.929, + "step": 208700 + }, + { + "epoch": 1.797090220385675, + "grad_norm": 3.625, + "learning_rate": 0.00014305730430757278, + "loss": 4.4699, + "step": 208750 + }, + { + "epoch": 1.7975206611570247, + "grad_norm": 2.546875, + "learning_rate": 0.0001430328506540158, + "loss": 4.4323, + "step": 208800 + }, + { + "epoch": 1.7979511019283747, + "grad_norm": 3.421875, + "learning_rate": 0.0001430083938419402, + "loss": 4.6324, + "step": 208850 + }, + { + "epoch": 1.7983815426997245, + "grad_norm": 2.65625, + "learning_rate": 0.00014298393387314102, + "loss": 4.3147, + "step": 208900 + }, + { + "epoch": 1.7988119834710745, + "grad_norm": 2.359375, + "learning_rate": 0.00014295947074941358, + "loss": 3.8383, + "step": 208950 + }, + { + "epoch": 1.7992424242424243, + "grad_norm": 2.109375, + "learning_rate": 0.0001429350044725534, + "loss": 4.5066, + "step": 209000 + }, + { + "epoch": 1.799672865013774, + "grad_norm": 4.59375, + "learning_rate": 0.00014291053504435629, + "loss": 4.4437, + "step": 209050 + }, + { + "epoch": 1.800103305785124, + "grad_norm": 3.171875, + "learning_rate": 0.00014288606246661822, + "loss": 4.6211, + "step": 209100 + }, + { + "epoch": 1.8005337465564737, + "grad_norm": 2.609375, + "learning_rate": 0.0001428615867411354, + "loss": 4.4462, + "step": 209150 + }, + { + "epoch": 1.8009641873278237, + "grad_norm": 4.03125, + "learning_rate": 0.0001428371078697044, + "loss": 4.5513, + "step": 209200 + }, + { + "epoch": 1.8013946280991735, + "grad_norm": 3.234375, + "learning_rate": 0.0001428126258541218, + "loss": 4.5234, + "step": 209250 + }, + { + "epoch": 1.8018250688705235, + "grad_norm": 1.7578125, + "learning_rate": 0.00014278814069618463, + "loss": 4.546, + "step": 209300 + }, + { + "epoch": 1.8022555096418733, + "grad_norm": 2.46875, + "learning_rate": 0.00014276365239768995, + "loss": 4.3456, + "step": 209350 + }, + { + "epoch": 1.802685950413223, + "grad_norm": 2.65625, + "learning_rate": 0.00014273916096043518, + "loss": 4.4293, + "step": 209400 + }, + { + "epoch": 1.803116391184573, + "grad_norm": 3.53125, + "learning_rate": 0.000142714666386218, + "loss": 4.3945, + "step": 209450 + }, + { + "epoch": 1.803546831955923, + "grad_norm": 2.609375, + "learning_rate": 0.00014269016867683615, + "loss": 4.497, + "step": 209500 + }, + { + "epoch": 1.8039772727272727, + "grad_norm": 2.578125, + "learning_rate": 0.0001426656678340878, + "loss": 4.576, + "step": 209550 + }, + { + "epoch": 1.8044077134986227, + "grad_norm": 3.390625, + "learning_rate": 0.00014264116385977123, + "loss": 4.0808, + "step": 209600 + }, + { + "epoch": 1.8048381542699725, + "grad_norm": 1.3671875, + "learning_rate": 0.000142616656755685, + "loss": 4.2861, + "step": 209650 + }, + { + "epoch": 1.8052685950413223, + "grad_norm": 3.3125, + "learning_rate": 0.00014259214652362782, + "loss": 4.4298, + "step": 209700 + }, + { + "epoch": 1.805699035812672, + "grad_norm": 3.9375, + "learning_rate": 0.00014256763316539874, + "loss": 4.3557, + "step": 209750 + }, + { + "epoch": 1.806129476584022, + "grad_norm": 3.515625, + "learning_rate": 0.00014254311668279696, + "loss": 4.4118, + "step": 209800 + }, + { + "epoch": 1.806559917355372, + "grad_norm": 3.0, + "learning_rate": 0.00014251859707762197, + "loss": 4.1832, + "step": 209850 + }, + { + "epoch": 1.806990358126722, + "grad_norm": 3.828125, + "learning_rate": 0.0001424940743516734, + "loss": 4.4702, + "step": 209900 + }, + { + "epoch": 1.8074207988980717, + "grad_norm": 3.203125, + "learning_rate": 0.00014246954850675127, + "loss": 4.1901, + "step": 209950 + }, + { + "epoch": 1.8078512396694215, + "grad_norm": 3.34375, + "learning_rate": 0.00014244501954465558, + "loss": 4.5261, + "step": 210000 + }, + { + "epoch": 1.8078512396694215, + "eval_loss": 5.058772087097168, + "eval_runtime": 21.8275, + "eval_samples_per_second": 29.321, + "eval_steps_per_second": 14.66, + "eval_tts_loss": 7.2322371725440675, + "step": 210000 + }, + { + "epoch": 1.8082816804407713, + "grad_norm": 2.1875, + "learning_rate": 0.0001424204874671868, + "loss": 4.5437, + "step": 210050 + }, + { + "epoch": 1.808712121212121, + "grad_norm": 2.46875, + "learning_rate": 0.00014239595227614555, + "loss": 4.5104, + "step": 210100 + }, + { + "epoch": 1.8091425619834711, + "grad_norm": 4.375, + "learning_rate": 0.00014237141397333258, + "loss": 4.326, + "step": 210150 + }, + { + "epoch": 1.809573002754821, + "grad_norm": 4.84375, + "learning_rate": 0.000142346872560549, + "loss": 4.8106, + "step": 210200 + }, + { + "epoch": 1.810003443526171, + "grad_norm": 1.3984375, + "learning_rate": 0.00014232232803959607, + "loss": 3.9326, + "step": 210250 + }, + { + "epoch": 1.8104338842975207, + "grad_norm": 2.71875, + "learning_rate": 0.00014229778041227533, + "loss": 4.3245, + "step": 210300 + }, + { + "epoch": 1.8108643250688705, + "grad_norm": 2.84375, + "learning_rate": 0.00014227322968038853, + "loss": 4.7487, + "step": 210350 + }, + { + "epoch": 1.8112947658402203, + "grad_norm": 2.9375, + "learning_rate": 0.0001422486758457376, + "loss": 4.6411, + "step": 210400 + }, + { + "epoch": 1.81172520661157, + "grad_norm": 6.21875, + "learning_rate": 0.00014222411891012476, + "loss": 4.3358, + "step": 210450 + }, + { + "epoch": 1.8121556473829201, + "grad_norm": 2.578125, + "learning_rate": 0.0001421995588753525, + "loss": 4.1035, + "step": 210500 + }, + { + "epoch": 1.8125860881542701, + "grad_norm": 3.40625, + "learning_rate": 0.00014217499574322335, + "loss": 4.4316, + "step": 210550 + }, + { + "epoch": 1.81301652892562, + "grad_norm": 3.015625, + "learning_rate": 0.00014215042951554028, + "loss": 4.356, + "step": 210600 + }, + { + "epoch": 1.8134469696969697, + "grad_norm": 3.578125, + "learning_rate": 0.00014212586019410637, + "loss": 4.3285, + "step": 210650 + }, + { + "epoch": 1.8138774104683195, + "grad_norm": 4.28125, + "learning_rate": 0.00014210128778072492, + "loss": 4.4144, + "step": 210700 + }, + { + "epoch": 1.8143078512396693, + "grad_norm": 4.21875, + "learning_rate": 0.00014207671227719958, + "loss": 4.4266, + "step": 210750 + }, + { + "epoch": 1.8147382920110193, + "grad_norm": 3.46875, + "learning_rate": 0.00014205213368533408, + "loss": 4.7974, + "step": 210800 + }, + { + "epoch": 1.8151687327823691, + "grad_norm": 2.90625, + "learning_rate": 0.00014202755200693246, + "loss": 4.4121, + "step": 210850 + }, + { + "epoch": 1.8155991735537191, + "grad_norm": 3.0625, + "learning_rate": 0.0001420029672437989, + "loss": 4.5135, + "step": 210900 + }, + { + "epoch": 1.816029614325069, + "grad_norm": 3.546875, + "learning_rate": 0.00014197837939773796, + "loss": 4.49, + "step": 210950 + }, + { + "epoch": 1.8164600550964187, + "grad_norm": 1.7890625, + "learning_rate": 0.00014195378847055428, + "loss": 4.3935, + "step": 211000 + }, + { + "epoch": 1.8168904958677685, + "grad_norm": 2.9375, + "learning_rate": 0.00014192919446405275, + "loss": 4.6175, + "step": 211050 + }, + { + "epoch": 1.8173209366391183, + "grad_norm": 3.203125, + "learning_rate": 0.0001419045973800386, + "loss": 4.268, + "step": 211100 + }, + { + "epoch": 1.8177513774104683, + "grad_norm": 2.34375, + "learning_rate": 0.00014187999722031717, + "loss": 4.5076, + "step": 211150 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 2.515625, + "learning_rate": 0.00014185539398669402, + "loss": 4.4933, + "step": 211200 + }, + { + "epoch": 1.8186122589531681, + "grad_norm": 3.125, + "learning_rate": 0.00014183078768097504, + "loss": 4.7157, + "step": 211250 + }, + { + "epoch": 1.819042699724518, + "grad_norm": 3.296875, + "learning_rate": 0.00014180617830496624, + "loss": 4.4672, + "step": 211300 + }, + { + "epoch": 1.8194731404958677, + "grad_norm": 1.21875, + "learning_rate": 0.00014178156586047385, + "loss": 4.1881, + "step": 211350 + }, + { + "epoch": 1.8199035812672175, + "grad_norm": 2.796875, + "learning_rate": 0.0001417569503493045, + "loss": 4.6438, + "step": 211400 + }, + { + "epoch": 1.8203340220385675, + "grad_norm": 2.578125, + "learning_rate": 0.0001417323317732648, + "loss": 4.2103, + "step": 211450 + }, + { + "epoch": 1.8207644628099173, + "grad_norm": 3.171875, + "learning_rate": 0.00014170771013416174, + "loss": 4.0585, + "step": 211500 + }, + { + "epoch": 1.8211949035812673, + "grad_norm": 3.25, + "learning_rate": 0.00014168308543380245, + "loss": 4.6537, + "step": 211550 + }, + { + "epoch": 1.8216253443526171, + "grad_norm": 2.765625, + "learning_rate": 0.00014165845767399442, + "loss": 4.6123, + "step": 211600 + }, + { + "epoch": 1.822055785123967, + "grad_norm": 3.046875, + "learning_rate": 0.00014163382685654523, + "loss": 4.3003, + "step": 211650 + }, + { + "epoch": 1.8224862258953167, + "grad_norm": 3.65625, + "learning_rate": 0.0001416091929832627, + "loss": 4.2193, + "step": 211700 + }, + { + "epoch": 1.8229166666666665, + "grad_norm": 3.109375, + "learning_rate": 0.00014158455605595496, + "loss": 4.4595, + "step": 211750 + }, + { + "epoch": 1.8233471074380165, + "grad_norm": 1.5859375, + "learning_rate": 0.0001415599160764302, + "loss": 4.1979, + "step": 211800 + }, + { + "epoch": 1.8237775482093666, + "grad_norm": 3.59375, + "learning_rate": 0.00014153527304649712, + "loss": 4.7068, + "step": 211850 + }, + { + "epoch": 1.8242079889807163, + "grad_norm": 3.28125, + "learning_rate": 0.00014151062696796433, + "loss": 4.1244, + "step": 211900 + }, + { + "epoch": 1.8246384297520661, + "grad_norm": 2.203125, + "learning_rate": 0.00014148597784264082, + "loss": 4.2694, + "step": 211950 + }, + { + "epoch": 1.825068870523416, + "grad_norm": 2.90625, + "learning_rate": 0.0001414613256723358, + "loss": 4.365, + "step": 212000 + }, + { + "epoch": 1.8254993112947657, + "grad_norm": 3.0625, + "learning_rate": 0.0001414366704588587, + "loss": 4.5256, + "step": 212050 + }, + { + "epoch": 1.8259297520661157, + "grad_norm": 3.953125, + "learning_rate": 0.0001414120122040191, + "loss": 4.5772, + "step": 212100 + }, + { + "epoch": 1.8263601928374655, + "grad_norm": 4.4375, + "learning_rate": 0.00014138735090962694, + "loss": 4.6309, + "step": 212150 + }, + { + "epoch": 1.8267906336088156, + "grad_norm": 2.734375, + "learning_rate": 0.00014136268657749225, + "loss": 4.134, + "step": 212200 + }, + { + "epoch": 1.8272210743801653, + "grad_norm": 2.546875, + "learning_rate": 0.00014133801920942536, + "loss": 4.503, + "step": 212250 + }, + { + "epoch": 1.8276515151515151, + "grad_norm": 3.375, + "learning_rate": 0.00014131334880723682, + "loss": 4.6735, + "step": 212300 + }, + { + "epoch": 1.828081955922865, + "grad_norm": 3.453125, + "learning_rate": 0.00014128867537273736, + "loss": 4.6235, + "step": 212350 + }, + { + "epoch": 1.8285123966942147, + "grad_norm": 3.484375, + "learning_rate": 0.00014126399890773797, + "loss": 4.3432, + "step": 212400 + }, + { + "epoch": 1.8289428374655647, + "grad_norm": 1.6953125, + "learning_rate": 0.00014123931941404982, + "loss": 4.8241, + "step": 212450 + }, + { + "epoch": 1.8293732782369148, + "grad_norm": 2.890625, + "learning_rate": 0.00014121463689348434, + "loss": 4.5459, + "step": 212500 + }, + { + "epoch": 1.8298037190082646, + "grad_norm": 4.0625, + "learning_rate": 0.00014118995134785326, + "loss": 4.1721, + "step": 212550 + }, + { + "epoch": 1.8302341597796143, + "grad_norm": 4.5625, + "learning_rate": 0.0001411652627789683, + "loss": 4.4798, + "step": 212600 + }, + { + "epoch": 1.8306646005509641, + "grad_norm": 2.84375, + "learning_rate": 0.00014114057118864163, + "loss": 4.7275, + "step": 212650 + }, + { + "epoch": 1.831095041322314, + "grad_norm": 2.296875, + "learning_rate": 0.00014111587657868557, + "loss": 4.4862, + "step": 212700 + }, + { + "epoch": 1.831525482093664, + "grad_norm": 2.765625, + "learning_rate": 0.0001410911789509126, + "loss": 4.8968, + "step": 212750 + }, + { + "epoch": 1.8319559228650137, + "grad_norm": 4.25, + "learning_rate": 0.00014106647830713556, + "loss": 4.4673, + "step": 212800 + }, + { + "epoch": 1.8323863636363638, + "grad_norm": 3.109375, + "learning_rate": 0.00014104177464916736, + "loss": 4.5319, + "step": 212850 + }, + { + "epoch": 1.8328168044077136, + "grad_norm": 1.5078125, + "learning_rate": 0.0001410170679788212, + "loss": 4.4805, + "step": 212900 + }, + { + "epoch": 1.8332472451790633, + "grad_norm": 4.25, + "learning_rate": 0.00014099235829791046, + "loss": 4.4092, + "step": 212950 + }, + { + "epoch": 1.8336776859504131, + "grad_norm": 3.265625, + "learning_rate": 0.00014096764560824886, + "loss": 4.5109, + "step": 213000 + }, + { + "epoch": 1.8336776859504131, + "eval_loss": 5.0560622215271, + "eval_runtime": 21.7274, + "eval_samples_per_second": 29.456, + "eval_steps_per_second": 14.728, + "eval_tts_loss": 7.280787373036879, + "step": 213000 + }, + { + "epoch": 1.834108126721763, + "grad_norm": 2.34375, + "learning_rate": 0.0001409429299116502, + "loss": 4.5675, + "step": 213050 + }, + { + "epoch": 1.834538567493113, + "grad_norm": 2.734375, + "learning_rate": 0.00014091821120992858, + "loss": 4.4511, + "step": 213100 + }, + { + "epoch": 1.834969008264463, + "grad_norm": 3.546875, + "learning_rate": 0.00014089348950489827, + "loss": 4.7129, + "step": 213150 + }, + { + "epoch": 1.8353994490358128, + "grad_norm": 1.7578125, + "learning_rate": 0.00014086876479837388, + "loss": 4.5207, + "step": 213200 + }, + { + "epoch": 1.8358298898071626, + "grad_norm": 1.9921875, + "learning_rate": 0.00014084403709217002, + "loss": 4.2102, + "step": 213250 + }, + { + "epoch": 1.8362603305785123, + "grad_norm": 4.375, + "learning_rate": 0.00014081930638810175, + "loss": 4.3774, + "step": 213300 + }, + { + "epoch": 1.8366907713498621, + "grad_norm": 1.4921875, + "learning_rate": 0.0001407945726879842, + "loss": 4.7277, + "step": 213350 + }, + { + "epoch": 1.8371212121212122, + "grad_norm": 0.93359375, + "learning_rate": 0.00014076983599363278, + "loss": 4.7125, + "step": 213400 + }, + { + "epoch": 1.837551652892562, + "grad_norm": 4.09375, + "learning_rate": 0.00014074509630686315, + "loss": 4.5953, + "step": 213450 + }, + { + "epoch": 1.837982093663912, + "grad_norm": 1.625, + "learning_rate": 0.00014072035362949108, + "loss": 4.4593, + "step": 213500 + }, + { + "epoch": 1.8384125344352618, + "grad_norm": 2.03125, + "learning_rate": 0.0001406956079633327, + "loss": 4.235, + "step": 213550 + }, + { + "epoch": 1.8388429752066116, + "grad_norm": 3.34375, + "learning_rate": 0.00014067085931020423, + "loss": 4.6461, + "step": 213600 + }, + { + "epoch": 1.8392734159779613, + "grad_norm": 3.015625, + "learning_rate": 0.00014064610767192215, + "loss": 4.5389, + "step": 213650 + }, + { + "epoch": 1.8397038567493111, + "grad_norm": 4.09375, + "learning_rate": 0.00014062135305030333, + "loss": 4.4978, + "step": 213700 + }, + { + "epoch": 1.8401342975206612, + "grad_norm": 2.515625, + "learning_rate": 0.0001405965954471645, + "loss": 4.0845, + "step": 213750 + }, + { + "epoch": 1.8405647382920112, + "grad_norm": 0.78125, + "learning_rate": 0.00014057183486432294, + "loss": 4.6071, + "step": 213800 + }, + { + "epoch": 1.840995179063361, + "grad_norm": 1.0078125, + "learning_rate": 0.00014054707130359598, + "loss": 4.5696, + "step": 213850 + }, + { + "epoch": 1.8414256198347108, + "grad_norm": 2.390625, + "learning_rate": 0.00014052230476680125, + "loss": 4.5999, + "step": 213900 + }, + { + "epoch": 1.8418560606060606, + "grad_norm": 3.609375, + "learning_rate": 0.00014049753525575653, + "loss": 4.384, + "step": 213950 + }, + { + "epoch": 1.8422865013774103, + "grad_norm": 3.25, + "learning_rate": 0.00014047276277227985, + "loss": 4.5181, + "step": 214000 + }, + { + "epoch": 1.8427169421487604, + "grad_norm": 5.59375, + "learning_rate": 0.00014044798731818948, + "loss": 4.6399, + "step": 214050 + }, + { + "epoch": 1.8431473829201102, + "grad_norm": 4.03125, + "learning_rate": 0.00014042320889530384, + "loss": 4.6073, + "step": 214100 + }, + { + "epoch": 1.8435778236914602, + "grad_norm": 3.03125, + "learning_rate": 0.00014039842750544166, + "loss": 4.4025, + "step": 214150 + }, + { + "epoch": 1.84400826446281, + "grad_norm": 4.25, + "learning_rate": 0.00014037364315042181, + "loss": 4.5702, + "step": 214200 + }, + { + "epoch": 1.8444387052341598, + "grad_norm": 4.6875, + "learning_rate": 0.0001403488558320634, + "loss": 4.6863, + "step": 214250 + }, + { + "epoch": 1.8448691460055096, + "grad_norm": 3.453125, + "learning_rate": 0.0001403240655521858, + "loss": 4.5646, + "step": 214300 + }, + { + "epoch": 1.8452995867768593, + "grad_norm": 3.078125, + "learning_rate": 0.00014029927231260859, + "loss": 4.3433, + "step": 214350 + }, + { + "epoch": 1.8457300275482094, + "grad_norm": 2.203125, + "learning_rate": 0.00014027447611515145, + "loss": 4.3073, + "step": 214400 + }, + { + "epoch": 1.8461604683195594, + "grad_norm": 3.640625, + "learning_rate": 0.00014024967696163443, + "loss": 4.3947, + "step": 214450 + }, + { + "epoch": 1.8465909090909092, + "grad_norm": 3.59375, + "learning_rate": 0.0001402248748538777, + "loss": 4.3048, + "step": 214500 + }, + { + "epoch": 1.847021349862259, + "grad_norm": 2.09375, + "learning_rate": 0.00014020006979370174, + "loss": 4.4919, + "step": 214550 + }, + { + "epoch": 1.8474517906336088, + "grad_norm": 3.40625, + "learning_rate": 0.00014017526178292713, + "loss": 4.3821, + "step": 214600 + }, + { + "epoch": 1.8478822314049586, + "grad_norm": 2.765625, + "learning_rate": 0.00014015045082337472, + "loss": 4.3122, + "step": 214650 + }, + { + "epoch": 1.8483126721763086, + "grad_norm": 1.9140625, + "learning_rate": 0.00014012563691686563, + "loss": 4.1348, + "step": 214700 + }, + { + "epoch": 1.8487431129476584, + "grad_norm": 2.46875, + "learning_rate": 0.00014010082006522115, + "loss": 4.1318, + "step": 214750 + }, + { + "epoch": 1.8491735537190084, + "grad_norm": 2.984375, + "learning_rate": 0.0001400760002702627, + "loss": 4.6662, + "step": 214800 + }, + { + "epoch": 1.8496039944903582, + "grad_norm": 3.09375, + "learning_rate": 0.0001400511775338121, + "loss": 4.787, + "step": 214850 + }, + { + "epoch": 1.850034435261708, + "grad_norm": 2.359375, + "learning_rate": 0.00014002635185769122, + "loss": 4.3495, + "step": 214900 + }, + { + "epoch": 1.8504648760330578, + "grad_norm": 2.75, + "learning_rate": 0.00014000152324372223, + "loss": 4.5445, + "step": 214950 + }, + { + "epoch": 1.8508953168044076, + "grad_norm": 3.15625, + "learning_rate": 0.0001399766916937275, + "loss": 4.73, + "step": 215000 + }, + { + "epoch": 1.8513257575757576, + "grad_norm": 3.75, + "learning_rate": 0.00013995185720952963, + "loss": 4.316, + "step": 215050 + }, + { + "epoch": 1.8517561983471076, + "grad_norm": 3.234375, + "learning_rate": 0.0001399270197929514, + "loss": 4.7898, + "step": 215100 + }, + { + "epoch": 1.8521866391184574, + "grad_norm": 2.890625, + "learning_rate": 0.0001399021794458158, + "loss": 4.731, + "step": 215150 + }, + { + "epoch": 1.8526170798898072, + "grad_norm": 1.6640625, + "learning_rate": 0.00013987733616994608, + "loss": 4.2848, + "step": 215200 + }, + { + "epoch": 1.853047520661157, + "grad_norm": 1.9140625, + "learning_rate": 0.0001398524899671657, + "loss": 4.3253, + "step": 215250 + }, + { + "epoch": 1.8534779614325068, + "grad_norm": 3.109375, + "learning_rate": 0.00013982764083929832, + "loss": 4.6984, + "step": 215300 + }, + { + "epoch": 1.8539084022038568, + "grad_norm": 6.65625, + "learning_rate": 0.00013980278878816775, + "loss": 4.2792, + "step": 215350 + }, + { + "epoch": 1.8543388429752066, + "grad_norm": 2.984375, + "learning_rate": 0.00013977793381559814, + "loss": 4.4643, + "step": 215400 + }, + { + "epoch": 1.8547692837465566, + "grad_norm": 2.734375, + "learning_rate": 0.00013975307592341377, + "loss": 4.3965, + "step": 215450 + }, + { + "epoch": 1.8551997245179064, + "grad_norm": 3.65625, + "learning_rate": 0.00013972821511343917, + "loss": 4.3163, + "step": 215500 + }, + { + "epoch": 1.8556301652892562, + "grad_norm": 4.75, + "learning_rate": 0.00013970335138749904, + "loss": 4.5453, + "step": 215550 + }, + { + "epoch": 1.856060606060606, + "grad_norm": 3.515625, + "learning_rate": 0.00013967848474741838, + "loss": 4.3151, + "step": 215600 + }, + { + "epoch": 1.8564910468319558, + "grad_norm": 2.96875, + "learning_rate": 0.00013965361519502226, + "loss": 4.1963, + "step": 215650 + }, + { + "epoch": 1.8569214876033058, + "grad_norm": 3.203125, + "learning_rate": 0.00013962874273213611, + "loss": 4.1927, + "step": 215700 + }, + { + "epoch": 1.8573519283746558, + "grad_norm": 4.125, + "learning_rate": 0.00013960386736058553, + "loss": 4.1732, + "step": 215750 + }, + { + "epoch": 1.8577823691460056, + "grad_norm": 1.15625, + "learning_rate": 0.00013957898908219628, + "loss": 4.1553, + "step": 215800 + }, + { + "epoch": 1.8582128099173554, + "grad_norm": 1.8125, + "learning_rate": 0.0001395541078987944, + "loss": 4.4413, + "step": 215850 + }, + { + "epoch": 1.8586432506887052, + "grad_norm": 3.96875, + "learning_rate": 0.0001395292238122061, + "loss": 4.4824, + "step": 215900 + }, + { + "epoch": 1.859073691460055, + "grad_norm": 2.640625, + "learning_rate": 0.00013950433682425781, + "loss": 4.5343, + "step": 215950 + }, + { + "epoch": 1.859504132231405, + "grad_norm": 1.640625, + "learning_rate": 0.00013947944693677621, + "loss": 4.6324, + "step": 216000 + }, + { + "epoch": 1.859504132231405, + "eval_loss": 5.0532026290893555, + "eval_runtime": 22.153, + "eval_samples_per_second": 28.89, + "eval_steps_per_second": 14.445, + "eval_tts_loss": 7.206952735664344, + "step": 216000 + }, + { + "epoch": 1.8599345730027548, + "grad_norm": 1.3671875, + "learning_rate": 0.00013945455415158813, + "loss": 4.1854, + "step": 216050 + }, + { + "epoch": 1.8603650137741048, + "grad_norm": 2.25, + "learning_rate": 0.00013942965847052067, + "loss": 4.4626, + "step": 216100 + }, + { + "epoch": 1.8607954545454546, + "grad_norm": 3.109375, + "learning_rate": 0.00013940475989540114, + "loss": 4.649, + "step": 216150 + }, + { + "epoch": 1.8612258953168044, + "grad_norm": 2.46875, + "learning_rate": 0.00013937985842805696, + "loss": 4.3841, + "step": 216200 + }, + { + "epoch": 1.8616563360881542, + "grad_norm": 3.03125, + "learning_rate": 0.00013935495407031592, + "loss": 4.8282, + "step": 216250 + }, + { + "epoch": 1.862086776859504, + "grad_norm": 3.390625, + "learning_rate": 0.00013933004682400597, + "loss": 4.7881, + "step": 216300 + }, + { + "epoch": 1.862517217630854, + "grad_norm": 0.7578125, + "learning_rate": 0.00013930513669095515, + "loss": 4.0291, + "step": 216350 + }, + { + "epoch": 1.862947658402204, + "grad_norm": 4.375, + "learning_rate": 0.0001392802236729919, + "loss": 4.6901, + "step": 216400 + }, + { + "epoch": 1.8633780991735538, + "grad_norm": 3.28125, + "learning_rate": 0.00013925530777194472, + "loss": 4.3857, + "step": 216450 + }, + { + "epoch": 1.8638085399449036, + "grad_norm": 3.625, + "learning_rate": 0.00013923038898964242, + "loss": 4.4497, + "step": 216500 + }, + { + "epoch": 1.8642389807162534, + "grad_norm": 3.296875, + "learning_rate": 0.00013920546732791396, + "loss": 4.2481, + "step": 216550 + }, + { + "epoch": 1.8646694214876032, + "grad_norm": 3.140625, + "learning_rate": 0.00013918054278858858, + "loss": 4.3764, + "step": 216600 + }, + { + "epoch": 1.8650998622589532, + "grad_norm": 2.453125, + "learning_rate": 0.00013915561537349564, + "loss": 4.5323, + "step": 216650 + }, + { + "epoch": 1.865530303030303, + "grad_norm": 5.46875, + "learning_rate": 0.00013913068508446478, + "loss": 4.3292, + "step": 216700 + }, + { + "epoch": 1.865960743801653, + "grad_norm": 1.1796875, + "learning_rate": 0.0001391057519233258, + "loss": 4.1943, + "step": 216750 + }, + { + "epoch": 1.8663911845730028, + "grad_norm": 3.15625, + "learning_rate": 0.00013908081589190883, + "loss": 4.6298, + "step": 216800 + }, + { + "epoch": 1.8668216253443526, + "grad_norm": 3.546875, + "learning_rate": 0.000139055876992044, + "loss": 4.5038, + "step": 216850 + }, + { + "epoch": 1.8672520661157024, + "grad_norm": 2.21875, + "learning_rate": 0.00013903093522556187, + "loss": 4.3473, + "step": 216900 + }, + { + "epoch": 1.8676825068870524, + "grad_norm": 2.890625, + "learning_rate": 0.00013900599059429305, + "loss": 4.7651, + "step": 216950 + }, + { + "epoch": 1.8681129476584022, + "grad_norm": 2.65625, + "learning_rate": 0.00013898104310006843, + "loss": 4.7159, + "step": 217000 + }, + { + "epoch": 1.8685433884297522, + "grad_norm": 2.671875, + "learning_rate": 0.00013895609274471913, + "loss": 4.0896, + "step": 217050 + }, + { + "epoch": 1.868973829201102, + "grad_norm": 2.953125, + "learning_rate": 0.00013893113953007645, + "loss": 4.5924, + "step": 217100 + }, + { + "epoch": 1.8694042699724518, + "grad_norm": 2.40625, + "learning_rate": 0.0001389061834579719, + "loss": 4.5477, + "step": 217150 + }, + { + "epoch": 1.8698347107438016, + "grad_norm": 2.609375, + "learning_rate": 0.00013888122453023716, + "loss": 4.7615, + "step": 217200 + }, + { + "epoch": 1.8702651515151514, + "grad_norm": 1.984375, + "learning_rate": 0.0001388562627487042, + "loss": 4.369, + "step": 217250 + }, + { + "epoch": 1.8706955922865014, + "grad_norm": 2.21875, + "learning_rate": 0.0001388312981152052, + "loss": 4.5305, + "step": 217300 + }, + { + "epoch": 1.8711260330578512, + "grad_norm": 2.0, + "learning_rate": 0.0001388063306315724, + "loss": 4.477, + "step": 217350 + }, + { + "epoch": 1.8715564738292012, + "grad_norm": 2.4375, + "learning_rate": 0.0001387813602996385, + "loss": 4.3883, + "step": 217400 + }, + { + "epoch": 1.871986914600551, + "grad_norm": 2.921875, + "learning_rate": 0.0001387563871212361, + "loss": 4.6034, + "step": 217450 + }, + { + "epoch": 1.8724173553719008, + "grad_norm": 3.296875, + "learning_rate": 0.00013873141109819833, + "loss": 4.3873, + "step": 217500 + }, + { + "epoch": 1.8728477961432506, + "grad_norm": 2.25, + "learning_rate": 0.00013870643223235833, + "loss": 4.765, + "step": 217550 + }, + { + "epoch": 1.8732782369146006, + "grad_norm": 2.1875, + "learning_rate": 0.00013868145052554947, + "loss": 4.6269, + "step": 217600 + }, + { + "epoch": 1.8737086776859504, + "grad_norm": 2.65625, + "learning_rate": 0.00013865646597960535, + "loss": 4.395, + "step": 217650 + }, + { + "epoch": 1.8741391184573004, + "grad_norm": 1.890625, + "learning_rate": 0.00013863147859635984, + "loss": 4.3026, + "step": 217700 + }, + { + "epoch": 1.8745695592286502, + "grad_norm": 3.90625, + "learning_rate": 0.00013860648837764686, + "loss": 4.5575, + "step": 217750 + }, + { + "epoch": 1.875, + "grad_norm": 1.7265625, + "learning_rate": 0.00013858149532530072, + "loss": 4.3687, + "step": 217800 + }, + { + "epoch": 1.8754304407713498, + "grad_norm": 2.328125, + "learning_rate": 0.00013855649944115585, + "loss": 4.2108, + "step": 217850 + }, + { + "epoch": 1.8758608815426996, + "grad_norm": 2.546875, + "learning_rate": 0.00013853150072704685, + "loss": 4.2543, + "step": 217900 + }, + { + "epoch": 1.8762913223140496, + "grad_norm": 2.265625, + "learning_rate": 0.00013850649918480863, + "loss": 4.3372, + "step": 217950 + }, + { + "epoch": 1.8767217630853994, + "grad_norm": 2.421875, + "learning_rate": 0.0001384814948162762, + "loss": 4.7017, + "step": 218000 + }, + { + "epoch": 1.8771522038567494, + "grad_norm": 2.78125, + "learning_rate": 0.00013845648762328487, + "loss": 4.6242, + "step": 218050 + }, + { + "epoch": 1.8775826446280992, + "grad_norm": 3.359375, + "learning_rate": 0.00013843147760767005, + "loss": 4.213, + "step": 218100 + }, + { + "epoch": 1.878013085399449, + "grad_norm": 3.109375, + "learning_rate": 0.0001384064647712675, + "loss": 4.5054, + "step": 218150 + }, + { + "epoch": 1.8784435261707988, + "grad_norm": 2.953125, + "learning_rate": 0.00013838144911591305, + "loss": 4.4854, + "step": 218200 + }, + { + "epoch": 1.8788739669421488, + "grad_norm": 3.96875, + "learning_rate": 0.0001383564306434428, + "loss": 4.0335, + "step": 218250 + }, + { + "epoch": 1.8793044077134986, + "grad_norm": 4.28125, + "learning_rate": 0.0001383314093556931, + "loss": 4.4872, + "step": 218300 + }, + { + "epoch": 1.8797348484848486, + "grad_norm": 2.0, + "learning_rate": 0.00013830638525450038, + "loss": 4.3535, + "step": 218350 + }, + { + "epoch": 1.8801652892561984, + "grad_norm": 1.578125, + "learning_rate": 0.00013828135834170144, + "loss": 4.3971, + "step": 218400 + }, + { + "epoch": 1.8805957300275482, + "grad_norm": 2.3125, + "learning_rate": 0.00013825632861913317, + "loss": 4.0632, + "step": 218450 + }, + { + "epoch": 1.881026170798898, + "grad_norm": 3.46875, + "learning_rate": 0.00013823129608863267, + "loss": 4.2979, + "step": 218500 + }, + { + "epoch": 1.8814566115702478, + "grad_norm": 2.828125, + "learning_rate": 0.00013820626075203727, + "loss": 4.8421, + "step": 218550 + }, + { + "epoch": 1.8818870523415978, + "grad_norm": 2.9375, + "learning_rate": 0.00013818122261118458, + "loss": 4.5021, + "step": 218600 + }, + { + "epoch": 1.8823174931129476, + "grad_norm": 3.140625, + "learning_rate": 0.00013815618166791228, + "loss": 4.3414, + "step": 218650 + }, + { + "epoch": 1.8827479338842976, + "grad_norm": 2.140625, + "learning_rate": 0.00013813113792405835, + "loss": 4.5276, + "step": 218700 + }, + { + "epoch": 1.8831783746556474, + "grad_norm": 3.765625, + "learning_rate": 0.00013810609138146092, + "loss": 4.3439, + "step": 218750 + }, + { + "epoch": 1.8836088154269972, + "grad_norm": 3.78125, + "learning_rate": 0.00013808104204195834, + "loss": 4.6214, + "step": 218800 + }, + { + "epoch": 1.884039256198347, + "grad_norm": 3.109375, + "learning_rate": 0.00013805598990738926, + "loss": 4.6648, + "step": 218850 + }, + { + "epoch": 1.884469696969697, + "grad_norm": 2.609375, + "learning_rate": 0.0001380309349795924, + "loss": 4.4938, + "step": 218900 + }, + { + "epoch": 1.8849001377410468, + "grad_norm": 3.765625, + "learning_rate": 0.0001380058772604067, + "loss": 4.5276, + "step": 218950 + }, + { + "epoch": 1.8853305785123968, + "grad_norm": 2.921875, + "learning_rate": 0.0001379808167516714, + "loss": 4.408, + "step": 219000 + }, + { + "epoch": 1.8853305785123968, + "eval_loss": 5.048763275146484, + "eval_runtime": 21.6468, + "eval_samples_per_second": 29.566, + "eval_steps_per_second": 14.783, + "eval_tts_loss": 7.257337959361985, + "step": 219000 + }, + { + "epoch": 1.8857610192837466, + "grad_norm": 1.3984375, + "learning_rate": 0.00013795575345522588, + "loss": 4.0965, + "step": 219050 + }, + { + "epoch": 1.8861914600550964, + "grad_norm": 2.34375, + "learning_rate": 0.00013793068737290973, + "loss": 4.3708, + "step": 219100 + }, + { + "epoch": 1.8866219008264462, + "grad_norm": 2.65625, + "learning_rate": 0.00013790561850656269, + "loss": 4.6446, + "step": 219150 + }, + { + "epoch": 1.887052341597796, + "grad_norm": 1.8359375, + "learning_rate": 0.00013788054685802487, + "loss": 4.6007, + "step": 219200 + }, + { + "epoch": 1.887482782369146, + "grad_norm": 3.890625, + "learning_rate": 0.00013785547242913638, + "loss": 4.075, + "step": 219250 + }, + { + "epoch": 1.887913223140496, + "grad_norm": 1.015625, + "learning_rate": 0.0001378303952217376, + "loss": 4.1568, + "step": 219300 + }, + { + "epoch": 1.8883436639118458, + "grad_norm": 1.7109375, + "learning_rate": 0.00013780531523766931, + "loss": 4.7238, + "step": 219350 + }, + { + "epoch": 1.8887741046831956, + "grad_norm": 2.28125, + "learning_rate": 0.00013778023247877215, + "loss": 4.2892, + "step": 219400 + }, + { + "epoch": 1.8892045454545454, + "grad_norm": 1.625, + "learning_rate": 0.00013775514694688726, + "loss": 4.0651, + "step": 219450 + }, + { + "epoch": 1.8896349862258952, + "grad_norm": 4.0, + "learning_rate": 0.00013773005864385577, + "loss": 4.6462, + "step": 219500 + }, + { + "epoch": 1.8900654269972452, + "grad_norm": 2.734375, + "learning_rate": 0.0001377049675715192, + "loss": 3.9879, + "step": 219550 + }, + { + "epoch": 1.890495867768595, + "grad_norm": 2.6875, + "learning_rate": 0.00013767987373171914, + "loss": 4.7724, + "step": 219600 + }, + { + "epoch": 1.890926308539945, + "grad_norm": 2.8125, + "learning_rate": 0.00013765477712629736, + "loss": 4.7653, + "step": 219650 + }, + { + "epoch": 1.8913567493112948, + "grad_norm": 4.0625, + "learning_rate": 0.000137629677757096, + "loss": 4.3554, + "step": 219700 + }, + { + "epoch": 1.8917871900826446, + "grad_norm": 4.0625, + "learning_rate": 0.00013760457562595724, + "loss": 4.0532, + "step": 219750 + }, + { + "epoch": 1.8922176308539944, + "grad_norm": 3.546875, + "learning_rate": 0.00013757947073472353, + "loss": 4.4743, + "step": 219800 + }, + { + "epoch": 1.8926480716253442, + "grad_norm": 2.296875, + "learning_rate": 0.00013755436308523754, + "loss": 4.5887, + "step": 219850 + }, + { + "epoch": 1.8930785123966942, + "grad_norm": 2.515625, + "learning_rate": 0.00013752925267934208, + "loss": 4.2288, + "step": 219900 + }, + { + "epoch": 1.8935089531680442, + "grad_norm": 1.3671875, + "learning_rate": 0.00013750413951888024, + "loss": 4.4342, + "step": 219950 + }, + { + "epoch": 1.893939393939394, + "grad_norm": 3.96875, + "learning_rate": 0.00013747902360569526, + "loss": 4.5701, + "step": 220000 + }, + { + "epoch": 1.8943698347107438, + "grad_norm": 2.859375, + "learning_rate": 0.00013745390494163058, + "loss": 4.4464, + "step": 220050 + }, + { + "epoch": 1.8948002754820936, + "grad_norm": 2.578125, + "learning_rate": 0.00013742878352852983, + "loss": 4.3805, + "step": 220100 + }, + { + "epoch": 1.8952307162534434, + "grad_norm": 2.015625, + "learning_rate": 0.0001374036593682369, + "loss": 4.1921, + "step": 220150 + }, + { + "epoch": 1.8956611570247934, + "grad_norm": 3.984375, + "learning_rate": 0.0001373785324625959, + "loss": 4.7629, + "step": 220200 + }, + { + "epoch": 1.8960915977961432, + "grad_norm": 1.234375, + "learning_rate": 0.00013735340281345102, + "loss": 4.3835, + "step": 220250 + }, + { + "epoch": 1.8965220385674932, + "grad_norm": 2.59375, + "learning_rate": 0.00013732827042264672, + "loss": 4.2564, + "step": 220300 + }, + { + "epoch": 1.896952479338843, + "grad_norm": 3.46875, + "learning_rate": 0.00013730313529202769, + "loss": 4.6334, + "step": 220350 + }, + { + "epoch": 1.8973829201101928, + "grad_norm": 3.796875, + "learning_rate": 0.00013727799742343878, + "loss": 4.1464, + "step": 220400 + }, + { + "epoch": 1.8978133608815426, + "grad_norm": 3.015625, + "learning_rate": 0.00013725285681872508, + "loss": 4.4526, + "step": 220450 + }, + { + "epoch": 1.8982438016528924, + "grad_norm": 3.140625, + "learning_rate": 0.00013722771347973186, + "loss": 4.5367, + "step": 220500 + }, + { + "epoch": 1.8986742424242424, + "grad_norm": 2.59375, + "learning_rate": 0.00013720256740830454, + "loss": 4.0994, + "step": 220550 + }, + { + "epoch": 1.8991046831955924, + "grad_norm": 2.84375, + "learning_rate": 0.0001371774186062888, + "loss": 4.7075, + "step": 220600 + }, + { + "epoch": 1.8995351239669422, + "grad_norm": 2.9375, + "learning_rate": 0.00013715226707553056, + "loss": 4.3286, + "step": 220650 + }, + { + "epoch": 1.899965564738292, + "grad_norm": 3.6875, + "learning_rate": 0.00013712711281787583, + "loss": 4.2165, + "step": 220700 + }, + { + "epoch": 1.9003960055096418, + "grad_norm": 3.0, + "learning_rate": 0.00013710195583517093, + "loss": 3.693, + "step": 220750 + }, + { + "epoch": 1.9008264462809916, + "grad_norm": 2.921875, + "learning_rate": 0.00013707679612926226, + "loss": 4.5443, + "step": 220800 + }, + { + "epoch": 1.9012568870523416, + "grad_norm": 3.578125, + "learning_rate": 0.00013705163370199652, + "loss": 4.3232, + "step": 220850 + }, + { + "epoch": 1.9016873278236914, + "grad_norm": 2.28125, + "learning_rate": 0.00013702646855522064, + "loss": 4.0892, + "step": 220900 + }, + { + "epoch": 1.9021177685950414, + "grad_norm": 2.171875, + "learning_rate": 0.00013700130069078158, + "loss": 4.2569, + "step": 220950 + }, + { + "epoch": 1.9025482093663912, + "grad_norm": 3.0, + "learning_rate": 0.0001369761301105267, + "loss": 4.4526, + "step": 221000 + }, + { + "epoch": 1.902978650137741, + "grad_norm": 3.828125, + "learning_rate": 0.0001369509568163034, + "loss": 4.0168, + "step": 221050 + }, + { + "epoch": 1.9034090909090908, + "grad_norm": 2.953125, + "learning_rate": 0.00013692578080995937, + "loss": 4.5407, + "step": 221100 + }, + { + "epoch": 1.9038395316804406, + "grad_norm": 1.9140625, + "learning_rate": 0.0001369006020933425, + "loss": 4.3903, + "step": 221150 + }, + { + "epoch": 1.9042699724517906, + "grad_norm": 2.796875, + "learning_rate": 0.00013687542066830082, + "loss": 4.6166, + "step": 221200 + }, + { + "epoch": 1.9047004132231407, + "grad_norm": 3.046875, + "learning_rate": 0.0001368502365366826, + "loss": 4.5691, + "step": 221250 + }, + { + "epoch": 1.9051308539944904, + "grad_norm": 3.453125, + "learning_rate": 0.0001368250497003363, + "loss": 4.5418, + "step": 221300 + }, + { + "epoch": 1.9055612947658402, + "grad_norm": 2.84375, + "learning_rate": 0.00013679986016111055, + "loss": 3.9199, + "step": 221350 + }, + { + "epoch": 1.90599173553719, + "grad_norm": 3.40625, + "learning_rate": 0.0001367746679208543, + "loss": 4.613, + "step": 221400 + }, + { + "epoch": 1.9064221763085398, + "grad_norm": 3.0, + "learning_rate": 0.0001367494729814165, + "loss": 4.3766, + "step": 221450 + }, + { + "epoch": 1.9068526170798898, + "grad_norm": 3.0625, + "learning_rate": 0.00013672427534464648, + "loss": 4.7049, + "step": 221500 + }, + { + "epoch": 1.9072830578512396, + "grad_norm": 2.375, + "learning_rate": 0.00013669907501239368, + "loss": 4.6986, + "step": 221550 + }, + { + "epoch": 1.9077134986225897, + "grad_norm": 2.921875, + "learning_rate": 0.0001366738719865077, + "loss": 4.4958, + "step": 221600 + }, + { + "epoch": 1.9081439393939394, + "grad_norm": 2.0, + "learning_rate": 0.00013664866626883845, + "loss": 4.5369, + "step": 221650 + }, + { + "epoch": 1.9085743801652892, + "grad_norm": 1.6171875, + "learning_rate": 0.00013662345786123593, + "loss": 4.4422, + "step": 221700 + }, + { + "epoch": 1.909004820936639, + "grad_norm": 2.71875, + "learning_rate": 0.00013659824676555039, + "loss": 4.3376, + "step": 221750 + }, + { + "epoch": 1.9094352617079888, + "grad_norm": 2.703125, + "learning_rate": 0.0001365730329836323, + "loss": 4.6718, + "step": 221800 + }, + { + "epoch": 1.9098657024793388, + "grad_norm": 2.875, + "learning_rate": 0.00013654781651733225, + "loss": 4.5326, + "step": 221850 + }, + { + "epoch": 1.9102961432506889, + "grad_norm": 2.96875, + "learning_rate": 0.00013652259736850116, + "loss": 4.5264, + "step": 221900 + }, + { + "epoch": 1.9107265840220387, + "grad_norm": 3.078125, + "learning_rate": 0.00013649737553898995, + "loss": 4.2224, + "step": 221950 + }, + { + "epoch": 1.9111570247933884, + "grad_norm": 2.515625, + "learning_rate": 0.00013647215103064993, + "loss": 4.3864, + "step": 222000 + }, + { + "epoch": 1.9111570247933884, + "eval_loss": 5.045414924621582, + "eval_runtime": 22.036, + "eval_samples_per_second": 29.043, + "eval_steps_per_second": 14.522, + "eval_tts_loss": 7.304645935329636, + "step": 222000 + }, + { + "epoch": 1.9115874655647382, + "grad_norm": 3.28125, + "learning_rate": 0.00013644692384533246, + "loss": 4.4359, + "step": 222050 + }, + { + "epoch": 1.912017906336088, + "grad_norm": 4.53125, + "learning_rate": 0.00013642169398488924, + "loss": 4.819, + "step": 222100 + }, + { + "epoch": 1.912448347107438, + "grad_norm": 2.703125, + "learning_rate": 0.00013639646145117203, + "loss": 4.5313, + "step": 222150 + }, + { + "epoch": 1.9128787878787878, + "grad_norm": 2.125, + "learning_rate": 0.00013637122624603285, + "loss": 4.2293, + "step": 222200 + }, + { + "epoch": 1.9133092286501379, + "grad_norm": 3.65625, + "learning_rate": 0.0001363459883713239, + "loss": 4.1539, + "step": 222250 + }, + { + "epoch": 1.9137396694214877, + "grad_norm": 3.15625, + "learning_rate": 0.00013632074782889765, + "loss": 4.4515, + "step": 222300 + }, + { + "epoch": 1.9141701101928374, + "grad_norm": 3.390625, + "learning_rate": 0.00013629550462060662, + "loss": 4.6567, + "step": 222350 + }, + { + "epoch": 1.9146005509641872, + "grad_norm": 3.484375, + "learning_rate": 0.00013627025874830365, + "loss": 4.243, + "step": 222400 + }, + { + "epoch": 1.915030991735537, + "grad_norm": 2.484375, + "learning_rate": 0.00013624501021384174, + "loss": 4.2362, + "step": 222450 + }, + { + "epoch": 1.915461432506887, + "grad_norm": 1.59375, + "learning_rate": 0.00013621975901907405, + "loss": 4.3312, + "step": 222500 + }, + { + "epoch": 1.915891873278237, + "grad_norm": 1.6875, + "learning_rate": 0.000136194505165854, + "loss": 4.2704, + "step": 222550 + }, + { + "epoch": 1.9163223140495869, + "grad_norm": 2.765625, + "learning_rate": 0.0001361692486560351, + "loss": 4.2205, + "step": 222600 + }, + { + "epoch": 1.9167527548209367, + "grad_norm": 4.3125, + "learning_rate": 0.0001361439894914712, + "loss": 4.1049, + "step": 222650 + }, + { + "epoch": 1.9171831955922864, + "grad_norm": 3.171875, + "learning_rate": 0.00013611872767401624, + "loss": 4.3834, + "step": 222700 + }, + { + "epoch": 1.9176136363636362, + "grad_norm": 2.65625, + "learning_rate": 0.00013609346320552437, + "loss": 4.3754, + "step": 222750 + }, + { + "epoch": 1.9180440771349863, + "grad_norm": 3.546875, + "learning_rate": 0.00013606819608785, + "loss": 4.8799, + "step": 222800 + }, + { + "epoch": 1.918474517906336, + "grad_norm": 4.15625, + "learning_rate": 0.00013604292632284757, + "loss": 4.5783, + "step": 222850 + }, + { + "epoch": 1.918904958677686, + "grad_norm": 3.625, + "learning_rate": 0.00013601765391237192, + "loss": 4.1957, + "step": 222900 + }, + { + "epoch": 1.9193353994490359, + "grad_norm": 4.5625, + "learning_rate": 0.00013599237885827798, + "loss": 4.4157, + "step": 222950 + }, + { + "epoch": 1.9197658402203857, + "grad_norm": 2.828125, + "learning_rate": 0.00013596710116242086, + "loss": 4.7131, + "step": 223000 + }, + { + "epoch": 1.9201962809917354, + "grad_norm": 2.421875, + "learning_rate": 0.00013594182082665593, + "loss": 4.839, + "step": 223050 + }, + { + "epoch": 1.9206267217630852, + "grad_norm": 1.7265625, + "learning_rate": 0.00013591653785283863, + "loss": 4.655, + "step": 223100 + }, + { + "epoch": 1.9210571625344353, + "grad_norm": 2.234375, + "learning_rate": 0.00013589125224282476, + "loss": 4.4033, + "step": 223150 + }, + { + "epoch": 1.9214876033057853, + "grad_norm": 4.03125, + "learning_rate": 0.0001358659639984702, + "loss": 4.9092, + "step": 223200 + }, + { + "epoch": 1.921918044077135, + "grad_norm": 2.34375, + "learning_rate": 0.00013584067312163103, + "loss": 4.5733, + "step": 223250 + }, + { + "epoch": 1.9223484848484849, + "grad_norm": 3.015625, + "learning_rate": 0.0001358153796141636, + "loss": 4.4031, + "step": 223300 + }, + { + "epoch": 1.9227789256198347, + "grad_norm": 4.15625, + "learning_rate": 0.00013579008347792433, + "loss": 4.2904, + "step": 223350 + }, + { + "epoch": 1.9232093663911844, + "grad_norm": 3.9375, + "learning_rate": 0.00013576478471476994, + "loss": 4.2628, + "step": 223400 + }, + { + "epoch": 1.9236398071625345, + "grad_norm": 2.578125, + "learning_rate": 0.00013573948332655733, + "loss": 4.1737, + "step": 223450 + }, + { + "epoch": 1.9240702479338843, + "grad_norm": 1.8515625, + "learning_rate": 0.0001357141793151435, + "loss": 4.3229, + "step": 223500 + }, + { + "epoch": 1.9245006887052343, + "grad_norm": 3.8125, + "learning_rate": 0.00013568887268238577, + "loss": 4.4558, + "step": 223550 + }, + { + "epoch": 1.924931129476584, + "grad_norm": 2.890625, + "learning_rate": 0.00013566356343014158, + "loss": 4.7687, + "step": 223600 + }, + { + "epoch": 1.9253615702479339, + "grad_norm": 3.296875, + "learning_rate": 0.00013563825156026857, + "loss": 4.3129, + "step": 223650 + }, + { + "epoch": 1.9257920110192837, + "grad_norm": 1.4921875, + "learning_rate": 0.00013561293707462456, + "loss": 4.2717, + "step": 223700 + }, + { + "epoch": 1.9262224517906334, + "grad_norm": 2.484375, + "learning_rate": 0.0001355876199750676, + "loss": 4.4938, + "step": 223750 + }, + { + "epoch": 1.9266528925619835, + "grad_norm": 1.578125, + "learning_rate": 0.00013556230026345588, + "loss": 4.3552, + "step": 223800 + }, + { + "epoch": 1.9270833333333335, + "grad_norm": 3.03125, + "learning_rate": 0.00013553697794164787, + "loss": 4.5425, + "step": 223850 + }, + { + "epoch": 1.9275137741046833, + "grad_norm": 1.4765625, + "learning_rate": 0.0001355116530115021, + "loss": 4.5694, + "step": 223900 + }, + { + "epoch": 1.927944214876033, + "grad_norm": 2.34375, + "learning_rate": 0.00013548632547487745, + "loss": 4.297, + "step": 223950 + }, + { + "epoch": 1.9283746556473829, + "grad_norm": 2.546875, + "learning_rate": 0.00013546099533363282, + "loss": 4.1554, + "step": 224000 + }, + { + "epoch": 1.9288050964187327, + "grad_norm": 2.171875, + "learning_rate": 0.00013543566258962745, + "loss": 4.0928, + "step": 224050 + }, + { + "epoch": 1.9292355371900827, + "grad_norm": 2.734375, + "learning_rate": 0.0001354103272447207, + "loss": 4.5334, + "step": 224100 + }, + { + "epoch": 1.9296659779614325, + "grad_norm": 2.15625, + "learning_rate": 0.00013538498930077208, + "loss": 4.562, + "step": 224150 + }, + { + "epoch": 1.9300964187327825, + "grad_norm": 1.46875, + "learning_rate": 0.0001353596487596414, + "loss": 4.3581, + "step": 224200 + }, + { + "epoch": 1.9305268595041323, + "grad_norm": 4.09375, + "learning_rate": 0.00013533430562318857, + "loss": 4.2937, + "step": 224250 + }, + { + "epoch": 1.930957300275482, + "grad_norm": 1.765625, + "learning_rate": 0.00013530895989327373, + "loss": 4.7866, + "step": 224300 + }, + { + "epoch": 1.9313877410468319, + "grad_norm": 2.25, + "learning_rate": 0.00013528361157175725, + "loss": 4.1617, + "step": 224350 + }, + { + "epoch": 1.9318181818181817, + "grad_norm": 3.765625, + "learning_rate": 0.00013525826066049953, + "loss": 4.3879, + "step": 224400 + }, + { + "epoch": 1.9322486225895317, + "grad_norm": 3.1875, + "learning_rate": 0.00013523290716136135, + "loss": 4.3734, + "step": 224450 + }, + { + "epoch": 1.9326790633608817, + "grad_norm": 3.65625, + "learning_rate": 0.00013520755107620359, + "loss": 4.5149, + "step": 224500 + }, + { + "epoch": 1.9331095041322315, + "grad_norm": 1.25, + "learning_rate": 0.00013518219240688737, + "loss": 4.6907, + "step": 224550 + }, + { + "epoch": 1.9335399449035813, + "grad_norm": 2.21875, + "learning_rate": 0.0001351568311552739, + "loss": 4.4333, + "step": 224600 + }, + { + "epoch": 1.933970385674931, + "grad_norm": 3.140625, + "learning_rate": 0.00013513146732322466, + "loss": 4.5083, + "step": 224650 + }, + { + "epoch": 1.9344008264462809, + "grad_norm": 2.9375, + "learning_rate": 0.00013510610091260132, + "loss": 4.499, + "step": 224700 + }, + { + "epoch": 1.9348312672176309, + "grad_norm": 2.875, + "learning_rate": 0.0001350807319252657, + "loss": 4.4249, + "step": 224750 + }, + { + "epoch": 1.9352617079889807, + "grad_norm": 2.34375, + "learning_rate": 0.00013505536036307982, + "loss": 4.2964, + "step": 224800 + }, + { + "epoch": 1.9356921487603307, + "grad_norm": 1.7265625, + "learning_rate": 0.00013502998622790593, + "loss": 4.1366, + "step": 224850 + }, + { + "epoch": 1.9361225895316805, + "grad_norm": 3.34375, + "learning_rate": 0.00013500460952160643, + "loss": 4.278, + "step": 224900 + }, + { + "epoch": 1.9365530303030303, + "grad_norm": 3.40625, + "learning_rate": 0.00013497923024604385, + "loss": 4.2314, + "step": 224950 + }, + { + "epoch": 1.93698347107438, + "grad_norm": 4.21875, + "learning_rate": 0.0001349538484030811, + "loss": 4.2852, + "step": 225000 + }, + { + "epoch": 1.93698347107438, + "eval_loss": 5.044693470001221, + "eval_runtime": 21.8144, + "eval_samples_per_second": 29.338, + "eval_steps_per_second": 14.669, + "eval_tts_loss": 7.318111731969122, + "step": 225000 + }, + { + "epoch": 1.9374139118457299, + "grad_norm": 3.78125, + "learning_rate": 0.00013492846399458104, + "loss": 4.4199, + "step": 225050 + }, + { + "epoch": 1.9378443526170799, + "grad_norm": 1.8671875, + "learning_rate": 0.00013490307702240686, + "loss": 4.5072, + "step": 225100 + }, + { + "epoch": 1.93827479338843, + "grad_norm": 1.375, + "learning_rate": 0.00013487768748842197, + "loss": 4.442, + "step": 225150 + }, + { + "epoch": 1.9387052341597797, + "grad_norm": 3.40625, + "learning_rate": 0.00013485229539448985, + "loss": 4.8625, + "step": 225200 + }, + { + "epoch": 1.9391356749311295, + "grad_norm": 2.734375, + "learning_rate": 0.0001348269007424742, + "loss": 4.3945, + "step": 225250 + }, + { + "epoch": 1.9395661157024793, + "grad_norm": 3.09375, + "learning_rate": 0.000134801503534239, + "loss": 3.9488, + "step": 225300 + }, + { + "epoch": 1.939996556473829, + "grad_norm": 4.1875, + "learning_rate": 0.0001347761037716483, + "loss": 4.631, + "step": 225350 + }, + { + "epoch": 1.940426997245179, + "grad_norm": 3.25, + "learning_rate": 0.00013475070145656644, + "loss": 4.5675, + "step": 225400 + }, + { + "epoch": 1.9408574380165289, + "grad_norm": 1.0078125, + "learning_rate": 0.0001347252965908578, + "loss": 3.746, + "step": 225450 + }, + { + "epoch": 1.941287878787879, + "grad_norm": 2.25, + "learning_rate": 0.00013469988917638718, + "loss": 4.2301, + "step": 225500 + }, + { + "epoch": 1.9417183195592287, + "grad_norm": 2.71875, + "learning_rate": 0.0001346744792150193, + "loss": 4.2385, + "step": 225550 + }, + { + "epoch": 1.9421487603305785, + "grad_norm": 7.15625, + "learning_rate": 0.00013464906670861924, + "loss": 4.552, + "step": 225600 + }, + { + "epoch": 1.9425792011019283, + "grad_norm": 3.75, + "learning_rate": 0.00013462365165905227, + "loss": 4.8005, + "step": 225650 + }, + { + "epoch": 1.943009641873278, + "grad_norm": 1.7734375, + "learning_rate": 0.00013459823406818374, + "loss": 4.6385, + "step": 225700 + }, + { + "epoch": 1.943440082644628, + "grad_norm": 3.484375, + "learning_rate": 0.0001345728139378793, + "loss": 4.3224, + "step": 225750 + }, + { + "epoch": 1.943870523415978, + "grad_norm": 3.109375, + "learning_rate": 0.00013454739127000465, + "loss": 4.242, + "step": 225800 + }, + { + "epoch": 1.944300964187328, + "grad_norm": 1.453125, + "learning_rate": 0.00013452196606642583, + "loss": 4.5361, + "step": 225850 + }, + { + "epoch": 1.9447314049586777, + "grad_norm": 3.953125, + "learning_rate": 0.000134496538329009, + "loss": 4.65, + "step": 225900 + }, + { + "epoch": 1.9451618457300275, + "grad_norm": 2.890625, + "learning_rate": 0.00013447110805962046, + "loss": 4.4073, + "step": 225950 + }, + { + "epoch": 1.9455922865013773, + "grad_norm": 2.578125, + "learning_rate": 0.00013444567526012675, + "loss": 4.4841, + "step": 226000 + }, + { + "epoch": 1.9460227272727273, + "grad_norm": 3.125, + "learning_rate": 0.00013442023993239462, + "loss": 4.1725, + "step": 226050 + }, + { + "epoch": 1.946453168044077, + "grad_norm": 3.109375, + "learning_rate": 0.00013439480207829088, + "loss": 4.1709, + "step": 226100 + }, + { + "epoch": 1.946883608815427, + "grad_norm": 3.125, + "learning_rate": 0.00013436936169968272, + "loss": 4.2776, + "step": 226150 + }, + { + "epoch": 1.947314049586777, + "grad_norm": 4.6875, + "learning_rate": 0.00013434391879843733, + "loss": 4.5293, + "step": 226200 + }, + { + "epoch": 1.9477444903581267, + "grad_norm": 3.6875, + "learning_rate": 0.00013431847337642223, + "loss": 4.7208, + "step": 226250 + }, + { + "epoch": 1.9481749311294765, + "grad_norm": 4.625, + "learning_rate": 0.000134293025435505, + "loss": 4.4366, + "step": 226300 + }, + { + "epoch": 1.9486053719008265, + "grad_norm": 3.046875, + "learning_rate": 0.00013426757497755345, + "loss": 4.2549, + "step": 226350 + }, + { + "epoch": 1.9490358126721763, + "grad_norm": 2.71875, + "learning_rate": 0.00013424212200443568, + "loss": 4.5855, + "step": 226400 + }, + { + "epoch": 1.9494662534435263, + "grad_norm": 3.546875, + "learning_rate": 0.0001342166665180198, + "loss": 4.449, + "step": 226450 + }, + { + "epoch": 1.949896694214876, + "grad_norm": 2.75, + "learning_rate": 0.00013419120852017422, + "loss": 4.3388, + "step": 226500 + }, + { + "epoch": 1.950327134986226, + "grad_norm": 4.1875, + "learning_rate": 0.0001341657480127675, + "loss": 4.229, + "step": 226550 + }, + { + "epoch": 1.9507575757575757, + "grad_norm": 1.7890625, + "learning_rate": 0.0001341402849976684, + "loss": 4.3056, + "step": 226600 + }, + { + "epoch": 1.9511880165289255, + "grad_norm": 0.99609375, + "learning_rate": 0.00013411481947674583, + "loss": 4.2969, + "step": 226650 + }, + { + "epoch": 1.9516184573002755, + "grad_norm": 4.09375, + "learning_rate": 0.0001340893514518689, + "loss": 4.2659, + "step": 226700 + }, + { + "epoch": 1.9520488980716253, + "grad_norm": 3.25, + "learning_rate": 0.0001340638809249069, + "loss": 4.3612, + "step": 226750 + }, + { + "epoch": 1.9524793388429753, + "grad_norm": 2.859375, + "learning_rate": 0.00013403840789772938, + "loss": 4.516, + "step": 226800 + }, + { + "epoch": 1.952909779614325, + "grad_norm": 6.53125, + "learning_rate": 0.00013401293237220592, + "loss": 4.211, + "step": 226850 + }, + { + "epoch": 1.953340220385675, + "grad_norm": 3.921875, + "learning_rate": 0.0001339874543502064, + "loss": 4.338, + "step": 226900 + }, + { + "epoch": 1.9537706611570247, + "grad_norm": 2.703125, + "learning_rate": 0.0001339619738336009, + "loss": 4.6376, + "step": 226950 + }, + { + "epoch": 1.9542011019283747, + "grad_norm": 2.4375, + "learning_rate": 0.00013393649082425955, + "loss": 4.4115, + "step": 227000 + }, + { + "epoch": 1.9546315426997245, + "grad_norm": 3.046875, + "learning_rate": 0.0001339110053240528, + "loss": 4.6546, + "step": 227050 + }, + { + "epoch": 1.9550619834710745, + "grad_norm": 1.2578125, + "learning_rate": 0.0001338855173348512, + "loss": 4.6077, + "step": 227100 + }, + { + "epoch": 1.9554924242424243, + "grad_norm": 4.09375, + "learning_rate": 0.00013386002685852557, + "loss": 4.2572, + "step": 227150 + }, + { + "epoch": 1.955922865013774, + "grad_norm": 2.375, + "learning_rate": 0.0001338345338969468, + "loss": 4.1396, + "step": 227200 + }, + { + "epoch": 1.956353305785124, + "grad_norm": 1.734375, + "learning_rate": 0.00013380903845198607, + "loss": 4.3685, + "step": 227250 + }, + { + "epoch": 1.9567837465564737, + "grad_norm": 1.8125, + "learning_rate": 0.00013378354052551462, + "loss": 4.4616, + "step": 227300 + }, + { + "epoch": 1.9572141873278237, + "grad_norm": 4.28125, + "learning_rate": 0.00013375804011940402, + "loss": 4.7125, + "step": 227350 + }, + { + "epoch": 1.9576446280991735, + "grad_norm": 3.09375, + "learning_rate": 0.00013373253723552588, + "loss": 4.434, + "step": 227400 + }, + { + "epoch": 1.9580750688705235, + "grad_norm": 5.46875, + "learning_rate": 0.0001337070318757521, + "loss": 4.1449, + "step": 227450 + }, + { + "epoch": 1.9585055096418733, + "grad_norm": 3.515625, + "learning_rate": 0.00013368152404195466, + "loss": 4.6336, + "step": 227500 + }, + { + "epoch": 1.958935950413223, + "grad_norm": 2.484375, + "learning_rate": 0.00013365601373600586, + "loss": 4.3233, + "step": 227550 + }, + { + "epoch": 1.959366391184573, + "grad_norm": 2.078125, + "learning_rate": 0.00013363050095977806, + "loss": 4.2766, + "step": 227600 + }, + { + "epoch": 1.959796831955923, + "grad_norm": 2.71875, + "learning_rate": 0.00013360498571514384, + "loss": 4.3526, + "step": 227650 + }, + { + "epoch": 1.9602272727272727, + "grad_norm": 2.078125, + "learning_rate": 0.000133579468003976, + "loss": 4.6202, + "step": 227700 + }, + { + "epoch": 1.9606577134986227, + "grad_norm": 2.1875, + "learning_rate": 0.00013355394782814742, + "loss": 4.5862, + "step": 227750 + }, + { + "epoch": 1.9610881542699725, + "grad_norm": 2.765625, + "learning_rate": 0.0001335284251895313, + "loss": 4.2518, + "step": 227800 + }, + { + "epoch": 1.9615185950413223, + "grad_norm": 2.375, + "learning_rate": 0.00013350290009000087, + "loss": 4.0589, + "step": 227850 + }, + { + "epoch": 1.961949035812672, + "grad_norm": 1.4453125, + "learning_rate": 0.00013347737253142968, + "loss": 4.4233, + "step": 227900 + }, + { + "epoch": 1.962379476584022, + "grad_norm": 3.109375, + "learning_rate": 0.0001334518425156914, + "loss": 4.3156, + "step": 227950 + }, + { + "epoch": 1.962809917355372, + "grad_norm": 2.8125, + "learning_rate": 0.00013342631004465978, + "loss": 4.3603, + "step": 228000 + }, + { + "epoch": 1.962809917355372, + "eval_loss": 5.03814697265625, + "eval_runtime": 21.7978, + "eval_samples_per_second": 29.361, + "eval_steps_per_second": 14.68, + "eval_tts_loss": 7.288137049212746, + "step": 228000 + }, + { + "epoch": 1.963240358126722, + "grad_norm": 2.6875, + "learning_rate": 0.00013340077512020897, + "loss": 4.3022, + "step": 228050 + }, + { + "epoch": 1.9636707988980717, + "grad_norm": 2.515625, + "learning_rate": 0.00013337523774421313, + "loss": 4.5097, + "step": 228100 + }, + { + "epoch": 1.9641012396694215, + "grad_norm": 2.265625, + "learning_rate": 0.00013334969791854663, + "loss": 4.3277, + "step": 228150 + }, + { + "epoch": 1.9645316804407713, + "grad_norm": 3.328125, + "learning_rate": 0.00013332415564508407, + "loss": 4.3447, + "step": 228200 + }, + { + "epoch": 1.964962121212121, + "grad_norm": 1.90625, + "learning_rate": 0.00013329861092570018, + "loss": 4.6091, + "step": 228250 + }, + { + "epoch": 1.9653925619834711, + "grad_norm": 2.421875, + "learning_rate": 0.0001332730637622699, + "loss": 4.4401, + "step": 228300 + }, + { + "epoch": 1.965823002754821, + "grad_norm": 3.78125, + "learning_rate": 0.00013324751415666832, + "loss": 4.6507, + "step": 228350 + }, + { + "epoch": 1.966253443526171, + "grad_norm": 4.21875, + "learning_rate": 0.00013322196211077074, + "loss": 4.6663, + "step": 228400 + }, + { + "epoch": 1.9666838842975207, + "grad_norm": 2.5625, + "learning_rate": 0.00013319640762645261, + "loss": 4.3732, + "step": 228450 + }, + { + "epoch": 1.9671143250688705, + "grad_norm": 3.59375, + "learning_rate": 0.00013317085070558958, + "loss": 4.6632, + "step": 228500 + }, + { + "epoch": 1.9675447658402203, + "grad_norm": 4.21875, + "learning_rate": 0.00013314529135005748, + "loss": 4.3959, + "step": 228550 + }, + { + "epoch": 1.96797520661157, + "grad_norm": 2.234375, + "learning_rate": 0.00013311972956173235, + "loss": 4.5491, + "step": 228600 + }, + { + "epoch": 1.9684056473829201, + "grad_norm": 4.59375, + "learning_rate": 0.0001330941653424903, + "loss": 4.4633, + "step": 228650 + }, + { + "epoch": 1.9688360881542701, + "grad_norm": 1.609375, + "learning_rate": 0.0001330685986942077, + "loss": 4.3238, + "step": 228700 + }, + { + "epoch": 1.96926652892562, + "grad_norm": 3.046875, + "learning_rate": 0.00013304302961876112, + "loss": 4.624, + "step": 228750 + }, + { + "epoch": 1.9696969696969697, + "grad_norm": 3.578125, + "learning_rate": 0.00013301745811802727, + "loss": 4.3781, + "step": 228800 + }, + { + "epoch": 1.9701274104683195, + "grad_norm": 2.890625, + "learning_rate": 0.00013299188419388302, + "loss": 4.391, + "step": 228850 + }, + { + "epoch": 1.9705578512396693, + "grad_norm": 3.78125, + "learning_rate": 0.00013296630784820546, + "loss": 4.3632, + "step": 228900 + }, + { + "epoch": 1.9709882920110193, + "grad_norm": 1.75, + "learning_rate": 0.00013294072908287183, + "loss": 4.452, + "step": 228950 + }, + { + "epoch": 1.9714187327823691, + "grad_norm": 1.1953125, + "learning_rate": 0.00013291514789975958, + "loss": 4.5408, + "step": 229000 + }, + { + "epoch": 1.9718491735537191, + "grad_norm": 3.109375, + "learning_rate": 0.00013288956430074623, + "loss": 4.357, + "step": 229050 + }, + { + "epoch": 1.972279614325069, + "grad_norm": 4.6875, + "learning_rate": 0.00013286397828770968, + "loss": 4.6441, + "step": 229100 + }, + { + "epoch": 1.9727100550964187, + "grad_norm": 1.6328125, + "learning_rate": 0.00013283838986252782, + "loss": 3.9945, + "step": 229150 + }, + { + "epoch": 1.9731404958677685, + "grad_norm": 4.1875, + "learning_rate": 0.00013281279902707877, + "loss": 4.6092, + "step": 229200 + }, + { + "epoch": 1.9735709366391183, + "grad_norm": 2.828125, + "learning_rate": 0.0001327872057832409, + "loss": 4.447, + "step": 229250 + }, + { + "epoch": 1.9740013774104683, + "grad_norm": 2.0625, + "learning_rate": 0.00013276161013289268, + "loss": 4.6184, + "step": 229300 + }, + { + "epoch": 1.9744318181818183, + "grad_norm": 1.8359375, + "learning_rate": 0.00013273601207791275, + "loss": 4.5302, + "step": 229350 + }, + { + "epoch": 1.9748622589531681, + "grad_norm": 3.625, + "learning_rate": 0.00013271041162017997, + "loss": 4.0027, + "step": 229400 + }, + { + "epoch": 1.975292699724518, + "grad_norm": 2.328125, + "learning_rate": 0.00013268480876157333, + "loss": 4.07, + "step": 229450 + }, + { + "epoch": 1.9757231404958677, + "grad_norm": 2.5, + "learning_rate": 0.0001326592035039721, + "loss": 4.5722, + "step": 229500 + }, + { + "epoch": 1.9761535812672175, + "grad_norm": 3.125, + "learning_rate": 0.00013263359584925554, + "loss": 4.256, + "step": 229550 + }, + { + "epoch": 1.9765840220385675, + "grad_norm": 2.875, + "learning_rate": 0.0001326079857993033, + "loss": 4.0357, + "step": 229600 + }, + { + "epoch": 1.9770144628099173, + "grad_norm": 2.765625, + "learning_rate": 0.00013258237335599506, + "loss": 4.5307, + "step": 229650 + }, + { + "epoch": 1.9774449035812673, + "grad_norm": 2.40625, + "learning_rate": 0.0001325567585212107, + "loss": 4.2672, + "step": 229700 + }, + { + "epoch": 1.9778753443526171, + "grad_norm": 2.03125, + "learning_rate": 0.00013253114129683032, + "loss": 4.1747, + "step": 229750 + }, + { + "epoch": 1.978305785123967, + "grad_norm": 2.203125, + "learning_rate": 0.00013250552168473417, + "loss": 4.5218, + "step": 229800 + }, + { + "epoch": 1.9787362258953167, + "grad_norm": 4.53125, + "learning_rate": 0.00013247989968680264, + "loss": 4.4838, + "step": 229850 + }, + { + "epoch": 1.9791666666666665, + "grad_norm": 2.546875, + "learning_rate": 0.0001324542753049164, + "loss": 4.1306, + "step": 229900 + }, + { + "epoch": 1.9795971074380165, + "grad_norm": 3.53125, + "learning_rate": 0.00013242864854095613, + "loss": 4.5694, + "step": 229950 + }, + { + "epoch": 1.9800275482093666, + "grad_norm": 2.390625, + "learning_rate": 0.0001324030193968029, + "loss": 4.3008, + "step": 230000 + }, + { + "epoch": 1.9804579889807163, + "grad_norm": 1.1484375, + "learning_rate": 0.00013237738787433767, + "loss": 4.2187, + "step": 230050 + }, + { + "epoch": 1.9808884297520661, + "grad_norm": 1.1640625, + "learning_rate": 0.00013235175397544185, + "loss": 4.4121, + "step": 230100 + }, + { + "epoch": 1.981318870523416, + "grad_norm": 4.03125, + "learning_rate": 0.00013232611770199693, + "loss": 4.1831, + "step": 230150 + }, + { + "epoch": 1.9817493112947657, + "grad_norm": 4.46875, + "learning_rate": 0.00013230047905588455, + "loss": 4.4687, + "step": 230200 + }, + { + "epoch": 1.9821797520661157, + "grad_norm": 1.3984375, + "learning_rate": 0.00013227483803898645, + "loss": 4.5879, + "step": 230250 + }, + { + "epoch": 1.9826101928374655, + "grad_norm": 3.328125, + "learning_rate": 0.00013224919465318468, + "loss": 4.6668, + "step": 230300 + }, + { + "epoch": 1.9830406336088156, + "grad_norm": 2.0625, + "learning_rate": 0.00013222354890036144, + "loss": 4.1026, + "step": 230350 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 1.890625, + "learning_rate": 0.00013219790078239902, + "loss": 4.3291, + "step": 230400 + }, + { + "epoch": 1.9839015151515151, + "grad_norm": 2.359375, + "learning_rate": 0.00013217225030117996, + "loss": 4.6322, + "step": 230450 + }, + { + "epoch": 1.984331955922865, + "grad_norm": 2.8125, + "learning_rate": 0.00013214659745858698, + "loss": 4.8035, + "step": 230500 + }, + { + "epoch": 1.9847623966942147, + "grad_norm": 2.703125, + "learning_rate": 0.00013212094225650286, + "loss": 4.4787, + "step": 230550 + }, + { + "epoch": 1.9851928374655647, + "grad_norm": 2.890625, + "learning_rate": 0.0001320952846968107, + "loss": 4.4221, + "step": 230600 + }, + { + "epoch": 1.9856232782369148, + "grad_norm": 1.796875, + "learning_rate": 0.00013206962478139373, + "loss": 4.1563, + "step": 230650 + }, + { + "epoch": 1.9860537190082646, + "grad_norm": 1.90625, + "learning_rate": 0.0001320439625121353, + "loss": 4.6338, + "step": 230700 + }, + { + "epoch": 1.9864841597796143, + "grad_norm": 1.5078125, + "learning_rate": 0.00013201829789091893, + "loss": 4.6254, + "step": 230750 + }, + { + "epoch": 1.9869146005509641, + "grad_norm": 4.65625, + "learning_rate": 0.00013199263091962838, + "loss": 4.6582, + "step": 230800 + }, + { + "epoch": 1.987345041322314, + "grad_norm": 1.0234375, + "learning_rate": 0.0001319669616001476, + "loss": 4.4411, + "step": 230850 + }, + { + "epoch": 1.987775482093664, + "grad_norm": 3.65625, + "learning_rate": 0.00013194128993436062, + "loss": 4.4935, + "step": 230900 + }, + { + "epoch": 1.9882059228650137, + "grad_norm": 3.21875, + "learning_rate": 0.00013191561592415163, + "loss": 4.2984, + "step": 230950 + }, + { + "epoch": 1.9886363636363638, + "grad_norm": 3.296875, + "learning_rate": 0.00013188993957140513, + "loss": 4.3819, + "step": 231000 + }, + { + "epoch": 1.9886363636363638, + "eval_loss": 5.036816120147705, + "eval_runtime": 21.8132, + "eval_samples_per_second": 29.34, + "eval_steps_per_second": 14.67, + "eval_tts_loss": 7.275183283766534, + "step": 231000 + }, + { + "epoch": 1.9890668044077136, + "grad_norm": 2.96875, + "learning_rate": 0.00013186426087800568, + "loss": 4.5505, + "step": 231050 + }, + { + "epoch": 1.9894972451790633, + "grad_norm": 2.59375, + "learning_rate": 0.00013183857984583802, + "loss": 4.0794, + "step": 231100 + }, + { + "epoch": 1.9899276859504131, + "grad_norm": 2.90625, + "learning_rate": 0.00013181289647678712, + "loss": 4.3185, + "step": 231150 + }, + { + "epoch": 1.990358126721763, + "grad_norm": 2.90625, + "learning_rate": 0.00013178721077273807, + "loss": 4.5574, + "step": 231200 + }, + { + "epoch": 1.990788567493113, + "grad_norm": 3.78125, + "learning_rate": 0.00013176152273557609, + "loss": 4.4835, + "step": 231250 + }, + { + "epoch": 1.991219008264463, + "grad_norm": 2.65625, + "learning_rate": 0.00013173583236718674, + "loss": 4.383, + "step": 231300 + }, + { + "epoch": 1.9916494490358128, + "grad_norm": 2.984375, + "learning_rate": 0.00013171013966945552, + "loss": 4.5291, + "step": 231350 + }, + { + "epoch": 1.9920798898071626, + "grad_norm": 1.3984375, + "learning_rate": 0.00013168444464426834, + "loss": 4.2791, + "step": 231400 + }, + { + "epoch": 1.9925103305785123, + "grad_norm": 2.84375, + "learning_rate": 0.00013165874729351105, + "loss": 4.3751, + "step": 231450 + }, + { + "epoch": 1.9929407713498621, + "grad_norm": 2.234375, + "learning_rate": 0.00013163304761906983, + "loss": 4.714, + "step": 231500 + }, + { + "epoch": 1.9933712121212122, + "grad_norm": 4.09375, + "learning_rate": 0.000131607345622831, + "loss": 4.8913, + "step": 231550 + }, + { + "epoch": 1.993801652892562, + "grad_norm": 3.453125, + "learning_rate": 0.000131581641306681, + "loss": 4.1482, + "step": 231600 + }, + { + "epoch": 1.994232093663912, + "grad_norm": 4.46875, + "learning_rate": 0.00013155593467250643, + "loss": 4.7114, + "step": 231650 + }, + { + "epoch": 1.9946625344352618, + "grad_norm": 4.0625, + "learning_rate": 0.0001315302257221942, + "loss": 4.5604, + "step": 231700 + }, + { + "epoch": 1.9950929752066116, + "grad_norm": 3.5, + "learning_rate": 0.00013150451445763123, + "loss": 4.2361, + "step": 231750 + }, + { + "epoch": 1.9955234159779613, + "grad_norm": 2.328125, + "learning_rate": 0.0001314788008807047, + "loss": 4.4734, + "step": 231800 + }, + { + "epoch": 1.9959538567493111, + "grad_norm": 2.875, + "learning_rate": 0.00013145308499330186, + "loss": 4.2232, + "step": 231850 + }, + { + "epoch": 1.9963842975206612, + "grad_norm": 1.4296875, + "learning_rate": 0.0001314273667973103, + "loss": 4.4652, + "step": 231900 + }, + { + "epoch": 1.9968147382920112, + "grad_norm": 3.046875, + "learning_rate": 0.00013140164629461763, + "loss": 4.4417, + "step": 231950 + }, + { + "epoch": 1.997245179063361, + "grad_norm": 3.171875, + "learning_rate": 0.0001313759234871117, + "loss": 4.5901, + "step": 232000 + }, + { + "epoch": 1.9976756198347108, + "grad_norm": 4.09375, + "learning_rate": 0.00013135019837668045, + "loss": 4.3243, + "step": 232050 + }, + { + "epoch": 1.9981060606060606, + "grad_norm": 3.875, + "learning_rate": 0.00013132447096521213, + "loss": 4.2767, + "step": 232100 + }, + { + "epoch": 1.9985365013774103, + "grad_norm": 3.234375, + "learning_rate": 0.000131298741254595, + "loss": 4.7821, + "step": 232150 + }, + { + "epoch": 1.9989669421487604, + "grad_norm": 2.65625, + "learning_rate": 0.00013127300924671762, + "loss": 4.6669, + "step": 232200 + }, + { + "epoch": 1.9993973829201102, + "grad_norm": 4.03125, + "learning_rate": 0.00013124727494346865, + "loss": 4.0649, + "step": 232250 + }, + { + "epoch": 1.9998278236914602, + "grad_norm": 2.90625, + "learning_rate": 0.00013122153834673692, + "loss": 4.6496, + "step": 232300 + }, + { + "epoch": 2.00025826446281, + "grad_norm": 1.9609375, + "learning_rate": 0.00013119579945841144, + "loss": 4.3524, + "step": 232350 + }, + { + "epoch": 2.0006887052341598, + "grad_norm": 3.296875, + "learning_rate": 0.00013117005828038142, + "loss": 4.4407, + "step": 232400 + }, + { + "epoch": 2.0011191460055096, + "grad_norm": 3.921875, + "learning_rate": 0.0001311443148145362, + "loss": 4.6714, + "step": 232450 + }, + { + "epoch": 2.0015495867768593, + "grad_norm": 3.46875, + "learning_rate": 0.00013111856906276525, + "loss": 4.2322, + "step": 232500 + }, + { + "epoch": 2.0019800275482096, + "grad_norm": 0.703125, + "learning_rate": 0.00013109282102695833, + "loss": 4.5528, + "step": 232550 + }, + { + "epoch": 2.0024104683195594, + "grad_norm": 2.859375, + "learning_rate": 0.0001310670707090052, + "loss": 4.3319, + "step": 232600 + }, + { + "epoch": 2.002840909090909, + "grad_norm": 1.734375, + "learning_rate": 0.0001310413181107959, + "loss": 4.4109, + "step": 232650 + }, + { + "epoch": 2.003271349862259, + "grad_norm": 2.71875, + "learning_rate": 0.00013101556323422072, + "loss": 4.5476, + "step": 232700 + }, + { + "epoch": 2.0037017906336088, + "grad_norm": 3.984375, + "learning_rate": 0.00013098980608116986, + "loss": 4.3996, + "step": 232750 + }, + { + "epoch": 2.0041322314049586, + "grad_norm": 2.296875, + "learning_rate": 0.00013096404665353392, + "loss": 4.4785, + "step": 232800 + }, + { + "epoch": 2.0045626721763083, + "grad_norm": 3.84375, + "learning_rate": 0.00013093828495320362, + "loss": 4.2747, + "step": 232850 + }, + { + "epoch": 2.0049931129476586, + "grad_norm": 3.015625, + "learning_rate": 0.00013091252098206975, + "loss": 4.3942, + "step": 232900 + }, + { + "epoch": 2.0054235537190084, + "grad_norm": 2.5625, + "learning_rate": 0.00013088675474202333, + "loss": 3.7228, + "step": 232950 + }, + { + "epoch": 2.005853994490358, + "grad_norm": 2.328125, + "learning_rate": 0.00013086098623495555, + "loss": 4.5769, + "step": 233000 + }, + { + "epoch": 2.006284435261708, + "grad_norm": 1.359375, + "learning_rate": 0.00013083521546275781, + "loss": 4.2394, + "step": 233050 + }, + { + "epoch": 2.0067148760330578, + "grad_norm": 2.734375, + "learning_rate": 0.00013080944242732163, + "loss": 4.2471, + "step": 233100 + }, + { + "epoch": 2.0071453168044076, + "grad_norm": 2.0, + "learning_rate": 0.0001307836671305386, + "loss": 4.3415, + "step": 233150 + }, + { + "epoch": 2.007575757575758, + "grad_norm": 3.140625, + "learning_rate": 0.00013075788957430065, + "loss": 4.6546, + "step": 233200 + }, + { + "epoch": 2.0080061983471076, + "grad_norm": 3.4375, + "learning_rate": 0.00013073210976049984, + "loss": 4.2723, + "step": 233250 + }, + { + "epoch": 2.0084366391184574, + "grad_norm": 3.1875, + "learning_rate": 0.00013070632769102822, + "loss": 4.6354, + "step": 233300 + }, + { + "epoch": 2.008867079889807, + "grad_norm": 3.96875, + "learning_rate": 0.00013068054336777825, + "loss": 4.5433, + "step": 233350 + }, + { + "epoch": 2.009297520661157, + "grad_norm": 3.15625, + "learning_rate": 0.00013065475679264237, + "loss": 4.3098, + "step": 233400 + }, + { + "epoch": 2.0097279614325068, + "grad_norm": 4.40625, + "learning_rate": 0.00013062896796751336, + "loss": 4.2104, + "step": 233450 + }, + { + "epoch": 2.0101584022038566, + "grad_norm": 2.40625, + "learning_rate": 0.000130603176894284, + "loss": 4.7517, + "step": 233500 + }, + { + "epoch": 2.010588842975207, + "grad_norm": 5.0625, + "learning_rate": 0.00013057738357484723, + "loss": 4.4149, + "step": 233550 + }, + { + "epoch": 2.0110192837465566, + "grad_norm": 5.75, + "learning_rate": 0.00013055158801109635, + "loss": 4.2466, + "step": 233600 + }, + { + "epoch": 2.0114497245179064, + "grad_norm": 2.421875, + "learning_rate": 0.00013052579020492465, + "loss": 4.4027, + "step": 233650 + }, + { + "epoch": 2.011880165289256, + "grad_norm": 2.1875, + "learning_rate": 0.00013049999015822558, + "loss": 4.196, + "step": 233700 + }, + { + "epoch": 2.012310606060606, + "grad_norm": 2.109375, + "learning_rate": 0.00013047418787289286, + "loss": 4.2452, + "step": 233750 + }, + { + "epoch": 2.0127410468319558, + "grad_norm": 4.53125, + "learning_rate": 0.00013044838335082038, + "loss": 4.4962, + "step": 233800 + }, + { + "epoch": 2.013171487603306, + "grad_norm": 1.7890625, + "learning_rate": 0.000130422576593902, + "loss": 4.1795, + "step": 233850 + }, + { + "epoch": 2.013601928374656, + "grad_norm": 2.953125, + "learning_rate": 0.00013039676760403198, + "loss": 4.1406, + "step": 233900 + }, + { + "epoch": 2.0140323691460056, + "grad_norm": 2.265625, + "learning_rate": 0.00013037095638310464, + "loss": 4.5681, + "step": 233950 + }, + { + "epoch": 2.0144628099173554, + "grad_norm": 1.1796875, + "learning_rate": 0.00013034514293301446, + "loss": 4.1719, + "step": 234000 + }, + { + "epoch": 2.0144628099173554, + "eval_loss": 5.037001609802246, + "eval_runtime": 22.0979, + "eval_samples_per_second": 28.962, + "eval_steps_per_second": 14.481, + "eval_tts_loss": 7.315695545191628, + "step": 234000 + }, + { + "epoch": 2.014893250688705, + "grad_norm": 3.15625, + "learning_rate": 0.00013031932725565603, + "loss": 4.1027, + "step": 234050 + }, + { + "epoch": 2.015323691460055, + "grad_norm": 3.578125, + "learning_rate": 0.00013029350935292423, + "loss": 4.6561, + "step": 234100 + }, + { + "epoch": 2.0157541322314048, + "grad_norm": 1.9609375, + "learning_rate": 0.00013026768922671402, + "loss": 4.5103, + "step": 234150 + }, + { + "epoch": 2.016184573002755, + "grad_norm": 5.96875, + "learning_rate": 0.00013024186687892052, + "loss": 4.2305, + "step": 234200 + }, + { + "epoch": 2.016615013774105, + "grad_norm": 2.3125, + "learning_rate": 0.0001302160423114391, + "loss": 3.971, + "step": 234250 + }, + { + "epoch": 2.0170454545454546, + "grad_norm": 2.484375, + "learning_rate": 0.0001301902155261652, + "loss": 4.8936, + "step": 234300 + }, + { + "epoch": 2.0174758953168044, + "grad_norm": 4.625, + "learning_rate": 0.00013016438652499438, + "loss": 4.6421, + "step": 234350 + }, + { + "epoch": 2.017906336088154, + "grad_norm": 4.71875, + "learning_rate": 0.00013013855530982253, + "loss": 4.1621, + "step": 234400 + }, + { + "epoch": 2.018336776859504, + "grad_norm": 2.125, + "learning_rate": 0.00013011272188254557, + "loss": 4.1475, + "step": 234450 + }, + { + "epoch": 2.018767217630854, + "grad_norm": 2.015625, + "learning_rate": 0.00013008688624505959, + "loss": 4.0935, + "step": 234500 + }, + { + "epoch": 2.019197658402204, + "grad_norm": 2.765625, + "learning_rate": 0.00013006104839926086, + "loss": 4.6147, + "step": 234550 + }, + { + "epoch": 2.019628099173554, + "grad_norm": 4.15625, + "learning_rate": 0.0001300352083470459, + "loss": 4.7151, + "step": 234600 + }, + { + "epoch": 2.0200585399449036, + "grad_norm": 1.75, + "learning_rate": 0.0001300093660903113, + "loss": 4.2808, + "step": 234650 + }, + { + "epoch": 2.0204889807162534, + "grad_norm": 3.78125, + "learning_rate": 0.00012998352163095373, + "loss": 4.1704, + "step": 234700 + }, + { + "epoch": 2.020919421487603, + "grad_norm": 2.5625, + "learning_rate": 0.0001299576749708702, + "loss": 4.2644, + "step": 234750 + }, + { + "epoch": 2.021349862258953, + "grad_norm": 1.5234375, + "learning_rate": 0.00012993182611195783, + "loss": 4.1252, + "step": 234800 + }, + { + "epoch": 2.021780303030303, + "grad_norm": 3.46875, + "learning_rate": 0.00012990597505611374, + "loss": 4.6791, + "step": 234850 + }, + { + "epoch": 2.022210743801653, + "grad_norm": 5.03125, + "learning_rate": 0.0001298801218052355, + "loss": 4.3523, + "step": 234900 + }, + { + "epoch": 2.022641184573003, + "grad_norm": 3.921875, + "learning_rate": 0.00012985426636122056, + "loss": 4.272, + "step": 234950 + }, + { + "epoch": 2.0230716253443526, + "grad_norm": 1.5, + "learning_rate": 0.00012982840872596675, + "loss": 4.784, + "step": 235000 + }, + { + "epoch": 2.0235020661157024, + "grad_norm": 3.75, + "learning_rate": 0.00012980254890137188, + "loss": 4.5586, + "step": 235050 + }, + { + "epoch": 2.023932506887052, + "grad_norm": 2.59375, + "learning_rate": 0.00012977668688933405, + "loss": 4.5532, + "step": 235100 + }, + { + "epoch": 2.0243629476584024, + "grad_norm": 2.09375, + "learning_rate": 0.00012975082269175148, + "loss": 4.5497, + "step": 235150 + }, + { + "epoch": 2.024793388429752, + "grad_norm": 3.21875, + "learning_rate": 0.00012972495631052253, + "loss": 4.0533, + "step": 235200 + }, + { + "epoch": 2.025223829201102, + "grad_norm": 1.734375, + "learning_rate": 0.00012969908774754574, + "loss": 4.3129, + "step": 235250 + }, + { + "epoch": 2.025654269972452, + "grad_norm": 2.96875, + "learning_rate": 0.0001296732170047198, + "loss": 4.37, + "step": 235300 + }, + { + "epoch": 2.0260847107438016, + "grad_norm": 3.390625, + "learning_rate": 0.0001296473440839436, + "loss": 4.4195, + "step": 235350 + }, + { + "epoch": 2.0265151515151514, + "grad_norm": 3.921875, + "learning_rate": 0.0001296214689871161, + "loss": 4.2989, + "step": 235400 + }, + { + "epoch": 2.026945592286501, + "grad_norm": 4.34375, + "learning_rate": 0.00012959559171613652, + "loss": 4.688, + "step": 235450 + }, + { + "epoch": 2.0273760330578514, + "grad_norm": 2.859375, + "learning_rate": 0.00012956971227290418, + "loss": 4.5615, + "step": 235500 + }, + { + "epoch": 2.027806473829201, + "grad_norm": 3.53125, + "learning_rate": 0.0001295438306593186, + "loss": 4.2132, + "step": 235550 + }, + { + "epoch": 2.028236914600551, + "grad_norm": 2.21875, + "learning_rate": 0.00012951794687727938, + "loss": 4.336, + "step": 235600 + }, + { + "epoch": 2.028667355371901, + "grad_norm": 1.953125, + "learning_rate": 0.0001294920609286864, + "loss": 4.3663, + "step": 235650 + }, + { + "epoch": 2.0290977961432506, + "grad_norm": 2.53125, + "learning_rate": 0.0001294661728154396, + "loss": 4.6438, + "step": 235700 + }, + { + "epoch": 2.0295282369146004, + "grad_norm": 2.984375, + "learning_rate": 0.0001294402825394391, + "loss": 4.5965, + "step": 235750 + }, + { + "epoch": 2.0299586776859506, + "grad_norm": 2.78125, + "learning_rate": 0.00012941439010258522, + "loss": 4.0466, + "step": 235800 + }, + { + "epoch": 2.0303891184573004, + "grad_norm": 2.078125, + "learning_rate": 0.00012938849550677838, + "loss": 4.269, + "step": 235850 + }, + { + "epoch": 2.03081955922865, + "grad_norm": 3.046875, + "learning_rate": 0.00012936259875391922, + "loss": 4.2691, + "step": 235900 + }, + { + "epoch": 2.03125, + "grad_norm": 3.421875, + "learning_rate": 0.00012933669984590846, + "loss": 4.3433, + "step": 235950 + }, + { + "epoch": 2.03168044077135, + "grad_norm": 3.171875, + "learning_rate": 0.00012931079878464706, + "loss": 4.4711, + "step": 236000 + }, + { + "epoch": 2.0321108815426996, + "grad_norm": 4.21875, + "learning_rate": 0.0001292848955720361, + "loss": 4.4537, + "step": 236050 + }, + { + "epoch": 2.0325413223140494, + "grad_norm": 1.796875, + "learning_rate": 0.0001292589902099768, + "loss": 4.1053, + "step": 236100 + }, + { + "epoch": 2.0329717630853996, + "grad_norm": 3.734375, + "learning_rate": 0.0001292330827003706, + "loss": 4.6452, + "step": 236150 + }, + { + "epoch": 2.0334022038567494, + "grad_norm": 4.15625, + "learning_rate": 0.000129207173045119, + "loss": 4.4227, + "step": 236200 + }, + { + "epoch": 2.033832644628099, + "grad_norm": 1.1796875, + "learning_rate": 0.00012918126124612374, + "loss": 4.2074, + "step": 236250 + }, + { + "epoch": 2.034263085399449, + "grad_norm": 3.203125, + "learning_rate": 0.00012915534730528668, + "loss": 4.4036, + "step": 236300 + }, + { + "epoch": 2.034693526170799, + "grad_norm": 1.6484375, + "learning_rate": 0.0001291294312245099, + "loss": 4.1443, + "step": 236350 + }, + { + "epoch": 2.0351239669421486, + "grad_norm": 3.4375, + "learning_rate": 0.00012910351300569547, + "loss": 4.5357, + "step": 236400 + }, + { + "epoch": 2.035554407713499, + "grad_norm": 3.8125, + "learning_rate": 0.00012907759265074587, + "loss": 4.3159, + "step": 236450 + }, + { + "epoch": 2.0359848484848486, + "grad_norm": 2.53125, + "learning_rate": 0.0001290516701615635, + "loss": 4.1391, + "step": 236500 + }, + { + "epoch": 2.0364152892561984, + "grad_norm": 4.40625, + "learning_rate": 0.00012902574554005106, + "loss": 4.4062, + "step": 236550 + }, + { + "epoch": 2.036845730027548, + "grad_norm": 2.765625, + "learning_rate": 0.00012899981878811134, + "loss": 4.3044, + "step": 236600 + }, + { + "epoch": 2.037276170798898, + "grad_norm": 2.625, + "learning_rate": 0.0001289738899076473, + "loss": 4.2948, + "step": 236650 + }, + { + "epoch": 2.037706611570248, + "grad_norm": 2.90625, + "learning_rate": 0.00012894795890056211, + "loss": 4.7039, + "step": 236700 + }, + { + "epoch": 2.0381370523415976, + "grad_norm": 2.0625, + "learning_rate": 0.000128922025768759, + "loss": 4.5328, + "step": 236750 + }, + { + "epoch": 2.038567493112948, + "grad_norm": 3.453125, + "learning_rate": 0.00012889609051414144, + "loss": 4.2461, + "step": 236800 + }, + { + "epoch": 2.0389979338842976, + "grad_norm": 4.75, + "learning_rate": 0.00012887015313861301, + "loss": 4.005, + "step": 236850 + }, + { + "epoch": 2.0394283746556474, + "grad_norm": 4.5625, + "learning_rate": 0.00012884421364407745, + "loss": 4.2708, + "step": 236900 + }, + { + "epoch": 2.039858815426997, + "grad_norm": 3.078125, + "learning_rate": 0.00012881827203243865, + "loss": 4.4036, + "step": 236950 + }, + { + "epoch": 2.040289256198347, + "grad_norm": 2.296875, + "learning_rate": 0.0001287923283056007, + "loss": 4.6053, + "step": 237000 + }, + { + "epoch": 2.040289256198347, + "eval_loss": 5.032306671142578, + "eval_runtime": 21.7709, + "eval_samples_per_second": 29.397, + "eval_steps_per_second": 14.699, + "eval_tts_loss": 7.360729168440964, + "step": 237000 + }, + { + "epoch": 2.040719696969697, + "grad_norm": 2.9375, + "learning_rate": 0.00012876638246546782, + "loss": 4.4661, + "step": 237050 + }, + { + "epoch": 2.041150137741047, + "grad_norm": 3.09375, + "learning_rate": 0.00012874043451394432, + "loss": 4.5194, + "step": 237100 + }, + { + "epoch": 2.041580578512397, + "grad_norm": 3.296875, + "learning_rate": 0.0001287144844529348, + "loss": 4.674, + "step": 237150 + }, + { + "epoch": 2.0420110192837466, + "grad_norm": 2.59375, + "learning_rate": 0.00012868853228434387, + "loss": 4.6751, + "step": 237200 + }, + { + "epoch": 2.0424414600550964, + "grad_norm": 2.828125, + "learning_rate": 0.00012866257801007644, + "loss": 4.5797, + "step": 237250 + }, + { + "epoch": 2.042871900826446, + "grad_norm": 2.671875, + "learning_rate": 0.0001286366216320374, + "loss": 4.3509, + "step": 237300 + }, + { + "epoch": 2.043302341597796, + "grad_norm": 2.796875, + "learning_rate": 0.00012861066315213198, + "loss": 4.2642, + "step": 237350 + }, + { + "epoch": 2.043732782369146, + "grad_norm": 3.34375, + "learning_rate": 0.00012858470257226542, + "loss": 4.7571, + "step": 237400 + }, + { + "epoch": 2.044163223140496, + "grad_norm": 6.125, + "learning_rate": 0.00012855873989434325, + "loss": 3.9321, + "step": 237450 + }, + { + "epoch": 2.044593663911846, + "grad_norm": 1.328125, + "learning_rate": 0.00012853277512027094, + "loss": 4.1185, + "step": 237500 + }, + { + "epoch": 2.0450241046831956, + "grad_norm": 2.75, + "learning_rate": 0.00012850680825195437, + "loss": 4.7012, + "step": 237550 + }, + { + "epoch": 2.0454545454545454, + "grad_norm": 1.5390625, + "learning_rate": 0.0001284808392912994, + "loss": 4.3283, + "step": 237600 + }, + { + "epoch": 2.045884986225895, + "grad_norm": 1.7265625, + "learning_rate": 0.00012845486824021208, + "loss": 4.4183, + "step": 237650 + }, + { + "epoch": 2.046315426997245, + "grad_norm": 3.28125, + "learning_rate": 0.00012842889510059871, + "loss": 4.6658, + "step": 237700 + }, + { + "epoch": 2.0467458677685952, + "grad_norm": 1.046875, + "learning_rate": 0.00012840291987436557, + "loss": 4.4295, + "step": 237750 + }, + { + "epoch": 2.047176308539945, + "grad_norm": 1.84375, + "learning_rate": 0.0001283769425634192, + "loss": 4.0736, + "step": 237800 + }, + { + "epoch": 2.047606749311295, + "grad_norm": 3.46875, + "learning_rate": 0.00012835096316966631, + "loss": 4.2706, + "step": 237850 + }, + { + "epoch": 2.0480371900826446, + "grad_norm": 3.484375, + "learning_rate": 0.00012832498169501375, + "loss": 4.3896, + "step": 237900 + }, + { + "epoch": 2.0484676308539944, + "grad_norm": 3.3125, + "learning_rate": 0.00012829899814136846, + "loss": 4.7148, + "step": 237950 + }, + { + "epoch": 2.048898071625344, + "grad_norm": 1.1953125, + "learning_rate": 0.00012827301251063758, + "loss": 4.3816, + "step": 238000 + }, + { + "epoch": 2.049328512396694, + "grad_norm": 2.609375, + "learning_rate": 0.00012824702480472846, + "loss": 4.5911, + "step": 238050 + }, + { + "epoch": 2.0497589531680442, + "grad_norm": 4.25, + "learning_rate": 0.00012822103502554847, + "loss": 4.0037, + "step": 238100 + }, + { + "epoch": 2.050189393939394, + "grad_norm": 3.421875, + "learning_rate": 0.00012819504317500519, + "loss": 4.3853, + "step": 238150 + }, + { + "epoch": 2.050619834710744, + "grad_norm": 1.359375, + "learning_rate": 0.0001281690492550064, + "loss": 4.4044, + "step": 238200 + }, + { + "epoch": 2.0510502754820936, + "grad_norm": 6.15625, + "learning_rate": 0.00012814305326746007, + "loss": 4.6058, + "step": 238250 + }, + { + "epoch": 2.0514807162534434, + "grad_norm": 2.78125, + "learning_rate": 0.00012811705521427415, + "loss": 4.4907, + "step": 238300 + }, + { + "epoch": 2.051911157024793, + "grad_norm": 3.671875, + "learning_rate": 0.00012809105509735687, + "loss": 4.401, + "step": 238350 + }, + { + "epoch": 2.0523415977961434, + "grad_norm": 3.921875, + "learning_rate": 0.0001280650529186166, + "loss": 4.2815, + "step": 238400 + }, + { + "epoch": 2.0527720385674932, + "grad_norm": 3.40625, + "learning_rate": 0.00012803904867996182, + "loss": 4.486, + "step": 238450 + }, + { + "epoch": 2.053202479338843, + "grad_norm": 2.859375, + "learning_rate": 0.0001280130423833012, + "loss": 4.2161, + "step": 238500 + }, + { + "epoch": 2.053632920110193, + "grad_norm": 1.7109375, + "learning_rate": 0.00012798703403054347, + "loss": 4.0947, + "step": 238550 + }, + { + "epoch": 2.0540633608815426, + "grad_norm": 5.21875, + "learning_rate": 0.00012796102362359776, + "loss": 4.047, + "step": 238600 + }, + { + "epoch": 2.0544938016528924, + "grad_norm": 3.75, + "learning_rate": 0.00012793501116437303, + "loss": 4.5175, + "step": 238650 + }, + { + "epoch": 2.054924242424242, + "grad_norm": 2.8125, + "learning_rate": 0.00012790899665477857, + "loss": 4.5597, + "step": 238700 + }, + { + "epoch": 2.0553546831955924, + "grad_norm": 1.625, + "learning_rate": 0.00012788298009672387, + "loss": 4.3696, + "step": 238750 + }, + { + "epoch": 2.0557851239669422, + "grad_norm": 3.09375, + "learning_rate": 0.00012785696149211835, + "loss": 4.3517, + "step": 238800 + }, + { + "epoch": 2.056215564738292, + "grad_norm": 1.7265625, + "learning_rate": 0.0001278309408428718, + "loss": 3.9974, + "step": 238850 + }, + { + "epoch": 2.056646005509642, + "grad_norm": 5.03125, + "learning_rate": 0.0001278049181508941, + "loss": 4.7621, + "step": 238900 + }, + { + "epoch": 2.0570764462809916, + "grad_norm": 1.8984375, + "learning_rate": 0.00012777889341809522, + "loss": 4.3698, + "step": 238950 + }, + { + "epoch": 2.0575068870523414, + "grad_norm": 4.21875, + "learning_rate": 0.00012775286664638532, + "loss": 4.456, + "step": 239000 + }, + { + "epoch": 2.0579373278236917, + "grad_norm": 1.015625, + "learning_rate": 0.00012772683783767474, + "loss": 4.093, + "step": 239050 + }, + { + "epoch": 2.0583677685950414, + "grad_norm": 3.734375, + "learning_rate": 0.0001277008069938739, + "loss": 4.5206, + "step": 239100 + }, + { + "epoch": 2.0587982093663912, + "grad_norm": 3.140625, + "learning_rate": 0.00012767477411689346, + "loss": 3.8517, + "step": 239150 + }, + { + "epoch": 2.059228650137741, + "grad_norm": 3.21875, + "learning_rate": 0.00012764873920864412, + "loss": 4.3505, + "step": 239200 + }, + { + "epoch": 2.059659090909091, + "grad_norm": 3.046875, + "learning_rate": 0.0001276227022710368, + "loss": 4.3288, + "step": 239250 + }, + { + "epoch": 2.0600895316804406, + "grad_norm": 3.359375, + "learning_rate": 0.00012759666330598257, + "loss": 4.4192, + "step": 239300 + }, + { + "epoch": 2.0605199724517904, + "grad_norm": 4.3125, + "learning_rate": 0.00012757062231539263, + "loss": 4.2234, + "step": 239350 + }, + { + "epoch": 2.0609504132231407, + "grad_norm": 2.46875, + "learning_rate": 0.00012754457930117834, + "loss": 4.259, + "step": 239400 + }, + { + "epoch": 2.0613808539944904, + "grad_norm": 2.140625, + "learning_rate": 0.00012751853426525117, + "loss": 4.2679, + "step": 239450 + }, + { + "epoch": 2.0618112947658402, + "grad_norm": 4.25, + "learning_rate": 0.00012749248720952285, + "loss": 4.2597, + "step": 239500 + }, + { + "epoch": 2.06224173553719, + "grad_norm": 3.765625, + "learning_rate": 0.00012746643813590507, + "loss": 4.5196, + "step": 239550 + }, + { + "epoch": 2.06267217630854, + "grad_norm": 2.46875, + "learning_rate": 0.00012744038704630984, + "loss": 4.4117, + "step": 239600 + }, + { + "epoch": 2.0631026170798896, + "grad_norm": 3.78125, + "learning_rate": 0.00012741433394264923, + "loss": 4.2253, + "step": 239650 + }, + { + "epoch": 2.06353305785124, + "grad_norm": 2.96875, + "learning_rate": 0.0001273882788268355, + "loss": 4.6198, + "step": 239700 + }, + { + "epoch": 2.0639634986225897, + "grad_norm": 3.359375, + "learning_rate": 0.00012736222170078103, + "loss": 4.7058, + "step": 239750 + }, + { + "epoch": 2.0643939393939394, + "grad_norm": 4.28125, + "learning_rate": 0.0001273361625663984, + "loss": 4.3315, + "step": 239800 + }, + { + "epoch": 2.0648243801652892, + "grad_norm": 2.09375, + "learning_rate": 0.00012731010142560015, + "loss": 3.9417, + "step": 239850 + }, + { + "epoch": 2.065254820936639, + "grad_norm": 2.15625, + "learning_rate": 0.00012728403828029928, + "loss": 4.8033, + "step": 239900 + }, + { + "epoch": 2.065685261707989, + "grad_norm": 1.6796875, + "learning_rate": 0.00012725797313240873, + "loss": 4.5976, + "step": 239950 + }, + { + "epoch": 2.0661157024793386, + "grad_norm": 4.5625, + "learning_rate": 0.00012723190598384155, + "loss": 4.4649, + "step": 240000 + }, + { + "epoch": 2.0661157024793386, + "eval_loss": 5.033450126647949, + "eval_runtime": 21.8456, + "eval_samples_per_second": 29.296, + "eval_steps_per_second": 14.648, + "eval_tts_loss": 7.34856754614598, + "step": 240000 + }, + { + "epoch": 2.066546143250689, + "grad_norm": 2.953125, + "learning_rate": 0.00012720583683651108, + "loss": 4.1617, + "step": 240050 + }, + { + "epoch": 2.0669765840220387, + "grad_norm": 1.203125, + "learning_rate": 0.00012717976569233074, + "loss": 4.3832, + "step": 240100 + }, + { + "epoch": 2.0674070247933884, + "grad_norm": 3.078125, + "learning_rate": 0.00012715369255321404, + "loss": 4.5626, + "step": 240150 + }, + { + "epoch": 2.0678374655647382, + "grad_norm": 2.9375, + "learning_rate": 0.00012712761742107476, + "loss": 4.1653, + "step": 240200 + }, + { + "epoch": 2.068267906336088, + "grad_norm": 2.234375, + "learning_rate": 0.0001271015402978267, + "loss": 4.2125, + "step": 240250 + }, + { + "epoch": 2.068698347107438, + "grad_norm": 3.453125, + "learning_rate": 0.00012707546118538395, + "loss": 4.2864, + "step": 240300 + }, + { + "epoch": 2.069128787878788, + "grad_norm": 1.8359375, + "learning_rate": 0.00012704938008566055, + "loss": 4.1236, + "step": 240350 + }, + { + "epoch": 2.069559228650138, + "grad_norm": 3.203125, + "learning_rate": 0.00012702329700057085, + "loss": 4.3714, + "step": 240400 + }, + { + "epoch": 2.0699896694214877, + "grad_norm": 1.6640625, + "learning_rate": 0.0001269972119320293, + "loss": 4.2462, + "step": 240450 + }, + { + "epoch": 2.0704201101928374, + "grad_norm": 2.8125, + "learning_rate": 0.00012697112488195048, + "loss": 4.0073, + "step": 240500 + }, + { + "epoch": 2.0708505509641872, + "grad_norm": 1.9296875, + "learning_rate": 0.00012694503585224915, + "loss": 4.2194, + "step": 240550 + }, + { + "epoch": 2.071280991735537, + "grad_norm": 2.625, + "learning_rate": 0.00012691894484484012, + "loss": 4.3994, + "step": 240600 + }, + { + "epoch": 2.071711432506887, + "grad_norm": 2.671875, + "learning_rate": 0.0001268928518616385, + "loss": 4.6783, + "step": 240650 + }, + { + "epoch": 2.072141873278237, + "grad_norm": 0.88671875, + "learning_rate": 0.00012686675690455938, + "loss": 4.1149, + "step": 240700 + }, + { + "epoch": 2.072572314049587, + "grad_norm": 4.59375, + "learning_rate": 0.0001268406599755181, + "loss": 4.3801, + "step": 240750 + }, + { + "epoch": 2.0730027548209367, + "grad_norm": 3.75, + "learning_rate": 0.00012681456107643016, + "loss": 4.1964, + "step": 240800 + }, + { + "epoch": 2.0734331955922864, + "grad_norm": 1.5625, + "learning_rate": 0.00012678846020921115, + "loss": 4.2355, + "step": 240850 + }, + { + "epoch": 2.0738636363636362, + "grad_norm": 2.84375, + "learning_rate": 0.00012676235737577672, + "loss": 4.3201, + "step": 240900 + }, + { + "epoch": 2.074294077134986, + "grad_norm": 4.59375, + "learning_rate": 0.0001267362525780429, + "loss": 4.825, + "step": 240950 + }, + { + "epoch": 2.0747245179063363, + "grad_norm": 2.671875, + "learning_rate": 0.00012671014581792567, + "loss": 4.1755, + "step": 241000 + }, + { + "epoch": 2.075154958677686, + "grad_norm": 4.5625, + "learning_rate": 0.00012668403709734119, + "loss": 4.5968, + "step": 241050 + }, + { + "epoch": 2.075585399449036, + "grad_norm": 3.640625, + "learning_rate": 0.00012665792641820578, + "loss": 4.3305, + "step": 241100 + }, + { + "epoch": 2.0760158402203857, + "grad_norm": 1.53125, + "learning_rate": 0.00012663181378243593, + "loss": 4.5825, + "step": 241150 + }, + { + "epoch": 2.0764462809917354, + "grad_norm": 4.3125, + "learning_rate": 0.00012660569919194827, + "loss": 4.4997, + "step": 241200 + }, + { + "epoch": 2.0768767217630852, + "grad_norm": 3.234375, + "learning_rate": 0.0001265795826486595, + "loss": 4.6448, + "step": 241250 + }, + { + "epoch": 2.077307162534435, + "grad_norm": 1.25, + "learning_rate": 0.00012655346415448658, + "loss": 4.1226, + "step": 241300 + }, + { + "epoch": 2.0777376033057853, + "grad_norm": 3.5625, + "learning_rate": 0.00012652734371134652, + "loss": 4.3799, + "step": 241350 + }, + { + "epoch": 2.078168044077135, + "grad_norm": 1.96875, + "learning_rate": 0.0001265012213211565, + "loss": 4.4141, + "step": 241400 + }, + { + "epoch": 2.078598484848485, + "grad_norm": 3.25, + "learning_rate": 0.00012647509698583386, + "loss": 4.4535, + "step": 241450 + }, + { + "epoch": 2.0790289256198347, + "grad_norm": 2.453125, + "learning_rate": 0.00012644897070729607, + "loss": 4.254, + "step": 241500 + }, + { + "epoch": 2.0794593663911844, + "grad_norm": 1.7734375, + "learning_rate": 0.0001264228424874607, + "loss": 4.7315, + "step": 241550 + }, + { + "epoch": 2.0798898071625342, + "grad_norm": 1.2734375, + "learning_rate": 0.0001263967123282456, + "loss": 4.4795, + "step": 241600 + }, + { + "epoch": 2.0803202479338845, + "grad_norm": 2.453125, + "learning_rate": 0.00012637058023156855, + "loss": 4.3565, + "step": 241650 + }, + { + "epoch": 2.0807506887052343, + "grad_norm": 1.625, + "learning_rate": 0.0001263444461993477, + "loss": 4.2859, + "step": 241700 + }, + { + "epoch": 2.081181129476584, + "grad_norm": 3.65625, + "learning_rate": 0.00012631831023350113, + "loss": 4.4222, + "step": 241750 + }, + { + "epoch": 2.081611570247934, + "grad_norm": 8.75, + "learning_rate": 0.00012629217233594723, + "loss": 4.4645, + "step": 241800 + }, + { + "epoch": 2.0820420110192837, + "grad_norm": 2.921875, + "learning_rate": 0.0001262660325086045, + "loss": 4.6954, + "step": 241850 + }, + { + "epoch": 2.0824724517906334, + "grad_norm": 3.8125, + "learning_rate": 0.00012623989075339145, + "loss": 4.1415, + "step": 241900 + }, + { + "epoch": 2.0829028925619832, + "grad_norm": 6.125, + "learning_rate": 0.00012621374707222688, + "loss": 3.9138, + "step": 241950 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 3.265625, + "learning_rate": 0.00012618760146702968, + "loss": 4.4802, + "step": 242000 + }, + { + "epoch": 2.0837637741046833, + "grad_norm": 1.8828125, + "learning_rate": 0.00012616145393971886, + "loss": 4.4809, + "step": 242050 + }, + { + "epoch": 2.084194214876033, + "grad_norm": 2.15625, + "learning_rate": 0.00012613530449221363, + "loss": 4.0511, + "step": 242100 + }, + { + "epoch": 2.084624655647383, + "grad_norm": 1.6796875, + "learning_rate": 0.00012610915312643325, + "loss": 4.2237, + "step": 242150 + }, + { + "epoch": 2.0850550964187327, + "grad_norm": 3.09375, + "learning_rate": 0.00012608299984429724, + "loss": 4.3035, + "step": 242200 + }, + { + "epoch": 2.0854855371900825, + "grad_norm": 3.53125, + "learning_rate": 0.00012605684464772515, + "loss": 4.531, + "step": 242250 + }, + { + "epoch": 2.0859159779614327, + "grad_norm": 3.625, + "learning_rate": 0.0001260306875386367, + "loss": 4.5293, + "step": 242300 + }, + { + "epoch": 2.0863464187327825, + "grad_norm": 2.5625, + "learning_rate": 0.00012600452851895186, + "loss": 4.4599, + "step": 242350 + }, + { + "epoch": 2.0867768595041323, + "grad_norm": 4.25, + "learning_rate": 0.0001259783675905905, + "loss": 4.1468, + "step": 242400 + }, + { + "epoch": 2.087207300275482, + "grad_norm": 0.921875, + "learning_rate": 0.00012595220475547292, + "loss": 4.2179, + "step": 242450 + }, + { + "epoch": 2.087637741046832, + "grad_norm": 2.546875, + "learning_rate": 0.0001259260400155193, + "loss": 3.8571, + "step": 242500 + }, + { + "epoch": 2.0880681818181817, + "grad_norm": 2.890625, + "learning_rate": 0.00012589987337265014, + "loss": 4.7711, + "step": 242550 + }, + { + "epoch": 2.088498622589532, + "grad_norm": 3.546875, + "learning_rate": 0.00012587370482878603, + "loss": 4.2143, + "step": 242600 + }, + { + "epoch": 2.0889290633608817, + "grad_norm": 2.78125, + "learning_rate": 0.0001258475343858476, + "loss": 4.3296, + "step": 242650 + }, + { + "epoch": 2.0893595041322315, + "grad_norm": 3.34375, + "learning_rate": 0.0001258213620457558, + "loss": 4.4828, + "step": 242700 + }, + { + "epoch": 2.0897899449035813, + "grad_norm": 4.59375, + "learning_rate": 0.00012579518781043157, + "loss": 4.7032, + "step": 242750 + }, + { + "epoch": 2.090220385674931, + "grad_norm": 1.953125, + "learning_rate": 0.00012576901168179601, + "loss": 4.3263, + "step": 242800 + }, + { + "epoch": 2.090650826446281, + "grad_norm": 3.421875, + "learning_rate": 0.00012574283366177048, + "loss": 4.3343, + "step": 242850 + }, + { + "epoch": 2.0910812672176307, + "grad_norm": 3.46875, + "learning_rate": 0.00012571665375227635, + "loss": 4.6081, + "step": 242900 + }, + { + "epoch": 2.091511707988981, + "grad_norm": 3.28125, + "learning_rate": 0.00012569047195523514, + "loss": 4.062, + "step": 242950 + }, + { + "epoch": 2.0919421487603307, + "grad_norm": 4.78125, + "learning_rate": 0.0001256642882725686, + "loss": 4.4874, + "step": 243000 + }, + { + "epoch": 2.0919421487603307, + "eval_loss": 5.032301425933838, + "eval_runtime": 21.7671, + "eval_samples_per_second": 29.402, + "eval_steps_per_second": 14.701, + "eval_tts_loss": 7.369165301432359, + "step": 243000 + }, + { + "epoch": 2.0923725895316805, + "grad_norm": 3.140625, + "learning_rate": 0.0001256381027061985, + "loss": 4.187, + "step": 243050 + }, + { + "epoch": 2.0928030303030303, + "grad_norm": 3.859375, + "learning_rate": 0.00012561191525804684, + "loss": 4.2545, + "step": 243100 + }, + { + "epoch": 2.09323347107438, + "grad_norm": 3.515625, + "learning_rate": 0.00012558572593003564, + "loss": 4.5492, + "step": 243150 + }, + { + "epoch": 2.09366391184573, + "grad_norm": 3.90625, + "learning_rate": 0.00012555953472408727, + "loss": 4.3254, + "step": 243200 + }, + { + "epoch": 2.09409435261708, + "grad_norm": 4.25, + "learning_rate": 0.00012553334164212403, + "loss": 4.7374, + "step": 243250 + }, + { + "epoch": 2.09452479338843, + "grad_norm": 2.890625, + "learning_rate": 0.00012550714668606845, + "loss": 4.067, + "step": 243300 + }, + { + "epoch": 2.0949552341597797, + "grad_norm": 2.265625, + "learning_rate": 0.00012548094985784318, + "loss": 4.5255, + "step": 243350 + }, + { + "epoch": 2.0953856749311295, + "grad_norm": 2.953125, + "learning_rate": 0.00012545475115937103, + "loss": 4.436, + "step": 243400 + }, + { + "epoch": 2.0958161157024793, + "grad_norm": 3.28125, + "learning_rate": 0.0001254285505925749, + "loss": 4.7913, + "step": 243450 + }, + { + "epoch": 2.096246556473829, + "grad_norm": 4.15625, + "learning_rate": 0.00012540234815937784, + "loss": 4.4039, + "step": 243500 + }, + { + "epoch": 2.096676997245179, + "grad_norm": 2.6875, + "learning_rate": 0.0001253761438617031, + "loss": 4.3459, + "step": 243550 + }, + { + "epoch": 2.097107438016529, + "grad_norm": 4.3125, + "learning_rate": 0.000125349937701474, + "loss": 4.4695, + "step": 243600 + }, + { + "epoch": 2.097537878787879, + "grad_norm": 3.0625, + "learning_rate": 0.00012532372968061402, + "loss": 4.1749, + "step": 243650 + }, + { + "epoch": 2.0979683195592287, + "grad_norm": 1.6171875, + "learning_rate": 0.00012529751980104679, + "loss": 4.5597, + "step": 243700 + }, + { + "epoch": 2.0983987603305785, + "grad_norm": 1.65625, + "learning_rate": 0.00012527130806469597, + "loss": 4.0737, + "step": 243750 + }, + { + "epoch": 2.0988292011019283, + "grad_norm": 3.15625, + "learning_rate": 0.00012524509447348555, + "loss": 4.762, + "step": 243800 + }, + { + "epoch": 2.099259641873278, + "grad_norm": 0.44140625, + "learning_rate": 0.00012521887902933952, + "loss": 4.3837, + "step": 243850 + }, + { + "epoch": 2.0996900826446283, + "grad_norm": 2.984375, + "learning_rate": 0.00012519266173418203, + "loss": 4.8533, + "step": 243900 + }, + { + "epoch": 2.100120523415978, + "grad_norm": 3.296875, + "learning_rate": 0.00012516644258993734, + "loss": 4.2632, + "step": 243950 + }, + { + "epoch": 2.100550964187328, + "grad_norm": 3.796875, + "learning_rate": 0.00012514022159852996, + "loss": 4.3544, + "step": 244000 + }, + { + "epoch": 2.1009814049586777, + "grad_norm": 2.578125, + "learning_rate": 0.00012511399876188433, + "loss": 4.5486, + "step": 244050 + }, + { + "epoch": 2.1014118457300275, + "grad_norm": 1.046875, + "learning_rate": 0.0001250877740819253, + "loss": 4.4981, + "step": 244100 + }, + { + "epoch": 2.1018422865013773, + "grad_norm": 3.046875, + "learning_rate": 0.00012506154756057762, + "loss": 4.4835, + "step": 244150 + }, + { + "epoch": 2.102272727272727, + "grad_norm": 3.328125, + "learning_rate": 0.00012503531919976624, + "loss": 4.0523, + "step": 244200 + }, + { + "epoch": 2.1027031680440773, + "grad_norm": 2.5, + "learning_rate": 0.0001250090890014163, + "loss": 4.4435, + "step": 244250 + }, + { + "epoch": 2.103133608815427, + "grad_norm": 3.0625, + "learning_rate": 0.00012498285696745307, + "loss": 4.3182, + "step": 244300 + }, + { + "epoch": 2.103564049586777, + "grad_norm": 3.5, + "learning_rate": 0.00012495662309980186, + "loss": 4.3892, + "step": 244350 + }, + { + "epoch": 2.1039944903581267, + "grad_norm": 5.15625, + "learning_rate": 0.0001249303874003882, + "loss": 4.4317, + "step": 244400 + }, + { + "epoch": 2.1044249311294765, + "grad_norm": 1.4140625, + "learning_rate": 0.00012490414987113775, + "loss": 4.2303, + "step": 244450 + }, + { + "epoch": 2.1048553719008263, + "grad_norm": 2.765625, + "learning_rate": 0.0001248779105139763, + "loss": 4.5734, + "step": 244500 + }, + { + "epoch": 2.1052858126721765, + "grad_norm": 4.34375, + "learning_rate": 0.00012485166933082974, + "loss": 4.6328, + "step": 244550 + }, + { + "epoch": 2.1057162534435263, + "grad_norm": 3.84375, + "learning_rate": 0.00012482542632362412, + "loss": 4.0772, + "step": 244600 + }, + { + "epoch": 2.106146694214876, + "grad_norm": 2.75, + "learning_rate": 0.00012479918149428565, + "loss": 4.3002, + "step": 244650 + }, + { + "epoch": 2.106577134986226, + "grad_norm": 1.4375, + "learning_rate": 0.00012477293484474056, + "loss": 4.1697, + "step": 244700 + }, + { + "epoch": 2.1070075757575757, + "grad_norm": 3.765625, + "learning_rate": 0.0001247466863769154, + "loss": 4.3076, + "step": 244750 + }, + { + "epoch": 2.1074380165289255, + "grad_norm": 2.203125, + "learning_rate": 0.00012472043609273667, + "loss": 4.2345, + "step": 244800 + }, + { + "epoch": 2.1078684573002753, + "grad_norm": 4.375, + "learning_rate": 0.0001246941839941311, + "loss": 4.3255, + "step": 244850 + }, + { + "epoch": 2.1082988980716255, + "grad_norm": 2.765625, + "learning_rate": 0.0001246679300830256, + "loss": 4.5231, + "step": 244900 + }, + { + "epoch": 2.1087293388429753, + "grad_norm": 5.5625, + "learning_rate": 0.0001246416743613471, + "loss": 4.045, + "step": 244950 + }, + { + "epoch": 2.109159779614325, + "grad_norm": 3.734375, + "learning_rate": 0.0001246154168310227, + "loss": 4.467, + "step": 245000 + }, + { + "epoch": 2.109590220385675, + "grad_norm": 4.125, + "learning_rate": 0.00012458915749397967, + "loss": 4.4776, + "step": 245050 + }, + { + "epoch": 2.1100206611570247, + "grad_norm": 4.375, + "learning_rate": 0.00012456289635214538, + "loss": 3.9669, + "step": 245100 + }, + { + "epoch": 2.1104511019283745, + "grad_norm": 2.828125, + "learning_rate": 0.00012453663340744736, + "loss": 4.4018, + "step": 245150 + }, + { + "epoch": 2.1108815426997247, + "grad_norm": 2.796875, + "learning_rate": 0.00012451036866181325, + "loss": 4.3882, + "step": 245200 + }, + { + "epoch": 2.1113119834710745, + "grad_norm": 3.671875, + "learning_rate": 0.00012448410211717082, + "loss": 4.0448, + "step": 245250 + }, + { + "epoch": 2.1117424242424243, + "grad_norm": 3.140625, + "learning_rate": 0.00012445783377544795, + "loss": 4.309, + "step": 245300 + }, + { + "epoch": 2.112172865013774, + "grad_norm": 2.8125, + "learning_rate": 0.0001244315636385727, + "loss": 3.9951, + "step": 245350 + }, + { + "epoch": 2.112603305785124, + "grad_norm": 3.453125, + "learning_rate": 0.00012440529170847323, + "loss": 4.1266, + "step": 245400 + }, + { + "epoch": 2.1130337465564737, + "grad_norm": 1.6015625, + "learning_rate": 0.00012437901798707793, + "loss": 4.203, + "step": 245450 + }, + { + "epoch": 2.1134641873278235, + "grad_norm": 3.484375, + "learning_rate": 0.0001243527424763151, + "loss": 4.4071, + "step": 245500 + }, + { + "epoch": 2.1138946280991737, + "grad_norm": 3.90625, + "learning_rate": 0.00012432646517811338, + "loss": 4.8537, + "step": 245550 + }, + { + "epoch": 2.1143250688705235, + "grad_norm": 2.09375, + "learning_rate": 0.00012430018609440146, + "loss": 4.7115, + "step": 245600 + }, + { + "epoch": 2.1147555096418733, + "grad_norm": 2.375, + "learning_rate": 0.00012427390522710816, + "loss": 4.1112, + "step": 245650 + }, + { + "epoch": 2.115185950413223, + "grad_norm": 4.90625, + "learning_rate": 0.00012424762257816244, + "loss": 4.5935, + "step": 245700 + }, + { + "epoch": 2.115616391184573, + "grad_norm": 2.6875, + "learning_rate": 0.0001242213381494934, + "loss": 4.5632, + "step": 245750 + }, + { + "epoch": 2.1160468319559227, + "grad_norm": 2.40625, + "learning_rate": 0.00012419505194303027, + "loss": 4.3616, + "step": 245800 + }, + { + "epoch": 2.116477272727273, + "grad_norm": 1.9609375, + "learning_rate": 0.00012416876396070236, + "loss": 4.2153, + "step": 245850 + }, + { + "epoch": 2.1169077134986227, + "grad_norm": 2.421875, + "learning_rate": 0.00012414247420443916, + "loss": 4.4025, + "step": 245900 + }, + { + "epoch": 2.1173381542699725, + "grad_norm": 2.359375, + "learning_rate": 0.00012411618267617035, + "loss": 4.4464, + "step": 245950 + }, + { + "epoch": 2.1177685950413223, + "grad_norm": 4.59375, + "learning_rate": 0.0001240898893778256, + "loss": 4.3501, + "step": 246000 + }, + { + "epoch": 2.1177685950413223, + "eval_loss": 5.029580116271973, + "eval_runtime": 22.1853, + "eval_samples_per_second": 28.848, + "eval_steps_per_second": 14.424, + "eval_tts_loss": 7.369533711766074, + "step": 246000 + }, + { + "epoch": 2.118199035812672, + "grad_norm": 3.125, + "learning_rate": 0.00012406359431133473, + "loss": 4.3839, + "step": 246050 + }, + { + "epoch": 2.118629476584022, + "grad_norm": 5.0, + "learning_rate": 0.00012403729747862788, + "loss": 4.2713, + "step": 246100 + }, + { + "epoch": 2.1190599173553717, + "grad_norm": 2.765625, + "learning_rate": 0.00012401099888163512, + "loss": 4.363, + "step": 246150 + }, + { + "epoch": 2.119490358126722, + "grad_norm": 3.015625, + "learning_rate": 0.0001239846985222867, + "loss": 4.3895, + "step": 246200 + }, + { + "epoch": 2.1199207988980717, + "grad_norm": 2.40625, + "learning_rate": 0.000123958396402513, + "loss": 4.2328, + "step": 246250 + }, + { + "epoch": 2.1203512396694215, + "grad_norm": 3.375, + "learning_rate": 0.00012393209252424452, + "loss": 4.5248, + "step": 246300 + }, + { + "epoch": 2.1207816804407713, + "grad_norm": 2.515625, + "learning_rate": 0.000123905786889412, + "loss": 4.1609, + "step": 246350 + }, + { + "epoch": 2.121212121212121, + "grad_norm": 2.59375, + "learning_rate": 0.00012387947949994612, + "loss": 4.3887, + "step": 246400 + }, + { + "epoch": 2.121642561983471, + "grad_norm": 4.28125, + "learning_rate": 0.00012385317035777785, + "loss": 4.3359, + "step": 246450 + }, + { + "epoch": 2.122073002754821, + "grad_norm": 1.390625, + "learning_rate": 0.00012382685946483817, + "loss": 4.5074, + "step": 246500 + }, + { + "epoch": 2.122503443526171, + "grad_norm": 2.578125, + "learning_rate": 0.00012380054682305827, + "loss": 4.2195, + "step": 246550 + }, + { + "epoch": 2.1229338842975207, + "grad_norm": 3.96875, + "learning_rate": 0.00012377423243436947, + "loss": 3.7267, + "step": 246600 + }, + { + "epoch": 2.1233643250688705, + "grad_norm": 2.125, + "learning_rate": 0.00012374791630070313, + "loss": 4.2385, + "step": 246650 + }, + { + "epoch": 2.1237947658402203, + "grad_norm": 2.859375, + "learning_rate": 0.00012372159842399084, + "loss": 4.2527, + "step": 246700 + }, + { + "epoch": 2.12422520661157, + "grad_norm": 3.28125, + "learning_rate": 0.00012369527880616425, + "loss": 4.003, + "step": 246750 + }, + { + "epoch": 2.12465564738292, + "grad_norm": 3.015625, + "learning_rate": 0.00012366895744915518, + "loss": 4.3187, + "step": 246800 + }, + { + "epoch": 2.12508608815427, + "grad_norm": 2.84375, + "learning_rate": 0.00012364263435489558, + "loss": 4.2765, + "step": 246850 + }, + { + "epoch": 2.12551652892562, + "grad_norm": 1.859375, + "learning_rate": 0.00012361630952531746, + "loss": 4.5432, + "step": 246900 + }, + { + "epoch": 2.1259469696969697, + "grad_norm": 2.421875, + "learning_rate": 0.00012358998296235307, + "loss": 4.543, + "step": 246950 + }, + { + "epoch": 2.1263774104683195, + "grad_norm": 4.53125, + "learning_rate": 0.00012356365466793467, + "loss": 4.3545, + "step": 247000 + }, + { + "epoch": 2.1268078512396693, + "grad_norm": 2.84375, + "learning_rate": 0.0001235373246439947, + "loss": 4.393, + "step": 247050 + }, + { + "epoch": 2.127238292011019, + "grad_norm": 2.8125, + "learning_rate": 0.00012351099289246574, + "loss": 4.5236, + "step": 247100 + }, + { + "epoch": 2.1276687327823693, + "grad_norm": 3.921875, + "learning_rate": 0.0001234846594152805, + "loss": 4.2457, + "step": 247150 + }, + { + "epoch": 2.128099173553719, + "grad_norm": 3.0625, + "learning_rate": 0.00012345832421437178, + "loss": 4.2482, + "step": 247200 + }, + { + "epoch": 2.128529614325069, + "grad_norm": 1.7734375, + "learning_rate": 0.00012343198729167256, + "loss": 4.3731, + "step": 247250 + }, + { + "epoch": 2.1289600550964187, + "grad_norm": 1.75, + "learning_rate": 0.00012340564864911585, + "loss": 4.2384, + "step": 247300 + }, + { + "epoch": 2.1293904958677685, + "grad_norm": 4.6875, + "learning_rate": 0.00012337930828863492, + "loss": 4.0476, + "step": 247350 + }, + { + "epoch": 2.1298209366391183, + "grad_norm": 1.7734375, + "learning_rate": 0.00012335296621216306, + "loss": 4.4302, + "step": 247400 + }, + { + "epoch": 2.1302513774104685, + "grad_norm": 3.0625, + "learning_rate": 0.00012332662242163367, + "loss": 4.2237, + "step": 247450 + }, + { + "epoch": 2.1306818181818183, + "grad_norm": 3.921875, + "learning_rate": 0.00012330027691898046, + "loss": 4.4013, + "step": 247500 + }, + { + "epoch": 2.131112258953168, + "grad_norm": 3.140625, + "learning_rate": 0.00012327392970613702, + "loss": 4.2359, + "step": 247550 + }, + { + "epoch": 2.131542699724518, + "grad_norm": 2.859375, + "learning_rate": 0.00012324758078503724, + "loss": 4.0828, + "step": 247600 + }, + { + "epoch": 2.1319731404958677, + "grad_norm": 3.140625, + "learning_rate": 0.000123221230157615, + "loss": 4.4656, + "step": 247650 + }, + { + "epoch": 2.1324035812672175, + "grad_norm": 2.859375, + "learning_rate": 0.00012319487782580448, + "loss": 4.7965, + "step": 247700 + }, + { + "epoch": 2.1328340220385673, + "grad_norm": 3.1875, + "learning_rate": 0.00012316852379153982, + "loss": 4.5846, + "step": 247750 + }, + { + "epoch": 2.1332644628099175, + "grad_norm": 1.703125, + "learning_rate": 0.0001231421680567553, + "loss": 4.3269, + "step": 247800 + }, + { + "epoch": 2.1336949035812673, + "grad_norm": 2.90625, + "learning_rate": 0.0001231158106233855, + "loss": 4.6014, + "step": 247850 + }, + { + "epoch": 2.134125344352617, + "grad_norm": 4.78125, + "learning_rate": 0.00012308945149336494, + "loss": 4.5338, + "step": 247900 + }, + { + "epoch": 2.134555785123967, + "grad_norm": 2.078125, + "learning_rate": 0.00012306309066862828, + "loss": 4.6047, + "step": 247950 + }, + { + "epoch": 2.1349862258953167, + "grad_norm": 3.296875, + "learning_rate": 0.00012303672815111043, + "loss": 4.171, + "step": 248000 + }, + { + "epoch": 2.1354166666666665, + "grad_norm": 3.8125, + "learning_rate": 0.00012301036394274628, + "loss": 4.1846, + "step": 248050 + }, + { + "epoch": 2.1358471074380168, + "grad_norm": 4.0625, + "learning_rate": 0.0001229839980454709, + "loss": 4.416, + "step": 248100 + }, + { + "epoch": 2.1362775482093666, + "grad_norm": 3.015625, + "learning_rate": 0.00012295763046121956, + "loss": 4.4332, + "step": 248150 + }, + { + "epoch": 2.1367079889807163, + "grad_norm": 2.28125, + "learning_rate": 0.0001229312611919275, + "loss": 4.3031, + "step": 248200 + }, + { + "epoch": 2.137138429752066, + "grad_norm": 0.88671875, + "learning_rate": 0.00012290489023953025, + "loss": 4.3623, + "step": 248250 + }, + { + "epoch": 2.137568870523416, + "grad_norm": 3.203125, + "learning_rate": 0.00012287851760596328, + "loss": 4.352, + "step": 248300 + }, + { + "epoch": 2.1379993112947657, + "grad_norm": 4.71875, + "learning_rate": 0.00012285214329316237, + "loss": 4.5255, + "step": 248350 + }, + { + "epoch": 2.1384297520661155, + "grad_norm": 3.703125, + "learning_rate": 0.0001228257673030633, + "loss": 4.334, + "step": 248400 + }, + { + "epoch": 2.1388601928374658, + "grad_norm": 5.03125, + "learning_rate": 0.00012279938963760204, + "loss": 4.6355, + "step": 248450 + }, + { + "epoch": 2.1392906336088156, + "grad_norm": 3.0625, + "learning_rate": 0.00012277301029871464, + "loss": 4.587, + "step": 248500 + }, + { + "epoch": 2.1397210743801653, + "grad_norm": 3.15625, + "learning_rate": 0.00012274662928833728, + "loss": 4.3958, + "step": 248550 + }, + { + "epoch": 2.140151515151515, + "grad_norm": 3.203125, + "learning_rate": 0.00012272024660840624, + "loss": 4.2842, + "step": 248600 + }, + { + "epoch": 2.140581955922865, + "grad_norm": 1.4609375, + "learning_rate": 0.00012269386226085798, + "loss": 3.9295, + "step": 248650 + }, + { + "epoch": 2.1410123966942147, + "grad_norm": 3.40625, + "learning_rate": 0.0001226674762476291, + "loss": 4.4017, + "step": 248700 + }, + { + "epoch": 2.141442837465565, + "grad_norm": 3.34375, + "learning_rate": 0.00012264108857065623, + "loss": 4.5864, + "step": 248750 + }, + { + "epoch": 2.1418732782369148, + "grad_norm": 4.25, + "learning_rate": 0.00012261469923187615, + "loss": 4.2633, + "step": 248800 + }, + { + "epoch": 2.1423037190082646, + "grad_norm": 1.765625, + "learning_rate": 0.0001225883082332258, + "loss": 4.6043, + "step": 248850 + }, + { + "epoch": 2.1427341597796143, + "grad_norm": 2.5625, + "learning_rate": 0.00012256191557664226, + "loss": 4.4089, + "step": 248900 + }, + { + "epoch": 2.143164600550964, + "grad_norm": 3.03125, + "learning_rate": 0.0001225355212640626, + "loss": 4.4099, + "step": 248950 + }, + { + "epoch": 2.143595041322314, + "grad_norm": 4.5625, + "learning_rate": 0.00012250912529742419, + "loss": 4.3886, + "step": 249000 + }, + { + "epoch": 2.143595041322314, + "eval_loss": 5.030261039733887, + "eval_runtime": 22.19, + "eval_samples_per_second": 28.842, + "eval_steps_per_second": 14.421, + "eval_tts_loss": 7.399030686667878, + "step": 249000 + }, + { + "epoch": 2.1440254820936637, + "grad_norm": 3.4375, + "learning_rate": 0.00012248272767866443, + "loss": 4.7167, + "step": 249050 + }, + { + "epoch": 2.144455922865014, + "grad_norm": 3.34375, + "learning_rate": 0.0001224563284097208, + "loss": 4.3494, + "step": 249100 + }, + { + "epoch": 2.1448863636363638, + "grad_norm": 2.359375, + "learning_rate": 0.00012242992749253104, + "loss": 3.9664, + "step": 249150 + }, + { + "epoch": 2.1453168044077136, + "grad_norm": 2.484375, + "learning_rate": 0.00012240352492903282, + "loss": 4.3974, + "step": 249200 + }, + { + "epoch": 2.1457472451790633, + "grad_norm": 3.78125, + "learning_rate": 0.00012237712072116406, + "loss": 4.3286, + "step": 249250 + }, + { + "epoch": 2.146177685950413, + "grad_norm": 2.484375, + "learning_rate": 0.00012235071487086283, + "loss": 4.3842, + "step": 249300 + }, + { + "epoch": 2.146608126721763, + "grad_norm": 3.765625, + "learning_rate": 0.00012232430738006716, + "loss": 4.6674, + "step": 249350 + }, + { + "epoch": 2.147038567493113, + "grad_norm": 2.640625, + "learning_rate": 0.00012229789825071542, + "loss": 4.3949, + "step": 249400 + }, + { + "epoch": 2.147469008264463, + "grad_norm": 5.3125, + "learning_rate": 0.00012227148748474587, + "loss": 3.5729, + "step": 249450 + }, + { + "epoch": 2.1478994490358128, + "grad_norm": 4.25, + "learning_rate": 0.00012224507508409705, + "loss": 4.5456, + "step": 249500 + }, + { + "epoch": 2.1483298898071626, + "grad_norm": 3.890625, + "learning_rate": 0.00012221866105070766, + "loss": 4.4121, + "step": 249550 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 3.3125, + "learning_rate": 0.0001221922453865163, + "loss": 4.3994, + "step": 249600 + }, + { + "epoch": 2.149190771349862, + "grad_norm": 2.640625, + "learning_rate": 0.0001221658280934619, + "loss": 4.1728, + "step": 249650 + }, + { + "epoch": 2.149621212121212, + "grad_norm": 2.015625, + "learning_rate": 0.00012213940917348335, + "loss": 3.9191, + "step": 249700 + }, + { + "epoch": 2.150051652892562, + "grad_norm": 1.9453125, + "learning_rate": 0.00012211298862851985, + "loss": 4.5975, + "step": 249750 + }, + { + "epoch": 2.150482093663912, + "grad_norm": 3.734375, + "learning_rate": 0.00012208656646051056, + "loss": 4.6434, + "step": 249800 + }, + { + "epoch": 2.1509125344352618, + "grad_norm": 2.21875, + "learning_rate": 0.00012206014267139478, + "loss": 4.3724, + "step": 249850 + }, + { + "epoch": 2.1513429752066116, + "grad_norm": 3.8125, + "learning_rate": 0.00012203371726311204, + "loss": 4.6818, + "step": 249900 + }, + { + "epoch": 2.1517734159779613, + "grad_norm": 2.65625, + "learning_rate": 0.00012200729023760184, + "loss": 4.5516, + "step": 249950 + }, + { + "epoch": 2.152203856749311, + "grad_norm": 2.96875, + "learning_rate": 0.00012198086159680387, + "loss": 4.4988, + "step": 250000 + }, + { + "epoch": 2.1526342975206614, + "grad_norm": 2.53125, + "learning_rate": 0.00012195443134265795, + "loss": 4.3184, + "step": 250050 + }, + { + "epoch": 2.153064738292011, + "grad_norm": 3.390625, + "learning_rate": 0.00012192799947710405, + "loss": 4.4347, + "step": 250100 + }, + { + "epoch": 2.153495179063361, + "grad_norm": 2.875, + "learning_rate": 0.00012190156600208211, + "loss": 4.5737, + "step": 250150 + }, + { + "epoch": 2.1539256198347108, + "grad_norm": 1.9609375, + "learning_rate": 0.00012187513091953237, + "loss": 4.5308, + "step": 250200 + }, + { + "epoch": 2.1543560606060606, + "grad_norm": 1.828125, + "learning_rate": 0.00012184869423139511, + "loss": 3.9899, + "step": 250250 + }, + { + "epoch": 2.1547865013774103, + "grad_norm": 4.0, + "learning_rate": 0.00012182225593961069, + "loss": 4.4631, + "step": 250300 + }, + { + "epoch": 2.15521694214876, + "grad_norm": 5.40625, + "learning_rate": 0.00012179581604611963, + "loss": 4.515, + "step": 250350 + }, + { + "epoch": 2.1556473829201104, + "grad_norm": 3.078125, + "learning_rate": 0.00012176937455286257, + "loss": 4.2414, + "step": 250400 + }, + { + "epoch": 2.15607782369146, + "grad_norm": 4.25, + "learning_rate": 0.00012174293146178027, + "loss": 4.5981, + "step": 250450 + }, + { + "epoch": 2.15650826446281, + "grad_norm": 3.25, + "learning_rate": 0.00012171648677481353, + "loss": 4.5456, + "step": 250500 + }, + { + "epoch": 2.1569387052341598, + "grad_norm": 2.359375, + "learning_rate": 0.00012169004049390342, + "loss": 4.3493, + "step": 250550 + }, + { + "epoch": 2.1573691460055096, + "grad_norm": 2.46875, + "learning_rate": 0.00012166359262099103, + "loss": 4.3432, + "step": 250600 + }, + { + "epoch": 2.1577995867768593, + "grad_norm": 2.484375, + "learning_rate": 0.0001216371431580175, + "loss": 4.2171, + "step": 250650 + }, + { + "epoch": 2.1582300275482096, + "grad_norm": 1.9921875, + "learning_rate": 0.00012161069210692426, + "loss": 4.3573, + "step": 250700 + }, + { + "epoch": 2.1586604683195594, + "grad_norm": 4.53125, + "learning_rate": 0.00012158423946965269, + "loss": 4.4684, + "step": 250750 + }, + { + "epoch": 2.159090909090909, + "grad_norm": 3.96875, + "learning_rate": 0.00012155778524814438, + "loss": 4.4885, + "step": 250800 + }, + { + "epoch": 2.159521349862259, + "grad_norm": 3.0625, + "learning_rate": 0.00012153132944434103, + "loss": 4.1797, + "step": 250850 + }, + { + "epoch": 2.1599517906336088, + "grad_norm": 5.1875, + "learning_rate": 0.00012150487206018442, + "loss": 4.2264, + "step": 250900 + }, + { + "epoch": 2.1603822314049586, + "grad_norm": 2.859375, + "learning_rate": 0.00012147841309761649, + "loss": 4.1449, + "step": 250950 + }, + { + "epoch": 2.1608126721763083, + "grad_norm": 2.875, + "learning_rate": 0.00012145195255857922, + "loss": 4.4546, + "step": 251000 + }, + { + "epoch": 2.1612431129476586, + "grad_norm": 2.859375, + "learning_rate": 0.00012142549044501477, + "loss": 4.461, + "step": 251050 + }, + { + "epoch": 2.1616735537190084, + "grad_norm": 2.609375, + "learning_rate": 0.00012139902675886548, + "loss": 4.5465, + "step": 251100 + }, + { + "epoch": 2.162103994490358, + "grad_norm": 3.15625, + "learning_rate": 0.00012137256150207364, + "loss": 4.4751, + "step": 251150 + }, + { + "epoch": 2.162534435261708, + "grad_norm": 2.6875, + "learning_rate": 0.00012134609467658176, + "loss": 4.5424, + "step": 251200 + }, + { + "epoch": 2.1629648760330578, + "grad_norm": 3.09375, + "learning_rate": 0.00012131962628433244, + "loss": 4.7024, + "step": 251250 + }, + { + "epoch": 2.1633953168044076, + "grad_norm": 2.421875, + "learning_rate": 0.00012129315632726845, + "loss": 4.3189, + "step": 251300 + }, + { + "epoch": 2.163825757575758, + "grad_norm": 1.8125, + "learning_rate": 0.00012126668480733261, + "loss": 4.0801, + "step": 251350 + }, + { + "epoch": 2.1642561983471076, + "grad_norm": 3.4375, + "learning_rate": 0.00012124021172646782, + "loss": 4.7692, + "step": 251400 + }, + { + "epoch": 2.1646866391184574, + "grad_norm": 3.1875, + "learning_rate": 0.00012121373708661725, + "loss": 4.2609, + "step": 251450 + }, + { + "epoch": 2.165117079889807, + "grad_norm": 3.65625, + "learning_rate": 0.00012118726088972396, + "loss": 4.1146, + "step": 251500 + }, + { + "epoch": 2.165547520661157, + "grad_norm": 2.21875, + "learning_rate": 0.00012116078313773131, + "loss": 4.3169, + "step": 251550 + }, + { + "epoch": 2.1659779614325068, + "grad_norm": 4.84375, + "learning_rate": 0.00012113430383258275, + "loss": 4.5276, + "step": 251600 + }, + { + "epoch": 2.1664084022038566, + "grad_norm": 2.921875, + "learning_rate": 0.00012110782297622173, + "loss": 4.1889, + "step": 251650 + }, + { + "epoch": 2.166838842975207, + "grad_norm": 1.7890625, + "learning_rate": 0.00012108134057059193, + "loss": 4.3648, + "step": 251700 + }, + { + "epoch": 2.1672692837465566, + "grad_norm": 2.703125, + "learning_rate": 0.00012105485661763709, + "loss": 4.3965, + "step": 251750 + }, + { + "epoch": 2.1676997245179064, + "grad_norm": 0.7109375, + "learning_rate": 0.00012102837111930108, + "loss": 3.9214, + "step": 251800 + }, + { + "epoch": 2.168130165289256, + "grad_norm": 3.65625, + "learning_rate": 0.00012100188407752786, + "loss": 4.3281, + "step": 251850 + }, + { + "epoch": 2.168560606060606, + "grad_norm": 2.6875, + "learning_rate": 0.00012097539549426151, + "loss": 4.2369, + "step": 251900 + }, + { + "epoch": 2.1689910468319558, + "grad_norm": 3.5, + "learning_rate": 0.00012094890537144632, + "loss": 4.2376, + "step": 251950 + }, + { + "epoch": 2.169421487603306, + "grad_norm": 2.4375, + "learning_rate": 0.0001209224137110265, + "loss": 4.2486, + "step": 252000 + }, + { + "epoch": 2.169421487603306, + "eval_loss": 5.0270843505859375, + "eval_runtime": 21.9413, + "eval_samples_per_second": 29.169, + "eval_steps_per_second": 14.584, + "eval_tts_loss": 7.36063719755612, + "step": 252000 + }, + { + "epoch": 2.169851928374656, + "grad_norm": 1.4609375, + "learning_rate": 0.00012089592051494654, + "loss": 4.2264, + "step": 252050 + }, + { + "epoch": 2.1702823691460056, + "grad_norm": 1.796875, + "learning_rate": 0.00012086942578515099, + "loss": 4.2205, + "step": 252100 + }, + { + "epoch": 2.1707128099173554, + "grad_norm": 3.265625, + "learning_rate": 0.00012084292952358449, + "loss": 4.4578, + "step": 252150 + }, + { + "epoch": 2.171143250688705, + "grad_norm": 2.609375, + "learning_rate": 0.00012081643173219178, + "loss": 4.4302, + "step": 252200 + }, + { + "epoch": 2.171573691460055, + "grad_norm": 2.765625, + "learning_rate": 0.00012078993241291779, + "loss": 4.379, + "step": 252250 + }, + { + "epoch": 2.1720041322314048, + "grad_norm": 3.21875, + "learning_rate": 0.00012076343156770748, + "loss": 4.3006, + "step": 252300 + }, + { + "epoch": 2.172434573002755, + "grad_norm": 3.40625, + "learning_rate": 0.00012073692919850598, + "loss": 4.5328, + "step": 252350 + }, + { + "epoch": 2.172865013774105, + "grad_norm": 5.8125, + "learning_rate": 0.00012071042530725849, + "loss": 4.3117, + "step": 252400 + }, + { + "epoch": 2.1732954545454546, + "grad_norm": 2.28125, + "learning_rate": 0.00012068391989591035, + "loss": 4.3748, + "step": 252450 + }, + { + "epoch": 2.1737258953168044, + "grad_norm": 3.9375, + "learning_rate": 0.00012065741296640698, + "loss": 4.3187, + "step": 252500 + }, + { + "epoch": 2.174156336088154, + "grad_norm": 2.984375, + "learning_rate": 0.00012063090452069392, + "loss": 4.6749, + "step": 252550 + }, + { + "epoch": 2.174586776859504, + "grad_norm": 2.640625, + "learning_rate": 0.00012060439456071689, + "loss": 4.2573, + "step": 252600 + }, + { + "epoch": 2.175017217630854, + "grad_norm": 2.71875, + "learning_rate": 0.00012057788308842164, + "loss": 4.1282, + "step": 252650 + }, + { + "epoch": 2.175447658402204, + "grad_norm": 1.671875, + "learning_rate": 0.00012055137010575402, + "loss": 4.4505, + "step": 252700 + }, + { + "epoch": 2.175878099173554, + "grad_norm": 3.390625, + "learning_rate": 0.00012052485561466004, + "loss": 4.2804, + "step": 252750 + }, + { + "epoch": 2.1763085399449036, + "grad_norm": 2.9375, + "learning_rate": 0.00012049833961708588, + "loss": 4.221, + "step": 252800 + }, + { + "epoch": 2.1767389807162534, + "grad_norm": 4.03125, + "learning_rate": 0.00012047182211497762, + "loss": 4.4549, + "step": 252850 + }, + { + "epoch": 2.177169421487603, + "grad_norm": 2.875, + "learning_rate": 0.00012044530311028171, + "loss": 4.488, + "step": 252900 + }, + { + "epoch": 2.177599862258953, + "grad_norm": 3.375, + "learning_rate": 0.0001204187826049445, + "loss": 4.2177, + "step": 252950 + }, + { + "epoch": 2.178030303030303, + "grad_norm": 2.203125, + "learning_rate": 0.00012039226060091264, + "loss": 4.2626, + "step": 253000 + }, + { + "epoch": 2.178460743801653, + "grad_norm": 2.109375, + "learning_rate": 0.00012036573710013268, + "loss": 4.4399, + "step": 253050 + }, + { + "epoch": 2.178891184573003, + "grad_norm": 2.203125, + "learning_rate": 0.00012033921210455144, + "loss": 4.427, + "step": 253100 + }, + { + "epoch": 2.1793216253443526, + "grad_norm": 2.6875, + "learning_rate": 0.0001203126856161158, + "loss": 4.6042, + "step": 253150 + }, + { + "epoch": 2.1797520661157024, + "grad_norm": 3.703125, + "learning_rate": 0.00012028615763677276, + "loss": 4.1871, + "step": 253200 + }, + { + "epoch": 2.180182506887052, + "grad_norm": 3.234375, + "learning_rate": 0.00012025962816846936, + "loss": 4.4172, + "step": 253250 + }, + { + "epoch": 2.1806129476584024, + "grad_norm": 2.03125, + "learning_rate": 0.00012023309721315285, + "loss": 4.6173, + "step": 253300 + }, + { + "epoch": 2.181043388429752, + "grad_norm": 2.8125, + "learning_rate": 0.00012020656477277056, + "loss": 4.1272, + "step": 253350 + }, + { + "epoch": 2.181473829201102, + "grad_norm": 0.70703125, + "learning_rate": 0.00012018003084926988, + "loss": 3.895, + "step": 253400 + }, + { + "epoch": 2.181904269972452, + "grad_norm": 1.78125, + "learning_rate": 0.00012015349544459832, + "loss": 4.3051, + "step": 253450 + }, + { + "epoch": 2.1823347107438016, + "grad_norm": 3.578125, + "learning_rate": 0.00012012695856070361, + "loss": 4.7618, + "step": 253500 + }, + { + "epoch": 2.1827651515151514, + "grad_norm": 2.40625, + "learning_rate": 0.00012010042019953343, + "loss": 3.9043, + "step": 253550 + }, + { + "epoch": 2.183195592286501, + "grad_norm": 1.3203125, + "learning_rate": 0.00012007388036303565, + "loss": 4.1297, + "step": 253600 + }, + { + "epoch": 2.1836260330578514, + "grad_norm": 3.21875, + "learning_rate": 0.00012004733905315824, + "loss": 4.2037, + "step": 253650 + }, + { + "epoch": 2.184056473829201, + "grad_norm": 2.109375, + "learning_rate": 0.00012002079627184927, + "loss": 4.3872, + "step": 253700 + }, + { + "epoch": 2.184486914600551, + "grad_norm": 1.75, + "learning_rate": 0.00011999425202105691, + "loss": 4.1089, + "step": 253750 + }, + { + "epoch": 2.184917355371901, + "grad_norm": 2.71875, + "learning_rate": 0.0001199677063027295, + "loss": 4.3687, + "step": 253800 + }, + { + "epoch": 2.1853477961432506, + "grad_norm": 2.59375, + "learning_rate": 0.00011994115911881542, + "loss": 4.0994, + "step": 253850 + }, + { + "epoch": 2.1857782369146004, + "grad_norm": 2.890625, + "learning_rate": 0.00011991461047126316, + "loss": 4.5209, + "step": 253900 + }, + { + "epoch": 2.1862086776859506, + "grad_norm": 5.8125, + "learning_rate": 0.00011988806036202132, + "loss": 4.366, + "step": 253950 + }, + { + "epoch": 2.1866391184573004, + "grad_norm": 1.7109375, + "learning_rate": 0.00011986150879303866, + "loss": 4.3154, + "step": 254000 + }, + { + "epoch": 2.18706955922865, + "grad_norm": 2.65625, + "learning_rate": 0.00011983495576626398, + "loss": 4.2244, + "step": 254050 + }, + { + "epoch": 2.1875, + "grad_norm": 4.4375, + "learning_rate": 0.00011980840128364619, + "loss": 4.1018, + "step": 254100 + }, + { + "epoch": 2.18793044077135, + "grad_norm": 2.453125, + "learning_rate": 0.00011978184534713441, + "loss": 4.4684, + "step": 254150 + }, + { + "epoch": 2.1883608815426996, + "grad_norm": 3.609375, + "learning_rate": 0.00011975528795867774, + "loss": 4.0454, + "step": 254200 + }, + { + "epoch": 2.1887913223140494, + "grad_norm": 3.171875, + "learning_rate": 0.00011972872912022541, + "loss": 4.4463, + "step": 254250 + }, + { + "epoch": 2.1892217630853996, + "grad_norm": 3.046875, + "learning_rate": 0.00011970216883372683, + "loss": 4.3833, + "step": 254300 + }, + { + "epoch": 2.1896522038567494, + "grad_norm": 2.4375, + "learning_rate": 0.00011967560710113143, + "loss": 4.3826, + "step": 254350 + }, + { + "epoch": 2.190082644628099, + "grad_norm": 2.46875, + "learning_rate": 0.00011964904392438883, + "loss": 4.7634, + "step": 254400 + }, + { + "epoch": 2.190513085399449, + "grad_norm": 3.328125, + "learning_rate": 0.00011962247930544866, + "loss": 4.87, + "step": 254450 + }, + { + "epoch": 2.190943526170799, + "grad_norm": 4.65625, + "learning_rate": 0.00011959591324626071, + "loss": 4.3389, + "step": 254500 + }, + { + "epoch": 2.1913739669421486, + "grad_norm": 2.96875, + "learning_rate": 0.00011956934574877494, + "loss": 4.2565, + "step": 254550 + }, + { + "epoch": 2.191804407713499, + "grad_norm": 2.859375, + "learning_rate": 0.00011954277681494126, + "loss": 4.7334, + "step": 254600 + }, + { + "epoch": 2.1922348484848486, + "grad_norm": 2.296875, + "learning_rate": 0.00011951620644670982, + "loss": 4.2053, + "step": 254650 + }, + { + "epoch": 2.1926652892561984, + "grad_norm": 2.609375, + "learning_rate": 0.00011948963464603084, + "loss": 4.2323, + "step": 254700 + }, + { + "epoch": 2.193095730027548, + "grad_norm": 3.40625, + "learning_rate": 0.00011946306141485457, + "loss": 4.4563, + "step": 254750 + }, + { + "epoch": 2.193526170798898, + "grad_norm": 2.65625, + "learning_rate": 0.00011943648675513148, + "loss": 4.5507, + "step": 254800 + }, + { + "epoch": 2.193956611570248, + "grad_norm": 3.859375, + "learning_rate": 0.00011940991066881208, + "loss": 4.3949, + "step": 254850 + }, + { + "epoch": 2.1943870523415976, + "grad_norm": 1.2578125, + "learning_rate": 0.00011938333315784702, + "loss": 4.3002, + "step": 254900 + }, + { + "epoch": 2.194817493112948, + "grad_norm": 4.0625, + "learning_rate": 0.000119356754224187, + "loss": 4.2587, + "step": 254950 + }, + { + "epoch": 2.1952479338842976, + "grad_norm": 7.03125, + "learning_rate": 0.00011933017386978284, + "loss": 4.3259, + "step": 255000 + }, + { + "epoch": 2.1952479338842976, + "eval_loss": 5.025729656219482, + "eval_runtime": 22.1733, + "eval_samples_per_second": 28.864, + "eval_steps_per_second": 14.432, + "eval_tts_loss": 7.315380834432901, + "step": 255000 + }, + { + "epoch": 2.1956783746556474, + "grad_norm": 3.65625, + "learning_rate": 0.00011930359209658555, + "loss": 4.4452, + "step": 255050 + }, + { + "epoch": 2.196108815426997, + "grad_norm": 1.78125, + "learning_rate": 0.00011927700890654609, + "loss": 4.5726, + "step": 255100 + }, + { + "epoch": 2.196539256198347, + "grad_norm": 3.453125, + "learning_rate": 0.00011925042430161568, + "loss": 4.2654, + "step": 255150 + }, + { + "epoch": 2.196969696969697, + "grad_norm": 2.828125, + "learning_rate": 0.00011922383828374555, + "loss": 4.309, + "step": 255200 + }, + { + "epoch": 2.197400137741047, + "grad_norm": 2.34375, + "learning_rate": 0.00011919725085488704, + "loss": 4.2618, + "step": 255250 + }, + { + "epoch": 2.197830578512397, + "grad_norm": 1.3515625, + "learning_rate": 0.00011917066201699161, + "loss": 3.8487, + "step": 255300 + }, + { + "epoch": 2.1982610192837466, + "grad_norm": 3.15625, + "learning_rate": 0.00011914407177201083, + "loss": 4.6028, + "step": 255350 + }, + { + "epoch": 2.1986914600550964, + "grad_norm": 2.03125, + "learning_rate": 0.0001191174801218964, + "loss": 3.9963, + "step": 255400 + }, + { + "epoch": 2.199121900826446, + "grad_norm": 2.0, + "learning_rate": 0.00011909088706860005, + "loss": 4.3256, + "step": 255450 + }, + { + "epoch": 2.199552341597796, + "grad_norm": 1.2578125, + "learning_rate": 0.00011906429261407366, + "loss": 4.2647, + "step": 255500 + }, + { + "epoch": 2.199982782369146, + "grad_norm": 3.265625, + "learning_rate": 0.00011903769676026921, + "loss": 4.1903, + "step": 255550 + }, + { + "epoch": 2.200413223140496, + "grad_norm": 2.890625, + "learning_rate": 0.0001190110995091388, + "loss": 4.6628, + "step": 255600 + }, + { + "epoch": 2.200843663911846, + "grad_norm": 4.21875, + "learning_rate": 0.00011898450086263455, + "loss": 4.3471, + "step": 255650 + }, + { + "epoch": 2.2012741046831956, + "grad_norm": 2.625, + "learning_rate": 0.0001189579008227088, + "loss": 4.4032, + "step": 255700 + }, + { + "epoch": 2.2017045454545454, + "grad_norm": 4.875, + "learning_rate": 0.00011893129939131395, + "loss": 4.3063, + "step": 255750 + }, + { + "epoch": 2.202134986225895, + "grad_norm": 2.453125, + "learning_rate": 0.00011890469657040242, + "loss": 4.4818, + "step": 255800 + }, + { + "epoch": 2.202565426997245, + "grad_norm": 4.5, + "learning_rate": 0.00011887809236192685, + "loss": 4.196, + "step": 255850 + }, + { + "epoch": 2.2029958677685952, + "grad_norm": 3.421875, + "learning_rate": 0.00011885148676783991, + "loss": 4.292, + "step": 255900 + }, + { + "epoch": 2.203426308539945, + "grad_norm": 3.640625, + "learning_rate": 0.00011882487979009443, + "loss": 4.293, + "step": 255950 + }, + { + "epoch": 2.203856749311295, + "grad_norm": 2.484375, + "learning_rate": 0.00011879827143064322, + "loss": 4.5005, + "step": 256000 + }, + { + "epoch": 2.2042871900826446, + "grad_norm": 4.78125, + "learning_rate": 0.00011877166169143936, + "loss": 4.572, + "step": 256050 + }, + { + "epoch": 2.2047176308539944, + "grad_norm": 4.03125, + "learning_rate": 0.00011874505057443594, + "loss": 4.3167, + "step": 256100 + }, + { + "epoch": 2.205148071625344, + "grad_norm": 4.46875, + "learning_rate": 0.00011871843808158609, + "loss": 4.5634, + "step": 256150 + }, + { + "epoch": 2.205578512396694, + "grad_norm": 4.5, + "learning_rate": 0.00011869182421484319, + "loss": 4.4854, + "step": 256200 + }, + { + "epoch": 2.2060089531680442, + "grad_norm": 5.59375, + "learning_rate": 0.00011866520897616063, + "loss": 4.331, + "step": 256250 + }, + { + "epoch": 2.206439393939394, + "grad_norm": 1.0546875, + "learning_rate": 0.00011863859236749186, + "loss": 4.5912, + "step": 256300 + }, + { + "epoch": 2.206869834710744, + "grad_norm": 2.890625, + "learning_rate": 0.00011861197439079053, + "loss": 4.2557, + "step": 256350 + }, + { + "epoch": 2.2073002754820936, + "grad_norm": 2.25, + "learning_rate": 0.00011858535504801031, + "loss": 4.0516, + "step": 256400 + }, + { + "epoch": 2.2077307162534434, + "grad_norm": 3.078125, + "learning_rate": 0.00011855873434110503, + "loss": 4.2145, + "step": 256450 + }, + { + "epoch": 2.208161157024793, + "grad_norm": 2.609375, + "learning_rate": 0.00011853211227202856, + "loss": 4.7137, + "step": 256500 + }, + { + "epoch": 2.2085915977961434, + "grad_norm": 2.1875, + "learning_rate": 0.00011850548884273493, + "loss": 4.6057, + "step": 256550 + }, + { + "epoch": 2.2090220385674932, + "grad_norm": 3.078125, + "learning_rate": 0.00011847886405517828, + "loss": 4.1449, + "step": 256600 + }, + { + "epoch": 2.209452479338843, + "grad_norm": 3.046875, + "learning_rate": 0.0001184522379113127, + "loss": 4.3742, + "step": 256650 + }, + { + "epoch": 2.209882920110193, + "grad_norm": 4.59375, + "learning_rate": 0.00011842561041309257, + "loss": 4.622, + "step": 256700 + }, + { + "epoch": 2.2103133608815426, + "grad_norm": 1.890625, + "learning_rate": 0.00011839898156247231, + "loss": 4.4993, + "step": 256750 + }, + { + "epoch": 2.2107438016528924, + "grad_norm": 5.375, + "learning_rate": 0.00011837235136140636, + "loss": 4.4557, + "step": 256800 + }, + { + "epoch": 2.211174242424242, + "grad_norm": 3.390625, + "learning_rate": 0.00011834571981184939, + "loss": 3.9743, + "step": 256850 + }, + { + "epoch": 2.2116046831955924, + "grad_norm": 3.65625, + "learning_rate": 0.00011831908691575602, + "loss": 4.6455, + "step": 256900 + }, + { + "epoch": 2.2120351239669422, + "grad_norm": 4.71875, + "learning_rate": 0.00011829245267508109, + "loss": 4.3311, + "step": 256950 + }, + { + "epoch": 2.212465564738292, + "grad_norm": 0.87109375, + "learning_rate": 0.00011826581709177951, + "loss": 4.3819, + "step": 257000 + }, + { + "epoch": 2.212896005509642, + "grad_norm": 3.140625, + "learning_rate": 0.00011823918016780622, + "loss": 4.3464, + "step": 257050 + }, + { + "epoch": 2.2133264462809916, + "grad_norm": 3.09375, + "learning_rate": 0.00011821254190511637, + "loss": 4.4691, + "step": 257100 + }, + { + "epoch": 2.2137568870523414, + "grad_norm": 1.7578125, + "learning_rate": 0.00011818590230566516, + "loss": 4.3192, + "step": 257150 + }, + { + "epoch": 2.2141873278236917, + "grad_norm": 2.734375, + "learning_rate": 0.00011815926137140778, + "loss": 4.4428, + "step": 257200 + }, + { + "epoch": 2.2146177685950414, + "grad_norm": 2.875, + "learning_rate": 0.00011813261910429974, + "loss": 4.4558, + "step": 257250 + }, + { + "epoch": 2.2150482093663912, + "grad_norm": 3.453125, + "learning_rate": 0.00011810597550629645, + "loss": 4.2951, + "step": 257300 + }, + { + "epoch": 2.215478650137741, + "grad_norm": 4.125, + "learning_rate": 0.00011807933057935352, + "loss": 4.1378, + "step": 257350 + }, + { + "epoch": 2.215909090909091, + "grad_norm": 4.6875, + "learning_rate": 0.00011805268432542662, + "loss": 4.3002, + "step": 257400 + }, + { + "epoch": 2.2163395316804406, + "grad_norm": 2.8125, + "learning_rate": 0.00011802603674647155, + "loss": 4.3459, + "step": 257450 + }, + { + "epoch": 2.2167699724517904, + "grad_norm": 3.375, + "learning_rate": 0.00011799938784444415, + "loss": 4.1584, + "step": 257500 + }, + { + "epoch": 2.2172004132231407, + "grad_norm": 2.578125, + "learning_rate": 0.00011797273762130041, + "loss": 4.0454, + "step": 257550 + }, + { + "epoch": 2.2176308539944904, + "grad_norm": 2.953125, + "learning_rate": 0.0001179460860789964, + "loss": 4.5808, + "step": 257600 + }, + { + "epoch": 2.2180612947658402, + "grad_norm": 3.90625, + "learning_rate": 0.00011791943321948828, + "loss": 4.4137, + "step": 257650 + }, + { + "epoch": 2.21849173553719, + "grad_norm": 3.125, + "learning_rate": 0.0001178927790447323, + "loss": 4.6564, + "step": 257700 + }, + { + "epoch": 2.21892217630854, + "grad_norm": 2.84375, + "learning_rate": 0.00011786612355668488, + "loss": 4.3404, + "step": 257750 + }, + { + "epoch": 2.2193526170798896, + "grad_norm": 4.96875, + "learning_rate": 0.0001178394667573024, + "loss": 4.0181, + "step": 257800 + }, + { + "epoch": 2.21978305785124, + "grad_norm": 2.375, + "learning_rate": 0.00011781280864854143, + "loss": 4.3928, + "step": 257850 + }, + { + "epoch": 2.2202134986225897, + "grad_norm": 2.84375, + "learning_rate": 0.00011778614923235866, + "loss": 4.2034, + "step": 257900 + }, + { + "epoch": 2.2206439393939394, + "grad_norm": 1.8359375, + "learning_rate": 0.00011775948851071079, + "loss": 4.3024, + "step": 257950 + }, + { + "epoch": 2.2210743801652892, + "grad_norm": 3.265625, + "learning_rate": 0.00011773282648555465, + "loss": 4.1851, + "step": 258000 + }, + { + "epoch": 2.2210743801652892, + "eval_loss": 5.025103569030762, + "eval_runtime": 24.1477, + "eval_samples_per_second": 26.504, + "eval_steps_per_second": 13.252, + "eval_tts_loss": 7.391507342525869, + "step": 258000 + }, + { + "epoch": 2.221504820936639, + "grad_norm": 3.234375, + "learning_rate": 0.00011770616315884726, + "loss": 4.6283, + "step": 258050 + }, + { + "epoch": 2.221935261707989, + "grad_norm": 3.25, + "learning_rate": 0.00011767949853254554, + "loss": 4.5709, + "step": 258100 + }, + { + "epoch": 2.2223657024793386, + "grad_norm": 2.328125, + "learning_rate": 0.00011765283260860671, + "loss": 4.2582, + "step": 258150 + }, + { + "epoch": 2.222796143250689, + "grad_norm": 3.359375, + "learning_rate": 0.0001176261653889879, + "loss": 4.1063, + "step": 258200 + }, + { + "epoch": 2.2232265840220387, + "grad_norm": 0.9453125, + "learning_rate": 0.00011759949687564646, + "loss": 4.2151, + "step": 258250 + }, + { + "epoch": 2.2236570247933884, + "grad_norm": 5.3125, + "learning_rate": 0.0001175728270705399, + "loss": 4.4871, + "step": 258300 + }, + { + "epoch": 2.2240874655647382, + "grad_norm": 4.25, + "learning_rate": 0.00011754615597562557, + "loss": 4.1392, + "step": 258350 + }, + { + "epoch": 2.224517906336088, + "grad_norm": 4.59375, + "learning_rate": 0.00011751948359286117, + "loss": 4.1695, + "step": 258400 + }, + { + "epoch": 2.224948347107438, + "grad_norm": 2.078125, + "learning_rate": 0.00011749280992420436, + "loss": 4.7462, + "step": 258450 + }, + { + "epoch": 2.225378787878788, + "grad_norm": 2.65625, + "learning_rate": 0.00011746613497161294, + "loss": 4.5731, + "step": 258500 + }, + { + "epoch": 2.225809228650138, + "grad_norm": 3.359375, + "learning_rate": 0.00011743945873704478, + "loss": 4.0189, + "step": 258550 + }, + { + "epoch": 2.2262396694214877, + "grad_norm": 4.15625, + "learning_rate": 0.00011741278122245789, + "loss": 4.1192, + "step": 258600 + }, + { + "epoch": 2.2266701101928374, + "grad_norm": 1.4453125, + "learning_rate": 0.00011738610242981034, + "loss": 4.2686, + "step": 258650 + }, + { + "epoch": 2.2271005509641872, + "grad_norm": 2.03125, + "learning_rate": 0.00011735942236106023, + "loss": 4.2397, + "step": 258700 + }, + { + "epoch": 2.227530991735537, + "grad_norm": 2.109375, + "learning_rate": 0.00011733274101816589, + "loss": 4.4109, + "step": 258750 + }, + { + "epoch": 2.227961432506887, + "grad_norm": 3.578125, + "learning_rate": 0.00011730605840308566, + "loss": 4.3267, + "step": 258800 + }, + { + "epoch": 2.228391873278237, + "grad_norm": 0.7265625, + "learning_rate": 0.00011727937451777797, + "loss": 4.4647, + "step": 258850 + }, + { + "epoch": 2.228822314049587, + "grad_norm": 4.84375, + "learning_rate": 0.00011725268936420138, + "loss": 4.3085, + "step": 258900 + }, + { + "epoch": 2.2292527548209367, + "grad_norm": 4.71875, + "learning_rate": 0.00011722600294431449, + "loss": 4.588, + "step": 258950 + }, + { + "epoch": 2.2296831955922864, + "grad_norm": 3.90625, + "learning_rate": 0.00011719931526007606, + "loss": 4.2792, + "step": 259000 + }, + { + "epoch": 2.2301136363636362, + "grad_norm": 2.4375, + "learning_rate": 0.00011717262631344492, + "loss": 4.5527, + "step": 259050 + }, + { + "epoch": 2.230544077134986, + "grad_norm": 1.6796875, + "learning_rate": 0.00011714593610637992, + "loss": 4.4178, + "step": 259100 + }, + { + "epoch": 2.2309745179063363, + "grad_norm": 1.7109375, + "learning_rate": 0.0001171192446408401, + "loss": 4.8266, + "step": 259150 + }, + { + "epoch": 2.231404958677686, + "grad_norm": 4.90625, + "learning_rate": 0.00011709255191878458, + "loss": 3.7358, + "step": 259200 + }, + { + "epoch": 2.231835399449036, + "grad_norm": 3.15625, + "learning_rate": 0.00011706585794217248, + "loss": 4.3325, + "step": 259250 + }, + { + "epoch": 2.2322658402203857, + "grad_norm": 5.53125, + "learning_rate": 0.00011703916271296318, + "loss": 4.5312, + "step": 259300 + }, + { + "epoch": 2.2326962809917354, + "grad_norm": 1.7890625, + "learning_rate": 0.00011701246623311597, + "loss": 4.2548, + "step": 259350 + }, + { + "epoch": 2.2331267217630852, + "grad_norm": 4.15625, + "learning_rate": 0.00011698576850459036, + "loss": 4.7601, + "step": 259400 + }, + { + "epoch": 2.233557162534435, + "grad_norm": 2.5, + "learning_rate": 0.00011695906952934585, + "loss": 4.361, + "step": 259450 + }, + { + "epoch": 2.2339876033057853, + "grad_norm": 3.21875, + "learning_rate": 0.00011693236930934217, + "loss": 4.277, + "step": 259500 + }, + { + "epoch": 2.234418044077135, + "grad_norm": 1.78125, + "learning_rate": 0.000116905667846539, + "loss": 4.5201, + "step": 259550 + }, + { + "epoch": 2.234848484848485, + "grad_norm": 4.6875, + "learning_rate": 0.00011687896514289616, + "loss": 4.5688, + "step": 259600 + }, + { + "epoch": 2.2352789256198347, + "grad_norm": 3.4375, + "learning_rate": 0.00011685226120037363, + "loss": 4.4471, + "step": 259650 + }, + { + "epoch": 2.2357093663911844, + "grad_norm": 4.46875, + "learning_rate": 0.00011682555602093134, + "loss": 4.4415, + "step": 259700 + }, + { + "epoch": 2.2361398071625342, + "grad_norm": 2.84375, + "learning_rate": 0.00011679884960652947, + "loss": 4.5308, + "step": 259750 + }, + { + "epoch": 2.2365702479338845, + "grad_norm": 3.03125, + "learning_rate": 0.00011677214195912815, + "loss": 4.6011, + "step": 259800 + }, + { + "epoch": 2.2370006887052343, + "grad_norm": 3.34375, + "learning_rate": 0.00011674543308068777, + "loss": 4.5214, + "step": 259850 + }, + { + "epoch": 2.237431129476584, + "grad_norm": 6.3125, + "learning_rate": 0.00011671872297316857, + "loss": 4.1105, + "step": 259900 + }, + { + "epoch": 2.237861570247934, + "grad_norm": 3.671875, + "learning_rate": 0.0001166920116385311, + "loss": 4.0888, + "step": 259950 + }, + { + "epoch": 2.2382920110192837, + "grad_norm": 3.0, + "learning_rate": 0.00011666529907873587, + "loss": 4.1547, + "step": 260000 + }, + { + "epoch": 2.2387224517906334, + "grad_norm": 2.96875, + "learning_rate": 0.00011663858529574354, + "loss": 4.5505, + "step": 260050 + }, + { + "epoch": 2.2391528925619832, + "grad_norm": 3.71875, + "learning_rate": 0.00011661187029151488, + "loss": 4.7665, + "step": 260100 + }, + { + "epoch": 2.2395833333333335, + "grad_norm": 2.109375, + "learning_rate": 0.00011658515406801065, + "loss": 4.4342, + "step": 260150 + }, + { + "epoch": 2.2400137741046833, + "grad_norm": 0.99609375, + "learning_rate": 0.00011655843662719183, + "loss": 4.1525, + "step": 260200 + }, + { + "epoch": 2.240444214876033, + "grad_norm": 4.6875, + "learning_rate": 0.00011653171797101936, + "loss": 4.8385, + "step": 260250 + }, + { + "epoch": 2.240874655647383, + "grad_norm": 3.8125, + "learning_rate": 0.00011650499810145437, + "loss": 4.3841, + "step": 260300 + }, + { + "epoch": 2.2413050964187327, + "grad_norm": 2.71875, + "learning_rate": 0.00011647827702045801, + "loss": 4.4282, + "step": 260350 + }, + { + "epoch": 2.2417355371900825, + "grad_norm": 3.5, + "learning_rate": 0.00011645155472999161, + "loss": 4.2831, + "step": 260400 + }, + { + "epoch": 2.2421659779614327, + "grad_norm": 2.5625, + "learning_rate": 0.00011642483123201647, + "loss": 4.017, + "step": 260450 + }, + { + "epoch": 2.2425964187327825, + "grad_norm": 2.84375, + "learning_rate": 0.00011639810652849403, + "loss": 3.8839, + "step": 260500 + }, + { + "epoch": 2.2430268595041323, + "grad_norm": 1.890625, + "learning_rate": 0.00011637138062138588, + "loss": 3.9701, + "step": 260550 + }, + { + "epoch": 2.243457300275482, + "grad_norm": 3.5625, + "learning_rate": 0.00011634465351265364, + "loss": 4.311, + "step": 260600 + }, + { + "epoch": 2.243887741046832, + "grad_norm": 4.375, + "learning_rate": 0.00011631792520425895, + "loss": 4.4238, + "step": 260650 + }, + { + "epoch": 2.2443181818181817, + "grad_norm": 2.65625, + "learning_rate": 0.00011629119569816369, + "loss": 4.2005, + "step": 260700 + }, + { + "epoch": 2.2447486225895315, + "grad_norm": 2.171875, + "learning_rate": 0.0001162644649963297, + "loss": 4.2634, + "step": 260750 + }, + { + "epoch": 2.2451790633608817, + "grad_norm": 3.0625, + "learning_rate": 0.00011623773310071897, + "loss": 4.2966, + "step": 260800 + }, + { + "epoch": 2.2456095041322315, + "grad_norm": 3.1875, + "learning_rate": 0.0001162110000132936, + "loss": 4.3746, + "step": 260850 + }, + { + "epoch": 2.2460399449035813, + "grad_norm": 2.953125, + "learning_rate": 0.00011618426573601568, + "loss": 4.6821, + "step": 260900 + }, + { + "epoch": 2.246470385674931, + "grad_norm": 2.703125, + "learning_rate": 0.00011615753027084747, + "loss": 4.4889, + "step": 260950 + }, + { + "epoch": 2.246900826446281, + "grad_norm": 3.40625, + "learning_rate": 0.00011613079361975132, + "loss": 4.1965, + "step": 261000 + }, + { + "epoch": 2.246900826446281, + "eval_loss": 5.025378227233887, + "eval_runtime": 25.0094, + "eval_samples_per_second": 25.59, + "eval_steps_per_second": 12.795, + "eval_tts_loss": 7.371634570443995, + "step": 261000 + }, + { + "epoch": 2.2473312672176307, + "grad_norm": 1.5703125, + "learning_rate": 0.00011610405578468964, + "loss": 4.2573, + "step": 261050 + }, + { + "epoch": 2.247761707988981, + "grad_norm": 2.78125, + "learning_rate": 0.00011607731676762487, + "loss": 4.4789, + "step": 261100 + }, + { + "epoch": 2.2481921487603307, + "grad_norm": 2.625, + "learning_rate": 0.00011605057657051967, + "loss": 4.5357, + "step": 261150 + }, + { + "epoch": 2.2486225895316805, + "grad_norm": 1.0546875, + "learning_rate": 0.00011602383519533669, + "loss": 4.294, + "step": 261200 + }, + { + "epoch": 2.2490530303030303, + "grad_norm": 0.6953125, + "learning_rate": 0.00011599709264403866, + "loss": 4.2997, + "step": 261250 + }, + { + "epoch": 2.24948347107438, + "grad_norm": 2.5, + "learning_rate": 0.00011597034891858844, + "loss": 4.3767, + "step": 261300 + }, + { + "epoch": 2.24991391184573, + "grad_norm": 2.75, + "learning_rate": 0.00011594360402094899, + "loss": 4.4321, + "step": 261350 + }, + { + "epoch": 2.2503443526170797, + "grad_norm": 3.234375, + "learning_rate": 0.00011591685795308334, + "loss": 4.4887, + "step": 261400 + }, + { + "epoch": 2.25077479338843, + "grad_norm": 2.71875, + "learning_rate": 0.0001158901107169545, + "loss": 4.2412, + "step": 261450 + }, + { + "epoch": 2.2512052341597797, + "grad_norm": 3.125, + "learning_rate": 0.00011586336231452576, + "loss": 4.3327, + "step": 261500 + }, + { + "epoch": 2.2516356749311295, + "grad_norm": 4.8125, + "learning_rate": 0.00011583661274776036, + "loss": 4.2752, + "step": 261550 + }, + { + "epoch": 2.2520661157024793, + "grad_norm": 6.875, + "learning_rate": 0.00011580986201862163, + "loss": 4.8654, + "step": 261600 + }, + { + "epoch": 2.252496556473829, + "grad_norm": 3.65625, + "learning_rate": 0.00011578311012907306, + "loss": 4.4581, + "step": 261650 + }, + { + "epoch": 2.2529269972451793, + "grad_norm": 1.6796875, + "learning_rate": 0.00011575635708107815, + "loss": 3.9214, + "step": 261700 + }, + { + "epoch": 2.253357438016529, + "grad_norm": 3.125, + "learning_rate": 0.00011572960287660057, + "loss": 4.491, + "step": 261750 + }, + { + "epoch": 2.253787878787879, + "grad_norm": 3.234375, + "learning_rate": 0.00011570284751760395, + "loss": 4.59, + "step": 261800 + }, + { + "epoch": 2.2542183195592287, + "grad_norm": 3.546875, + "learning_rate": 0.0001156760910060521, + "loss": 4.3888, + "step": 261850 + }, + { + "epoch": 2.2546487603305785, + "grad_norm": 3.1875, + "learning_rate": 0.00011564933334390893, + "loss": 4.4528, + "step": 261900 + }, + { + "epoch": 2.2550792011019283, + "grad_norm": 4.40625, + "learning_rate": 0.00011562257453313833, + "loss": 4.2057, + "step": 261950 + }, + { + "epoch": 2.255509641873278, + "grad_norm": 4.5625, + "learning_rate": 0.00011559581457570441, + "loss": 4.2181, + "step": 262000 + }, + { + "epoch": 2.255940082644628, + "grad_norm": 3.921875, + "learning_rate": 0.00011556905347357121, + "loss": 4.5553, + "step": 262050 + }, + { + "epoch": 2.256370523415978, + "grad_norm": 2.953125, + "learning_rate": 0.00011554229122870302, + "loss": 4.434, + "step": 262100 + }, + { + "epoch": 2.256800964187328, + "grad_norm": 3.21875, + "learning_rate": 0.00011551552784306408, + "loss": 4.2748, + "step": 262150 + }, + { + "epoch": 2.2572314049586777, + "grad_norm": 4.03125, + "learning_rate": 0.00011548876331861875, + "loss": 4.2321, + "step": 262200 + }, + { + "epoch": 2.2576618457300275, + "grad_norm": 2.296875, + "learning_rate": 0.00011546199765733158, + "loss": 4.338, + "step": 262250 + }, + { + "epoch": 2.2580922865013773, + "grad_norm": 3.140625, + "learning_rate": 0.00011543523086116701, + "loss": 4.031, + "step": 262300 + }, + { + "epoch": 2.2585227272727275, + "grad_norm": 2.96875, + "learning_rate": 0.00011540846293208969, + "loss": 4.0706, + "step": 262350 + }, + { + "epoch": 2.2589531680440773, + "grad_norm": 3.40625, + "learning_rate": 0.00011538169387206436, + "loss": 4.5887, + "step": 262400 + }, + { + "epoch": 2.259383608815427, + "grad_norm": 2.71875, + "learning_rate": 0.00011535492368305583, + "loss": 4.5115, + "step": 262450 + }, + { + "epoch": 2.259814049586777, + "grad_norm": 2.75, + "learning_rate": 0.0001153281523670289, + "loss": 4.2577, + "step": 262500 + }, + { + "epoch": 2.2602444903581267, + "grad_norm": 3.75, + "learning_rate": 0.00011530137992594856, + "loss": 4.6653, + "step": 262550 + }, + { + "epoch": 2.2606749311294765, + "grad_norm": 4.6875, + "learning_rate": 0.00011527460636177987, + "loss": 4.4077, + "step": 262600 + }, + { + "epoch": 2.2611053719008263, + "grad_norm": 2.390625, + "learning_rate": 0.00011524783167648793, + "loss": 4.6283, + "step": 262650 + }, + { + "epoch": 2.261535812672176, + "grad_norm": 2.40625, + "learning_rate": 0.00011522105587203795, + "loss": 4.2903, + "step": 262700 + }, + { + "epoch": 2.2619662534435263, + "grad_norm": 4.34375, + "learning_rate": 0.00011519427895039524, + "loss": 4.0598, + "step": 262750 + }, + { + "epoch": 2.262396694214876, + "grad_norm": 2.875, + "learning_rate": 0.00011516750091352513, + "loss": 4.5757, + "step": 262800 + }, + { + "epoch": 2.262827134986226, + "grad_norm": 3.609375, + "learning_rate": 0.00011514072176339308, + "loss": 4.6233, + "step": 262850 + }, + { + "epoch": 2.2632575757575757, + "grad_norm": 3.375, + "learning_rate": 0.00011511394150196464, + "loss": 4.5223, + "step": 262900 + }, + { + "epoch": 2.2636880165289255, + "grad_norm": 3.1875, + "learning_rate": 0.00011508716013120539, + "loss": 4.5717, + "step": 262950 + }, + { + "epoch": 2.2641184573002757, + "grad_norm": 1.875, + "learning_rate": 0.00011506037765308106, + "loss": 4.1961, + "step": 263000 + }, + { + "epoch": 2.2645488980716255, + "grad_norm": 2.375, + "learning_rate": 0.00011503359406955743, + "loss": 4.1396, + "step": 263050 + }, + { + "epoch": 2.2649793388429753, + "grad_norm": 3.953125, + "learning_rate": 0.00011500680938260033, + "loss": 4.0465, + "step": 263100 + }, + { + "epoch": 2.265409779614325, + "grad_norm": 2.75, + "learning_rate": 0.00011498002359417573, + "loss": 4.1431, + "step": 263150 + }, + { + "epoch": 2.265840220385675, + "grad_norm": 2.0625, + "learning_rate": 0.00011495323670624958, + "loss": 4.1064, + "step": 263200 + }, + { + "epoch": 2.2662706611570247, + "grad_norm": 4.90625, + "learning_rate": 0.00011492644872078804, + "loss": 4.2736, + "step": 263250 + }, + { + "epoch": 2.2667011019283745, + "grad_norm": 4.1875, + "learning_rate": 0.00011489965963975733, + "loss": 4.3057, + "step": 263300 + }, + { + "epoch": 2.2671315426997243, + "grad_norm": 3.671875, + "learning_rate": 0.00011487286946512362, + "loss": 4.1887, + "step": 263350 + }, + { + "epoch": 2.2675619834710745, + "grad_norm": 3.6875, + "learning_rate": 0.0001148460781988533, + "loss": 4.6514, + "step": 263400 + }, + { + "epoch": 2.2679924242424243, + "grad_norm": 3.46875, + "learning_rate": 0.00011481928584291277, + "loss": 4.422, + "step": 263450 + }, + { + "epoch": 2.268422865013774, + "grad_norm": 3.046875, + "learning_rate": 0.00011479249239926854, + "loss": 4.3908, + "step": 263500 + }, + { + "epoch": 2.268853305785124, + "grad_norm": 3.359375, + "learning_rate": 0.00011476569786988722, + "loss": 4.4813, + "step": 263550 + }, + { + "epoch": 2.2692837465564737, + "grad_norm": 2.953125, + "learning_rate": 0.0001147389022567354, + "loss": 4.2909, + "step": 263600 + }, + { + "epoch": 2.269714187327824, + "grad_norm": 2.453125, + "learning_rate": 0.00011471210556177992, + "loss": 4.3768, + "step": 263650 + }, + { + "epoch": 2.2701446280991737, + "grad_norm": 1.5078125, + "learning_rate": 0.00011468530778698751, + "loss": 4.2772, + "step": 263700 + }, + { + "epoch": 2.2705750688705235, + "grad_norm": 3.703125, + "learning_rate": 0.0001146585089343251, + "loss": 4.3767, + "step": 263750 + }, + { + "epoch": 2.2710055096418733, + "grad_norm": 4.59375, + "learning_rate": 0.00011463170900575972, + "loss": 4.4768, + "step": 263800 + }, + { + "epoch": 2.271435950413223, + "grad_norm": 2.15625, + "learning_rate": 0.00011460490800325836, + "loss": 4.2622, + "step": 263850 + }, + { + "epoch": 2.271866391184573, + "grad_norm": 2.625, + "learning_rate": 0.00011457810592878815, + "loss": 4.4093, + "step": 263900 + }, + { + "epoch": 2.2722968319559227, + "grad_norm": 3.765625, + "learning_rate": 0.00011455130278431635, + "loss": 4.5669, + "step": 263950 + }, + { + "epoch": 2.2727272727272725, + "grad_norm": 4.59375, + "learning_rate": 0.00011452449857181022, + "loss": 4.2765, + "step": 264000 + }, + { + "epoch": 2.2727272727272725, + "eval_loss": 5.020726203918457, + "eval_runtime": 24.0931, + "eval_samples_per_second": 26.564, + "eval_steps_per_second": 13.282, + "eval_tts_loss": 7.375918081878187, + "step": 264000 + }, + { + "epoch": 2.2731577134986227, + "grad_norm": 2.3125, + "learning_rate": 0.00011449769329323719, + "loss": 4.144, + "step": 264050 + }, + { + "epoch": 2.2735881542699725, + "grad_norm": 2.5, + "learning_rate": 0.00011447088695056461, + "loss": 4.5033, + "step": 264100 + }, + { + "epoch": 2.2740185950413223, + "grad_norm": 0.78515625, + "learning_rate": 0.00011444407954576008, + "loss": 4.4551, + "step": 264150 + }, + { + "epoch": 2.274449035812672, + "grad_norm": 2.0, + "learning_rate": 0.00011441727108079118, + "loss": 4.5068, + "step": 264200 + }, + { + "epoch": 2.274879476584022, + "grad_norm": 2.421875, + "learning_rate": 0.00011439046155762562, + "loss": 4.0674, + "step": 264250 + }, + { + "epoch": 2.275309917355372, + "grad_norm": 3.515625, + "learning_rate": 0.00011436365097823113, + "loss": 4.3386, + "step": 264300 + }, + { + "epoch": 2.275740358126722, + "grad_norm": 3.34375, + "learning_rate": 0.0001143368393445756, + "loss": 4.5215, + "step": 264350 + }, + { + "epoch": 2.2761707988980717, + "grad_norm": 2.296875, + "learning_rate": 0.00011431002665862684, + "loss": 4.0957, + "step": 264400 + }, + { + "epoch": 2.2766012396694215, + "grad_norm": 2.578125, + "learning_rate": 0.00011428321292235298, + "loss": 4.5167, + "step": 264450 + }, + { + "epoch": 2.2770316804407713, + "grad_norm": 3.34375, + "learning_rate": 0.000114256398137722, + "loss": 4.1995, + "step": 264500 + }, + { + "epoch": 2.277462121212121, + "grad_norm": 3.125, + "learning_rate": 0.00011422958230670204, + "loss": 4.5269, + "step": 264550 + }, + { + "epoch": 2.277892561983471, + "grad_norm": 3.078125, + "learning_rate": 0.00011420276543126139, + "loss": 4.659, + "step": 264600 + }, + { + "epoch": 2.2783230027548207, + "grad_norm": 3.234375, + "learning_rate": 0.00011417594751336831, + "loss": 4.4911, + "step": 264650 + }, + { + "epoch": 2.278753443526171, + "grad_norm": 2.84375, + "learning_rate": 0.00011414912855499118, + "loss": 4.1821, + "step": 264700 + }, + { + "epoch": 2.2791838842975207, + "grad_norm": 2.140625, + "learning_rate": 0.00011412230855809843, + "loss": 4.5744, + "step": 264750 + }, + { + "epoch": 2.2796143250688705, + "grad_norm": 2.0, + "learning_rate": 0.00011409548752465866, + "loss": 4.463, + "step": 264800 + }, + { + "epoch": 2.2800447658402203, + "grad_norm": 3.046875, + "learning_rate": 0.00011406866545664041, + "loss": 4.1471, + "step": 264850 + }, + { + "epoch": 2.28047520661157, + "grad_norm": 3.078125, + "learning_rate": 0.00011404184235601236, + "loss": 4.2949, + "step": 264900 + }, + { + "epoch": 2.2809056473829203, + "grad_norm": 2.125, + "learning_rate": 0.00011401501822474334, + "loss": 4.3699, + "step": 264950 + }, + { + "epoch": 2.28133608815427, + "grad_norm": 2.40625, + "learning_rate": 0.00011398819306480214, + "loss": 4.3122, + "step": 265000 + }, + { + "epoch": 2.28176652892562, + "grad_norm": 3.671875, + "learning_rate": 0.00011396136687815762, + "loss": 4.395, + "step": 265050 + }, + { + "epoch": 2.2821969696969697, + "grad_norm": 4.09375, + "learning_rate": 0.00011393453966677883, + "loss": 4.5916, + "step": 265100 + }, + { + "epoch": 2.2826274104683195, + "grad_norm": 2.703125, + "learning_rate": 0.00011390771143263481, + "loss": 4.291, + "step": 265150 + }, + { + "epoch": 2.2830578512396693, + "grad_norm": 3.25, + "learning_rate": 0.0001138808821776947, + "loss": 4.1604, + "step": 265200 + }, + { + "epoch": 2.283488292011019, + "grad_norm": 3.296875, + "learning_rate": 0.00011385405190392769, + "loss": 4.2222, + "step": 265250 + }, + { + "epoch": 2.283918732782369, + "grad_norm": 4.40625, + "learning_rate": 0.00011382722061330306, + "loss": 4.3219, + "step": 265300 + }, + { + "epoch": 2.284349173553719, + "grad_norm": 2.625, + "learning_rate": 0.00011380038830779025, + "loss": 4.4364, + "step": 265350 + }, + { + "epoch": 2.284779614325069, + "grad_norm": 2.90625, + "learning_rate": 0.00011377355498935857, + "loss": 3.9465, + "step": 265400 + }, + { + "epoch": 2.2852100550964187, + "grad_norm": 2.75, + "learning_rate": 0.00011374672065997761, + "loss": 4.1925, + "step": 265450 + }, + { + "epoch": 2.2856404958677685, + "grad_norm": 3.796875, + "learning_rate": 0.00011371988532161697, + "loss": 3.9821, + "step": 265500 + }, + { + "epoch": 2.2860709366391183, + "grad_norm": 2.890625, + "learning_rate": 0.00011369304897624622, + "loss": 4.3449, + "step": 265550 + }, + { + "epoch": 2.2865013774104685, + "grad_norm": 5.625, + "learning_rate": 0.00011366621162583515, + "loss": 4.4995, + "step": 265600 + }, + { + "epoch": 2.2869318181818183, + "grad_norm": 4.875, + "learning_rate": 0.00011363937327235352, + "loss": 4.4606, + "step": 265650 + }, + { + "epoch": 2.287362258953168, + "grad_norm": 2.546875, + "learning_rate": 0.00011361253391777126, + "loss": 4.1662, + "step": 265700 + }, + { + "epoch": 2.287792699724518, + "grad_norm": 4.59375, + "learning_rate": 0.00011358569356405832, + "loss": 4.0176, + "step": 265750 + }, + { + "epoch": 2.2882231404958677, + "grad_norm": 2.921875, + "learning_rate": 0.00011355885221318468, + "loss": 4.3935, + "step": 265800 + }, + { + "epoch": 2.2886535812672175, + "grad_norm": 1.859375, + "learning_rate": 0.00011353200986712046, + "loss": 4.544, + "step": 265850 + }, + { + "epoch": 2.2890840220385673, + "grad_norm": 6.25, + "learning_rate": 0.00011350516652783585, + "loss": 4.2888, + "step": 265900 + }, + { + "epoch": 2.289514462809917, + "grad_norm": 1.0703125, + "learning_rate": 0.00011347832219730104, + "loss": 4.6411, + "step": 265950 + }, + { + "epoch": 2.2899449035812673, + "grad_norm": 3.921875, + "learning_rate": 0.0001134514768774864, + "loss": 3.9569, + "step": 266000 + }, + { + "epoch": 2.290375344352617, + "grad_norm": 3.453125, + "learning_rate": 0.00011342463057036233, + "loss": 4.2417, + "step": 266050 + }, + { + "epoch": 2.290805785123967, + "grad_norm": 3.53125, + "learning_rate": 0.00011339778327789923, + "loss": 4.1836, + "step": 266100 + }, + { + "epoch": 2.2912362258953167, + "grad_norm": 1.6875, + "learning_rate": 0.00011337093500206764, + "loss": 4.4778, + "step": 266150 + }, + { + "epoch": 2.2916666666666665, + "grad_norm": 2.296875, + "learning_rate": 0.00011334408574483821, + "loss": 4.4123, + "step": 266200 + }, + { + "epoch": 2.2920971074380168, + "grad_norm": 2.890625, + "learning_rate": 0.00011331723550818161, + "loss": 4.4007, + "step": 266250 + }, + { + "epoch": 2.2925275482093666, + "grad_norm": 3.71875, + "learning_rate": 0.00011329038429406855, + "loss": 4.6705, + "step": 266300 + }, + { + "epoch": 2.2929579889807163, + "grad_norm": 2.34375, + "learning_rate": 0.00011326353210446992, + "loss": 4.2245, + "step": 266350 + }, + { + "epoch": 2.293388429752066, + "grad_norm": 3.484375, + "learning_rate": 0.00011323667894135654, + "loss": 4.6366, + "step": 266400 + }, + { + "epoch": 2.293818870523416, + "grad_norm": 3.34375, + "learning_rate": 0.0001132098248066994, + "loss": 4.3999, + "step": 266450 + }, + { + "epoch": 2.2942493112947657, + "grad_norm": 2.65625, + "learning_rate": 0.00011318296970246956, + "loss": 4.2901, + "step": 266500 + }, + { + "epoch": 2.2946797520661155, + "grad_norm": 1.484375, + "learning_rate": 0.0001131561136306381, + "loss": 4.2358, + "step": 266550 + }, + { + "epoch": 2.2951101928374658, + "grad_norm": 2.640625, + "learning_rate": 0.00011312925659317617, + "loss": 4.4223, + "step": 266600 + }, + { + "epoch": 2.2955406336088156, + "grad_norm": 2.96875, + "learning_rate": 0.00011310239859205509, + "loss": 4.399, + "step": 266650 + }, + { + "epoch": 2.2959710743801653, + "grad_norm": 2.25, + "learning_rate": 0.00011307553962924618, + "loss": 4.3867, + "step": 266700 + }, + { + "epoch": 2.296401515151515, + "grad_norm": 1.3046875, + "learning_rate": 0.00011304867970672075, + "loss": 4.3279, + "step": 266750 + }, + { + "epoch": 2.296831955922865, + "grad_norm": 3.78125, + "learning_rate": 0.00011302181882645029, + "loss": 4.3662, + "step": 266800 + }, + { + "epoch": 2.2972623966942147, + "grad_norm": 4.40625, + "learning_rate": 0.00011299495699040634, + "loss": 3.9623, + "step": 266850 + }, + { + "epoch": 2.297692837465565, + "grad_norm": 1.4296875, + "learning_rate": 0.00011296809420056055, + "loss": 4.2957, + "step": 266900 + }, + { + "epoch": 2.2981232782369148, + "grad_norm": 3.390625, + "learning_rate": 0.00011294123045888454, + "loss": 4.1539, + "step": 266950 + }, + { + "epoch": 2.2985537190082646, + "grad_norm": 4.0625, + "learning_rate": 0.00011291436576735004, + "loss": 4.5285, + "step": 267000 + }, + { + "epoch": 2.2985537190082646, + "eval_loss": 5.019803524017334, + "eval_runtime": 24.235, + "eval_samples_per_second": 26.408, + "eval_steps_per_second": 13.204, + "eval_tts_loss": 7.400831753020406, + "step": 267000 + }, + { + "epoch": 2.2989841597796143, + "grad_norm": 6.71875, + "learning_rate": 0.0001128875001279289, + "loss": 4.3166, + "step": 267050 + }, + { + "epoch": 2.299414600550964, + "grad_norm": 3.390625, + "learning_rate": 0.00011286063354259295, + "loss": 4.0372, + "step": 267100 + }, + { + "epoch": 2.299845041322314, + "grad_norm": 1.6484375, + "learning_rate": 0.00011283376601331419, + "loss": 4.228, + "step": 267150 + }, + { + "epoch": 2.3002754820936637, + "grad_norm": 2.203125, + "learning_rate": 0.00011280689754206459, + "loss": 4.1248, + "step": 267200 + }, + { + "epoch": 2.300705922865014, + "grad_norm": 2.953125, + "learning_rate": 0.0001127800281308163, + "loss": 4.3304, + "step": 267250 + }, + { + "epoch": 2.3011363636363638, + "grad_norm": 2.875, + "learning_rate": 0.00011275315778154144, + "loss": 4.3871, + "step": 267300 + }, + { + "epoch": 2.3015668044077136, + "grad_norm": 1.7890625, + "learning_rate": 0.0001127262864962122, + "loss": 4.7217, + "step": 267350 + }, + { + "epoch": 2.3019972451790633, + "grad_norm": 2.59375, + "learning_rate": 0.00011269941427680097, + "loss": 4.4358, + "step": 267400 + }, + { + "epoch": 2.302427685950413, + "grad_norm": 3.75, + "learning_rate": 0.00011267254112528001, + "loss": 4.6437, + "step": 267450 + }, + { + "epoch": 2.302858126721763, + "grad_norm": 2.5625, + "learning_rate": 0.00011264566704362178, + "loss": 4.4117, + "step": 267500 + }, + { + "epoch": 2.303288567493113, + "grad_norm": 3.109375, + "learning_rate": 0.00011261879203379884, + "loss": 4.1981, + "step": 267550 + }, + { + "epoch": 2.303719008264463, + "grad_norm": 4.15625, + "learning_rate": 0.0001125919160977837, + "loss": 4.7515, + "step": 267600 + }, + { + "epoch": 2.3041494490358128, + "grad_norm": 3.875, + "learning_rate": 0.00011256503923754902, + "loss": 4.0131, + "step": 267650 + }, + { + "epoch": 2.3045798898071626, + "grad_norm": 3.453125, + "learning_rate": 0.00011253816145506748, + "loss": 4.292, + "step": 267700 + }, + { + "epoch": 2.3050103305785123, + "grad_norm": 2.75, + "learning_rate": 0.00011251128275231189, + "loss": 4.2676, + "step": 267750 + }, + { + "epoch": 2.305440771349862, + "grad_norm": 3.265625, + "learning_rate": 0.00011248440313125504, + "loss": 4.3254, + "step": 267800 + }, + { + "epoch": 2.305871212121212, + "grad_norm": 2.078125, + "learning_rate": 0.00011245752259386985, + "loss": 4.4629, + "step": 267850 + }, + { + "epoch": 2.306301652892562, + "grad_norm": 3.390625, + "learning_rate": 0.00011243064114212933, + "loss": 4.1554, + "step": 267900 + }, + { + "epoch": 2.306732093663912, + "grad_norm": 2.59375, + "learning_rate": 0.0001124037587780065, + "loss": 4.3493, + "step": 267950 + }, + { + "epoch": 2.3071625344352618, + "grad_norm": 2.40625, + "learning_rate": 0.00011237687550347444, + "loss": 4.0415, + "step": 268000 + }, + { + "epoch": 2.3075929752066116, + "grad_norm": 3.21875, + "learning_rate": 0.00011234999132050636, + "loss": 4.3151, + "step": 268050 + }, + { + "epoch": 2.3080234159779613, + "grad_norm": 3.6875, + "learning_rate": 0.00011232310623107549, + "loss": 4.3389, + "step": 268100 + }, + { + "epoch": 2.308453856749311, + "grad_norm": 3.15625, + "learning_rate": 0.00011229622023715512, + "loss": 4.6592, + "step": 268150 + }, + { + "epoch": 2.3088842975206614, + "grad_norm": 2.34375, + "learning_rate": 0.00011226933334071868, + "loss": 4.2798, + "step": 268200 + }, + { + "epoch": 2.309314738292011, + "grad_norm": 1.9609375, + "learning_rate": 0.00011224244554373953, + "loss": 4.4876, + "step": 268250 + }, + { + "epoch": 2.309745179063361, + "grad_norm": 2.203125, + "learning_rate": 0.00011221555684819125, + "loss": 4.323, + "step": 268300 + }, + { + "epoch": 2.3101756198347108, + "grad_norm": 0.9921875, + "learning_rate": 0.00011218866725604735, + "loss": 4.522, + "step": 268350 + }, + { + "epoch": 2.3106060606060606, + "grad_norm": 3.078125, + "learning_rate": 0.00011216177676928152, + "loss": 4.3923, + "step": 268400 + }, + { + "epoch": 2.3110365013774103, + "grad_norm": 1.6484375, + "learning_rate": 0.00011213488538986743, + "loss": 4.6585, + "step": 268450 + }, + { + "epoch": 2.31146694214876, + "grad_norm": 2.421875, + "learning_rate": 0.00011210799311977884, + "loss": 4.6634, + "step": 268500 + }, + { + "epoch": 2.3118973829201104, + "grad_norm": 3.53125, + "learning_rate": 0.00011208109996098965, + "loss": 4.2848, + "step": 268550 + }, + { + "epoch": 2.31232782369146, + "grad_norm": 2.96875, + "learning_rate": 0.00011205420591547371, + "loss": 4.0101, + "step": 268600 + }, + { + "epoch": 2.31275826446281, + "grad_norm": 3.046875, + "learning_rate": 0.00011202731098520496, + "loss": 4.5398, + "step": 268650 + }, + { + "epoch": 2.3131887052341598, + "grad_norm": 2.609375, + "learning_rate": 0.00011200041517215747, + "loss": 4.1761, + "step": 268700 + }, + { + "epoch": 2.3136191460055096, + "grad_norm": 2.265625, + "learning_rate": 0.00011197351847830535, + "loss": 4.2692, + "step": 268750 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 2.09375, + "learning_rate": 0.00011194662090562274, + "loss": 4.3893, + "step": 268800 + }, + { + "epoch": 2.3144800275482096, + "grad_norm": 2.90625, + "learning_rate": 0.00011191972245608382, + "loss": 4.1133, + "step": 268850 + }, + { + "epoch": 2.3149104683195594, + "grad_norm": 2.984375, + "learning_rate": 0.00011189282313166292, + "loss": 4.4693, + "step": 268900 + }, + { + "epoch": 2.315340909090909, + "grad_norm": 2.40625, + "learning_rate": 0.00011186592293433443, + "loss": 4.1404, + "step": 268950 + }, + { + "epoch": 2.315771349862259, + "grad_norm": 4.90625, + "learning_rate": 0.00011183902186607268, + "loss": 4.6434, + "step": 269000 + }, + { + "epoch": 2.3162017906336088, + "grad_norm": 4.40625, + "learning_rate": 0.00011181211992885222, + "loss": 4.5971, + "step": 269050 + }, + { + "epoch": 2.3166322314049586, + "grad_norm": 3.1875, + "learning_rate": 0.00011178521712464758, + "loss": 4.4282, + "step": 269100 + }, + { + "epoch": 2.3170626721763083, + "grad_norm": 5.28125, + "learning_rate": 0.00011175831345543335, + "loss": 4.1139, + "step": 269150 + }, + { + "epoch": 2.3174931129476586, + "grad_norm": 2.34375, + "learning_rate": 0.0001117314089231842, + "loss": 4.3137, + "step": 269200 + }, + { + "epoch": 2.3179235537190084, + "grad_norm": 2.84375, + "learning_rate": 0.0001117045035298749, + "loss": 4.0266, + "step": 269250 + }, + { + "epoch": 2.318353994490358, + "grad_norm": 2.65625, + "learning_rate": 0.00011167759727748022, + "loss": 4.4134, + "step": 269300 + }, + { + "epoch": 2.318784435261708, + "grad_norm": 3.109375, + "learning_rate": 0.00011165069016797502, + "loss": 4.4916, + "step": 269350 + }, + { + "epoch": 2.3192148760330578, + "grad_norm": 2.828125, + "learning_rate": 0.00011162378220333421, + "loss": 4.1318, + "step": 269400 + }, + { + "epoch": 2.3196453168044076, + "grad_norm": 3.96875, + "learning_rate": 0.00011159687338553282, + "loss": 4.3287, + "step": 269450 + }, + { + "epoch": 2.320075757575758, + "grad_norm": 3.203125, + "learning_rate": 0.00011156996371654587, + "loss": 4.0735, + "step": 269500 + }, + { + "epoch": 2.3205061983471076, + "grad_norm": 1.9140625, + "learning_rate": 0.00011154305319834846, + "loss": 4.0443, + "step": 269550 + }, + { + "epoch": 2.3209366391184574, + "grad_norm": 3.234375, + "learning_rate": 0.0001115161418329158, + "loss": 5.0241, + "step": 269600 + }, + { + "epoch": 2.321367079889807, + "grad_norm": 2.546875, + "learning_rate": 0.00011148922962222311, + "loss": 4.2335, + "step": 269650 + }, + { + "epoch": 2.321797520661157, + "grad_norm": 2.84375, + "learning_rate": 0.00011146231656824568, + "loss": 4.7398, + "step": 269700 + }, + { + "epoch": 2.3222279614325068, + "grad_norm": 4.21875, + "learning_rate": 0.00011143540267295887, + "loss": 4.1892, + "step": 269750 + }, + { + "epoch": 2.3226584022038566, + "grad_norm": 4.03125, + "learning_rate": 0.00011140848793833812, + "loss": 4.2207, + "step": 269800 + }, + { + "epoch": 2.323088842975207, + "grad_norm": 2.890625, + "learning_rate": 0.0001113815723663589, + "loss": 4.3673, + "step": 269850 + }, + { + "epoch": 2.3235192837465566, + "grad_norm": 2.984375, + "learning_rate": 0.00011135465595899672, + "loss": 4.5524, + "step": 269900 + }, + { + "epoch": 2.3239497245179064, + "grad_norm": 1.9375, + "learning_rate": 0.00011132773871822725, + "loss": 4.3183, + "step": 269950 + }, + { + "epoch": 2.324380165289256, + "grad_norm": 2.15625, + "learning_rate": 0.00011130082064602613, + "loss": 3.9652, + "step": 270000 + }, + { + "epoch": 2.324380165289256, + "eval_loss": 5.020001411437988, + "eval_runtime": 24.2049, + "eval_samples_per_second": 26.441, + "eval_steps_per_second": 13.22, + "eval_tts_loss": 7.4423839900824404, + "step": 270000 + }, + { + "epoch": 2.324810606060606, + "grad_norm": 1.96875, + "learning_rate": 0.00011127390174436909, + "loss": 4.2847, + "step": 270050 + }, + { + "epoch": 2.3252410468319558, + "grad_norm": 2.1875, + "learning_rate": 0.00011124698201523192, + "loss": 4.2435, + "step": 270100 + }, + { + "epoch": 2.325671487603306, + "grad_norm": 4.25, + "learning_rate": 0.00011122006146059048, + "loss": 4.2181, + "step": 270150 + }, + { + "epoch": 2.326101928374656, + "grad_norm": 2.546875, + "learning_rate": 0.00011119314008242063, + "loss": 4.3434, + "step": 270200 + }, + { + "epoch": 2.3265323691460056, + "grad_norm": 1.5546875, + "learning_rate": 0.00011116621788269841, + "loss": 4.2894, + "step": 270250 + }, + { + "epoch": 2.3269628099173554, + "grad_norm": 2.203125, + "learning_rate": 0.00011113929486339982, + "loss": 4.6308, + "step": 270300 + }, + { + "epoch": 2.327393250688705, + "grad_norm": 4.21875, + "learning_rate": 0.00011111237102650096, + "loss": 4.503, + "step": 270350 + }, + { + "epoch": 2.327823691460055, + "grad_norm": 3.140625, + "learning_rate": 0.00011108544637397798, + "loss": 4.5456, + "step": 270400 + }, + { + "epoch": 2.3282541322314048, + "grad_norm": 1.3359375, + "learning_rate": 0.00011105852090780703, + "loss": 4.0705, + "step": 270450 + }, + { + "epoch": 2.328684573002755, + "grad_norm": 1.3984375, + "learning_rate": 0.00011103159462996453, + "loss": 4.5411, + "step": 270500 + }, + { + "epoch": 2.329115013774105, + "grad_norm": 2.03125, + "learning_rate": 0.00011100466754242665, + "loss": 4.6688, + "step": 270550 + }, + { + "epoch": 2.3295454545454546, + "grad_norm": 1.7578125, + "learning_rate": 0.00011097773964716989, + "loss": 4.7192, + "step": 270600 + }, + { + "epoch": 2.3299758953168044, + "grad_norm": 2.21875, + "learning_rate": 0.00011095081094617063, + "loss": 4.3651, + "step": 270650 + }, + { + "epoch": 2.330406336088154, + "grad_norm": 3.796875, + "learning_rate": 0.00011092388144140541, + "loss": 4.3971, + "step": 270700 + }, + { + "epoch": 2.330836776859504, + "grad_norm": 4.375, + "learning_rate": 0.00011089695113485081, + "loss": 4.0952, + "step": 270750 + }, + { + "epoch": 2.331267217630854, + "grad_norm": 5.25, + "learning_rate": 0.00011087002002848343, + "loss": 4.1595, + "step": 270800 + }, + { + "epoch": 2.331697658402204, + "grad_norm": 1.9921875, + "learning_rate": 0.00011084308812427999, + "loss": 4.3974, + "step": 270850 + }, + { + "epoch": 2.332128099173554, + "grad_norm": 3.203125, + "learning_rate": 0.00011081615542421719, + "loss": 4.5905, + "step": 270900 + }, + { + "epoch": 2.3325585399449036, + "grad_norm": 5.0, + "learning_rate": 0.00011078922193027186, + "loss": 4.2248, + "step": 270950 + }, + { + "epoch": 2.3329889807162534, + "grad_norm": 3.703125, + "learning_rate": 0.00011076228764442088, + "loss": 4.3198, + "step": 271000 + }, + { + "epoch": 2.333419421487603, + "grad_norm": 2.78125, + "learning_rate": 0.00011073535256864109, + "loss": 4.2229, + "step": 271050 + }, + { + "epoch": 2.333849862258953, + "grad_norm": 5.25, + "learning_rate": 0.00011070841670490952, + "loss": 4.4069, + "step": 271100 + }, + { + "epoch": 2.334280303030303, + "grad_norm": 4.78125, + "learning_rate": 0.00011068148005520325, + "loss": 4.3388, + "step": 271150 + }, + { + "epoch": 2.334710743801653, + "grad_norm": 3.25, + "learning_rate": 0.0001106545426214993, + "loss": 3.9887, + "step": 271200 + }, + { + "epoch": 2.335141184573003, + "grad_norm": 2.109375, + "learning_rate": 0.00011062760440577487, + "loss": 4.433, + "step": 271250 + }, + { + "epoch": 2.3355716253443526, + "grad_norm": 2.15625, + "learning_rate": 0.00011060066541000712, + "loss": 4.2246, + "step": 271300 + }, + { + "epoch": 2.3360020661157024, + "grad_norm": 2.796875, + "learning_rate": 0.00011057372563617331, + "loss": 4.4175, + "step": 271350 + }, + { + "epoch": 2.336432506887052, + "grad_norm": 2.734375, + "learning_rate": 0.00011054678508625085, + "loss": 4.5034, + "step": 271400 + }, + { + "epoch": 2.3368629476584024, + "grad_norm": 3.578125, + "learning_rate": 0.000110519843762217, + "loss": 4.4328, + "step": 271450 + }, + { + "epoch": 2.337293388429752, + "grad_norm": 2.09375, + "learning_rate": 0.00011049290166604929, + "loss": 4.7694, + "step": 271500 + }, + { + "epoch": 2.337723829201102, + "grad_norm": 1.84375, + "learning_rate": 0.00011046595879972517, + "loss": 4.4014, + "step": 271550 + }, + { + "epoch": 2.338154269972452, + "grad_norm": 1.3515625, + "learning_rate": 0.00011043901516522215, + "loss": 4.517, + "step": 271600 + }, + { + "epoch": 2.3385847107438016, + "grad_norm": 3.71875, + "learning_rate": 0.00011041207076451792, + "loss": 4.0097, + "step": 271650 + }, + { + "epoch": 2.3390151515151514, + "grad_norm": 3.109375, + "learning_rate": 0.00011038512559959009, + "loss": 4.3613, + "step": 271700 + }, + { + "epoch": 2.339445592286501, + "grad_norm": 4.625, + "learning_rate": 0.00011035817967241638, + "loss": 3.7529, + "step": 271750 + }, + { + "epoch": 2.3398760330578514, + "grad_norm": 3.828125, + "learning_rate": 0.00011033123298497456, + "loss": 4.2193, + "step": 271800 + }, + { + "epoch": 2.340306473829201, + "grad_norm": 4.6875, + "learning_rate": 0.00011030428553924248, + "loss": 4.3163, + "step": 271850 + }, + { + "epoch": 2.340736914600551, + "grad_norm": 2.765625, + "learning_rate": 0.00011027733733719803, + "loss": 4.4093, + "step": 271900 + }, + { + "epoch": 2.341167355371901, + "grad_norm": 1.8671875, + "learning_rate": 0.00011025038838081909, + "loss": 4.5671, + "step": 271950 + }, + { + "epoch": 2.3415977961432506, + "grad_norm": 2.796875, + "learning_rate": 0.00011022343867208373, + "loss": 4.2148, + "step": 272000 + }, + { + "epoch": 2.3420282369146004, + "grad_norm": 3.4375, + "learning_rate": 0.00011019648821296996, + "loss": 4.5127, + "step": 272050 + }, + { + "epoch": 2.3424586776859506, + "grad_norm": 3.0625, + "learning_rate": 0.00011016953700545587, + "loss": 4.1278, + "step": 272100 + }, + { + "epoch": 2.3428891184573004, + "grad_norm": 1.5, + "learning_rate": 0.00011014258505151966, + "loss": 4.1266, + "step": 272150 + }, + { + "epoch": 2.34331955922865, + "grad_norm": 2.71875, + "learning_rate": 0.00011011563235313955, + "loss": 4.3159, + "step": 272200 + }, + { + "epoch": 2.34375, + "grad_norm": 1.28125, + "learning_rate": 0.00011008867891229375, + "loss": 4.4189, + "step": 272250 + }, + { + "epoch": 2.34418044077135, + "grad_norm": 3.515625, + "learning_rate": 0.00011006172473096066, + "loss": 4.4422, + "step": 272300 + }, + { + "epoch": 2.3446108815426996, + "grad_norm": 2.40625, + "learning_rate": 0.00011003476981111862, + "loss": 4.1728, + "step": 272350 + }, + { + "epoch": 2.3450413223140494, + "grad_norm": 3.65625, + "learning_rate": 0.00011000781415474605, + "loss": 4.4759, + "step": 272400 + }, + { + "epoch": 2.3454717630853996, + "grad_norm": 3.8125, + "learning_rate": 0.00010998085776382145, + "loss": 4.4576, + "step": 272450 + }, + { + "epoch": 2.3459022038567494, + "grad_norm": 2.34375, + "learning_rate": 0.00010995390064032336, + "loss": 4.6311, + "step": 272500 + }, + { + "epoch": 2.346332644628099, + "grad_norm": 3.28125, + "learning_rate": 0.00010992694278623041, + "loss": 4.5312, + "step": 272550 + }, + { + "epoch": 2.346763085399449, + "grad_norm": 1.4921875, + "learning_rate": 0.00010989998420352118, + "loss": 4.2317, + "step": 272600 + }, + { + "epoch": 2.347193526170799, + "grad_norm": 4.0, + "learning_rate": 0.00010987302489417443, + "loss": 4.4326, + "step": 272650 + }, + { + "epoch": 2.3476239669421486, + "grad_norm": 4.5, + "learning_rate": 0.0001098460648601689, + "loss": 4.163, + "step": 272700 + }, + { + "epoch": 2.348054407713499, + "grad_norm": 3.96875, + "learning_rate": 0.00010981910410348339, + "loss": 4.1036, + "step": 272750 + }, + { + "epoch": 2.3484848484848486, + "grad_norm": 3.609375, + "learning_rate": 0.00010979214262609679, + "loss": 4.1812, + "step": 272800 + }, + { + "epoch": 2.3489152892561984, + "grad_norm": 2.875, + "learning_rate": 0.00010976518042998793, + "loss": 4.122, + "step": 272850 + }, + { + "epoch": 2.349345730027548, + "grad_norm": 1.8515625, + "learning_rate": 0.0001097382175171359, + "loss": 4.3523, + "step": 272900 + }, + { + "epoch": 2.349776170798898, + "grad_norm": 3.671875, + "learning_rate": 0.00010971125388951962, + "loss": 4.5591, + "step": 272950 + }, + { + "epoch": 2.350206611570248, + "grad_norm": 6.34375, + "learning_rate": 0.0001096842895491182, + "loss": 4.3939, + "step": 273000 + }, + { + "epoch": 2.350206611570248, + "eval_loss": 5.015591621398926, + "eval_runtime": 23.9688, + "eval_samples_per_second": 26.701, + "eval_steps_per_second": 13.351, + "eval_tts_loss": 7.413518178344671, + "step": 273000 + }, + { + "epoch": 2.3506370523415976, + "grad_norm": 2.578125, + "learning_rate": 0.00010965732449791077, + "loss": 4.45, + "step": 273050 + }, + { + "epoch": 2.351067493112948, + "grad_norm": 4.25, + "learning_rate": 0.0001096303587378765, + "loss": 4.5807, + "step": 273100 + }, + { + "epoch": 2.3514979338842976, + "grad_norm": 5.09375, + "learning_rate": 0.0001096033922709946, + "loss": 4.5846, + "step": 273150 + }, + { + "epoch": 2.3519283746556474, + "grad_norm": 3.234375, + "learning_rate": 0.00010957642509924442, + "loss": 4.8317, + "step": 273200 + }, + { + "epoch": 2.352358815426997, + "grad_norm": 1.078125, + "learning_rate": 0.00010954945722460521, + "loss": 4.3433, + "step": 273250 + }, + { + "epoch": 2.352789256198347, + "grad_norm": 3.28125, + "learning_rate": 0.00010952248864905638, + "loss": 4.3074, + "step": 273300 + }, + { + "epoch": 2.353219696969697, + "grad_norm": 3.203125, + "learning_rate": 0.00010949551937457738, + "loss": 4.3437, + "step": 273350 + }, + { + "epoch": 2.353650137741047, + "grad_norm": 4.46875, + "learning_rate": 0.00010946854940314768, + "loss": 4.5188, + "step": 273400 + }, + { + "epoch": 2.354080578512397, + "grad_norm": 3.65625, + "learning_rate": 0.00010944157873674682, + "loss": 4.1051, + "step": 273450 + }, + { + "epoch": 2.3545110192837466, + "grad_norm": 2.203125, + "learning_rate": 0.0001094146073773544, + "loss": 4.2475, + "step": 273500 + }, + { + "epoch": 2.3549414600550964, + "grad_norm": 1.8984375, + "learning_rate": 0.0001093876353269501, + "loss": 4.3901, + "step": 273550 + }, + { + "epoch": 2.355371900826446, + "grad_norm": 1.6484375, + "learning_rate": 0.00010936066258751351, + "loss": 4.4052, + "step": 273600 + }, + { + "epoch": 2.355802341597796, + "grad_norm": 2.359375, + "learning_rate": 0.00010933368916102442, + "loss": 4.4126, + "step": 273650 + }, + { + "epoch": 2.356232782369146, + "grad_norm": 3.1875, + "learning_rate": 0.00010930671504946266, + "loss": 3.9544, + "step": 273700 + }, + { + "epoch": 2.356663223140496, + "grad_norm": 3.78125, + "learning_rate": 0.00010927974025480805, + "loss": 4.4966, + "step": 273750 + }, + { + "epoch": 2.357093663911846, + "grad_norm": 1.828125, + "learning_rate": 0.00010925276477904043, + "loss": 4.0124, + "step": 273800 + }, + { + "epoch": 2.3575241046831956, + "grad_norm": 0.90234375, + "learning_rate": 0.00010922578862413981, + "loss": 4.6764, + "step": 273850 + }, + { + "epoch": 2.3579545454545454, + "grad_norm": 1.3359375, + "learning_rate": 0.00010919881179208616, + "loss": 3.9896, + "step": 273900 + }, + { + "epoch": 2.358384986225895, + "grad_norm": 2.515625, + "learning_rate": 0.00010917183428485951, + "loss": 4.0156, + "step": 273950 + }, + { + "epoch": 2.358815426997245, + "grad_norm": 2.40625, + "learning_rate": 0.00010914485610443991, + "loss": 4.4215, + "step": 274000 + }, + { + "epoch": 2.3592458677685952, + "grad_norm": 2.875, + "learning_rate": 0.00010911787725280759, + "loss": 3.8769, + "step": 274050 + }, + { + "epoch": 2.359676308539945, + "grad_norm": 3.0625, + "learning_rate": 0.00010909089773194273, + "loss": 4.6535, + "step": 274100 + }, + { + "epoch": 2.360106749311295, + "grad_norm": 2.65625, + "learning_rate": 0.00010906391754382547, + "loss": 4.4374, + "step": 274150 + }, + { + "epoch": 2.3605371900826446, + "grad_norm": 1.421875, + "learning_rate": 0.00010903693669043622, + "loss": 4.5197, + "step": 274200 + }, + { + "epoch": 2.3609676308539944, + "grad_norm": 4.40625, + "learning_rate": 0.00010900995517375523, + "loss": 4.3713, + "step": 274250 + }, + { + "epoch": 2.361398071625344, + "grad_norm": 3.359375, + "learning_rate": 0.00010898297299576292, + "loss": 4.3623, + "step": 274300 + }, + { + "epoch": 2.361828512396694, + "grad_norm": 2.796875, + "learning_rate": 0.00010895599015843971, + "loss": 4.3241, + "step": 274350 + }, + { + "epoch": 2.3622589531680442, + "grad_norm": 2.765625, + "learning_rate": 0.00010892900666376611, + "loss": 4.4181, + "step": 274400 + }, + { + "epoch": 2.362689393939394, + "grad_norm": 5.21875, + "learning_rate": 0.00010890202251372261, + "loss": 4.2732, + "step": 274450 + }, + { + "epoch": 2.363119834710744, + "grad_norm": 0.98828125, + "learning_rate": 0.00010887503771028984, + "loss": 4.3896, + "step": 274500 + }, + { + "epoch": 2.3635502754820936, + "grad_norm": 2.953125, + "learning_rate": 0.00010884805225544837, + "loss": 4.2706, + "step": 274550 + }, + { + "epoch": 2.3639807162534434, + "grad_norm": 5.53125, + "learning_rate": 0.00010882106615117896, + "loss": 4.5979, + "step": 274600 + }, + { + "epoch": 2.364411157024793, + "grad_norm": 4.28125, + "learning_rate": 0.0001087940793994622, + "loss": 4.3754, + "step": 274650 + }, + { + "epoch": 2.3648415977961434, + "grad_norm": 2.9375, + "learning_rate": 0.00010876709200227895, + "loss": 4.6806, + "step": 274700 + }, + { + "epoch": 2.3652720385674932, + "grad_norm": 3.703125, + "learning_rate": 0.00010874010396161002, + "loss": 4.1623, + "step": 274750 + }, + { + "epoch": 2.365702479338843, + "grad_norm": 5.625, + "learning_rate": 0.00010871311527943628, + "loss": 4.3409, + "step": 274800 + }, + { + "epoch": 2.366132920110193, + "grad_norm": 5.375, + "learning_rate": 0.00010868612595773862, + "loss": 4.3937, + "step": 274850 + }, + { + "epoch": 2.3665633608815426, + "grad_norm": 2.03125, + "learning_rate": 0.00010865913599849798, + "loss": 4.5467, + "step": 274900 + }, + { + "epoch": 2.3669938016528924, + "grad_norm": 1.59375, + "learning_rate": 0.00010863214540369539, + "loss": 4.1924, + "step": 274950 + }, + { + "epoch": 2.367424242424242, + "grad_norm": 2.578125, + "learning_rate": 0.00010860515417531192, + "loss": 4.3766, + "step": 275000 + }, + { + "epoch": 2.3678546831955924, + "grad_norm": 5.0, + "learning_rate": 0.00010857816231532862, + "loss": 4.6195, + "step": 275050 + }, + { + "epoch": 2.3682851239669422, + "grad_norm": 4.90625, + "learning_rate": 0.00010855116982572671, + "loss": 4.5974, + "step": 275100 + }, + { + "epoch": 2.368715564738292, + "grad_norm": 4.09375, + "learning_rate": 0.00010852417670848731, + "loss": 4.1674, + "step": 275150 + }, + { + "epoch": 2.369146005509642, + "grad_norm": 2.828125, + "learning_rate": 0.00010849718296559165, + "loss": 4.3197, + "step": 275200 + }, + { + "epoch": 2.3695764462809916, + "grad_norm": 2.078125, + "learning_rate": 0.00010847018859902107, + "loss": 4.2036, + "step": 275250 + }, + { + "epoch": 2.3700068870523414, + "grad_norm": 2.140625, + "learning_rate": 0.00010844319361075688, + "loss": 4.4151, + "step": 275300 + }, + { + "epoch": 2.3704373278236917, + "grad_norm": 1.3671875, + "learning_rate": 0.00010841619800278045, + "loss": 4.282, + "step": 275350 + }, + { + "epoch": 2.3708677685950414, + "grad_norm": 3.453125, + "learning_rate": 0.00010838920177707317, + "loss": 4.363, + "step": 275400 + }, + { + "epoch": 2.3712982093663912, + "grad_norm": 3.1875, + "learning_rate": 0.00010836220493561654, + "loss": 4.0632, + "step": 275450 + }, + { + "epoch": 2.371728650137741, + "grad_norm": 2.296875, + "learning_rate": 0.00010833520748039205, + "loss": 4.602, + "step": 275500 + }, + { + "epoch": 2.372159090909091, + "grad_norm": 4.96875, + "learning_rate": 0.00010830820941338127, + "loss": 4.0662, + "step": 275550 + }, + { + "epoch": 2.3725895316804406, + "grad_norm": 5.21875, + "learning_rate": 0.00010828121073656579, + "loss": 4.3643, + "step": 275600 + }, + { + "epoch": 2.3730199724517904, + "grad_norm": 2.6875, + "learning_rate": 0.00010825421145192727, + "loss": 4.6803, + "step": 275650 + }, + { + "epoch": 2.3734504132231407, + "grad_norm": 3.828125, + "learning_rate": 0.00010822721156144738, + "loss": 4.3229, + "step": 275700 + }, + { + "epoch": 2.3738808539944904, + "grad_norm": 3.640625, + "learning_rate": 0.00010820021106710787, + "loss": 4.3691, + "step": 275750 + }, + { + "epoch": 2.3743112947658402, + "grad_norm": 3.140625, + "learning_rate": 0.00010817320997089051, + "loss": 4.4262, + "step": 275800 + }, + { + "epoch": 2.37474173553719, + "grad_norm": 2.53125, + "learning_rate": 0.00010814620827477711, + "loss": 4.344, + "step": 275850 + }, + { + "epoch": 2.37517217630854, + "grad_norm": 2.328125, + "learning_rate": 0.00010811920598074956, + "loss": 4.1819, + "step": 275900 + }, + { + "epoch": 2.3756026170798896, + "grad_norm": 4.3125, + "learning_rate": 0.00010809220309078976, + "loss": 4.3527, + "step": 275950 + }, + { + "epoch": 2.37603305785124, + "grad_norm": 2.96875, + "learning_rate": 0.00010806519960687968, + "loss": 4.1432, + "step": 276000 + }, + { + "epoch": 2.37603305785124, + "eval_loss": 5.014104843139648, + "eval_runtime": 24.4776, + "eval_samples_per_second": 26.146, + "eval_steps_per_second": 13.073, + "eval_tts_loss": 7.396875379435267, + "step": 276000 + }, + { + "epoch": 2.3764634986225897, + "grad_norm": 3.296875, + "learning_rate": 0.00010803819553100127, + "loss": 4.2043, + "step": 276050 + }, + { + "epoch": 2.3768939393939394, + "grad_norm": 3.453125, + "learning_rate": 0.00010801119086513662, + "loss": 4.3224, + "step": 276100 + }, + { + "epoch": 2.3773243801652892, + "grad_norm": 1.65625, + "learning_rate": 0.00010798418561126781, + "loss": 4.1211, + "step": 276150 + }, + { + "epoch": 2.377754820936639, + "grad_norm": 2.203125, + "learning_rate": 0.00010795717977137694, + "loss": 4.2971, + "step": 276200 + }, + { + "epoch": 2.378185261707989, + "grad_norm": 3.546875, + "learning_rate": 0.0001079301733474462, + "loss": 4.2575, + "step": 276250 + }, + { + "epoch": 2.3786157024793386, + "grad_norm": 3.59375, + "learning_rate": 0.00010790316634145778, + "loss": 4.6188, + "step": 276300 + }, + { + "epoch": 2.379046143250689, + "grad_norm": 3.8125, + "learning_rate": 0.00010787615875539396, + "loss": 4.2032, + "step": 276350 + }, + { + "epoch": 2.3794765840220387, + "grad_norm": 1.8828125, + "learning_rate": 0.00010784915059123706, + "loss": 4.2535, + "step": 276400 + }, + { + "epoch": 2.3799070247933884, + "grad_norm": 1.8828125, + "learning_rate": 0.00010782214185096938, + "loss": 4.4132, + "step": 276450 + }, + { + "epoch": 2.3803374655647382, + "grad_norm": 5.8125, + "learning_rate": 0.00010779513253657329, + "loss": 4.7309, + "step": 276500 + }, + { + "epoch": 2.380767906336088, + "grad_norm": 3.75, + "learning_rate": 0.00010776812265003129, + "loss": 4.5798, + "step": 276550 + }, + { + "epoch": 2.381198347107438, + "grad_norm": 2.84375, + "learning_rate": 0.00010774111219332577, + "loss": 4.4564, + "step": 276600 + }, + { + "epoch": 2.381628787878788, + "grad_norm": 3.25, + "learning_rate": 0.00010771410116843927, + "loss": 4.3156, + "step": 276650 + }, + { + "epoch": 2.382059228650138, + "grad_norm": 5.4375, + "learning_rate": 0.00010768708957735436, + "loss": 4.523, + "step": 276700 + }, + { + "epoch": 2.3824896694214877, + "grad_norm": 3.625, + "learning_rate": 0.0001076600774220536, + "loss": 4.3499, + "step": 276750 + }, + { + "epoch": 2.3829201101928374, + "grad_norm": 5.46875, + "learning_rate": 0.00010763306470451966, + "loss": 4.1044, + "step": 276800 + }, + { + "epoch": 2.3833505509641872, + "grad_norm": 4.3125, + "learning_rate": 0.00010760605142673517, + "loss": 4.4604, + "step": 276850 + }, + { + "epoch": 2.383780991735537, + "grad_norm": 3.53125, + "learning_rate": 0.00010757903759068288, + "loss": 4.3727, + "step": 276900 + }, + { + "epoch": 2.384211432506887, + "grad_norm": 3.546875, + "learning_rate": 0.00010755202319834552, + "loss": 3.8865, + "step": 276950 + }, + { + "epoch": 2.384641873278237, + "grad_norm": 4.34375, + "learning_rate": 0.00010752500825170593, + "loss": 4.707, + "step": 277000 + }, + { + "epoch": 2.385072314049587, + "grad_norm": 2.828125, + "learning_rate": 0.00010749799275274693, + "loss": 4.1706, + "step": 277050 + }, + { + "epoch": 2.3855027548209367, + "grad_norm": 2.859375, + "learning_rate": 0.00010747097670345136, + "loss": 4.4873, + "step": 277100 + }, + { + "epoch": 2.3859331955922864, + "grad_norm": 4.28125, + "learning_rate": 0.0001074439601058022, + "loss": 3.9552, + "step": 277150 + }, + { + "epoch": 2.3863636363636362, + "grad_norm": 1.53125, + "learning_rate": 0.00010741694296178239, + "loss": 4.073, + "step": 277200 + }, + { + "epoch": 2.386794077134986, + "grad_norm": 3.03125, + "learning_rate": 0.0001073899252733749, + "loss": 4.7507, + "step": 277250 + }, + { + "epoch": 2.3872245179063363, + "grad_norm": 3.234375, + "learning_rate": 0.0001073629070425628, + "loss": 4.4815, + "step": 277300 + }, + { + "epoch": 2.387654958677686, + "grad_norm": 3.90625, + "learning_rate": 0.00010733588827132919, + "loss": 4.5385, + "step": 277350 + }, + { + "epoch": 2.388085399449036, + "grad_norm": 4.40625, + "learning_rate": 0.00010730886896165713, + "loss": 4.4679, + "step": 277400 + }, + { + "epoch": 2.3885158402203857, + "grad_norm": 2.734375, + "learning_rate": 0.00010728184911552983, + "loss": 4.3386, + "step": 277450 + }, + { + "epoch": 2.3889462809917354, + "grad_norm": 2.734375, + "learning_rate": 0.00010725482873493049, + "loss": 4.0653, + "step": 277500 + }, + { + "epoch": 2.3893767217630852, + "grad_norm": 3.6875, + "learning_rate": 0.00010722780782184232, + "loss": 4.215, + "step": 277550 + }, + { + "epoch": 2.389807162534435, + "grad_norm": 2.6875, + "learning_rate": 0.00010720078637824857, + "loss": 4.1995, + "step": 277600 + }, + { + "epoch": 2.3902376033057853, + "grad_norm": 2.28125, + "learning_rate": 0.00010717376440613263, + "loss": 4.4691, + "step": 277650 + }, + { + "epoch": 2.390668044077135, + "grad_norm": 4.71875, + "learning_rate": 0.00010714674190747782, + "loss": 4.4872, + "step": 277700 + }, + { + "epoch": 2.391098484848485, + "grad_norm": 3.9375, + "learning_rate": 0.0001071197188842675, + "loss": 4.4085, + "step": 277750 + }, + { + "epoch": 2.3915289256198347, + "grad_norm": 1.25, + "learning_rate": 0.00010709269533848518, + "loss": 4.1456, + "step": 277800 + }, + { + "epoch": 2.3919593663911844, + "grad_norm": 4.28125, + "learning_rate": 0.00010706567127211426, + "loss": 4.1615, + "step": 277850 + }, + { + "epoch": 2.3923898071625342, + "grad_norm": 2.9375, + "learning_rate": 0.00010703864668713826, + "loss": 4.3844, + "step": 277900 + }, + { + "epoch": 2.3928202479338845, + "grad_norm": 2.84375, + "learning_rate": 0.00010701162158554074, + "loss": 4.4029, + "step": 277950 + }, + { + "epoch": 2.3932506887052343, + "grad_norm": 2.84375, + "learning_rate": 0.00010698459596930526, + "loss": 4.4122, + "step": 278000 + }, + { + "epoch": 2.393681129476584, + "grad_norm": 2.46875, + "learning_rate": 0.00010695756984041552, + "loss": 4.4777, + "step": 278050 + }, + { + "epoch": 2.394111570247934, + "grad_norm": 3.609375, + "learning_rate": 0.00010693054320085507, + "loss": 4.6485, + "step": 278100 + }, + { + "epoch": 2.3945420110192837, + "grad_norm": 3.734375, + "learning_rate": 0.00010690351605260769, + "loss": 4.5897, + "step": 278150 + }, + { + "epoch": 2.3949724517906334, + "grad_norm": 4.0, + "learning_rate": 0.0001068764883976571, + "loss": 4.115, + "step": 278200 + }, + { + "epoch": 2.3954028925619832, + "grad_norm": 3.5625, + "learning_rate": 0.00010684946023798701, + "loss": 4.7298, + "step": 278250 + }, + { + "epoch": 2.3958333333333335, + "grad_norm": 3.0625, + "learning_rate": 0.00010682243157558129, + "loss": 4.6303, + "step": 278300 + }, + { + "epoch": 2.3962637741046833, + "grad_norm": 2.34375, + "learning_rate": 0.00010679540241242379, + "loss": 4.5638, + "step": 278350 + }, + { + "epoch": 2.396694214876033, + "grad_norm": 3.453125, + "learning_rate": 0.00010676837275049836, + "loss": 3.9242, + "step": 278400 + }, + { + "epoch": 2.397124655647383, + "grad_norm": 2.375, + "learning_rate": 0.00010674134259178896, + "loss": 4.4327, + "step": 278450 + }, + { + "epoch": 2.3975550964187327, + "grad_norm": 3.1875, + "learning_rate": 0.00010671431193827948, + "loss": 4.3538, + "step": 278500 + }, + { + "epoch": 2.397985537190083, + "grad_norm": 2.03125, + "learning_rate": 0.00010668728079195396, + "loss": 4.1395, + "step": 278550 + }, + { + "epoch": 2.3984159779614327, + "grad_norm": 2.875, + "learning_rate": 0.00010666024915479642, + "loss": 4.4853, + "step": 278600 + }, + { + "epoch": 2.3988464187327825, + "grad_norm": 2.90625, + "learning_rate": 0.00010663321702879091, + "loss": 4.739, + "step": 278650 + }, + { + "epoch": 2.3992768595041323, + "grad_norm": 3.078125, + "learning_rate": 0.00010660618441592158, + "loss": 4.3426, + "step": 278700 + }, + { + "epoch": 2.399707300275482, + "grad_norm": 3.4375, + "learning_rate": 0.00010657915131817251, + "loss": 4.4866, + "step": 278750 + }, + { + "epoch": 2.400137741046832, + "grad_norm": 1.7734375, + "learning_rate": 0.00010655211773752786, + "loss": 4.0232, + "step": 278800 + }, + { + "epoch": 2.4005681818181817, + "grad_norm": 4.0625, + "learning_rate": 0.00010652508367597189, + "loss": 4.3151, + "step": 278850 + }, + { + "epoch": 2.4009986225895315, + "grad_norm": 2.125, + "learning_rate": 0.00010649804913548885, + "loss": 4.55, + "step": 278900 + }, + { + "epoch": 2.4014290633608817, + "grad_norm": 1.8671875, + "learning_rate": 0.00010647101411806292, + "loss": 4.3104, + "step": 278950 + }, + { + "epoch": 2.4018595041322315, + "grad_norm": 3.125, + "learning_rate": 0.00010644397862567852, + "loss": 4.1131, + "step": 279000 + }, + { + "epoch": 2.4018595041322315, + "eval_loss": 5.014746189117432, + "eval_runtime": 24.8643, + "eval_samples_per_second": 25.74, + "eval_steps_per_second": 12.87, + "eval_tts_loss": 7.389301508190825, + "step": 279000 + }, + { + "epoch": 2.4022899449035813, + "grad_norm": 1.8046875, + "learning_rate": 0.00010641694266031994, + "loss": 4.4027, + "step": 279050 + }, + { + "epoch": 2.402720385674931, + "grad_norm": 1.0625, + "learning_rate": 0.00010638990622397158, + "loss": 3.9444, + "step": 279100 + }, + { + "epoch": 2.403150826446281, + "grad_norm": 2.546875, + "learning_rate": 0.00010636286931861783, + "loss": 4.1661, + "step": 279150 + }, + { + "epoch": 2.403581267217631, + "grad_norm": 1.953125, + "learning_rate": 0.0001063358319462432, + "loss": 4.4234, + "step": 279200 + }, + { + "epoch": 2.404011707988981, + "grad_norm": 3.140625, + "learning_rate": 0.0001063087941088321, + "loss": 4.3915, + "step": 279250 + }, + { + "epoch": 2.4044421487603307, + "grad_norm": 6.34375, + "learning_rate": 0.0001062817558083691, + "loss": 4.5487, + "step": 279300 + }, + { + "epoch": 2.4048725895316805, + "grad_norm": 2.203125, + "learning_rate": 0.00010625471704683874, + "loss": 3.7869, + "step": 279350 + }, + { + "epoch": 2.4053030303030303, + "grad_norm": 1.8515625, + "learning_rate": 0.00010622767782622562, + "loss": 4.2739, + "step": 279400 + }, + { + "epoch": 2.40573347107438, + "grad_norm": 4.3125, + "learning_rate": 0.0001062006381485143, + "loss": 4.0549, + "step": 279450 + }, + { + "epoch": 2.40616391184573, + "grad_norm": 2.5, + "learning_rate": 0.00010617359801568954, + "loss": 4.56, + "step": 279500 + }, + { + "epoch": 2.4065943526170797, + "grad_norm": 2.578125, + "learning_rate": 0.00010614655742973591, + "loss": 4.1112, + "step": 279550 + }, + { + "epoch": 2.40702479338843, + "grad_norm": 2.5625, + "learning_rate": 0.00010611951639263826, + "loss": 4.2552, + "step": 279600 + }, + { + "epoch": 2.4074552341597797, + "grad_norm": 5.5625, + "learning_rate": 0.00010609247490638123, + "loss": 4.2132, + "step": 279650 + }, + { + "epoch": 2.4078856749311295, + "grad_norm": 2.890625, + "learning_rate": 0.00010606543297294964, + "loss": 4.5925, + "step": 279700 + }, + { + "epoch": 2.4083161157024793, + "grad_norm": 2.90625, + "learning_rate": 0.00010603839059432837, + "loss": 4.1446, + "step": 279750 + }, + { + "epoch": 2.408746556473829, + "grad_norm": 2.734375, + "learning_rate": 0.00010601134777250217, + "loss": 4.5189, + "step": 279800 + }, + { + "epoch": 2.4091769972451793, + "grad_norm": 3.375, + "learning_rate": 0.000105984304509456, + "loss": 4.4627, + "step": 279850 + }, + { + "epoch": 2.409607438016529, + "grad_norm": 2.171875, + "learning_rate": 0.00010595726080717474, + "loss": 4.1316, + "step": 279900 + }, + { + "epoch": 2.410037878787879, + "grad_norm": 3.78125, + "learning_rate": 0.00010593021666764335, + "loss": 4.3968, + "step": 279950 + }, + { + "epoch": 2.4104683195592287, + "grad_norm": 3.453125, + "learning_rate": 0.00010590317209284686, + "loss": 4.5615, + "step": 280000 + }, + { + "epoch": 2.4108987603305785, + "grad_norm": 1.78125, + "learning_rate": 0.00010587612708477017, + "loss": 4.2917, + "step": 280050 + }, + { + "epoch": 2.4113292011019283, + "grad_norm": 3.40625, + "learning_rate": 0.00010584908164539846, + "loss": 4.3689, + "step": 280100 + }, + { + "epoch": 2.411759641873278, + "grad_norm": 3.21875, + "learning_rate": 0.00010582203577671672, + "loss": 4.3412, + "step": 280150 + }, + { + "epoch": 2.412190082644628, + "grad_norm": 2.140625, + "learning_rate": 0.00010579498948071005, + "loss": 4.328, + "step": 280200 + }, + { + "epoch": 2.412620523415978, + "grad_norm": 6.25, + "learning_rate": 0.00010576794275936366, + "loss": 4.4093, + "step": 280250 + }, + { + "epoch": 2.413050964187328, + "grad_norm": 3.078125, + "learning_rate": 0.00010574089561466267, + "loss": 4.3293, + "step": 280300 + }, + { + "epoch": 2.4134814049586777, + "grad_norm": 5.0625, + "learning_rate": 0.00010571384804859228, + "loss": 4.4423, + "step": 280350 + }, + { + "epoch": 2.4139118457300275, + "grad_norm": 5.15625, + "learning_rate": 0.00010568680006313776, + "loss": 4.4732, + "step": 280400 + }, + { + "epoch": 2.4143422865013773, + "grad_norm": 2.84375, + "learning_rate": 0.00010565975166028435, + "loss": 4.7185, + "step": 280450 + }, + { + "epoch": 2.4147727272727275, + "grad_norm": 2.9375, + "learning_rate": 0.00010563270284201734, + "loss": 4.1013, + "step": 280500 + }, + { + "epoch": 2.4152031680440773, + "grad_norm": 1.828125, + "learning_rate": 0.00010560565361032204, + "loss": 4.4158, + "step": 280550 + }, + { + "epoch": 2.415633608815427, + "grad_norm": 3.8125, + "learning_rate": 0.00010557860396718384, + "loss": 4.1939, + "step": 280600 + }, + { + "epoch": 2.416064049586777, + "grad_norm": 2.0625, + "learning_rate": 0.00010555155391458813, + "loss": 3.9688, + "step": 280650 + }, + { + "epoch": 2.4164944903581267, + "grad_norm": 4.15625, + "learning_rate": 0.00010552450345452028, + "loss": 4.1904, + "step": 280700 + }, + { + "epoch": 2.4169249311294765, + "grad_norm": 2.828125, + "learning_rate": 0.00010549745258896577, + "loss": 4.2963, + "step": 280750 + }, + { + "epoch": 2.4173553719008263, + "grad_norm": 3.859375, + "learning_rate": 0.00010547040131991009, + "loss": 4.2181, + "step": 280800 + }, + { + "epoch": 2.417785812672176, + "grad_norm": 2.78125, + "learning_rate": 0.00010544334964933866, + "loss": 4.1302, + "step": 280850 + }, + { + "epoch": 2.4182162534435263, + "grad_norm": 2.921875, + "learning_rate": 0.00010541629757923714, + "loss": 4.4083, + "step": 280900 + }, + { + "epoch": 2.418646694214876, + "grad_norm": 3.515625, + "learning_rate": 0.00010538924511159103, + "loss": 4.5556, + "step": 280950 + }, + { + "epoch": 2.419077134986226, + "grad_norm": 2.703125, + "learning_rate": 0.00010536219224838588, + "loss": 4.2889, + "step": 281000 + }, + { + "epoch": 2.4195075757575757, + "grad_norm": 4.5, + "learning_rate": 0.00010533513899160739, + "loss": 4.4081, + "step": 281050 + }, + { + "epoch": 2.4199380165289255, + "grad_norm": 3.875, + "learning_rate": 0.00010530808534324117, + "loss": 4.0653, + "step": 281100 + }, + { + "epoch": 2.4203684573002757, + "grad_norm": 3.234375, + "learning_rate": 0.00010528103130527292, + "loss": 4.5311, + "step": 281150 + }, + { + "epoch": 2.4207988980716255, + "grad_norm": 3.5625, + "learning_rate": 0.0001052539768796883, + "loss": 4.4613, + "step": 281200 + }, + { + "epoch": 2.4212293388429753, + "grad_norm": 4.0625, + "learning_rate": 0.00010522692206847309, + "loss": 4.507, + "step": 281250 + }, + { + "epoch": 2.421659779614325, + "grad_norm": 5.84375, + "learning_rate": 0.0001051998668736131, + "loss": 4.244, + "step": 281300 + }, + { + "epoch": 2.422090220385675, + "grad_norm": 4.1875, + "learning_rate": 0.000105172811297094, + "loss": 4.127, + "step": 281350 + }, + { + "epoch": 2.4225206611570247, + "grad_norm": 3.0, + "learning_rate": 0.00010514575534090172, + "loss": 4.4536, + "step": 281400 + }, + { + "epoch": 2.4229511019283745, + "grad_norm": 3.0625, + "learning_rate": 0.00010511869900702204, + "loss": 4.4533, + "step": 281450 + }, + { + "epoch": 2.4233815426997243, + "grad_norm": 4.375, + "learning_rate": 0.00010509164229744089, + "loss": 4.4307, + "step": 281500 + }, + { + "epoch": 2.4238119834710745, + "grad_norm": 4.53125, + "learning_rate": 0.00010506458521414415, + "loss": 4.2277, + "step": 281550 + }, + { + "epoch": 2.4242424242424243, + "grad_norm": 4.28125, + "learning_rate": 0.00010503752775911773, + "loss": 4.6191, + "step": 281600 + }, + { + "epoch": 2.424672865013774, + "grad_norm": 3.125, + "learning_rate": 0.00010501046993434765, + "loss": 4.3869, + "step": 281650 + }, + { + "epoch": 2.425103305785124, + "grad_norm": 6.75, + "learning_rate": 0.00010498341174181983, + "loss": 4.1685, + "step": 281700 + }, + { + "epoch": 2.4255337465564737, + "grad_norm": 1.8359375, + "learning_rate": 0.00010495635318352029, + "loss": 4.2421, + "step": 281750 + }, + { + "epoch": 2.425964187327824, + "grad_norm": 1.9765625, + "learning_rate": 0.00010492929426143512, + "loss": 4.3515, + "step": 281800 + }, + { + "epoch": 2.4263946280991737, + "grad_norm": 2.15625, + "learning_rate": 0.00010490223497755036, + "loss": 4.0464, + "step": 281850 + }, + { + "epoch": 2.4268250688705235, + "grad_norm": 3.96875, + "learning_rate": 0.00010487517533385207, + "loss": 4.2715, + "step": 281900 + }, + { + "epoch": 2.4272555096418733, + "grad_norm": 2.171875, + "learning_rate": 0.00010484811533232644, + "loss": 4.6647, + "step": 281950 + }, + { + "epoch": 2.427685950413223, + "grad_norm": 2.515625, + "learning_rate": 0.00010482105497495955, + "loss": 4.4087, + "step": 282000 + }, + { + "epoch": 2.427685950413223, + "eval_loss": 5.012827396392822, + "eval_runtime": 25.3249, + "eval_samples_per_second": 25.272, + "eval_steps_per_second": 12.636, + "eval_tts_loss": 7.408275696474385, + "step": 282000 + }, + { + "epoch": 2.428116391184573, + "grad_norm": 2.703125, + "learning_rate": 0.00010479399426373761, + "loss": 4.7418, + "step": 282050 + }, + { + "epoch": 2.4285468319559227, + "grad_norm": 2.953125, + "learning_rate": 0.00010476693320064678, + "loss": 4.465, + "step": 282100 + }, + { + "epoch": 2.4289772727272725, + "grad_norm": 4.625, + "learning_rate": 0.00010473987178767334, + "loss": 4.4881, + "step": 282150 + }, + { + "epoch": 2.4294077134986227, + "grad_norm": 1.90625, + "learning_rate": 0.00010471281002680351, + "loss": 4.5577, + "step": 282200 + }, + { + "epoch": 2.4298381542699725, + "grad_norm": 3.125, + "learning_rate": 0.00010468574792002353, + "loss": 4.5803, + "step": 282250 + }, + { + "epoch": 2.4302685950413223, + "grad_norm": 3.375, + "learning_rate": 0.00010465868546931976, + "loss": 4.1768, + "step": 282300 + }, + { + "epoch": 2.430699035812672, + "grad_norm": 3.0625, + "learning_rate": 0.0001046316226766785, + "loss": 4.6854, + "step": 282350 + }, + { + "epoch": 2.431129476584022, + "grad_norm": 3.578125, + "learning_rate": 0.00010460455954408608, + "loss": 4.343, + "step": 282400 + }, + { + "epoch": 2.431559917355372, + "grad_norm": 3.34375, + "learning_rate": 0.00010457749607352892, + "loss": 4.566, + "step": 282450 + }, + { + "epoch": 2.431990358126722, + "grad_norm": 2.453125, + "learning_rate": 0.00010455043226699339, + "loss": 4.4499, + "step": 282500 + }, + { + "epoch": 2.4324207988980717, + "grad_norm": 3.21875, + "learning_rate": 0.00010452336812646594, + "loss": 4.4572, + "step": 282550 + }, + { + "epoch": 2.4328512396694215, + "grad_norm": 2.078125, + "learning_rate": 0.00010449630365393297, + "loss": 4.1014, + "step": 282600 + }, + { + "epoch": 2.4332816804407713, + "grad_norm": 1.90625, + "learning_rate": 0.00010446923885138101, + "loss": 4.2052, + "step": 282650 + }, + { + "epoch": 2.433712121212121, + "grad_norm": 4.15625, + "learning_rate": 0.00010444217372079652, + "loss": 4.1167, + "step": 282700 + }, + { + "epoch": 2.434142561983471, + "grad_norm": 2.9375, + "learning_rate": 0.00010441510826416603, + "loss": 4.4092, + "step": 282750 + }, + { + "epoch": 2.4345730027548207, + "grad_norm": 3.28125, + "learning_rate": 0.00010438804248347609, + "loss": 4.0695, + "step": 282800 + }, + { + "epoch": 2.435003443526171, + "grad_norm": 1.9140625, + "learning_rate": 0.00010436097638071331, + "loss": 4.3029, + "step": 282850 + }, + { + "epoch": 2.4354338842975207, + "grad_norm": 0.82421875, + "learning_rate": 0.0001043339099578642, + "loss": 4.471, + "step": 282900 + }, + { + "epoch": 2.4358643250688705, + "grad_norm": 6.96875, + "learning_rate": 0.00010430684321691547, + "loss": 4.39, + "step": 282950 + }, + { + "epoch": 2.4362947658402203, + "grad_norm": 2.484375, + "learning_rate": 0.0001042797761598537, + "loss": 4.2399, + "step": 283000 + }, + { + "epoch": 2.43672520661157, + "grad_norm": 3.15625, + "learning_rate": 0.00010425270878866553, + "loss": 4.3552, + "step": 283050 + }, + { + "epoch": 2.4371556473829203, + "grad_norm": 3.0, + "learning_rate": 0.00010422564110533772, + "loss": 4.3942, + "step": 283100 + }, + { + "epoch": 2.43758608815427, + "grad_norm": 1.5390625, + "learning_rate": 0.00010419857311185693, + "loss": 3.9753, + "step": 283150 + }, + { + "epoch": 2.43801652892562, + "grad_norm": 3.34375, + "learning_rate": 0.00010417150481020994, + "loss": 4.5873, + "step": 283200 + }, + { + "epoch": 2.4384469696969697, + "grad_norm": 3.640625, + "learning_rate": 0.00010414443620238343, + "loss": 4.5218, + "step": 283250 + }, + { + "epoch": 2.4388774104683195, + "grad_norm": 4.8125, + "learning_rate": 0.00010411736729036421, + "loss": 4.5451, + "step": 283300 + }, + { + "epoch": 2.4393078512396693, + "grad_norm": 4.3125, + "learning_rate": 0.00010409029807613914, + "loss": 4.564, + "step": 283350 + }, + { + "epoch": 2.439738292011019, + "grad_norm": 3.9375, + "learning_rate": 0.00010406322856169497, + "loss": 4.5764, + "step": 283400 + }, + { + "epoch": 2.440168732782369, + "grad_norm": 3.09375, + "learning_rate": 0.00010403615874901857, + "loss": 4.0948, + "step": 283450 + }, + { + "epoch": 2.440599173553719, + "grad_norm": 2.46875, + "learning_rate": 0.00010400908864009677, + "loss": 4.5149, + "step": 283500 + }, + { + "epoch": 2.441029614325069, + "grad_norm": 3.21875, + "learning_rate": 0.00010398201823691653, + "loss": 4.5937, + "step": 283550 + }, + { + "epoch": 2.4414600550964187, + "grad_norm": 4.53125, + "learning_rate": 0.00010395494754146471, + "loss": 4.6073, + "step": 283600 + }, + { + "epoch": 2.4418904958677685, + "grad_norm": 8.25, + "learning_rate": 0.00010392787655572823, + "loss": 4.2603, + "step": 283650 + }, + { + "epoch": 2.4423209366391183, + "grad_norm": 3.03125, + "learning_rate": 0.00010390080528169407, + "loss": 4.4448, + "step": 283700 + }, + { + "epoch": 2.4427513774104685, + "grad_norm": 2.546875, + "learning_rate": 0.00010387373372134923, + "loss": 4.5935, + "step": 283750 + }, + { + "epoch": 2.4431818181818183, + "grad_norm": 2.9375, + "learning_rate": 0.00010384666187668061, + "loss": 4.577, + "step": 283800 + }, + { + "epoch": 2.443612258953168, + "grad_norm": 4.125, + "learning_rate": 0.00010381958974967534, + "loss": 4.351, + "step": 283850 + }, + { + "epoch": 2.444042699724518, + "grad_norm": 1.9140625, + "learning_rate": 0.00010379251734232036, + "loss": 3.7904, + "step": 283900 + }, + { + "epoch": 2.4444731404958677, + "grad_norm": 2.609375, + "learning_rate": 0.00010376544465660278, + "loss": 4.6604, + "step": 283950 + }, + { + "epoch": 2.4449035812672175, + "grad_norm": 3.921875, + "learning_rate": 0.00010373837169450966, + "loss": 4.3474, + "step": 284000 + }, + { + "epoch": 2.4453340220385673, + "grad_norm": 2.984375, + "learning_rate": 0.00010371129845802812, + "loss": 4.0781, + "step": 284050 + }, + { + "epoch": 2.445764462809917, + "grad_norm": 2.78125, + "learning_rate": 0.00010368422494914525, + "loss": 4.3142, + "step": 284100 + }, + { + "epoch": 2.4461949035812673, + "grad_norm": 2.609375, + "learning_rate": 0.00010365715116984818, + "loss": 4.3244, + "step": 284150 + }, + { + "epoch": 2.446625344352617, + "grad_norm": 2.8125, + "learning_rate": 0.0001036300771221241, + "loss": 4.0259, + "step": 284200 + }, + { + "epoch": 2.447055785123967, + "grad_norm": 2.046875, + "learning_rate": 0.00010360300280796018, + "loss": 4.2041, + "step": 284250 + }, + { + "epoch": 2.4474862258953167, + "grad_norm": 1.5078125, + "learning_rate": 0.00010357592822934356, + "loss": 4.0192, + "step": 284300 + }, + { + "epoch": 2.4479166666666665, + "grad_norm": 1.9140625, + "learning_rate": 0.00010354885338826155, + "loss": 4.3531, + "step": 284350 + }, + { + "epoch": 2.4483471074380168, + "grad_norm": 3.4375, + "learning_rate": 0.00010352177828670131, + "loss": 4.2375, + "step": 284400 + }, + { + "epoch": 2.4487775482093666, + "grad_norm": 5.15625, + "learning_rate": 0.00010349470292665011, + "loss": 4.535, + "step": 284450 + }, + { + "epoch": 2.4492079889807163, + "grad_norm": 2.859375, + "learning_rate": 0.00010346762731009527, + "loss": 4.5689, + "step": 284500 + }, + { + "epoch": 2.449638429752066, + "grad_norm": 3.078125, + "learning_rate": 0.00010344055143902403, + "loss": 4.5238, + "step": 284550 + }, + { + "epoch": 2.450068870523416, + "grad_norm": 3.296875, + "learning_rate": 0.00010341347531542372, + "loss": 4.5314, + "step": 284600 + }, + { + "epoch": 2.4504993112947657, + "grad_norm": 3.5, + "learning_rate": 0.00010338639894128166, + "loss": 4.9696, + "step": 284650 + }, + { + "epoch": 2.4509297520661155, + "grad_norm": 3.4375, + "learning_rate": 0.0001033593223185852, + "loss": 4.3965, + "step": 284700 + }, + { + "epoch": 2.4513601928374658, + "grad_norm": 2.0, + "learning_rate": 0.00010333224544932174, + "loss": 4.423, + "step": 284750 + }, + { + "epoch": 2.4517906336088156, + "grad_norm": 3.15625, + "learning_rate": 0.00010330516833547859, + "loss": 4.2458, + "step": 284800 + }, + { + "epoch": 2.4522210743801653, + "grad_norm": 4.5, + "learning_rate": 0.0001032780909790432, + "loss": 4.3486, + "step": 284850 + }, + { + "epoch": 2.452651515151515, + "grad_norm": 3.171875, + "learning_rate": 0.00010325101338200303, + "loss": 4.2645, + "step": 284900 + }, + { + "epoch": 2.453081955922865, + "grad_norm": 2.78125, + "learning_rate": 0.00010322393554634541, + "loss": 4.4039, + "step": 284950 + }, + { + "epoch": 2.4535123966942147, + "grad_norm": 2.953125, + "learning_rate": 0.0001031968574740579, + "loss": 3.922, + "step": 285000 + }, + { + "epoch": 2.4535123966942147, + "eval_loss": 5.01358699798584, + "eval_runtime": 24.04, + "eval_samples_per_second": 26.622, + "eval_steps_per_second": 13.311, + "eval_tts_loss": 7.434466893334985, + "step": 285000 + }, + { + "epoch": 2.453942837465565, + "grad_norm": 4.71875, + "learning_rate": 0.00010316977916712793, + "loss": 4.2276, + "step": 285050 + }, + { + "epoch": 2.4543732782369148, + "grad_norm": 2.75, + "learning_rate": 0.00010314270062754297, + "loss": 4.484, + "step": 285100 + }, + { + "epoch": 2.4548037190082646, + "grad_norm": 2.28125, + "learning_rate": 0.00010311562185729056, + "loss": 4.2807, + "step": 285150 + }, + { + "epoch": 2.4552341597796143, + "grad_norm": 3.109375, + "learning_rate": 0.00010308854285835819, + "loss": 4.2822, + "step": 285200 + }, + { + "epoch": 2.455664600550964, + "grad_norm": 4.71875, + "learning_rate": 0.00010306146363273346, + "loss": 4.5957, + "step": 285250 + }, + { + "epoch": 2.456095041322314, + "grad_norm": 3.296875, + "learning_rate": 0.00010303438418240383, + "loss": 4.2958, + "step": 285300 + }, + { + "epoch": 2.4565254820936637, + "grad_norm": 2.046875, + "learning_rate": 0.00010300730450935694, + "loss": 4.3225, + "step": 285350 + }, + { + "epoch": 2.456955922865014, + "grad_norm": 2.59375, + "learning_rate": 0.00010298022461558041, + "loss": 4.4848, + "step": 285400 + }, + { + "epoch": 2.4573863636363638, + "grad_norm": 3.015625, + "learning_rate": 0.00010295314450306177, + "loss": 4.4985, + "step": 285450 + }, + { + "epoch": 2.4578168044077136, + "grad_norm": 3.25, + "learning_rate": 0.00010292606417378869, + "loss": 4.2136, + "step": 285500 + }, + { + "epoch": 2.4582472451790633, + "grad_norm": 5.59375, + "learning_rate": 0.0001028989836297488, + "loss": 4.3001, + "step": 285550 + }, + { + "epoch": 2.458677685950413, + "grad_norm": 3.859375, + "learning_rate": 0.00010287190287292977, + "loss": 4.4967, + "step": 285600 + }, + { + "epoch": 2.459108126721763, + "grad_norm": 3.5625, + "learning_rate": 0.00010284482190531921, + "loss": 4.2818, + "step": 285650 + }, + { + "epoch": 2.459538567493113, + "grad_norm": 3.359375, + "learning_rate": 0.00010281774072890484, + "loss": 4.3762, + "step": 285700 + }, + { + "epoch": 2.459969008264463, + "grad_norm": 3.34375, + "learning_rate": 0.00010279065934567438, + "loss": 4.5424, + "step": 285750 + }, + { + "epoch": 2.4603994490358128, + "grad_norm": 3.421875, + "learning_rate": 0.00010276357775761553, + "loss": 3.999, + "step": 285800 + }, + { + "epoch": 2.4608298898071626, + "grad_norm": 4.3125, + "learning_rate": 0.00010273649596671602, + "loss": 4.1942, + "step": 285850 + }, + { + "epoch": 2.4612603305785123, + "grad_norm": 2.890625, + "learning_rate": 0.00010270941397496358, + "loss": 4.6793, + "step": 285900 + }, + { + "epoch": 2.461690771349862, + "grad_norm": 2.984375, + "learning_rate": 0.00010268233178434601, + "loss": 4.1216, + "step": 285950 + }, + { + "epoch": 2.462121212121212, + "grad_norm": 2.0625, + "learning_rate": 0.00010265524939685103, + "loss": 4.3523, + "step": 286000 + }, + { + "epoch": 2.462551652892562, + "grad_norm": 1.6171875, + "learning_rate": 0.00010262816681446648, + "loss": 4.3751, + "step": 286050 + }, + { + "epoch": 2.462982093663912, + "grad_norm": 3.265625, + "learning_rate": 0.00010260108403918012, + "loss": 4.0416, + "step": 286100 + }, + { + "epoch": 2.4634125344352618, + "grad_norm": 1.0859375, + "learning_rate": 0.00010257400107297981, + "loss": 4.2597, + "step": 286150 + }, + { + "epoch": 2.4638429752066116, + "grad_norm": 3.0625, + "learning_rate": 0.00010254691791785332, + "loss": 4.2485, + "step": 286200 + }, + { + "epoch": 2.4642734159779613, + "grad_norm": 2.1875, + "learning_rate": 0.00010251983457578858, + "loss": 4.167, + "step": 286250 + }, + { + "epoch": 2.464703856749311, + "grad_norm": 2.25, + "learning_rate": 0.00010249275104877338, + "loss": 4.576, + "step": 286300 + }, + { + "epoch": 2.4651342975206614, + "grad_norm": 3.21875, + "learning_rate": 0.00010246566733879561, + "loss": 4.4953, + "step": 286350 + }, + { + "epoch": 2.465564738292011, + "grad_norm": 3.34375, + "learning_rate": 0.00010243858344784316, + "loss": 4.4542, + "step": 286400 + }, + { + "epoch": 2.465995179063361, + "grad_norm": 1.7109375, + "learning_rate": 0.00010241149937790397, + "loss": 4.1143, + "step": 286450 + }, + { + "epoch": 2.4664256198347108, + "grad_norm": 3.140625, + "learning_rate": 0.00010238441513096589, + "loss": 4.3711, + "step": 286500 + }, + { + "epoch": 2.4668560606060606, + "grad_norm": 3.40625, + "learning_rate": 0.00010235733070901685, + "loss": 4.3277, + "step": 286550 + }, + { + "epoch": 2.4672865013774103, + "grad_norm": 2.859375, + "learning_rate": 0.00010233024611404485, + "loss": 4.3605, + "step": 286600 + }, + { + "epoch": 2.46771694214876, + "grad_norm": 3.859375, + "learning_rate": 0.00010230316134803774, + "loss": 4.157, + "step": 286650 + }, + { + "epoch": 2.4681473829201104, + "grad_norm": 2.015625, + "learning_rate": 0.0001022760764129836, + "loss": 4.446, + "step": 286700 + }, + { + "epoch": 2.46857782369146, + "grad_norm": 2.890625, + "learning_rate": 0.0001022489913108703, + "loss": 4.4489, + "step": 286750 + }, + { + "epoch": 2.46900826446281, + "grad_norm": 2.71875, + "learning_rate": 0.00010222190604368593, + "loss": 3.9839, + "step": 286800 + }, + { + "epoch": 2.4694387052341598, + "grad_norm": 2.359375, + "learning_rate": 0.00010219482061341841, + "loss": 4.1401, + "step": 286850 + }, + { + "epoch": 2.4698691460055096, + "grad_norm": 3.03125, + "learning_rate": 0.00010216773502205574, + "loss": 3.8893, + "step": 286900 + }, + { + "epoch": 2.4702995867768593, + "grad_norm": 3.9375, + "learning_rate": 0.00010214064927158606, + "loss": 4.4247, + "step": 286950 + }, + { + "epoch": 2.4707300275482096, + "grad_norm": 3.6875, + "learning_rate": 0.00010211356336399729, + "loss": 4.2582, + "step": 287000 + }, + { + "epoch": 2.4711604683195594, + "grad_norm": 4.5, + "learning_rate": 0.00010208647730127752, + "loss": 4.2929, + "step": 287050 + }, + { + "epoch": 2.471590909090909, + "grad_norm": 2.90625, + "learning_rate": 0.00010205939108541478, + "loss": 4.0117, + "step": 287100 + }, + { + "epoch": 2.472021349862259, + "grad_norm": 0.828125, + "learning_rate": 0.0001020323047183972, + "loss": 4.225, + "step": 287150 + }, + { + "epoch": 2.4724517906336088, + "grad_norm": 5.03125, + "learning_rate": 0.00010200521820221283, + "loss": 4.5542, + "step": 287200 + }, + { + "epoch": 2.4728822314049586, + "grad_norm": 4.1875, + "learning_rate": 0.00010197813153884972, + "loss": 4.7844, + "step": 287250 + }, + { + "epoch": 2.4733126721763083, + "grad_norm": 6.03125, + "learning_rate": 0.00010195104473029605, + "loss": 4.5003, + "step": 287300 + }, + { + "epoch": 2.4737431129476586, + "grad_norm": 2.953125, + "learning_rate": 0.00010192395777853988, + "loss": 4.0351, + "step": 287350 + }, + { + "epoch": 2.4741735537190084, + "grad_norm": 3.640625, + "learning_rate": 0.00010189687068556933, + "loss": 4.3536, + "step": 287400 + }, + { + "epoch": 2.474603994490358, + "grad_norm": 2.90625, + "learning_rate": 0.00010186978345337257, + "loss": 4.382, + "step": 287450 + }, + { + "epoch": 2.475034435261708, + "grad_norm": 2.84375, + "learning_rate": 0.00010184269608393773, + "loss": 4.3086, + "step": 287500 + }, + { + "epoch": 2.4754648760330578, + "grad_norm": 4.28125, + "learning_rate": 0.00010181560857925294, + "loss": 4.0329, + "step": 287550 + }, + { + "epoch": 2.4758953168044076, + "grad_norm": 2.953125, + "learning_rate": 0.00010178852094130642, + "loss": 4.5042, + "step": 287600 + }, + { + "epoch": 2.476325757575758, + "grad_norm": 1.234375, + "learning_rate": 0.00010176143317208631, + "loss": 4.2629, + "step": 287650 + }, + { + "epoch": 2.4767561983471076, + "grad_norm": 3.609375, + "learning_rate": 0.00010173434527358079, + "loss": 4.5112, + "step": 287700 + }, + { + "epoch": 2.4771866391184574, + "grad_norm": 3.28125, + "learning_rate": 0.00010170725724777803, + "loss": 4.0819, + "step": 287750 + }, + { + "epoch": 2.477617079889807, + "grad_norm": 3.5625, + "learning_rate": 0.00010168016909666629, + "loss": 4.3617, + "step": 287800 + }, + { + "epoch": 2.478047520661157, + "grad_norm": 4.21875, + "learning_rate": 0.00010165308082223375, + "loss": 3.8755, + "step": 287850 + }, + { + "epoch": 2.4784779614325068, + "grad_norm": 1.2109375, + "learning_rate": 0.00010162599242646862, + "loss": 4.414, + "step": 287900 + }, + { + "epoch": 2.4789084022038566, + "grad_norm": 3.40625, + "learning_rate": 0.00010159890391135916, + "loss": 4.2458, + "step": 287950 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 2.53125, + "learning_rate": 0.00010157181527889358, + "loss": 4.603, + "step": 288000 + }, + { + "epoch": 2.479338842975207, + "eval_loss": 5.010517597198486, + "eval_runtime": 24.4818, + "eval_samples_per_second": 26.142, + "eval_steps_per_second": 13.071, + "eval_tts_loss": 7.395733432660736, + "step": 288000 + }, + { + "epoch": 2.4797692837465566, + "grad_norm": 3.1875, + "learning_rate": 0.00010154472653106016, + "loss": 4.504, + "step": 288050 + }, + { + "epoch": 2.4801997245179064, + "grad_norm": 3.671875, + "learning_rate": 0.00010151763766984713, + "loss": 4.1512, + "step": 288100 + }, + { + "epoch": 2.480630165289256, + "grad_norm": 1.1796875, + "learning_rate": 0.00010149054869724274, + "loss": 4.0913, + "step": 288150 + }, + { + "epoch": 2.481060606060606, + "grad_norm": 2.609375, + "learning_rate": 0.00010146345961523532, + "loss": 4.299, + "step": 288200 + }, + { + "epoch": 2.4814910468319558, + "grad_norm": 3.125, + "learning_rate": 0.00010143637042581307, + "loss": 4.2602, + "step": 288250 + }, + { + "epoch": 2.481921487603306, + "grad_norm": 3.078125, + "learning_rate": 0.00010140928113096435, + "loss": 4.3977, + "step": 288300 + }, + { + "epoch": 2.482351928374656, + "grad_norm": 3.234375, + "learning_rate": 0.00010138219173267742, + "loss": 4.3319, + "step": 288350 + }, + { + "epoch": 2.4827823691460056, + "grad_norm": 3.890625, + "learning_rate": 0.00010135510223294059, + "loss": 3.8356, + "step": 288400 + }, + { + "epoch": 2.4832128099173554, + "grad_norm": 1.9921875, + "learning_rate": 0.00010132801263374215, + "loss": 4.0408, + "step": 288450 + }, + { + "epoch": 2.483643250688705, + "grad_norm": 4.3125, + "learning_rate": 0.00010130092293707047, + "loss": 4.2576, + "step": 288500 + }, + { + "epoch": 2.484073691460055, + "grad_norm": 2.25, + "learning_rate": 0.00010127383314491384, + "loss": 4.3203, + "step": 288550 + }, + { + "epoch": 2.4845041322314048, + "grad_norm": 3.078125, + "learning_rate": 0.00010124674325926057, + "loss": 4.4197, + "step": 288600 + }, + { + "epoch": 2.484934573002755, + "grad_norm": 2.40625, + "learning_rate": 0.00010121965328209905, + "loss": 4.369, + "step": 288650 + }, + { + "epoch": 2.485365013774105, + "grad_norm": 2.109375, + "learning_rate": 0.00010119256321541761, + "loss": 4.5727, + "step": 288700 + }, + { + "epoch": 2.4857954545454546, + "grad_norm": 2.828125, + "learning_rate": 0.00010116547306120461, + "loss": 4.3041, + "step": 288750 + }, + { + "epoch": 2.4862258953168044, + "grad_norm": 5.8125, + "learning_rate": 0.00010113838282144834, + "loss": 4.2564, + "step": 288800 + }, + { + "epoch": 2.486656336088154, + "grad_norm": 4.25, + "learning_rate": 0.00010111129249813728, + "loss": 4.7618, + "step": 288850 + }, + { + "epoch": 2.487086776859504, + "grad_norm": 2.28125, + "learning_rate": 0.00010108420209325971, + "loss": 4.2519, + "step": 288900 + }, + { + "epoch": 2.487517217630854, + "grad_norm": 6.375, + "learning_rate": 0.00010105711160880402, + "loss": 4.4035, + "step": 288950 + }, + { + "epoch": 2.487947658402204, + "grad_norm": 2.734375, + "learning_rate": 0.00010103002104675866, + "loss": 4.3746, + "step": 289000 + }, + { + "epoch": 2.488378099173554, + "grad_norm": 3.625, + "learning_rate": 0.00010100293040911196, + "loss": 4.0388, + "step": 289050 + }, + { + "epoch": 2.4888085399449036, + "grad_norm": 3.90625, + "learning_rate": 0.00010097583969785231, + "loss": 4.3886, + "step": 289100 + }, + { + "epoch": 2.4892389807162534, + "grad_norm": 3.75, + "learning_rate": 0.00010094874891496812, + "loss": 4.766, + "step": 289150 + }, + { + "epoch": 2.489669421487603, + "grad_norm": 2.34375, + "learning_rate": 0.00010092165806244783, + "loss": 4.5697, + "step": 289200 + }, + { + "epoch": 2.490099862258953, + "grad_norm": 2.59375, + "learning_rate": 0.00010089456714227981, + "loss": 4.4505, + "step": 289250 + }, + { + "epoch": 2.490530303030303, + "grad_norm": 2.703125, + "learning_rate": 0.00010086747615645245, + "loss": 4.6242, + "step": 289300 + }, + { + "epoch": 2.490960743801653, + "grad_norm": 1.7734375, + "learning_rate": 0.00010084038510695424, + "loss": 4.5188, + "step": 289350 + }, + { + "epoch": 2.491391184573003, + "grad_norm": 2.640625, + "learning_rate": 0.00010081329399577357, + "loss": 4.3866, + "step": 289400 + }, + { + "epoch": 2.4918216253443526, + "grad_norm": 2.34375, + "learning_rate": 0.00010078620282489885, + "loss": 4.3092, + "step": 289450 + }, + { + "epoch": 2.4922520661157024, + "grad_norm": 3.0625, + "learning_rate": 0.00010075911159631857, + "loss": 4.5881, + "step": 289500 + }, + { + "epoch": 2.492682506887052, + "grad_norm": 2.09375, + "learning_rate": 0.00010073202031202112, + "loss": 4.678, + "step": 289550 + }, + { + "epoch": 2.4931129476584024, + "grad_norm": 3.453125, + "learning_rate": 0.00010070492897399493, + "loss": 4.1268, + "step": 289600 + }, + { + "epoch": 2.493543388429752, + "grad_norm": 3.34375, + "learning_rate": 0.00010067783758422851, + "loss": 4.1757, + "step": 289650 + }, + { + "epoch": 2.493973829201102, + "grad_norm": 3.328125, + "learning_rate": 0.00010065074614471023, + "loss": 4.5447, + "step": 289700 + }, + { + "epoch": 2.494404269972452, + "grad_norm": 1.875, + "learning_rate": 0.00010062365465742862, + "loss": 4.5332, + "step": 289750 + }, + { + "epoch": 2.4948347107438016, + "grad_norm": 2.203125, + "learning_rate": 0.00010059656312437207, + "loss": 4.2415, + "step": 289800 + }, + { + "epoch": 2.4952651515151514, + "grad_norm": 3.640625, + "learning_rate": 0.0001005694715475291, + "loss": 4.4303, + "step": 289850 + }, + { + "epoch": 2.495695592286501, + "grad_norm": 2.984375, + "learning_rate": 0.00010054237992888811, + "loss": 4.4299, + "step": 289900 + }, + { + "epoch": 2.4961260330578514, + "grad_norm": 4.21875, + "learning_rate": 0.00010051528827043761, + "loss": 4.5791, + "step": 289950 + }, + { + "epoch": 2.496556473829201, + "grad_norm": 7.125, + "learning_rate": 0.00010048819657416606, + "loss": 4.5008, + "step": 290000 + }, + { + "epoch": 2.496986914600551, + "grad_norm": 3.90625, + "learning_rate": 0.00010046110484206195, + "loss": 4.5097, + "step": 290050 + }, + { + "epoch": 2.497417355371901, + "grad_norm": 3.171875, + "learning_rate": 0.00010043401307611371, + "loss": 4.4785, + "step": 290100 + }, + { + "epoch": 2.4978477961432506, + "grad_norm": 1.5625, + "learning_rate": 0.00010040692127830987, + "loss": 4.389, + "step": 290150 + }, + { + "epoch": 2.4982782369146004, + "grad_norm": 1.796875, + "learning_rate": 0.00010037982945063887, + "loss": 4.2936, + "step": 290200 + }, + { + "epoch": 2.4987086776859506, + "grad_norm": 4.1875, + "learning_rate": 0.00010035273759508919, + "loss": 4.1188, + "step": 290250 + }, + { + "epoch": 2.4991391184573004, + "grad_norm": 3.328125, + "learning_rate": 0.00010032564571364935, + "loss": 4.3657, + "step": 290300 + }, + { + "epoch": 2.49956955922865, + "grad_norm": 4.4375, + "learning_rate": 0.00010029855380830779, + "loss": 4.3694, + "step": 290350 + }, + { + "epoch": 2.5, + "grad_norm": 3.328125, + "learning_rate": 0.00010027146188105307, + "loss": 4.3072, + "step": 290400 + }, + { + "epoch": 2.50043044077135, + "grad_norm": 2.015625, + "learning_rate": 0.00010024436993387361, + "loss": 4.0094, + "step": 290450 + }, + { + "epoch": 2.5008608815426996, + "grad_norm": 2.234375, + "learning_rate": 0.00010021727796875791, + "loss": 4.2795, + "step": 290500 + }, + { + "epoch": 2.5012913223140494, + "grad_norm": 2.578125, + "learning_rate": 0.0001001901859876945, + "loss": 4.1298, + "step": 290550 + }, + { + "epoch": 2.501721763085399, + "grad_norm": 4.0, + "learning_rate": 0.00010016309399267181, + "loss": 4.3885, + "step": 290600 + }, + { + "epoch": 2.5021522038567494, + "grad_norm": 3.9375, + "learning_rate": 0.0001001360019856784, + "loss": 4.7, + "step": 290650 + }, + { + "epoch": 2.502582644628099, + "grad_norm": 6.0, + "learning_rate": 0.00010010890996870272, + "loss": 4.2198, + "step": 290700 + }, + { + "epoch": 2.503013085399449, + "grad_norm": 4.0625, + "learning_rate": 0.0001000818179437333, + "loss": 4.289, + "step": 290750 + }, + { + "epoch": 2.503443526170799, + "grad_norm": 3.296875, + "learning_rate": 0.00010005472591275862, + "loss": 4.1972, + "step": 290800 + }, + { + "epoch": 2.503873966942149, + "grad_norm": 2.046875, + "learning_rate": 0.00010002763387776715, + "loss": 4.49, + "step": 290850 + }, + { + "epoch": 2.504304407713499, + "grad_norm": 2.796875, + "learning_rate": 0.00010000054184074746, + "loss": 4.5324, + "step": 290900 + }, + { + "epoch": 2.5047348484848486, + "grad_norm": 4.625, + "learning_rate": 9.997344980368795e-05, + "loss": 4.7044, + "step": 290950 + }, + { + "epoch": 2.5051652892561984, + "grad_norm": 6.125, + "learning_rate": 9.994635776857717e-05, + "loss": 4.3919, + "step": 291000 + }, + { + "epoch": 2.5051652892561984, + "eval_loss": 5.009791851043701, + "eval_runtime": 24.0994, + "eval_samples_per_second": 26.557, + "eval_steps_per_second": 13.278, + "eval_tts_loss": 7.433074333890757, + "step": 291000 + }, + { + "epoch": 2.505595730027548, + "grad_norm": 1.6640625, + "learning_rate": 9.991926573740364e-05, + "loss": 4.2164, + "step": 291050 + }, + { + "epoch": 2.506026170798898, + "grad_norm": 3.640625, + "learning_rate": 9.989217371215585e-05, + "loss": 4.427, + "step": 291100 + }, + { + "epoch": 2.506456611570248, + "grad_norm": 3.171875, + "learning_rate": 9.986508169482223e-05, + "loss": 4.3297, + "step": 291150 + }, + { + "epoch": 2.5068870523415976, + "grad_norm": 4.8125, + "learning_rate": 9.983798968739132e-05, + "loss": 4.1809, + "step": 291200 + }, + { + "epoch": 2.5073174931129474, + "grad_norm": 2.9375, + "learning_rate": 9.981089769185166e-05, + "loss": 4.3658, + "step": 291250 + }, + { + "epoch": 2.5077479338842976, + "grad_norm": 3.03125, + "learning_rate": 9.978380571019167e-05, + "loss": 4.3938, + "step": 291300 + }, + { + "epoch": 2.5081783746556474, + "grad_norm": 2.890625, + "learning_rate": 9.975671374439993e-05, + "loss": 4.0513, + "step": 291350 + }, + { + "epoch": 2.508608815426997, + "grad_norm": 4.59375, + "learning_rate": 9.972962179646481e-05, + "loss": 4.4905, + "step": 291400 + }, + { + "epoch": 2.509039256198347, + "grad_norm": 4.1875, + "learning_rate": 9.97025298683749e-05, + "loss": 4.2303, + "step": 291450 + }, + { + "epoch": 2.5094696969696972, + "grad_norm": 5.15625, + "learning_rate": 9.967543796211865e-05, + "loss": 4.0584, + "step": 291500 + }, + { + "epoch": 2.509900137741047, + "grad_norm": 2.484375, + "learning_rate": 9.964834607968457e-05, + "loss": 4.0834, + "step": 291550 + }, + { + "epoch": 2.510330578512397, + "grad_norm": 2.265625, + "learning_rate": 9.96212542230611e-05, + "loss": 4.53, + "step": 291600 + }, + { + "epoch": 2.5107610192837466, + "grad_norm": 2.390625, + "learning_rate": 9.95941623942368e-05, + "loss": 4.3153, + "step": 291650 + }, + { + "epoch": 2.5111914600550964, + "grad_norm": 2.453125, + "learning_rate": 9.956707059520007e-05, + "loss": 3.8703, + "step": 291700 + }, + { + "epoch": 2.511621900826446, + "grad_norm": 4.09375, + "learning_rate": 9.953997882793943e-05, + "loss": 4.2936, + "step": 291750 + }, + { + "epoch": 2.512052341597796, + "grad_norm": 2.28125, + "learning_rate": 9.951288709444336e-05, + "loss": 4.3772, + "step": 291800 + }, + { + "epoch": 2.512482782369146, + "grad_norm": 1.75, + "learning_rate": 9.948579539670033e-05, + "loss": 4.2879, + "step": 291850 + }, + { + "epoch": 2.512913223140496, + "grad_norm": 3.03125, + "learning_rate": 9.945870373669883e-05, + "loss": 4.5433, + "step": 291900 + }, + { + "epoch": 2.513343663911846, + "grad_norm": 3.984375, + "learning_rate": 9.94316121164273e-05, + "loss": 4.6699, + "step": 291950 + }, + { + "epoch": 2.5137741046831956, + "grad_norm": 3.609375, + "learning_rate": 9.94045205378742e-05, + "loss": 4.537, + "step": 292000 + }, + { + "epoch": 2.5142045454545454, + "grad_norm": 3.21875, + "learning_rate": 9.937742900302801e-05, + "loss": 4.537, + "step": 292050 + }, + { + "epoch": 2.514634986225895, + "grad_norm": 4.5625, + "learning_rate": 9.935033751387721e-05, + "loss": 4.3589, + "step": 292100 + }, + { + "epoch": 2.5150654269972454, + "grad_norm": 3.21875, + "learning_rate": 9.932324607241028e-05, + "loss": 4.3449, + "step": 292150 + }, + { + "epoch": 2.5154958677685952, + "grad_norm": 2.84375, + "learning_rate": 9.929615468061556e-05, + "loss": 4.543, + "step": 292200 + }, + { + "epoch": 2.515926308539945, + "grad_norm": 3.671875, + "learning_rate": 9.926906334048158e-05, + "loss": 4.1635, + "step": 292250 + }, + { + "epoch": 2.516356749311295, + "grad_norm": 2.25, + "learning_rate": 9.924197205399682e-05, + "loss": 4.2854, + "step": 292300 + }, + { + "epoch": 2.5167871900826446, + "grad_norm": 3.5, + "learning_rate": 9.921488082314964e-05, + "loss": 4.3496, + "step": 292350 + }, + { + "epoch": 2.5172176308539944, + "grad_norm": 3.453125, + "learning_rate": 9.918778964992856e-05, + "loss": 4.4551, + "step": 292400 + }, + { + "epoch": 2.517648071625344, + "grad_norm": 1.75, + "learning_rate": 9.916069853632195e-05, + "loss": 4.4931, + "step": 292450 + }, + { + "epoch": 2.518078512396694, + "grad_norm": 2.375, + "learning_rate": 9.913360748431825e-05, + "loss": 4.0472, + "step": 292500 + }, + { + "epoch": 2.5185089531680442, + "grad_norm": 4.21875, + "learning_rate": 9.910651649590588e-05, + "loss": 3.9426, + "step": 292550 + }, + { + "epoch": 2.518939393939394, + "grad_norm": 2.328125, + "learning_rate": 9.907942557307331e-05, + "loss": 4.8473, + "step": 292600 + }, + { + "epoch": 2.519369834710744, + "grad_norm": 3.109375, + "learning_rate": 9.905233471780894e-05, + "loss": 4.7175, + "step": 292650 + }, + { + "epoch": 2.5198002754820936, + "grad_norm": 3.53125, + "learning_rate": 9.902524393210113e-05, + "loss": 4.1179, + "step": 292700 + }, + { + "epoch": 2.5202307162534434, + "grad_norm": 5.1875, + "learning_rate": 9.899815321793831e-05, + "loss": 4.4922, + "step": 292750 + }, + { + "epoch": 2.5206611570247937, + "grad_norm": 3.484375, + "learning_rate": 9.897106257730892e-05, + "loss": 4.6808, + "step": 292800 + }, + { + "epoch": 2.5210915977961434, + "grad_norm": 2.015625, + "learning_rate": 9.89439720122013e-05, + "loss": 4.5483, + "step": 292850 + }, + { + "epoch": 2.5215220385674932, + "grad_norm": 4.40625, + "learning_rate": 9.891688152460388e-05, + "loss": 4.3343, + "step": 292900 + }, + { + "epoch": 2.521952479338843, + "grad_norm": 5.96875, + "learning_rate": 9.888979111650501e-05, + "loss": 4.1144, + "step": 292950 + }, + { + "epoch": 2.522382920110193, + "grad_norm": 4.4375, + "learning_rate": 9.886270078989309e-05, + "loss": 4.2403, + "step": 293000 + }, + { + "epoch": 2.5228133608815426, + "grad_norm": 2.140625, + "learning_rate": 9.883561054675647e-05, + "loss": 4.4512, + "step": 293050 + }, + { + "epoch": 2.5232438016528924, + "grad_norm": 2.203125, + "learning_rate": 9.880852038908356e-05, + "loss": 4.4038, + "step": 293100 + }, + { + "epoch": 2.523674242424242, + "grad_norm": 2.390625, + "learning_rate": 9.87814303188627e-05, + "loss": 4.5217, + "step": 293150 + }, + { + "epoch": 2.5241046831955924, + "grad_norm": 1.8359375, + "learning_rate": 9.875434033808219e-05, + "loss": 4.3784, + "step": 293200 + }, + { + "epoch": 2.5245351239669422, + "grad_norm": 4.1875, + "learning_rate": 9.872725044873042e-05, + "loss": 4.6214, + "step": 293250 + }, + { + "epoch": 2.524965564738292, + "grad_norm": 3.046875, + "learning_rate": 9.870016065279575e-05, + "loss": 4.4362, + "step": 293300 + }, + { + "epoch": 2.525396005509642, + "grad_norm": 3.4375, + "learning_rate": 9.867307095226648e-05, + "loss": 4.4533, + "step": 293350 + }, + { + "epoch": 2.5258264462809916, + "grad_norm": 4.34375, + "learning_rate": 9.864598134913093e-05, + "loss": 4.121, + "step": 293400 + }, + { + "epoch": 2.526256887052342, + "grad_norm": 2.734375, + "learning_rate": 9.86188918453775e-05, + "loss": 4.3346, + "step": 293450 + }, + { + "epoch": 2.5266873278236917, + "grad_norm": 2.28125, + "learning_rate": 9.85918024429944e-05, + "loss": 4.4135, + "step": 293500 + }, + { + "epoch": 2.5271177685950414, + "grad_norm": 3.1875, + "learning_rate": 9.856471314396994e-05, + "loss": 4.0899, + "step": 293550 + }, + { + "epoch": 2.5275482093663912, + "grad_norm": 4.75, + "learning_rate": 9.853762395029247e-05, + "loss": 4.2385, + "step": 293600 + }, + { + "epoch": 2.527978650137741, + "grad_norm": 4.0625, + "learning_rate": 9.851053486395027e-05, + "loss": 4.1959, + "step": 293650 + }, + { + "epoch": 2.528409090909091, + "grad_norm": 2.890625, + "learning_rate": 9.848344588693161e-05, + "loss": 4.6997, + "step": 293700 + }, + { + "epoch": 2.5288395316804406, + "grad_norm": 1.9921875, + "learning_rate": 9.845635702122474e-05, + "loss": 4.1244, + "step": 293750 + }, + { + "epoch": 2.5292699724517904, + "grad_norm": 3.828125, + "learning_rate": 9.842926826881796e-05, + "loss": 4.0847, + "step": 293800 + }, + { + "epoch": 2.5297004132231407, + "grad_norm": 4.625, + "learning_rate": 9.840217963169947e-05, + "loss": 4.4527, + "step": 293850 + }, + { + "epoch": 2.5301308539944904, + "grad_norm": 2.28125, + "learning_rate": 9.837509111185758e-05, + "loss": 4.1335, + "step": 293900 + }, + { + "epoch": 2.5305612947658402, + "grad_norm": 2.890625, + "learning_rate": 9.834800271128054e-05, + "loss": 4.1885, + "step": 293950 + }, + { + "epoch": 2.53099173553719, + "grad_norm": 3.359375, + "learning_rate": 9.832091443195653e-05, + "loss": 4.0911, + "step": 294000 + }, + { + "epoch": 2.53099173553719, + "eval_loss": 5.008902072906494, + "eval_runtime": 24.6594, + "eval_samples_per_second": 25.954, + "eval_steps_per_second": 12.977, + "eval_tts_loss": 7.441221903996289, + "step": 294000 + }, + { + "epoch": 2.53142217630854, + "grad_norm": 3.5, + "learning_rate": 9.829382627587376e-05, + "loss": 4.6011, + "step": 294050 + }, + { + "epoch": 2.53185261707989, + "grad_norm": 1.9375, + "learning_rate": 9.826673824502046e-05, + "loss": 4.3629, + "step": 294100 + }, + { + "epoch": 2.53228305785124, + "grad_norm": 3.984375, + "learning_rate": 9.823965034138488e-05, + "loss": 4.542, + "step": 294150 + }, + { + "epoch": 2.5327134986225897, + "grad_norm": 2.359375, + "learning_rate": 9.82125625669552e-05, + "loss": 4.335, + "step": 294200 + }, + { + "epoch": 2.5331439393939394, + "grad_norm": 1.359375, + "learning_rate": 9.818547492371953e-05, + "loss": 4.2868, + "step": 294250 + }, + { + "epoch": 2.5335743801652892, + "grad_norm": 2.796875, + "learning_rate": 9.815838741366611e-05, + "loss": 4.1282, + "step": 294300 + }, + { + "epoch": 2.534004820936639, + "grad_norm": 0.73828125, + "learning_rate": 9.81313000387831e-05, + "loss": 4.3212, + "step": 294350 + }, + { + "epoch": 2.534435261707989, + "grad_norm": 3.265625, + "learning_rate": 9.810421280105863e-05, + "loss": 4.2713, + "step": 294400 + }, + { + "epoch": 2.5348657024793386, + "grad_norm": 2.53125, + "learning_rate": 9.80771257024809e-05, + "loss": 4.413, + "step": 294450 + }, + { + "epoch": 2.535296143250689, + "grad_norm": 4.71875, + "learning_rate": 9.805003874503796e-05, + "loss": 4.48, + "step": 294500 + }, + { + "epoch": 2.5357265840220387, + "grad_norm": 2.75, + "learning_rate": 9.802295193071803e-05, + "loss": 4.4144, + "step": 294550 + }, + { + "epoch": 2.5361570247933884, + "grad_norm": 2.390625, + "learning_rate": 9.799586526150913e-05, + "loss": 4.4056, + "step": 294600 + }, + { + "epoch": 2.5365874655647382, + "grad_norm": 3.15625, + "learning_rate": 9.796877873939943e-05, + "loss": 4.5464, + "step": 294650 + }, + { + "epoch": 2.537017906336088, + "grad_norm": 2.953125, + "learning_rate": 9.794169236637702e-05, + "loss": 4.303, + "step": 294700 + }, + { + "epoch": 2.5374483471074383, + "grad_norm": 2.15625, + "learning_rate": 9.791460614442992e-05, + "loss": 4.0937, + "step": 294750 + }, + { + "epoch": 2.537878787878788, + "grad_norm": 2.5, + "learning_rate": 9.788752007554625e-05, + "loss": 4.4437, + "step": 294800 + }, + { + "epoch": 2.538309228650138, + "grad_norm": 4.0625, + "learning_rate": 9.786043416171407e-05, + "loss": 4.3343, + "step": 294850 + }, + { + "epoch": 2.5387396694214877, + "grad_norm": 2.84375, + "learning_rate": 9.78333484049214e-05, + "loss": 5.0317, + "step": 294900 + }, + { + "epoch": 2.5391701101928374, + "grad_norm": 2.875, + "learning_rate": 9.780626280715635e-05, + "loss": 4.1116, + "step": 294950 + }, + { + "epoch": 2.5396005509641872, + "grad_norm": 4.65625, + "learning_rate": 9.777917737040684e-05, + "loss": 4.694, + "step": 295000 + }, + { + "epoch": 2.540030991735537, + "grad_norm": 2.453125, + "learning_rate": 9.775209209666095e-05, + "loss": 4.398, + "step": 295050 + }, + { + "epoch": 2.540461432506887, + "grad_norm": 3.09375, + "learning_rate": 9.772500698790665e-05, + "loss": 4.5284, + "step": 295100 + }, + { + "epoch": 2.540891873278237, + "grad_norm": 5.0, + "learning_rate": 9.769792204613196e-05, + "loss": 4.2916, + "step": 295150 + }, + { + "epoch": 2.541322314049587, + "grad_norm": 0.984375, + "learning_rate": 9.767083727332483e-05, + "loss": 4.2949, + "step": 295200 + }, + { + "epoch": 2.5417527548209367, + "grad_norm": 3.0, + "learning_rate": 9.764375267147326e-05, + "loss": 4.4463, + "step": 295250 + }, + { + "epoch": 2.5421831955922864, + "grad_norm": 3.4375, + "learning_rate": 9.761666824256512e-05, + "loss": 4.1976, + "step": 295300 + }, + { + "epoch": 2.5426136363636362, + "grad_norm": 4.59375, + "learning_rate": 9.758958398858845e-05, + "loss": 4.4993, + "step": 295350 + }, + { + "epoch": 2.5430440771349865, + "grad_norm": 3.34375, + "learning_rate": 9.756249991153109e-05, + "loss": 4.1564, + "step": 295400 + }, + { + "epoch": 2.5434745179063363, + "grad_norm": 2.28125, + "learning_rate": 9.7535416013381e-05, + "loss": 4.7065, + "step": 295450 + }, + { + "epoch": 2.543904958677686, + "grad_norm": 2.828125, + "learning_rate": 9.750833229612611e-05, + "loss": 4.4891, + "step": 295500 + }, + { + "epoch": 2.544335399449036, + "grad_norm": 3.34375, + "learning_rate": 9.748124876175426e-05, + "loss": 4.3254, + "step": 295550 + }, + { + "epoch": 2.5447658402203857, + "grad_norm": 1.6015625, + "learning_rate": 9.745416541225329e-05, + "loss": 4.2103, + "step": 295600 + }, + { + "epoch": 2.5451962809917354, + "grad_norm": 6.65625, + "learning_rate": 9.742708224961112e-05, + "loss": 4.2909, + "step": 295650 + }, + { + "epoch": 2.5456267217630852, + "grad_norm": 2.375, + "learning_rate": 9.739999927581558e-05, + "loss": 4.0355, + "step": 295700 + }, + { + "epoch": 2.546057162534435, + "grad_norm": 2.5, + "learning_rate": 9.737291649285455e-05, + "loss": 4.1197, + "step": 295750 + }, + { + "epoch": 2.5464876033057853, + "grad_norm": 4.0625, + "learning_rate": 9.734583390271573e-05, + "loss": 4.5149, + "step": 295800 + }, + { + "epoch": 2.546918044077135, + "grad_norm": 3.6875, + "learning_rate": 9.731875150738699e-05, + "loss": 4.6415, + "step": 295850 + }, + { + "epoch": 2.547348484848485, + "grad_norm": 4.3125, + "learning_rate": 9.729166930885616e-05, + "loss": 4.3265, + "step": 295900 + }, + { + "epoch": 2.5477789256198347, + "grad_norm": 3.78125, + "learning_rate": 9.726458730911093e-05, + "loss": 4.6339, + "step": 295950 + }, + { + "epoch": 2.5482093663911844, + "grad_norm": 3.703125, + "learning_rate": 9.723750551013916e-05, + "loss": 4.1391, + "step": 296000 + }, + { + "epoch": 2.5486398071625347, + "grad_norm": 2.734375, + "learning_rate": 9.721042391392852e-05, + "loss": 4.2668, + "step": 296050 + }, + { + "epoch": 2.5490702479338845, + "grad_norm": 3.078125, + "learning_rate": 9.718334252246676e-05, + "loss": 4.5348, + "step": 296100 + }, + { + "epoch": 2.5495006887052343, + "grad_norm": 4.875, + "learning_rate": 9.715626133774158e-05, + "loss": 4.3282, + "step": 296150 + }, + { + "epoch": 2.549931129476584, + "grad_norm": 2.921875, + "learning_rate": 9.712918036174072e-05, + "loss": 4.2539, + "step": 296200 + }, + { + "epoch": 2.550361570247934, + "grad_norm": 3.21875, + "learning_rate": 9.710209959645187e-05, + "loss": 4.851, + "step": 296250 + }, + { + "epoch": 2.5507920110192837, + "grad_norm": 2.234375, + "learning_rate": 9.707501904386264e-05, + "loss": 4.5381, + "step": 296300 + }, + { + "epoch": 2.5512224517906334, + "grad_norm": 2.171875, + "learning_rate": 9.704793870596071e-05, + "loss": 4.3863, + "step": 296350 + }, + { + "epoch": 2.5516528925619832, + "grad_norm": 3.546875, + "learning_rate": 9.702085858473375e-05, + "loss": 4.4474, + "step": 296400 + }, + { + "epoch": 2.5520833333333335, + "grad_norm": 3.15625, + "learning_rate": 9.699377868216932e-05, + "loss": 4.3004, + "step": 296450 + }, + { + "epoch": 2.5525137741046833, + "grad_norm": 2.375, + "learning_rate": 9.696669900025513e-05, + "loss": 4.4782, + "step": 296500 + }, + { + "epoch": 2.552944214876033, + "grad_norm": 3.296875, + "learning_rate": 9.693961954097866e-05, + "loss": 4.3658, + "step": 296550 + }, + { + "epoch": 2.553374655647383, + "grad_norm": 4.0, + "learning_rate": 9.691254030632756e-05, + "loss": 4.2612, + "step": 296600 + }, + { + "epoch": 2.5538050964187327, + "grad_norm": 5.59375, + "learning_rate": 9.688546129828933e-05, + "loss": 4.4855, + "step": 296650 + }, + { + "epoch": 2.554235537190083, + "grad_norm": 2.578125, + "learning_rate": 9.685838251885157e-05, + "loss": 4.4261, + "step": 296700 + }, + { + "epoch": 2.5546659779614327, + "grad_norm": 3.171875, + "learning_rate": 9.683130397000178e-05, + "loss": 4.4219, + "step": 296750 + }, + { + "epoch": 2.5550964187327825, + "grad_norm": 3.328125, + "learning_rate": 9.680422565372744e-05, + "loss": 4.3975, + "step": 296800 + }, + { + "epoch": 2.5555268595041323, + "grad_norm": 2.65625, + "learning_rate": 9.677714757201603e-05, + "loss": 4.1262, + "step": 296850 + }, + { + "epoch": 2.555957300275482, + "grad_norm": 3.265625, + "learning_rate": 9.67500697268551e-05, + "loss": 4.4234, + "step": 296900 + }, + { + "epoch": 2.556387741046832, + "grad_norm": 3.34375, + "learning_rate": 9.672299212023202e-05, + "loss": 4.505, + "step": 296950 + }, + { + "epoch": 2.5568181818181817, + "grad_norm": 2.359375, + "learning_rate": 9.669591475413429e-05, + "loss": 4.6743, + "step": 297000 + }, + { + "epoch": 2.5568181818181817, + "eval_loss": 5.007340431213379, + "eval_runtime": 24.2976, + "eval_samples_per_second": 26.34, + "eval_steps_per_second": 13.17, + "eval_tts_loss": 7.371025063424796, + "step": 297000 + }, + { + "epoch": 2.5572486225895315, + "grad_norm": 3.203125, + "learning_rate": 9.666883763054933e-05, + "loss": 4.1647, + "step": 297050 + }, + { + "epoch": 2.5576790633608817, + "grad_norm": 2.703125, + "learning_rate": 9.664176075146452e-05, + "loss": 4.2352, + "step": 297100 + }, + { + "epoch": 2.5581095041322315, + "grad_norm": 3.671875, + "learning_rate": 9.661468411886721e-05, + "loss": 4.6513, + "step": 297150 + }, + { + "epoch": 2.5585399449035813, + "grad_norm": 2.703125, + "learning_rate": 9.65876077347448e-05, + "loss": 4.4155, + "step": 297200 + }, + { + "epoch": 2.558970385674931, + "grad_norm": 2.0, + "learning_rate": 9.656053160108466e-05, + "loss": 4.3145, + "step": 297250 + }, + { + "epoch": 2.559400826446281, + "grad_norm": 3.03125, + "learning_rate": 9.653345571987413e-05, + "loss": 4.5716, + "step": 297300 + }, + { + "epoch": 2.559831267217631, + "grad_norm": 2.3125, + "learning_rate": 9.650638009310044e-05, + "loss": 3.7966, + "step": 297350 + }, + { + "epoch": 2.560261707988981, + "grad_norm": 2.9375, + "learning_rate": 9.647930472275097e-05, + "loss": 4.9684, + "step": 297400 + }, + { + "epoch": 2.5606921487603307, + "grad_norm": 2.8125, + "learning_rate": 9.645222961081292e-05, + "loss": 4.0982, + "step": 297450 + }, + { + "epoch": 2.5611225895316805, + "grad_norm": 4.0625, + "learning_rate": 9.64251547592736e-05, + "loss": 4.3799, + "step": 297500 + }, + { + "epoch": 2.5615530303030303, + "grad_norm": 3.765625, + "learning_rate": 9.639808017012027e-05, + "loss": 4.2548, + "step": 297550 + }, + { + "epoch": 2.56198347107438, + "grad_norm": 3.5625, + "learning_rate": 9.637100584534008e-05, + "loss": 4.3646, + "step": 297600 + }, + { + "epoch": 2.56241391184573, + "grad_norm": 3.328125, + "learning_rate": 9.634393178692023e-05, + "loss": 4.1925, + "step": 297650 + }, + { + "epoch": 2.5628443526170797, + "grad_norm": 2.453125, + "learning_rate": 9.631685799684792e-05, + "loss": 4.6156, + "step": 297700 + }, + { + "epoch": 2.56327479338843, + "grad_norm": 3.25, + "learning_rate": 9.628978447711032e-05, + "loss": 4.0818, + "step": 297750 + }, + { + "epoch": 2.5637052341597797, + "grad_norm": 2.53125, + "learning_rate": 9.62627112296946e-05, + "loss": 4.1033, + "step": 297800 + }, + { + "epoch": 2.5641356749311295, + "grad_norm": 1.40625, + "learning_rate": 9.623563825658778e-05, + "loss": 4.2805, + "step": 297850 + }, + { + "epoch": 2.5645661157024793, + "grad_norm": 1.6875, + "learning_rate": 9.620856555977701e-05, + "loss": 4.1623, + "step": 297900 + }, + { + "epoch": 2.564996556473829, + "grad_norm": 4.46875, + "learning_rate": 9.61814931412494e-05, + "loss": 4.4439, + "step": 297950 + }, + { + "epoch": 2.5654269972451793, + "grad_norm": 3.640625, + "learning_rate": 9.615442100299195e-05, + "loss": 4.342, + "step": 298000 + }, + { + "epoch": 2.565857438016529, + "grad_norm": 2.8125, + "learning_rate": 9.612734914699178e-05, + "loss": 4.2367, + "step": 298050 + }, + { + "epoch": 2.566287878787879, + "grad_norm": 2.3125, + "learning_rate": 9.610027757523579e-05, + "loss": 4.1732, + "step": 298100 + }, + { + "epoch": 2.5667183195592287, + "grad_norm": 4.40625, + "learning_rate": 9.607320628971106e-05, + "loss": 4.0481, + "step": 298150 + }, + { + "epoch": 2.5671487603305785, + "grad_norm": 1.5625, + "learning_rate": 9.604613529240453e-05, + "loss": 4.5346, + "step": 298200 + }, + { + "epoch": 2.5675792011019283, + "grad_norm": 6.0, + "learning_rate": 9.601906458530316e-05, + "loss": 4.3702, + "step": 298250 + }, + { + "epoch": 2.568009641873278, + "grad_norm": 2.96875, + "learning_rate": 9.599199417039393e-05, + "loss": 4.2452, + "step": 298300 + }, + { + "epoch": 2.568440082644628, + "grad_norm": 3.15625, + "learning_rate": 9.596492404966365e-05, + "loss": 4.4884, + "step": 298350 + }, + { + "epoch": 2.568870523415978, + "grad_norm": 6.28125, + "learning_rate": 9.593785422509927e-05, + "loss": 4.8544, + "step": 298400 + }, + { + "epoch": 2.569300964187328, + "grad_norm": 2.34375, + "learning_rate": 9.591078469868767e-05, + "loss": 4.5118, + "step": 298450 + }, + { + "epoch": 2.5697314049586777, + "grad_norm": 3.859375, + "learning_rate": 9.588371547241565e-05, + "loss": 4.3172, + "step": 298500 + }, + { + "epoch": 2.5701618457300275, + "grad_norm": 1.2734375, + "learning_rate": 9.585664654827011e-05, + "loss": 4.4204, + "step": 298550 + }, + { + "epoch": 2.5705922865013773, + "grad_norm": 2.390625, + "learning_rate": 9.582957792823773e-05, + "loss": 4.5699, + "step": 298600 + }, + { + "epoch": 2.5710227272727275, + "grad_norm": 5.53125, + "learning_rate": 9.58025096143054e-05, + "loss": 4.1956, + "step": 298650 + }, + { + "epoch": 2.5714531680440773, + "grad_norm": 1.6796875, + "learning_rate": 9.577544160845981e-05, + "loss": 4.45, + "step": 298700 + }, + { + "epoch": 2.571883608815427, + "grad_norm": 2.90625, + "learning_rate": 9.574837391268774e-05, + "loss": 4.5122, + "step": 298750 + }, + { + "epoch": 2.572314049586777, + "grad_norm": 1.3515625, + "learning_rate": 9.572130652897585e-05, + "loss": 4.0651, + "step": 298800 + }, + { + "epoch": 2.5727444903581267, + "grad_norm": 2.671875, + "learning_rate": 9.56942394593109e-05, + "loss": 4.397, + "step": 298850 + }, + { + "epoch": 2.5731749311294765, + "grad_norm": 4.96875, + "learning_rate": 9.566717270567946e-05, + "loss": 4.3147, + "step": 298900 + }, + { + "epoch": 2.5736053719008263, + "grad_norm": 2.625, + "learning_rate": 9.564010627006825e-05, + "loss": 4.6492, + "step": 298950 + }, + { + "epoch": 2.574035812672176, + "grad_norm": 4.21875, + "learning_rate": 9.561304015446384e-05, + "loss": 4.2182, + "step": 299000 + }, + { + "epoch": 2.5744662534435263, + "grad_norm": 3.015625, + "learning_rate": 9.558597436085285e-05, + "loss": 4.0775, + "step": 299050 + }, + { + "epoch": 2.574896694214876, + "grad_norm": 2.84375, + "learning_rate": 9.555890889122188e-05, + "loss": 4.2896, + "step": 299100 + }, + { + "epoch": 2.575327134986226, + "grad_norm": 3.171875, + "learning_rate": 9.553184374755741e-05, + "loss": 4.6974, + "step": 299150 + }, + { + "epoch": 2.5757575757575757, + "grad_norm": 2.953125, + "learning_rate": 9.550477893184598e-05, + "loss": 4.0553, + "step": 299200 + }, + { + "epoch": 2.5761880165289255, + "grad_norm": 3.171875, + "learning_rate": 9.547771444607411e-05, + "loss": 4.1128, + "step": 299250 + }, + { + "epoch": 2.5766184573002757, + "grad_norm": 3.078125, + "learning_rate": 9.54506502922283e-05, + "loss": 4.7973, + "step": 299300 + }, + { + "epoch": 2.5770488980716255, + "grad_norm": 2.6875, + "learning_rate": 9.542358647229497e-05, + "loss": 4.7141, + "step": 299350 + }, + { + "epoch": 2.5774793388429753, + "grad_norm": 3.390625, + "learning_rate": 9.539652298826052e-05, + "loss": 4.482, + "step": 299400 + }, + { + "epoch": 2.577909779614325, + "grad_norm": 3.296875, + "learning_rate": 9.536945984211136e-05, + "loss": 4.3513, + "step": 299450 + }, + { + "epoch": 2.578340220385675, + "grad_norm": 3.6875, + "learning_rate": 9.534239703583393e-05, + "loss": 4.7105, + "step": 299500 + }, + { + "epoch": 2.5787706611570247, + "grad_norm": 3.15625, + "learning_rate": 9.531533457141451e-05, + "loss": 4.2576, + "step": 299550 + }, + { + "epoch": 2.5792011019283745, + "grad_norm": 3.0, + "learning_rate": 9.528827245083948e-05, + "loss": 5.111, + "step": 299600 + }, + { + "epoch": 2.5796315426997243, + "grad_norm": 4.3125, + "learning_rate": 9.526121067609511e-05, + "loss": 4.373, + "step": 299650 + }, + { + "epoch": 2.5800619834710745, + "grad_norm": 0.640625, + "learning_rate": 9.523414924916765e-05, + "loss": 4.4143, + "step": 299700 + }, + { + "epoch": 2.5804924242424243, + "grad_norm": 3.140625, + "learning_rate": 9.520708817204338e-05, + "loss": 4.4131, + "step": 299750 + }, + { + "epoch": 2.580922865013774, + "grad_norm": 2.921875, + "learning_rate": 9.518002744670855e-05, + "loss": 4.2544, + "step": 299800 + }, + { + "epoch": 2.581353305785124, + "grad_norm": 3.015625, + "learning_rate": 9.515296707514936e-05, + "loss": 4.5132, + "step": 299850 + }, + { + "epoch": 2.5817837465564737, + "grad_norm": 2.515625, + "learning_rate": 9.51259070593519e-05, + "loss": 4.3841, + "step": 299900 + }, + { + "epoch": 2.582214187327824, + "grad_norm": 5.25, + "learning_rate": 9.50988474013024e-05, + "loss": 4.3082, + "step": 299950 + }, + { + "epoch": 2.5826446280991737, + "grad_norm": 3.078125, + "learning_rate": 9.507178810298696e-05, + "loss": 4.4722, + "step": 300000 + }, + { + "epoch": 2.5826446280991737, + "eval_loss": 5.005288124084473, + "eval_runtime": 24.7806, + "eval_samples_per_second": 25.827, + "eval_steps_per_second": 12.913, + "eval_tts_loss": 7.403621951830839, + "step": 300000 + }, + { + "epoch": 2.5830750688705235, + "grad_norm": 2.546875, + "learning_rate": 9.504472916639166e-05, + "loss": 4.026, + "step": 300050 + }, + { + "epoch": 2.5835055096418733, + "grad_norm": 2.953125, + "learning_rate": 9.50176705935026e-05, + "loss": 4.4608, + "step": 300100 + }, + { + "epoch": 2.583935950413223, + "grad_norm": 2.65625, + "learning_rate": 9.499061238630578e-05, + "loss": 4.0954, + "step": 300150 + }, + { + "epoch": 2.584366391184573, + "grad_norm": 2.109375, + "learning_rate": 9.496355454678724e-05, + "loss": 4.4024, + "step": 300200 + }, + { + "epoch": 2.5847968319559227, + "grad_norm": 1.8046875, + "learning_rate": 9.493649707693295e-05, + "loss": 4.1422, + "step": 300250 + }, + { + "epoch": 2.5852272727272725, + "grad_norm": 1.7734375, + "learning_rate": 9.490943997872887e-05, + "loss": 4.2486, + "step": 300300 + }, + { + "epoch": 2.5856577134986227, + "grad_norm": 4.03125, + "learning_rate": 9.4882383254161e-05, + "loss": 4.6009, + "step": 300350 + }, + { + "epoch": 2.5860881542699725, + "grad_norm": 0.9921875, + "learning_rate": 9.485532690521514e-05, + "loss": 4.3778, + "step": 300400 + }, + { + "epoch": 2.5865185950413223, + "grad_norm": 3.03125, + "learning_rate": 9.48282709338772e-05, + "loss": 4.1779, + "step": 300450 + }, + { + "epoch": 2.586949035812672, + "grad_norm": 2.96875, + "learning_rate": 9.480121534213307e-05, + "loss": 4.3436, + "step": 300500 + }, + { + "epoch": 2.587379476584022, + "grad_norm": 1.7890625, + "learning_rate": 9.477416013196854e-05, + "loss": 4.2219, + "step": 300550 + }, + { + "epoch": 2.587809917355372, + "grad_norm": 3.71875, + "learning_rate": 9.474710530536941e-05, + "loss": 4.4922, + "step": 300600 + }, + { + "epoch": 2.588240358126722, + "grad_norm": 4.34375, + "learning_rate": 9.472005086432149e-05, + "loss": 4.5593, + "step": 300650 + }, + { + "epoch": 2.5886707988980717, + "grad_norm": 4.28125, + "learning_rate": 9.469299681081046e-05, + "loss": 4.2956, + "step": 300700 + }, + { + "epoch": 2.5891012396694215, + "grad_norm": 5.96875, + "learning_rate": 9.466594314682201e-05, + "loss": 4.4582, + "step": 300750 + }, + { + "epoch": 2.5895316804407713, + "grad_norm": 3.359375, + "learning_rate": 9.463888987434187e-05, + "loss": 4.2782, + "step": 300800 + }, + { + "epoch": 2.589962121212121, + "grad_norm": 3.34375, + "learning_rate": 9.461183699535568e-05, + "loss": 4.3627, + "step": 300850 + }, + { + "epoch": 2.590392561983471, + "grad_norm": 6.59375, + "learning_rate": 9.458478451184911e-05, + "loss": 4.5938, + "step": 300900 + }, + { + "epoch": 2.5908230027548207, + "grad_norm": 1.7734375, + "learning_rate": 9.455773242580765e-05, + "loss": 4.3129, + "step": 300950 + }, + { + "epoch": 2.591253443526171, + "grad_norm": 1.7890625, + "learning_rate": 9.453068073921694e-05, + "loss": 4.263, + "step": 301000 + }, + { + "epoch": 2.5916838842975207, + "grad_norm": 3.375, + "learning_rate": 9.450362945406247e-05, + "loss": 4.3501, + "step": 301050 + }, + { + "epoch": 2.5921143250688705, + "grad_norm": 4.75, + "learning_rate": 9.447657857232977e-05, + "loss": 4.1093, + "step": 301100 + }, + { + "epoch": 2.5925447658402203, + "grad_norm": 5.125, + "learning_rate": 9.444952809600438e-05, + "loss": 4.3862, + "step": 301150 + }, + { + "epoch": 2.59297520661157, + "grad_norm": 3.21875, + "learning_rate": 9.442247802707166e-05, + "loss": 4.3417, + "step": 301200 + }, + { + "epoch": 2.5934056473829203, + "grad_norm": 2.4375, + "learning_rate": 9.439542836751703e-05, + "loss": 4.2905, + "step": 301250 + }, + { + "epoch": 2.59383608815427, + "grad_norm": 4.34375, + "learning_rate": 9.436837911932589e-05, + "loss": 3.9661, + "step": 301300 + }, + { + "epoch": 2.59426652892562, + "grad_norm": 2.46875, + "learning_rate": 9.434133028448362e-05, + "loss": 4.647, + "step": 301350 + }, + { + "epoch": 2.5946969696969697, + "grad_norm": 3.140625, + "learning_rate": 9.431428186497558e-05, + "loss": 4.1314, + "step": 301400 + }, + { + "epoch": 2.5951274104683195, + "grad_norm": 1.9609375, + "learning_rate": 9.428723386278695e-05, + "loss": 4.648, + "step": 301450 + }, + { + "epoch": 2.5955578512396693, + "grad_norm": 2.9375, + "learning_rate": 9.426018627990308e-05, + "loss": 4.2566, + "step": 301500 + }, + { + "epoch": 2.595988292011019, + "grad_norm": 5.5625, + "learning_rate": 9.42331391183092e-05, + "loss": 4.388, + "step": 301550 + }, + { + "epoch": 2.596418732782369, + "grad_norm": 4.40625, + "learning_rate": 9.420609237999048e-05, + "loss": 4.2848, + "step": 301600 + }, + { + "epoch": 2.596849173553719, + "grad_norm": 4.75, + "learning_rate": 9.417904606693216e-05, + "loss": 4.2478, + "step": 301650 + }, + { + "epoch": 2.597279614325069, + "grad_norm": 4.375, + "learning_rate": 9.41520001811193e-05, + "loss": 4.457, + "step": 301700 + }, + { + "epoch": 2.5977100550964187, + "grad_norm": 2.71875, + "learning_rate": 9.412495472453705e-05, + "loss": 4.2519, + "step": 301750 + }, + { + "epoch": 2.5981404958677685, + "grad_norm": 5.8125, + "learning_rate": 9.409790969917048e-05, + "loss": 4.4381, + "step": 301800 + }, + { + "epoch": 2.5985709366391183, + "grad_norm": 4.1875, + "learning_rate": 9.407086510700464e-05, + "loss": 4.5783, + "step": 301850 + }, + { + "epoch": 2.5990013774104685, + "grad_norm": 3.28125, + "learning_rate": 9.404382095002457e-05, + "loss": 4.3697, + "step": 301900 + }, + { + "epoch": 2.5994318181818183, + "grad_norm": 3.453125, + "learning_rate": 9.40167772302152e-05, + "loss": 4.6052, + "step": 301950 + }, + { + "epoch": 2.599862258953168, + "grad_norm": 3.125, + "learning_rate": 9.398973394956148e-05, + "loss": 4.3887, + "step": 302000 + }, + { + "epoch": 2.600292699724518, + "grad_norm": 3.875, + "learning_rate": 9.39626911100484e-05, + "loss": 4.4233, + "step": 302050 + }, + { + "epoch": 2.6007231404958677, + "grad_norm": 3.171875, + "learning_rate": 9.393564871366078e-05, + "loss": 4.5385, + "step": 302100 + }, + { + "epoch": 2.6011535812672175, + "grad_norm": 2.21875, + "learning_rate": 9.390860676238354e-05, + "loss": 3.8624, + "step": 302150 + }, + { + "epoch": 2.6015840220385673, + "grad_norm": 3.265625, + "learning_rate": 9.388156525820142e-05, + "loss": 4.312, + "step": 302200 + }, + { + "epoch": 2.602014462809917, + "grad_norm": 2.8125, + "learning_rate": 9.385452420309925e-05, + "loss": 4.5539, + "step": 302250 + }, + { + "epoch": 2.6024449035812673, + "grad_norm": 1.3515625, + "learning_rate": 9.382748359906176e-05, + "loss": 4.2204, + "step": 302300 + }, + { + "epoch": 2.602875344352617, + "grad_norm": 6.21875, + "learning_rate": 9.38004434480737e-05, + "loss": 4.3204, + "step": 302350 + }, + { + "epoch": 2.603305785123967, + "grad_norm": 1.21875, + "learning_rate": 9.37734037521198e-05, + "loss": 4.338, + "step": 302400 + }, + { + "epoch": 2.6037362258953167, + "grad_norm": 3.671875, + "learning_rate": 9.374636451318463e-05, + "loss": 4.3437, + "step": 302450 + }, + { + "epoch": 2.6041666666666665, + "grad_norm": 5.09375, + "learning_rate": 9.371932573325285e-05, + "loss": 4.466, + "step": 302500 + }, + { + "epoch": 2.6045971074380168, + "grad_norm": 2.703125, + "learning_rate": 9.369228741430906e-05, + "loss": 4.1902, + "step": 302550 + }, + { + "epoch": 2.6050275482093666, + "grad_norm": 3.140625, + "learning_rate": 9.366524955833779e-05, + "loss": 4.3701, + "step": 302600 + }, + { + "epoch": 2.6054579889807163, + "grad_norm": 4.6875, + "learning_rate": 9.363821216732356e-05, + "loss": 4.3691, + "step": 302650 + }, + { + "epoch": 2.605888429752066, + "grad_norm": 1.7421875, + "learning_rate": 9.361117524325091e-05, + "loss": 4.4504, + "step": 302700 + }, + { + "epoch": 2.606318870523416, + "grad_norm": 2.75, + "learning_rate": 9.358413878810424e-05, + "loss": 4.3391, + "step": 302750 + }, + { + "epoch": 2.6067493112947657, + "grad_norm": 2.71875, + "learning_rate": 9.355710280386796e-05, + "loss": 4.5579, + "step": 302800 + }, + { + "epoch": 2.6071797520661155, + "grad_norm": 2.796875, + "learning_rate": 9.353006729252646e-05, + "loss": 4.1653, + "step": 302850 + }, + { + "epoch": 2.6076101928374653, + "grad_norm": 7.125, + "learning_rate": 9.350303225606413e-05, + "loss": 4.2967, + "step": 302900 + }, + { + "epoch": 2.6080406336088156, + "grad_norm": 3.21875, + "learning_rate": 9.347599769646526e-05, + "loss": 4.4816, + "step": 302950 + }, + { + "epoch": 2.6084710743801653, + "grad_norm": 2.828125, + "learning_rate": 9.344896361571411e-05, + "loss": 4.0751, + "step": 303000 + }, + { + "epoch": 2.6084710743801653, + "eval_loss": 5.003540515899658, + "eval_runtime": 24.2401, + "eval_samples_per_second": 26.403, + "eval_steps_per_second": 13.201, + "eval_tts_loss": 7.406452009501407, + "step": 303000 + }, + { + "epoch": 2.608901515151515, + "grad_norm": 2.515625, + "learning_rate": 9.342193001579491e-05, + "loss": 3.9798, + "step": 303050 + }, + { + "epoch": 2.609331955922865, + "grad_norm": 2.453125, + "learning_rate": 9.339489689869192e-05, + "loss": 4.2675, + "step": 303100 + }, + { + "epoch": 2.6097623966942147, + "grad_norm": 3.78125, + "learning_rate": 9.336786426638927e-05, + "loss": 4.3256, + "step": 303150 + }, + { + "epoch": 2.610192837465565, + "grad_norm": 2.890625, + "learning_rate": 9.334083212087115e-05, + "loss": 4.5578, + "step": 303200 + }, + { + "epoch": 2.6106232782369148, + "grad_norm": 1.9375, + "learning_rate": 9.331380046412161e-05, + "loss": 4.2694, + "step": 303250 + }, + { + "epoch": 2.6110537190082646, + "grad_norm": 2.671875, + "learning_rate": 9.32867692981247e-05, + "loss": 3.8954, + "step": 303300 + }, + { + "epoch": 2.6114841597796143, + "grad_norm": 4.0625, + "learning_rate": 9.325973862486449e-05, + "loss": 4.3279, + "step": 303350 + }, + { + "epoch": 2.611914600550964, + "grad_norm": 3.140625, + "learning_rate": 9.323270844632498e-05, + "loss": 4.3109, + "step": 303400 + }, + { + "epoch": 2.612345041322314, + "grad_norm": 1.46875, + "learning_rate": 9.320567876449013e-05, + "loss": 4.4989, + "step": 303450 + }, + { + "epoch": 2.6127754820936637, + "grad_norm": 4.5625, + "learning_rate": 9.31786495813438e-05, + "loss": 4.5941, + "step": 303500 + }, + { + "epoch": 2.6132059228650135, + "grad_norm": 1.78125, + "learning_rate": 9.31516208988699e-05, + "loss": 4.352, + "step": 303550 + }, + { + "epoch": 2.6136363636363638, + "grad_norm": 4.6875, + "learning_rate": 9.312459271905234e-05, + "loss": 4.4323, + "step": 303600 + }, + { + "epoch": 2.6140668044077136, + "grad_norm": 1.5859375, + "learning_rate": 9.309756504387484e-05, + "loss": 4.2064, + "step": 303650 + }, + { + "epoch": 2.6144972451790633, + "grad_norm": 4.09375, + "learning_rate": 9.307053787532127e-05, + "loss": 4.3542, + "step": 303700 + }, + { + "epoch": 2.614927685950413, + "grad_norm": 2.875, + "learning_rate": 9.304351121537526e-05, + "loss": 4.3965, + "step": 303750 + }, + { + "epoch": 2.615358126721763, + "grad_norm": 4.15625, + "learning_rate": 9.301648506602057e-05, + "loss": 4.4847, + "step": 303800 + }, + { + "epoch": 2.615788567493113, + "grad_norm": 3.90625, + "learning_rate": 9.298945942924085e-05, + "loss": 3.9876, + "step": 303850 + }, + { + "epoch": 2.616219008264463, + "grad_norm": 2.796875, + "learning_rate": 9.296243430701975e-05, + "loss": 4.4824, + "step": 303900 + }, + { + "epoch": 2.6166494490358128, + "grad_norm": 4.875, + "learning_rate": 9.293540970134081e-05, + "loss": 4.5415, + "step": 303950 + }, + { + "epoch": 2.6170798898071626, + "grad_norm": 4.5625, + "learning_rate": 9.29083856141876e-05, + "loss": 4.3902, + "step": 304000 + }, + { + "epoch": 2.6175103305785123, + "grad_norm": 3.4375, + "learning_rate": 9.288136204754362e-05, + "loss": 4.1748, + "step": 304050 + }, + { + "epoch": 2.617940771349862, + "grad_norm": 3.40625, + "learning_rate": 9.285433900339235e-05, + "loss": 4.463, + "step": 304100 + }, + { + "epoch": 2.618371212121212, + "grad_norm": 3.109375, + "learning_rate": 9.282731648371721e-05, + "loss": 4.2591, + "step": 304150 + }, + { + "epoch": 2.6188016528925617, + "grad_norm": 4.84375, + "learning_rate": 9.280029449050167e-05, + "loss": 4.427, + "step": 304200 + }, + { + "epoch": 2.619232093663912, + "grad_norm": 2.359375, + "learning_rate": 9.277327302572895e-05, + "loss": 4.069, + "step": 304250 + }, + { + "epoch": 2.6196625344352618, + "grad_norm": 2.546875, + "learning_rate": 9.274625209138249e-05, + "loss": 4.5566, + "step": 304300 + }, + { + "epoch": 2.6200929752066116, + "grad_norm": 0.765625, + "learning_rate": 9.271923168944548e-05, + "loss": 4.4964, + "step": 304350 + }, + { + "epoch": 2.6205234159779613, + "grad_norm": 5.65625, + "learning_rate": 9.269221182190122e-05, + "loss": 4.4107, + "step": 304400 + }, + { + "epoch": 2.620953856749311, + "grad_norm": 3.359375, + "learning_rate": 9.266519249073289e-05, + "loss": 4.243, + "step": 304450 + }, + { + "epoch": 2.6213842975206614, + "grad_norm": 4.84375, + "learning_rate": 9.263817369792368e-05, + "loss": 4.6178, + "step": 304500 + }, + { + "epoch": 2.621814738292011, + "grad_norm": 3.625, + "learning_rate": 9.261115544545663e-05, + "loss": 4.0694, + "step": 304550 + }, + { + "epoch": 2.622245179063361, + "grad_norm": 1.2109375, + "learning_rate": 9.25841377353149e-05, + "loss": 4.5586, + "step": 304600 + }, + { + "epoch": 2.6226756198347108, + "grad_norm": 2.1875, + "learning_rate": 9.25571205694815e-05, + "loss": 4.3658, + "step": 304650 + }, + { + "epoch": 2.6231060606060606, + "grad_norm": 4.03125, + "learning_rate": 9.25301039499394e-05, + "loss": 4.7799, + "step": 304700 + }, + { + "epoch": 2.6235365013774103, + "grad_norm": 2.046875, + "learning_rate": 9.250308787867167e-05, + "loss": 4.1392, + "step": 304750 + }, + { + "epoch": 2.62396694214876, + "grad_norm": 3.40625, + "learning_rate": 9.247607235766112e-05, + "loss": 3.9677, + "step": 304800 + }, + { + "epoch": 2.62439738292011, + "grad_norm": 3.234375, + "learning_rate": 9.244905738889067e-05, + "loss": 4.5434, + "step": 304850 + }, + { + "epoch": 2.62482782369146, + "grad_norm": 3.5, + "learning_rate": 9.242204297434315e-05, + "loss": 4.2262, + "step": 304900 + }, + { + "epoch": 2.62525826446281, + "grad_norm": 4.5, + "learning_rate": 9.23950291160014e-05, + "loss": 4.6274, + "step": 304950 + }, + { + "epoch": 2.6256887052341598, + "grad_norm": 4.25, + "learning_rate": 9.236801581584815e-05, + "loss": 3.9821, + "step": 305000 + }, + { + "epoch": 2.6261191460055096, + "grad_norm": 1.953125, + "learning_rate": 9.234100307586609e-05, + "loss": 4.1777, + "step": 305050 + }, + { + "epoch": 2.6265495867768593, + "grad_norm": 3.265625, + "learning_rate": 9.231399089803793e-05, + "loss": 4.6839, + "step": 305100 + }, + { + "epoch": 2.6269800275482096, + "grad_norm": 2.609375, + "learning_rate": 9.228697928434633e-05, + "loss": 4.225, + "step": 305150 + }, + { + "epoch": 2.6274104683195594, + "grad_norm": 5.375, + "learning_rate": 9.225996823677381e-05, + "loss": 4.1882, + "step": 305200 + }, + { + "epoch": 2.627840909090909, + "grad_norm": 3.46875, + "learning_rate": 9.223295775730303e-05, + "loss": 4.1015, + "step": 305250 + }, + { + "epoch": 2.628271349862259, + "grad_norm": 2.453125, + "learning_rate": 9.220594784791638e-05, + "loss": 4.3895, + "step": 305300 + }, + { + "epoch": 2.6287017906336088, + "grad_norm": 2.484375, + "learning_rate": 9.217893851059642e-05, + "loss": 4.2169, + "step": 305350 + }, + { + "epoch": 2.6291322314049586, + "grad_norm": 2.828125, + "learning_rate": 9.215192974732552e-05, + "loss": 4.5952, + "step": 305400 + }, + { + "epoch": 2.6295626721763083, + "grad_norm": 3.28125, + "learning_rate": 9.212492156008614e-05, + "loss": 4.5745, + "step": 305450 + }, + { + "epoch": 2.629993112947658, + "grad_norm": 2.921875, + "learning_rate": 9.209791395086056e-05, + "loss": 4.4447, + "step": 305500 + }, + { + "epoch": 2.6304235537190084, + "grad_norm": 3.453125, + "learning_rate": 9.207090692163106e-05, + "loss": 4.5392, + "step": 305550 + }, + { + "epoch": 2.630853994490358, + "grad_norm": 2.953125, + "learning_rate": 9.204390047437994e-05, + "loss": 4.5285, + "step": 305600 + }, + { + "epoch": 2.631284435261708, + "grad_norm": 1.671875, + "learning_rate": 9.201689461108943e-05, + "loss": 4.3483, + "step": 305650 + }, + { + "epoch": 2.6317148760330578, + "grad_norm": 4.84375, + "learning_rate": 9.198988933374164e-05, + "loss": 4.5677, + "step": 305700 + }, + { + "epoch": 2.6321453168044076, + "grad_norm": 3.078125, + "learning_rate": 9.19628846443188e-05, + "loss": 3.7258, + "step": 305750 + }, + { + "epoch": 2.632575757575758, + "grad_norm": 4.53125, + "learning_rate": 9.193588054480289e-05, + "loss": 4.6498, + "step": 305800 + }, + { + "epoch": 2.6330061983471076, + "grad_norm": 3.265625, + "learning_rate": 9.190887703717601e-05, + "loss": 4.3877, + "step": 305850 + }, + { + "epoch": 2.6334366391184574, + "grad_norm": 3.203125, + "learning_rate": 9.188187412342013e-05, + "loss": 4.3732, + "step": 305900 + }, + { + "epoch": 2.633867079889807, + "grad_norm": 2.875, + "learning_rate": 9.185487180551723e-05, + "loss": 4.2059, + "step": 305950 + }, + { + "epoch": 2.634297520661157, + "grad_norm": 3.484375, + "learning_rate": 9.182787008544925e-05, + "loss": 4.5497, + "step": 306000 + }, + { + "epoch": 2.634297520661157, + "eval_loss": 5.004040718078613, + "eval_runtime": 24.771, + "eval_samples_per_second": 25.837, + "eval_steps_per_second": 12.918, + "eval_tts_loss": 7.402762199822888, + "step": 306000 + }, + { + "epoch": 2.6347279614325068, + "grad_norm": 1.875, + "learning_rate": 9.1800868965198e-05, + "loss": 3.8846, + "step": 306050 + }, + { + "epoch": 2.6351584022038566, + "grad_norm": 3.203125, + "learning_rate": 9.17738684467453e-05, + "loss": 4.1791, + "step": 306100 + }, + { + "epoch": 2.6355888429752063, + "grad_norm": 4.75, + "learning_rate": 9.174686853207299e-05, + "loss": 4.5738, + "step": 306150 + }, + { + "epoch": 2.6360192837465566, + "grad_norm": 1.71875, + "learning_rate": 9.171986922316275e-05, + "loss": 4.4295, + "step": 306200 + }, + { + "epoch": 2.6364497245179064, + "grad_norm": 6.9375, + "learning_rate": 9.169287052199629e-05, + "loss": 4.3924, + "step": 306250 + }, + { + "epoch": 2.636880165289256, + "grad_norm": 1.1640625, + "learning_rate": 9.166587243055531e-05, + "loss": 4.0633, + "step": 306300 + }, + { + "epoch": 2.637310606060606, + "grad_norm": 3.625, + "learning_rate": 9.163887495082133e-05, + "loss": 4.5509, + "step": 306350 + }, + { + "epoch": 2.6377410468319558, + "grad_norm": 1.453125, + "learning_rate": 9.161187808477593e-05, + "loss": 4.4327, + "step": 306400 + }, + { + "epoch": 2.638171487603306, + "grad_norm": 3.46875, + "learning_rate": 9.158488183440061e-05, + "loss": 4.0974, + "step": 306450 + }, + { + "epoch": 2.638601928374656, + "grad_norm": 3.125, + "learning_rate": 9.15578862016769e-05, + "loss": 4.3478, + "step": 306500 + }, + { + "epoch": 2.6390323691460056, + "grad_norm": 3.15625, + "learning_rate": 9.15308911885862e-05, + "loss": 4.5721, + "step": 306550 + }, + { + "epoch": 2.6394628099173554, + "grad_norm": 4.78125, + "learning_rate": 9.150389679710981e-05, + "loss": 4.5783, + "step": 306600 + }, + { + "epoch": 2.639893250688705, + "grad_norm": 3.15625, + "learning_rate": 9.147690302922913e-05, + "loss": 4.3638, + "step": 306650 + }, + { + "epoch": 2.640323691460055, + "grad_norm": 3.6875, + "learning_rate": 9.144990988692543e-05, + "loss": 4.6775, + "step": 306700 + }, + { + "epoch": 2.6407541322314048, + "grad_norm": 3.09375, + "learning_rate": 9.142291737217995e-05, + "loss": 4.2685, + "step": 306750 + }, + { + "epoch": 2.6411845730027546, + "grad_norm": 4.84375, + "learning_rate": 9.139592548697391e-05, + "loss": 4.3172, + "step": 306800 + }, + { + "epoch": 2.641615013774105, + "grad_norm": 3.96875, + "learning_rate": 9.136893423328841e-05, + "loss": 4.1565, + "step": 306850 + }, + { + "epoch": 2.6420454545454546, + "grad_norm": 5.125, + "learning_rate": 9.134194361310454e-05, + "loss": 4.5736, + "step": 306900 + }, + { + "epoch": 2.6424758953168044, + "grad_norm": 3.59375, + "learning_rate": 9.131495362840339e-05, + "loss": 4.1663, + "step": 306950 + }, + { + "epoch": 2.642906336088154, + "grad_norm": 3.375, + "learning_rate": 9.128796428116597e-05, + "loss": 4.481, + "step": 307000 + }, + { + "epoch": 2.643336776859504, + "grad_norm": 3.21875, + "learning_rate": 9.126097557337326e-05, + "loss": 4.3028, + "step": 307050 + }, + { + "epoch": 2.643767217630854, + "grad_norm": 3.90625, + "learning_rate": 9.12339875070061e-05, + "loss": 4.3013, + "step": 307100 + }, + { + "epoch": 2.644197658402204, + "grad_norm": 4.125, + "learning_rate": 9.120700008404537e-05, + "loss": 4.3392, + "step": 307150 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 2.09375, + "learning_rate": 9.118001330647195e-05, + "loss": 4.4566, + "step": 307200 + }, + { + "epoch": 2.6450585399449036, + "grad_norm": 3.0, + "learning_rate": 9.115302717626657e-05, + "loss": 4.1723, + "step": 307250 + }, + { + "epoch": 2.6454889807162534, + "grad_norm": 3.4375, + "learning_rate": 9.112604169540999e-05, + "loss": 4.3946, + "step": 307300 + }, + { + "epoch": 2.645919421487603, + "grad_norm": 3.296875, + "learning_rate": 9.109905686588281e-05, + "loss": 4.4125, + "step": 307350 + }, + { + "epoch": 2.646349862258953, + "grad_norm": 2.078125, + "learning_rate": 9.107207268966573e-05, + "loss": 4.5274, + "step": 307400 + }, + { + "epoch": 2.6467803030303028, + "grad_norm": 3.203125, + "learning_rate": 9.10450891687393e-05, + "loss": 4.5369, + "step": 307450 + }, + { + "epoch": 2.647210743801653, + "grad_norm": 0.94921875, + "learning_rate": 9.101810630508407e-05, + "loss": 4.0234, + "step": 307500 + }, + { + "epoch": 2.647641184573003, + "grad_norm": 4.15625, + "learning_rate": 9.09911241006805e-05, + "loss": 4.0093, + "step": 307550 + }, + { + "epoch": 2.6480716253443526, + "grad_norm": 3.59375, + "learning_rate": 9.096414255750904e-05, + "loss": 4.5366, + "step": 307600 + }, + { + "epoch": 2.6485020661157024, + "grad_norm": 3.078125, + "learning_rate": 9.093716167755006e-05, + "loss": 4.8575, + "step": 307650 + }, + { + "epoch": 2.6489325068870526, + "grad_norm": 5.125, + "learning_rate": 9.091018146278394e-05, + "loss": 4.5015, + "step": 307700 + }, + { + "epoch": 2.6493629476584024, + "grad_norm": 3.875, + "learning_rate": 9.088320191519092e-05, + "loss": 4.2045, + "step": 307750 + }, + { + "epoch": 2.649793388429752, + "grad_norm": 2.625, + "learning_rate": 9.08562230367513e-05, + "loss": 4.1992, + "step": 307800 + }, + { + "epoch": 2.650223829201102, + "grad_norm": 4.75, + "learning_rate": 9.082924482944519e-05, + "loss": 4.6196, + "step": 307850 + }, + { + "epoch": 2.650654269972452, + "grad_norm": 3.171875, + "learning_rate": 9.08022672952528e-05, + "loss": 4.1372, + "step": 307900 + }, + { + "epoch": 2.6510847107438016, + "grad_norm": 2.671875, + "learning_rate": 9.077529043615421e-05, + "loss": 4.1683, + "step": 307950 + }, + { + "epoch": 2.6515151515151514, + "grad_norm": 1.953125, + "learning_rate": 9.074831425412943e-05, + "loss": 4.5398, + "step": 308000 + }, + { + "epoch": 2.651945592286501, + "grad_norm": 2.6875, + "learning_rate": 9.072133875115849e-05, + "loss": 4.5371, + "step": 308050 + }, + { + "epoch": 2.652376033057851, + "grad_norm": 4.03125, + "learning_rate": 9.069436392922135e-05, + "loss": 4.1481, + "step": 308100 + }, + { + "epoch": 2.652806473829201, + "grad_norm": 2.921875, + "learning_rate": 9.066738979029784e-05, + "loss": 4.3286, + "step": 308150 + }, + { + "epoch": 2.653236914600551, + "grad_norm": 5.125, + "learning_rate": 9.064041633636783e-05, + "loss": 4.01, + "step": 308200 + }, + { + "epoch": 2.653667355371901, + "grad_norm": 2.8125, + "learning_rate": 9.061344356941116e-05, + "loss": 4.4116, + "step": 308250 + }, + { + "epoch": 2.6540977961432506, + "grad_norm": 1.8359375, + "learning_rate": 9.05864714914075e-05, + "loss": 4.0769, + "step": 308300 + }, + { + "epoch": 2.654528236914601, + "grad_norm": 1.625, + "learning_rate": 9.055950010433663e-05, + "loss": 4.2289, + "step": 308350 + }, + { + "epoch": 2.6549586776859506, + "grad_norm": 3.5, + "learning_rate": 9.053252941017811e-05, + "loss": 4.2436, + "step": 308400 + }, + { + "epoch": 2.6553891184573004, + "grad_norm": 3.484375, + "learning_rate": 9.050555941091156e-05, + "loss": 4.5338, + "step": 308450 + }, + { + "epoch": 2.65581955922865, + "grad_norm": 2.328125, + "learning_rate": 9.04785901085165e-05, + "loss": 3.9787, + "step": 308500 + }, + { + "epoch": 2.65625, + "grad_norm": 2.359375, + "learning_rate": 9.045162150497248e-05, + "loss": 4.3154, + "step": 308550 + }, + { + "epoch": 2.65668044077135, + "grad_norm": 2.578125, + "learning_rate": 9.04246536022589e-05, + "loss": 4.209, + "step": 308600 + }, + { + "epoch": 2.6571108815426996, + "grad_norm": 5.15625, + "learning_rate": 9.03976864023551e-05, + "loss": 4.3636, + "step": 308650 + }, + { + "epoch": 2.6575413223140494, + "grad_norm": 2.421875, + "learning_rate": 9.037071990724049e-05, + "loss": 4.2426, + "step": 308700 + }, + { + "epoch": 2.657971763085399, + "grad_norm": 5.0625, + "learning_rate": 9.034375411889431e-05, + "loss": 4.1477, + "step": 308750 + }, + { + "epoch": 2.6584022038567494, + "grad_norm": 2.765625, + "learning_rate": 9.03167890392958e-05, + "loss": 4.0072, + "step": 308800 + }, + { + "epoch": 2.658832644628099, + "grad_norm": 2.9375, + "learning_rate": 9.028982467042417e-05, + "loss": 4.3343, + "step": 308850 + }, + { + "epoch": 2.659263085399449, + "grad_norm": 1.96875, + "learning_rate": 9.026286101425846e-05, + "loss": 4.3219, + "step": 308900 + }, + { + "epoch": 2.659693526170799, + "grad_norm": 2.46875, + "learning_rate": 9.023589807277785e-05, + "loss": 4.4663, + "step": 308950 + }, + { + "epoch": 2.660123966942149, + "grad_norm": 3.59375, + "learning_rate": 9.020893584796129e-05, + "loss": 4.3882, + "step": 309000 + }, + { + "epoch": 2.660123966942149, + "eval_loss": 5.002894401550293, + "eval_runtime": 25.5644, + "eval_samples_per_second": 25.035, + "eval_steps_per_second": 12.517, + "eval_tts_loss": 7.4085076570409605, + "step": 309000 + }, + { + "epoch": 2.660554407713499, + "grad_norm": 4.4375, + "learning_rate": 9.018197434178779e-05, + "loss": 4.4659, + "step": 309050 + }, + { + "epoch": 2.6609848484848486, + "grad_norm": 3.28125, + "learning_rate": 9.015501355623627e-05, + "loss": 4.2047, + "step": 309100 + }, + { + "epoch": 2.6614152892561984, + "grad_norm": 4.96875, + "learning_rate": 9.012805349328553e-05, + "loss": 4.5465, + "step": 309150 + }, + { + "epoch": 2.661845730027548, + "grad_norm": 2.40625, + "learning_rate": 9.010109415491441e-05, + "loss": 4.293, + "step": 309200 + }, + { + "epoch": 2.662276170798898, + "grad_norm": 4.125, + "learning_rate": 9.007413554310172e-05, + "loss": 4.6997, + "step": 309250 + }, + { + "epoch": 2.662706611570248, + "grad_norm": 1.6953125, + "learning_rate": 9.00471776598261e-05, + "loss": 4.3926, + "step": 309300 + }, + { + "epoch": 2.6631370523415976, + "grad_norm": 2.0625, + "learning_rate": 9.002022050706627e-05, + "loss": 4.2365, + "step": 309350 + }, + { + "epoch": 2.6635674931129474, + "grad_norm": 1.15625, + "learning_rate": 8.999326408680073e-05, + "loss": 4.0975, + "step": 309400 + }, + { + "epoch": 2.6639979338842976, + "grad_norm": 1.9921875, + "learning_rate": 8.99663084010081e-05, + "loss": 4.5361, + "step": 309450 + }, + { + "epoch": 2.6644283746556474, + "grad_norm": 2.390625, + "learning_rate": 8.993935345166683e-05, + "loss": 4.0661, + "step": 309500 + }, + { + "epoch": 2.664858815426997, + "grad_norm": 4.25, + "learning_rate": 8.991239924075538e-05, + "loss": 4.163, + "step": 309550 + }, + { + "epoch": 2.665289256198347, + "grad_norm": 2.6875, + "learning_rate": 8.988544577025214e-05, + "loss": 4.4862, + "step": 309600 + }, + { + "epoch": 2.6657196969696972, + "grad_norm": 2.65625, + "learning_rate": 8.985849304213542e-05, + "loss": 4.3983, + "step": 309650 + }, + { + "epoch": 2.666150137741047, + "grad_norm": 3.265625, + "learning_rate": 8.983154105838345e-05, + "loss": 4.2226, + "step": 309700 + }, + { + "epoch": 2.666580578512397, + "grad_norm": 3.78125, + "learning_rate": 8.980458982097453e-05, + "loss": 4.4132, + "step": 309750 + }, + { + "epoch": 2.6670110192837466, + "grad_norm": 2.78125, + "learning_rate": 8.977763933188674e-05, + "loss": 4.3468, + "step": 309800 + }, + { + "epoch": 2.6674414600550964, + "grad_norm": 2.84375, + "learning_rate": 8.975068959309824e-05, + "loss": 4.3531, + "step": 309850 + }, + { + "epoch": 2.667871900826446, + "grad_norm": 3.515625, + "learning_rate": 8.972374060658712e-05, + "loss": 4.5711, + "step": 309900 + }, + { + "epoch": 2.668302341597796, + "grad_norm": 3.28125, + "learning_rate": 8.96967923743313e-05, + "loss": 3.7813, + "step": 309950 + }, + { + "epoch": 2.668732782369146, + "grad_norm": 1.4765625, + "learning_rate": 8.966984489830872e-05, + "loss": 4.4286, + "step": 310000 + }, + { + "epoch": 2.669163223140496, + "grad_norm": 3.90625, + "learning_rate": 8.964289818049731e-05, + "loss": 4.2551, + "step": 310050 + }, + { + "epoch": 2.669593663911846, + "grad_norm": 2.5625, + "learning_rate": 8.96159522228749e-05, + "loss": 4.4058, + "step": 310100 + }, + { + "epoch": 2.6700241046831956, + "grad_norm": 2.578125, + "learning_rate": 8.958900702741931e-05, + "loss": 4.2769, + "step": 310150 + }, + { + "epoch": 2.6704545454545454, + "grad_norm": 3.078125, + "learning_rate": 8.956206259610813e-05, + "loss": 4.1848, + "step": 310200 + }, + { + "epoch": 2.670884986225895, + "grad_norm": 4.1875, + "learning_rate": 8.95351189309191e-05, + "loss": 4.6105, + "step": 310250 + }, + { + "epoch": 2.6713154269972454, + "grad_norm": 2.203125, + "learning_rate": 8.950817603382986e-05, + "loss": 4.3688, + "step": 310300 + }, + { + "epoch": 2.6717458677685952, + "grad_norm": 2.6875, + "learning_rate": 8.94812339068179e-05, + "loss": 4.1359, + "step": 310350 + }, + { + "epoch": 2.672176308539945, + "grad_norm": 3.359375, + "learning_rate": 8.945429255186078e-05, + "loss": 4.2189, + "step": 310400 + }, + { + "epoch": 2.672606749311295, + "grad_norm": 3.203125, + "learning_rate": 8.942735197093587e-05, + "loss": 4.7217, + "step": 310450 + }, + { + "epoch": 2.6730371900826446, + "grad_norm": 2.265625, + "learning_rate": 8.940041216602057e-05, + "loss": 4.417, + "step": 310500 + }, + { + "epoch": 2.6734676308539944, + "grad_norm": 2.828125, + "learning_rate": 8.93734731390922e-05, + "loss": 4.0018, + "step": 310550 + }, + { + "epoch": 2.673898071625344, + "grad_norm": 2.21875, + "learning_rate": 8.934653489212805e-05, + "loss": 4.4157, + "step": 310600 + }, + { + "epoch": 2.674328512396694, + "grad_norm": 2.5, + "learning_rate": 8.931959742710535e-05, + "loss": 4.2251, + "step": 310650 + }, + { + "epoch": 2.6747589531680442, + "grad_norm": 6.15625, + "learning_rate": 8.929266074600118e-05, + "loss": 4.4576, + "step": 310700 + }, + { + "epoch": 2.675189393939394, + "grad_norm": 2.578125, + "learning_rate": 8.926572485079268e-05, + "loss": 4.0139, + "step": 310750 + }, + { + "epoch": 2.675619834710744, + "grad_norm": 2.15625, + "learning_rate": 8.923878974345688e-05, + "loss": 4.3964, + "step": 310800 + }, + { + "epoch": 2.6760502754820936, + "grad_norm": 1.0078125, + "learning_rate": 8.921185542597076e-05, + "loss": 4.3175, + "step": 310850 + }, + { + "epoch": 2.6764807162534434, + "grad_norm": 1.3515625, + "learning_rate": 8.918492190031126e-05, + "loss": 4.1622, + "step": 310900 + }, + { + "epoch": 2.6769111570247937, + "grad_norm": 2.6875, + "learning_rate": 8.915798916845521e-05, + "loss": 4.2837, + "step": 310950 + }, + { + "epoch": 2.6773415977961434, + "grad_norm": 1.5234375, + "learning_rate": 8.913105723237942e-05, + "loss": 4.3683, + "step": 311000 + }, + { + "epoch": 2.6777720385674932, + "grad_norm": 1.4453125, + "learning_rate": 8.910412609406064e-05, + "loss": 3.8637, + "step": 311050 + }, + { + "epoch": 2.678202479338843, + "grad_norm": 4.28125, + "learning_rate": 8.907719575547558e-05, + "loss": 4.5455, + "step": 311100 + }, + { + "epoch": 2.678632920110193, + "grad_norm": 3.8125, + "learning_rate": 8.905026621860086e-05, + "loss": 4.5472, + "step": 311150 + }, + { + "epoch": 2.6790633608815426, + "grad_norm": 2.046875, + "learning_rate": 8.902333748541303e-05, + "loss": 4.1928, + "step": 311200 + }, + { + "epoch": 2.6794938016528924, + "grad_norm": 3.140625, + "learning_rate": 8.899640955788858e-05, + "loss": 4.2625, + "step": 311250 + }, + { + "epoch": 2.679924242424242, + "grad_norm": 4.125, + "learning_rate": 8.896948243800402e-05, + "loss": 4.4184, + "step": 311300 + }, + { + "epoch": 2.6803546831955924, + "grad_norm": 3.671875, + "learning_rate": 8.89425561277357e-05, + "loss": 4.6541, + "step": 311350 + }, + { + "epoch": 2.6807851239669422, + "grad_norm": 3.484375, + "learning_rate": 8.891563062906002e-05, + "loss": 4.4166, + "step": 311400 + }, + { + "epoch": 2.681215564738292, + "grad_norm": 1.015625, + "learning_rate": 8.888870594395315e-05, + "loss": 4.1524, + "step": 311450 + }, + { + "epoch": 2.681646005509642, + "grad_norm": 5.625, + "learning_rate": 8.886178207439139e-05, + "loss": 4.4343, + "step": 311500 + }, + { + "epoch": 2.6820764462809916, + "grad_norm": 3.109375, + "learning_rate": 8.883485902235082e-05, + "loss": 4.2866, + "step": 311550 + }, + { + "epoch": 2.682506887052342, + "grad_norm": 4.25, + "learning_rate": 8.88079367898076e-05, + "loss": 4.5307, + "step": 311600 + }, + { + "epoch": 2.6829373278236917, + "grad_norm": 2.515625, + "learning_rate": 8.878101537873774e-05, + "loss": 4.6749, + "step": 311650 + }, + { + "epoch": 2.6833677685950414, + "grad_norm": 3.203125, + "learning_rate": 8.875409479111725e-05, + "loss": 4.3074, + "step": 311700 + }, + { + "epoch": 2.6837982093663912, + "grad_norm": 2.5, + "learning_rate": 8.872717502892194e-05, + "loss": 4.3442, + "step": 311750 + }, + { + "epoch": 2.684228650137741, + "grad_norm": 3.40625, + "learning_rate": 8.870025609412776e-05, + "loss": 4.1657, + "step": 311800 + }, + { + "epoch": 2.684659090909091, + "grad_norm": 4.125, + "learning_rate": 8.867333798871047e-05, + "loss": 4.5745, + "step": 311850 + }, + { + "epoch": 2.6850895316804406, + "grad_norm": 2.875, + "learning_rate": 8.86464207146458e-05, + "loss": 4.3193, + "step": 311900 + }, + { + "epoch": 2.6855199724517904, + "grad_norm": 2.40625, + "learning_rate": 8.861950427390944e-05, + "loss": 4.3696, + "step": 311950 + }, + { + "epoch": 2.6859504132231407, + "grad_norm": 3.21875, + "learning_rate": 8.859258866847697e-05, + "loss": 4.2616, + "step": 312000 + }, + { + "epoch": 2.6859504132231407, + "eval_loss": 5.001897811889648, + "eval_runtime": 24.5929, + "eval_samples_per_second": 26.024, + "eval_steps_per_second": 13.012, + "eval_tts_loss": 7.426488727864566, + "step": 312000 + }, + { + "epoch": 2.6863808539944904, + "grad_norm": 1.1875, + "learning_rate": 8.856567390032393e-05, + "loss": 4.5049, + "step": 312050 + }, + { + "epoch": 2.6868112947658402, + "grad_norm": 6.40625, + "learning_rate": 8.853875997142582e-05, + "loss": 4.7234, + "step": 312100 + }, + { + "epoch": 2.68724173553719, + "grad_norm": 4.8125, + "learning_rate": 8.851184688375809e-05, + "loss": 4.2044, + "step": 312150 + }, + { + "epoch": 2.68767217630854, + "grad_norm": 4.0, + "learning_rate": 8.848493463929611e-05, + "loss": 4.3651, + "step": 312200 + }, + { + "epoch": 2.68810261707989, + "grad_norm": 5.34375, + "learning_rate": 8.84580232400151e-05, + "loss": 4.4014, + "step": 312250 + }, + { + "epoch": 2.68853305785124, + "grad_norm": 1.75, + "learning_rate": 8.843111268789034e-05, + "loss": 4.4415, + "step": 312300 + }, + { + "epoch": 2.6889634986225897, + "grad_norm": 4.25, + "learning_rate": 8.840420298489707e-05, + "loss": 4.3472, + "step": 312350 + }, + { + "epoch": 2.6893939393939394, + "grad_norm": 2.84375, + "learning_rate": 8.83772941330103e-05, + "loss": 4.4325, + "step": 312400 + }, + { + "epoch": 2.6898243801652892, + "grad_norm": 2.546875, + "learning_rate": 8.83503861342052e-05, + "loss": 4.6666, + "step": 312450 + }, + { + "epoch": 2.690254820936639, + "grad_norm": 2.90625, + "learning_rate": 8.832347899045663e-05, + "loss": 4.1108, + "step": 312500 + }, + { + "epoch": 2.690685261707989, + "grad_norm": 2.25, + "learning_rate": 8.829657270373963e-05, + "loss": 4.1628, + "step": 312550 + }, + { + "epoch": 2.6911157024793386, + "grad_norm": 3.875, + "learning_rate": 8.826966727602895e-05, + "loss": 4.2323, + "step": 312600 + }, + { + "epoch": 2.691546143250689, + "grad_norm": 2.953125, + "learning_rate": 8.824276270929951e-05, + "loss": 4.3412, + "step": 312650 + }, + { + "epoch": 2.6919765840220387, + "grad_norm": 2.890625, + "learning_rate": 8.821585900552601e-05, + "loss": 4.4091, + "step": 312700 + }, + { + "epoch": 2.6924070247933884, + "grad_norm": 4.6875, + "learning_rate": 8.818895616668304e-05, + "loss": 4.3031, + "step": 312750 + }, + { + "epoch": 2.6928374655647382, + "grad_norm": 4.375, + "learning_rate": 8.816205419474528e-05, + "loss": 4.247, + "step": 312800 + }, + { + "epoch": 2.693267906336088, + "grad_norm": 2.71875, + "learning_rate": 8.81351530916873e-05, + "loss": 4.325, + "step": 312850 + }, + { + "epoch": 2.6936983471074383, + "grad_norm": 4.9375, + "learning_rate": 8.810825285948355e-05, + "loss": 4.1087, + "step": 312900 + }, + { + "epoch": 2.694128787878788, + "grad_norm": 4.75, + "learning_rate": 8.808135350010847e-05, + "loss": 4.6989, + "step": 312950 + }, + { + "epoch": 2.694559228650138, + "grad_norm": 3.671875, + "learning_rate": 8.805445501553638e-05, + "loss": 4.1459, + "step": 313000 + }, + { + "epoch": 2.6949896694214877, + "grad_norm": 2.703125, + "learning_rate": 8.802755740774159e-05, + "loss": 4.3638, + "step": 313050 + }, + { + "epoch": 2.6954201101928374, + "grad_norm": 1.703125, + "learning_rate": 8.800066067869831e-05, + "loss": 4.018, + "step": 313100 + }, + { + "epoch": 2.6958505509641872, + "grad_norm": 2.78125, + "learning_rate": 8.797376483038073e-05, + "loss": 4.4241, + "step": 313150 + }, + { + "epoch": 2.696280991735537, + "grad_norm": 1.9609375, + "learning_rate": 8.794686986476297e-05, + "loss": 4.2152, + "step": 313200 + }, + { + "epoch": 2.696711432506887, + "grad_norm": 2.015625, + "learning_rate": 8.7919975783819e-05, + "loss": 4.6765, + "step": 313250 + }, + { + "epoch": 2.697141873278237, + "grad_norm": 3.796875, + "learning_rate": 8.789308258952282e-05, + "loss": 4.2798, + "step": 313300 + }, + { + "epoch": 2.697572314049587, + "grad_norm": 2.421875, + "learning_rate": 8.786619028384833e-05, + "loss": 4.7068, + "step": 313350 + }, + { + "epoch": 2.6980027548209367, + "grad_norm": 3.015625, + "learning_rate": 8.783929886876936e-05, + "loss": 4.0879, + "step": 313400 + }, + { + "epoch": 2.6984331955922864, + "grad_norm": 3.28125, + "learning_rate": 8.781240834625968e-05, + "loss": 4.5351, + "step": 313450 + }, + { + "epoch": 2.6988636363636362, + "grad_norm": 2.515625, + "learning_rate": 8.778551871829305e-05, + "loss": 4.3604, + "step": 313500 + }, + { + "epoch": 2.6992940771349865, + "grad_norm": 3.984375, + "learning_rate": 8.775862998684304e-05, + "loss": 4.4568, + "step": 313550 + }, + { + "epoch": 2.6997245179063363, + "grad_norm": 3.015625, + "learning_rate": 8.773174215388323e-05, + "loss": 4.458, + "step": 313600 + }, + { + "epoch": 2.700154958677686, + "grad_norm": 2.625, + "learning_rate": 8.770485522138716e-05, + "loss": 4.5919, + "step": 313650 + }, + { + "epoch": 2.700585399449036, + "grad_norm": 2.546875, + "learning_rate": 8.767796919132827e-05, + "loss": 4.2773, + "step": 313700 + }, + { + "epoch": 2.7010158402203857, + "grad_norm": 2.734375, + "learning_rate": 8.765108406567997e-05, + "loss": 4.3466, + "step": 313750 + }, + { + "epoch": 2.7014462809917354, + "grad_norm": 2.59375, + "learning_rate": 8.762419984641547e-05, + "loss": 4.325, + "step": 313800 + }, + { + "epoch": 2.7018767217630852, + "grad_norm": 2.078125, + "learning_rate": 8.759731653550807e-05, + "loss": 4.3278, + "step": 313850 + }, + { + "epoch": 2.702307162534435, + "grad_norm": 3.046875, + "learning_rate": 8.757043413493098e-05, + "loss": 4.4759, + "step": 313900 + }, + { + "epoch": 2.7027376033057853, + "grad_norm": 2.9375, + "learning_rate": 8.754355264665726e-05, + "loss": 4.7913, + "step": 313950 + }, + { + "epoch": 2.703168044077135, + "grad_norm": 4.8125, + "learning_rate": 8.751667207266004e-05, + "loss": 3.9926, + "step": 314000 + }, + { + "epoch": 2.703598484848485, + "grad_norm": 1.8671875, + "learning_rate": 8.748979241491216e-05, + "loss": 4.0226, + "step": 314050 + }, + { + "epoch": 2.7040289256198347, + "grad_norm": 2.875, + "learning_rate": 8.746291367538663e-05, + "loss": 4.6195, + "step": 314100 + }, + { + "epoch": 2.7044593663911844, + "grad_norm": 3.28125, + "learning_rate": 8.743603585605624e-05, + "loss": 4.275, + "step": 314150 + }, + { + "epoch": 2.7048898071625347, + "grad_norm": 3.25, + "learning_rate": 8.740915895889381e-05, + "loss": 4.3307, + "step": 314200 + }, + { + "epoch": 2.7053202479338845, + "grad_norm": 2.34375, + "learning_rate": 8.738228298587204e-05, + "loss": 4.1049, + "step": 314250 + }, + { + "epoch": 2.7057506887052343, + "grad_norm": 6.09375, + "learning_rate": 8.735540793896351e-05, + "loss": 4.5113, + "step": 314300 + }, + { + "epoch": 2.706181129476584, + "grad_norm": 1.7421875, + "learning_rate": 8.732853382014083e-05, + "loss": 4.1478, + "step": 314350 + }, + { + "epoch": 2.706611570247934, + "grad_norm": 2.796875, + "learning_rate": 8.730166063137653e-05, + "loss": 4.1365, + "step": 314400 + }, + { + "epoch": 2.7070420110192837, + "grad_norm": 2.359375, + "learning_rate": 8.7274788374643e-05, + "loss": 4.4434, + "step": 314450 + }, + { + "epoch": 2.7074724517906334, + "grad_norm": 3.609375, + "learning_rate": 8.724791705191267e-05, + "loss": 4.1965, + "step": 314500 + }, + { + "epoch": 2.7079028925619832, + "grad_norm": 4.65625, + "learning_rate": 8.722104666515774e-05, + "loss": 4.0563, + "step": 314550 + }, + { + "epoch": 2.7083333333333335, + "grad_norm": 4.53125, + "learning_rate": 8.71941772163505e-05, + "loss": 4.0379, + "step": 314600 + }, + { + "epoch": 2.7087637741046833, + "grad_norm": 3.0625, + "learning_rate": 8.71673087074631e-05, + "loss": 4.3457, + "step": 314650 + }, + { + "epoch": 2.709194214876033, + "grad_norm": 3.65625, + "learning_rate": 8.714044114046764e-05, + "loss": 4.409, + "step": 314700 + }, + { + "epoch": 2.709624655647383, + "grad_norm": 2.328125, + "learning_rate": 8.711357451733613e-05, + "loss": 4.4346, + "step": 314750 + }, + { + "epoch": 2.7100550964187327, + "grad_norm": 2.140625, + "learning_rate": 8.708670884004054e-05, + "loss": 3.9265, + "step": 314800 + }, + { + "epoch": 2.710485537190083, + "grad_norm": 3.4375, + "learning_rate": 8.70598441105527e-05, + "loss": 4.1622, + "step": 314850 + }, + { + "epoch": 2.7109159779614327, + "grad_norm": 2.640625, + "learning_rate": 8.70329803308445e-05, + "loss": 4.7735, + "step": 314900 + }, + { + "epoch": 2.7113464187327825, + "grad_norm": 3.21875, + "learning_rate": 8.70061175028876e-05, + "loss": 4.4975, + "step": 314950 + }, + { + "epoch": 2.7117768595041323, + "grad_norm": 2.15625, + "learning_rate": 8.697925562865378e-05, + "loss": 4.4245, + "step": 315000 + }, + { + "epoch": 2.7117768595041323, + "eval_loss": 5.002341270446777, + "eval_runtime": 24.7637, + "eval_samples_per_second": 25.844, + "eval_steps_per_second": 12.922, + "eval_tts_loss": 7.452000815631489, + "step": 315000 + }, + { + "epoch": 2.712207300275482, + "grad_norm": 3.125, + "learning_rate": 8.695239471011453e-05, + "loss": 4.445, + "step": 315050 + }, + { + "epoch": 2.712637741046832, + "grad_norm": 3.59375, + "learning_rate": 8.692553474924145e-05, + "loss": 4.0807, + "step": 315100 + }, + { + "epoch": 2.7130681818181817, + "grad_norm": 1.640625, + "learning_rate": 8.689867574800598e-05, + "loss": 4.406, + "step": 315150 + }, + { + "epoch": 2.7134986225895315, + "grad_norm": 1.609375, + "learning_rate": 8.687181770837953e-05, + "loss": 4.2764, + "step": 315200 + }, + { + "epoch": 2.7139290633608817, + "grad_norm": 1.8515625, + "learning_rate": 8.684496063233344e-05, + "loss": 4.3596, + "step": 315250 + }, + { + "epoch": 2.7143595041322315, + "grad_norm": 3.859375, + "learning_rate": 8.681810452183892e-05, + "loss": 4.4398, + "step": 315300 + }, + { + "epoch": 2.7147899449035813, + "grad_norm": 3.484375, + "learning_rate": 8.679124937886714e-05, + "loss": 3.9907, + "step": 315350 + }, + { + "epoch": 2.715220385674931, + "grad_norm": 6.03125, + "learning_rate": 8.676439520538923e-05, + "loss": 4.0684, + "step": 315400 + }, + { + "epoch": 2.715650826446281, + "grad_norm": 4.1875, + "learning_rate": 8.673754200337628e-05, + "loss": 4.5319, + "step": 315450 + }, + { + "epoch": 2.716081267217631, + "grad_norm": 2.90625, + "learning_rate": 8.671068977479918e-05, + "loss": 4.2654, + "step": 315500 + }, + { + "epoch": 2.716511707988981, + "grad_norm": 3.15625, + "learning_rate": 8.668383852162889e-05, + "loss": 4.3361, + "step": 315550 + }, + { + "epoch": 2.7169421487603307, + "grad_norm": 5.25, + "learning_rate": 8.665698824583619e-05, + "loss": 4.5006, + "step": 315600 + }, + { + "epoch": 2.7173725895316805, + "grad_norm": 2.578125, + "learning_rate": 8.663013894939183e-05, + "loss": 4.5736, + "step": 315650 + }, + { + "epoch": 2.7178030303030303, + "grad_norm": 2.0625, + "learning_rate": 8.66032906342665e-05, + "loss": 4.2864, + "step": 315700 + }, + { + "epoch": 2.71823347107438, + "grad_norm": 4.34375, + "learning_rate": 8.657644330243082e-05, + "loss": 4.1847, + "step": 315750 + }, + { + "epoch": 2.71866391184573, + "grad_norm": 2.1875, + "learning_rate": 8.654959695585537e-05, + "loss": 4.2824, + "step": 315800 + }, + { + "epoch": 2.7190943526170797, + "grad_norm": 3.609375, + "learning_rate": 8.65227515965105e-05, + "loss": 4.395, + "step": 315850 + }, + { + "epoch": 2.71952479338843, + "grad_norm": 2.546875, + "learning_rate": 8.649590722636667e-05, + "loss": 4.3194, + "step": 315900 + }, + { + "epoch": 2.7199552341597797, + "grad_norm": 3.0625, + "learning_rate": 8.646906384739421e-05, + "loss": 4.2922, + "step": 315950 + }, + { + "epoch": 2.7203856749311295, + "grad_norm": 2.765625, + "learning_rate": 8.644222146156335e-05, + "loss": 4.3868, + "step": 316000 + }, + { + "epoch": 2.7208161157024793, + "grad_norm": 4.875, + "learning_rate": 8.641538007084428e-05, + "loss": 4.2453, + "step": 316050 + }, + { + "epoch": 2.721246556473829, + "grad_norm": 4.28125, + "learning_rate": 8.638853967720706e-05, + "loss": 4.1028, + "step": 316100 + }, + { + "epoch": 2.7216769972451793, + "grad_norm": 2.28125, + "learning_rate": 8.636170028262176e-05, + "loss": 4.0804, + "step": 316150 + }, + { + "epoch": 2.722107438016529, + "grad_norm": 3.015625, + "learning_rate": 8.63348618890583e-05, + "loss": 4.1789, + "step": 316200 + }, + { + "epoch": 2.722537878787879, + "grad_norm": 3.234375, + "learning_rate": 8.630802449848659e-05, + "loss": 4.144, + "step": 316250 + }, + { + "epoch": 2.7229683195592287, + "grad_norm": 2.453125, + "learning_rate": 8.628118811287644e-05, + "loss": 4.2576, + "step": 316300 + }, + { + "epoch": 2.7233987603305785, + "grad_norm": 4.53125, + "learning_rate": 8.625435273419753e-05, + "loss": 4.4293, + "step": 316350 + }, + { + "epoch": 2.7238292011019283, + "grad_norm": 2.15625, + "learning_rate": 8.622751836441956e-05, + "loss": 4.2431, + "step": 316400 + }, + { + "epoch": 2.724259641873278, + "grad_norm": 2.25, + "learning_rate": 8.620068500551212e-05, + "loss": 4.1925, + "step": 316450 + }, + { + "epoch": 2.724690082644628, + "grad_norm": 4.125, + "learning_rate": 8.617385265944468e-05, + "loss": 4.2578, + "step": 316500 + }, + { + "epoch": 2.725120523415978, + "grad_norm": 2.890625, + "learning_rate": 8.614702132818678e-05, + "loss": 4.1409, + "step": 316550 + }, + { + "epoch": 2.725550964187328, + "grad_norm": 2.984375, + "learning_rate": 8.612019101370764e-05, + "loss": 4.4209, + "step": 316600 + }, + { + "epoch": 2.7259814049586777, + "grad_norm": 3.546875, + "learning_rate": 8.609336171797665e-05, + "loss": 4.2466, + "step": 316650 + }, + { + "epoch": 2.7264118457300275, + "grad_norm": 2.296875, + "learning_rate": 8.606653344296296e-05, + "loss": 4.5445, + "step": 316700 + }, + { + "epoch": 2.7268422865013773, + "grad_norm": 1.25, + "learning_rate": 8.603970619063575e-05, + "loss": 3.8468, + "step": 316750 + }, + { + "epoch": 2.7272727272727275, + "grad_norm": 2.46875, + "learning_rate": 8.60128799629641e-05, + "loss": 4.4241, + "step": 316800 + }, + { + "epoch": 2.7277031680440773, + "grad_norm": 0.69140625, + "learning_rate": 8.598605476191695e-05, + "loss": 4.2696, + "step": 316850 + }, + { + "epoch": 2.728133608815427, + "grad_norm": 4.75, + "learning_rate": 8.595923058946321e-05, + "loss": 4.4268, + "step": 316900 + }, + { + "epoch": 2.728564049586777, + "grad_norm": 4.4375, + "learning_rate": 8.593240744757175e-05, + "loss": 4.2739, + "step": 316950 + }, + { + "epoch": 2.7289944903581267, + "grad_norm": 3.90625, + "learning_rate": 8.590558533821129e-05, + "loss": 4.3778, + "step": 317000 + }, + { + "epoch": 2.7294249311294765, + "grad_norm": 2.359375, + "learning_rate": 8.58787642633506e-05, + "loss": 4.3922, + "step": 317050 + }, + { + "epoch": 2.7298553719008263, + "grad_norm": 2.84375, + "learning_rate": 8.585194422495817e-05, + "loss": 4.4721, + "step": 317100 + }, + { + "epoch": 2.730285812672176, + "grad_norm": 4.03125, + "learning_rate": 8.582512522500263e-05, + "loss": 4.6698, + "step": 317150 + }, + { + "epoch": 2.7307162534435263, + "grad_norm": 3.484375, + "learning_rate": 8.579830726545237e-05, + "loss": 4.3732, + "step": 317200 + }, + { + "epoch": 2.731146694214876, + "grad_norm": 1.921875, + "learning_rate": 8.577149034827581e-05, + "loss": 4.2507, + "step": 317250 + }, + { + "epoch": 2.731577134986226, + "grad_norm": 1.0234375, + "learning_rate": 8.574467447544126e-05, + "loss": 4.0641, + "step": 317300 + }, + { + "epoch": 2.7320075757575757, + "grad_norm": 1.6875, + "learning_rate": 8.571785964891696e-05, + "loss": 4.2794, + "step": 317350 + }, + { + "epoch": 2.7324380165289255, + "grad_norm": 3.609375, + "learning_rate": 8.569104587067097e-05, + "loss": 4.4496, + "step": 317400 + }, + { + "epoch": 2.7328684573002757, + "grad_norm": 4.15625, + "learning_rate": 8.566423314267144e-05, + "loss": 4.6087, + "step": 317450 + }, + { + "epoch": 2.7332988980716255, + "grad_norm": 2.296875, + "learning_rate": 8.563742146688638e-05, + "loss": 4.4325, + "step": 317500 + }, + { + "epoch": 2.7337293388429753, + "grad_norm": 6.09375, + "learning_rate": 8.561061084528366e-05, + "loss": 4.1216, + "step": 317550 + }, + { + "epoch": 2.734159779614325, + "grad_norm": 4.3125, + "learning_rate": 8.558380127983119e-05, + "loss": 4.425, + "step": 317600 + }, + { + "epoch": 2.734590220385675, + "grad_norm": 4.0, + "learning_rate": 8.555699277249664e-05, + "loss": 4.1315, + "step": 317650 + }, + { + "epoch": 2.7350206611570247, + "grad_norm": 4.6875, + "learning_rate": 8.553018532524776e-05, + "loss": 4.515, + "step": 317700 + }, + { + "epoch": 2.7354511019283745, + "grad_norm": 2.796875, + "learning_rate": 8.550337894005213e-05, + "loss": 4.1332, + "step": 317750 + }, + { + "epoch": 2.7358815426997243, + "grad_norm": 4.21875, + "learning_rate": 8.547657361887731e-05, + "loss": 4.2553, + "step": 317800 + }, + { + "epoch": 2.7363119834710745, + "grad_norm": 4.0, + "learning_rate": 8.544976936369079e-05, + "loss": 4.5011, + "step": 317850 + }, + { + "epoch": 2.7367424242424243, + "grad_norm": 4.34375, + "learning_rate": 8.542296617645981e-05, + "loss": 4.4846, + "step": 317900 + }, + { + "epoch": 2.737172865013774, + "grad_norm": 5.75, + "learning_rate": 8.539616405915176e-05, + "loss": 4.4545, + "step": 317950 + }, + { + "epoch": 2.737603305785124, + "grad_norm": 2.28125, + "learning_rate": 8.536936301373389e-05, + "loss": 4.4221, + "step": 318000 + }, + { + "epoch": 2.737603305785124, + "eval_loss": 4.999110221862793, + "eval_runtime": 25.5297, + "eval_samples_per_second": 25.069, + "eval_steps_per_second": 12.534, + "eval_tts_loss": 7.4143139091922885, + "step": 318000 + }, + { + "epoch": 2.7380337465564737, + "grad_norm": 4.25, + "learning_rate": 8.534256304217325e-05, + "loss": 4.3131, + "step": 318050 + }, + { + "epoch": 2.738464187327824, + "grad_norm": 2.25, + "learning_rate": 8.531576414643701e-05, + "loss": 4.1048, + "step": 318100 + }, + { + "epoch": 2.7388946280991737, + "grad_norm": 1.7265625, + "learning_rate": 8.528896632849202e-05, + "loss": 4.3546, + "step": 318150 + }, + { + "epoch": 2.7393250688705235, + "grad_norm": 3.234375, + "learning_rate": 8.526216959030528e-05, + "loss": 4.2491, + "step": 318200 + }, + { + "epoch": 2.7397555096418733, + "grad_norm": 3.9375, + "learning_rate": 8.523537393384357e-05, + "loss": 4.1753, + "step": 318250 + }, + { + "epoch": 2.740185950413223, + "grad_norm": 2.46875, + "learning_rate": 8.520857936107367e-05, + "loss": 4.1779, + "step": 318300 + }, + { + "epoch": 2.740616391184573, + "grad_norm": 2.125, + "learning_rate": 8.518178587396222e-05, + "loss": 4.3107, + "step": 318350 + }, + { + "epoch": 2.7410468319559227, + "grad_norm": 1.9140625, + "learning_rate": 8.51549934744758e-05, + "loss": 4.4846, + "step": 318400 + }, + { + "epoch": 2.7414772727272725, + "grad_norm": 3.265625, + "learning_rate": 8.512820216458088e-05, + "loss": 4.4076, + "step": 318450 + }, + { + "epoch": 2.7419077134986227, + "grad_norm": 3.453125, + "learning_rate": 8.510141194624397e-05, + "loss": 4.3227, + "step": 318500 + }, + { + "epoch": 2.7423381542699725, + "grad_norm": 2.90625, + "learning_rate": 8.507462282143134e-05, + "loss": 4.4398, + "step": 318550 + }, + { + "epoch": 2.7427685950413223, + "grad_norm": 2.9375, + "learning_rate": 8.504783479210931e-05, + "loss": 4.3605, + "step": 318600 + }, + { + "epoch": 2.743199035812672, + "grad_norm": 4.28125, + "learning_rate": 8.502104786024402e-05, + "loss": 4.2704, + "step": 318650 + }, + { + "epoch": 2.743629476584022, + "grad_norm": 3.71875, + "learning_rate": 8.49942620278016e-05, + "loss": 4.5545, + "step": 318700 + }, + { + "epoch": 2.744059917355372, + "grad_norm": 3.5, + "learning_rate": 8.496747729674804e-05, + "loss": 4.6623, + "step": 318750 + }, + { + "epoch": 2.744490358126722, + "grad_norm": 2.78125, + "learning_rate": 8.494069366904928e-05, + "loss": 4.2208, + "step": 318800 + }, + { + "epoch": 2.7449207988980717, + "grad_norm": 1.859375, + "learning_rate": 8.491391114667127e-05, + "loss": 3.9635, + "step": 318850 + }, + { + "epoch": 2.7453512396694215, + "grad_norm": 4.40625, + "learning_rate": 8.48871297315797e-05, + "loss": 3.867, + "step": 318900 + }, + { + "epoch": 2.7457816804407713, + "grad_norm": 6.3125, + "learning_rate": 8.486034942574028e-05, + "loss": 4.4222, + "step": 318950 + }, + { + "epoch": 2.746212121212121, + "grad_norm": 3.53125, + "learning_rate": 8.483357023111862e-05, + "loss": 4.3181, + "step": 319000 + }, + { + "epoch": 2.746642561983471, + "grad_norm": 1.7265625, + "learning_rate": 8.48067921496803e-05, + "loss": 4.6035, + "step": 319050 + }, + { + "epoch": 2.7470730027548207, + "grad_norm": 2.578125, + "learning_rate": 8.478001518339074e-05, + "loss": 4.1172, + "step": 319100 + }, + { + "epoch": 2.747503443526171, + "grad_norm": 3.234375, + "learning_rate": 8.475323933421534e-05, + "loss": 3.7132, + "step": 319150 + }, + { + "epoch": 2.7479338842975207, + "grad_norm": 2.90625, + "learning_rate": 8.472646460411937e-05, + "loss": 3.9813, + "step": 319200 + }, + { + "epoch": 2.7483643250688705, + "grad_norm": 3.28125, + "learning_rate": 8.469969099506799e-05, + "loss": 4.2913, + "step": 319250 + }, + { + "epoch": 2.7487947658402203, + "grad_norm": 2.84375, + "learning_rate": 8.467291850902638e-05, + "loss": 4.1881, + "step": 319300 + }, + { + "epoch": 2.74922520661157, + "grad_norm": 1.1484375, + "learning_rate": 8.46461471479596e-05, + "loss": 3.9789, + "step": 319350 + }, + { + "epoch": 2.7496556473829203, + "grad_norm": 2.40625, + "learning_rate": 8.461937691383261e-05, + "loss": 4.2677, + "step": 319400 + }, + { + "epoch": 2.75008608815427, + "grad_norm": 3.1875, + "learning_rate": 8.459260780861021e-05, + "loss": 4.3883, + "step": 319450 + }, + { + "epoch": 2.75051652892562, + "grad_norm": 4.1875, + "learning_rate": 8.456583983425726e-05, + "loss": 4.1065, + "step": 319500 + }, + { + "epoch": 2.7509469696969697, + "grad_norm": 2.875, + "learning_rate": 8.453907299273847e-05, + "loss": 4.1442, + "step": 319550 + }, + { + "epoch": 2.7513774104683195, + "grad_norm": 2.28125, + "learning_rate": 8.451230728601843e-05, + "loss": 4.2141, + "step": 319600 + }, + { + "epoch": 2.7518078512396693, + "grad_norm": 2.96875, + "learning_rate": 8.448554271606176e-05, + "loss": 4.4881, + "step": 319650 + }, + { + "epoch": 2.752238292011019, + "grad_norm": 3.0, + "learning_rate": 8.445877928483283e-05, + "loss": 4.2699, + "step": 319700 + }, + { + "epoch": 2.752668732782369, + "grad_norm": 5.09375, + "learning_rate": 8.443201699429609e-05, + "loss": 4.3307, + "step": 319750 + }, + { + "epoch": 2.753099173553719, + "grad_norm": 3.28125, + "learning_rate": 8.440525584641579e-05, + "loss": 4.5304, + "step": 319800 + }, + { + "epoch": 2.753529614325069, + "grad_norm": 1.5390625, + "learning_rate": 8.437849584315616e-05, + "loss": 4.2557, + "step": 319850 + }, + { + "epoch": 2.7539600550964187, + "grad_norm": 5.40625, + "learning_rate": 8.435173698648136e-05, + "loss": 3.9056, + "step": 319900 + }, + { + "epoch": 2.7543904958677685, + "grad_norm": 1.453125, + "learning_rate": 8.432497927835536e-05, + "loss": 3.999, + "step": 319950 + }, + { + "epoch": 2.7548209366391183, + "grad_norm": 3.0625, + "learning_rate": 8.429822272074214e-05, + "loss": 4.0828, + "step": 320000 + }, + { + "epoch": 2.7552513774104685, + "grad_norm": 2.09375, + "learning_rate": 8.427146731560564e-05, + "loss": 4.4632, + "step": 320050 + }, + { + "epoch": 2.7556818181818183, + "grad_norm": 3.546875, + "learning_rate": 8.424471306490956e-05, + "loss": 4.6003, + "step": 320100 + }, + { + "epoch": 2.756112258953168, + "grad_norm": 3.4375, + "learning_rate": 8.421795997061768e-05, + "loss": 4.1435, + "step": 320150 + }, + { + "epoch": 2.756542699724518, + "grad_norm": 3.453125, + "learning_rate": 8.419120803469354e-05, + "loss": 4.5449, + "step": 320200 + }, + { + "epoch": 2.7569731404958677, + "grad_norm": 1.6953125, + "learning_rate": 8.416445725910074e-05, + "loss": 4.2561, + "step": 320250 + }, + { + "epoch": 2.7574035812672175, + "grad_norm": 3.65625, + "learning_rate": 8.413770764580267e-05, + "loss": 4.5275, + "step": 320300 + }, + { + "epoch": 2.7578340220385673, + "grad_norm": 3.640625, + "learning_rate": 8.411095919676275e-05, + "loss": 3.8868, + "step": 320350 + }, + { + "epoch": 2.758264462809917, + "grad_norm": 3.328125, + "learning_rate": 8.408421191394428e-05, + "loss": 4.6279, + "step": 320400 + }, + { + "epoch": 2.7586949035812673, + "grad_norm": 4.71875, + "learning_rate": 8.405746579931039e-05, + "loss": 4.6871, + "step": 320450 + }, + { + "epoch": 2.759125344352617, + "grad_norm": 4.125, + "learning_rate": 8.403072085482418e-05, + "loss": 4.4073, + "step": 320500 + }, + { + "epoch": 2.759555785123967, + "grad_norm": 4.3125, + "learning_rate": 8.400397708244872e-05, + "loss": 3.9658, + "step": 320550 + }, + { + "epoch": 2.7599862258953167, + "grad_norm": 4.5625, + "learning_rate": 8.397723448414692e-05, + "loss": 4.3502, + "step": 320600 + }, + { + "epoch": 2.7604166666666665, + "grad_norm": 6.15625, + "learning_rate": 8.395049306188167e-05, + "loss": 4.7147, + "step": 320650 + }, + { + "epoch": 2.7608471074380168, + "grad_norm": 2.8125, + "learning_rate": 8.392375281761564e-05, + "loss": 4.5776, + "step": 320700 + }, + { + "epoch": 2.7612775482093666, + "grad_norm": 3.984375, + "learning_rate": 8.38970137533116e-05, + "loss": 4.6361, + "step": 320750 + }, + { + "epoch": 2.7617079889807163, + "grad_norm": 2.96875, + "learning_rate": 8.387027587093208e-05, + "loss": 4.465, + "step": 320800 + }, + { + "epoch": 2.762138429752066, + "grad_norm": 3.8125, + "learning_rate": 8.384353917243961e-05, + "loss": 4.4795, + "step": 320850 + }, + { + "epoch": 2.762568870523416, + "grad_norm": 3.84375, + "learning_rate": 8.381680365979663e-05, + "loss": 4.6065, + "step": 320900 + }, + { + "epoch": 2.7629993112947657, + "grad_norm": 2.328125, + "learning_rate": 8.379006933496544e-05, + "loss": 4.1891, + "step": 320950 + }, + { + "epoch": 2.7634297520661155, + "grad_norm": 3.21875, + "learning_rate": 8.376333619990828e-05, + "loss": 4.2837, + "step": 321000 + }, + { + "epoch": 2.7634297520661155, + "eval_loss": 4.997475624084473, + "eval_runtime": 25.4314, + "eval_samples_per_second": 25.166, + "eval_steps_per_second": 12.583, + "eval_tts_loss": 7.414691866568385, + "step": 321000 + }, + { + "epoch": 2.7638601928374653, + "grad_norm": 2.3125, + "learning_rate": 8.373660425658728e-05, + "loss": 4.1129, + "step": 321050 + }, + { + "epoch": 2.7642906336088156, + "grad_norm": 3.703125, + "learning_rate": 8.370987350696456e-05, + "loss": 3.9958, + "step": 321100 + }, + { + "epoch": 2.7647210743801653, + "grad_norm": 1.984375, + "learning_rate": 8.368314395300206e-05, + "loss": 4.2864, + "step": 321150 + }, + { + "epoch": 2.765151515151515, + "grad_norm": 2.96875, + "learning_rate": 8.365641559666173e-05, + "loss": 4.3726, + "step": 321200 + }, + { + "epoch": 2.765581955922865, + "grad_norm": 3.96875, + "learning_rate": 8.362968843990528e-05, + "loss": 4.388, + "step": 321250 + }, + { + "epoch": 2.7660123966942147, + "grad_norm": 2.921875, + "learning_rate": 8.360296248469451e-05, + "loss": 4.1838, + "step": 321300 + }, + { + "epoch": 2.766442837465565, + "grad_norm": 2.625, + "learning_rate": 8.3576237732991e-05, + "loss": 4.2318, + "step": 321350 + }, + { + "epoch": 2.7668732782369148, + "grad_norm": 3.21875, + "learning_rate": 8.35495141867563e-05, + "loss": 4.6858, + "step": 321400 + }, + { + "epoch": 2.7673037190082646, + "grad_norm": 3.796875, + "learning_rate": 8.35227918479519e-05, + "loss": 3.9743, + "step": 321450 + }, + { + "epoch": 2.7677341597796143, + "grad_norm": 2.53125, + "learning_rate": 8.349607071853909e-05, + "loss": 3.9725, + "step": 321500 + }, + { + "epoch": 2.768164600550964, + "grad_norm": 2.828125, + "learning_rate": 8.346935080047917e-05, + "loss": 4.2781, + "step": 321550 + }, + { + "epoch": 2.768595041322314, + "grad_norm": 4.375, + "learning_rate": 8.344263209573336e-05, + "loss": 4.5143, + "step": 321600 + }, + { + "epoch": 2.7690254820936637, + "grad_norm": 2.703125, + "learning_rate": 8.34159146062627e-05, + "loss": 4.4649, + "step": 321650 + }, + { + "epoch": 2.7694559228650135, + "grad_norm": 3.921875, + "learning_rate": 8.338919833402829e-05, + "loss": 4.2749, + "step": 321700 + }, + { + "epoch": 2.7698863636363638, + "grad_norm": 5.0, + "learning_rate": 8.336248328099092e-05, + "loss": 4.374, + "step": 321750 + }, + { + "epoch": 2.7703168044077136, + "grad_norm": 2.796875, + "learning_rate": 8.333576944911151e-05, + "loss": 4.0905, + "step": 321800 + }, + { + "epoch": 2.7707472451790633, + "grad_norm": 3.015625, + "learning_rate": 8.330905684035076e-05, + "loss": 4.5605, + "step": 321850 + }, + { + "epoch": 2.771177685950413, + "grad_norm": 1.3515625, + "learning_rate": 8.328234545666931e-05, + "loss": 4.5177, + "step": 321900 + }, + { + "epoch": 2.771608126721763, + "grad_norm": 4.21875, + "learning_rate": 8.325563530002778e-05, + "loss": 4.6202, + "step": 321950 + }, + { + "epoch": 2.772038567493113, + "grad_norm": 4.125, + "learning_rate": 8.322892637238657e-05, + "loss": 4.573, + "step": 322000 + }, + { + "epoch": 2.772469008264463, + "grad_norm": 2.453125, + "learning_rate": 8.320221867570605e-05, + "loss": 4.1262, + "step": 322050 + }, + { + "epoch": 2.7728994490358128, + "grad_norm": 4.4375, + "learning_rate": 8.317551221194657e-05, + "loss": 4.2955, + "step": 322100 + }, + { + "epoch": 2.7733298898071626, + "grad_norm": 3.09375, + "learning_rate": 8.314880698306828e-05, + "loss": 4.0413, + "step": 322150 + }, + { + "epoch": 2.7737603305785123, + "grad_norm": 2.875, + "learning_rate": 8.312210299103133e-05, + "loss": 4.5331, + "step": 322200 + }, + { + "epoch": 2.774190771349862, + "grad_norm": 2.203125, + "learning_rate": 8.309540023779567e-05, + "loss": 4.3642, + "step": 322250 + }, + { + "epoch": 2.774621212121212, + "grad_norm": 3.1875, + "learning_rate": 8.306869872532129e-05, + "loss": 4.3883, + "step": 322300 + }, + { + "epoch": 2.7750516528925617, + "grad_norm": 4.84375, + "learning_rate": 8.304199845556797e-05, + "loss": 4.3788, + "step": 322350 + }, + { + "epoch": 2.775482093663912, + "grad_norm": 4.625, + "learning_rate": 8.301529943049547e-05, + "loss": 4.217, + "step": 322400 + }, + { + "epoch": 2.7759125344352618, + "grad_norm": 2.03125, + "learning_rate": 8.29886016520635e-05, + "loss": 4.4452, + "step": 322450 + }, + { + "epoch": 2.7763429752066116, + "grad_norm": 5.09375, + "learning_rate": 8.296190512223154e-05, + "loss": 3.9561, + "step": 322500 + }, + { + "epoch": 2.7767734159779613, + "grad_norm": 2.828125, + "learning_rate": 8.293520984295907e-05, + "loss": 4.6054, + "step": 322550 + }, + { + "epoch": 2.777203856749311, + "grad_norm": 1.8046875, + "learning_rate": 8.290851581620548e-05, + "loss": 4.2483, + "step": 322600 + }, + { + "epoch": 2.7776342975206614, + "grad_norm": 3.328125, + "learning_rate": 8.288182304393007e-05, + "loss": 4.6909, + "step": 322650 + }, + { + "epoch": 2.778064738292011, + "grad_norm": 4.4375, + "learning_rate": 8.2855131528092e-05, + "loss": 4.0004, + "step": 322700 + }, + { + "epoch": 2.778495179063361, + "grad_norm": 5.03125, + "learning_rate": 8.282844127065043e-05, + "loss": 4.2721, + "step": 322750 + }, + { + "epoch": 2.7789256198347108, + "grad_norm": 2.671875, + "learning_rate": 8.280175227356432e-05, + "loss": 4.1614, + "step": 322800 + }, + { + "epoch": 2.7793560606060606, + "grad_norm": 2.765625, + "learning_rate": 8.277506453879254e-05, + "loss": 4.5671, + "step": 322850 + }, + { + "epoch": 2.7797865013774103, + "grad_norm": 1.8046875, + "learning_rate": 8.274837806829398e-05, + "loss": 4.1722, + "step": 322900 + }, + { + "epoch": 2.78021694214876, + "grad_norm": 4.09375, + "learning_rate": 8.272169286402738e-05, + "loss": 4.592, + "step": 322950 + }, + { + "epoch": 2.78064738292011, + "grad_norm": 3.390625, + "learning_rate": 8.269500892795136e-05, + "loss": 4.2389, + "step": 323000 + }, + { + "epoch": 2.78107782369146, + "grad_norm": 3.265625, + "learning_rate": 8.266832626202441e-05, + "loss": 4.3429, + "step": 323050 + }, + { + "epoch": 2.78150826446281, + "grad_norm": 7.0, + "learning_rate": 8.264164486820502e-05, + "loss": 4.503, + "step": 323100 + }, + { + "epoch": 2.7819387052341598, + "grad_norm": 4.03125, + "learning_rate": 8.261496474845156e-05, + "loss": 4.3183, + "step": 323150 + }, + { + "epoch": 2.7823691460055096, + "grad_norm": 1.8125, + "learning_rate": 8.258828590472229e-05, + "loss": 4.2086, + "step": 323200 + }, + { + "epoch": 2.7827995867768593, + "grad_norm": 2.65625, + "learning_rate": 8.25616083389754e-05, + "loss": 4.4939, + "step": 323250 + }, + { + "epoch": 2.7832300275482096, + "grad_norm": 4.96875, + "learning_rate": 8.253493205316889e-05, + "loss": 4.1386, + "step": 323300 + }, + { + "epoch": 2.7836604683195594, + "grad_norm": 2.0, + "learning_rate": 8.250825704926081e-05, + "loss": 3.9069, + "step": 323350 + }, + { + "epoch": 2.784090909090909, + "grad_norm": 2.75, + "learning_rate": 8.248158332920902e-05, + "loss": 4.3003, + "step": 323400 + }, + { + "epoch": 2.784521349862259, + "grad_norm": 2.03125, + "learning_rate": 8.245491089497133e-05, + "loss": 4.5793, + "step": 323450 + }, + { + "epoch": 2.7849517906336088, + "grad_norm": 3.984375, + "learning_rate": 8.242823974850544e-05, + "loss": 4.0986, + "step": 323500 + }, + { + "epoch": 2.7853822314049586, + "grad_norm": 3.359375, + "learning_rate": 8.240156989176893e-05, + "loss": 4.2117, + "step": 323550 + }, + { + "epoch": 2.7858126721763083, + "grad_norm": 1.7421875, + "learning_rate": 8.237490132671932e-05, + "loss": 4.8066, + "step": 323600 + }, + { + "epoch": 2.786243112947658, + "grad_norm": 2.28125, + "learning_rate": 8.234823405531405e-05, + "loss": 4.4939, + "step": 323650 + }, + { + "epoch": 2.7866735537190084, + "grad_norm": 2.453125, + "learning_rate": 8.232156807951041e-05, + "loss": 4.3521, + "step": 323700 + }, + { + "epoch": 2.787103994490358, + "grad_norm": 1.1015625, + "learning_rate": 8.229490340126567e-05, + "loss": 4.1612, + "step": 323750 + }, + { + "epoch": 2.787534435261708, + "grad_norm": 1.2265625, + "learning_rate": 8.226824002253689e-05, + "loss": 3.9199, + "step": 323800 + }, + { + "epoch": 2.7879648760330578, + "grad_norm": 3.234375, + "learning_rate": 8.224157794528115e-05, + "loss": 4.5263, + "step": 323850 + }, + { + "epoch": 2.7883953168044076, + "grad_norm": 2.625, + "learning_rate": 8.221491717145537e-05, + "loss": 4.17, + "step": 323900 + }, + { + "epoch": 2.788825757575758, + "grad_norm": 2.203125, + "learning_rate": 8.218825770301639e-05, + "loss": 4.3201, + "step": 323950 + }, + { + "epoch": 2.7892561983471076, + "grad_norm": 4.21875, + "learning_rate": 8.216159954192103e-05, + "loss": 4.525, + "step": 324000 + }, + { + "epoch": 2.7892561983471076, + "eval_loss": 4.999444484710693, + "eval_runtime": 24.0622, + "eval_samples_per_second": 26.598, + "eval_steps_per_second": 13.299, + "eval_tts_loss": 7.436658738592688, + "step": 324000 + }, + { + "epoch": 2.7896866391184574, + "grad_norm": 3.40625, + "learning_rate": 8.213494269012585e-05, + "loss": 4.4447, + "step": 324050 + }, + { + "epoch": 2.790117079889807, + "grad_norm": 1.421875, + "learning_rate": 8.210828714958743e-05, + "loss": 4.5229, + "step": 324100 + }, + { + "epoch": 2.790547520661157, + "grad_norm": 1.9375, + "learning_rate": 8.208163292226225e-05, + "loss": 4.4509, + "step": 324150 + }, + { + "epoch": 2.7909779614325068, + "grad_norm": 3.15625, + "learning_rate": 8.205498001010665e-05, + "loss": 3.9698, + "step": 324200 + }, + { + "epoch": 2.7914084022038566, + "grad_norm": 3.71875, + "learning_rate": 8.202832841507693e-05, + "loss": 4.2365, + "step": 324250 + }, + { + "epoch": 2.7918388429752063, + "grad_norm": 3.390625, + "learning_rate": 8.200167813912922e-05, + "loss": 4.1722, + "step": 324300 + }, + { + "epoch": 2.7922692837465566, + "grad_norm": 3.703125, + "learning_rate": 8.197502918421962e-05, + "loss": 4.169, + "step": 324350 + }, + { + "epoch": 2.7926997245179064, + "grad_norm": 4.15625, + "learning_rate": 8.194838155230407e-05, + "loss": 4.5398, + "step": 324400 + }, + { + "epoch": 2.793130165289256, + "grad_norm": 2.671875, + "learning_rate": 8.192173524533848e-05, + "loss": 4.2018, + "step": 324450 + }, + { + "epoch": 2.793560606060606, + "grad_norm": 2.578125, + "learning_rate": 8.189509026527865e-05, + "loss": 3.9646, + "step": 324500 + }, + { + "epoch": 2.7939910468319558, + "grad_norm": 3.234375, + "learning_rate": 8.186844661408027e-05, + "loss": 4.381, + "step": 324550 + }, + { + "epoch": 2.794421487603306, + "grad_norm": 3.328125, + "learning_rate": 8.184180429369883e-05, + "loss": 4.3156, + "step": 324600 + }, + { + "epoch": 2.794851928374656, + "grad_norm": 1.7734375, + "learning_rate": 8.18151633060899e-05, + "loss": 4.1109, + "step": 324650 + }, + { + "epoch": 2.7952823691460056, + "grad_norm": 3.578125, + "learning_rate": 8.178852365320889e-05, + "loss": 4.4717, + "step": 324700 + }, + { + "epoch": 2.7957128099173554, + "grad_norm": 2.0625, + "learning_rate": 8.176188533701102e-05, + "loss": 4.3858, + "step": 324750 + }, + { + "epoch": 2.796143250688705, + "grad_norm": 3.421875, + "learning_rate": 8.173524835945159e-05, + "loss": 4.4907, + "step": 324800 + }, + { + "epoch": 2.796573691460055, + "grad_norm": 3.6875, + "learning_rate": 8.170861272248557e-05, + "loss": 4.3155, + "step": 324850 + }, + { + "epoch": 2.7970041322314048, + "grad_norm": 3.75, + "learning_rate": 8.168197842806805e-05, + "loss": 4.5615, + "step": 324900 + }, + { + "epoch": 2.7974345730027546, + "grad_norm": 2.90625, + "learning_rate": 8.165534547815387e-05, + "loss": 4.0864, + "step": 324950 + }, + { + "epoch": 2.797865013774105, + "grad_norm": 2.96875, + "learning_rate": 8.16287138746979e-05, + "loss": 4.136, + "step": 325000 + }, + { + "epoch": 2.7982954545454546, + "grad_norm": 5.9375, + "learning_rate": 8.160208361965483e-05, + "loss": 4.412, + "step": 325050 + }, + { + "epoch": 2.7987258953168044, + "grad_norm": 1.6171875, + "learning_rate": 8.157545471497919e-05, + "loss": 4.2311, + "step": 325100 + }, + { + "epoch": 2.799156336088154, + "grad_norm": 2.109375, + "learning_rate": 8.154882716262552e-05, + "loss": 4.1473, + "step": 325150 + }, + { + "epoch": 2.799586776859504, + "grad_norm": 3.21875, + "learning_rate": 8.152220096454826e-05, + "loss": 3.4352, + "step": 325200 + }, + { + "epoch": 2.800017217630854, + "grad_norm": 3.78125, + "learning_rate": 8.149557612270169e-05, + "loss": 3.9129, + "step": 325250 + }, + { + "epoch": 2.800447658402204, + "grad_norm": 0.6484375, + "learning_rate": 8.146895263904004e-05, + "loss": 4.5044, + "step": 325300 + }, + { + "epoch": 2.800878099173554, + "grad_norm": 4.0625, + "learning_rate": 8.144233051551738e-05, + "loss": 4.4709, + "step": 325350 + }, + { + "epoch": 2.8013085399449036, + "grad_norm": 3.234375, + "learning_rate": 8.141570975408773e-05, + "loss": 4.1304, + "step": 325400 + }, + { + "epoch": 2.8017389807162534, + "grad_norm": 4.09375, + "learning_rate": 8.138909035670499e-05, + "loss": 4.0429, + "step": 325450 + }, + { + "epoch": 2.802169421487603, + "grad_norm": 4.0, + "learning_rate": 8.136247232532298e-05, + "loss": 4.4678, + "step": 325500 + }, + { + "epoch": 2.802599862258953, + "grad_norm": 4.25, + "learning_rate": 8.133585566189542e-05, + "loss": 4.4711, + "step": 325550 + }, + { + "epoch": 2.8030303030303028, + "grad_norm": 4.96875, + "learning_rate": 8.13092403683759e-05, + "loss": 4.771, + "step": 325600 + }, + { + "epoch": 2.803460743801653, + "grad_norm": 0.89453125, + "learning_rate": 8.128262644671788e-05, + "loss": 4.1548, + "step": 325650 + }, + { + "epoch": 2.803891184573003, + "grad_norm": 2.125, + "learning_rate": 8.125601389887484e-05, + "loss": 4.255, + "step": 325700 + }, + { + "epoch": 2.8043216253443526, + "grad_norm": 3.609375, + "learning_rate": 8.122940272680005e-05, + "loss": 4.3555, + "step": 325750 + }, + { + "epoch": 2.8047520661157024, + "grad_norm": 4.25, + "learning_rate": 8.120279293244673e-05, + "loss": 4.3582, + "step": 325800 + }, + { + "epoch": 2.8051825068870526, + "grad_norm": 2.296875, + "learning_rate": 8.117618451776792e-05, + "loss": 4.4223, + "step": 325850 + }, + { + "epoch": 2.8056129476584024, + "grad_norm": 3.171875, + "learning_rate": 8.11495774847167e-05, + "loss": 4.4934, + "step": 325900 + }, + { + "epoch": 2.806043388429752, + "grad_norm": 4.71875, + "learning_rate": 8.11229718352459e-05, + "loss": 4.4963, + "step": 325950 + }, + { + "epoch": 2.806473829201102, + "grad_norm": 2.40625, + "learning_rate": 8.109636757130838e-05, + "loss": 4.4024, + "step": 326000 + }, + { + "epoch": 2.806904269972452, + "grad_norm": 4.53125, + "learning_rate": 8.106976469485683e-05, + "loss": 4.5773, + "step": 326050 + }, + { + "epoch": 2.8073347107438016, + "grad_norm": 4.25, + "learning_rate": 8.104316320784381e-05, + "loss": 4.3996, + "step": 326100 + }, + { + "epoch": 2.8077651515151514, + "grad_norm": 3.484375, + "learning_rate": 8.10165631122218e-05, + "loss": 4.6319, + "step": 326150 + }, + { + "epoch": 2.808195592286501, + "grad_norm": 3.765625, + "learning_rate": 8.09899644099432e-05, + "loss": 4.1598, + "step": 326200 + }, + { + "epoch": 2.808626033057851, + "grad_norm": 3.90625, + "learning_rate": 8.096336710296035e-05, + "loss": 4.3487, + "step": 326250 + }, + { + "epoch": 2.809056473829201, + "grad_norm": 1.34375, + "learning_rate": 8.093677119322535e-05, + "loss": 4.4717, + "step": 326300 + }, + { + "epoch": 2.809486914600551, + "grad_norm": 3.5, + "learning_rate": 8.09101766826904e-05, + "loss": 4.333, + "step": 326350 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 2.03125, + "learning_rate": 8.088358357330739e-05, + "loss": 4.4636, + "step": 326400 + }, + { + "epoch": 2.8103477961432506, + "grad_norm": 1.5859375, + "learning_rate": 8.085699186702818e-05, + "loss": 4.2457, + "step": 326450 + }, + { + "epoch": 2.810778236914601, + "grad_norm": 3.9375, + "learning_rate": 8.083040156580457e-05, + "loss": 4.3138, + "step": 326500 + }, + { + "epoch": 2.8112086776859506, + "grad_norm": 3.671875, + "learning_rate": 8.080381267158827e-05, + "loss": 4.4918, + "step": 326550 + }, + { + "epoch": 2.8116391184573004, + "grad_norm": 3.65625, + "learning_rate": 8.077722518633085e-05, + "loss": 4.5321, + "step": 326600 + }, + { + "epoch": 2.81206955922865, + "grad_norm": 7.03125, + "learning_rate": 8.07506391119837e-05, + "loss": 4.3943, + "step": 326650 + }, + { + "epoch": 2.8125, + "grad_norm": 1.8984375, + "learning_rate": 8.072405445049821e-05, + "loss": 4.6043, + "step": 326700 + }, + { + "epoch": 2.81293044077135, + "grad_norm": 2.984375, + "learning_rate": 8.069747120382568e-05, + "loss": 4.4846, + "step": 326750 + }, + { + "epoch": 2.8133608815426996, + "grad_norm": 4.96875, + "learning_rate": 8.06708893739172e-05, + "loss": 4.2954, + "step": 326800 + }, + { + "epoch": 2.8137913223140494, + "grad_norm": 3.46875, + "learning_rate": 8.064430896272392e-05, + "loss": 4.4399, + "step": 326850 + }, + { + "epoch": 2.814221763085399, + "grad_norm": 2.859375, + "learning_rate": 8.061772997219665e-05, + "loss": 4.3473, + "step": 326900 + }, + { + "epoch": 2.8146522038567494, + "grad_norm": 3.625, + "learning_rate": 8.059115240428634e-05, + "loss": 4.1814, + "step": 326950 + }, + { + "epoch": 2.815082644628099, + "grad_norm": 3.671875, + "learning_rate": 8.056457626094364e-05, + "loss": 4.4147, + "step": 327000 + }, + { + "epoch": 2.815082644628099, + "eval_loss": 4.998778343200684, + "eval_runtime": 24.2422, + "eval_samples_per_second": 26.4, + "eval_steps_per_second": 13.2, + "eval_tts_loss": 7.420693159245713, + "step": 327000 + }, + { + "epoch": 2.815513085399449, + "grad_norm": 3.6875, + "learning_rate": 8.053800154411925e-05, + "loss": 4.2546, + "step": 327050 + }, + { + "epoch": 2.815943526170799, + "grad_norm": 3.296875, + "learning_rate": 8.051142825576369e-05, + "loss": 4.4716, + "step": 327100 + }, + { + "epoch": 2.816373966942149, + "grad_norm": 2.125, + "learning_rate": 8.048485639782735e-05, + "loss": 4.811, + "step": 327150 + }, + { + "epoch": 2.816804407713499, + "grad_norm": 5.0625, + "learning_rate": 8.045828597226056e-05, + "loss": 4.6568, + "step": 327200 + }, + { + "epoch": 2.8172348484848486, + "grad_norm": 5.28125, + "learning_rate": 8.043171698101354e-05, + "loss": 4.1752, + "step": 327250 + }, + { + "epoch": 2.8176652892561984, + "grad_norm": 3.859375, + "learning_rate": 8.040514942603636e-05, + "loss": 4.2854, + "step": 327300 + }, + { + "epoch": 2.818095730027548, + "grad_norm": 2.484375, + "learning_rate": 8.037858330927912e-05, + "loss": 4.3418, + "step": 327350 + }, + { + "epoch": 2.818526170798898, + "grad_norm": 1.2109375, + "learning_rate": 8.03520186326916e-05, + "loss": 4.0692, + "step": 327400 + }, + { + "epoch": 2.818956611570248, + "grad_norm": 3.25, + "learning_rate": 8.032545539822365e-05, + "loss": 4.7863, + "step": 327450 + }, + { + "epoch": 2.8193870523415976, + "grad_norm": 2.46875, + "learning_rate": 8.029889360782493e-05, + "loss": 4.0983, + "step": 327500 + }, + { + "epoch": 2.8198174931129474, + "grad_norm": 5.5625, + "learning_rate": 8.027233326344502e-05, + "loss": 4.908, + "step": 327550 + }, + { + "epoch": 2.8202479338842976, + "grad_norm": 3.34375, + "learning_rate": 8.024577436703346e-05, + "loss": 4.3652, + "step": 327600 + }, + { + "epoch": 2.8206783746556474, + "grad_norm": 2.0625, + "learning_rate": 8.021921692053953e-05, + "loss": 4.3827, + "step": 327650 + }, + { + "epoch": 2.821108815426997, + "grad_norm": 5.0625, + "learning_rate": 8.01926609259125e-05, + "loss": 4.5317, + "step": 327700 + }, + { + "epoch": 2.821539256198347, + "grad_norm": 3.140625, + "learning_rate": 8.016610638510154e-05, + "loss": 4.5239, + "step": 327750 + }, + { + "epoch": 2.8219696969696972, + "grad_norm": 1.890625, + "learning_rate": 8.013955330005571e-05, + "loss": 4.1893, + "step": 327800 + }, + { + "epoch": 2.822400137741047, + "grad_norm": 4.0, + "learning_rate": 8.011300167272398e-05, + "loss": 4.3599, + "step": 327850 + }, + { + "epoch": 2.822830578512397, + "grad_norm": 3.03125, + "learning_rate": 8.008645150505507e-05, + "loss": 3.8317, + "step": 327900 + }, + { + "epoch": 2.8232610192837466, + "grad_norm": 2.546875, + "learning_rate": 8.005990279899782e-05, + "loss": 4.2058, + "step": 327950 + }, + { + "epoch": 2.8236914600550964, + "grad_norm": 2.875, + "learning_rate": 8.003335555650077e-05, + "loss": 4.5639, + "step": 328000 + }, + { + "epoch": 2.824121900826446, + "grad_norm": 2.53125, + "learning_rate": 8.000680977951245e-05, + "loss": 4.4624, + "step": 328050 + }, + { + "epoch": 2.824552341597796, + "grad_norm": 7.5625, + "learning_rate": 7.998026546998134e-05, + "loss": 4.2624, + "step": 328100 + }, + { + "epoch": 2.824982782369146, + "grad_norm": 2.265625, + "learning_rate": 7.995372262985564e-05, + "loss": 4.0531, + "step": 328150 + }, + { + "epoch": 2.825413223140496, + "grad_norm": 1.0, + "learning_rate": 7.992718126108354e-05, + "loss": 3.8754, + "step": 328200 + }, + { + "epoch": 2.825843663911846, + "grad_norm": 3.078125, + "learning_rate": 7.990064136561317e-05, + "loss": 4.5383, + "step": 328250 + }, + { + "epoch": 2.8262741046831956, + "grad_norm": 3.0, + "learning_rate": 7.987410294539249e-05, + "loss": 4.5454, + "step": 328300 + }, + { + "epoch": 2.8267045454545454, + "grad_norm": 2.96875, + "learning_rate": 7.984756600236932e-05, + "loss": 4.6176, + "step": 328350 + }, + { + "epoch": 2.827134986225895, + "grad_norm": 3.46875, + "learning_rate": 7.98210305384915e-05, + "loss": 4.4, + "step": 328400 + }, + { + "epoch": 2.8275654269972454, + "grad_norm": 3.140625, + "learning_rate": 7.979449655570656e-05, + "loss": 4.4922, + "step": 328450 + }, + { + "epoch": 2.8279958677685952, + "grad_norm": 7.5625, + "learning_rate": 7.976796405596215e-05, + "loss": 4.6449, + "step": 328500 + }, + { + "epoch": 2.828426308539945, + "grad_norm": 3.65625, + "learning_rate": 7.974143304120562e-05, + "loss": 4.5647, + "step": 328550 + }, + { + "epoch": 2.828856749311295, + "grad_norm": 2.203125, + "learning_rate": 7.971490351338432e-05, + "loss": 4.4015, + "step": 328600 + }, + { + "epoch": 2.8292871900826446, + "grad_norm": 3.28125, + "learning_rate": 7.96883754744455e-05, + "loss": 4.1724, + "step": 328650 + }, + { + "epoch": 2.8297176308539944, + "grad_norm": 2.046875, + "learning_rate": 7.966184892633616e-05, + "loss": 4.0353, + "step": 328700 + }, + { + "epoch": 2.830148071625344, + "grad_norm": 3.171875, + "learning_rate": 7.963532387100336e-05, + "loss": 4.069, + "step": 328750 + }, + { + "epoch": 2.830578512396694, + "grad_norm": 1.921875, + "learning_rate": 7.960880031039399e-05, + "loss": 4.1332, + "step": 328800 + }, + { + "epoch": 2.8310089531680442, + "grad_norm": 3.21875, + "learning_rate": 7.958227824645478e-05, + "loss": 4.1883, + "step": 328850 + }, + { + "epoch": 2.831439393939394, + "grad_norm": 2.625, + "learning_rate": 7.955575768113245e-05, + "loss": 4.0097, + "step": 328900 + }, + { + "epoch": 2.831869834710744, + "grad_norm": 4.6875, + "learning_rate": 7.95292386163735e-05, + "loss": 4.3936, + "step": 328950 + }, + { + "epoch": 2.8323002754820936, + "grad_norm": 3.234375, + "learning_rate": 7.950272105412441e-05, + "loss": 4.3752, + "step": 329000 + }, + { + "epoch": 2.8327307162534434, + "grad_norm": 3.96875, + "learning_rate": 7.947620499633146e-05, + "loss": 4.2651, + "step": 329050 + }, + { + "epoch": 2.8331611570247937, + "grad_norm": 3.5, + "learning_rate": 7.94496904449409e-05, + "loss": 4.508, + "step": 329100 + }, + { + "epoch": 2.8335915977961434, + "grad_norm": 2.453125, + "learning_rate": 7.94231774018989e-05, + "loss": 4.187, + "step": 329150 + }, + { + "epoch": 2.8340220385674932, + "grad_norm": 1.875, + "learning_rate": 7.939666586915139e-05, + "loss": 4.3722, + "step": 329200 + }, + { + "epoch": 2.834452479338843, + "grad_norm": 3.453125, + "learning_rate": 7.937015584864425e-05, + "loss": 4.2752, + "step": 329250 + }, + { + "epoch": 2.834882920110193, + "grad_norm": 4.5625, + "learning_rate": 7.93436473423233e-05, + "loss": 4.2452, + "step": 329300 + }, + { + "epoch": 2.8353133608815426, + "grad_norm": 3.1875, + "learning_rate": 7.931714035213418e-05, + "loss": 4.1229, + "step": 329350 + }, + { + "epoch": 2.8357438016528924, + "grad_norm": 3.46875, + "learning_rate": 7.929063488002249e-05, + "loss": 4.189, + "step": 329400 + }, + { + "epoch": 2.836174242424242, + "grad_norm": 2.453125, + "learning_rate": 7.926413092793361e-05, + "loss": 4.4492, + "step": 329450 + }, + { + "epoch": 2.8366046831955924, + "grad_norm": 8.375, + "learning_rate": 7.923762849781292e-05, + "loss": 4.411, + "step": 329500 + }, + { + "epoch": 2.8370351239669422, + "grad_norm": 3.234375, + "learning_rate": 7.92111275916056e-05, + "loss": 4.5448, + "step": 329550 + }, + { + "epoch": 2.837465564738292, + "grad_norm": 4.09375, + "learning_rate": 7.918462821125681e-05, + "loss": 4.248, + "step": 329600 + }, + { + "epoch": 2.837896005509642, + "grad_norm": 2.75, + "learning_rate": 7.915813035871154e-05, + "loss": 4.1487, + "step": 329650 + }, + { + "epoch": 2.8383264462809916, + "grad_norm": 3.5, + "learning_rate": 7.913163403591464e-05, + "loss": 4.2946, + "step": 329700 + }, + { + "epoch": 2.838756887052342, + "grad_norm": 3.078125, + "learning_rate": 7.91051392448109e-05, + "loss": 4.5097, + "step": 329750 + }, + { + "epoch": 2.8391873278236917, + "grad_norm": 3.171875, + "learning_rate": 7.907864598734497e-05, + "loss": 4.3428, + "step": 329800 + }, + { + "epoch": 2.8396177685950414, + "grad_norm": 4.09375, + "learning_rate": 7.905215426546144e-05, + "loss": 4.3565, + "step": 329850 + }, + { + "epoch": 2.8400482093663912, + "grad_norm": 3.4375, + "learning_rate": 7.902566408110472e-05, + "loss": 4.2442, + "step": 329900 + }, + { + "epoch": 2.840478650137741, + "grad_norm": 3.53125, + "learning_rate": 7.899917543621911e-05, + "loss": 3.9229, + "step": 329950 + }, + { + "epoch": 2.840909090909091, + "grad_norm": 3.640625, + "learning_rate": 7.897268833274885e-05, + "loss": 4.2746, + "step": 330000 + }, + { + "epoch": 2.840909090909091, + "eval_loss": 4.996027946472168, + "eval_runtime": 24.5372, + "eval_samples_per_second": 26.083, + "eval_steps_per_second": 13.041, + "eval_tts_loss": 7.445830441324012, + "step": 330000 + }, + { + "epoch": 2.8413395316804406, + "grad_norm": 2.015625, + "learning_rate": 7.8946202772638e-05, + "loss": 4.1043, + "step": 330050 + }, + { + "epoch": 2.8417699724517904, + "grad_norm": 2.8125, + "learning_rate": 7.891971875783056e-05, + "loss": 4.4002, + "step": 330100 + }, + { + "epoch": 2.8422004132231407, + "grad_norm": 1.9140625, + "learning_rate": 7.889323629027045e-05, + "loss": 4.2002, + "step": 330150 + }, + { + "epoch": 2.8426308539944904, + "grad_norm": 3.046875, + "learning_rate": 7.88667553719014e-05, + "loss": 4.1802, + "step": 330200 + }, + { + "epoch": 2.8430612947658402, + "grad_norm": 3.4375, + "learning_rate": 7.884027600466698e-05, + "loss": 4.3297, + "step": 330250 + }, + { + "epoch": 2.84349173553719, + "grad_norm": 3.828125, + "learning_rate": 7.881379819051078e-05, + "loss": 4.3222, + "step": 330300 + }, + { + "epoch": 2.84392217630854, + "grad_norm": 4.25, + "learning_rate": 7.878732193137624e-05, + "loss": 4.2271, + "step": 330350 + }, + { + "epoch": 2.84435261707989, + "grad_norm": 3.90625, + "learning_rate": 7.87608472292066e-05, + "loss": 4.4208, + "step": 330400 + }, + { + "epoch": 2.84478305785124, + "grad_norm": 3.921875, + "learning_rate": 7.873437408594512e-05, + "loss": 4.3129, + "step": 330450 + }, + { + "epoch": 2.8452134986225897, + "grad_norm": 3.6875, + "learning_rate": 7.870790250353479e-05, + "loss": 3.6925, + "step": 330500 + }, + { + "epoch": 2.8456439393939394, + "grad_norm": 2.140625, + "learning_rate": 7.868143248391862e-05, + "loss": 4.0339, + "step": 330550 + }, + { + "epoch": 2.8460743801652892, + "grad_norm": 5.15625, + "learning_rate": 7.865496402903943e-05, + "loss": 4.3239, + "step": 330600 + }, + { + "epoch": 2.846504820936639, + "grad_norm": 2.453125, + "learning_rate": 7.862849714083995e-05, + "loss": 4.1416, + "step": 330650 + }, + { + "epoch": 2.846935261707989, + "grad_norm": 3.40625, + "learning_rate": 7.860203182126282e-05, + "loss": 4.4766, + "step": 330700 + }, + { + "epoch": 2.8473657024793386, + "grad_norm": 1.9765625, + "learning_rate": 7.857556807225051e-05, + "loss": 4.2611, + "step": 330750 + }, + { + "epoch": 2.847796143250689, + "grad_norm": 3.4375, + "learning_rate": 7.854910589574538e-05, + "loss": 4.422, + "step": 330800 + }, + { + "epoch": 2.8482265840220387, + "grad_norm": 2.765625, + "learning_rate": 7.852264529368976e-05, + "loss": 4.1691, + "step": 330850 + }, + { + "epoch": 2.8486570247933884, + "grad_norm": 1.578125, + "learning_rate": 7.849618626802573e-05, + "loss": 4.2793, + "step": 330900 + }, + { + "epoch": 2.8490874655647382, + "grad_norm": 4.1875, + "learning_rate": 7.846972882069542e-05, + "loss": 4.3258, + "step": 330950 + }, + { + "epoch": 2.849517906336088, + "grad_norm": 3.125, + "learning_rate": 7.844327295364063e-05, + "loss": 4.4516, + "step": 331000 + }, + { + "epoch": 2.8499483471074383, + "grad_norm": 3.546875, + "learning_rate": 7.841681866880324e-05, + "loss": 4.3668, + "step": 331050 + }, + { + "epoch": 2.850378787878788, + "grad_norm": 3.3125, + "learning_rate": 7.839036596812492e-05, + "loss": 4.2713, + "step": 331100 + }, + { + "epoch": 2.850809228650138, + "grad_norm": 7.0625, + "learning_rate": 7.836391485354724e-05, + "loss": 4.019, + "step": 331150 + }, + { + "epoch": 2.8512396694214877, + "grad_norm": 4.21875, + "learning_rate": 7.833746532701171e-05, + "loss": 4.4856, + "step": 331200 + }, + { + "epoch": 2.8516701101928374, + "grad_norm": 3.515625, + "learning_rate": 7.831101739045959e-05, + "loss": 3.9797, + "step": 331250 + }, + { + "epoch": 2.8521005509641872, + "grad_norm": 5.96875, + "learning_rate": 7.828457104583209e-05, + "loss": 4.4868, + "step": 331300 + }, + { + "epoch": 2.852530991735537, + "grad_norm": 2.046875, + "learning_rate": 7.825812629507037e-05, + "loss": 4.5017, + "step": 331350 + }, + { + "epoch": 2.852961432506887, + "grad_norm": 3.125, + "learning_rate": 7.823168314011542e-05, + "loss": 4.5807, + "step": 331400 + }, + { + "epoch": 2.853391873278237, + "grad_norm": 2.59375, + "learning_rate": 7.820524158290811e-05, + "loss": 4.3126, + "step": 331450 + }, + { + "epoch": 2.853822314049587, + "grad_norm": 3.109375, + "learning_rate": 7.817880162538912e-05, + "loss": 4.4338, + "step": 331500 + }, + { + "epoch": 2.8542527548209367, + "grad_norm": 1.34375, + "learning_rate": 7.815236326949919e-05, + "loss": 4.0954, + "step": 331550 + }, + { + "epoch": 2.8546831955922864, + "grad_norm": 4.25, + "learning_rate": 7.812592651717876e-05, + "loss": 4.3199, + "step": 331600 + }, + { + "epoch": 2.8551136363636362, + "grad_norm": 2.328125, + "learning_rate": 7.809949137036825e-05, + "loss": 4.1058, + "step": 331650 + }, + { + "epoch": 2.8555440771349865, + "grad_norm": 1.90625, + "learning_rate": 7.8073057831008e-05, + "loss": 4.442, + "step": 331700 + }, + { + "epoch": 2.8559745179063363, + "grad_norm": 1.34375, + "learning_rate": 7.80466259010381e-05, + "loss": 4.6728, + "step": 331750 + }, + { + "epoch": 2.856404958677686, + "grad_norm": 3.109375, + "learning_rate": 7.802019558239861e-05, + "loss": 3.9543, + "step": 331800 + }, + { + "epoch": 2.856835399449036, + "grad_norm": 2.53125, + "learning_rate": 7.799376687702949e-05, + "loss": 4.1872, + "step": 331850 + }, + { + "epoch": 2.8572658402203857, + "grad_norm": 4.71875, + "learning_rate": 7.796733978687054e-05, + "loss": 4.2554, + "step": 331900 + }, + { + "epoch": 2.8576962809917354, + "grad_norm": 3.40625, + "learning_rate": 7.794091431386141e-05, + "loss": 4.0566, + "step": 331950 + }, + { + "epoch": 2.8581267217630852, + "grad_norm": 2.390625, + "learning_rate": 7.791449045994177e-05, + "loss": 4.1505, + "step": 332000 + }, + { + "epoch": 2.858557162534435, + "grad_norm": 2.90625, + "learning_rate": 7.788806822705097e-05, + "loss": 4.206, + "step": 332050 + }, + { + "epoch": 2.8589876033057853, + "grad_norm": 2.421875, + "learning_rate": 7.78616476171284e-05, + "loss": 4.3296, + "step": 332100 + }, + { + "epoch": 2.859418044077135, + "grad_norm": 3.4375, + "learning_rate": 7.783522863211324e-05, + "loss": 4.6101, + "step": 332150 + }, + { + "epoch": 2.859848484848485, + "grad_norm": 5.28125, + "learning_rate": 7.780881127394464e-05, + "loss": 4.1777, + "step": 332200 + }, + { + "epoch": 2.8602789256198347, + "grad_norm": 2.484375, + "learning_rate": 7.778239554456156e-05, + "loss": 4.3672, + "step": 332250 + }, + { + "epoch": 2.8607093663911844, + "grad_norm": 3.28125, + "learning_rate": 7.775598144590281e-05, + "loss": 4.2635, + "step": 332300 + }, + { + "epoch": 2.8611398071625347, + "grad_norm": 3.515625, + "learning_rate": 7.772956897990715e-05, + "loss": 4.4765, + "step": 332350 + }, + { + "epoch": 2.8615702479338845, + "grad_norm": 3.8125, + "learning_rate": 7.770315814851324e-05, + "loss": 4.271, + "step": 332400 + }, + { + "epoch": 2.8620006887052343, + "grad_norm": 2.890625, + "learning_rate": 7.767674895365953e-05, + "loss": 4.1485, + "step": 332450 + }, + { + "epoch": 2.862431129476584, + "grad_norm": 3.0625, + "learning_rate": 7.765034139728446e-05, + "loss": 4.219, + "step": 332500 + }, + { + "epoch": 2.862861570247934, + "grad_norm": 2.734375, + "learning_rate": 7.762393548132619e-05, + "loss": 4.3381, + "step": 332550 + }, + { + "epoch": 2.8632920110192837, + "grad_norm": 4.625, + "learning_rate": 7.759753120772295e-05, + "loss": 4.2912, + "step": 332600 + }, + { + "epoch": 2.8637224517906334, + "grad_norm": 3.3125, + "learning_rate": 7.75711285784127e-05, + "loss": 4.348, + "step": 332650 + }, + { + "epoch": 2.8641528925619832, + "grad_norm": 2.34375, + "learning_rate": 7.754472759533334e-05, + "loss": 4.2607, + "step": 332700 + }, + { + "epoch": 2.8645833333333335, + "grad_norm": 3.546875, + "learning_rate": 7.751832826042271e-05, + "loss": 3.9201, + "step": 332750 + }, + { + "epoch": 2.8650137741046833, + "grad_norm": 3.296875, + "learning_rate": 7.749193057561839e-05, + "loss": 4.6225, + "step": 332800 + }, + { + "epoch": 2.865444214876033, + "grad_norm": 2.421875, + "learning_rate": 7.746553454285793e-05, + "loss": 4.6142, + "step": 332850 + }, + { + "epoch": 2.865874655647383, + "grad_norm": 4.28125, + "learning_rate": 7.743914016407875e-05, + "loss": 4.2471, + "step": 332900 + }, + { + "epoch": 2.8663050964187327, + "grad_norm": 2.0, + "learning_rate": 7.741274744121814e-05, + "loss": 4.7241, + "step": 332950 + }, + { + "epoch": 2.866735537190083, + "grad_norm": 2.4375, + "learning_rate": 7.73863563762133e-05, + "loss": 3.8558, + "step": 333000 + }, + { + "epoch": 2.866735537190083, + "eval_loss": 4.995693206787109, + "eval_runtime": 24.1386, + "eval_samples_per_second": 26.514, + "eval_steps_per_second": 13.257, + "eval_tts_loss": 7.401987731875897, + "step": 333000 + }, + { + "epoch": 2.8671659779614327, + "grad_norm": 2.140625, + "learning_rate": 7.735996697100121e-05, + "loss": 4.163, + "step": 333050 + }, + { + "epoch": 2.8675964187327825, + "grad_norm": 2.484375, + "learning_rate": 7.733357922751885e-05, + "loss": 4.6496, + "step": 333100 + }, + { + "epoch": 2.8680268595041323, + "grad_norm": 3.03125, + "learning_rate": 7.730719314770299e-05, + "loss": 4.5372, + "step": 333150 + }, + { + "epoch": 2.868457300275482, + "grad_norm": 4.5625, + "learning_rate": 7.728080873349032e-05, + "loss": 4.3275, + "step": 333200 + }, + { + "epoch": 2.868887741046832, + "grad_norm": 3.046875, + "learning_rate": 7.725442598681746e-05, + "loss": 4.3856, + "step": 333250 + }, + { + "epoch": 2.8693181818181817, + "grad_norm": 7.375, + "learning_rate": 7.722804490962078e-05, + "loss": 4.099, + "step": 333300 + }, + { + "epoch": 2.8697486225895315, + "grad_norm": 2.78125, + "learning_rate": 7.720166550383658e-05, + "loss": 4.1062, + "step": 333350 + }, + { + "epoch": 2.8701790633608817, + "grad_norm": 2.90625, + "learning_rate": 7.717528777140107e-05, + "loss": 4.503, + "step": 333400 + }, + { + "epoch": 2.8706095041322315, + "grad_norm": 4.65625, + "learning_rate": 7.714891171425036e-05, + "loss": 4.3076, + "step": 333450 + }, + { + "epoch": 2.8710399449035813, + "grad_norm": 4.3125, + "learning_rate": 7.712253733432039e-05, + "loss": 4.5337, + "step": 333500 + }, + { + "epoch": 2.871470385674931, + "grad_norm": 3.40625, + "learning_rate": 7.70961646335469e-05, + "loss": 4.3035, + "step": 333550 + }, + { + "epoch": 2.871900826446281, + "grad_norm": 3.5625, + "learning_rate": 7.706979361386565e-05, + "loss": 4.2263, + "step": 333600 + }, + { + "epoch": 2.872331267217631, + "grad_norm": 1.234375, + "learning_rate": 7.704342427721225e-05, + "loss": 4.1651, + "step": 333650 + }, + { + "epoch": 2.872761707988981, + "grad_norm": 2.953125, + "learning_rate": 7.701705662552209e-05, + "loss": 4.5041, + "step": 333700 + }, + { + "epoch": 2.8731921487603307, + "grad_norm": 3.78125, + "learning_rate": 7.699069066073053e-05, + "loss": 4.4416, + "step": 333750 + }, + { + "epoch": 2.8736225895316805, + "grad_norm": 2.140625, + "learning_rate": 7.696432638477281e-05, + "loss": 4.2717, + "step": 333800 + }, + { + "epoch": 2.8740530303030303, + "grad_norm": 4.59375, + "learning_rate": 7.693796379958391e-05, + "loss": 4.3637, + "step": 333850 + }, + { + "epoch": 2.87448347107438, + "grad_norm": 2.625, + "learning_rate": 7.691160290709886e-05, + "loss": 4.1766, + "step": 333900 + }, + { + "epoch": 2.87491391184573, + "grad_norm": 3.40625, + "learning_rate": 7.68852437092525e-05, + "loss": 4.4558, + "step": 333950 + }, + { + "epoch": 2.8753443526170797, + "grad_norm": 2.90625, + "learning_rate": 7.685888620797949e-05, + "loss": 4.2098, + "step": 334000 + }, + { + "epoch": 2.87577479338843, + "grad_norm": 2.140625, + "learning_rate": 7.683253040521451e-05, + "loss": 4.1785, + "step": 334050 + }, + { + "epoch": 2.8762052341597797, + "grad_norm": 4.40625, + "learning_rate": 7.680617630289189e-05, + "loss": 4.3321, + "step": 334100 + }, + { + "epoch": 2.8766356749311295, + "grad_norm": 2.65625, + "learning_rate": 7.677982390294605e-05, + "loss": 4.3175, + "step": 334150 + }, + { + "epoch": 2.8770661157024793, + "grad_norm": 2.96875, + "learning_rate": 7.675347320731116e-05, + "loss": 4.594, + "step": 334200 + }, + { + "epoch": 2.877496556473829, + "grad_norm": 2.5, + "learning_rate": 7.672712421792133e-05, + "loss": 4.4218, + "step": 334250 + }, + { + "epoch": 2.8779269972451793, + "grad_norm": 4.5, + "learning_rate": 7.670077693671052e-05, + "loss": 4.1439, + "step": 334300 + }, + { + "epoch": 2.878357438016529, + "grad_norm": 2.578125, + "learning_rate": 7.667443136561255e-05, + "loss": 4.44, + "step": 334350 + }, + { + "epoch": 2.878787878787879, + "grad_norm": 2.984375, + "learning_rate": 7.664808750656111e-05, + "loss": 4.25, + "step": 334400 + }, + { + "epoch": 2.8792183195592287, + "grad_norm": 2.515625, + "learning_rate": 7.662174536148982e-05, + "loss": 4.7782, + "step": 334450 + }, + { + "epoch": 2.8796487603305785, + "grad_norm": 2.203125, + "learning_rate": 7.65954049323321e-05, + "loss": 3.9941, + "step": 334500 + }, + { + "epoch": 2.8800792011019283, + "grad_norm": 2.0, + "learning_rate": 7.656906622102136e-05, + "loss": 3.7434, + "step": 334550 + }, + { + "epoch": 2.880509641873278, + "grad_norm": 2.40625, + "learning_rate": 7.654272922949069e-05, + "loss": 4.2841, + "step": 334600 + }, + { + "epoch": 2.880940082644628, + "grad_norm": 3.265625, + "learning_rate": 7.651639395967324e-05, + "loss": 4.3047, + "step": 334650 + }, + { + "epoch": 2.881370523415978, + "grad_norm": 3.921875, + "learning_rate": 7.649006041350194e-05, + "loss": 4.0306, + "step": 334700 + }, + { + "epoch": 2.881800964187328, + "grad_norm": 3.203125, + "learning_rate": 7.646372859290963e-05, + "loss": 4.4538, + "step": 334750 + }, + { + "epoch": 2.8822314049586777, + "grad_norm": 4.46875, + "learning_rate": 7.643739849982903e-05, + "loss": 4.3576, + "step": 334800 + }, + { + "epoch": 2.8826618457300275, + "grad_norm": 2.84375, + "learning_rate": 7.641107013619265e-05, + "loss": 4.5172, + "step": 334850 + }, + { + "epoch": 2.8830922865013773, + "grad_norm": 3.25, + "learning_rate": 7.638474350393296e-05, + "loss": 3.9946, + "step": 334900 + }, + { + "epoch": 2.8835227272727275, + "grad_norm": 4.96875, + "learning_rate": 7.635841860498229e-05, + "loss": 4.3149, + "step": 334950 + }, + { + "epoch": 2.8839531680440773, + "grad_norm": 2.78125, + "learning_rate": 7.633209544127286e-05, + "loss": 4.4252, + "step": 335000 + }, + { + "epoch": 2.884383608815427, + "grad_norm": 2.859375, + "learning_rate": 7.630577401473671e-05, + "loss": 4.2135, + "step": 335050 + }, + { + "epoch": 2.884814049586777, + "grad_norm": 3.03125, + "learning_rate": 7.627945432730572e-05, + "loss": 4.4764, + "step": 335100 + }, + { + "epoch": 2.8852444903581267, + "grad_norm": 3.609375, + "learning_rate": 7.625313638091179e-05, + "loss": 4.3044, + "step": 335150 + }, + { + "epoch": 2.8856749311294765, + "grad_norm": 4.6875, + "learning_rate": 7.622682017748651e-05, + "loss": 3.9791, + "step": 335200 + }, + { + "epoch": 2.8861053719008263, + "grad_norm": 1.859375, + "learning_rate": 7.62005057189615e-05, + "loss": 4.4244, + "step": 335250 + }, + { + "epoch": 2.886535812672176, + "grad_norm": 3.625, + "learning_rate": 7.617419300726819e-05, + "loss": 4.4314, + "step": 335300 + }, + { + "epoch": 2.8869662534435263, + "grad_norm": 3.34375, + "learning_rate": 7.614788204433784e-05, + "loss": 4.3055, + "step": 335350 + }, + { + "epoch": 2.887396694214876, + "grad_norm": 4.5, + "learning_rate": 7.612157283210159e-05, + "loss": 4.2885, + "step": 335400 + }, + { + "epoch": 2.887827134986226, + "grad_norm": 3.671875, + "learning_rate": 7.609526537249054e-05, + "loss": 4.5623, + "step": 335450 + }, + { + "epoch": 2.8882575757575757, + "grad_norm": 0.6484375, + "learning_rate": 7.60689596674356e-05, + "loss": 4.2664, + "step": 335500 + }, + { + "epoch": 2.8886880165289255, + "grad_norm": 2.84375, + "learning_rate": 7.60426557188675e-05, + "loss": 4.4225, + "step": 335550 + }, + { + "epoch": 2.8891184573002757, + "grad_norm": 3.421875, + "learning_rate": 7.601635352871696e-05, + "loss": 4.1482, + "step": 335600 + }, + { + "epoch": 2.8895488980716255, + "grad_norm": 3.484375, + "learning_rate": 7.599005309891445e-05, + "loss": 4.4473, + "step": 335650 + }, + { + "epoch": 2.8899793388429753, + "grad_norm": 2.75, + "learning_rate": 7.596375443139038e-05, + "loss": 4.4659, + "step": 335700 + }, + { + "epoch": 2.890409779614325, + "grad_norm": 1.5234375, + "learning_rate": 7.593745752807501e-05, + "loss": 4.1006, + "step": 335750 + }, + { + "epoch": 2.890840220385675, + "grad_norm": 2.3125, + "learning_rate": 7.591116239089851e-05, + "loss": 4.3912, + "step": 335800 + }, + { + "epoch": 2.8912706611570247, + "grad_norm": 3.546875, + "learning_rate": 7.588486902179088e-05, + "loss": 4.4834, + "step": 335850 + }, + { + "epoch": 2.8917011019283745, + "grad_norm": 2.765625, + "learning_rate": 7.585857742268194e-05, + "loss": 4.587, + "step": 335900 + }, + { + "epoch": 2.8921315426997243, + "grad_norm": 2.921875, + "learning_rate": 7.583228759550148e-05, + "loss": 4.0826, + "step": 335950 + }, + { + "epoch": 2.8925619834710745, + "grad_norm": 3.828125, + "learning_rate": 7.580599954217912e-05, + "loss": 4.5829, + "step": 336000 + }, + { + "epoch": 2.8925619834710745, + "eval_loss": 4.996245861053467, + "eval_runtime": 24.6638, + "eval_samples_per_second": 25.949, + "eval_steps_per_second": 12.974, + "eval_tts_loss": 7.448037665631593, + "step": 336000 + }, + { + "epoch": 2.8929924242424243, + "grad_norm": 1.4609375, + "learning_rate": 7.577971326464432e-05, + "loss": 4.3964, + "step": 336050 + }, + { + "epoch": 2.893422865013774, + "grad_norm": 2.03125, + "learning_rate": 7.57534287648265e-05, + "loss": 4.2171, + "step": 336100 + }, + { + "epoch": 2.893853305785124, + "grad_norm": 5.03125, + "learning_rate": 7.57271460446548e-05, + "loss": 4.2809, + "step": 336150 + }, + { + "epoch": 2.8942837465564737, + "grad_norm": 1.65625, + "learning_rate": 7.570086510605839e-05, + "loss": 4.2913, + "step": 336200 + }, + { + "epoch": 2.894714187327824, + "grad_norm": 5.25, + "learning_rate": 7.567458595096615e-05, + "loss": 4.3189, + "step": 336250 + }, + { + "epoch": 2.8951446280991737, + "grad_norm": 1.578125, + "learning_rate": 7.564830858130698e-05, + "loss": 4.2536, + "step": 336300 + }, + { + "epoch": 2.8955750688705235, + "grad_norm": 3.359375, + "learning_rate": 7.56220329990096e-05, + "loss": 4.2402, + "step": 336350 + }, + { + "epoch": 2.8960055096418733, + "grad_norm": 4.21875, + "learning_rate": 7.559575920600255e-05, + "loss": 4.3815, + "step": 336400 + }, + { + "epoch": 2.896435950413223, + "grad_norm": 1.25, + "learning_rate": 7.556948720421422e-05, + "loss": 4.6188, + "step": 336450 + }, + { + "epoch": 2.896866391184573, + "grad_norm": 3.40625, + "learning_rate": 7.554321699557299e-05, + "loss": 4.3958, + "step": 336500 + }, + { + "epoch": 2.8972968319559227, + "grad_norm": 1.9375, + "learning_rate": 7.551694858200701e-05, + "loss": 4.5251, + "step": 336550 + }, + { + "epoch": 2.8977272727272725, + "grad_norm": 2.6875, + "learning_rate": 7.549068196544436e-05, + "loss": 4.4589, + "step": 336600 + }, + { + "epoch": 2.8981577134986227, + "grad_norm": 1.6171875, + "learning_rate": 7.546441714781287e-05, + "loss": 4.2934, + "step": 336650 + }, + { + "epoch": 2.8985881542699725, + "grad_norm": 4.3125, + "learning_rate": 7.54381541310404e-05, + "loss": 4.5141, + "step": 336700 + }, + { + "epoch": 2.8990185950413223, + "grad_norm": 3.375, + "learning_rate": 7.541189291705455e-05, + "loss": 4.6853, + "step": 336750 + }, + { + "epoch": 2.899449035812672, + "grad_norm": 2.390625, + "learning_rate": 7.538563350778285e-05, + "loss": 4.0215, + "step": 336800 + }, + { + "epoch": 2.899879476584022, + "grad_norm": 3.171875, + "learning_rate": 7.535937590515274e-05, + "loss": 4.3871, + "step": 336850 + }, + { + "epoch": 2.900309917355372, + "grad_norm": 2.46875, + "learning_rate": 7.533312011109141e-05, + "loss": 3.9731, + "step": 336900 + }, + { + "epoch": 2.900740358126722, + "grad_norm": 1.859375, + "learning_rate": 7.530686612752596e-05, + "loss": 4.3554, + "step": 336950 + }, + { + "epoch": 2.9011707988980717, + "grad_norm": 1.203125, + "learning_rate": 7.52806139563834e-05, + "loss": 4.1233, + "step": 337000 + }, + { + "epoch": 2.9016012396694215, + "grad_norm": 3.46875, + "learning_rate": 7.525436359959062e-05, + "loss": 4.4772, + "step": 337050 + }, + { + "epoch": 2.9020316804407713, + "grad_norm": 4.375, + "learning_rate": 7.522811505907433e-05, + "loss": 4.1946, + "step": 337100 + }, + { + "epoch": 2.902462121212121, + "grad_norm": 4.1875, + "learning_rate": 7.520186833676106e-05, + "loss": 4.287, + "step": 337150 + }, + { + "epoch": 2.902892561983471, + "grad_norm": 3.046875, + "learning_rate": 7.517562343457729e-05, + "loss": 3.7122, + "step": 337200 + }, + { + "epoch": 2.9033230027548207, + "grad_norm": 5.75, + "learning_rate": 7.514938035444939e-05, + "loss": 4.0659, + "step": 337250 + }, + { + "epoch": 2.903753443526171, + "grad_norm": 2.59375, + "learning_rate": 7.512313909830346e-05, + "loss": 4.3358, + "step": 337300 + }, + { + "epoch": 2.9041838842975207, + "grad_norm": 3.984375, + "learning_rate": 7.509689966806562e-05, + "loss": 4.3521, + "step": 337350 + }, + { + "epoch": 2.9046143250688705, + "grad_norm": 3.140625, + "learning_rate": 7.50706620656618e-05, + "loss": 4.6674, + "step": 337400 + }, + { + "epoch": 2.9050447658402203, + "grad_norm": 2.4375, + "learning_rate": 7.504442629301769e-05, + "loss": 4.2609, + "step": 337450 + }, + { + "epoch": 2.90547520661157, + "grad_norm": 1.8671875, + "learning_rate": 7.5018192352059e-05, + "loss": 4.6744, + "step": 337500 + }, + { + "epoch": 2.9059056473829203, + "grad_norm": 5.375, + "learning_rate": 7.499196024471127e-05, + "loss": 4.5588, + "step": 337550 + }, + { + "epoch": 2.90633608815427, + "grad_norm": 3.484375, + "learning_rate": 7.496572997289983e-05, + "loss": 4.3246, + "step": 337600 + }, + { + "epoch": 2.90676652892562, + "grad_norm": 1.765625, + "learning_rate": 7.493950153854998e-05, + "loss": 4.2777, + "step": 337650 + }, + { + "epoch": 2.9071969696969697, + "grad_norm": 3.125, + "learning_rate": 7.491327494358677e-05, + "loss": 4.1976, + "step": 337700 + }, + { + "epoch": 2.9076274104683195, + "grad_norm": 3.234375, + "learning_rate": 7.488705018993522e-05, + "loss": 4.2998, + "step": 337750 + }, + { + "epoch": 2.9080578512396693, + "grad_norm": 2.34375, + "learning_rate": 7.486082727952013e-05, + "loss": 4.076, + "step": 337800 + }, + { + "epoch": 2.908488292011019, + "grad_norm": 2.453125, + "learning_rate": 7.483460621426624e-05, + "loss": 4.347, + "step": 337850 + }, + { + "epoch": 2.908918732782369, + "grad_norm": 3.96875, + "learning_rate": 7.480838699609812e-05, + "loss": 4.5208, + "step": 337900 + }, + { + "epoch": 2.909349173553719, + "grad_norm": 3.15625, + "learning_rate": 7.47821696269402e-05, + "loss": 4.348, + "step": 337950 + }, + { + "epoch": 2.909779614325069, + "grad_norm": 2.84375, + "learning_rate": 7.475595410871674e-05, + "loss": 4.4903, + "step": 338000 + }, + { + "epoch": 2.9102100550964187, + "grad_norm": 4.09375, + "learning_rate": 7.472974044335196e-05, + "loss": 4.4813, + "step": 338050 + }, + { + "epoch": 2.9106404958677685, + "grad_norm": 1.359375, + "learning_rate": 7.470352863276983e-05, + "loss": 4.2015, + "step": 338100 + }, + { + "epoch": 2.9110709366391183, + "grad_norm": 1.4453125, + "learning_rate": 7.467731867889434e-05, + "loss": 4.3932, + "step": 338150 + }, + { + "epoch": 2.9115013774104685, + "grad_norm": 2.375, + "learning_rate": 7.46511105836491e-05, + "loss": 4.1417, + "step": 338200 + }, + { + "epoch": 2.9119318181818183, + "grad_norm": 4.78125, + "learning_rate": 7.462490434895786e-05, + "loss": 3.9601, + "step": 338250 + }, + { + "epoch": 2.912362258953168, + "grad_norm": 1.109375, + "learning_rate": 7.4598699976744e-05, + "loss": 4.3112, + "step": 338300 + }, + { + "epoch": 2.912792699724518, + "grad_norm": 2.265625, + "learning_rate": 7.457249746893091e-05, + "loss": 4.291, + "step": 338350 + }, + { + "epoch": 2.9132231404958677, + "grad_norm": 2.5625, + "learning_rate": 7.454629682744186e-05, + "loss": 4.316, + "step": 338400 + }, + { + "epoch": 2.9136535812672175, + "grad_norm": 3.171875, + "learning_rate": 7.452009805419981e-05, + "loss": 4.4949, + "step": 338450 + }, + { + "epoch": 2.9140840220385673, + "grad_norm": 3.0625, + "learning_rate": 7.449390115112774e-05, + "loss": 4.0673, + "step": 338500 + }, + { + "epoch": 2.914514462809917, + "grad_norm": 2.96875, + "learning_rate": 7.446770612014841e-05, + "loss": 4.5295, + "step": 338550 + }, + { + "epoch": 2.9149449035812673, + "grad_norm": 1.9296875, + "learning_rate": 7.444151296318458e-05, + "loss": 4.0253, + "step": 338600 + }, + { + "epoch": 2.915375344352617, + "grad_norm": 7.375, + "learning_rate": 7.441532168215872e-05, + "loss": 4.2479, + "step": 338650 + }, + { + "epoch": 2.915805785123967, + "grad_norm": 5.34375, + "learning_rate": 7.438913227899316e-05, + "loss": 4.3128, + "step": 338700 + }, + { + "epoch": 2.9162362258953167, + "grad_norm": 3.921875, + "learning_rate": 7.43629447556102e-05, + "loss": 4.5874, + "step": 338750 + }, + { + "epoch": 2.9166666666666665, + "grad_norm": 2.625, + "learning_rate": 7.433675911393193e-05, + "loss": 4.0499, + "step": 338800 + }, + { + "epoch": 2.9170971074380168, + "grad_norm": 2.03125, + "learning_rate": 7.43105753558803e-05, + "loss": 4.124, + "step": 338850 + }, + { + "epoch": 2.9175275482093666, + "grad_norm": 4.5625, + "learning_rate": 7.428439348337721e-05, + "loss": 4.111, + "step": 338900 + }, + { + "epoch": 2.9179579889807163, + "grad_norm": 5.6875, + "learning_rate": 7.425821349834431e-05, + "loss": 4.3855, + "step": 338950 + }, + { + "epoch": 2.918388429752066, + "grad_norm": 2.90625, + "learning_rate": 7.423203540270311e-05, + "loss": 4.3686, + "step": 339000 + }, + { + "epoch": 2.918388429752066, + "eval_loss": 4.994027614593506, + "eval_runtime": 23.9056, + "eval_samples_per_second": 26.772, + "eval_steps_per_second": 13.386, + "eval_tts_loss": 7.426867337462221, + "step": 339000 + }, + { + "epoch": 2.918818870523416, + "grad_norm": 5.1875, + "learning_rate": 7.42058591983751e-05, + "loss": 4.6089, + "step": 339050 + }, + { + "epoch": 2.9192493112947657, + "grad_norm": 3.078125, + "learning_rate": 7.417968488728153e-05, + "loss": 4.0212, + "step": 339100 + }, + { + "epoch": 2.9196797520661155, + "grad_norm": 3.28125, + "learning_rate": 7.415351247134351e-05, + "loss": 4.0897, + "step": 339150 + }, + { + "epoch": 2.9201101928374653, + "grad_norm": 3.625, + "learning_rate": 7.412734195248212e-05, + "loss": 4.6838, + "step": 339200 + }, + { + "epoch": 2.9205406336088156, + "grad_norm": 1.75, + "learning_rate": 7.410117333261812e-05, + "loss": 4.1019, + "step": 339250 + }, + { + "epoch": 2.9209710743801653, + "grad_norm": 4.6875, + "learning_rate": 7.407500661367228e-05, + "loss": 4.2213, + "step": 339300 + }, + { + "epoch": 2.921401515151515, + "grad_norm": 4.28125, + "learning_rate": 7.404884179756517e-05, + "loss": 4.1974, + "step": 339350 + }, + { + "epoch": 2.921831955922865, + "grad_norm": 1.46875, + "learning_rate": 7.402267888621725e-05, + "loss": 4.2541, + "step": 339400 + }, + { + "epoch": 2.9222623966942147, + "grad_norm": 2.46875, + "learning_rate": 7.399651788154882e-05, + "loss": 4.234, + "step": 339450 + }, + { + "epoch": 2.922692837465565, + "grad_norm": 2.15625, + "learning_rate": 7.397035878548002e-05, + "loss": 4.2452, + "step": 339500 + }, + { + "epoch": 2.9231232782369148, + "grad_norm": 2.890625, + "learning_rate": 7.394420159993085e-05, + "loss": 4.6126, + "step": 339550 + }, + { + "epoch": 2.9235537190082646, + "grad_norm": 3.59375, + "learning_rate": 7.391804632682126e-05, + "loss": 4.3926, + "step": 339600 + }, + { + "epoch": 2.9239841597796143, + "grad_norm": 1.8515625, + "learning_rate": 7.389189296807093e-05, + "loss": 4.395, + "step": 339650 + }, + { + "epoch": 2.924414600550964, + "grad_norm": 3.09375, + "learning_rate": 7.386574152559952e-05, + "loss": 4.0477, + "step": 339700 + }, + { + "epoch": 2.924845041322314, + "grad_norm": 2.765625, + "learning_rate": 7.38395920013264e-05, + "loss": 4.1257, + "step": 339750 + }, + { + "epoch": 2.9252754820936637, + "grad_norm": 2.6875, + "learning_rate": 7.381344439717098e-05, + "loss": 4.0482, + "step": 339800 + }, + { + "epoch": 2.9257059228650135, + "grad_norm": 4.8125, + "learning_rate": 7.378729871505237e-05, + "loss": 4.4351, + "step": 339850 + }, + { + "epoch": 2.9261363636363638, + "grad_norm": 4.25, + "learning_rate": 7.376115495688965e-05, + "loss": 4.7657, + "step": 339900 + }, + { + "epoch": 2.9265668044077136, + "grad_norm": 2.9375, + "learning_rate": 7.373501312460173e-05, + "loss": 4.4889, + "step": 339950 + }, + { + "epoch": 2.9269972451790633, + "grad_norm": 3.265625, + "learning_rate": 7.370887322010731e-05, + "loss": 4.2453, + "step": 340000 + }, + { + "epoch": 2.927427685950413, + "grad_norm": 3.34375, + "learning_rate": 7.368273524532502e-05, + "loss": 4.4345, + "step": 340050 + }, + { + "epoch": 2.927858126721763, + "grad_norm": 3.640625, + "learning_rate": 7.365659920217335e-05, + "loss": 4.2598, + "step": 340100 + }, + { + "epoch": 2.928288567493113, + "grad_norm": 1.6484375, + "learning_rate": 7.363046509257061e-05, + "loss": 3.9422, + "step": 340150 + }, + { + "epoch": 2.928719008264463, + "grad_norm": 5.5, + "learning_rate": 7.360433291843503e-05, + "loss": 4.3143, + "step": 340200 + }, + { + "epoch": 2.9291494490358128, + "grad_norm": 3.578125, + "learning_rate": 7.357820268168458e-05, + "loss": 4.3546, + "step": 340250 + }, + { + "epoch": 2.9295798898071626, + "grad_norm": 3.515625, + "learning_rate": 7.355207438423723e-05, + "loss": 4.3261, + "step": 340300 + }, + { + "epoch": 2.9300103305785123, + "grad_norm": 3.90625, + "learning_rate": 7.35259480280107e-05, + "loss": 4.3995, + "step": 340350 + }, + { + "epoch": 2.930440771349862, + "grad_norm": 1.8125, + "learning_rate": 7.349982361492262e-05, + "loss": 4.3694, + "step": 340400 + }, + { + "epoch": 2.930871212121212, + "grad_norm": 6.625, + "learning_rate": 7.347370114689051e-05, + "loss": 4.585, + "step": 340450 + }, + { + "epoch": 2.9313016528925617, + "grad_norm": 3.5625, + "learning_rate": 7.344758062583165e-05, + "loss": 4.4143, + "step": 340500 + }, + { + "epoch": 2.931732093663912, + "grad_norm": 2.375, + "learning_rate": 7.342146205366321e-05, + "loss": 4.4472, + "step": 340550 + }, + { + "epoch": 2.9321625344352618, + "grad_norm": 3.734375, + "learning_rate": 7.339534543230228e-05, + "loss": 4.6417, + "step": 340600 + }, + { + "epoch": 2.9325929752066116, + "grad_norm": 4.1875, + "learning_rate": 7.336923076366577e-05, + "loss": 4.1988, + "step": 340650 + }, + { + "epoch": 2.9330234159779613, + "grad_norm": 3.109375, + "learning_rate": 7.334311804967044e-05, + "loss": 4.5914, + "step": 340700 + }, + { + "epoch": 2.933453856749311, + "grad_norm": 4.625, + "learning_rate": 7.331700729223285e-05, + "loss": 4.2424, + "step": 340750 + }, + { + "epoch": 2.9338842975206614, + "grad_norm": 3.796875, + "learning_rate": 7.329089849326952e-05, + "loss": 4.4046, + "step": 340800 + }, + { + "epoch": 2.934314738292011, + "grad_norm": 3.390625, + "learning_rate": 7.32647916546968e-05, + "loss": 4.1883, + "step": 340850 + }, + { + "epoch": 2.934745179063361, + "grad_norm": 3.78125, + "learning_rate": 7.323868677843082e-05, + "loss": 4.3866, + "step": 340900 + }, + { + "epoch": 2.9351756198347108, + "grad_norm": 2.78125, + "learning_rate": 7.32125838663877e-05, + "loss": 4.2655, + "step": 340950 + }, + { + "epoch": 2.9356060606060606, + "grad_norm": 4.03125, + "learning_rate": 7.318648292048327e-05, + "loss": 4.1353, + "step": 341000 + }, + { + "epoch": 2.9360365013774103, + "grad_norm": 4.6875, + "learning_rate": 7.316038394263327e-05, + "loss": 4.0012, + "step": 341050 + }, + { + "epoch": 2.93646694214876, + "grad_norm": 4.34375, + "learning_rate": 7.313428693475336e-05, + "loss": 4.4765, + "step": 341100 + }, + { + "epoch": 2.93689738292011, + "grad_norm": 4.4375, + "learning_rate": 7.310819189875899e-05, + "loss": 4.46, + "step": 341150 + }, + { + "epoch": 2.93732782369146, + "grad_norm": 1.7578125, + "learning_rate": 7.308209883656547e-05, + "loss": 4.4946, + "step": 341200 + }, + { + "epoch": 2.93775826446281, + "grad_norm": 1.5546875, + "learning_rate": 7.3056007750088e-05, + "loss": 4.5813, + "step": 341250 + }, + { + "epoch": 2.9381887052341598, + "grad_norm": 3.328125, + "learning_rate": 7.302991864124158e-05, + "loss": 4.4975, + "step": 341300 + }, + { + "epoch": 2.9386191460055096, + "grad_norm": 1.109375, + "learning_rate": 7.30038315119411e-05, + "loss": 4.2773, + "step": 341350 + }, + { + "epoch": 2.9390495867768593, + "grad_norm": 3.875, + "learning_rate": 7.297774636410129e-05, + "loss": 4.102, + "step": 341400 + }, + { + "epoch": 2.9394800275482096, + "grad_norm": 4.5625, + "learning_rate": 7.295166319963676e-05, + "loss": 4.309, + "step": 341450 + }, + { + "epoch": 2.9399104683195594, + "grad_norm": 2.59375, + "learning_rate": 7.292558202046197e-05, + "loss": 4.0754, + "step": 341500 + }, + { + "epoch": 2.940340909090909, + "grad_norm": 6.59375, + "learning_rate": 7.289950282849123e-05, + "loss": 4.4291, + "step": 341550 + }, + { + "epoch": 2.940771349862259, + "grad_norm": 3.75, + "learning_rate": 7.287342562563863e-05, + "loss": 4.4011, + "step": 341600 + }, + { + "epoch": 2.9412017906336088, + "grad_norm": 6.21875, + "learning_rate": 7.284735041381824e-05, + "loss": 4.5579, + "step": 341650 + }, + { + "epoch": 2.9416322314049586, + "grad_norm": 4.875, + "learning_rate": 7.28212771949439e-05, + "loss": 4.4128, + "step": 341700 + }, + { + "epoch": 2.9420626721763083, + "grad_norm": 2.609375, + "learning_rate": 7.279520597092938e-05, + "loss": 4.4299, + "step": 341750 + }, + { + "epoch": 2.942493112947658, + "grad_norm": 4.6875, + "learning_rate": 7.276913674368817e-05, + "loss": 4.401, + "step": 341800 + }, + { + "epoch": 2.9429235537190084, + "grad_norm": 1.953125, + "learning_rate": 7.274306951513374e-05, + "loss": 4.4946, + "step": 341850 + }, + { + "epoch": 2.943353994490358, + "grad_norm": 3.4375, + "learning_rate": 7.271700428717936e-05, + "loss": 4.6358, + "step": 341900 + }, + { + "epoch": 2.943784435261708, + "grad_norm": 3.875, + "learning_rate": 7.269094106173815e-05, + "loss": 4.2043, + "step": 341950 + }, + { + "epoch": 2.9442148760330578, + "grad_norm": 6.28125, + "learning_rate": 7.266487984072315e-05, + "loss": 4.0789, + "step": 342000 + }, + { + "epoch": 2.9442148760330578, + "eval_loss": 4.994072914123535, + "eval_runtime": 23.8197, + "eval_samples_per_second": 26.869, + "eval_steps_per_second": 13.434, + "eval_tts_loss": 7.446218288815809, + "step": 342000 + }, + { + "epoch": 2.9446453168044076, + "grad_norm": 3.1875, + "learning_rate": 7.263882062604714e-05, + "loss": 4.4023, + "step": 342050 + }, + { + "epoch": 2.945075757575758, + "grad_norm": 2.71875, + "learning_rate": 7.261276341962282e-05, + "loss": 4.4711, + "step": 342100 + }, + { + "epoch": 2.9455061983471076, + "grad_norm": 4.34375, + "learning_rate": 7.258670822336273e-05, + "loss": 4.3143, + "step": 342150 + }, + { + "epoch": 2.9459366391184574, + "grad_norm": 2.828125, + "learning_rate": 7.256065503917928e-05, + "loss": 4.4861, + "step": 342200 + }, + { + "epoch": 2.946367079889807, + "grad_norm": 8.0625, + "learning_rate": 7.253460386898476e-05, + "loss": 4.478, + "step": 342250 + }, + { + "epoch": 2.946797520661157, + "grad_norm": 4.0, + "learning_rate": 7.250855471469115e-05, + "loss": 4.476, + "step": 342300 + }, + { + "epoch": 2.9472279614325068, + "grad_norm": 1.515625, + "learning_rate": 7.248250757821053e-05, + "loss": 4.4263, + "step": 342350 + }, + { + "epoch": 2.9476584022038566, + "grad_norm": 2.5625, + "learning_rate": 7.245646246145461e-05, + "loss": 4.5628, + "step": 342400 + }, + { + "epoch": 2.9480888429752063, + "grad_norm": 1.9765625, + "learning_rate": 7.243041936633508e-05, + "loss": 4.1868, + "step": 342450 + }, + { + "epoch": 2.9485192837465566, + "grad_norm": 3.5625, + "learning_rate": 7.24043782947635e-05, + "loss": 4.2413, + "step": 342500 + }, + { + "epoch": 2.9489497245179064, + "grad_norm": 4.34375, + "learning_rate": 7.237833924865116e-05, + "loss": 4.2355, + "step": 342550 + }, + { + "epoch": 2.949380165289256, + "grad_norm": 4.5, + "learning_rate": 7.235230222990926e-05, + "loss": 4.2631, + "step": 342600 + }, + { + "epoch": 2.949810606060606, + "grad_norm": 3.671875, + "learning_rate": 7.23262672404489e-05, + "loss": 3.7034, + "step": 342650 + }, + { + "epoch": 2.9502410468319558, + "grad_norm": 2.796875, + "learning_rate": 7.2300234282181e-05, + "loss": 4.3717, + "step": 342700 + }, + { + "epoch": 2.950671487603306, + "grad_norm": 1.9140625, + "learning_rate": 7.227420335701632e-05, + "loss": 4.2489, + "step": 342750 + }, + { + "epoch": 2.951101928374656, + "grad_norm": 2.703125, + "learning_rate": 7.224817446686542e-05, + "loss": 4.4818, + "step": 342800 + }, + { + "epoch": 2.9515323691460056, + "grad_norm": 2.46875, + "learning_rate": 7.222214761363882e-05, + "loss": 4.1592, + "step": 342850 + }, + { + "epoch": 2.9519628099173554, + "grad_norm": 3.765625, + "learning_rate": 7.219612279924683e-05, + "loss": 4.5082, + "step": 342900 + }, + { + "epoch": 2.952393250688705, + "grad_norm": 1.703125, + "learning_rate": 7.217010002559959e-05, + "loss": 4.38, + "step": 342950 + }, + { + "epoch": 2.952823691460055, + "grad_norm": 2.0, + "learning_rate": 7.214407929460714e-05, + "loss": 4.2772, + "step": 343000 + }, + { + "epoch": 2.9532541322314048, + "grad_norm": 4.125, + "learning_rate": 7.211806060817934e-05, + "loss": 4.2779, + "step": 343050 + }, + { + "epoch": 2.9536845730027546, + "grad_norm": 3.234375, + "learning_rate": 7.209204396822591e-05, + "loss": 4.096, + "step": 343100 + }, + { + "epoch": 2.954115013774105, + "grad_norm": 3.890625, + "learning_rate": 7.206602937665638e-05, + "loss": 4.2168, + "step": 343150 + }, + { + "epoch": 2.9545454545454546, + "grad_norm": 2.625, + "learning_rate": 7.20400168353802e-05, + "loss": 4.2189, + "step": 343200 + }, + { + "epoch": 2.9549758953168044, + "grad_norm": 4.15625, + "learning_rate": 7.201400634630662e-05, + "loss": 4.3709, + "step": 343250 + }, + { + "epoch": 2.955406336088154, + "grad_norm": 3.90625, + "learning_rate": 7.198799791134481e-05, + "loss": 4.6831, + "step": 343300 + }, + { + "epoch": 2.955836776859504, + "grad_norm": 4.0625, + "learning_rate": 7.196199153240362e-05, + "loss": 4.3318, + "step": 343350 + }, + { + "epoch": 2.956267217630854, + "grad_norm": 2.3125, + "learning_rate": 7.193598721139196e-05, + "loss": 4.4729, + "step": 343400 + }, + { + "epoch": 2.956697658402204, + "grad_norm": 3.015625, + "learning_rate": 7.190998495021845e-05, + "loss": 4.5371, + "step": 343450 + }, + { + "epoch": 2.957128099173554, + "grad_norm": 2.109375, + "learning_rate": 7.188398475079159e-05, + "loss": 4.4545, + "step": 343500 + }, + { + "epoch": 2.9575585399449036, + "grad_norm": 4.0625, + "learning_rate": 7.185798661501981e-05, + "loss": 4.5769, + "step": 343550 + }, + { + "epoch": 2.9579889807162534, + "grad_norm": 4.5, + "learning_rate": 7.183199054481122e-05, + "loss": 4.2668, + "step": 343600 + }, + { + "epoch": 2.958419421487603, + "grad_norm": 4.09375, + "learning_rate": 7.180599654207391e-05, + "loss": 4.313, + "step": 343650 + }, + { + "epoch": 2.958849862258953, + "grad_norm": 2.390625, + "learning_rate": 7.178000460871583e-05, + "loss": 4.3008, + "step": 343700 + }, + { + "epoch": 2.9592803030303028, + "grad_norm": 3.375, + "learning_rate": 7.175401474664467e-05, + "loss": 4.3496, + "step": 343750 + }, + { + "epoch": 2.959710743801653, + "grad_norm": 4.59375, + "learning_rate": 7.172802695776808e-05, + "loss": 4.4027, + "step": 343800 + }, + { + "epoch": 2.960141184573003, + "grad_norm": 3.03125, + "learning_rate": 7.170204124399345e-05, + "loss": 4.5014, + "step": 343850 + }, + { + "epoch": 2.9605716253443526, + "grad_norm": 3.25, + "learning_rate": 7.167605760722813e-05, + "loss": 4.5292, + "step": 343900 + }, + { + "epoch": 2.9610020661157024, + "grad_norm": 3.46875, + "learning_rate": 7.165007604937922e-05, + "loss": 4.5672, + "step": 343950 + }, + { + "epoch": 2.9614325068870526, + "grad_norm": 2.3125, + "learning_rate": 7.162409657235371e-05, + "loss": 4.3536, + "step": 344000 + }, + { + "epoch": 2.9618629476584024, + "grad_norm": 2.546875, + "learning_rate": 7.159811917805854e-05, + "loss": 4.3316, + "step": 344050 + }, + { + "epoch": 2.962293388429752, + "grad_norm": 1.71875, + "learning_rate": 7.157214386840026e-05, + "loss": 4.0619, + "step": 344100 + }, + { + "epoch": 2.962723829201102, + "grad_norm": 3.75, + "learning_rate": 7.154617064528545e-05, + "loss": 4.8132, + "step": 344150 + }, + { + "epoch": 2.963154269972452, + "grad_norm": 3.65625, + "learning_rate": 7.15201995106205e-05, + "loss": 4.3088, + "step": 344200 + }, + { + "epoch": 2.9635847107438016, + "grad_norm": 3.4375, + "learning_rate": 7.149423046631163e-05, + "loss": 4.1888, + "step": 344250 + }, + { + "epoch": 2.9640151515151514, + "grad_norm": 1.6796875, + "learning_rate": 7.146826351426496e-05, + "loss": 4.3703, + "step": 344300 + }, + { + "epoch": 2.964445592286501, + "grad_norm": 3.71875, + "learning_rate": 7.14422986563863e-05, + "loss": 4.1878, + "step": 344350 + }, + { + "epoch": 2.964876033057851, + "grad_norm": 3.140625, + "learning_rate": 7.141633589458147e-05, + "loss": 4.5171, + "step": 344400 + }, + { + "epoch": 2.965306473829201, + "grad_norm": 3.21875, + "learning_rate": 7.139037523075614e-05, + "loss": 4.3684, + "step": 344450 + }, + { + "epoch": 2.965736914600551, + "grad_norm": 3.328125, + "learning_rate": 7.136441666681566e-05, + "loss": 4.2267, + "step": 344500 + }, + { + "epoch": 2.966167355371901, + "grad_norm": 3.234375, + "learning_rate": 7.133846020466543e-05, + "loss": 4.395, + "step": 344550 + }, + { + "epoch": 2.9665977961432506, + "grad_norm": 3.28125, + "learning_rate": 7.131250584621054e-05, + "loss": 4.2683, + "step": 344600 + }, + { + "epoch": 2.967028236914601, + "grad_norm": 2.796875, + "learning_rate": 7.128655359335599e-05, + "loss": 4.5993, + "step": 344650 + }, + { + "epoch": 2.9674586776859506, + "grad_norm": 3.21875, + "learning_rate": 7.12606034480066e-05, + "loss": 4.2759, + "step": 344700 + }, + { + "epoch": 2.9678891184573004, + "grad_norm": 2.890625, + "learning_rate": 7.123465541206714e-05, + "loss": 3.9885, + "step": 344750 + }, + { + "epoch": 2.96831955922865, + "grad_norm": 2.59375, + "learning_rate": 7.120870948744202e-05, + "loss": 4.4197, + "step": 344800 + }, + { + "epoch": 2.96875, + "grad_norm": 2.984375, + "learning_rate": 7.118276567603574e-05, + "loss": 4.2508, + "step": 344850 + }, + { + "epoch": 2.96918044077135, + "grad_norm": 3.609375, + "learning_rate": 7.115682397975242e-05, + "loss": 4.3649, + "step": 344900 + }, + { + "epoch": 2.9696108815426996, + "grad_norm": 3.25, + "learning_rate": 7.113088440049617e-05, + "loss": 4.3533, + "step": 344950 + }, + { + "epoch": 2.9700413223140494, + "grad_norm": 2.703125, + "learning_rate": 7.110494694017089e-05, + "loss": 4.5324, + "step": 345000 + }, + { + "epoch": 2.9700413223140494, + "eval_loss": 4.992117404937744, + "eval_runtime": 24.5137, + "eval_samples_per_second": 26.108, + "eval_steps_per_second": 13.054, + "eval_tts_loss": 7.4279264540100876, + "step": 345000 + }, + { + "epoch": 2.970471763085399, + "grad_norm": 3.640625, + "learning_rate": 7.107901160068032e-05, + "loss": 4.0649, + "step": 345050 + }, + { + "epoch": 2.9709022038567494, + "grad_norm": 3.703125, + "learning_rate": 7.105307838392812e-05, + "loss": 4.1369, + "step": 345100 + }, + { + "epoch": 2.971332644628099, + "grad_norm": 4.5625, + "learning_rate": 7.102714729181767e-05, + "loss": 4.4068, + "step": 345150 + }, + { + "epoch": 2.971763085399449, + "grad_norm": 1.96875, + "learning_rate": 7.100121832625224e-05, + "loss": 4.3105, + "step": 345200 + }, + { + "epoch": 2.972193526170799, + "grad_norm": 1.5, + "learning_rate": 7.097529148913503e-05, + "loss": 4.1333, + "step": 345250 + }, + { + "epoch": 2.972623966942149, + "grad_norm": 2.71875, + "learning_rate": 7.094936678236894e-05, + "loss": 4.4415, + "step": 345300 + }, + { + "epoch": 2.973054407713499, + "grad_norm": 3.328125, + "learning_rate": 7.092344420785686e-05, + "loss": 4.3307, + "step": 345350 + }, + { + "epoch": 2.9734848484848486, + "grad_norm": 3.65625, + "learning_rate": 7.089752376750138e-05, + "loss": 4.5526, + "step": 345400 + }, + { + "epoch": 2.9739152892561984, + "grad_norm": 3.0, + "learning_rate": 7.087160546320506e-05, + "loss": 4.5478, + "step": 345450 + }, + { + "epoch": 2.974345730027548, + "grad_norm": 4.6875, + "learning_rate": 7.084568929687022e-05, + "loss": 3.944, + "step": 345500 + }, + { + "epoch": 2.974776170798898, + "grad_norm": 1.765625, + "learning_rate": 7.081977527039905e-05, + "loss": 4.3547, + "step": 345550 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 3.3125, + "learning_rate": 7.079386338569363e-05, + "loss": 4.3263, + "step": 345600 + }, + { + "epoch": 2.9756370523415976, + "grad_norm": 2.171875, + "learning_rate": 7.076795364465578e-05, + "loss": 4.5249, + "step": 345650 + }, + { + "epoch": 2.9760674931129474, + "grad_norm": 3.109375, + "learning_rate": 7.074204604918722e-05, + "loss": 4.5851, + "step": 345700 + }, + { + "epoch": 2.9764979338842976, + "grad_norm": 2.9375, + "learning_rate": 7.071614060118954e-05, + "loss": 4.3908, + "step": 345750 + }, + { + "epoch": 2.9769283746556474, + "grad_norm": 3.90625, + "learning_rate": 7.069023730256413e-05, + "loss": 4.6488, + "step": 345800 + }, + { + "epoch": 2.977358815426997, + "grad_norm": 5.625, + "learning_rate": 7.066433615521228e-05, + "loss": 4.5108, + "step": 345850 + }, + { + "epoch": 2.977789256198347, + "grad_norm": 4.40625, + "learning_rate": 7.0638437161035e-05, + "loss": 4.0877, + "step": 345900 + }, + { + "epoch": 2.9782196969696972, + "grad_norm": 4.1875, + "learning_rate": 7.061254032193327e-05, + "loss": 4.3139, + "step": 345950 + }, + { + "epoch": 2.978650137741047, + "grad_norm": 2.6875, + "learning_rate": 7.058664563980783e-05, + "loss": 4.5791, + "step": 346000 + }, + { + "epoch": 2.979080578512397, + "grad_norm": 2.5, + "learning_rate": 7.056075311655932e-05, + "loss": 4.2769, + "step": 346050 + }, + { + "epoch": 2.9795110192837466, + "grad_norm": 1.875, + "learning_rate": 7.053486275408822e-05, + "loss": 4.3123, + "step": 346100 + }, + { + "epoch": 2.9799414600550964, + "grad_norm": 3.015625, + "learning_rate": 7.050897455429479e-05, + "loss": 4.254, + "step": 346150 + }, + { + "epoch": 2.980371900826446, + "grad_norm": 3.359375, + "learning_rate": 7.048308851907916e-05, + "loss": 4.4651, + "step": 346200 + }, + { + "epoch": 2.980802341597796, + "grad_norm": 3.359375, + "learning_rate": 7.045720465034132e-05, + "loss": 4.1671, + "step": 346250 + }, + { + "epoch": 2.981232782369146, + "grad_norm": 2.734375, + "learning_rate": 7.043132294998112e-05, + "loss": 4.3572, + "step": 346300 + }, + { + "epoch": 2.981663223140496, + "grad_norm": 5.53125, + "learning_rate": 7.040544341989821e-05, + "loss": 4.4563, + "step": 346350 + }, + { + "epoch": 2.982093663911846, + "grad_norm": 4.0, + "learning_rate": 7.037956606199202e-05, + "loss": 4.4577, + "step": 346400 + }, + { + "epoch": 2.9825241046831956, + "grad_norm": 2.515625, + "learning_rate": 7.0353690878162e-05, + "loss": 4.0317, + "step": 346450 + }, + { + "epoch": 2.9829545454545454, + "grad_norm": 2.5, + "learning_rate": 7.032781787030727e-05, + "loss": 4.3282, + "step": 346500 + }, + { + "epoch": 2.983384986225895, + "grad_norm": 3.171875, + "learning_rate": 7.030194704032685e-05, + "loss": 4.5129, + "step": 346550 + }, + { + "epoch": 2.9838154269972454, + "grad_norm": 3.171875, + "learning_rate": 7.027607839011963e-05, + "loss": 4.4157, + "step": 346600 + }, + { + "epoch": 2.9842458677685952, + "grad_norm": 1.8203125, + "learning_rate": 7.025021192158433e-05, + "loss": 4.1739, + "step": 346650 + }, + { + "epoch": 2.984676308539945, + "grad_norm": 3.09375, + "learning_rate": 7.022434763661945e-05, + "loss": 4.338, + "step": 346700 + }, + { + "epoch": 2.985106749311295, + "grad_norm": 3.40625, + "learning_rate": 7.019848553712337e-05, + "loss": 4.2847, + "step": 346750 + }, + { + "epoch": 2.9855371900826446, + "grad_norm": 5.0, + "learning_rate": 7.017262562499436e-05, + "loss": 3.9216, + "step": 346800 + }, + { + "epoch": 2.9859676308539944, + "grad_norm": 3.71875, + "learning_rate": 7.014676790213042e-05, + "loss": 4.4951, + "step": 346850 + }, + { + "epoch": 2.986398071625344, + "grad_norm": 4.3125, + "learning_rate": 7.012091237042955e-05, + "loss": 4.6992, + "step": 346900 + }, + { + "epoch": 2.986828512396694, + "grad_norm": 1.625, + "learning_rate": 7.009505903178936e-05, + "loss": 4.6149, + "step": 346950 + }, + { + "epoch": 2.9872589531680442, + "grad_norm": 3.8125, + "learning_rate": 7.006920788810752e-05, + "loss": 4.3064, + "step": 347000 + }, + { + "epoch": 2.987689393939394, + "grad_norm": 2.140625, + "learning_rate": 7.00433589412814e-05, + "loss": 4.2437, + "step": 347050 + }, + { + "epoch": 2.988119834710744, + "grad_norm": 3.640625, + "learning_rate": 7.001751219320829e-05, + "loss": 4.0124, + "step": 347100 + }, + { + "epoch": 2.9885502754820936, + "grad_norm": 2.640625, + "learning_rate": 6.999166764578532e-05, + "loss": 4.1106, + "step": 347150 + }, + { + "epoch": 2.9889807162534434, + "grad_norm": 1.6484375, + "learning_rate": 6.996582530090936e-05, + "loss": 3.7507, + "step": 347200 + }, + { + "epoch": 2.9894111570247937, + "grad_norm": 3.03125, + "learning_rate": 6.993998516047717e-05, + "loss": 4.8152, + "step": 347250 + }, + { + "epoch": 2.9898415977961434, + "grad_norm": 2.8125, + "learning_rate": 6.99141472263854e-05, + "loss": 4.5709, + "step": 347300 + }, + { + "epoch": 2.9902720385674932, + "grad_norm": 2.265625, + "learning_rate": 6.988831150053051e-05, + "loss": 4.0828, + "step": 347350 + }, + { + "epoch": 2.990702479338843, + "grad_norm": 3.390625, + "learning_rate": 6.98624779848088e-05, + "loss": 4.6084, + "step": 347400 + }, + { + "epoch": 2.991132920110193, + "grad_norm": 4.5, + "learning_rate": 6.983664668111632e-05, + "loss": 4.2643, + "step": 347450 + }, + { + "epoch": 2.9915633608815426, + "grad_norm": 3.5, + "learning_rate": 6.981081759134907e-05, + "loss": 4.4713, + "step": 347500 + }, + { + "epoch": 2.9919938016528924, + "grad_norm": 3.265625, + "learning_rate": 6.978499071740286e-05, + "loss": 4.2272, + "step": 347550 + }, + { + "epoch": 2.992424242424242, + "grad_norm": 3.921875, + "learning_rate": 6.975916606117332e-05, + "loss": 4.6481, + "step": 347600 + }, + { + "epoch": 2.9928546831955924, + "grad_norm": 2.828125, + "learning_rate": 6.973334362455597e-05, + "loss": 4.2598, + "step": 347650 + }, + { + "epoch": 2.9932851239669422, + "grad_norm": 3.671875, + "learning_rate": 6.970752340944604e-05, + "loss": 3.8855, + "step": 347700 + }, + { + "epoch": 2.993715564738292, + "grad_norm": 3.421875, + "learning_rate": 6.968170541773871e-05, + "loss": 4.099, + "step": 347750 + }, + { + "epoch": 2.994146005509642, + "grad_norm": 2.84375, + "learning_rate": 6.965588965132896e-05, + "loss": 4.5199, + "step": 347800 + }, + { + "epoch": 2.9945764462809916, + "grad_norm": 1.8125, + "learning_rate": 6.963007611211166e-05, + "loss": 4.1152, + "step": 347850 + }, + { + "epoch": 2.995006887052342, + "grad_norm": 4.125, + "learning_rate": 6.960426480198143e-05, + "loss": 3.9402, + "step": 347900 + }, + { + "epoch": 2.9954373278236917, + "grad_norm": 3.515625, + "learning_rate": 6.957845572283275e-05, + "loss": 4.5469, + "step": 347950 + }, + { + "epoch": 2.9958677685950414, + "grad_norm": 2.84375, + "learning_rate": 6.955264887655995e-05, + "loss": 4.2333, + "step": 348000 + }, + { + "epoch": 2.9958677685950414, + "eval_loss": 4.991213321685791, + "eval_runtime": 24.0364, + "eval_samples_per_second": 26.626, + "eval_steps_per_second": 13.313, + "eval_tts_loss": 7.409507157671467, + "step": 348000 + }, + { + "epoch": 2.9962982093663912, + "grad_norm": 1.171875, + "learning_rate": 6.952684426505724e-05, + "loss": 4.1471, + "step": 348050 + }, + { + "epoch": 2.996728650137741, + "grad_norm": 3.25, + "learning_rate": 6.950104189021856e-05, + "loss": 3.7776, + "step": 348100 + }, + { + "epoch": 2.997159090909091, + "grad_norm": 2.796875, + "learning_rate": 6.947524175393785e-05, + "loss": 4.6694, + "step": 348150 + }, + { + "epoch": 2.9975895316804406, + "grad_norm": 3.828125, + "learning_rate": 6.944944385810868e-05, + "loss": 4.5849, + "step": 348200 + }, + { + "epoch": 2.9980199724517904, + "grad_norm": 4.09375, + "learning_rate": 6.94236482046246e-05, + "loss": 4.2124, + "step": 348250 + }, + { + "epoch": 2.9984504132231407, + "grad_norm": 3.8125, + "learning_rate": 6.939785479537893e-05, + "loss": 4.3135, + "step": 348300 + }, + { + "epoch": 2.9988808539944904, + "grad_norm": 3.015625, + "learning_rate": 6.93720636322649e-05, + "loss": 4.493, + "step": 348350 + }, + { + "epoch": 2.9993112947658402, + "grad_norm": 3.90625, + "learning_rate": 6.934627471717549e-05, + "loss": 4.5723, + "step": 348400 + }, + { + "epoch": 2.99974173553719, + "grad_norm": 5.875, + "learning_rate": 6.93204880520036e-05, + "loss": 4.3788, + "step": 348450 + }, + { + "epoch": 3.00017217630854, + "grad_norm": 1.3671875, + "learning_rate": 6.929470363864185e-05, + "loss": 4.0193, + "step": 348500 + }, + { + "epoch": 3.0006026170798896, + "grad_norm": 3.109375, + "learning_rate": 6.92689214789828e-05, + "loss": 4.0593, + "step": 348550 + }, + { + "epoch": 3.00103305785124, + "grad_norm": 4.5625, + "learning_rate": 6.924314157491877e-05, + "loss": 4.3015, + "step": 348600 + }, + { + "epoch": 3.0014634986225897, + "grad_norm": 4.28125, + "learning_rate": 6.921736392834197e-05, + "loss": 4.4952, + "step": 348650 + }, + { + "epoch": 3.0018939393939394, + "grad_norm": 2.734375, + "learning_rate": 6.919158854114447e-05, + "loss": 4.2368, + "step": 348700 + }, + { + "epoch": 3.0023243801652892, + "grad_norm": 3.765625, + "learning_rate": 6.916581541521807e-05, + "loss": 4.4221, + "step": 348750 + }, + { + "epoch": 3.002754820936639, + "grad_norm": 2.9375, + "learning_rate": 6.914004455245446e-05, + "loss": 3.8055, + "step": 348800 + }, + { + "epoch": 3.003185261707989, + "grad_norm": 3.546875, + "learning_rate": 6.91142759547452e-05, + "loss": 4.6014, + "step": 348850 + }, + { + "epoch": 3.0036157024793386, + "grad_norm": 2.515625, + "learning_rate": 6.908850962398162e-05, + "loss": 4.2205, + "step": 348900 + }, + { + "epoch": 3.004046143250689, + "grad_norm": 2.203125, + "learning_rate": 6.906274556205495e-05, + "loss": 4.2182, + "step": 348950 + }, + { + "epoch": 3.0044765840220387, + "grad_norm": 5.28125, + "learning_rate": 6.903698377085614e-05, + "loss": 4.0105, + "step": 349000 + }, + { + "epoch": 3.0049070247933884, + "grad_norm": 5.53125, + "learning_rate": 6.901122425227615e-05, + "loss": 4.2359, + "step": 349050 + }, + { + "epoch": 3.0053374655647382, + "grad_norm": 3.640625, + "learning_rate": 6.89854670082056e-05, + "loss": 4.2774, + "step": 349100 + }, + { + "epoch": 3.005767906336088, + "grad_norm": 1.9140625, + "learning_rate": 6.895971204053504e-05, + "loss": 4.3043, + "step": 349150 + }, + { + "epoch": 3.006198347107438, + "grad_norm": 2.5625, + "learning_rate": 6.893395935115487e-05, + "loss": 4.2211, + "step": 349200 + }, + { + "epoch": 3.006628787878788, + "grad_norm": 1.6640625, + "learning_rate": 6.890820894195522e-05, + "loss": 4.2544, + "step": 349250 + }, + { + "epoch": 3.007059228650138, + "grad_norm": 2.359375, + "learning_rate": 6.888246081482613e-05, + "loss": 4.342, + "step": 349300 + }, + { + "epoch": 3.0074896694214877, + "grad_norm": 2.921875, + "learning_rate": 6.885671497165745e-05, + "loss": 4.3731, + "step": 349350 + }, + { + "epoch": 3.0079201101928374, + "grad_norm": 4.21875, + "learning_rate": 6.883097141433892e-05, + "loss": 4.0846, + "step": 349400 + }, + { + "epoch": 3.0083505509641872, + "grad_norm": 4.40625, + "learning_rate": 6.880523014476004e-05, + "loss": 4.2551, + "step": 349450 + }, + { + "epoch": 3.008780991735537, + "grad_norm": 2.671875, + "learning_rate": 6.87794911648101e-05, + "loss": 4.831, + "step": 349500 + }, + { + "epoch": 3.009211432506887, + "grad_norm": 2.125, + "learning_rate": 6.875375447637836e-05, + "loss": 4.7646, + "step": 349550 + }, + { + "epoch": 3.009641873278237, + "grad_norm": 1.671875, + "learning_rate": 6.87280200813538e-05, + "loss": 3.9587, + "step": 349600 + }, + { + "epoch": 3.010072314049587, + "grad_norm": 2.515625, + "learning_rate": 6.870228798162528e-05, + "loss": 4.1138, + "step": 349650 + }, + { + "epoch": 3.0105027548209367, + "grad_norm": 2.015625, + "learning_rate": 6.867655817908152e-05, + "loss": 4.4146, + "step": 349700 + }, + { + "epoch": 3.0109331955922864, + "grad_norm": 3.5, + "learning_rate": 6.865083067561098e-05, + "loss": 4.4591, + "step": 349750 + }, + { + "epoch": 3.0113636363636362, + "grad_norm": 4.28125, + "learning_rate": 6.862510547310199e-05, + "loss": 4.1988, + "step": 349800 + }, + { + "epoch": 3.011794077134986, + "grad_norm": 3.828125, + "learning_rate": 6.859938257344277e-05, + "loss": 4.5588, + "step": 349850 + }, + { + "epoch": 3.0122245179063363, + "grad_norm": 2.671875, + "learning_rate": 6.857366197852132e-05, + "loss": 4.3576, + "step": 349900 + }, + { + "epoch": 3.012654958677686, + "grad_norm": 3.109375, + "learning_rate": 6.854794369022548e-05, + "loss": 4.2082, + "step": 349950 + }, + { + "epoch": 3.013085399449036, + "grad_norm": 1.625, + "learning_rate": 6.852222771044287e-05, + "loss": 4.2093, + "step": 350000 + }, + { + "epoch": 3.0135158402203857, + "grad_norm": 3.140625, + "learning_rate": 6.849651404106102e-05, + "loss": 4.2408, + "step": 350050 + }, + { + "epoch": 3.0139462809917354, + "grad_norm": 4.375, + "learning_rate": 6.847080268396728e-05, + "loss": 4.0275, + "step": 350100 + }, + { + "epoch": 3.0143767217630852, + "grad_norm": 4.375, + "learning_rate": 6.844509364104876e-05, + "loss": 4.0008, + "step": 350150 + }, + { + "epoch": 3.0148071625344355, + "grad_norm": 4.59375, + "learning_rate": 6.84193869141925e-05, + "loss": 4.1721, + "step": 350200 + }, + { + "epoch": 3.0152376033057853, + "grad_norm": 1.859375, + "learning_rate": 6.839368250528529e-05, + "loss": 4.5617, + "step": 350250 + }, + { + "epoch": 3.015668044077135, + "grad_norm": 5.4375, + "learning_rate": 6.836798041621377e-05, + "loss": 4.3456, + "step": 350300 + }, + { + "epoch": 3.016098484848485, + "grad_norm": 3.515625, + "learning_rate": 6.834228064886442e-05, + "loss": 4.0823, + "step": 350350 + }, + { + "epoch": 3.0165289256198347, + "grad_norm": 1.9453125, + "learning_rate": 6.831658320512357e-05, + "loss": 3.9468, + "step": 350400 + }, + { + "epoch": 3.0169593663911844, + "grad_norm": 3.09375, + "learning_rate": 6.829088808687734e-05, + "loss": 4.402, + "step": 350450 + }, + { + "epoch": 3.0173898071625342, + "grad_norm": 4.46875, + "learning_rate": 6.826519529601174e-05, + "loss": 4.019, + "step": 350500 + }, + { + "epoch": 3.0178202479338845, + "grad_norm": 2.71875, + "learning_rate": 6.823950483441247e-05, + "loss": 4.2083, + "step": 350550 + }, + { + "epoch": 3.0182506887052343, + "grad_norm": 5.03125, + "learning_rate": 6.821381670396524e-05, + "loss": 4.0946, + "step": 350600 + }, + { + "epoch": 3.018681129476584, + "grad_norm": 1.6171875, + "learning_rate": 6.818813090655544e-05, + "loss": 4.2547, + "step": 350650 + }, + { + "epoch": 3.019111570247934, + "grad_norm": 3.703125, + "learning_rate": 6.816244744406841e-05, + "loss": 4.1576, + "step": 350700 + }, + { + "epoch": 3.0195420110192837, + "grad_norm": 3.640625, + "learning_rate": 6.813676631838926e-05, + "loss": 4.4224, + "step": 350750 + }, + { + "epoch": 3.0199724517906334, + "grad_norm": 2.890625, + "learning_rate": 6.81110875314029e-05, + "loss": 4.1079, + "step": 350800 + }, + { + "epoch": 3.0204028925619837, + "grad_norm": 3.4375, + "learning_rate": 6.808541108499407e-05, + "loss": 4.2391, + "step": 350850 + }, + { + "epoch": 3.0208333333333335, + "grad_norm": 4.28125, + "learning_rate": 6.805973698104742e-05, + "loss": 4.4265, + "step": 350900 + }, + { + "epoch": 3.0212637741046833, + "grad_norm": 1.4609375, + "learning_rate": 6.803406522144735e-05, + "loss": 4.4175, + "step": 350950 + }, + { + "epoch": 3.021694214876033, + "grad_norm": 3.5, + "learning_rate": 6.800839580807816e-05, + "loss": 4.0121, + "step": 351000 + }, + { + "epoch": 3.021694214876033, + "eval_loss": 4.993182182312012, + "eval_runtime": 25.3417, + "eval_samples_per_second": 25.255, + "eval_steps_per_second": 12.627, + "eval_tts_loss": 7.477537839047033, + "step": 351000 + }, + { + "epoch": 3.022124655647383, + "grad_norm": 2.8125, + "learning_rate": 6.798272874282383e-05, + "loss": 4.2912, + "step": 351050 + }, + { + "epoch": 3.0225550964187327, + "grad_norm": 3.4375, + "learning_rate": 6.795706402756835e-05, + "loss": 4.4703, + "step": 351100 + }, + { + "epoch": 3.0229855371900825, + "grad_norm": 5.34375, + "learning_rate": 6.79314016641954e-05, + "loss": 4.2438, + "step": 351150 + }, + { + "epoch": 3.0234159779614327, + "grad_norm": 3.140625, + "learning_rate": 6.790574165458858e-05, + "loss": 4.2153, + "step": 351200 + }, + { + "epoch": 3.0238464187327825, + "grad_norm": 2.265625, + "learning_rate": 6.78800840006313e-05, + "loss": 4.5285, + "step": 351250 + }, + { + "epoch": 3.0242768595041323, + "grad_norm": 2.15625, + "learning_rate": 6.785442870420674e-05, + "loss": 4.4665, + "step": 351300 + }, + { + "epoch": 3.024707300275482, + "grad_norm": 4.0625, + "learning_rate": 6.782877576719793e-05, + "loss": 4.1483, + "step": 351350 + }, + { + "epoch": 3.025137741046832, + "grad_norm": 2.1875, + "learning_rate": 6.780312519148775e-05, + "loss": 4.2245, + "step": 351400 + }, + { + "epoch": 3.0255681818181817, + "grad_norm": 1.765625, + "learning_rate": 6.777747697895893e-05, + "loss": 4.2463, + "step": 351450 + }, + { + "epoch": 3.025998622589532, + "grad_norm": 2.125, + "learning_rate": 6.775183113149399e-05, + "loss": 4.3165, + "step": 351500 + }, + { + "epoch": 3.0264290633608817, + "grad_norm": 2.53125, + "learning_rate": 6.772618765097522e-05, + "loss": 4.3179, + "step": 351550 + }, + { + "epoch": 3.0268595041322315, + "grad_norm": 2.953125, + "learning_rate": 6.770054653928485e-05, + "loss": 4.4555, + "step": 351600 + }, + { + "epoch": 3.0272899449035813, + "grad_norm": 3.1875, + "learning_rate": 6.767490779830487e-05, + "loss": 4.261, + "step": 351650 + }, + { + "epoch": 3.027720385674931, + "grad_norm": 3.703125, + "learning_rate": 6.76492714299171e-05, + "loss": 4.2654, + "step": 351700 + }, + { + "epoch": 3.028150826446281, + "grad_norm": 4.40625, + "learning_rate": 6.762363743600325e-05, + "loss": 4.1454, + "step": 351750 + }, + { + "epoch": 3.0285812672176307, + "grad_norm": 2.96875, + "learning_rate": 6.759800581844474e-05, + "loss": 4.2149, + "step": 351800 + }, + { + "epoch": 3.029011707988981, + "grad_norm": 3.65625, + "learning_rate": 6.757237657912283e-05, + "loss": 4.4111, + "step": 351850 + }, + { + "epoch": 3.0294421487603307, + "grad_norm": 3.484375, + "learning_rate": 6.754674971991875e-05, + "loss": 4.2162, + "step": 351900 + }, + { + "epoch": 3.0298725895316805, + "grad_norm": 2.84375, + "learning_rate": 6.752112524271341e-05, + "loss": 4.3516, + "step": 351950 + }, + { + "epoch": 3.0303030303030303, + "grad_norm": 5.28125, + "learning_rate": 6.749550314938758e-05, + "loss": 4.2341, + "step": 352000 + }, + { + "epoch": 3.03073347107438, + "grad_norm": 3.3125, + "learning_rate": 6.746988344182192e-05, + "loss": 4.4311, + "step": 352050 + }, + { + "epoch": 3.03116391184573, + "grad_norm": 3.171875, + "learning_rate": 6.74442661218968e-05, + "loss": 4.5223, + "step": 352100 + }, + { + "epoch": 3.03159435261708, + "grad_norm": 2.765625, + "learning_rate": 6.741865119149252e-05, + "loss": 4.2839, + "step": 352150 + }, + { + "epoch": 3.03202479338843, + "grad_norm": 2.4375, + "learning_rate": 6.739303865248911e-05, + "loss": 4.2244, + "step": 352200 + }, + { + "epoch": 3.0324552341597797, + "grad_norm": 4.40625, + "learning_rate": 6.736742850676652e-05, + "loss": 4.4552, + "step": 352250 + }, + { + "epoch": 3.0328856749311295, + "grad_norm": 2.984375, + "learning_rate": 6.73418207562045e-05, + "loss": 4.2598, + "step": 352300 + }, + { + "epoch": 3.0333161157024793, + "grad_norm": 4.65625, + "learning_rate": 6.731621540268257e-05, + "loss": 4.6048, + "step": 352350 + }, + { + "epoch": 3.033746556473829, + "grad_norm": 2.96875, + "learning_rate": 6.729061244808006e-05, + "loss": 4.5114, + "step": 352400 + }, + { + "epoch": 3.034176997245179, + "grad_norm": 3.65625, + "learning_rate": 6.726501189427624e-05, + "loss": 4.3409, + "step": 352450 + }, + { + "epoch": 3.034607438016529, + "grad_norm": 2.140625, + "learning_rate": 6.72394137431501e-05, + "loss": 4.1391, + "step": 352500 + }, + { + "epoch": 3.035037878787879, + "grad_norm": 1.9921875, + "learning_rate": 6.721381799658057e-05, + "loss": 4.074, + "step": 352550 + }, + { + "epoch": 3.0354683195592287, + "grad_norm": 1.96875, + "learning_rate": 6.718822465644619e-05, + "loss": 4.5309, + "step": 352600 + }, + { + "epoch": 3.0358987603305785, + "grad_norm": 2.546875, + "learning_rate": 6.716263372462554e-05, + "loss": 4.1559, + "step": 352650 + }, + { + "epoch": 3.0363292011019283, + "grad_norm": 1.859375, + "learning_rate": 6.713704520299694e-05, + "loss": 4.3863, + "step": 352700 + }, + { + "epoch": 3.036759641873278, + "grad_norm": 3.21875, + "learning_rate": 6.711145909343848e-05, + "loss": 4.4605, + "step": 352750 + }, + { + "epoch": 3.0371900826446283, + "grad_norm": 3.5, + "learning_rate": 6.708587539782821e-05, + "loss": 4.263, + "step": 352800 + }, + { + "epoch": 3.037620523415978, + "grad_norm": 4.9375, + "learning_rate": 6.706029411804386e-05, + "loss": 4.2533, + "step": 352850 + }, + { + "epoch": 3.038050964187328, + "grad_norm": 4.15625, + "learning_rate": 6.703471525596301e-05, + "loss": 4.565, + "step": 352900 + }, + { + "epoch": 3.0384814049586777, + "grad_norm": 3.078125, + "learning_rate": 6.700913881346316e-05, + "loss": 4.2158, + "step": 352950 + }, + { + "epoch": 3.0389118457300275, + "grad_norm": 4.71875, + "learning_rate": 6.698356479242154e-05, + "loss": 4.629, + "step": 353000 + }, + { + "epoch": 3.0393422865013773, + "grad_norm": 2.65625, + "learning_rate": 6.695799319471525e-05, + "loss": 4.5001, + "step": 353050 + }, + { + "epoch": 3.039772727272727, + "grad_norm": 4.3125, + "learning_rate": 6.693242402222113e-05, + "loss": 4.2241, + "step": 353100 + }, + { + "epoch": 3.0402031680440773, + "grad_norm": 3.796875, + "learning_rate": 6.690685727681594e-05, + "loss": 4.0977, + "step": 353150 + }, + { + "epoch": 3.040633608815427, + "grad_norm": 2.125, + "learning_rate": 6.688129296037624e-05, + "loss": 4.4082, + "step": 353200 + }, + { + "epoch": 3.041064049586777, + "grad_norm": 4.0, + "learning_rate": 6.685573107477836e-05, + "loss": 4.0613, + "step": 353250 + }, + { + "epoch": 3.0414944903581267, + "grad_norm": 4.21875, + "learning_rate": 6.683017162189855e-05, + "loss": 4.1511, + "step": 353300 + }, + { + "epoch": 3.0419249311294765, + "grad_norm": 5.03125, + "learning_rate": 6.680461460361277e-05, + "loss": 4.8417, + "step": 353350 + }, + { + "epoch": 3.0423553719008263, + "grad_norm": 1.9375, + "learning_rate": 6.677906002179681e-05, + "loss": 4.3596, + "step": 353400 + }, + { + "epoch": 3.0427858126721765, + "grad_norm": 3.171875, + "learning_rate": 6.675350787832639e-05, + "loss": 4.5467, + "step": 353450 + }, + { + "epoch": 3.0432162534435263, + "grad_norm": 3.328125, + "learning_rate": 6.672795817507697e-05, + "loss": 4.5393, + "step": 353500 + }, + { + "epoch": 3.043646694214876, + "grad_norm": 2.828125, + "learning_rate": 6.670241091392386e-05, + "loss": 4.4852, + "step": 353550 + }, + { + "epoch": 3.044077134986226, + "grad_norm": 3.609375, + "learning_rate": 6.66768660967421e-05, + "loss": 4.1685, + "step": 353600 + }, + { + "epoch": 3.0445075757575757, + "grad_norm": 4.3125, + "learning_rate": 6.665132372540668e-05, + "loss": 4.2069, + "step": 353650 + }, + { + "epoch": 3.0449380165289255, + "grad_norm": 4.71875, + "learning_rate": 6.662578380179236e-05, + "loss": 4.3321, + "step": 353700 + }, + { + "epoch": 3.0453684573002753, + "grad_norm": 1.8828125, + "learning_rate": 6.660024632777368e-05, + "loss": 4.2612, + "step": 353750 + }, + { + "epoch": 3.0457988980716255, + "grad_norm": 1.640625, + "learning_rate": 6.657471130522508e-05, + "loss": 3.9872, + "step": 353800 + }, + { + "epoch": 3.0462293388429753, + "grad_norm": 3.40625, + "learning_rate": 6.654917873602075e-05, + "loss": 4.615, + "step": 353850 + }, + { + "epoch": 3.046659779614325, + "grad_norm": 1.453125, + "learning_rate": 6.652364862203474e-05, + "loss": 4.3082, + "step": 353900 + }, + { + "epoch": 3.047090220385675, + "grad_norm": 3.875, + "learning_rate": 6.649812096514087e-05, + "loss": 4.5583, + "step": 353950 + }, + { + "epoch": 3.0475206611570247, + "grad_norm": 4.5, + "learning_rate": 6.647259576721288e-05, + "loss": 4.2475, + "step": 354000 + }, + { + "epoch": 3.0475206611570247, + "eval_loss": 4.994631767272949, + "eval_runtime": 24.3067, + "eval_samples_per_second": 26.33, + "eval_steps_per_second": 13.165, + "eval_tts_loss": 7.493267624532595, + "step": 354000 + }, + { + "epoch": 3.0479511019283745, + "grad_norm": 5.375, + "learning_rate": 6.644707303012418e-05, + "loss": 3.9716, + "step": 354050 + }, + { + "epoch": 3.0483815426997247, + "grad_norm": 3.375, + "learning_rate": 6.642155275574818e-05, + "loss": 4.414, + "step": 354100 + }, + { + "epoch": 3.0488119834710745, + "grad_norm": 3.4375, + "learning_rate": 6.639603494595789e-05, + "loss": 4.5595, + "step": 354150 + }, + { + "epoch": 3.0492424242424243, + "grad_norm": 3.453125, + "learning_rate": 6.637051960262639e-05, + "loss": 4.3779, + "step": 354200 + }, + { + "epoch": 3.049672865013774, + "grad_norm": 3.984375, + "learning_rate": 6.634500672762637e-05, + "loss": 4.3522, + "step": 354250 + }, + { + "epoch": 3.050103305785124, + "grad_norm": 6.65625, + "learning_rate": 6.631949632283044e-05, + "loss": 4.3906, + "step": 354300 + }, + { + "epoch": 3.0505337465564737, + "grad_norm": 2.921875, + "learning_rate": 6.629398839011105e-05, + "loss": 4.3624, + "step": 354350 + }, + { + "epoch": 3.0509641873278235, + "grad_norm": 3.265625, + "learning_rate": 6.626848293134038e-05, + "loss": 4.305, + "step": 354400 + }, + { + "epoch": 3.0513946280991737, + "grad_norm": 2.46875, + "learning_rate": 6.624297994839047e-05, + "loss": 3.9894, + "step": 354450 + }, + { + "epoch": 3.0518250688705235, + "grad_norm": 3.203125, + "learning_rate": 6.621747944313319e-05, + "loss": 4.1997, + "step": 354500 + }, + { + "epoch": 3.0522555096418733, + "grad_norm": 4.4375, + "learning_rate": 6.619198141744027e-05, + "loss": 4.1079, + "step": 354550 + }, + { + "epoch": 3.052685950413223, + "grad_norm": 3.203125, + "learning_rate": 6.616648587318317e-05, + "loss": 4.6535, + "step": 354600 + }, + { + "epoch": 3.053116391184573, + "grad_norm": 1.1328125, + "learning_rate": 6.61409928122332e-05, + "loss": 3.9644, + "step": 354650 + }, + { + "epoch": 3.0535468319559227, + "grad_norm": 3.96875, + "learning_rate": 6.61155022364615e-05, + "loss": 4.3938, + "step": 354700 + }, + { + "epoch": 3.053977272727273, + "grad_norm": 3.65625, + "learning_rate": 6.609001414773904e-05, + "loss": 4.1628, + "step": 354750 + }, + { + "epoch": 3.0544077134986227, + "grad_norm": 3.40625, + "learning_rate": 6.606452854793656e-05, + "loss": 4.5047, + "step": 354800 + }, + { + "epoch": 3.0548381542699725, + "grad_norm": 2.71875, + "learning_rate": 6.603904543892471e-05, + "loss": 4.3843, + "step": 354850 + }, + { + "epoch": 3.0552685950413223, + "grad_norm": 4.34375, + "learning_rate": 6.601356482257386e-05, + "loss": 4.1097, + "step": 354900 + }, + { + "epoch": 3.055699035812672, + "grad_norm": 3.78125, + "learning_rate": 6.598808670075418e-05, + "loss": 4.438, + "step": 354950 + }, + { + "epoch": 3.056129476584022, + "grad_norm": 2.65625, + "learning_rate": 6.596261107533578e-05, + "loss": 4.269, + "step": 355000 + }, + { + "epoch": 3.0565599173553717, + "grad_norm": 2.65625, + "learning_rate": 6.593713794818849e-05, + "loss": 4.0392, + "step": 355050 + }, + { + "epoch": 3.056990358126722, + "grad_norm": 3.84375, + "learning_rate": 6.591166732118204e-05, + "loss": 4.1408, + "step": 355100 + }, + { + "epoch": 3.0574207988980717, + "grad_norm": 3.484375, + "learning_rate": 6.588619919618579e-05, + "loss": 4.3555, + "step": 355150 + }, + { + "epoch": 3.0578512396694215, + "grad_norm": 3.6875, + "learning_rate": 6.586073357506915e-05, + "loss": 4.3225, + "step": 355200 + }, + { + "epoch": 3.0582816804407713, + "grad_norm": 3.625, + "learning_rate": 6.583527045970122e-05, + "loss": 4.3289, + "step": 355250 + }, + { + "epoch": 3.058712121212121, + "grad_norm": 3.625, + "learning_rate": 6.580980985195091e-05, + "loss": 4.3943, + "step": 355300 + }, + { + "epoch": 3.059142561983471, + "grad_norm": 5.46875, + "learning_rate": 6.578435175368704e-05, + "loss": 4.1192, + "step": 355350 + }, + { + "epoch": 3.059573002754821, + "grad_norm": 3.921875, + "learning_rate": 6.57588961667781e-05, + "loss": 4.5199, + "step": 355400 + }, + { + "epoch": 3.060003443526171, + "grad_norm": 4.4375, + "learning_rate": 6.57334430930925e-05, + "loss": 4.5455, + "step": 355450 + }, + { + "epoch": 3.0604338842975207, + "grad_norm": 2.875, + "learning_rate": 6.570799253449846e-05, + "loss": 4.2386, + "step": 355500 + }, + { + "epoch": 3.0608643250688705, + "grad_norm": 1.7265625, + "learning_rate": 6.568254449286399e-05, + "loss": 4.1244, + "step": 355550 + }, + { + "epoch": 3.0612947658402203, + "grad_norm": 4.1875, + "learning_rate": 6.565709897005696e-05, + "loss": 4.0862, + "step": 355600 + }, + { + "epoch": 3.06172520661157, + "grad_norm": 4.375, + "learning_rate": 6.563165596794492e-05, + "loss": 4.3905, + "step": 355650 + }, + { + "epoch": 3.06215564738292, + "grad_norm": 4.65625, + "learning_rate": 6.560621548839538e-05, + "loss": 4.3767, + "step": 355700 + }, + { + "epoch": 3.06258608815427, + "grad_norm": 3.3125, + "learning_rate": 6.558077753327566e-05, + "loss": 4.4376, + "step": 355750 + }, + { + "epoch": 3.06301652892562, + "grad_norm": 7.09375, + "learning_rate": 6.555534210445278e-05, + "loss": 4.1551, + "step": 355800 + }, + { + "epoch": 3.0634469696969697, + "grad_norm": 1.6484375, + "learning_rate": 6.552990920379369e-05, + "loss": 4.0698, + "step": 355850 + }, + { + "epoch": 3.0638774104683195, + "grad_norm": 9.1875, + "learning_rate": 6.550447883316512e-05, + "loss": 4.166, + "step": 355900 + }, + { + "epoch": 3.0643078512396693, + "grad_norm": 6.0, + "learning_rate": 6.547905099443358e-05, + "loss": 4.5483, + "step": 355950 + }, + { + "epoch": 3.064738292011019, + "grad_norm": 2.84375, + "learning_rate": 6.545362568946542e-05, + "loss": 4.5924, + "step": 356000 + }, + { + "epoch": 3.0651687327823693, + "grad_norm": 3.09375, + "learning_rate": 6.542820292012681e-05, + "loss": 4.1123, + "step": 356050 + }, + { + "epoch": 3.065599173553719, + "grad_norm": 3.171875, + "learning_rate": 6.54027826882837e-05, + "loss": 4.089, + "step": 356100 + }, + { + "epoch": 3.066029614325069, + "grad_norm": 1.671875, + "learning_rate": 6.537736499580194e-05, + "loss": 4.47, + "step": 356150 + }, + { + "epoch": 3.0664600550964187, + "grad_norm": 3.5625, + "learning_rate": 6.535194984454707e-05, + "loss": 4.4509, + "step": 356200 + }, + { + "epoch": 3.0668904958677685, + "grad_norm": 3.4375, + "learning_rate": 6.532653723638453e-05, + "loss": 4.2035, + "step": 356250 + }, + { + "epoch": 3.0673209366391183, + "grad_norm": 4.40625, + "learning_rate": 6.530112717317956e-05, + "loss": 4.0482, + "step": 356300 + }, + { + "epoch": 3.067751377410468, + "grad_norm": 3.71875, + "learning_rate": 6.52757196567972e-05, + "loss": 4.4188, + "step": 356350 + }, + { + "epoch": 3.0681818181818183, + "grad_norm": 2.6875, + "learning_rate": 6.525031468910232e-05, + "loss": 4.5306, + "step": 356400 + }, + { + "epoch": 3.068612258953168, + "grad_norm": 5.40625, + "learning_rate": 6.522491227195956e-05, + "loss": 4.6338, + "step": 356450 + }, + { + "epoch": 3.069042699724518, + "grad_norm": 2.546875, + "learning_rate": 6.519951240723344e-05, + "loss": 4.0925, + "step": 356500 + }, + { + "epoch": 3.0694731404958677, + "grad_norm": 3.828125, + "learning_rate": 6.517411509678819e-05, + "loss": 4.1743, + "step": 356550 + }, + { + "epoch": 3.0699035812672175, + "grad_norm": 3.890625, + "learning_rate": 6.5148720342488e-05, + "loss": 4.6664, + "step": 356600 + }, + { + "epoch": 3.0703340220385673, + "grad_norm": 3.203125, + "learning_rate": 6.512332814619677e-05, + "loss": 4.5723, + "step": 356650 + }, + { + "epoch": 3.0707644628099175, + "grad_norm": 3.546875, + "learning_rate": 6.509793850977817e-05, + "loss": 4.1521, + "step": 356700 + }, + { + "epoch": 3.0711949035812673, + "grad_norm": 4.53125, + "learning_rate": 6.507255143509579e-05, + "loss": 4.2396, + "step": 356750 + }, + { + "epoch": 3.071625344352617, + "grad_norm": 3.359375, + "learning_rate": 6.504716692401301e-05, + "loss": 4.3324, + "step": 356800 + }, + { + "epoch": 3.072055785123967, + "grad_norm": 2.0, + "learning_rate": 6.502178497839296e-05, + "loss": 4.1093, + "step": 356850 + }, + { + "epoch": 3.0724862258953167, + "grad_norm": 4.0, + "learning_rate": 6.499640560009867e-05, + "loss": 4.0299, + "step": 356900 + }, + { + "epoch": 3.0729166666666665, + "grad_norm": 2.84375, + "learning_rate": 6.497102879099288e-05, + "loss": 4.2207, + "step": 356950 + }, + { + "epoch": 3.0733471074380168, + "grad_norm": 3.53125, + "learning_rate": 6.494565455293817e-05, + "loss": 4.0947, + "step": 357000 + }, + { + "epoch": 3.0733471074380168, + "eval_loss": 4.9925127029418945, + "eval_runtime": 24.1044, + "eval_samples_per_second": 26.551, + "eval_steps_per_second": 13.276, + "eval_tts_loss": 7.451498809537497, + "step": 357000 + }, + { + "epoch": 3.0737775482093666, + "grad_norm": 3.046875, + "learning_rate": 6.492028288779702e-05, + "loss": 4.3632, + "step": 357050 + }, + { + "epoch": 3.0742079889807163, + "grad_norm": 2.78125, + "learning_rate": 6.489491379743165e-05, + "loss": 4.4784, + "step": 357100 + }, + { + "epoch": 3.074638429752066, + "grad_norm": 2.8125, + "learning_rate": 6.486954728370408e-05, + "loss": 4.1263, + "step": 357150 + }, + { + "epoch": 3.075068870523416, + "grad_norm": 5.40625, + "learning_rate": 6.484418334847612e-05, + "loss": 4.5602, + "step": 357200 + }, + { + "epoch": 3.0754993112947657, + "grad_norm": 3.171875, + "learning_rate": 6.481882199360947e-05, + "loss": 4.2463, + "step": 357250 + }, + { + "epoch": 3.0759297520661155, + "grad_norm": 3.4375, + "learning_rate": 6.47934632209656e-05, + "loss": 4.3203, + "step": 357300 + }, + { + "epoch": 3.0763601928374658, + "grad_norm": 2.921875, + "learning_rate": 6.476810703240577e-05, + "loss": 4.1856, + "step": 357350 + }, + { + "epoch": 3.0767906336088156, + "grad_norm": 3.828125, + "learning_rate": 6.474275342979112e-05, + "loss": 4.1917, + "step": 357400 + }, + { + "epoch": 3.0772210743801653, + "grad_norm": 3.296875, + "learning_rate": 6.471740241498245e-05, + "loss": 4.1496, + "step": 357450 + }, + { + "epoch": 3.077651515151515, + "grad_norm": 2.046875, + "learning_rate": 6.469205398984058e-05, + "loss": 4.2282, + "step": 357500 + }, + { + "epoch": 3.078081955922865, + "grad_norm": 3.234375, + "learning_rate": 6.466670815622595e-05, + "loss": 4.3859, + "step": 357550 + }, + { + "epoch": 3.0785123966942147, + "grad_norm": 6.125, + "learning_rate": 6.464136491599893e-05, + "loss": 4.2796, + "step": 357600 + }, + { + "epoch": 3.078942837465565, + "grad_norm": 2.140625, + "learning_rate": 6.461602427101963e-05, + "loss": 4.5376, + "step": 357650 + }, + { + "epoch": 3.0793732782369148, + "grad_norm": 4.6875, + "learning_rate": 6.459068622314808e-05, + "loss": 4.2966, + "step": 357700 + }, + { + "epoch": 3.0798037190082646, + "grad_norm": 1.3515625, + "learning_rate": 6.456535077424391e-05, + "loss": 4.4985, + "step": 357750 + }, + { + "epoch": 3.0802341597796143, + "grad_norm": 3.09375, + "learning_rate": 6.454001792616679e-05, + "loss": 4.3831, + "step": 357800 + }, + { + "epoch": 3.080664600550964, + "grad_norm": 3.15625, + "learning_rate": 6.451468768077602e-05, + "loss": 4.3182, + "step": 357850 + }, + { + "epoch": 3.081095041322314, + "grad_norm": 4.34375, + "learning_rate": 6.448936003993085e-05, + "loss": 4.103, + "step": 357900 + }, + { + "epoch": 3.0815254820936637, + "grad_norm": 3.28125, + "learning_rate": 6.446403500549027e-05, + "loss": 4.2688, + "step": 357950 + }, + { + "epoch": 3.081955922865014, + "grad_norm": 2.03125, + "learning_rate": 6.443871257931307e-05, + "loss": 4.353, + "step": 358000 + }, + { + "epoch": 3.0823863636363638, + "grad_norm": 2.765625, + "learning_rate": 6.441339276325782e-05, + "loss": 4.4004, + "step": 358050 + }, + { + "epoch": 3.0828168044077136, + "grad_norm": 4.34375, + "learning_rate": 6.438807555918296e-05, + "loss": 4.3556, + "step": 358100 + }, + { + "epoch": 3.0832472451790633, + "grad_norm": 4.53125, + "learning_rate": 6.436276096894678e-05, + "loss": 4.1774, + "step": 358150 + }, + { + "epoch": 3.083677685950413, + "grad_norm": 2.359375, + "learning_rate": 6.433744899440726e-05, + "loss": 4.448, + "step": 358200 + }, + { + "epoch": 3.084108126721763, + "grad_norm": 2.0625, + "learning_rate": 6.431213963742223e-05, + "loss": 4.0852, + "step": 358250 + }, + { + "epoch": 3.084538567493113, + "grad_norm": 2.578125, + "learning_rate": 6.428683289984938e-05, + "loss": 4.1548, + "step": 358300 + }, + { + "epoch": 3.084969008264463, + "grad_norm": 3.59375, + "learning_rate": 6.426152878354614e-05, + "loss": 4.3454, + "step": 358350 + }, + { + "epoch": 3.0853994490358128, + "grad_norm": 5.28125, + "learning_rate": 6.423622729036979e-05, + "loss": 4.1213, + "step": 358400 + }, + { + "epoch": 3.0858298898071626, + "grad_norm": 2.734375, + "learning_rate": 6.421092842217746e-05, + "loss": 4.2708, + "step": 358450 + }, + { + "epoch": 3.0862603305785123, + "grad_norm": 4.71875, + "learning_rate": 6.418563218082597e-05, + "loss": 4.1605, + "step": 358500 + }, + { + "epoch": 3.086690771349862, + "grad_norm": 3.203125, + "learning_rate": 6.416033856817199e-05, + "loss": 4.3796, + "step": 358550 + }, + { + "epoch": 3.087121212121212, + "grad_norm": 4.5625, + "learning_rate": 6.413504758607203e-05, + "loss": 4.5386, + "step": 358600 + }, + { + "epoch": 3.087551652892562, + "grad_norm": 2.8125, + "learning_rate": 6.410975923638246e-05, + "loss": 4.0379, + "step": 358650 + }, + { + "epoch": 3.087982093663912, + "grad_norm": 4.75, + "learning_rate": 6.408447352095935e-05, + "loss": 4.5101, + "step": 358700 + }, + { + "epoch": 3.0884125344352618, + "grad_norm": 3.5625, + "learning_rate": 6.405919044165855e-05, + "loss": 4.022, + "step": 358750 + }, + { + "epoch": 3.0888429752066116, + "grad_norm": 5.4375, + "learning_rate": 6.403391000033587e-05, + "loss": 4.4119, + "step": 358800 + }, + { + "epoch": 3.0892734159779613, + "grad_norm": 3.34375, + "learning_rate": 6.400863219884684e-05, + "loss": 4.3505, + "step": 358850 + }, + { + "epoch": 3.089703856749311, + "grad_norm": 3.0625, + "learning_rate": 6.398335703904673e-05, + "loss": 4.2578, + "step": 358900 + }, + { + "epoch": 3.0901342975206614, + "grad_norm": 4.59375, + "learning_rate": 6.395808452279076e-05, + "loss": 4.4511, + "step": 358950 + }, + { + "epoch": 3.090564738292011, + "grad_norm": 5.21875, + "learning_rate": 6.393281465193379e-05, + "loss": 4.031, + "step": 359000 + }, + { + "epoch": 3.090995179063361, + "grad_norm": 2.984375, + "learning_rate": 6.390754742833066e-05, + "loss": 4.3142, + "step": 359050 + }, + { + "epoch": 3.0914256198347108, + "grad_norm": 4.21875, + "learning_rate": 6.388228285383588e-05, + "loss": 4.2652, + "step": 359100 + }, + { + "epoch": 3.0918560606060606, + "grad_norm": 4.09375, + "learning_rate": 6.385702093030383e-05, + "loss": 4.68, + "step": 359150 + }, + { + "epoch": 3.0922865013774103, + "grad_norm": 2.96875, + "learning_rate": 6.383176165958872e-05, + "loss": 4.2393, + "step": 359200 + }, + { + "epoch": 3.09271694214876, + "grad_norm": 4.65625, + "learning_rate": 6.380650504354444e-05, + "loss": 4.192, + "step": 359250 + }, + { + "epoch": 3.0931473829201104, + "grad_norm": 2.28125, + "learning_rate": 6.378125108402482e-05, + "loss": 4.1481, + "step": 359300 + }, + { + "epoch": 3.09357782369146, + "grad_norm": 1.3359375, + "learning_rate": 6.375599978288345e-05, + "loss": 4.484, + "step": 359350 + }, + { + "epoch": 3.09400826446281, + "grad_norm": 3.1875, + "learning_rate": 6.373075114197371e-05, + "loss": 4.7217, + "step": 359400 + }, + { + "epoch": 3.0944387052341598, + "grad_norm": 2.390625, + "learning_rate": 6.37055051631488e-05, + "loss": 4.199, + "step": 359450 + }, + { + "epoch": 3.0948691460055096, + "grad_norm": 3.671875, + "learning_rate": 6.368026184826174e-05, + "loss": 4.1352, + "step": 359500 + }, + { + "epoch": 3.0952995867768593, + "grad_norm": 2.921875, + "learning_rate": 6.365502119916532e-05, + "loss": 4.5603, + "step": 359550 + }, + { + "epoch": 3.0957300275482096, + "grad_norm": 2.296875, + "learning_rate": 6.362978321771212e-05, + "loss": 4.1977, + "step": 359600 + }, + { + "epoch": 3.0961604683195594, + "grad_norm": 5.34375, + "learning_rate": 6.36045479057546e-05, + "loss": 4.4953, + "step": 359650 + }, + { + "epoch": 3.096590909090909, + "grad_norm": 3.0, + "learning_rate": 6.357931526514493e-05, + "loss": 4.2018, + "step": 359700 + }, + { + "epoch": 3.097021349862259, + "grad_norm": 0.828125, + "learning_rate": 6.35540852977352e-05, + "loss": 4.3821, + "step": 359750 + }, + { + "epoch": 3.0974517906336088, + "grad_norm": 4.4375, + "learning_rate": 6.352885800537716e-05, + "loss": 4.1022, + "step": 359800 + }, + { + "epoch": 3.0978822314049586, + "grad_norm": 2.375, + "learning_rate": 6.350363338992249e-05, + "loss": 4.3221, + "step": 359850 + }, + { + "epoch": 3.0983126721763083, + "grad_norm": 3.4375, + "learning_rate": 6.347841145322259e-05, + "loss": 4.2567, + "step": 359900 + }, + { + "epoch": 3.0987431129476586, + "grad_norm": 3.765625, + "learning_rate": 6.345319219712871e-05, + "loss": 4.1406, + "step": 359950 + }, + { + "epoch": 3.0991735537190084, + "grad_norm": 4.5, + "learning_rate": 6.342797562349192e-05, + "loss": 4.3052, + "step": 360000 + }, + { + "epoch": 3.0991735537190084, + "eval_loss": 4.993627071380615, + "eval_runtime": 23.8195, + "eval_samples_per_second": 26.869, + "eval_steps_per_second": 13.434, + "eval_tts_loss": 7.470391318419026, + "step": 360000 + }, + { + "epoch": 3.099603994490358, + "grad_norm": 5.15625, + "learning_rate": 6.340276173416302e-05, + "loss": 4.6036, + "step": 360050 + }, + { + "epoch": 3.100034435261708, + "grad_norm": 2.40625, + "learning_rate": 6.337755053099263e-05, + "loss": 4.3408, + "step": 360100 + }, + { + "epoch": 3.1004648760330578, + "grad_norm": 4.5625, + "learning_rate": 6.335234201583126e-05, + "loss": 4.4338, + "step": 360150 + }, + { + "epoch": 3.1008953168044076, + "grad_norm": 3.296875, + "learning_rate": 6.332713619052915e-05, + "loss": 4.3446, + "step": 360200 + }, + { + "epoch": 3.101325757575758, + "grad_norm": 3.953125, + "learning_rate": 6.330193305693637e-05, + "loss": 4.3714, + "step": 360250 + }, + { + "epoch": 3.1017561983471076, + "grad_norm": 2.421875, + "learning_rate": 6.327673261690267e-05, + "loss": 4.2715, + "step": 360300 + }, + { + "epoch": 3.1021866391184574, + "grad_norm": 5.90625, + "learning_rate": 6.325153487227782e-05, + "loss": 4.1991, + "step": 360350 + }, + { + "epoch": 3.102617079889807, + "grad_norm": 2.609375, + "learning_rate": 6.322633982491126e-05, + "loss": 4.2997, + "step": 360400 + }, + { + "epoch": 3.103047520661157, + "grad_norm": 2.484375, + "learning_rate": 6.32011474766522e-05, + "loss": 4.2015, + "step": 360450 + }, + { + "epoch": 3.1034779614325068, + "grad_norm": 1.96875, + "learning_rate": 6.31759578293498e-05, + "loss": 4.2898, + "step": 360500 + }, + { + "epoch": 3.1039084022038566, + "grad_norm": 3.421875, + "learning_rate": 6.315077088485282e-05, + "loss": 4.2906, + "step": 360550 + }, + { + "epoch": 3.104338842975207, + "grad_norm": 2.171875, + "learning_rate": 6.312558664500998e-05, + "loss": 4.2858, + "step": 360600 + }, + { + "epoch": 3.1047692837465566, + "grad_norm": 3.1875, + "learning_rate": 6.310040511166973e-05, + "loss": 4.3682, + "step": 360650 + }, + { + "epoch": 3.1051997245179064, + "grad_norm": 3.140625, + "learning_rate": 6.307522628668038e-05, + "loss": 4.65, + "step": 360700 + }, + { + "epoch": 3.105630165289256, + "grad_norm": 3.25, + "learning_rate": 6.305005017188999e-05, + "loss": 4.1018, + "step": 360750 + }, + { + "epoch": 3.106060606060606, + "grad_norm": 3.328125, + "learning_rate": 6.302487676914636e-05, + "loss": 4.1349, + "step": 360800 + }, + { + "epoch": 3.1064910468319558, + "grad_norm": 1.6953125, + "learning_rate": 6.299970608029726e-05, + "loss": 4.2589, + "step": 360850 + }, + { + "epoch": 3.106921487603306, + "grad_norm": 3.640625, + "learning_rate": 6.297453810719011e-05, + "loss": 3.9939, + "step": 360900 + }, + { + "epoch": 3.107351928374656, + "grad_norm": 2.8125, + "learning_rate": 6.29493728516722e-05, + "loss": 4.3162, + "step": 360950 + }, + { + "epoch": 3.1077823691460056, + "grad_norm": 2.859375, + "learning_rate": 6.292421031559064e-05, + "loss": 4.274, + "step": 361000 + }, + { + "epoch": 3.1082128099173554, + "grad_norm": 4.28125, + "learning_rate": 6.289905050079224e-05, + "loss": 4.4353, + "step": 361050 + }, + { + "epoch": 3.108643250688705, + "grad_norm": 1.9375, + "learning_rate": 6.287389340912372e-05, + "loss": 3.9579, + "step": 361100 + }, + { + "epoch": 3.109073691460055, + "grad_norm": 3.859375, + "learning_rate": 6.284873904243152e-05, + "loss": 4.4099, + "step": 361150 + }, + { + "epoch": 3.1095041322314048, + "grad_norm": 2.109375, + "learning_rate": 6.282358740256198e-05, + "loss": 4.3133, + "step": 361200 + }, + { + "epoch": 3.109934573002755, + "grad_norm": 4.65625, + "learning_rate": 6.27984384913611e-05, + "loss": 4.071, + "step": 361250 + }, + { + "epoch": 3.110365013774105, + "grad_norm": 2.828125, + "learning_rate": 6.277329231067484e-05, + "loss": 4.1805, + "step": 361300 + }, + { + "epoch": 3.1107954545454546, + "grad_norm": 3.96875, + "learning_rate": 6.274814886234879e-05, + "loss": 4.2217, + "step": 361350 + }, + { + "epoch": 3.1112258953168044, + "grad_norm": 4.84375, + "learning_rate": 6.272300814822847e-05, + "loss": 4.1471, + "step": 361400 + }, + { + "epoch": 3.111656336088154, + "grad_norm": 2.4375, + "learning_rate": 6.269787017015914e-05, + "loss": 4.2561, + "step": 361450 + }, + { + "epoch": 3.112086776859504, + "grad_norm": 4.75, + "learning_rate": 6.267273492998589e-05, + "loss": 4.0435, + "step": 361500 + }, + { + "epoch": 3.112517217630854, + "grad_norm": 1.3515625, + "learning_rate": 6.264760242955361e-05, + "loss": 4.0504, + "step": 361550 + }, + { + "epoch": 3.112947658402204, + "grad_norm": 3.140625, + "learning_rate": 6.262247267070693e-05, + "loss": 4.0465, + "step": 361600 + }, + { + "epoch": 3.113378099173554, + "grad_norm": 2.640625, + "learning_rate": 6.259734565529033e-05, + "loss": 4.3578, + "step": 361650 + }, + { + "epoch": 3.1138085399449036, + "grad_norm": 3.296875, + "learning_rate": 6.257222138514807e-05, + "loss": 4.3567, + "step": 361700 + }, + { + "epoch": 3.1142389807162534, + "grad_norm": 3.40625, + "learning_rate": 6.254709986212426e-05, + "loss": 4.8075, + "step": 361750 + }, + { + "epoch": 3.114669421487603, + "grad_norm": 2.40625, + "learning_rate": 6.252198108806275e-05, + "loss": 4.2787, + "step": 361800 + }, + { + "epoch": 3.115099862258953, + "grad_norm": 4.125, + "learning_rate": 6.249686506480716e-05, + "loss": 4.3385, + "step": 361850 + }, + { + "epoch": 3.115530303030303, + "grad_norm": 3.28125, + "learning_rate": 6.2471751794201e-05, + "loss": 4.6175, + "step": 361900 + }, + { + "epoch": 3.115960743801653, + "grad_norm": 1.484375, + "learning_rate": 6.244664127808751e-05, + "loss": 4.1945, + "step": 361950 + }, + { + "epoch": 3.116391184573003, + "grad_norm": 3.640625, + "learning_rate": 6.242153351830972e-05, + "loss": 4.0884, + "step": 362000 + }, + { + "epoch": 3.1168216253443526, + "grad_norm": 3.734375, + "learning_rate": 6.239642851671058e-05, + "loss": 4.5337, + "step": 362050 + }, + { + "epoch": 3.1172520661157024, + "grad_norm": 4.5, + "learning_rate": 6.237132627513265e-05, + "loss": 4.1964, + "step": 362100 + }, + { + "epoch": 3.117682506887052, + "grad_norm": 3.453125, + "learning_rate": 6.23462267954184e-05, + "loss": 4.3932, + "step": 362150 + }, + { + "epoch": 3.1181129476584024, + "grad_norm": 2.265625, + "learning_rate": 6.232113007941008e-05, + "loss": 4.3091, + "step": 362200 + }, + { + "epoch": 3.118543388429752, + "grad_norm": 1.6953125, + "learning_rate": 6.229603612894978e-05, + "loss": 4.2519, + "step": 362250 + }, + { + "epoch": 3.118973829201102, + "grad_norm": 3.546875, + "learning_rate": 6.22709449458793e-05, + "loss": 4.1282, + "step": 362300 + }, + { + "epoch": 3.119404269972452, + "grad_norm": 1.296875, + "learning_rate": 6.224585653204027e-05, + "loss": 4.364, + "step": 362350 + }, + { + "epoch": 3.1198347107438016, + "grad_norm": 3.53125, + "learning_rate": 6.222077088927414e-05, + "loss": 4.387, + "step": 362400 + }, + { + "epoch": 3.1202651515151514, + "grad_norm": 2.71875, + "learning_rate": 6.219568801942215e-05, + "loss": 4.174, + "step": 362450 + }, + { + "epoch": 3.120695592286501, + "grad_norm": 1.65625, + "learning_rate": 6.21706079243253e-05, + "loss": 4.1107, + "step": 362500 + }, + { + "epoch": 3.1211260330578514, + "grad_norm": 4.1875, + "learning_rate": 6.214553060582449e-05, + "loss": 4.327, + "step": 362550 + }, + { + "epoch": 3.121556473829201, + "grad_norm": 4.375, + "learning_rate": 6.212045606576023e-05, + "loss": 4.2824, + "step": 362600 + }, + { + "epoch": 3.121986914600551, + "grad_norm": 2.5625, + "learning_rate": 6.209538430597303e-05, + "loss": 4.3991, + "step": 362650 + }, + { + "epoch": 3.122417355371901, + "grad_norm": 1.7578125, + "learning_rate": 6.207031532830303e-05, + "loss": 4.3649, + "step": 362700 + }, + { + "epoch": 3.1228477961432506, + "grad_norm": 2.78125, + "learning_rate": 6.204524913459031e-05, + "loss": 3.7126, + "step": 362750 + }, + { + "epoch": 3.1232782369146004, + "grad_norm": 2.859375, + "learning_rate": 6.202018572667467e-05, + "loss": 3.9808, + "step": 362800 + }, + { + "epoch": 3.1237086776859506, + "grad_norm": 3.96875, + "learning_rate": 6.199512510639563e-05, + "loss": 4.2064, + "step": 362850 + }, + { + "epoch": 3.1241391184573004, + "grad_norm": 3.453125, + "learning_rate": 6.197006727559264e-05, + "loss": 3.9251, + "step": 362900 + }, + { + "epoch": 3.12456955922865, + "grad_norm": 3.09375, + "learning_rate": 6.19450122361049e-05, + "loss": 4.34, + "step": 362950 + }, + { + "epoch": 3.125, + "grad_norm": 4.6875, + "learning_rate": 6.191995998977138e-05, + "loss": 4.1998, + "step": 363000 + }, + { + "epoch": 3.125, + "eval_loss": 4.995603561401367, + "eval_runtime": 24.2612, + "eval_samples_per_second": 26.38, + "eval_steps_per_second": 13.19, + "eval_tts_loss": 7.500190487637266, + "step": 363000 + }, + { + "epoch": 3.12543044077135, + "grad_norm": 6.71875, + "learning_rate": 6.189491053843086e-05, + "loss": 4.439, + "step": 363050 + }, + { + "epoch": 3.1258608815426996, + "grad_norm": 1.625, + "learning_rate": 6.186986388392196e-05, + "loss": 4.4058, + "step": 363100 + }, + { + "epoch": 3.1262913223140494, + "grad_norm": 2.4375, + "learning_rate": 6.184482002808298e-05, + "loss": 4.3491, + "step": 363150 + }, + { + "epoch": 3.1267217630853996, + "grad_norm": 2.0625, + "learning_rate": 6.18197789727521e-05, + "loss": 4.4404, + "step": 363200 + }, + { + "epoch": 3.1271522038567494, + "grad_norm": 2.375, + "learning_rate": 6.179474071976734e-05, + "loss": 4.2152, + "step": 363250 + }, + { + "epoch": 3.127582644628099, + "grad_norm": 5.25, + "learning_rate": 6.176970527096637e-05, + "loss": 4.2062, + "step": 363300 + }, + { + "epoch": 3.128013085399449, + "grad_norm": 3.578125, + "learning_rate": 6.174467262818683e-05, + "loss": 4.0699, + "step": 363350 + }, + { + "epoch": 3.128443526170799, + "grad_norm": 3.140625, + "learning_rate": 6.171964279326596e-05, + "loss": 4.5137, + "step": 363400 + }, + { + "epoch": 3.1288739669421486, + "grad_norm": 4.28125, + "learning_rate": 6.169461576804098e-05, + "loss": 4.0273, + "step": 363450 + }, + { + "epoch": 3.129304407713499, + "grad_norm": 3.640625, + "learning_rate": 6.166959155434875e-05, + "loss": 4.1074, + "step": 363500 + }, + { + "epoch": 3.1297348484848486, + "grad_norm": 3.125, + "learning_rate": 6.164457015402604e-05, + "loss": 4.1651, + "step": 363550 + }, + { + "epoch": 3.1301652892561984, + "grad_norm": 3.359375, + "learning_rate": 6.16195515689094e-05, + "loss": 4.1972, + "step": 363600 + }, + { + "epoch": 3.130595730027548, + "grad_norm": 6.1875, + "learning_rate": 6.159453580083508e-05, + "loss": 4.3394, + "step": 363650 + }, + { + "epoch": 3.131026170798898, + "grad_norm": 4.0625, + "learning_rate": 6.156952285163915e-05, + "loss": 4.2125, + "step": 363700 + }, + { + "epoch": 3.131456611570248, + "grad_norm": 3.109375, + "learning_rate": 6.154451272315758e-05, + "loss": 4.0876, + "step": 363750 + }, + { + "epoch": 3.1318870523415976, + "grad_norm": 3.59375, + "learning_rate": 6.151950541722606e-05, + "loss": 4.4389, + "step": 363800 + }, + { + "epoch": 3.132317493112948, + "grad_norm": 3.640625, + "learning_rate": 6.149450093568008e-05, + "loss": 4.5061, + "step": 363850 + }, + { + "epoch": 3.1327479338842976, + "grad_norm": 2.34375, + "learning_rate": 6.146949928035482e-05, + "loss": 4.5249, + "step": 363900 + }, + { + "epoch": 3.1331783746556474, + "grad_norm": 1.75, + "learning_rate": 6.144450045308543e-05, + "loss": 4.3586, + "step": 363950 + }, + { + "epoch": 3.133608815426997, + "grad_norm": 3.96875, + "learning_rate": 6.141950445570677e-05, + "loss": 4.3803, + "step": 364000 + }, + { + "epoch": 3.134039256198347, + "grad_norm": 3.40625, + "learning_rate": 6.139451129005344e-05, + "loss": 4.4974, + "step": 364050 + }, + { + "epoch": 3.134469696969697, + "grad_norm": 4.28125, + "learning_rate": 6.136952095795997e-05, + "loss": 4.6624, + "step": 364100 + }, + { + "epoch": 3.134900137741047, + "grad_norm": 1.21875, + "learning_rate": 6.134453346126052e-05, + "loss": 4.1538, + "step": 364150 + }, + { + "epoch": 3.135330578512397, + "grad_norm": 1.9453125, + "learning_rate": 6.131954880178914e-05, + "loss": 4.0171, + "step": 364200 + }, + { + "epoch": 3.1357610192837466, + "grad_norm": 1.65625, + "learning_rate": 6.129456698137964e-05, + "loss": 4.448, + "step": 364250 + }, + { + "epoch": 3.1361914600550964, + "grad_norm": 3.578125, + "learning_rate": 6.126958800186568e-05, + "loss": 4.4268, + "step": 364300 + }, + { + "epoch": 3.136621900826446, + "grad_norm": 3.40625, + "learning_rate": 6.124461186508064e-05, + "loss": 4.1488, + "step": 364350 + }, + { + "epoch": 3.137052341597796, + "grad_norm": 1.9921875, + "learning_rate": 6.121963857285766e-05, + "loss": 4.3665, + "step": 364400 + }, + { + "epoch": 3.137482782369146, + "grad_norm": 2.734375, + "learning_rate": 6.119466812702977e-05, + "loss": 4.2319, + "step": 364450 + }, + { + "epoch": 3.137913223140496, + "grad_norm": 4.8125, + "learning_rate": 6.116970052942979e-05, + "loss": 4.4639, + "step": 364500 + }, + { + "epoch": 3.138343663911846, + "grad_norm": 4.1875, + "learning_rate": 6.11447357818902e-05, + "loss": 4.3677, + "step": 364550 + }, + { + "epoch": 3.1387741046831956, + "grad_norm": 2.765625, + "learning_rate": 6.111977388624345e-05, + "loss": 4.3788, + "step": 364600 + }, + { + "epoch": 3.1392045454545454, + "grad_norm": 3.640625, + "learning_rate": 6.109481484432162e-05, + "loss": 4.5474, + "step": 364650 + }, + { + "epoch": 3.139634986225895, + "grad_norm": 4.53125, + "learning_rate": 6.106985865795668e-05, + "loss": 4.372, + "step": 364700 + }, + { + "epoch": 3.140065426997245, + "grad_norm": 2.546875, + "learning_rate": 6.104490532898033e-05, + "loss": 4.2874, + "step": 364750 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 1.9921875, + "learning_rate": 6.1019954859224146e-05, + "loss": 3.8088, + "step": 364800 + }, + { + "epoch": 3.140926308539945, + "grad_norm": 2.90625, + "learning_rate": 6.0995007250519386e-05, + "loss": 4.1065, + "step": 364850 + }, + { + "epoch": 3.141356749311295, + "grad_norm": 3.015625, + "learning_rate": 6.097006250469721e-05, + "loss": 4.6339, + "step": 364900 + }, + { + "epoch": 3.1417871900826446, + "grad_norm": 3.390625, + "learning_rate": 6.094512062358842e-05, + "loss": 4.1012, + "step": 364950 + }, + { + "epoch": 3.1422176308539944, + "grad_norm": 2.875, + "learning_rate": 6.0920181609023785e-05, + "loss": 4.5716, + "step": 365000 + }, + { + "epoch": 3.142648071625344, + "grad_norm": 3.765625, + "learning_rate": 6.089524546283372e-05, + "loss": 4.461, + "step": 365050 + }, + { + "epoch": 3.143078512396694, + "grad_norm": 3.40625, + "learning_rate": 6.087031218684849e-05, + "loss": 4.3748, + "step": 365100 + }, + { + "epoch": 3.1435089531680442, + "grad_norm": 2.84375, + "learning_rate": 6.0845381782898194e-05, + "loss": 4.2182, + "step": 365150 + }, + { + "epoch": 3.143939393939394, + "grad_norm": 2.1875, + "learning_rate": 6.082045425281262e-05, + "loss": 4.5994, + "step": 365200 + }, + { + "epoch": 3.144369834710744, + "grad_norm": 3.921875, + "learning_rate": 6.0795529598421374e-05, + "loss": 4.3806, + "step": 365250 + }, + { + "epoch": 3.1448002754820936, + "grad_norm": 3.328125, + "learning_rate": 6.0770607821553915e-05, + "loss": 4.0178, + "step": 365300 + }, + { + "epoch": 3.1452307162534434, + "grad_norm": 4.15625, + "learning_rate": 6.0745688924039454e-05, + "loss": 4.3046, + "step": 365350 + }, + { + "epoch": 3.145661157024793, + "grad_norm": 3.734375, + "learning_rate": 6.072077290770699e-05, + "loss": 4.3052, + "step": 365400 + }, + { + "epoch": 3.1460915977961434, + "grad_norm": 2.96875, + "learning_rate": 6.069585977438525e-05, + "loss": 4.1699, + "step": 365450 + }, + { + "epoch": 3.1465220385674932, + "grad_norm": 2.796875, + "learning_rate": 6.0670949525902844e-05, + "loss": 4.5027, + "step": 365500 + }, + { + "epoch": 3.146952479338843, + "grad_norm": 2.515625, + "learning_rate": 6.0646042164088114e-05, + "loss": 4.3779, + "step": 365550 + }, + { + "epoch": 3.147382920110193, + "grad_norm": 3.8125, + "learning_rate": 6.062113769076923e-05, + "loss": 3.7515, + "step": 365600 + }, + { + "epoch": 3.1478133608815426, + "grad_norm": 3.328125, + "learning_rate": 6.059623610777412e-05, + "loss": 4.2055, + "step": 365650 + }, + { + "epoch": 3.1482438016528924, + "grad_norm": 3.15625, + "learning_rate": 6.057133741693051e-05, + "loss": 4.1914, + "step": 365700 + }, + { + "epoch": 3.148674242424242, + "grad_norm": 2.796875, + "learning_rate": 6.054644162006586e-05, + "loss": 4.3808, + "step": 365750 + }, + { + "epoch": 3.1491046831955924, + "grad_norm": 2.71875, + "learning_rate": 6.052154871900753e-05, + "loss": 4.1459, + "step": 365800 + }, + { + "epoch": 3.1495351239669422, + "grad_norm": 3.28125, + "learning_rate": 6.049665871558259e-05, + "loss": 3.9858, + "step": 365850 + }, + { + "epoch": 3.149965564738292, + "grad_norm": 5.0625, + "learning_rate": 6.047177161161792e-05, + "loss": 4.5558, + "step": 365900 + }, + { + "epoch": 3.150396005509642, + "grad_norm": 2.71875, + "learning_rate": 6.044688740894013e-05, + "loss": 4.4139, + "step": 365950 + }, + { + "epoch": 3.1508264462809916, + "grad_norm": 4.25, + "learning_rate": 6.042200610937572e-05, + "loss": 4.3586, + "step": 366000 + }, + { + "epoch": 3.1508264462809916, + "eval_loss": 4.995471000671387, + "eval_runtime": 24.3469, + "eval_samples_per_second": 26.287, + "eval_steps_per_second": 13.143, + "eval_tts_loss": 7.496336504952824, + "step": 366000 + }, + { + "epoch": 3.1512568870523414, + "grad_norm": 3.203125, + "learning_rate": 6.03971277147509e-05, + "loss": 4.6061, + "step": 366050 + }, + { + "epoch": 3.1516873278236917, + "grad_norm": 1.9375, + "learning_rate": 6.03722522268917e-05, + "loss": 4.4332, + "step": 366100 + }, + { + "epoch": 3.1521177685950414, + "grad_norm": 2.65625, + "learning_rate": 6.0347379647623956e-05, + "loss": 4.5218, + "step": 366150 + }, + { + "epoch": 3.1525482093663912, + "grad_norm": 3.5625, + "learning_rate": 6.03225099787732e-05, + "loss": 4.3174, + "step": 366200 + }, + { + "epoch": 3.152978650137741, + "grad_norm": 2.90625, + "learning_rate": 6.029764322216486e-05, + "loss": 4.2855, + "step": 366250 + }, + { + "epoch": 3.153409090909091, + "grad_norm": 6.15625, + "learning_rate": 6.027277937962406e-05, + "loss": 4.3564, + "step": 366300 + }, + { + "epoch": 3.1538395316804406, + "grad_norm": 3.484375, + "learning_rate": 6.024791845297579e-05, + "loss": 4.6584, + "step": 366350 + }, + { + "epoch": 3.1542699724517904, + "grad_norm": 1.328125, + "learning_rate": 6.022306044404482e-05, + "loss": 4.0403, + "step": 366400 + }, + { + "epoch": 3.1547004132231407, + "grad_norm": 5.3125, + "learning_rate": 6.019820535465557e-05, + "loss": 4.1525, + "step": 366450 + }, + { + "epoch": 3.1551308539944904, + "grad_norm": 4.78125, + "learning_rate": 6.017335318663242e-05, + "loss": 4.5943, + "step": 366500 + }, + { + "epoch": 3.1555612947658402, + "grad_norm": 4.25, + "learning_rate": 6.0148503941799485e-05, + "loss": 4.0463, + "step": 366550 + }, + { + "epoch": 3.15599173553719, + "grad_norm": 4.375, + "learning_rate": 6.012365762198059e-05, + "loss": 4.2938, + "step": 366600 + }, + { + "epoch": 3.15642217630854, + "grad_norm": 5.65625, + "learning_rate": 6.009881422899942e-05, + "loss": 4.6583, + "step": 366650 + }, + { + "epoch": 3.1568526170798896, + "grad_norm": 3.109375, + "learning_rate": 6.007397376467949e-05, + "loss": 4.3703, + "step": 366700 + }, + { + "epoch": 3.15728305785124, + "grad_norm": 7.96875, + "learning_rate": 6.004913623084396e-05, + "loss": 4.2425, + "step": 366750 + }, + { + "epoch": 3.1577134986225897, + "grad_norm": 3.671875, + "learning_rate": 6.002430162931585e-05, + "loss": 4.2259, + "step": 366800 + }, + { + "epoch": 3.1581439393939394, + "grad_norm": 2.515625, + "learning_rate": 5.9999469961917987e-05, + "loss": 4.1448, + "step": 366850 + }, + { + "epoch": 3.1585743801652892, + "grad_norm": 4.90625, + "learning_rate": 5.9974641230472995e-05, + "loss": 4.3983, + "step": 366900 + }, + { + "epoch": 3.159004820936639, + "grad_norm": 4.78125, + "learning_rate": 5.994981543680324e-05, + "loss": 4.5295, + "step": 366950 + }, + { + "epoch": 3.159435261707989, + "grad_norm": 3.0, + "learning_rate": 5.992499258273081e-05, + "loss": 4.1152, + "step": 367000 + }, + { + "epoch": 3.1598657024793386, + "grad_norm": 3.109375, + "learning_rate": 5.9900172670077725e-05, + "loss": 4.1262, + "step": 367050 + }, + { + "epoch": 3.160296143250689, + "grad_norm": 1.125, + "learning_rate": 5.9875355700665684e-05, + "loss": 4.0769, + "step": 367100 + }, + { + "epoch": 3.1607265840220387, + "grad_norm": 0.953125, + "learning_rate": 5.9850541676316185e-05, + "loss": 4.2557, + "step": 367150 + }, + { + "epoch": 3.1611570247933884, + "grad_norm": 1.8828125, + "learning_rate": 5.9825730598850594e-05, + "loss": 4.3774, + "step": 367200 + }, + { + "epoch": 3.1615874655647382, + "grad_norm": 2.8125, + "learning_rate": 5.9800922470089915e-05, + "loss": 4.6077, + "step": 367250 + }, + { + "epoch": 3.162017906336088, + "grad_norm": 3.953125, + "learning_rate": 5.977611729185502e-05, + "loss": 4.4091, + "step": 367300 + }, + { + "epoch": 3.162448347107438, + "grad_norm": 2.90625, + "learning_rate": 5.975131506596656e-05, + "loss": 4.4654, + "step": 367350 + }, + { + "epoch": 3.162878787878788, + "grad_norm": 4.25, + "learning_rate": 5.972651579424502e-05, + "loss": 4.5842, + "step": 367400 + }, + { + "epoch": 3.163309228650138, + "grad_norm": 1.3984375, + "learning_rate": 5.970171947851058e-05, + "loss": 4.3589, + "step": 367450 + }, + { + "epoch": 3.1637396694214877, + "grad_norm": 2.96875, + "learning_rate": 5.967692612058318e-05, + "loss": 4.1508, + "step": 367500 + }, + { + "epoch": 3.1641701101928374, + "grad_norm": 3.734375, + "learning_rate": 5.965213572228264e-05, + "loss": 4.2954, + "step": 367550 + }, + { + "epoch": 3.1646005509641872, + "grad_norm": 4.84375, + "learning_rate": 5.962734828542855e-05, + "loss": 4.2682, + "step": 367600 + }, + { + "epoch": 3.165030991735537, + "grad_norm": 2.3125, + "learning_rate": 5.9602563811840215e-05, + "loss": 4.1953, + "step": 367650 + }, + { + "epoch": 3.165461432506887, + "grad_norm": 4.1875, + "learning_rate": 5.957778230333684e-05, + "loss": 4.307, + "step": 367700 + }, + { + "epoch": 3.165891873278237, + "grad_norm": 2.703125, + "learning_rate": 5.955300376173722e-05, + "loss": 4.3906, + "step": 367750 + }, + { + "epoch": 3.166322314049587, + "grad_norm": 4.125, + "learning_rate": 5.9528228188860104e-05, + "loss": 4.0493, + "step": 367800 + }, + { + "epoch": 3.1667527548209367, + "grad_norm": 3.21875, + "learning_rate": 5.950345558652395e-05, + "loss": 4.3504, + "step": 367850 + }, + { + "epoch": 3.1671831955922864, + "grad_norm": 3.296875, + "learning_rate": 5.947868595654704e-05, + "loss": 4.2462, + "step": 367900 + }, + { + "epoch": 3.1676136363636362, + "grad_norm": 3.984375, + "learning_rate": 5.945391930074743e-05, + "loss": 4.0279, + "step": 367950 + }, + { + "epoch": 3.168044077134986, + "grad_norm": 1.828125, + "learning_rate": 5.9429155620942866e-05, + "loss": 4.1845, + "step": 368000 + }, + { + "epoch": 3.1684745179063363, + "grad_norm": 1.9453125, + "learning_rate": 5.940439491895098e-05, + "loss": 4.1649, + "step": 368050 + }, + { + "epoch": 3.168904958677686, + "grad_norm": 2.90625, + "learning_rate": 5.937963719658919e-05, + "loss": 4.2094, + "step": 368100 + }, + { + "epoch": 3.169335399449036, + "grad_norm": 2.46875, + "learning_rate": 5.93548824556746e-05, + "loss": 4.1752, + "step": 368150 + }, + { + "epoch": 3.1697658402203857, + "grad_norm": 4.125, + "learning_rate": 5.9330130698024244e-05, + "loss": 4.126, + "step": 368200 + }, + { + "epoch": 3.1701962809917354, + "grad_norm": 2.765625, + "learning_rate": 5.930538192545474e-05, + "loss": 4.194, + "step": 368250 + }, + { + "epoch": 3.1706267217630852, + "grad_norm": 5.34375, + "learning_rate": 5.928063613978268e-05, + "loss": 4.2881, + "step": 368300 + }, + { + "epoch": 3.171057162534435, + "grad_norm": 4.03125, + "learning_rate": 5.9255893342824284e-05, + "loss": 4.4968, + "step": 368350 + }, + { + "epoch": 3.1714876033057853, + "grad_norm": 3.640625, + "learning_rate": 5.923115353639568e-05, + "loss": 4.2541, + "step": 368400 + }, + { + "epoch": 3.171918044077135, + "grad_norm": 3.109375, + "learning_rate": 5.9206416722312685e-05, + "loss": 4.1264, + "step": 368450 + }, + { + "epoch": 3.172348484848485, + "grad_norm": 2.046875, + "learning_rate": 5.9181682902390935e-05, + "loss": 4.5032, + "step": 368500 + }, + { + "epoch": 3.1727789256198347, + "grad_norm": 3.078125, + "learning_rate": 5.915695207844581e-05, + "loss": 4.1226, + "step": 368550 + }, + { + "epoch": 3.1732093663911844, + "grad_norm": 3.328125, + "learning_rate": 5.913222425229255e-05, + "loss": 4.424, + "step": 368600 + }, + { + "epoch": 3.1736398071625342, + "grad_norm": 2.5, + "learning_rate": 5.9107499425746095e-05, + "loss": 4.3525, + "step": 368650 + }, + { + "epoch": 3.1740702479338845, + "grad_norm": 3.171875, + "learning_rate": 5.908277760062118e-05, + "loss": 4.4041, + "step": 368700 + }, + { + "epoch": 3.1745006887052343, + "grad_norm": 4.4375, + "learning_rate": 5.9058058778732406e-05, + "loss": 4.3873, + "step": 368750 + }, + { + "epoch": 3.174931129476584, + "grad_norm": 3.84375, + "learning_rate": 5.9033342961894e-05, + "loss": 3.9546, + "step": 368800 + }, + { + "epoch": 3.175361570247934, + "grad_norm": 1.1484375, + "learning_rate": 5.900863015192007e-05, + "loss": 4.5598, + "step": 368850 + }, + { + "epoch": 3.1757920110192837, + "grad_norm": 4.53125, + "learning_rate": 5.8983920350624475e-05, + "loss": 4.0618, + "step": 368900 + }, + { + "epoch": 3.1762224517906334, + "grad_norm": 3.75, + "learning_rate": 5.8959213559820926e-05, + "loss": 4.2138, + "step": 368950 + }, + { + "epoch": 3.1766528925619832, + "grad_norm": 6.0, + "learning_rate": 5.89345097813228e-05, + "loss": 4.3419, + "step": 369000 + }, + { + "epoch": 3.1766528925619832, + "eval_loss": 4.995718955993652, + "eval_runtime": 24.0677, + "eval_samples_per_second": 26.592, + "eval_steps_per_second": 13.296, + "eval_tts_loss": 7.504618805636363, + "step": 369000 + }, + { + "epoch": 3.1770833333333335, + "grad_norm": 2.734375, + "learning_rate": 5.890980901694325e-05, + "loss": 4.319, + "step": 369050 + }, + { + "epoch": 3.1775137741046833, + "grad_norm": 2.265625, + "learning_rate": 5.888511126849535e-05, + "loss": 4.25, + "step": 369100 + }, + { + "epoch": 3.177944214876033, + "grad_norm": 2.734375, + "learning_rate": 5.8860416537791796e-05, + "loss": 4.1594, + "step": 369150 + }, + { + "epoch": 3.178374655647383, + "grad_norm": 2.8125, + "learning_rate": 5.883572482664516e-05, + "loss": 4.4726, + "step": 369200 + }, + { + "epoch": 3.1788050964187327, + "grad_norm": 3.5, + "learning_rate": 5.881103613686778e-05, + "loss": 4.3734, + "step": 369250 + }, + { + "epoch": 3.1792355371900825, + "grad_norm": 3.5625, + "learning_rate": 5.878635047027172e-05, + "loss": 4.4663, + "step": 369300 + }, + { + "epoch": 3.1796659779614327, + "grad_norm": 4.4375, + "learning_rate": 5.876166782866883e-05, + "loss": 4.1323, + "step": 369350 + }, + { + "epoch": 3.1800964187327825, + "grad_norm": 4.375, + "learning_rate": 5.8736988213870816e-05, + "loss": 4.2094, + "step": 369400 + }, + { + "epoch": 3.1805268595041323, + "grad_norm": 3.828125, + "learning_rate": 5.871231162768909e-05, + "loss": 4.7159, + "step": 369450 + }, + { + "epoch": 3.180957300275482, + "grad_norm": 4.84375, + "learning_rate": 5.8687638071934884e-05, + "loss": 4.0699, + "step": 369500 + }, + { + "epoch": 3.181387741046832, + "grad_norm": 3.046875, + "learning_rate": 5.866296754841911e-05, + "loss": 3.8833, + "step": 369550 + }, + { + "epoch": 3.1818181818181817, + "grad_norm": 3.4375, + "learning_rate": 5.863830005895259e-05, + "loss": 4.1504, + "step": 369600 + }, + { + "epoch": 3.1822486225895315, + "grad_norm": 4.4375, + "learning_rate": 5.8613635605345865e-05, + "loss": 4.7323, + "step": 369650 + }, + { + "epoch": 3.1826790633608817, + "grad_norm": 4.125, + "learning_rate": 5.858897418940923e-05, + "loss": 3.8436, + "step": 369700 + }, + { + "epoch": 3.1831095041322315, + "grad_norm": 1.2421875, + "learning_rate": 5.856431581295282e-05, + "loss": 3.9756, + "step": 369750 + }, + { + "epoch": 3.1835399449035813, + "grad_norm": 4.875, + "learning_rate": 5.853966047778645e-05, + "loss": 4.1581, + "step": 369800 + }, + { + "epoch": 3.183970385674931, + "grad_norm": 3.921875, + "learning_rate": 5.85150081857198e-05, + "loss": 4.2672, + "step": 369850 + }, + { + "epoch": 3.184400826446281, + "grad_norm": 3.578125, + "learning_rate": 5.8490358938562294e-05, + "loss": 4.1397, + "step": 369900 + }, + { + "epoch": 3.1848312672176307, + "grad_norm": 2.890625, + "learning_rate": 5.8465712738123136e-05, + "loss": 4.2818, + "step": 369950 + }, + { + "epoch": 3.185261707988981, + "grad_norm": 3.90625, + "learning_rate": 5.844106958621132e-05, + "loss": 4.1011, + "step": 370000 + }, + { + "epoch": 3.1856921487603307, + "grad_norm": 4.25, + "learning_rate": 5.841642948463555e-05, + "loss": 4.4151, + "step": 370050 + }, + { + "epoch": 3.1861225895316805, + "grad_norm": 7.1875, + "learning_rate": 5.839179243520439e-05, + "loss": 4.2047, + "step": 370100 + }, + { + "epoch": 3.1865530303030303, + "grad_norm": 2.90625, + "learning_rate": 5.8367158439726134e-05, + "loss": 4.4279, + "step": 370150 + }, + { + "epoch": 3.18698347107438, + "grad_norm": 4.21875, + "learning_rate": 5.83425275000089e-05, + "loss": 4.1264, + "step": 370200 + }, + { + "epoch": 3.18741391184573, + "grad_norm": 5.25, + "learning_rate": 5.8317899617860516e-05, + "loss": 4.0315, + "step": 370250 + }, + { + "epoch": 3.1878443526170797, + "grad_norm": 4.03125, + "learning_rate": 5.829327479508861e-05, + "loss": 4.2168, + "step": 370300 + }, + { + "epoch": 3.18827479338843, + "grad_norm": 1.5859375, + "learning_rate": 5.8268653033500564e-05, + "loss": 4.127, + "step": 370350 + }, + { + "epoch": 3.1887052341597797, + "grad_norm": 3.265625, + "learning_rate": 5.824403433490364e-05, + "loss": 4.2178, + "step": 370400 + }, + { + "epoch": 3.1891356749311295, + "grad_norm": 3.046875, + "learning_rate": 5.8219418701104745e-05, + "loss": 4.5114, + "step": 370450 + }, + { + "epoch": 3.1895661157024793, + "grad_norm": 3.796875, + "learning_rate": 5.819480613391059e-05, + "loss": 4.2566, + "step": 370500 + }, + { + "epoch": 3.189996556473829, + "grad_norm": 3.71875, + "learning_rate": 5.817019663512782e-05, + "loss": 4.6002, + "step": 370550 + }, + { + "epoch": 3.190426997245179, + "grad_norm": 1.8984375, + "learning_rate": 5.814559020656255e-05, + "loss": 4.8422, + "step": 370600 + }, + { + "epoch": 3.190857438016529, + "grad_norm": 5.46875, + "learning_rate": 5.8120986850020876e-05, + "loss": 4.3317, + "step": 370650 + }, + { + "epoch": 3.191287878787879, + "grad_norm": 5.25, + "learning_rate": 5.80963865673087e-05, + "loss": 4.0244, + "step": 370700 + }, + { + "epoch": 3.1917183195592287, + "grad_norm": 4.9375, + "learning_rate": 5.80717893602316e-05, + "loss": 4.6798, + "step": 370750 + }, + { + "epoch": 3.1921487603305785, + "grad_norm": 4.34375, + "learning_rate": 5.8047195230594944e-05, + "loss": 4.1681, + "step": 370800 + }, + { + "epoch": 3.1925792011019283, + "grad_norm": 2.984375, + "learning_rate": 5.802260418020389e-05, + "loss": 4.3521, + "step": 370850 + }, + { + "epoch": 3.193009641873278, + "grad_norm": 3.78125, + "learning_rate": 5.7998016210863336e-05, + "loss": 4.3071, + "step": 370900 + }, + { + "epoch": 3.193440082644628, + "grad_norm": 1.953125, + "learning_rate": 5.797343132437807e-05, + "loss": 4.3148, + "step": 370950 + }, + { + "epoch": 3.193870523415978, + "grad_norm": 6.15625, + "learning_rate": 5.7948849522552505e-05, + "loss": 4.2374, + "step": 371000 + }, + { + "epoch": 3.194300964187328, + "grad_norm": 1.9609375, + "learning_rate": 5.7924270807190925e-05, + "loss": 4.4721, + "step": 371050 + }, + { + "epoch": 3.1947314049586777, + "grad_norm": 1.4375, + "learning_rate": 5.7899695180097335e-05, + "loss": 4.2304, + "step": 371100 + }, + { + "epoch": 3.1951618457300275, + "grad_norm": 8.6875, + "learning_rate": 5.787512264307554e-05, + "loss": 4.1293, + "step": 371150 + }, + { + "epoch": 3.1955922865013773, + "grad_norm": 3.78125, + "learning_rate": 5.785055319792907e-05, + "loss": 4.3817, + "step": 371200 + }, + { + "epoch": 3.196022727272727, + "grad_norm": 2.890625, + "learning_rate": 5.782598684646134e-05, + "loss": 4.5016, + "step": 371250 + }, + { + "epoch": 3.1964531680440773, + "grad_norm": 4.21875, + "learning_rate": 5.780142359047549e-05, + "loss": 4.2858, + "step": 371300 + }, + { + "epoch": 3.196883608815427, + "grad_norm": 3.375, + "learning_rate": 5.7776863431774263e-05, + "loss": 4.2233, + "step": 371350 + }, + { + "epoch": 3.197314049586777, + "grad_norm": 4.78125, + "learning_rate": 5.775230637216045e-05, + "loss": 4.2313, + "step": 371400 + }, + { + "epoch": 3.1977444903581267, + "grad_norm": 3.828125, + "learning_rate": 5.7727752413436465e-05, + "loss": 3.9859, + "step": 371450 + }, + { + "epoch": 3.1981749311294765, + "grad_norm": 4.375, + "learning_rate": 5.7703201557404454e-05, + "loss": 4.2741, + "step": 371500 + }, + { + "epoch": 3.1986053719008263, + "grad_norm": 2.8125, + "learning_rate": 5.767865380586652e-05, + "loss": 4.0336, + "step": 371550 + }, + { + "epoch": 3.1990358126721765, + "grad_norm": 4.34375, + "learning_rate": 5.765410916062428e-05, + "loss": 4.1959, + "step": 371600 + }, + { + "epoch": 3.1994662534435263, + "grad_norm": 3.03125, + "learning_rate": 5.762956762347934e-05, + "loss": 4.3028, + "step": 371650 + }, + { + "epoch": 3.199896694214876, + "grad_norm": 2.90625, + "learning_rate": 5.7605029196232984e-05, + "loss": 4.0621, + "step": 371700 + }, + { + "epoch": 3.200327134986226, + "grad_norm": 3.890625, + "learning_rate": 5.758049388068626e-05, + "loss": 4.618, + "step": 371750 + }, + { + "epoch": 3.2007575757575757, + "grad_norm": 4.84375, + "learning_rate": 5.7555961678640015e-05, + "loss": 4.2074, + "step": 371800 + }, + { + "epoch": 3.2011880165289255, + "grad_norm": 1.6328125, + "learning_rate": 5.753143259189487e-05, + "loss": 4.3642, + "step": 371850 + }, + { + "epoch": 3.2016184573002753, + "grad_norm": 2.53125, + "learning_rate": 5.7506906622251165e-05, + "loss": 4.0858, + "step": 371900 + }, + { + "epoch": 3.2020488980716255, + "grad_norm": 5.03125, + "learning_rate": 5.7482383771509115e-05, + "loss": 4.495, + "step": 371950 + }, + { + "epoch": 3.2024793388429753, + "grad_norm": 3.65625, + "learning_rate": 5.7457864041468625e-05, + "loss": 4.0665, + "step": 372000 + }, + { + "epoch": 3.2024793388429753, + "eval_loss": 4.995144844055176, + "eval_runtime": 24.1419, + "eval_samples_per_second": 26.51, + "eval_steps_per_second": 13.255, + "eval_tts_loss": 7.490159339418232, + "step": 372000 + }, + { + "epoch": 3.202909779614325, + "grad_norm": 4.28125, + "learning_rate": 5.743334743392939e-05, + "loss": 4.3072, + "step": 372050 + }, + { + "epoch": 3.203340220385675, + "grad_norm": 3.375, + "learning_rate": 5.7408833950690856e-05, + "loss": 4.2735, + "step": 372100 + }, + { + "epoch": 3.2037706611570247, + "grad_norm": 1.4296875, + "learning_rate": 5.738432359355228e-05, + "loss": 4.3538, + "step": 372150 + }, + { + "epoch": 3.2042011019283745, + "grad_norm": 2.046875, + "learning_rate": 5.7359816364312614e-05, + "loss": 4.5132, + "step": 372200 + }, + { + "epoch": 3.2046315426997247, + "grad_norm": 2.859375, + "learning_rate": 5.7335312264770735e-05, + "loss": 4.2675, + "step": 372250 + }, + { + "epoch": 3.2050619834710745, + "grad_norm": 1.546875, + "learning_rate": 5.731081129672513e-05, + "loss": 4.345, + "step": 372300 + }, + { + "epoch": 3.2054924242424243, + "grad_norm": 3.953125, + "learning_rate": 5.728631346197413e-05, + "loss": 4.4702, + "step": 372350 + }, + { + "epoch": 3.205922865013774, + "grad_norm": 5.625, + "learning_rate": 5.726181876231582e-05, + "loss": 4.4556, + "step": 372400 + }, + { + "epoch": 3.206353305785124, + "grad_norm": 3.5625, + "learning_rate": 5.723732719954806e-05, + "loss": 4.4963, + "step": 372450 + }, + { + "epoch": 3.2067837465564737, + "grad_norm": 4.96875, + "learning_rate": 5.7212838775468434e-05, + "loss": 4.2145, + "step": 372500 + }, + { + "epoch": 3.2072141873278235, + "grad_norm": 3.109375, + "learning_rate": 5.718835349187444e-05, + "loss": 4.0102, + "step": 372550 + }, + { + "epoch": 3.2076446280991737, + "grad_norm": 4.0625, + "learning_rate": 5.716387135056318e-05, + "loss": 4.087, + "step": 372600 + }, + { + "epoch": 3.2080750688705235, + "grad_norm": 4.46875, + "learning_rate": 5.713939235333159e-05, + "loss": 4.5784, + "step": 372650 + }, + { + "epoch": 3.2085055096418733, + "grad_norm": 3.15625, + "learning_rate": 5.711491650197639e-05, + "loss": 4.6105, + "step": 372700 + }, + { + "epoch": 3.208935950413223, + "grad_norm": 3.390625, + "learning_rate": 5.7090443798294e-05, + "loss": 4.1451, + "step": 372750 + }, + { + "epoch": 3.209366391184573, + "grad_norm": 4.1875, + "learning_rate": 5.7065974244080764e-05, + "loss": 4.231, + "step": 372800 + }, + { + "epoch": 3.2097968319559227, + "grad_norm": 4.59375, + "learning_rate": 5.704150784113269e-05, + "loss": 4.4618, + "step": 372850 + }, + { + "epoch": 3.210227272727273, + "grad_norm": 2.90625, + "learning_rate": 5.7017044591245416e-05, + "loss": 4.5273, + "step": 372900 + }, + { + "epoch": 3.2106577134986227, + "grad_norm": 3.625, + "learning_rate": 5.699258449621464e-05, + "loss": 4.3227, + "step": 372950 + }, + { + "epoch": 3.2110881542699725, + "grad_norm": 2.828125, + "learning_rate": 5.6968127557835626e-05, + "loss": 3.9205, + "step": 373000 + }, + { + "epoch": 3.2115185950413223, + "grad_norm": 4.09375, + "learning_rate": 5.694367377790342e-05, + "loss": 4.5589, + "step": 373050 + }, + { + "epoch": 3.211949035812672, + "grad_norm": 1.765625, + "learning_rate": 5.691922315821301e-05, + "loss": 4.1797, + "step": 373100 + }, + { + "epoch": 3.212379476584022, + "grad_norm": 2.765625, + "learning_rate": 5.6894775700558875e-05, + "loss": 4.585, + "step": 373150 + }, + { + "epoch": 3.212809917355372, + "grad_norm": 4.34375, + "learning_rate": 5.687033140673543e-05, + "loss": 4.1926, + "step": 373200 + }, + { + "epoch": 3.213240358126722, + "grad_norm": 3.90625, + "learning_rate": 5.684589027853691e-05, + "loss": 4.228, + "step": 373250 + }, + { + "epoch": 3.2136707988980717, + "grad_norm": 1.7109375, + "learning_rate": 5.682145231775719e-05, + "loss": 4.4378, + "step": 373300 + }, + { + "epoch": 3.2141012396694215, + "grad_norm": 4.96875, + "learning_rate": 5.679701752618996e-05, + "loss": 4.3353, + "step": 373350 + }, + { + "epoch": 3.2145316804407713, + "grad_norm": 2.296875, + "learning_rate": 5.6772585905628706e-05, + "loss": 4.4708, + "step": 373400 + }, + { + "epoch": 3.214962121212121, + "grad_norm": 1.953125, + "learning_rate": 5.674815745786659e-05, + "loss": 4.0621, + "step": 373450 + }, + { + "epoch": 3.215392561983471, + "grad_norm": 5.15625, + "learning_rate": 5.672373218469669e-05, + "loss": 4.0959, + "step": 373500 + }, + { + "epoch": 3.215823002754821, + "grad_norm": 1.9609375, + "learning_rate": 5.669931008791175e-05, + "loss": 4.3164, + "step": 373550 + }, + { + "epoch": 3.216253443526171, + "grad_norm": 2.125, + "learning_rate": 5.667489116930428e-05, + "loss": 4.2294, + "step": 373600 + }, + { + "epoch": 3.2166838842975207, + "grad_norm": 3.484375, + "learning_rate": 5.6650475430666584e-05, + "loss": 4.1345, + "step": 373650 + }, + { + "epoch": 3.2171143250688705, + "grad_norm": 1.515625, + "learning_rate": 5.662606287379072e-05, + "loss": 3.9975, + "step": 373700 + }, + { + "epoch": 3.2175447658402203, + "grad_norm": 2.0, + "learning_rate": 5.660165350046849e-05, + "loss": 4.4641, + "step": 373750 + }, + { + "epoch": 3.21797520661157, + "grad_norm": 3.015625, + "learning_rate": 5.657724731249154e-05, + "loss": 4.359, + "step": 373800 + }, + { + "epoch": 3.2184056473829203, + "grad_norm": 2.921875, + "learning_rate": 5.655284431165122e-05, + "loss": 4.5078, + "step": 373850 + }, + { + "epoch": 3.21883608815427, + "grad_norm": 2.734375, + "learning_rate": 5.6528444499738644e-05, + "loss": 4.358, + "step": 373900 + }, + { + "epoch": 3.21926652892562, + "grad_norm": 2.28125, + "learning_rate": 5.6504047878544716e-05, + "loss": 4.0136, + "step": 373950 + }, + { + "epoch": 3.2196969696969697, + "grad_norm": 1.625, + "learning_rate": 5.64796544498601e-05, + "loss": 4.3448, + "step": 374000 + }, + { + "epoch": 3.2201274104683195, + "grad_norm": 4.65625, + "learning_rate": 5.6455264215475156e-05, + "loss": 4.0919, + "step": 374050 + }, + { + "epoch": 3.2205578512396693, + "grad_norm": 1.78125, + "learning_rate": 5.6430877177180176e-05, + "loss": 4.3081, + "step": 374100 + }, + { + "epoch": 3.220988292011019, + "grad_norm": 5.875, + "learning_rate": 5.6406493336765064e-05, + "loss": 3.9731, + "step": 374150 + }, + { + "epoch": 3.2214187327823693, + "grad_norm": 8.75, + "learning_rate": 5.638211269601955e-05, + "loss": 4.5018, + "step": 374200 + }, + { + "epoch": 3.221849173553719, + "grad_norm": 3.265625, + "learning_rate": 5.635773525673311e-05, + "loss": 4.6859, + "step": 374250 + }, + { + "epoch": 3.222279614325069, + "grad_norm": 2.28125, + "learning_rate": 5.633336102069497e-05, + "loss": 4.0108, + "step": 374300 + }, + { + "epoch": 3.2227100550964187, + "grad_norm": 3.453125, + "learning_rate": 5.630898998969423e-05, + "loss": 4.2525, + "step": 374350 + }, + { + "epoch": 3.2231404958677685, + "grad_norm": 4.0625, + "learning_rate": 5.6284622165519664e-05, + "loss": 4.1158, + "step": 374400 + }, + { + "epoch": 3.2235709366391183, + "grad_norm": 3.328125, + "learning_rate": 5.626025754995967e-05, + "loss": 4.3399, + "step": 374450 + }, + { + "epoch": 3.2240013774104685, + "grad_norm": 1.6875, + "learning_rate": 5.6235896144802726e-05, + "loss": 4.0313, + "step": 374500 + }, + { + "epoch": 3.2244318181818183, + "grad_norm": 2.21875, + "learning_rate": 5.621153795183683e-05, + "loss": 4.2005, + "step": 374550 + }, + { + "epoch": 3.224862258953168, + "grad_norm": 3.65625, + "learning_rate": 5.6187182972849805e-05, + "loss": 4.5472, + "step": 374600 + }, + { + "epoch": 3.225292699724518, + "grad_norm": 3.34375, + "learning_rate": 5.616283120962936e-05, + "loss": 4.6579, + "step": 374650 + }, + { + "epoch": 3.2257231404958677, + "grad_norm": 4.1875, + "learning_rate": 5.613848266396274e-05, + "loss": 3.9426, + "step": 374700 + }, + { + "epoch": 3.2261535812672175, + "grad_norm": 3.28125, + "learning_rate": 5.61141373376371e-05, + "loss": 4.0532, + "step": 374750 + }, + { + "epoch": 3.2265840220385673, + "grad_norm": 3.65625, + "learning_rate": 5.608979523243938e-05, + "loss": 4.3047, + "step": 374800 + }, + { + "epoch": 3.2270144628099175, + "grad_norm": 3.5, + "learning_rate": 5.606545635015622e-05, + "loss": 4.0366, + "step": 374850 + }, + { + "epoch": 3.2274449035812673, + "grad_norm": 4.375, + "learning_rate": 5.604112069257403e-05, + "loss": 4.5421, + "step": 374900 + }, + { + "epoch": 3.227875344352617, + "grad_norm": 2.203125, + "learning_rate": 5.6016788261479006e-05, + "loss": 3.9863, + "step": 374950 + }, + { + "epoch": 3.228305785123967, + "grad_norm": 3.71875, + "learning_rate": 5.5992459058657056e-05, + "loss": 4.5136, + "step": 375000 + }, + { + "epoch": 3.228305785123967, + "eval_loss": 4.99493932723999, + "eval_runtime": 24.1536, + "eval_samples_per_second": 26.497, + "eval_steps_per_second": 13.249, + "eval_tts_loss": 7.483240953229233, + "step": 375000 + }, + { + "epoch": 3.2287362258953167, + "grad_norm": 1.5625, + "learning_rate": 5.596813308589396e-05, + "loss": 4.0813, + "step": 375050 + }, + { + "epoch": 3.2291666666666665, + "grad_norm": 3.296875, + "learning_rate": 5.594381034497516e-05, + "loss": 4.5951, + "step": 375100 + }, + { + "epoch": 3.2295971074380168, + "grad_norm": 6.375, + "learning_rate": 5.5919490837685896e-05, + "loss": 4.3345, + "step": 375150 + }, + { + "epoch": 3.2300275482093666, + "grad_norm": 7.09375, + "learning_rate": 5.589517456581116e-05, + "loss": 4.4634, + "step": 375200 + }, + { + "epoch": 3.2304579889807163, + "grad_norm": 3.3125, + "learning_rate": 5.5870861531135724e-05, + "loss": 4.3415, + "step": 375250 + }, + { + "epoch": 3.230888429752066, + "grad_norm": 3.125, + "learning_rate": 5.584655173544404e-05, + "loss": 4.7467, + "step": 375300 + }, + { + "epoch": 3.231318870523416, + "grad_norm": 4.21875, + "learning_rate": 5.582224518052053e-05, + "loss": 3.8459, + "step": 375350 + }, + { + "epoch": 3.2317493112947657, + "grad_norm": 3.328125, + "learning_rate": 5.5797941868149196e-05, + "loss": 4.0402, + "step": 375400 + }, + { + "epoch": 3.2321797520661155, + "grad_norm": 3.5, + "learning_rate": 5.577364180011375e-05, + "loss": 4.3711, + "step": 375450 + }, + { + "epoch": 3.2326101928374658, + "grad_norm": 3.53125, + "learning_rate": 5.574934497819788e-05, + "loss": 4.3598, + "step": 375500 + }, + { + "epoch": 3.2330406336088156, + "grad_norm": 4.03125, + "learning_rate": 5.572505140418488e-05, + "loss": 4.4656, + "step": 375550 + }, + { + "epoch": 3.2334710743801653, + "grad_norm": 1.640625, + "learning_rate": 5.57007610798578e-05, + "loss": 4.3525, + "step": 375600 + }, + { + "epoch": 3.233901515151515, + "grad_norm": 3.71875, + "learning_rate": 5.567647400699965e-05, + "loss": 4.3795, + "step": 375650 + }, + { + "epoch": 3.234331955922865, + "grad_norm": 2.703125, + "learning_rate": 5.565219018739283e-05, + "loss": 4.4562, + "step": 375700 + }, + { + "epoch": 3.2347623966942147, + "grad_norm": 6.5625, + "learning_rate": 5.56279096228199e-05, + "loss": 4.496, + "step": 375750 + }, + { + "epoch": 3.235192837465565, + "grad_norm": 3.71875, + "learning_rate": 5.5603632315062915e-05, + "loss": 4.3562, + "step": 375800 + }, + { + "epoch": 3.2356232782369148, + "grad_norm": 4.46875, + "learning_rate": 5.557935826590377e-05, + "loss": 4.4144, + "step": 375850 + }, + { + "epoch": 3.2360537190082646, + "grad_norm": 2.609375, + "learning_rate": 5.555508747712419e-05, + "loss": 4.448, + "step": 375900 + }, + { + "epoch": 3.2364841597796143, + "grad_norm": 2.609375, + "learning_rate": 5.5530819950505597e-05, + "loss": 4.4849, + "step": 375950 + }, + { + "epoch": 3.236914600550964, + "grad_norm": 1.921875, + "learning_rate": 5.550655568782908e-05, + "loss": 4.4583, + "step": 376000 + }, + { + "epoch": 3.237345041322314, + "grad_norm": 3.390625, + "learning_rate": 5.548229469087567e-05, + "loss": 4.219, + "step": 376050 + }, + { + "epoch": 3.2377754820936637, + "grad_norm": 7.65625, + "learning_rate": 5.545803696142605e-05, + "loss": 3.8585, + "step": 376100 + }, + { + "epoch": 3.238205922865014, + "grad_norm": 1.375, + "learning_rate": 5.543378250126064e-05, + "loss": 4.1569, + "step": 376150 + }, + { + "epoch": 3.2386363636363638, + "grad_norm": 1.5859375, + "learning_rate": 5.54095313121598e-05, + "loss": 4.2678, + "step": 376200 + }, + { + "epoch": 3.2390668044077136, + "grad_norm": 3.453125, + "learning_rate": 5.538528339590336e-05, + "loss": 4.7378, + "step": 376250 + }, + { + "epoch": 3.2394972451790633, + "grad_norm": 2.0, + "learning_rate": 5.536103875427111e-05, + "loss": 4.5216, + "step": 376300 + }, + { + "epoch": 3.239927685950413, + "grad_norm": 5.0625, + "learning_rate": 5.5336797389042606e-05, + "loss": 4.1535, + "step": 376350 + }, + { + "epoch": 3.240358126721763, + "grad_norm": 4.09375, + "learning_rate": 5.531255930199708e-05, + "loss": 4.4992, + "step": 376400 + }, + { + "epoch": 3.240788567493113, + "grad_norm": 2.203125, + "learning_rate": 5.528832449491355e-05, + "loss": 4.4439, + "step": 376450 + }, + { + "epoch": 3.241219008264463, + "grad_norm": 4.78125, + "learning_rate": 5.52640929695708e-05, + "loss": 4.3586, + "step": 376500 + }, + { + "epoch": 3.2416494490358128, + "grad_norm": 4.5625, + "learning_rate": 5.523986472774734e-05, + "loss": 4.3194, + "step": 376550 + }, + { + "epoch": 3.2420798898071626, + "grad_norm": 4.625, + "learning_rate": 5.521563977122154e-05, + "loss": 3.9062, + "step": 376600 + }, + { + "epoch": 3.2425103305785123, + "grad_norm": 3.78125, + "learning_rate": 5.519141810177143e-05, + "loss": 3.869, + "step": 376650 + }, + { + "epoch": 3.242940771349862, + "grad_norm": 3.546875, + "learning_rate": 5.516719972117481e-05, + "loss": 3.9001, + "step": 376700 + }, + { + "epoch": 3.243371212121212, + "grad_norm": 2.5, + "learning_rate": 5.5142984631209284e-05, + "loss": 4.2057, + "step": 376750 + }, + { + "epoch": 3.243801652892562, + "grad_norm": 3.953125, + "learning_rate": 5.511877283365218e-05, + "loss": 4.3461, + "step": 376800 + }, + { + "epoch": 3.244232093663912, + "grad_norm": 2.03125, + "learning_rate": 5.509456433028053e-05, + "loss": 4.219, + "step": 376850 + }, + { + "epoch": 3.2446625344352618, + "grad_norm": 1.5078125, + "learning_rate": 5.5070359122871285e-05, + "loss": 4.1926, + "step": 376900 + }, + { + "epoch": 3.2450929752066116, + "grad_norm": 2.296875, + "learning_rate": 5.5046157213201056e-05, + "loss": 4.325, + "step": 376950 + }, + { + "epoch": 3.2455234159779613, + "grad_norm": 4.59375, + "learning_rate": 5.5021958603046084e-05, + "loss": 3.9349, + "step": 377000 + }, + { + "epoch": 3.245953856749311, + "grad_norm": 2.25, + "learning_rate": 5.499776329418262e-05, + "loss": 4.812, + "step": 377050 + }, + { + "epoch": 3.2463842975206614, + "grad_norm": 8.4375, + "learning_rate": 5.497357128838651e-05, + "loss": 4.5487, + "step": 377100 + }, + { + "epoch": 3.246814738292011, + "grad_norm": 3.171875, + "learning_rate": 5.494938258743334e-05, + "loss": 4.0864, + "step": 377150 + }, + { + "epoch": 3.247245179063361, + "grad_norm": 2.6875, + "learning_rate": 5.492519719309864e-05, + "loss": 4.1475, + "step": 377200 + }, + { + "epoch": 3.2476756198347108, + "grad_norm": 3.203125, + "learning_rate": 5.4901015107157416e-05, + "loss": 4.3365, + "step": 377250 + }, + { + "epoch": 3.2481060606060606, + "grad_norm": 3.421875, + "learning_rate": 5.4876836331384674e-05, + "loss": 4.5483, + "step": 377300 + }, + { + "epoch": 3.2485365013774103, + "grad_norm": 3.0, + "learning_rate": 5.485266086755507e-05, + "loss": 4.3026, + "step": 377350 + }, + { + "epoch": 3.24896694214876, + "grad_norm": 2.1875, + "learning_rate": 5.4828488717443024e-05, + "loss": 4.1662, + "step": 377400 + }, + { + "epoch": 3.2493973829201104, + "grad_norm": 4.90625, + "learning_rate": 5.480431988282271e-05, + "loss": 4.2475, + "step": 377450 + }, + { + "epoch": 3.24982782369146, + "grad_norm": 8.9375, + "learning_rate": 5.478015436546808e-05, + "loss": 4.4411, + "step": 377500 + }, + { + "epoch": 3.25025826446281, + "grad_norm": 3.9375, + "learning_rate": 5.475599216715279e-05, + "loss": 4.3599, + "step": 377550 + }, + { + "epoch": 3.2506887052341598, + "grad_norm": 4.8125, + "learning_rate": 5.473183328965037e-05, + "loss": 4.2462, + "step": 377600 + }, + { + "epoch": 3.2511191460055096, + "grad_norm": 3.09375, + "learning_rate": 5.470767773473398e-05, + "loss": 4.1422, + "step": 377650 + }, + { + "epoch": 3.2515495867768593, + "grad_norm": 4.46875, + "learning_rate": 5.468352550417657e-05, + "loss": 4.2798, + "step": 377700 + }, + { + "epoch": 3.2519800275482096, + "grad_norm": 3.390625, + "learning_rate": 5.4659376599750957e-05, + "loss": 4.6952, + "step": 377750 + }, + { + "epoch": 3.2524104683195594, + "grad_norm": 2.390625, + "learning_rate": 5.4635231023229506e-05, + "loss": 4.5524, + "step": 377800 + }, + { + "epoch": 3.252840909090909, + "grad_norm": 5.90625, + "learning_rate": 5.4611088776384453e-05, + "loss": 4.0466, + "step": 377850 + }, + { + "epoch": 3.253271349862259, + "grad_norm": 3.40625, + "learning_rate": 5.458694986098787e-05, + "loss": 4.219, + "step": 377900 + }, + { + "epoch": 3.2537017906336088, + "grad_norm": 2.984375, + "learning_rate": 5.4562814278811456e-05, + "loss": 4.5835, + "step": 377950 + }, + { + "epoch": 3.2541322314049586, + "grad_norm": 3.4375, + "learning_rate": 5.453868203162672e-05, + "loss": 4.2677, + "step": 378000 + }, + { + "epoch": 3.2541322314049586, + "eval_loss": 4.995173454284668, + "eval_runtime": 23.6957, + "eval_samples_per_second": 27.009, + "eval_steps_per_second": 13.505, + "eval_tts_loss": 7.484762244246582, + "step": 378000 + }, + { + "epoch": 3.2545626721763083, + "grad_norm": 2.0, + "learning_rate": 5.4514553121204905e-05, + "loss": 4.379, + "step": 378050 + }, + { + "epoch": 3.2549931129476586, + "grad_norm": 3.09375, + "learning_rate": 5.449042754931699e-05, + "loss": 4.2898, + "step": 378100 + }, + { + "epoch": 3.2554235537190084, + "grad_norm": 5.78125, + "learning_rate": 5.4466305317733826e-05, + "loss": 4.0237, + "step": 378150 + }, + { + "epoch": 3.255853994490358, + "grad_norm": 3.0, + "learning_rate": 5.444218642822586e-05, + "loss": 4.3617, + "step": 378200 + }, + { + "epoch": 3.256284435261708, + "grad_norm": 3.75, + "learning_rate": 5.441807088256341e-05, + "loss": 4.5322, + "step": 378250 + }, + { + "epoch": 3.2567148760330578, + "grad_norm": 4.125, + "learning_rate": 5.439395868251647e-05, + "loss": 4.128, + "step": 378300 + }, + { + "epoch": 3.2571453168044076, + "grad_norm": 3.015625, + "learning_rate": 5.4369849829854844e-05, + "loss": 4.1571, + "step": 378350 + }, + { + "epoch": 3.257575757575758, + "grad_norm": 3.421875, + "learning_rate": 5.434574432634802e-05, + "loss": 4.2891, + "step": 378400 + }, + { + "epoch": 3.2580061983471076, + "grad_norm": 3.28125, + "learning_rate": 5.432164217376539e-05, + "loss": 4.0523, + "step": 378450 + }, + { + "epoch": 3.2584366391184574, + "grad_norm": 2.609375, + "learning_rate": 5.429754337387598e-05, + "loss": 3.9968, + "step": 378500 + }, + { + "epoch": 3.258867079889807, + "grad_norm": 2.15625, + "learning_rate": 5.4273447928448477e-05, + "loss": 4.333, + "step": 378550 + }, + { + "epoch": 3.259297520661157, + "grad_norm": 2.265625, + "learning_rate": 5.4249355839251547e-05, + "loss": 4.5407, + "step": 378600 + }, + { + "epoch": 3.2597279614325068, + "grad_norm": 1.4609375, + "learning_rate": 5.422526710805346e-05, + "loss": 4.3556, + "step": 378650 + }, + { + "epoch": 3.2601584022038566, + "grad_norm": 2.71875, + "learning_rate": 5.420118173662224e-05, + "loss": 4.3925, + "step": 378700 + }, + { + "epoch": 3.260588842975207, + "grad_norm": 2.9375, + "learning_rate": 5.417709972672582e-05, + "loss": 4.2892, + "step": 378750 + }, + { + "epoch": 3.2610192837465566, + "grad_norm": 4.125, + "learning_rate": 5.4153021080131626e-05, + "loss": 4.8368, + "step": 378800 + }, + { + "epoch": 3.2614497245179064, + "grad_norm": 3.578125, + "learning_rate": 5.412894579860707e-05, + "loss": 4.168, + "step": 378850 + }, + { + "epoch": 3.261880165289256, + "grad_norm": 2.84375, + "learning_rate": 5.41048738839192e-05, + "loss": 3.8279, + "step": 378900 + }, + { + "epoch": 3.262310606060606, + "grad_norm": 3.421875, + "learning_rate": 5.408080533783484e-05, + "loss": 4.7128, + "step": 378950 + }, + { + "epoch": 3.2627410468319558, + "grad_norm": 2.5, + "learning_rate": 5.4056740162120564e-05, + "loss": 4.3488, + "step": 379000 + }, + { + "epoch": 3.263171487603306, + "grad_norm": 1.3359375, + "learning_rate": 5.403267835854273e-05, + "loss": 4.5935, + "step": 379050 + }, + { + "epoch": 3.263601928374656, + "grad_norm": 2.625, + "learning_rate": 5.400861992886734e-05, + "loss": 4.423, + "step": 379100 + }, + { + "epoch": 3.2640323691460056, + "grad_norm": 4.0625, + "learning_rate": 5.398456487486036e-05, + "loss": 4.3281, + "step": 379150 + }, + { + "epoch": 3.2644628099173554, + "grad_norm": 2.421875, + "learning_rate": 5.39605131982873e-05, + "loss": 4.1154, + "step": 379200 + }, + { + "epoch": 3.264893250688705, + "grad_norm": 3.84375, + "learning_rate": 5.393646490091351e-05, + "loss": 3.7891, + "step": 379250 + }, + { + "epoch": 3.265323691460055, + "grad_norm": 2.59375, + "learning_rate": 5.391241998450409e-05, + "loss": 4.3135, + "step": 379300 + }, + { + "epoch": 3.2657541322314048, + "grad_norm": 2.5, + "learning_rate": 5.3888378450823885e-05, + "loss": 3.9096, + "step": 379350 + }, + { + "epoch": 3.266184573002755, + "grad_norm": 2.75, + "learning_rate": 5.386434030163745e-05, + "loss": 4.1958, + "step": 379400 + }, + { + "epoch": 3.266615013774105, + "grad_norm": 2.15625, + "learning_rate": 5.3840305538709214e-05, + "loss": 4.2789, + "step": 379450 + }, + { + "epoch": 3.2670454545454546, + "grad_norm": 4.15625, + "learning_rate": 5.381627416380324e-05, + "loss": 3.9915, + "step": 379500 + }, + { + "epoch": 3.2674758953168044, + "grad_norm": 4.375, + "learning_rate": 5.379224617868337e-05, + "loss": 4.5594, + "step": 379550 + }, + { + "epoch": 3.267906336088154, + "grad_norm": 4.9375, + "learning_rate": 5.376822158511321e-05, + "loss": 4.4055, + "step": 379600 + }, + { + "epoch": 3.268336776859504, + "grad_norm": 2.328125, + "learning_rate": 5.374420038485613e-05, + "loss": 4.3554, + "step": 379650 + }, + { + "epoch": 3.268767217630854, + "grad_norm": 3.984375, + "learning_rate": 5.372018257967515e-05, + "loss": 4.3826, + "step": 379700 + }, + { + "epoch": 3.269197658402204, + "grad_norm": 3.3125, + "learning_rate": 5.369616817133326e-05, + "loss": 4.3408, + "step": 379750 + }, + { + "epoch": 3.269628099173554, + "grad_norm": 3.859375, + "learning_rate": 5.3672157161592995e-05, + "loss": 4.2632, + "step": 379800 + }, + { + "epoch": 3.2700585399449036, + "grad_norm": 3.734375, + "learning_rate": 5.3648149552216706e-05, + "loss": 4.363, + "step": 379850 + }, + { + "epoch": 3.2704889807162534, + "grad_norm": 5.875, + "learning_rate": 5.362414534496654e-05, + "loss": 4.2094, + "step": 379900 + }, + { + "epoch": 3.270919421487603, + "grad_norm": 2.671875, + "learning_rate": 5.3600144541604267e-05, + "loss": 4.5149, + "step": 379950 + }, + { + "epoch": 3.271349862258953, + "grad_norm": 3.34375, + "learning_rate": 5.3576147143891584e-05, + "loss": 4.1589, + "step": 380000 + }, + { + "epoch": 3.271780303030303, + "grad_norm": 5.0625, + "learning_rate": 5.3552153153589865e-05, + "loss": 4.3196, + "step": 380050 + }, + { + "epoch": 3.272210743801653, + "grad_norm": 5.34375, + "learning_rate": 5.352816257246009e-05, + "loss": 4.4324, + "step": 380100 + }, + { + "epoch": 3.272641184573003, + "grad_norm": 4.03125, + "learning_rate": 5.3504175402263245e-05, + "loss": 4.1397, + "step": 380150 + }, + { + "epoch": 3.2730716253443526, + "grad_norm": 3.125, + "learning_rate": 5.348019164475988e-05, + "loss": 4.3999, + "step": 380200 + }, + { + "epoch": 3.2735020661157024, + "grad_norm": 5.78125, + "learning_rate": 5.345621130171031e-05, + "loss": 4.2404, + "step": 380250 + }, + { + "epoch": 3.273932506887052, + "grad_norm": 2.40625, + "learning_rate": 5.343223437487478e-05, + "loss": 4.5147, + "step": 380300 + }, + { + "epoch": 3.2743629476584024, + "grad_norm": 3.265625, + "learning_rate": 5.340826086601296e-05, + "loss": 4.3614, + "step": 380350 + }, + { + "epoch": 3.274793388429752, + "grad_norm": 2.28125, + "learning_rate": 5.33842907768846e-05, + "loss": 4.0675, + "step": 380400 + }, + { + "epoch": 3.275223829201102, + "grad_norm": 1.3671875, + "learning_rate": 5.3360324109249005e-05, + "loss": 4.2549, + "step": 380450 + }, + { + "epoch": 3.275654269972452, + "grad_norm": 3.390625, + "learning_rate": 5.3336360864865265e-05, + "loss": 4.3411, + "step": 380500 + }, + { + "epoch": 3.2760847107438016, + "grad_norm": 3.53125, + "learning_rate": 5.3312401045492246e-05, + "loss": 4.1957, + "step": 380550 + }, + { + "epoch": 3.2765151515151514, + "grad_norm": 4.9375, + "learning_rate": 5.328844465288853e-05, + "loss": 4.2552, + "step": 380600 + }, + { + "epoch": 3.276945592286501, + "grad_norm": 3.609375, + "learning_rate": 5.3264491688812424e-05, + "loss": 4.2976, + "step": 380650 + }, + { + "epoch": 3.2773760330578514, + "grad_norm": 2.609375, + "learning_rate": 5.3240542155022135e-05, + "loss": 4.3418, + "step": 380700 + }, + { + "epoch": 3.277806473829201, + "grad_norm": 3.046875, + "learning_rate": 5.3216596053275435e-05, + "loss": 4.5812, + "step": 380750 + }, + { + "epoch": 3.278236914600551, + "grad_norm": 3.0625, + "learning_rate": 5.3192653385329926e-05, + "loss": 4.5653, + "step": 380800 + }, + { + "epoch": 3.278667355371901, + "grad_norm": 2.3125, + "learning_rate": 5.3168714152942955e-05, + "loss": 4.0254, + "step": 380850 + }, + { + "epoch": 3.2790977961432506, + "grad_norm": 5.0, + "learning_rate": 5.31447783578716e-05, + "loss": 4.572, + "step": 380900 + }, + { + "epoch": 3.2795282369146004, + "grad_norm": 2.25, + "learning_rate": 5.312084600187267e-05, + "loss": 4.2414, + "step": 380950 + }, + { + "epoch": 3.2799586776859506, + "grad_norm": 1.7421875, + "learning_rate": 5.309691708670281e-05, + "loss": 4.288, + "step": 381000 + }, + { + "epoch": 3.2799586776859506, + "eval_loss": 4.992687225341797, + "eval_runtime": 24.042, + "eval_samples_per_second": 26.62, + "eval_steps_per_second": 13.31, + "eval_tts_loss": 7.472846155654953, + "step": 381000 + }, + { + "epoch": 3.2803891184573004, + "grad_norm": 3.46875, + "learning_rate": 5.307299161411834e-05, + "loss": 4.1551, + "step": 381050 + }, + { + "epoch": 3.28081955922865, + "grad_norm": 3.09375, + "learning_rate": 5.30490695858753e-05, + "loss": 4.4248, + "step": 381100 + }, + { + "epoch": 3.28125, + "grad_norm": 3.8125, + "learning_rate": 5.3025151003729535e-05, + "loss": 4.1041, + "step": 381150 + }, + { + "epoch": 3.28168044077135, + "grad_norm": 5.5625, + "learning_rate": 5.3001235869436636e-05, + "loss": 4.5563, + "step": 381200 + }, + { + "epoch": 3.2821108815426996, + "grad_norm": 3.09375, + "learning_rate": 5.2977324184751856e-05, + "loss": 4.2727, + "step": 381250 + }, + { + "epoch": 3.2825413223140494, + "grad_norm": 5.15625, + "learning_rate": 5.2953415951430376e-05, + "loss": 4.2165, + "step": 381300 + }, + { + "epoch": 3.2829717630853996, + "grad_norm": 3.09375, + "learning_rate": 5.292951117122686e-05, + "loss": 4.2481, + "step": 381350 + }, + { + "epoch": 3.2834022038567494, + "grad_norm": 4.96875, + "learning_rate": 5.290560984589601e-05, + "loss": 4.1418, + "step": 381400 + }, + { + "epoch": 3.283832644628099, + "grad_norm": 2.078125, + "learning_rate": 5.2881711977192064e-05, + "loss": 4.1323, + "step": 381450 + }, + { + "epoch": 3.284263085399449, + "grad_norm": 5.3125, + "learning_rate": 5.285781756686903e-05, + "loss": 4.4193, + "step": 381500 + }, + { + "epoch": 3.284693526170799, + "grad_norm": 5.59375, + "learning_rate": 5.2833926616680806e-05, + "loss": 3.9395, + "step": 381550 + }, + { + "epoch": 3.2851239669421486, + "grad_norm": 3.171875, + "learning_rate": 5.281003912838094e-05, + "loss": 4.2469, + "step": 381600 + }, + { + "epoch": 3.285554407713499, + "grad_norm": 3.484375, + "learning_rate": 5.278615510372257e-05, + "loss": 3.9632, + "step": 381650 + }, + { + "epoch": 3.2859848484848486, + "grad_norm": 2.515625, + "learning_rate": 5.276227454445888e-05, + "loss": 4.0695, + "step": 381700 + }, + { + "epoch": 3.2864152892561984, + "grad_norm": 4.03125, + "learning_rate": 5.2738397452342606e-05, + "loss": 4.5583, + "step": 381750 + }, + { + "epoch": 3.286845730027548, + "grad_norm": 3.5, + "learning_rate": 5.2714523829126225e-05, + "loss": 4.2797, + "step": 381800 + }, + { + "epoch": 3.287276170798898, + "grad_norm": 2.203125, + "learning_rate": 5.2690653676562144e-05, + "loss": 4.1788, + "step": 381850 + }, + { + "epoch": 3.287706611570248, + "grad_norm": 5.34375, + "learning_rate": 5.2666786996402265e-05, + "loss": 3.9448, + "step": 381900 + }, + { + "epoch": 3.2881370523415976, + "grad_norm": 3.0625, + "learning_rate": 5.2642923790398325e-05, + "loss": 4.487, + "step": 381950 + }, + { + "epoch": 3.288567493112948, + "grad_norm": 2.171875, + "learning_rate": 5.261906406030192e-05, + "loss": 4.3355, + "step": 382000 + }, + { + "epoch": 3.2889979338842976, + "grad_norm": 4.59375, + "learning_rate": 5.259520780786429e-05, + "loss": 4.1598, + "step": 382050 + }, + { + "epoch": 3.2894283746556474, + "grad_norm": 2.90625, + "learning_rate": 5.257135503483641e-05, + "loss": 4.6427, + "step": 382100 + }, + { + "epoch": 3.289858815426997, + "grad_norm": 3.75, + "learning_rate": 5.254750574296902e-05, + "loss": 3.9327, + "step": 382150 + }, + { + "epoch": 3.290289256198347, + "grad_norm": 1.84375, + "learning_rate": 5.252365993401256e-05, + "loss": 4.1238, + "step": 382200 + }, + { + "epoch": 3.290719696969697, + "grad_norm": 3.921875, + "learning_rate": 5.249981760971737e-05, + "loss": 4.294, + "step": 382250 + }, + { + "epoch": 3.291150137741047, + "grad_norm": 2.578125, + "learning_rate": 5.2475978771833366e-05, + "loss": 4.2283, + "step": 382300 + }, + { + "epoch": 3.291580578512397, + "grad_norm": 4.9375, + "learning_rate": 5.2452143422110267e-05, + "loss": 4.3361, + "step": 382350 + }, + { + "epoch": 3.2920110192837466, + "grad_norm": 3.6875, + "learning_rate": 5.242831156229753e-05, + "loss": 4.4711, + "step": 382400 + }, + { + "epoch": 3.2924414600550964, + "grad_norm": 0.95703125, + "learning_rate": 5.240448319414438e-05, + "loss": 4.3467, + "step": 382450 + }, + { + "epoch": 3.292871900826446, + "grad_norm": 3.03125, + "learning_rate": 5.238065831939972e-05, + "loss": 4.3403, + "step": 382500 + }, + { + "epoch": 3.293302341597796, + "grad_norm": 5.15625, + "learning_rate": 5.235683693981233e-05, + "loss": 4.5563, + "step": 382550 + }, + { + "epoch": 3.293732782369146, + "grad_norm": 5.28125, + "learning_rate": 5.23330190571306e-05, + "loss": 4.2845, + "step": 382600 + }, + { + "epoch": 3.294163223140496, + "grad_norm": 2.46875, + "learning_rate": 5.2309204673102695e-05, + "loss": 4.3014, + "step": 382650 + }, + { + "epoch": 3.294593663911846, + "grad_norm": 3.625, + "learning_rate": 5.2285393789476564e-05, + "loss": 4.1626, + "step": 382700 + }, + { + "epoch": 3.2950241046831956, + "grad_norm": 3.953125, + "learning_rate": 5.2261586407999877e-05, + "loss": 4.5075, + "step": 382750 + }, + { + "epoch": 3.2954545454545454, + "grad_norm": 2.765625, + "learning_rate": 5.223778253042e-05, + "loss": 4.2139, + "step": 382800 + }, + { + "epoch": 3.295884986225895, + "grad_norm": 3.765625, + "learning_rate": 5.221398215848419e-05, + "loss": 4.4173, + "step": 382850 + }, + { + "epoch": 3.296315426997245, + "grad_norm": 3.328125, + "learning_rate": 5.2190185293939196e-05, + "loss": 4.2536, + "step": 382900 + }, + { + "epoch": 3.2967458677685952, + "grad_norm": 2.953125, + "learning_rate": 5.216639193853179e-05, + "loss": 4.2438, + "step": 382950 + }, + { + "epoch": 3.297176308539945, + "grad_norm": 4.0, + "learning_rate": 5.214260209400829e-05, + "loss": 3.8653, + "step": 383000 + }, + { + "epoch": 3.297606749311295, + "grad_norm": 3.640625, + "learning_rate": 5.211881576211479e-05, + "loss": 4.391, + "step": 383050 + }, + { + "epoch": 3.2980371900826446, + "grad_norm": 2.53125, + "learning_rate": 5.209503294459727e-05, + "loss": 4.132, + "step": 383100 + }, + { + "epoch": 3.2984676308539944, + "grad_norm": 1.9375, + "learning_rate": 5.2071253643201227e-05, + "loss": 4.1338, + "step": 383150 + }, + { + "epoch": 3.298898071625344, + "grad_norm": 4.90625, + "learning_rate": 5.204747785967199e-05, + "loss": 4.2765, + "step": 383200 + }, + { + "epoch": 3.299328512396694, + "grad_norm": 1.6015625, + "learning_rate": 5.202370559575476e-05, + "loss": 4.1567, + "step": 383250 + }, + { + "epoch": 3.2997589531680442, + "grad_norm": 3.328125, + "learning_rate": 5.199993685319431e-05, + "loss": 4.3439, + "step": 383300 + }, + { + "epoch": 3.300189393939394, + "grad_norm": 1.953125, + "learning_rate": 5.1976171633735184e-05, + "loss": 3.9614, + "step": 383350 + }, + { + "epoch": 3.300619834710744, + "grad_norm": 3.28125, + "learning_rate": 5.195240993912181e-05, + "loss": 4.1035, + "step": 383400 + }, + { + "epoch": 3.3010502754820936, + "grad_norm": 3.21875, + "learning_rate": 5.192865177109813e-05, + "loss": 4.4349, + "step": 383450 + }, + { + "epoch": 3.3014807162534434, + "grad_norm": 3.046875, + "learning_rate": 5.1904897131407956e-05, + "loss": 4.6754, + "step": 383500 + }, + { + "epoch": 3.301911157024793, + "grad_norm": 3.296875, + "learning_rate": 5.188114602179488e-05, + "loss": 4.3999, + "step": 383550 + }, + { + "epoch": 3.3023415977961434, + "grad_norm": 3.53125, + "learning_rate": 5.1857398444002145e-05, + "loss": 4.356, + "step": 383600 + }, + { + "epoch": 3.3027720385674932, + "grad_norm": 2.859375, + "learning_rate": 5.183365439977281e-05, + "loss": 4.6765, + "step": 383650 + }, + { + "epoch": 3.303202479338843, + "grad_norm": 3.84375, + "learning_rate": 5.180991389084959e-05, + "loss": 3.9962, + "step": 383700 + }, + { + "epoch": 3.303632920110193, + "grad_norm": 5.125, + "learning_rate": 5.178617691897497e-05, + "loss": 4.6447, + "step": 383750 + }, + { + "epoch": 3.3040633608815426, + "grad_norm": 4.5, + "learning_rate": 5.176244348589128e-05, + "loss": 4.0382, + "step": 383800 + }, + { + "epoch": 3.3044938016528924, + "grad_norm": 4.125, + "learning_rate": 5.1738713593340436e-05, + "loss": 4.0448, + "step": 383850 + }, + { + "epoch": 3.304924242424242, + "grad_norm": 1.90625, + "learning_rate": 5.171498724306418e-05, + "loss": 4.2292, + "step": 383900 + }, + { + "epoch": 3.3053546831955924, + "grad_norm": 3.828125, + "learning_rate": 5.169126443680399e-05, + "loss": 4.3154, + "step": 383950 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 2.375, + "learning_rate": 5.1667545176301034e-05, + "loss": 4.4976, + "step": 384000 + }, + { + "epoch": 3.3057851239669422, + "eval_loss": 4.993453025817871, + "eval_runtime": 23.9704, + "eval_samples_per_second": 26.7, + "eval_steps_per_second": 13.35, + "eval_tts_loss": 7.479142194899681, + "step": 384000 + }, + { + "epoch": 3.306215564738292, + "grad_norm": 5.03125, + "learning_rate": 5.164382946329623e-05, + "loss": 4.1415, + "step": 384050 + }, + { + "epoch": 3.306646005509642, + "grad_norm": 1.546875, + "learning_rate": 5.1620117299530356e-05, + "loss": 4.234, + "step": 384100 + }, + { + "epoch": 3.3070764462809916, + "grad_norm": 3.890625, + "learning_rate": 5.15964086867438e-05, + "loss": 4.1881, + "step": 384150 + }, + { + "epoch": 3.3075068870523414, + "grad_norm": 3.421875, + "learning_rate": 5.1572703626676645e-05, + "loss": 3.9042, + "step": 384200 + }, + { + "epoch": 3.3079373278236917, + "grad_norm": 5.65625, + "learning_rate": 5.154900212106888e-05, + "loss": 4.3213, + "step": 384250 + }, + { + "epoch": 3.3083677685950414, + "grad_norm": 3.640625, + "learning_rate": 5.1525304171660114e-05, + "loss": 4.4492, + "step": 384300 + }, + { + "epoch": 3.3087982093663912, + "grad_norm": 4.625, + "learning_rate": 5.1501609780189694e-05, + "loss": 4.3373, + "step": 384350 + }, + { + "epoch": 3.309228650137741, + "grad_norm": 4.28125, + "learning_rate": 5.1477918948396845e-05, + "loss": 4.5075, + "step": 384400 + }, + { + "epoch": 3.309659090909091, + "grad_norm": 5.375, + "learning_rate": 5.145423167802026e-05, + "loss": 4.3757, + "step": 384450 + }, + { + "epoch": 3.3100895316804406, + "grad_norm": 4.0, + "learning_rate": 5.143054797079867e-05, + "loss": 4.4652, + "step": 384500 + }, + { + "epoch": 3.3105199724517904, + "grad_norm": 2.15625, + "learning_rate": 5.140686782847035e-05, + "loss": 4.329, + "step": 384550 + }, + { + "epoch": 3.3109504132231407, + "grad_norm": 2.640625, + "learning_rate": 5.138319125277334e-05, + "loss": 4.4536, + "step": 384600 + }, + { + "epoch": 3.3113808539944904, + "grad_norm": 3.359375, + "learning_rate": 5.135951824544558e-05, + "loss": 4.4816, + "step": 384650 + }, + { + "epoch": 3.3118112947658402, + "grad_norm": 2.828125, + "learning_rate": 5.133584880822449e-05, + "loss": 4.3902, + "step": 384700 + }, + { + "epoch": 3.31224173553719, + "grad_norm": 3.765625, + "learning_rate": 5.131218294284734e-05, + "loss": 4.1089, + "step": 384750 + }, + { + "epoch": 3.31267217630854, + "grad_norm": 2.265625, + "learning_rate": 5.128852065105125e-05, + "loss": 4.2618, + "step": 384800 + }, + { + "epoch": 3.3131026170798896, + "grad_norm": 2.765625, + "learning_rate": 5.126486193457295e-05, + "loss": 4.2408, + "step": 384850 + }, + { + "epoch": 3.31353305785124, + "grad_norm": 3.84375, + "learning_rate": 5.124120679514892e-05, + "loss": 4.1616, + "step": 384900 + }, + { + "epoch": 3.3139634986225897, + "grad_norm": 1.96875, + "learning_rate": 5.12175552345154e-05, + "loss": 4.368, + "step": 384950 + }, + { + "epoch": 3.3143939393939394, + "grad_norm": 2.953125, + "learning_rate": 5.119390725440838e-05, + "loss": 4.2134, + "step": 385000 + }, + { + "epoch": 3.3148243801652892, + "grad_norm": 4.46875, + "learning_rate": 5.1170262856563514e-05, + "loss": 4.2689, + "step": 385050 + }, + { + "epoch": 3.315254820936639, + "grad_norm": 5.4375, + "learning_rate": 5.114662204271633e-05, + "loss": 4.1006, + "step": 385100 + }, + { + "epoch": 3.315685261707989, + "grad_norm": 3.25, + "learning_rate": 5.112298481460198e-05, + "loss": 4.443, + "step": 385150 + }, + { + "epoch": 3.3161157024793386, + "grad_norm": 2.609375, + "learning_rate": 5.109935117395538e-05, + "loss": 4.486, + "step": 385200 + }, + { + "epoch": 3.316546143250689, + "grad_norm": 4.40625, + "learning_rate": 5.10757211225112e-05, + "loss": 4.4206, + "step": 385250 + }, + { + "epoch": 3.3169765840220387, + "grad_norm": 2.578125, + "learning_rate": 5.105209466200378e-05, + "loss": 4.2105, + "step": 385300 + }, + { + "epoch": 3.3174070247933884, + "grad_norm": 3.390625, + "learning_rate": 5.102847179416733e-05, + "loss": 4.2119, + "step": 385350 + }, + { + "epoch": 3.3178374655647382, + "grad_norm": 2.828125, + "learning_rate": 5.10048525207357e-05, + "loss": 4.0055, + "step": 385400 + }, + { + "epoch": 3.318267906336088, + "grad_norm": 1.625, + "learning_rate": 5.098123684344247e-05, + "loss": 4.3641, + "step": 385450 + }, + { + "epoch": 3.318698347107438, + "grad_norm": 4.0, + "learning_rate": 5.0957624764020995e-05, + "loss": 4.2787, + "step": 385500 + }, + { + "epoch": 3.319128787878788, + "grad_norm": 2.796875, + "learning_rate": 5.0934016284204336e-05, + "loss": 4.2553, + "step": 385550 + }, + { + "epoch": 3.319559228650138, + "grad_norm": 2.65625, + "learning_rate": 5.0910411405725275e-05, + "loss": 4.1602, + "step": 385600 + }, + { + "epoch": 3.3199896694214877, + "grad_norm": 2.234375, + "learning_rate": 5.088681013031644e-05, + "loss": 4.0053, + "step": 385650 + }, + { + "epoch": 3.3204201101928374, + "grad_norm": 5.125, + "learning_rate": 5.086321245971011e-05, + "loss": 3.9055, + "step": 385700 + }, + { + "epoch": 3.3208505509641872, + "grad_norm": 3.84375, + "learning_rate": 5.08396183956382e-05, + "loss": 4.9393, + "step": 385750 + }, + { + "epoch": 3.321280991735537, + "grad_norm": 2.78125, + "learning_rate": 5.081602793983256e-05, + "loss": 4.3131, + "step": 385800 + }, + { + "epoch": 3.321711432506887, + "grad_norm": 2.125, + "learning_rate": 5.079244109402463e-05, + "loss": 4.8291, + "step": 385850 + }, + { + "epoch": 3.322141873278237, + "grad_norm": 3.109375, + "learning_rate": 5.076885785994563e-05, + "loss": 3.9214, + "step": 385900 + }, + { + "epoch": 3.322572314049587, + "grad_norm": 3.265625, + "learning_rate": 5.074527823932662e-05, + "loss": 4.1668, + "step": 385950 + }, + { + "epoch": 3.3230027548209367, + "grad_norm": 3.046875, + "learning_rate": 5.072170223389812e-05, + "loss": 4.3386, + "step": 386000 + }, + { + "epoch": 3.3234331955922864, + "grad_norm": 4.1875, + "learning_rate": 5.069812984539071e-05, + "loss": 4.2399, + "step": 386050 + }, + { + "epoch": 3.3238636363636362, + "grad_norm": 1.8515625, + "learning_rate": 5.067456107553448e-05, + "loss": 4.6372, + "step": 386100 + }, + { + "epoch": 3.324294077134986, + "grad_norm": 2.171875, + "learning_rate": 5.065099592605933e-05, + "loss": 3.8546, + "step": 386150 + }, + { + "epoch": 3.3247245179063363, + "grad_norm": 3.078125, + "learning_rate": 5.062743439869492e-05, + "loss": 4.4209, + "step": 386200 + }, + { + "epoch": 3.325154958677686, + "grad_norm": 4.28125, + "learning_rate": 5.060387649517059e-05, + "loss": 4.0753, + "step": 386250 + }, + { + "epoch": 3.325585399449036, + "grad_norm": 2.8125, + "learning_rate": 5.05803222172154e-05, + "loss": 4.0681, + "step": 386300 + }, + { + "epoch": 3.3260158402203857, + "grad_norm": 4.875, + "learning_rate": 5.055677156655829e-05, + "loss": 4.1523, + "step": 386350 + }, + { + "epoch": 3.3264462809917354, + "grad_norm": 2.859375, + "learning_rate": 5.053322454492775e-05, + "loss": 4.3313, + "step": 386400 + }, + { + "epoch": 3.3268767217630852, + "grad_norm": 3.125, + "learning_rate": 5.05096811540521e-05, + "loss": 4.4541, + "step": 386450 + }, + { + "epoch": 3.327307162534435, + "grad_norm": 4.5625, + "learning_rate": 5.048614139565938e-05, + "loss": 4.4754, + "step": 386500 + }, + { + "epoch": 3.3277376033057853, + "grad_norm": 4.375, + "learning_rate": 5.046260527147735e-05, + "loss": 4.4157, + "step": 386550 + }, + { + "epoch": 3.328168044077135, + "grad_norm": 3.625, + "learning_rate": 5.043907278323346e-05, + "loss": 4.3153, + "step": 386600 + }, + { + "epoch": 3.328598484848485, + "grad_norm": 3.046875, + "learning_rate": 5.041554393265504e-05, + "loss": 4.2392, + "step": 386650 + }, + { + "epoch": 3.3290289256198347, + "grad_norm": 2.828125, + "learning_rate": 5.039201872146902e-05, + "loss": 4.7573, + "step": 386700 + }, + { + "epoch": 3.3294593663911844, + "grad_norm": 2.3125, + "learning_rate": 5.036849715140208e-05, + "loss": 4.5726, + "step": 386750 + }, + { + "epoch": 3.3298898071625342, + "grad_norm": 6.53125, + "learning_rate": 5.034497922418067e-05, + "loss": 4.2354, + "step": 386800 + }, + { + "epoch": 3.3303202479338845, + "grad_norm": 4.125, + "learning_rate": 5.032146494153095e-05, + "loss": 4.4768, + "step": 386850 + }, + { + "epoch": 3.3307506887052343, + "grad_norm": 5.09375, + "learning_rate": 5.0297954305178774e-05, + "loss": 3.8892, + "step": 386900 + }, + { + "epoch": 3.331181129476584, + "grad_norm": 0.9921875, + "learning_rate": 5.027444731684985e-05, + "loss": 4.2422, + "step": 386950 + }, + { + "epoch": 3.331611570247934, + "grad_norm": 2.28125, + "learning_rate": 5.0250943978269516e-05, + "loss": 4.283, + "step": 387000 + }, + { + "epoch": 3.331611570247934, + "eval_loss": 4.994412422180176, + "eval_runtime": 23.7253, + "eval_samples_per_second": 26.975, + "eval_steps_per_second": 13.488, + "eval_tts_loss": 7.491261116750935, + "step": 387000 + }, + { + "epoch": 3.3320420110192837, + "grad_norm": 2.0625, + "learning_rate": 5.022744429116285e-05, + "loss": 4.4999, + "step": 387050 + }, + { + "epoch": 3.3324724517906334, + "grad_norm": 3.921875, + "learning_rate": 5.0203948257254694e-05, + "loss": 4.2192, + "step": 387100 + }, + { + "epoch": 3.3329028925619832, + "grad_norm": 2.390625, + "learning_rate": 5.018045587826954e-05, + "loss": 4.296, + "step": 387150 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 3.625, + "learning_rate": 5.015696715593179e-05, + "loss": 4.2384, + "step": 387200 + }, + { + "epoch": 3.3337637741046833, + "grad_norm": 5.0625, + "learning_rate": 5.0133482091965445e-05, + "loss": 4.1351, + "step": 387250 + }, + { + "epoch": 3.334194214876033, + "grad_norm": 4.21875, + "learning_rate": 5.0110000688094146e-05, + "loss": 4.5279, + "step": 387300 + }, + { + "epoch": 3.334624655647383, + "grad_norm": 3.9375, + "learning_rate": 5.008652294604149e-05, + "loss": 3.993, + "step": 387350 + }, + { + "epoch": 3.3350550964187327, + "grad_norm": 3.046875, + "learning_rate": 5.006304886753066e-05, + "loss": 4.1831, + "step": 387400 + }, + { + "epoch": 3.335485537190083, + "grad_norm": 4.96875, + "learning_rate": 5.003957845428457e-05, + "loss": 4.36, + "step": 387450 + }, + { + "epoch": 3.3359159779614327, + "grad_norm": 2.96875, + "learning_rate": 5.001611170802599e-05, + "loss": 4.2505, + "step": 387500 + }, + { + "epoch": 3.3363464187327825, + "grad_norm": 2.453125, + "learning_rate": 4.99926486304772e-05, + "loss": 4.2206, + "step": 387550 + }, + { + "epoch": 3.3367768595041323, + "grad_norm": 2.734375, + "learning_rate": 4.9969189223360446e-05, + "loss": 4.5257, + "step": 387600 + }, + { + "epoch": 3.337207300275482, + "grad_norm": 2.734375, + "learning_rate": 4.994573348839756e-05, + "loss": 4.6288, + "step": 387650 + }, + { + "epoch": 3.337637741046832, + "grad_norm": 3.59375, + "learning_rate": 4.992228142731015e-05, + "loss": 4.3768, + "step": 387700 + }, + { + "epoch": 3.3380681818181817, + "grad_norm": 2.28125, + "learning_rate": 4.9898833041819515e-05, + "loss": 4.4389, + "step": 387750 + }, + { + "epoch": 3.3384986225895315, + "grad_norm": 1.5703125, + "learning_rate": 4.987538833364675e-05, + "loss": 4.0628, + "step": 387800 + }, + { + "epoch": 3.3389290633608817, + "grad_norm": 3.796875, + "learning_rate": 4.9851947304512605e-05, + "loss": 4.2834, + "step": 387850 + }, + { + "epoch": 3.3393595041322315, + "grad_norm": 3.59375, + "learning_rate": 4.982850995613767e-05, + "loss": 3.8162, + "step": 387900 + }, + { + "epoch": 3.3397899449035813, + "grad_norm": 3.0, + "learning_rate": 4.9805076290242144e-05, + "loss": 3.9103, + "step": 387950 + }, + { + "epoch": 3.340220385674931, + "grad_norm": 3.34375, + "learning_rate": 4.978164630854604e-05, + "loss": 4.3271, + "step": 388000 + }, + { + "epoch": 3.340650826446281, + "grad_norm": 3.359375, + "learning_rate": 4.975822001276903e-05, + "loss": 4.4111, + "step": 388050 + }, + { + "epoch": 3.341081267217631, + "grad_norm": 5.0625, + "learning_rate": 4.973479740463058e-05, + "loss": 4.4683, + "step": 388100 + }, + { + "epoch": 3.341511707988981, + "grad_norm": 3.828125, + "learning_rate": 4.971137848584981e-05, + "loss": 4.1036, + "step": 388150 + }, + { + "epoch": 3.3419421487603307, + "grad_norm": 2.578125, + "learning_rate": 4.9687963258145706e-05, + "loss": 4.5362, + "step": 388200 + }, + { + "epoch": 3.3423725895316805, + "grad_norm": 3.234375, + "learning_rate": 4.966455172323685e-05, + "loss": 4.0423, + "step": 388250 + }, + { + "epoch": 3.3428030303030303, + "grad_norm": 5.15625, + "learning_rate": 4.96411438828416e-05, + "loss": 4.1517, + "step": 388300 + }, + { + "epoch": 3.34323347107438, + "grad_norm": 5.125, + "learning_rate": 4.961773973867804e-05, + "loss": 4.1902, + "step": 388350 + }, + { + "epoch": 3.34366391184573, + "grad_norm": 4.125, + "learning_rate": 4.959433929246398e-05, + "loss": 4.4626, + "step": 388400 + }, + { + "epoch": 3.3440943526170797, + "grad_norm": 3.53125, + "learning_rate": 4.957094254591692e-05, + "loss": 4.2152, + "step": 388450 + }, + { + "epoch": 3.34452479338843, + "grad_norm": 3.296875, + "learning_rate": 4.954754950075425e-05, + "loss": 4.1573, + "step": 388500 + }, + { + "epoch": 3.3449552341597797, + "grad_norm": 1.8984375, + "learning_rate": 4.9524160158692835e-05, + "loss": 4.2415, + "step": 388550 + }, + { + "epoch": 3.3453856749311295, + "grad_norm": 2.421875, + "learning_rate": 4.950077452144949e-05, + "loss": 4.6862, + "step": 388600 + }, + { + "epoch": 3.3458161157024793, + "grad_norm": 2.90625, + "learning_rate": 4.9477392590740645e-05, + "loss": 4.3831, + "step": 388650 + }, + { + "epoch": 3.346246556473829, + "grad_norm": 5.90625, + "learning_rate": 4.945401436828244e-05, + "loss": 4.5584, + "step": 388700 + }, + { + "epoch": 3.3466769972451793, + "grad_norm": 2.34375, + "learning_rate": 4.9430639855790876e-05, + "loss": 4.2195, + "step": 388750 + }, + { + "epoch": 3.347107438016529, + "grad_norm": 1.890625, + "learning_rate": 4.940726905498158e-05, + "loss": 4.4348, + "step": 388800 + }, + { + "epoch": 3.347537878787879, + "grad_norm": 4.03125, + "learning_rate": 4.9383901967569804e-05, + "loss": 4.0903, + "step": 388850 + }, + { + "epoch": 3.3479683195592287, + "grad_norm": 3.703125, + "learning_rate": 4.936053859527075e-05, + "loss": 4.1093, + "step": 388900 + }, + { + "epoch": 3.3483987603305785, + "grad_norm": 3.625, + "learning_rate": 4.933717893979921e-05, + "loss": 4.081, + "step": 388950 + }, + { + "epoch": 3.3488292011019283, + "grad_norm": 0.90234375, + "learning_rate": 4.9313823002869705e-05, + "loss": 4.1994, + "step": 389000 + }, + { + "epoch": 3.349259641873278, + "grad_norm": 2.75, + "learning_rate": 4.9290470786196605e-05, + "loss": 3.9767, + "step": 389050 + }, + { + "epoch": 3.349690082644628, + "grad_norm": 6.09375, + "learning_rate": 4.9267122291493814e-05, + "loss": 4.4585, + "step": 389100 + }, + { + "epoch": 3.350120523415978, + "grad_norm": 2.34375, + "learning_rate": 4.9243777520475056e-05, + "loss": 4.5203, + "step": 389150 + }, + { + "epoch": 3.350550964187328, + "grad_norm": 2.578125, + "learning_rate": 4.9220436474853845e-05, + "loss": 4.4309, + "step": 389200 + }, + { + "epoch": 3.3509814049586777, + "grad_norm": 2.265625, + "learning_rate": 4.919709915634335e-05, + "loss": 4.4047, + "step": 389250 + }, + { + "epoch": 3.3514118457300275, + "grad_norm": 4.40625, + "learning_rate": 4.917376556665648e-05, + "loss": 4.4213, + "step": 389300 + }, + { + "epoch": 3.3518422865013773, + "grad_norm": 3.109375, + "learning_rate": 4.915043570750586e-05, + "loss": 4.8675, + "step": 389350 + }, + { + "epoch": 3.3522727272727275, + "grad_norm": 3.75, + "learning_rate": 4.91271095806038e-05, + "loss": 4.5339, + "step": 389400 + }, + { + "epoch": 3.3527031680440773, + "grad_norm": 4.15625, + "learning_rate": 4.91037871876625e-05, + "loss": 3.929, + "step": 389450 + }, + { + "epoch": 3.353133608815427, + "grad_norm": 4.1875, + "learning_rate": 4.9080468530393706e-05, + "loss": 4.4526, + "step": 389500 + }, + { + "epoch": 3.353564049586777, + "grad_norm": 2.515625, + "learning_rate": 4.9057153610508965e-05, + "loss": 4.3747, + "step": 389550 + }, + { + "epoch": 3.3539944903581267, + "grad_norm": 2.25, + "learning_rate": 4.903384242971953e-05, + "loss": 4.083, + "step": 389600 + }, + { + "epoch": 3.3544249311294765, + "grad_norm": 2.546875, + "learning_rate": 4.9010534989736424e-05, + "loss": 4.0807, + "step": 389650 + }, + { + "epoch": 3.3548553719008263, + "grad_norm": 3.09375, + "learning_rate": 4.89872312922703e-05, + "loss": 4.4303, + "step": 389700 + }, + { + "epoch": 3.355285812672176, + "grad_norm": 2.390625, + "learning_rate": 4.896393133903168e-05, + "loss": 4.4677, + "step": 389750 + }, + { + "epoch": 3.3557162534435263, + "grad_norm": 4.40625, + "learning_rate": 4.8940635131730685e-05, + "loss": 4.2841, + "step": 389800 + }, + { + "epoch": 3.356146694214876, + "grad_norm": 3.921875, + "learning_rate": 4.891734267207723e-05, + "loss": 4.0092, + "step": 389850 + }, + { + "epoch": 3.356577134986226, + "grad_norm": 5.40625, + "learning_rate": 4.8894053961780895e-05, + "loss": 4.345, + "step": 389900 + }, + { + "epoch": 3.3570075757575757, + "grad_norm": 2.0, + "learning_rate": 4.8870769002551055e-05, + "loss": 4.0638, + "step": 389950 + }, + { + "epoch": 3.3574380165289255, + "grad_norm": 2.40625, + "learning_rate": 4.884748779609672e-05, + "loss": 4.4728, + "step": 390000 + }, + { + "epoch": 3.3574380165289255, + "eval_loss": 4.994321346282959, + "eval_runtime": 24.6569, + "eval_samples_per_second": 25.956, + "eval_steps_per_second": 12.978, + "eval_tts_loss": 7.48053660015481, + "step": 390000 + }, + { + "epoch": 3.3578684573002757, + "grad_norm": 2.46875, + "learning_rate": 4.88242103441268e-05, + "loss": 4.0664, + "step": 390050 + }, + { + "epoch": 3.3582988980716255, + "grad_norm": 3.9375, + "learning_rate": 4.880093664834966e-05, + "loss": 3.9159, + "step": 390100 + }, + { + "epoch": 3.3587293388429753, + "grad_norm": 3.625, + "learning_rate": 4.8777666710473644e-05, + "loss": 4.2394, + "step": 390150 + }, + { + "epoch": 3.359159779614325, + "grad_norm": 2.296875, + "learning_rate": 4.875440053220669e-05, + "loss": 3.9599, + "step": 390200 + }, + { + "epoch": 3.359590220385675, + "grad_norm": 3.4375, + "learning_rate": 4.8731138115256446e-05, + "loss": 4.5411, + "step": 390250 + }, + { + "epoch": 3.3600206611570247, + "grad_norm": 6.78125, + "learning_rate": 4.870787946133043e-05, + "loss": 4.3078, + "step": 390300 + }, + { + "epoch": 3.3604511019283745, + "grad_norm": 3.59375, + "learning_rate": 4.868462457213566e-05, + "loss": 4.6105, + "step": 390350 + }, + { + "epoch": 3.3608815426997243, + "grad_norm": 3.625, + "learning_rate": 4.8661373449379e-05, + "loss": 4.2969, + "step": 390400 + }, + { + "epoch": 3.3613119834710745, + "grad_norm": 2.921875, + "learning_rate": 4.863812609476711e-05, + "loss": 4.2849, + "step": 390450 + }, + { + "epoch": 3.3617424242424243, + "grad_norm": 5.3125, + "learning_rate": 4.861488251000627e-05, + "loss": 4.3593, + "step": 390500 + }, + { + "epoch": 3.362172865013774, + "grad_norm": 3.375, + "learning_rate": 4.8591642696802444e-05, + "loss": 4.1489, + "step": 390550 + }, + { + "epoch": 3.362603305785124, + "grad_norm": 1.3984375, + "learning_rate": 4.856840665686151e-05, + "loss": 4.3814, + "step": 390600 + }, + { + "epoch": 3.3630337465564737, + "grad_norm": 3.390625, + "learning_rate": 4.854517439188884e-05, + "loss": 4.3655, + "step": 390650 + }, + { + "epoch": 3.363464187327824, + "grad_norm": 1.9609375, + "learning_rate": 4.852194590358962e-05, + "loss": 4.0472, + "step": 390700 + }, + { + "epoch": 3.3638946280991737, + "grad_norm": 3.03125, + "learning_rate": 4.849872119366884e-05, + "loss": 4.3848, + "step": 390750 + }, + { + "epoch": 3.3643250688705235, + "grad_norm": 3.609375, + "learning_rate": 4.847550026383113e-05, + "loss": 4.5576, + "step": 390800 + }, + { + "epoch": 3.3647555096418733, + "grad_norm": 3.09375, + "learning_rate": 4.8452283115780836e-05, + "loss": 4.5376, + "step": 390850 + }, + { + "epoch": 3.365185950413223, + "grad_norm": 3.546875, + "learning_rate": 4.842906975122206e-05, + "loss": 4.2247, + "step": 390900 + }, + { + "epoch": 3.365616391184573, + "grad_norm": 4.0625, + "learning_rate": 4.840586017185856e-05, + "loss": 4.1984, + "step": 390950 + }, + { + "epoch": 3.3660468319559227, + "grad_norm": 4.09375, + "learning_rate": 4.8382654379393966e-05, + "loss": 4.3393, + "step": 391000 + }, + { + "epoch": 3.3664772727272725, + "grad_norm": 3.75, + "learning_rate": 4.835945237553148e-05, + "loss": 4.4571, + "step": 391050 + }, + { + "epoch": 3.3669077134986227, + "grad_norm": 2.0625, + "learning_rate": 4.833625416197407e-05, + "loss": 4.0708, + "step": 391100 + }, + { + "epoch": 3.3673381542699725, + "grad_norm": 3.171875, + "learning_rate": 4.831305974042446e-05, + "loss": 4.4227, + "step": 391150 + }, + { + "epoch": 3.3677685950413223, + "grad_norm": 4.40625, + "learning_rate": 4.828986911258505e-05, + "loss": 4.5377, + "step": 391200 + }, + { + "epoch": 3.368199035812672, + "grad_norm": 3.6875, + "learning_rate": 4.826668228015796e-05, + "loss": 4.6127, + "step": 391250 + }, + { + "epoch": 3.368629476584022, + "grad_norm": 1.859375, + "learning_rate": 4.8243499244845116e-05, + "loss": 4.1092, + "step": 391300 + }, + { + "epoch": 3.369059917355372, + "grad_norm": 4.84375, + "learning_rate": 4.822032000834811e-05, + "loss": 4.3181, + "step": 391350 + }, + { + "epoch": 3.369490358126722, + "grad_norm": 3.375, + "learning_rate": 4.8197144572368135e-05, + "loss": 4.045, + "step": 391400 + }, + { + "epoch": 3.3699207988980717, + "grad_norm": 3.28125, + "learning_rate": 4.817397293860633e-05, + "loss": 4.3422, + "step": 391450 + }, + { + "epoch": 3.3703512396694215, + "grad_norm": 2.734375, + "learning_rate": 4.8150805108763395e-05, + "loss": 4.2938, + "step": 391500 + }, + { + "epoch": 3.3707816804407713, + "grad_norm": 2.875, + "learning_rate": 4.812764108453978e-05, + "loss": 4.3602, + "step": 391550 + }, + { + "epoch": 3.371212121212121, + "grad_norm": 2.578125, + "learning_rate": 4.810448086763577e-05, + "loss": 3.9826, + "step": 391600 + }, + { + "epoch": 3.371642561983471, + "grad_norm": 2.15625, + "learning_rate": 4.808132445975114e-05, + "loss": 4.3741, + "step": 391650 + }, + { + "epoch": 3.3720730027548207, + "grad_norm": 3.875, + "learning_rate": 4.805817186258562e-05, + "loss": 4.2754, + "step": 391700 + }, + { + "epoch": 3.372503443526171, + "grad_norm": 3.828125, + "learning_rate": 4.803502307783854e-05, + "loss": 4.1645, + "step": 391750 + }, + { + "epoch": 3.3729338842975207, + "grad_norm": 1.15625, + "learning_rate": 4.801187810720892e-05, + "loss": 4.5861, + "step": 391800 + }, + { + "epoch": 3.3733643250688705, + "grad_norm": 3.46875, + "learning_rate": 4.798873695239566e-05, + "loss": 4.2072, + "step": 391850 + }, + { + "epoch": 3.3737947658402203, + "grad_norm": 1.8046875, + "learning_rate": 4.796559961509717e-05, + "loss": 4.4066, + "step": 391900 + }, + { + "epoch": 3.37422520661157, + "grad_norm": 4.09375, + "learning_rate": 4.794246609701167e-05, + "loss": 4.4111, + "step": 391950 + }, + { + "epoch": 3.3746556473829203, + "grad_norm": 2.671875, + "learning_rate": 4.79193363998372e-05, + "loss": 4.467, + "step": 392000 + }, + { + "epoch": 3.37508608815427, + "grad_norm": 2.359375, + "learning_rate": 4.789621052527138e-05, + "loss": 3.8915, + "step": 392050 + }, + { + "epoch": 3.37551652892562, + "grad_norm": 3.125, + "learning_rate": 4.7873088475011606e-05, + "loss": 4.3434, + "step": 392100 + }, + { + "epoch": 3.3759469696969697, + "grad_norm": 2.75, + "learning_rate": 4.7849970250754985e-05, + "loss": 4.0741, + "step": 392150 + }, + { + "epoch": 3.3763774104683195, + "grad_norm": 4.1875, + "learning_rate": 4.7826855854198336e-05, + "loss": 4.1819, + "step": 392200 + }, + { + "epoch": 3.3768078512396693, + "grad_norm": 3.0625, + "learning_rate": 4.780374528703818e-05, + "loss": 4.1821, + "step": 392250 + }, + { + "epoch": 3.377238292011019, + "grad_norm": 1.953125, + "learning_rate": 4.7780638550970855e-05, + "loss": 4.1299, + "step": 392300 + }, + { + "epoch": 3.377668732782369, + "grad_norm": 3.734375, + "learning_rate": 4.775753564769231e-05, + "loss": 4.2985, + "step": 392350 + }, + { + "epoch": 3.378099173553719, + "grad_norm": 2.796875, + "learning_rate": 4.773443657889824e-05, + "loss": 3.9785, + "step": 392400 + }, + { + "epoch": 3.378529614325069, + "grad_norm": 2.640625, + "learning_rate": 4.7711341346284074e-05, + "loss": 4.6907, + "step": 392450 + }, + { + "epoch": 3.3789600550964187, + "grad_norm": 3.0625, + "learning_rate": 4.7688249951544916e-05, + "loss": 4.2184, + "step": 392500 + }, + { + "epoch": 3.3793904958677685, + "grad_norm": 1.0859375, + "learning_rate": 4.7665162396375697e-05, + "loss": 4.2271, + "step": 392550 + }, + { + "epoch": 3.3798209366391183, + "grad_norm": 3.3125, + "learning_rate": 4.764207868247096e-05, + "loss": 4.3007, + "step": 392600 + }, + { + "epoch": 3.3802513774104685, + "grad_norm": 2.609375, + "learning_rate": 4.7618998811524995e-05, + "loss": 4.5501, + "step": 392650 + }, + { + "epoch": 3.3806818181818183, + "grad_norm": 4.15625, + "learning_rate": 4.7595922785231814e-05, + "loss": 4.5851, + "step": 392700 + }, + { + "epoch": 3.381112258953168, + "grad_norm": 3.34375, + "learning_rate": 4.7572850605285146e-05, + "loss": 4.3675, + "step": 392750 + }, + { + "epoch": 3.381542699724518, + "grad_norm": 4.90625, + "learning_rate": 4.754978227337842e-05, + "loss": 4.4713, + "step": 392800 + }, + { + "epoch": 3.3819731404958677, + "grad_norm": 2.484375, + "learning_rate": 4.752671779120486e-05, + "loss": 4.3006, + "step": 392850 + }, + { + "epoch": 3.3824035812672175, + "grad_norm": 4.875, + "learning_rate": 4.750365716045734e-05, + "loss": 4.1813, + "step": 392900 + }, + { + "epoch": 3.3828340220385673, + "grad_norm": 1.2421875, + "learning_rate": 4.7480600382828366e-05, + "loss": 4.2718, + "step": 392950 + }, + { + "epoch": 3.383264462809917, + "grad_norm": 5.15625, + "learning_rate": 4.7457547460010366e-05, + "loss": 4.1846, + "step": 393000 + }, + { + "epoch": 3.383264462809917, + "eval_loss": 4.993402004241943, + "eval_runtime": 23.4826, + "eval_samples_per_second": 27.254, + "eval_steps_per_second": 13.627, + "eval_tts_loss": 7.468892651127944, + "step": 393000 + }, + { + "epoch": 3.3836949035812673, + "grad_norm": 2.359375, + "learning_rate": 4.743449839369534e-05, + "loss": 4.4091, + "step": 393050 + }, + { + "epoch": 3.384125344352617, + "grad_norm": 5.0625, + "learning_rate": 4.741145318557498e-05, + "loss": 3.9521, + "step": 393100 + }, + { + "epoch": 3.384555785123967, + "grad_norm": 3.625, + "learning_rate": 4.73884118373409e-05, + "loss": 4.4356, + "step": 393150 + }, + { + "epoch": 3.3849862258953167, + "grad_norm": 3.328125, + "learning_rate": 4.736537435068411e-05, + "loss": 4.3135, + "step": 393200 + }, + { + "epoch": 3.3854166666666665, + "grad_norm": 3.296875, + "learning_rate": 4.734234072729564e-05, + "loss": 4.3765, + "step": 393250 + }, + { + "epoch": 3.3858471074380168, + "grad_norm": 4.1875, + "learning_rate": 4.731931096886606e-05, + "loss": 3.9165, + "step": 393300 + }, + { + "epoch": 3.3862775482093666, + "grad_norm": 3.796875, + "learning_rate": 4.729628507708571e-05, + "loss": 4.1185, + "step": 393350 + }, + { + "epoch": 3.3867079889807163, + "grad_norm": 2.78125, + "learning_rate": 4.727326305364464e-05, + "loss": 4.5291, + "step": 393400 + }, + { + "epoch": 3.387138429752066, + "grad_norm": 5.3125, + "learning_rate": 4.7250244900232624e-05, + "loss": 4.4192, + "step": 393450 + }, + { + "epoch": 3.387568870523416, + "grad_norm": 1.296875, + "learning_rate": 4.72272306185391e-05, + "loss": 4.4869, + "step": 393500 + }, + { + "epoch": 3.3879993112947657, + "grad_norm": 2.234375, + "learning_rate": 4.7204220210253336e-05, + "loss": 4.494, + "step": 393550 + }, + { + "epoch": 3.3884297520661155, + "grad_norm": 5.25, + "learning_rate": 4.7181213677064216e-05, + "loss": 4.2612, + "step": 393600 + }, + { + "epoch": 3.3888601928374658, + "grad_norm": 3.46875, + "learning_rate": 4.715821102066037e-05, + "loss": 4.1171, + "step": 393650 + }, + { + "epoch": 3.3892906336088156, + "grad_norm": 1.8046875, + "learning_rate": 4.7135212242730154e-05, + "loss": 4.2632, + "step": 393700 + }, + { + "epoch": 3.3897210743801653, + "grad_norm": 3.75, + "learning_rate": 4.711221734496161e-05, + "loss": 3.9147, + "step": 393750 + }, + { + "epoch": 3.390151515151515, + "grad_norm": 0.92578125, + "learning_rate": 4.708922632904248e-05, + "loss": 4.3943, + "step": 393800 + }, + { + "epoch": 3.390581955922865, + "grad_norm": 2.921875, + "learning_rate": 4.7066239196660335e-05, + "loss": 4.584, + "step": 393850 + }, + { + "epoch": 3.3910123966942147, + "grad_norm": 3.75, + "learning_rate": 4.704325594950234e-05, + "loss": 4.3678, + "step": 393900 + }, + { + "epoch": 3.391442837465565, + "grad_norm": 3.046875, + "learning_rate": 4.702027658925543e-05, + "loss": 4.0652, + "step": 393950 + }, + { + "epoch": 3.3918732782369148, + "grad_norm": 3.015625, + "learning_rate": 4.699730111760623e-05, + "loss": 4.1975, + "step": 394000 + }, + { + "epoch": 3.3923037190082646, + "grad_norm": 2.078125, + "learning_rate": 4.6974329536241036e-05, + "loss": 4.2021, + "step": 394050 + }, + { + "epoch": 3.3927341597796143, + "grad_norm": 1.9140625, + "learning_rate": 4.695136184684602e-05, + "loss": 4.38, + "step": 394100 + }, + { + "epoch": 3.393164600550964, + "grad_norm": 2.546875, + "learning_rate": 4.6928398051106936e-05, + "loss": 4.2613, + "step": 394150 + }, + { + "epoch": 3.393595041322314, + "grad_norm": 4.9375, + "learning_rate": 4.690543815070918e-05, + "loss": 4.5153, + "step": 394200 + }, + { + "epoch": 3.3940254820936637, + "grad_norm": 2.78125, + "learning_rate": 4.6882482147338055e-05, + "loss": 4.5173, + "step": 394250 + }, + { + "epoch": 3.394455922865014, + "grad_norm": 4.65625, + "learning_rate": 4.685953004267847e-05, + "loss": 4.566, + "step": 394300 + }, + { + "epoch": 3.3948863636363638, + "grad_norm": 2.96875, + "learning_rate": 4.6836581838414996e-05, + "loss": 4.1144, + "step": 394350 + }, + { + "epoch": 3.3953168044077136, + "grad_norm": 4.40625, + "learning_rate": 4.6813637536232066e-05, + "loss": 4.4777, + "step": 394400 + }, + { + "epoch": 3.3957472451790633, + "grad_norm": 2.328125, + "learning_rate": 4.6790697137813756e-05, + "loss": 4.7136, + "step": 394450 + }, + { + "epoch": 3.396177685950413, + "grad_norm": 2.359375, + "learning_rate": 4.6767760644843716e-05, + "loss": 4.3581, + "step": 394500 + }, + { + "epoch": 3.396608126721763, + "grad_norm": 3.125, + "learning_rate": 4.674482805900555e-05, + "loss": 4.1576, + "step": 394550 + }, + { + "epoch": 3.397038567493113, + "grad_norm": 2.125, + "learning_rate": 4.672189938198242e-05, + "loss": 4.2351, + "step": 394600 + }, + { + "epoch": 3.397469008264463, + "grad_norm": 4.375, + "learning_rate": 4.6698974615457206e-05, + "loss": 4.4396, + "step": 394650 + }, + { + "epoch": 3.3978994490358128, + "grad_norm": 2.46875, + "learning_rate": 4.6676053761112646e-05, + "loss": 4.0337, + "step": 394700 + }, + { + "epoch": 3.3983298898071626, + "grad_norm": 2.640625, + "learning_rate": 4.665313682063095e-05, + "loss": 4.3153, + "step": 394750 + }, + { + "epoch": 3.3987603305785123, + "grad_norm": 2.265625, + "learning_rate": 4.663022379569426e-05, + "loss": 4.577, + "step": 394800 + }, + { + "epoch": 3.399190771349862, + "grad_norm": 4.25, + "learning_rate": 4.660731468798432e-05, + "loss": 4.3847, + "step": 394850 + }, + { + "epoch": 3.399621212121212, + "grad_norm": 2.921875, + "learning_rate": 4.65844094991826e-05, + "loss": 4.4085, + "step": 394900 + }, + { + "epoch": 3.400051652892562, + "grad_norm": 3.046875, + "learning_rate": 4.65615082309703e-05, + "loss": 4.1411, + "step": 394950 + }, + { + "epoch": 3.400482093663912, + "grad_norm": 2.8125, + "learning_rate": 4.6538610885028314e-05, + "loss": 4.2553, + "step": 395000 + }, + { + "epoch": 3.4009125344352618, + "grad_norm": 2.640625, + "learning_rate": 4.651571746303722e-05, + "loss": 4.3857, + "step": 395050 + }, + { + "epoch": 3.4013429752066116, + "grad_norm": 6.25, + "learning_rate": 4.6492827966677446e-05, + "loss": 4.2306, + "step": 395100 + }, + { + "epoch": 3.4017734159779613, + "grad_norm": 1.96875, + "learning_rate": 4.646994239762896e-05, + "loss": 4.0199, + "step": 395150 + }, + { + "epoch": 3.402203856749311, + "grad_norm": 2.4375, + "learning_rate": 4.644706075757154e-05, + "loss": 4.6054, + "step": 395200 + }, + { + "epoch": 3.4026342975206614, + "grad_norm": 3.921875, + "learning_rate": 4.642418304818463e-05, + "loss": 3.8235, + "step": 395250 + }, + { + "epoch": 3.403064738292011, + "grad_norm": 4.1875, + "learning_rate": 4.640130927114742e-05, + "loss": 3.994, + "step": 395300 + }, + { + "epoch": 3.403495179063361, + "grad_norm": 3.125, + "learning_rate": 4.6378439428138756e-05, + "loss": 4.4975, + "step": 395350 + }, + { + "epoch": 3.4039256198347108, + "grad_norm": 1.828125, + "learning_rate": 4.635557352083729e-05, + "loss": 4.2129, + "step": 395400 + }, + { + "epoch": 3.4043560606060606, + "grad_norm": 4.125, + "learning_rate": 4.6332711550921316e-05, + "loss": 4.5797, + "step": 395450 + }, + { + "epoch": 3.4047865013774103, + "grad_norm": 3.265625, + "learning_rate": 4.6309853520068844e-05, + "loss": 3.8621, + "step": 395500 + }, + { + "epoch": 3.40521694214876, + "grad_norm": 2.015625, + "learning_rate": 4.6286999429957613e-05, + "loss": 3.9599, + "step": 395550 + }, + { + "epoch": 3.4056473829201104, + "grad_norm": 4.65625, + "learning_rate": 4.626414928226506e-05, + "loss": 4.1441, + "step": 395600 + }, + { + "epoch": 3.40607782369146, + "grad_norm": 3.078125, + "learning_rate": 4.624130307866829e-05, + "loss": 4.5329, + "step": 395650 + }, + { + "epoch": 3.40650826446281, + "grad_norm": 2.5625, + "learning_rate": 4.6218460820844276e-05, + "loss": 4.0584, + "step": 395700 + }, + { + "epoch": 3.4069387052341598, + "grad_norm": 3.359375, + "learning_rate": 4.619562251046946e-05, + "loss": 3.9191, + "step": 395750 + }, + { + "epoch": 3.4073691460055096, + "grad_norm": 3.078125, + "learning_rate": 4.617278814922022e-05, + "loss": 4.521, + "step": 395800 + }, + { + "epoch": 3.4077995867768593, + "grad_norm": 4.15625, + "learning_rate": 4.614995773877252e-05, + "loss": 4.2975, + "step": 395850 + }, + { + "epoch": 3.4082300275482096, + "grad_norm": 3.734375, + "learning_rate": 4.6127131280802026e-05, + "loss": 4.1153, + "step": 395900 + }, + { + "epoch": 3.4086604683195594, + "grad_norm": 2.03125, + "learning_rate": 4.6104308776984265e-05, + "loss": 4.3501, + "step": 395950 + }, + { + "epoch": 3.409090909090909, + "grad_norm": 5.09375, + "learning_rate": 4.608149022899423e-05, + "loss": 4.4347, + "step": 396000 + }, + { + "epoch": 3.409090909090909, + "eval_loss": 4.994675636291504, + "eval_runtime": 24.6395, + "eval_samples_per_second": 25.975, + "eval_steps_per_second": 12.987, + "eval_tts_loss": 7.505496205374016, + "step": 396000 + }, + { + "epoch": 3.409521349862259, + "grad_norm": 2.140625, + "learning_rate": 4.6058675638506776e-05, + "loss": 4.3295, + "step": 396050 + }, + { + "epoch": 3.4099517906336088, + "grad_norm": 3.875, + "learning_rate": 4.603586500719651e-05, + "loss": 4.2509, + "step": 396100 + }, + { + "epoch": 3.4103822314049586, + "grad_norm": 4.09375, + "learning_rate": 4.6013058336737637e-05, + "loss": 4.5618, + "step": 396150 + }, + { + "epoch": 3.4108126721763083, + "grad_norm": 7.6875, + "learning_rate": 4.599025562880409e-05, + "loss": 4.2612, + "step": 396200 + }, + { + "epoch": 3.4112431129476586, + "grad_norm": 2.171875, + "learning_rate": 4.596745688506966e-05, + "loss": 4.2725, + "step": 396250 + }, + { + "epoch": 3.4116735537190084, + "grad_norm": 2.609375, + "learning_rate": 4.594466210720756e-05, + "loss": 4.3079, + "step": 396300 + }, + { + "epoch": 3.412103994490358, + "grad_norm": 3.921875, + "learning_rate": 4.5921871296890996e-05, + "loss": 4.2141, + "step": 396350 + }, + { + "epoch": 3.412534435261708, + "grad_norm": 4.0, + "learning_rate": 4.589908445579272e-05, + "loss": 4.3151, + "step": 396400 + }, + { + "epoch": 3.4129648760330578, + "grad_norm": 2.78125, + "learning_rate": 4.5876301585585246e-05, + "loss": 4.5092, + "step": 396450 + }, + { + "epoch": 3.4133953168044076, + "grad_norm": 4.15625, + "learning_rate": 4.5853522687940784e-05, + "loss": 4.2517, + "step": 396500 + }, + { + "epoch": 3.413825757575758, + "grad_norm": 3.390625, + "learning_rate": 4.583074776453127e-05, + "loss": 4.3639, + "step": 396550 + }, + { + "epoch": 3.4142561983471076, + "grad_norm": 5.34375, + "learning_rate": 4.5807976817028276e-05, + "loss": 4.5705, + "step": 396600 + }, + { + "epoch": 3.4146866391184574, + "grad_norm": 6.8125, + "learning_rate": 4.5785209847103225e-05, + "loss": 4.1948, + "step": 396650 + }, + { + "epoch": 3.415117079889807, + "grad_norm": 3.640625, + "learning_rate": 4.576244685642712e-05, + "loss": 4.4557, + "step": 396700 + }, + { + "epoch": 3.415547520661157, + "grad_norm": 3.734375, + "learning_rate": 4.573968784667073e-05, + "loss": 3.8626, + "step": 396750 + }, + { + "epoch": 3.4159779614325068, + "grad_norm": 3.359375, + "learning_rate": 4.57169328195045e-05, + "loss": 4.1515, + "step": 396800 + }, + { + "epoch": 3.4164084022038566, + "grad_norm": 1.890625, + "learning_rate": 4.569418177659861e-05, + "loss": 3.9, + "step": 396850 + }, + { + "epoch": 3.416838842975207, + "grad_norm": 3.078125, + "learning_rate": 4.5671434719622906e-05, + "loss": 4.3722, + "step": 396900 + }, + { + "epoch": 3.4172692837465566, + "grad_norm": 3.046875, + "learning_rate": 4.564869165024703e-05, + "loss": 4.2138, + "step": 396950 + }, + { + "epoch": 3.4176997245179064, + "grad_norm": 4.59375, + "learning_rate": 4.562595257014025e-05, + "loss": 4.2076, + "step": 397000 + }, + { + "epoch": 3.418130165289256, + "grad_norm": 2.625, + "learning_rate": 4.560321748097156e-05, + "loss": 4.2064, + "step": 397050 + }, + { + "epoch": 3.418560606060606, + "grad_norm": 3.921875, + "learning_rate": 4.558048638440967e-05, + "loss": 4.5278, + "step": 397100 + }, + { + "epoch": 3.4189910468319558, + "grad_norm": 2.75, + "learning_rate": 4.5557759282123006e-05, + "loss": 4.1936, + "step": 397150 + }, + { + "epoch": 3.419421487603306, + "grad_norm": 4.75, + "learning_rate": 4.553503617577962e-05, + "loss": 4.4387, + "step": 397200 + }, + { + "epoch": 3.419851928374656, + "grad_norm": 0.9453125, + "learning_rate": 4.5512317067047484e-05, + "loss": 3.9222, + "step": 397250 + }, + { + "epoch": 3.4202823691460056, + "grad_norm": 3.984375, + "learning_rate": 4.548960195759395e-05, + "loss": 4.4349, + "step": 397300 + }, + { + "epoch": 3.4207128099173554, + "grad_norm": 2.625, + "learning_rate": 4.5466890849086395e-05, + "loss": 4.4317, + "step": 397350 + }, + { + "epoch": 3.421143250688705, + "grad_norm": 2.9375, + "learning_rate": 4.5444183743191724e-05, + "loss": 4.2755, + "step": 397400 + }, + { + "epoch": 3.421573691460055, + "grad_norm": 3.09375, + "learning_rate": 4.542148064157655e-05, + "loss": 4.467, + "step": 397450 + }, + { + "epoch": 3.4220041322314048, + "grad_norm": 4.90625, + "learning_rate": 4.5398781545907345e-05, + "loss": 4.0659, + "step": 397500 + }, + { + "epoch": 3.422434573002755, + "grad_norm": 3.125, + "learning_rate": 4.5376086457850055e-05, + "loss": 4.3646, + "step": 397550 + }, + { + "epoch": 3.422865013774105, + "grad_norm": 2.921875, + "learning_rate": 4.535339537907045e-05, + "loss": 4.3562, + "step": 397600 + }, + { + "epoch": 3.4232954545454546, + "grad_norm": 2.296875, + "learning_rate": 4.5330708311234096e-05, + "loss": 4.47, + "step": 397650 + }, + { + "epoch": 3.4237258953168044, + "grad_norm": 4.78125, + "learning_rate": 4.530802525600613e-05, + "loss": 4.0313, + "step": 397700 + }, + { + "epoch": 3.424156336088154, + "grad_norm": 3.4375, + "learning_rate": 4.528534621505142e-05, + "loss": 4.5859, + "step": 397750 + }, + { + "epoch": 3.424586776859504, + "grad_norm": 4.75, + "learning_rate": 4.526267119003459e-05, + "loss": 4.327, + "step": 397800 + }, + { + "epoch": 3.425017217630854, + "grad_norm": 3.078125, + "learning_rate": 4.5240000182619924e-05, + "loss": 4.2197, + "step": 397850 + }, + { + "epoch": 3.425447658402204, + "grad_norm": 3.015625, + "learning_rate": 4.521733319447138e-05, + "loss": 4.1267, + "step": 397900 + }, + { + "epoch": 3.425878099173554, + "grad_norm": 3.015625, + "learning_rate": 4.5194670227252754e-05, + "loss": 4.3995, + "step": 397950 + }, + { + "epoch": 3.4263085399449036, + "grad_norm": 3.859375, + "learning_rate": 4.5172011282627414e-05, + "loss": 4.0103, + "step": 398000 + }, + { + "epoch": 3.4267389807162534, + "grad_norm": 0.85546875, + "learning_rate": 4.5149356362258486e-05, + "loss": 4.0549, + "step": 398050 + }, + { + "epoch": 3.427169421487603, + "grad_norm": 2.765625, + "learning_rate": 4.5126705467808785e-05, + "loss": 4.5051, + "step": 398100 + }, + { + "epoch": 3.427599862258953, + "grad_norm": 3.078125, + "learning_rate": 4.5104058600940805e-05, + "loss": 4.4772, + "step": 398150 + }, + { + "epoch": 3.428030303030303, + "grad_norm": 3.234375, + "learning_rate": 4.5081415763316845e-05, + "loss": 4.5994, + "step": 398200 + }, + { + "epoch": 3.428460743801653, + "grad_norm": 5.1875, + "learning_rate": 4.5058776956598804e-05, + "loss": 4.4596, + "step": 398250 + }, + { + "epoch": 3.428891184573003, + "grad_norm": 3.5625, + "learning_rate": 4.503614218244834e-05, + "loss": 4.3909, + "step": 398300 + }, + { + "epoch": 3.4293216253443526, + "grad_norm": 1.4296875, + "learning_rate": 4.501351144252677e-05, + "loss": 4.6276, + "step": 398350 + }, + { + "epoch": 3.4297520661157024, + "grad_norm": 2.125, + "learning_rate": 4.4990884738495154e-05, + "loss": 4.5762, + "step": 398400 + }, + { + "epoch": 3.430182506887052, + "grad_norm": 4.8125, + "learning_rate": 4.49682620720142e-05, + "loss": 4.1249, + "step": 398450 + }, + { + "epoch": 3.4306129476584024, + "grad_norm": 3.109375, + "learning_rate": 4.494564344474446e-05, + "loss": 4.5648, + "step": 398500 + }, + { + "epoch": 3.431043388429752, + "grad_norm": 4.46875, + "learning_rate": 4.492302885834606e-05, + "loss": 4.3008, + "step": 398550 + }, + { + "epoch": 3.431473829201102, + "grad_norm": 3.078125, + "learning_rate": 4.490041831447877e-05, + "loss": 4.5481, + "step": 398600 + }, + { + "epoch": 3.431904269972452, + "grad_norm": 3.28125, + "learning_rate": 4.4877811814802276e-05, + "loss": 4.3814, + "step": 398650 + }, + { + "epoch": 3.4323347107438016, + "grad_norm": 3.734375, + "learning_rate": 4.485520936097578e-05, + "loss": 4.3508, + "step": 398700 + }, + { + "epoch": 3.4327651515151514, + "grad_norm": 1.3046875, + "learning_rate": 4.483261095465824e-05, + "loss": 4.3446, + "step": 398750 + }, + { + "epoch": 3.433195592286501, + "grad_norm": 2.84375, + "learning_rate": 4.481001659750843e-05, + "loss": 4.0748, + "step": 398800 + }, + { + "epoch": 3.4336260330578514, + "grad_norm": 2.40625, + "learning_rate": 4.4787426291184586e-05, + "loss": 3.8616, + "step": 398850 + }, + { + "epoch": 3.434056473829201, + "grad_norm": 3.875, + "learning_rate": 4.47648400373449e-05, + "loss": 4.5028, + "step": 398900 + }, + { + "epoch": 3.434486914600551, + "grad_norm": 2.953125, + "learning_rate": 4.4742257837647107e-05, + "loss": 4.0689, + "step": 398950 + }, + { + "epoch": 3.434917355371901, + "grad_norm": 2.390625, + "learning_rate": 4.471967969374866e-05, + "loss": 4.047, + "step": 399000 + }, + { + "epoch": 3.434917355371901, + "eval_loss": 4.9932861328125, + "eval_runtime": 26.286, + "eval_samples_per_second": 24.348, + "eval_steps_per_second": 12.174, + "eval_tts_loss": 7.48067210551469, + "step": 399000 + }, + { + "epoch": 3.4353477961432506, + "grad_norm": 2.15625, + "learning_rate": 4.4697105607306865e-05, + "loss": 4.5423, + "step": 399050 + }, + { + "epoch": 3.4357782369146004, + "grad_norm": 2.921875, + "learning_rate": 4.467453557997849e-05, + "loss": 4.1795, + "step": 399100 + }, + { + "epoch": 3.4362086776859506, + "grad_norm": 3.09375, + "learning_rate": 4.465196961342014e-05, + "loss": 4.4131, + "step": 399150 + }, + { + "epoch": 3.4366391184573004, + "grad_norm": 3.234375, + "learning_rate": 4.4629407709288174e-05, + "loss": 4.1543, + "step": 399200 + }, + { + "epoch": 3.43706955922865, + "grad_norm": 3.28125, + "learning_rate": 4.460684986923854e-05, + "loss": 4.359, + "step": 399250 + }, + { + "epoch": 3.4375, + "grad_norm": 2.46875, + "learning_rate": 4.458429609492695e-05, + "loss": 4.0929, + "step": 399300 + }, + { + "epoch": 3.43793044077135, + "grad_norm": 2.796875, + "learning_rate": 4.456174638800879e-05, + "loss": 4.3975, + "step": 399350 + }, + { + "epoch": 3.4383608815426996, + "grad_norm": 3.0625, + "learning_rate": 4.453920075013916e-05, + "loss": 4.4661, + "step": 399400 + }, + { + "epoch": 3.4387913223140494, + "grad_norm": 4.25, + "learning_rate": 4.451665918297284e-05, + "loss": 4.5318, + "step": 399450 + }, + { + "epoch": 3.4392217630853996, + "grad_norm": 10.0, + "learning_rate": 4.4494121688164405e-05, + "loss": 4.4995, + "step": 399500 + }, + { + "epoch": 3.4396522038567494, + "grad_norm": 4.375, + "learning_rate": 4.4471588267368e-05, + "loss": 4.5414, + "step": 399550 + }, + { + "epoch": 3.440082644628099, + "grad_norm": 2.109375, + "learning_rate": 4.444905892223756e-05, + "loss": 3.9789, + "step": 399600 + }, + { + "epoch": 3.440513085399449, + "grad_norm": 3.109375, + "learning_rate": 4.442653365442666e-05, + "loss": 4.5508, + "step": 399650 + }, + { + "epoch": 3.440943526170799, + "grad_norm": 2.3125, + "learning_rate": 4.44040124655886e-05, + "loss": 4.3807, + "step": 399700 + }, + { + "epoch": 3.4413739669421486, + "grad_norm": 3.25, + "learning_rate": 4.438149535737642e-05, + "loss": 4.6795, + "step": 399750 + }, + { + "epoch": 3.441804407713499, + "grad_norm": 4.5, + "learning_rate": 4.435898233144283e-05, + "loss": 4.1523, + "step": 399800 + }, + { + "epoch": 3.4422348484848486, + "grad_norm": 2.765625, + "learning_rate": 4.433647338944023e-05, + "loss": 4.4798, + "step": 399850 + }, + { + "epoch": 3.4426652892561984, + "grad_norm": 2.890625, + "learning_rate": 4.43139685330207e-05, + "loss": 4.3965, + "step": 399900 + }, + { + "epoch": 3.443095730027548, + "grad_norm": 4.28125, + "learning_rate": 4.429146776383608e-05, + "loss": 4.6089, + "step": 399950 + }, + { + "epoch": 3.443526170798898, + "grad_norm": 5.78125, + "learning_rate": 4.4268971083537825e-05, + "loss": 4.2955, + "step": 400000 + }, + { + "epoch": 3.443956611570248, + "grad_norm": 3.828125, + "learning_rate": 4.4246478493777223e-05, + "loss": 3.9327, + "step": 400050 + }, + { + "epoch": 3.4443870523415976, + "grad_norm": 3.921875, + "learning_rate": 4.422398999620517e-05, + "loss": 4.4019, + "step": 400100 + }, + { + "epoch": 3.444817493112948, + "grad_norm": 3.9375, + "learning_rate": 4.420150559247219e-05, + "loss": 4.2561, + "step": 400150 + }, + { + "epoch": 3.4452479338842976, + "grad_norm": 2.3125, + "learning_rate": 4.417902528422867e-05, + "loss": 4.1378, + "step": 400200 + }, + { + "epoch": 3.4456783746556474, + "grad_norm": 2.296875, + "learning_rate": 4.4156549073124595e-05, + "loss": 4.1599, + "step": 400250 + }, + { + "epoch": 3.446108815426997, + "grad_norm": 6.5625, + "learning_rate": 4.4134076960809634e-05, + "loss": 4.5083, + "step": 400300 + }, + { + "epoch": 3.446539256198347, + "grad_norm": 5.15625, + "learning_rate": 4.41116089489333e-05, + "loss": 3.8569, + "step": 400350 + }, + { + "epoch": 3.446969696969697, + "grad_norm": 2.78125, + "learning_rate": 4.4089145039144544e-05, + "loss": 4.0468, + "step": 400400 + }, + { + "epoch": 3.447400137741047, + "grad_norm": 3.53125, + "learning_rate": 4.406668523309229e-05, + "loss": 3.9891, + "step": 400450 + }, + { + "epoch": 3.447830578512397, + "grad_norm": 6.0625, + "learning_rate": 4.404422953242499e-05, + "loss": 4.4894, + "step": 400500 + }, + { + "epoch": 3.4482610192837466, + "grad_norm": 3.421875, + "learning_rate": 4.402177793879085e-05, + "loss": 4.185, + "step": 400550 + }, + { + "epoch": 3.4486914600550964, + "grad_norm": 3.515625, + "learning_rate": 4.399933045383777e-05, + "loss": 4.3618, + "step": 400600 + }, + { + "epoch": 3.449121900826446, + "grad_norm": 3.28125, + "learning_rate": 4.397688707921335e-05, + "loss": 4.3885, + "step": 400650 + }, + { + "epoch": 3.449552341597796, + "grad_norm": 1.890625, + "learning_rate": 4.3954447816564836e-05, + "loss": 4.6036, + "step": 400700 + }, + { + "epoch": 3.449982782369146, + "grad_norm": 5.0, + "learning_rate": 4.393201266753931e-05, + "loss": 4.3906, + "step": 400750 + }, + { + "epoch": 3.450413223140496, + "grad_norm": 2.84375, + "learning_rate": 4.390958163378342e-05, + "loss": 4.8371, + "step": 400800 + }, + { + "epoch": 3.450843663911846, + "grad_norm": 4.15625, + "learning_rate": 4.388715471694357e-05, + "loss": 4.4369, + "step": 400850 + }, + { + "epoch": 3.4512741046831956, + "grad_norm": 3.140625, + "learning_rate": 4.386473191866581e-05, + "loss": 4.5882, + "step": 400900 + }, + { + "epoch": 3.4517045454545454, + "grad_norm": 1.7734375, + "learning_rate": 4.384231324059597e-05, + "loss": 4.1069, + "step": 400950 + }, + { + "epoch": 3.452134986225895, + "grad_norm": 3.890625, + "learning_rate": 4.381989868437946e-05, + "loss": 4.0956, + "step": 401000 + }, + { + "epoch": 3.452565426997245, + "grad_norm": 4.53125, + "learning_rate": 4.3797488251661555e-05, + "loss": 4.4168, + "step": 401050 + }, + { + "epoch": 3.4529958677685952, + "grad_norm": 2.421875, + "learning_rate": 4.3775081944087095e-05, + "loss": 4.2854, + "step": 401100 + }, + { + "epoch": 3.453426308539945, + "grad_norm": 1.8828125, + "learning_rate": 4.375267976330064e-05, + "loss": 3.9802, + "step": 401150 + }, + { + "epoch": 3.453856749311295, + "grad_norm": 2.40625, + "learning_rate": 4.373028171094648e-05, + "loss": 4.121, + "step": 401200 + }, + { + "epoch": 3.4542871900826446, + "grad_norm": 3.734375, + "learning_rate": 4.370788778866852e-05, + "loss": 4.2481, + "step": 401250 + }, + { + "epoch": 3.4547176308539944, + "grad_norm": 4.875, + "learning_rate": 4.3685497998110535e-05, + "loss": 4.3699, + "step": 401300 + }, + { + "epoch": 3.455148071625344, + "grad_norm": 3.90625, + "learning_rate": 4.366311234091586e-05, + "loss": 4.1837, + "step": 401350 + }, + { + "epoch": 3.455578512396694, + "grad_norm": 2.703125, + "learning_rate": 4.364073081872746e-05, + "loss": 4.6542, + "step": 401400 + }, + { + "epoch": 3.4560089531680442, + "grad_norm": 3.890625, + "learning_rate": 4.3618353433188194e-05, + "loss": 4.238, + "step": 401450 + }, + { + "epoch": 3.456439393939394, + "grad_norm": 3.984375, + "learning_rate": 4.3595980185940475e-05, + "loss": 4.3141, + "step": 401500 + }, + { + "epoch": 3.456869834710744, + "grad_norm": 5.4375, + "learning_rate": 4.357361107862642e-05, + "loss": 4.3467, + "step": 401550 + }, + { + "epoch": 3.4573002754820936, + "grad_norm": 2.578125, + "learning_rate": 4.3551246112887947e-05, + "loss": 4.3645, + "step": 401600 + }, + { + "epoch": 3.4577307162534434, + "grad_norm": 2.109375, + "learning_rate": 4.352888529036658e-05, + "loss": 4.2373, + "step": 401650 + }, + { + "epoch": 3.458161157024793, + "grad_norm": 4.40625, + "learning_rate": 4.350652861270347e-05, + "loss": 4.2433, + "step": 401700 + }, + { + "epoch": 3.4585915977961434, + "grad_norm": 4.03125, + "learning_rate": 4.3484176081539654e-05, + "loss": 4.3552, + "step": 401750 + }, + { + "epoch": 3.4590220385674932, + "grad_norm": 2.953125, + "learning_rate": 4.34618276985157e-05, + "loss": 4.3612, + "step": 401800 + }, + { + "epoch": 3.459452479338843, + "grad_norm": 1.4609375, + "learning_rate": 4.343948346527192e-05, + "loss": 4.4421, + "step": 401850 + }, + { + "epoch": 3.459882920110193, + "grad_norm": 3.265625, + "learning_rate": 4.341714338344843e-05, + "loss": 4.4969, + "step": 401900 + }, + { + "epoch": 3.4603133608815426, + "grad_norm": 5.21875, + "learning_rate": 4.3394807454684805e-05, + "loss": 3.903, + "step": 401950 + }, + { + "epoch": 3.4607438016528924, + "grad_norm": 5.1875, + "learning_rate": 4.337247568062056e-05, + "loss": 4.1216, + "step": 402000 + }, + { + "epoch": 3.4607438016528924, + "eval_loss": 4.993624210357666, + "eval_runtime": 23.5215, + "eval_samples_per_second": 27.209, + "eval_steps_per_second": 13.605, + "eval_tts_loss": 7.48834117052633, + "step": 402000 + }, + { + "epoch": 3.461174242424242, + "grad_norm": 2.265625, + "learning_rate": 4.3350148062894766e-05, + "loss": 4.5171, + "step": 402050 + }, + { + "epoch": 3.4616046831955924, + "grad_norm": 3.078125, + "learning_rate": 4.3327824603146217e-05, + "loss": 4.1878, + "step": 402100 + }, + { + "epoch": 3.4620351239669422, + "grad_norm": 3.65625, + "learning_rate": 4.330550530301341e-05, + "loss": 4.2243, + "step": 402150 + }, + { + "epoch": 3.462465564738292, + "grad_norm": 3.34375, + "learning_rate": 4.3283190164134526e-05, + "loss": 4.3377, + "step": 402200 + }, + { + "epoch": 3.462896005509642, + "grad_norm": 2.828125, + "learning_rate": 4.326087918814742e-05, + "loss": 4.2108, + "step": 402250 + }, + { + "epoch": 3.4633264462809916, + "grad_norm": 5.0625, + "learning_rate": 4.323857237668975e-05, + "loss": 4.1, + "step": 402300 + }, + { + "epoch": 3.4637568870523414, + "grad_norm": 4.5625, + "learning_rate": 4.321626973139873e-05, + "loss": 4.2767, + "step": 402350 + }, + { + "epoch": 3.4641873278236917, + "grad_norm": 1.28125, + "learning_rate": 4.319397125391135e-05, + "loss": 4.0428, + "step": 402400 + }, + { + "epoch": 3.4646177685950414, + "grad_norm": 5.34375, + "learning_rate": 4.317167694586425e-05, + "loss": 4.4062, + "step": 402450 + }, + { + "epoch": 3.4650482093663912, + "grad_norm": 3.1875, + "learning_rate": 4.314938680889379e-05, + "loss": 4.4809, + "step": 402500 + }, + { + "epoch": 3.465478650137741, + "grad_norm": 4.09375, + "learning_rate": 4.312710084463598e-05, + "loss": 4.3265, + "step": 402550 + }, + { + "epoch": 3.465909090909091, + "grad_norm": 5.46875, + "learning_rate": 4.3104819054726656e-05, + "loss": 4.1198, + "step": 402600 + }, + { + "epoch": 3.4663395316804406, + "grad_norm": 2.1875, + "learning_rate": 4.3082541440801196e-05, + "loss": 4.3553, + "step": 402650 + }, + { + "epoch": 3.4667699724517904, + "grad_norm": 2.671875, + "learning_rate": 4.3060268004494734e-05, + "loss": 4.4858, + "step": 402700 + }, + { + "epoch": 3.4672004132231407, + "grad_norm": 2.484375, + "learning_rate": 4.303799874744208e-05, + "loss": 4.2628, + "step": 402750 + }, + { + "epoch": 3.4676308539944904, + "grad_norm": 3.015625, + "learning_rate": 4.301573367127777e-05, + "loss": 4.0898, + "step": 402800 + }, + { + "epoch": 3.4680612947658402, + "grad_norm": 2.203125, + "learning_rate": 4.2993472777635956e-05, + "loss": 4.3174, + "step": 402850 + }, + { + "epoch": 3.46849173553719, + "grad_norm": 4.8125, + "learning_rate": 4.2971216068150675e-05, + "loss": 4.4431, + "step": 402900 + }, + { + "epoch": 3.46892217630854, + "grad_norm": 2.765625, + "learning_rate": 4.2948963544455354e-05, + "loss": 3.8362, + "step": 402950 + }, + { + "epoch": 3.4693526170798896, + "grad_norm": 3.375, + "learning_rate": 4.2926715208183396e-05, + "loss": 4.0822, + "step": 403000 + }, + { + "epoch": 3.46978305785124, + "grad_norm": 6.09375, + "learning_rate": 4.2904471060967754e-05, + "loss": 3.732, + "step": 403050 + }, + { + "epoch": 3.4702134986225897, + "grad_norm": 3.375, + "learning_rate": 4.288223110444105e-05, + "loss": 4.4691, + "step": 403100 + }, + { + "epoch": 3.4706439393939394, + "grad_norm": 2.9375, + "learning_rate": 4.285999534023576e-05, + "loss": 4.2009, + "step": 403150 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 2.546875, + "learning_rate": 4.283776376998384e-05, + "loss": 4.266, + "step": 403200 + }, + { + "epoch": 3.471504820936639, + "grad_norm": 5.5, + "learning_rate": 4.2815536395317035e-05, + "loss": 4.082, + "step": 403250 + }, + { + "epoch": 3.471935261707989, + "grad_norm": 2.78125, + "learning_rate": 4.279331321786686e-05, + "loss": 4.244, + "step": 403300 + }, + { + "epoch": 3.4723657024793386, + "grad_norm": 1.65625, + "learning_rate": 4.277109423926441e-05, + "loss": 4.423, + "step": 403350 + }, + { + "epoch": 3.472796143250689, + "grad_norm": 4.28125, + "learning_rate": 4.274887946114048e-05, + "loss": 4.6118, + "step": 403400 + }, + { + "epoch": 3.4732265840220387, + "grad_norm": 3.3125, + "learning_rate": 4.2726668885125684e-05, + "loss": 4.447, + "step": 403450 + }, + { + "epoch": 3.4736570247933884, + "grad_norm": 2.765625, + "learning_rate": 4.27044625128501e-05, + "loss": 4.0383, + "step": 403500 + }, + { + "epoch": 3.4740874655647382, + "grad_norm": 1.7265625, + "learning_rate": 4.268226034594371e-05, + "loss": 4.3027, + "step": 403550 + }, + { + "epoch": 3.474517906336088, + "grad_norm": 2.5625, + "learning_rate": 4.266006238603612e-05, + "loss": 4.303, + "step": 403600 + }, + { + "epoch": 3.474948347107438, + "grad_norm": 2.546875, + "learning_rate": 4.263786863475655e-05, + "loss": 4.2431, + "step": 403650 + }, + { + "epoch": 3.475378787878788, + "grad_norm": 3.234375, + "learning_rate": 4.2615679093734016e-05, + "loss": 3.9285, + "step": 403700 + }, + { + "epoch": 3.475809228650138, + "grad_norm": 2.5, + "learning_rate": 4.259349376459718e-05, + "loss": 4.5531, + "step": 403750 + }, + { + "epoch": 3.4762396694214877, + "grad_norm": 2.4375, + "learning_rate": 4.2571312648974347e-05, + "loss": 4.343, + "step": 403800 + }, + { + "epoch": 3.4766701101928374, + "grad_norm": 3.90625, + "learning_rate": 4.254913574849364e-05, + "loss": 4.1313, + "step": 403850 + }, + { + "epoch": 3.4771005509641872, + "grad_norm": 4.875, + "learning_rate": 4.2526963064782754e-05, + "loss": 4.2954, + "step": 403900 + }, + { + "epoch": 3.477530991735537, + "grad_norm": 6.15625, + "learning_rate": 4.2504794599469134e-05, + "loss": 4.2832, + "step": 403950 + }, + { + "epoch": 3.477961432506887, + "grad_norm": 2.171875, + "learning_rate": 4.2482630354179875e-05, + "loss": 3.9697, + "step": 404000 + }, + { + "epoch": 3.478391873278237, + "grad_norm": 2.78125, + "learning_rate": 4.24604703305418e-05, + "loss": 4.263, + "step": 404050 + }, + { + "epoch": 3.478822314049587, + "grad_norm": 4.1875, + "learning_rate": 4.243831453018137e-05, + "loss": 4.1289, + "step": 404100 + }, + { + "epoch": 3.4792527548209367, + "grad_norm": 3.734375, + "learning_rate": 4.2416162954724836e-05, + "loss": 4.5328, + "step": 404150 + }, + { + "epoch": 3.4796831955922864, + "grad_norm": 1.78125, + "learning_rate": 4.2394015605798054e-05, + "loss": 4.4685, + "step": 404200 + }, + { + "epoch": 3.4801136363636362, + "grad_norm": 3.5625, + "learning_rate": 4.237187248502659e-05, + "loss": 4.0868, + "step": 404250 + }, + { + "epoch": 3.480544077134986, + "grad_norm": 3.0625, + "learning_rate": 4.234973359403568e-05, + "loss": 4.0506, + "step": 404300 + }, + { + "epoch": 3.4809745179063363, + "grad_norm": 3.515625, + "learning_rate": 4.23275989344503e-05, + "loss": 4.3471, + "step": 404350 + }, + { + "epoch": 3.481404958677686, + "grad_norm": 4.09375, + "learning_rate": 4.230546850789502e-05, + "loss": 4.051, + "step": 404400 + }, + { + "epoch": 3.481835399449036, + "grad_norm": 3.90625, + "learning_rate": 4.22833423159943e-05, + "loss": 4.4472, + "step": 404450 + }, + { + "epoch": 3.4822658402203857, + "grad_norm": 3.03125, + "learning_rate": 4.2261220360372e-05, + "loss": 4.3289, + "step": 404500 + }, + { + "epoch": 3.4826962809917354, + "grad_norm": 2.5625, + "learning_rate": 4.223910264265194e-05, + "loss": 3.6602, + "step": 404550 + }, + { + "epoch": 3.4831267217630852, + "grad_norm": 3.328125, + "learning_rate": 4.2216989164457466e-05, + "loss": 4.0248, + "step": 404600 + }, + { + "epoch": 3.483557162534435, + "grad_norm": 0.68359375, + "learning_rate": 4.2194879927411625e-05, + "loss": 4.2495, + "step": 404650 + }, + { + "epoch": 3.4839876033057853, + "grad_norm": 4.15625, + "learning_rate": 4.21727749331373e-05, + "loss": 4.3664, + "step": 404700 + }, + { + "epoch": 3.484418044077135, + "grad_norm": 2.953125, + "learning_rate": 4.2150674183256844e-05, + "loss": 4.5422, + "step": 404750 + }, + { + "epoch": 3.484848484848485, + "grad_norm": 2.734375, + "learning_rate": 4.21285776793924e-05, + "loss": 4.0481, + "step": 404800 + }, + { + "epoch": 3.4852789256198347, + "grad_norm": 4.71875, + "learning_rate": 4.2106485423165876e-05, + "loss": 4.6, + "step": 404850 + }, + { + "epoch": 3.4857093663911844, + "grad_norm": 4.3125, + "learning_rate": 4.2084397416198764e-05, + "loss": 4.309, + "step": 404900 + }, + { + "epoch": 3.4861398071625342, + "grad_norm": 3.234375, + "learning_rate": 4.2062313660112265e-05, + "loss": 4.3773, + "step": 404950 + }, + { + "epoch": 3.4865702479338845, + "grad_norm": 2.046875, + "learning_rate": 4.2040234156527295e-05, + "loss": 4.2435, + "step": 405000 + }, + { + "epoch": 3.4865702479338845, + "eval_loss": 4.992835998535156, + "eval_runtime": 23.5141, + "eval_samples_per_second": 27.218, + "eval_steps_per_second": 13.609, + "eval_tts_loss": 7.475708443343599, + "step": 405000 + }, + { + "epoch": 3.4870006887052343, + "grad_norm": 1.4296875, + "learning_rate": 4.201815890706442e-05, + "loss": 4.4341, + "step": 405050 + }, + { + "epoch": 3.487431129476584, + "grad_norm": 3.4375, + "learning_rate": 4.1996087913343905e-05, + "loss": 4.4096, + "step": 405100 + }, + { + "epoch": 3.487861570247934, + "grad_norm": 3.0625, + "learning_rate": 4.197402117698578e-05, + "loss": 4.2599, + "step": 405150 + }, + { + "epoch": 3.4882920110192837, + "grad_norm": 2.015625, + "learning_rate": 4.195195869960964e-05, + "loss": 4.16, + "step": 405200 + }, + { + "epoch": 3.4887224517906334, + "grad_norm": 3.140625, + "learning_rate": 4.192990048283485e-05, + "loss": 4.1305, + "step": 405250 + }, + { + "epoch": 3.4891528925619832, + "grad_norm": 3.15625, + "learning_rate": 4.190784652828043e-05, + "loss": 4.7154, + "step": 405300 + }, + { + "epoch": 3.4895833333333335, + "grad_norm": 2.328125, + "learning_rate": 4.188579683756503e-05, + "loss": 4.6028, + "step": 405350 + }, + { + "epoch": 3.4900137741046833, + "grad_norm": 2.75, + "learning_rate": 4.186375141230715e-05, + "loss": 4.2713, + "step": 405400 + }, + { + "epoch": 3.490444214876033, + "grad_norm": 4.1875, + "learning_rate": 4.1841710254124836e-05, + "loss": 4.6832, + "step": 405450 + }, + { + "epoch": 3.490874655647383, + "grad_norm": 4.21875, + "learning_rate": 4.181967336463586e-05, + "loss": 4.3429, + "step": 405500 + }, + { + "epoch": 3.4913050964187327, + "grad_norm": 3.0625, + "learning_rate": 4.1797640745457675e-05, + "loss": 4.5516, + "step": 405550 + }, + { + "epoch": 3.491735537190083, + "grad_norm": 3.140625, + "learning_rate": 4.177561239820744e-05, + "loss": 4.2398, + "step": 405600 + }, + { + "epoch": 3.4921659779614327, + "grad_norm": 1.4375, + "learning_rate": 4.175358832450195e-05, + "loss": 4.4105, + "step": 405650 + }, + { + "epoch": 3.4925964187327825, + "grad_norm": 4.3125, + "learning_rate": 4.1731568525957774e-05, + "loss": 4.5158, + "step": 405700 + }, + { + "epoch": 3.4930268595041323, + "grad_norm": 2.671875, + "learning_rate": 4.1709553004191115e-05, + "loss": 4.3405, + "step": 405750 + }, + { + "epoch": 3.493457300275482, + "grad_norm": 4.15625, + "learning_rate": 4.168754176081785e-05, + "loss": 4.1078, + "step": 405800 + }, + { + "epoch": 3.493887741046832, + "grad_norm": 3.28125, + "learning_rate": 4.166553479745356e-05, + "loss": 4.3578, + "step": 405850 + }, + { + "epoch": 3.4943181818181817, + "grad_norm": 3.328125, + "learning_rate": 4.16435321157135e-05, + "loss": 4.5388, + "step": 405900 + }, + { + "epoch": 3.4947486225895315, + "grad_norm": 3.53125, + "learning_rate": 4.162153371721259e-05, + "loss": 4.3248, + "step": 405950 + }, + { + "epoch": 3.4951790633608817, + "grad_norm": 5.46875, + "learning_rate": 4.159953960356557e-05, + "loss": 4.2265, + "step": 406000 + }, + { + "epoch": 3.4956095041322315, + "grad_norm": 3.0, + "learning_rate": 4.1577549776386625e-05, + "loss": 4.2892, + "step": 406050 + }, + { + "epoch": 3.4960399449035813, + "grad_norm": 3.359375, + "learning_rate": 4.155556423728986e-05, + "loss": 4.5105, + "step": 406100 + }, + { + "epoch": 3.496470385674931, + "grad_norm": 4.75, + "learning_rate": 4.153358298788893e-05, + "loss": 4.5752, + "step": 406150 + }, + { + "epoch": 3.496900826446281, + "grad_norm": 4.21875, + "learning_rate": 4.151160602979719e-05, + "loss": 4.4671, + "step": 406200 + }, + { + "epoch": 3.497331267217631, + "grad_norm": 3.0625, + "learning_rate": 4.148963336462779e-05, + "loss": 4.4357, + "step": 406250 + }, + { + "epoch": 3.497761707988981, + "grad_norm": 4.75, + "learning_rate": 4.146766499399337e-05, + "loss": 4.3913, + "step": 406300 + }, + { + "epoch": 3.4981921487603307, + "grad_norm": 5.03125, + "learning_rate": 4.144570091950638e-05, + "loss": 4.2077, + "step": 406350 + }, + { + "epoch": 3.4986225895316805, + "grad_norm": 2.234375, + "learning_rate": 4.142374114277899e-05, + "loss": 4.0173, + "step": 406400 + }, + { + "epoch": 3.4990530303030303, + "grad_norm": 2.34375, + "learning_rate": 4.140178566542298e-05, + "loss": 4.4509, + "step": 406450 + }, + { + "epoch": 3.49948347107438, + "grad_norm": 3.015625, + "learning_rate": 4.1379834489049806e-05, + "loss": 4.2229, + "step": 406500 + }, + { + "epoch": 3.49991391184573, + "grad_norm": 4.40625, + "learning_rate": 4.1357887615270676e-05, + "loss": 4.0933, + "step": 406550 + }, + { + "epoch": 3.5003443526170797, + "grad_norm": 2.171875, + "learning_rate": 4.133594504569641e-05, + "loss": 4.3448, + "step": 406600 + }, + { + "epoch": 3.50077479338843, + "grad_norm": 2.09375, + "learning_rate": 4.131400678193753e-05, + "loss": 4.1662, + "step": 406650 + }, + { + "epoch": 3.5012052341597797, + "grad_norm": 2.75, + "learning_rate": 4.129207282560431e-05, + "loss": 3.9231, + "step": 406700 + }, + { + "epoch": 3.5016356749311295, + "grad_norm": 3.0, + "learning_rate": 4.127014317830664e-05, + "loss": 4.3492, + "step": 406750 + }, + { + "epoch": 3.5020661157024793, + "grad_norm": 4.4375, + "learning_rate": 4.12482178416541e-05, + "loss": 4.5736, + "step": 406800 + }, + { + "epoch": 3.502496556473829, + "grad_norm": 4.34375, + "learning_rate": 4.122629681725597e-05, + "loss": 4.471, + "step": 406850 + }, + { + "epoch": 3.5029269972451793, + "grad_norm": 2.53125, + "learning_rate": 4.120438010672115e-05, + "loss": 4.046, + "step": 406900 + }, + { + "epoch": 3.503357438016529, + "grad_norm": 1.2734375, + "learning_rate": 4.118246771165838e-05, + "loss": 4.2873, + "step": 406950 + }, + { + "epoch": 3.503787878787879, + "grad_norm": 5.3125, + "learning_rate": 4.1160559633675954e-05, + "loss": 4.3485, + "step": 407000 + }, + { + "epoch": 3.5042183195592287, + "grad_norm": 2.84375, + "learning_rate": 4.113865587438178e-05, + "loss": 4.4273, + "step": 407050 + }, + { + "epoch": 3.5046487603305785, + "grad_norm": 2.734375, + "learning_rate": 4.111675643538366e-05, + "loss": 4.5408, + "step": 407100 + }, + { + "epoch": 3.5050792011019283, + "grad_norm": 3.625, + "learning_rate": 4.109486131828893e-05, + "loss": 4.4453, + "step": 407150 + }, + { + "epoch": 3.505509641873278, + "grad_norm": 3.359375, + "learning_rate": 4.107297052470461e-05, + "loss": 4.3029, + "step": 407200 + }, + { + "epoch": 3.505940082644628, + "grad_norm": 4.8125, + "learning_rate": 4.10510840562375e-05, + "loss": 4.2998, + "step": 407250 + }, + { + "epoch": 3.506370523415978, + "grad_norm": 4.46875, + "learning_rate": 4.102920191449402e-05, + "loss": 4.3553, + "step": 407300 + }, + { + "epoch": 3.506800964187328, + "grad_norm": 4.71875, + "learning_rate": 4.100732410108018e-05, + "loss": 4.1658, + "step": 407350 + }, + { + "epoch": 3.5072314049586777, + "grad_norm": 1.3984375, + "learning_rate": 4.098545061760185e-05, + "loss": 4.2452, + "step": 407400 + }, + { + "epoch": 3.5076618457300275, + "grad_norm": 1.5625, + "learning_rate": 4.096358146566447e-05, + "loss": 4.3528, + "step": 407450 + }, + { + "epoch": 3.5080922865013773, + "grad_norm": 1.640625, + "learning_rate": 4.0941716646873165e-05, + "loss": 3.9162, + "step": 407500 + }, + { + "epoch": 3.5085227272727275, + "grad_norm": 3.421875, + "learning_rate": 4.091985616283286e-05, + "loss": 4.3284, + "step": 407550 + }, + { + "epoch": 3.5089531680440773, + "grad_norm": 4.4375, + "learning_rate": 4.089800001514792e-05, + "loss": 4.3556, + "step": 407600 + }, + { + "epoch": 3.509383608815427, + "grad_norm": 1.265625, + "learning_rate": 4.087614820542266e-05, + "loss": 4.0045, + "step": 407650 + }, + { + "epoch": 3.509814049586777, + "grad_norm": 4.625, + "learning_rate": 4.085430073526091e-05, + "loss": 3.9775, + "step": 407700 + }, + { + "epoch": 3.5102444903581267, + "grad_norm": 4.78125, + "learning_rate": 4.083245760626619e-05, + "loss": 4.5104, + "step": 407750 + }, + { + "epoch": 3.5106749311294765, + "grad_norm": 6.3125, + "learning_rate": 4.081061882004186e-05, + "loss": 4.5451, + "step": 407800 + }, + { + "epoch": 3.5111053719008263, + "grad_norm": 3.015625, + "learning_rate": 4.0788784378190725e-05, + "loss": 3.7941, + "step": 407850 + }, + { + "epoch": 3.511535812672176, + "grad_norm": 3.859375, + "learning_rate": 4.0766954282315375e-05, + "loss": 4.0777, + "step": 407900 + }, + { + "epoch": 3.5119662534435263, + "grad_norm": 1.625, + "learning_rate": 4.074512853401818e-05, + "loss": 4.3275, + "step": 407950 + }, + { + "epoch": 3.512396694214876, + "grad_norm": 2.390625, + "learning_rate": 4.072330713490106e-05, + "loss": 4.2221, + "step": 408000 + }, + { + "epoch": 3.512396694214876, + "eval_loss": 4.992944240570068, + "eval_runtime": 24.6473, + "eval_samples_per_second": 25.966, + "eval_steps_per_second": 12.983, + "eval_tts_loss": 7.486227808068295, + "step": 408000 + }, + { + "epoch": 3.512827134986226, + "grad_norm": 1.890625, + "learning_rate": 4.0701490086565665e-05, + "loss": 4.4555, + "step": 408050 + }, + { + "epoch": 3.5132575757575757, + "grad_norm": 5.09375, + "learning_rate": 4.06796773906133e-05, + "loss": 4.5868, + "step": 408100 + }, + { + "epoch": 3.5136880165289255, + "grad_norm": 3.984375, + "learning_rate": 4.065786904864499e-05, + "loss": 4.5604, + "step": 408150 + }, + { + "epoch": 3.5141184573002757, + "grad_norm": 2.671875, + "learning_rate": 4.063606506226137e-05, + "loss": 4.4013, + "step": 408200 + }, + { + "epoch": 3.5145488980716255, + "grad_norm": 2.5, + "learning_rate": 4.06142654330629e-05, + "loss": 4.4633, + "step": 408250 + }, + { + "epoch": 3.5149793388429753, + "grad_norm": 3.296875, + "learning_rate": 4.0592470162649577e-05, + "loss": 4.115, + "step": 408300 + }, + { + "epoch": 3.515409779614325, + "grad_norm": 3.234375, + "learning_rate": 4.0570679252621116e-05, + "loss": 4.6039, + "step": 408350 + }, + { + "epoch": 3.515840220385675, + "grad_norm": 3.171875, + "learning_rate": 4.054889270457693e-05, + "loss": 4.0557, + "step": 408400 + }, + { + "epoch": 3.5162706611570247, + "grad_norm": 1.6640625, + "learning_rate": 4.0527110520116076e-05, + "loss": 4.4601, + "step": 408450 + }, + { + "epoch": 3.5167011019283745, + "grad_norm": 2.90625, + "learning_rate": 4.0505332700837386e-05, + "loss": 4.0398, + "step": 408500 + }, + { + "epoch": 3.5171315426997243, + "grad_norm": 4.25, + "learning_rate": 4.048355924833931e-05, + "loss": 4.4516, + "step": 408550 + }, + { + "epoch": 3.5175619834710745, + "grad_norm": 3.40625, + "learning_rate": 4.0461790164219846e-05, + "loss": 4.5029, + "step": 408600 + }, + { + "epoch": 3.5179924242424243, + "grad_norm": 1.9375, + "learning_rate": 4.0440025450076934e-05, + "loss": 4.0985, + "step": 408650 + }, + { + "epoch": 3.518422865013774, + "grad_norm": 4.03125, + "learning_rate": 4.0418265107508e-05, + "loss": 3.9489, + "step": 408700 + }, + { + "epoch": 3.518853305785124, + "grad_norm": 4.3125, + "learning_rate": 4.039650913811018e-05, + "loss": 4.624, + "step": 408750 + }, + { + "epoch": 3.5192837465564737, + "grad_norm": 4.5, + "learning_rate": 4.037475754348042e-05, + "loss": 4.6938, + "step": 408800 + }, + { + "epoch": 3.519714187327824, + "grad_norm": 3.59375, + "learning_rate": 4.035301032521515e-05, + "loss": 4.0894, + "step": 408850 + }, + { + "epoch": 3.5201446280991737, + "grad_norm": 3.59375, + "learning_rate": 4.033126748491053e-05, + "loss": 4.375, + "step": 408900 + }, + { + "epoch": 3.5205750688705235, + "grad_norm": 6.53125, + "learning_rate": 4.030952902416254e-05, + "loss": 4.6975, + "step": 408950 + }, + { + "epoch": 3.5210055096418733, + "grad_norm": 3.640625, + "learning_rate": 4.028779494456669e-05, + "loss": 4.6206, + "step": 409000 + }, + { + "epoch": 3.521435950413223, + "grad_norm": 2.40625, + "learning_rate": 4.026606524771819e-05, + "loss": 4.1971, + "step": 409050 + }, + { + "epoch": 3.521866391184573, + "grad_norm": 1.734375, + "learning_rate": 4.024433993521204e-05, + "loss": 4.042, + "step": 409100 + }, + { + "epoch": 3.5222968319559227, + "grad_norm": 3.09375, + "learning_rate": 4.0222619008642706e-05, + "loss": 4.1495, + "step": 409150 + }, + { + "epoch": 3.5227272727272725, + "grad_norm": 3.421875, + "learning_rate": 4.020090246960456e-05, + "loss": 4.449, + "step": 409200 + }, + { + "epoch": 3.5231577134986227, + "grad_norm": 2.953125, + "learning_rate": 4.017919031969151e-05, + "loss": 4.4549, + "step": 409250 + }, + { + "epoch": 3.5235881542699725, + "grad_norm": 1.7109375, + "learning_rate": 4.015748256049717e-05, + "loss": 4.3652, + "step": 409300 + }, + { + "epoch": 3.5240185950413223, + "grad_norm": 2.234375, + "learning_rate": 4.013577919361488e-05, + "loss": 4.4345, + "step": 409350 + }, + { + "epoch": 3.524449035812672, + "grad_norm": 3.921875, + "learning_rate": 4.0114080220637576e-05, + "loss": 4.5185, + "step": 409400 + }, + { + "epoch": 3.524879476584022, + "grad_norm": 1.6796875, + "learning_rate": 4.0092385643157905e-05, + "loss": 4.3577, + "step": 409450 + }, + { + "epoch": 3.525309917355372, + "grad_norm": 3.40625, + "learning_rate": 4.0070695462768274e-05, + "loss": 4.2808, + "step": 409500 + }, + { + "epoch": 3.525740358126722, + "grad_norm": 6.34375, + "learning_rate": 4.004900968106064e-05, + "loss": 4.2734, + "step": 409550 + }, + { + "epoch": 3.5261707988980717, + "grad_norm": 2.328125, + "learning_rate": 4.002732829962673e-05, + "loss": 4.27, + "step": 409600 + }, + { + "epoch": 3.5266012396694215, + "grad_norm": 2.609375, + "learning_rate": 4.000565132005788e-05, + "loss": 4.4383, + "step": 409650 + }, + { + "epoch": 3.5270316804407713, + "grad_norm": 3.15625, + "learning_rate": 3.9983978743945136e-05, + "loss": 3.9027, + "step": 409700 + }, + { + "epoch": 3.527462121212121, + "grad_norm": 2.765625, + "learning_rate": 3.996231057287919e-05, + "loss": 4.0727, + "step": 409750 + }, + { + "epoch": 3.527892561983471, + "grad_norm": 2.046875, + "learning_rate": 3.994064680845051e-05, + "loss": 4.214, + "step": 409800 + }, + { + "epoch": 3.5283230027548207, + "grad_norm": 3.09375, + "learning_rate": 3.991898745224913e-05, + "loss": 4.6483, + "step": 409850 + }, + { + "epoch": 3.528753443526171, + "grad_norm": 2.96875, + "learning_rate": 3.9897332505864816e-05, + "loss": 4.1834, + "step": 409900 + }, + { + "epoch": 3.5291838842975207, + "grad_norm": 2.265625, + "learning_rate": 3.987568197088697e-05, + "loss": 4.1743, + "step": 409950 + }, + { + "epoch": 3.5296143250688705, + "grad_norm": 3.390625, + "learning_rate": 3.985403584890468e-05, + "loss": 4.3048, + "step": 410000 + }, + { + "epoch": 3.5300447658402203, + "grad_norm": 4.40625, + "learning_rate": 3.983239414150678e-05, + "loss": 4.2697, + "step": 410050 + }, + { + "epoch": 3.53047520661157, + "grad_norm": 2.265625, + "learning_rate": 3.9810756850281727e-05, + "loss": 4.031, + "step": 410100 + }, + { + "epoch": 3.5309056473829203, + "grad_norm": 2.296875, + "learning_rate": 3.9789123976817556e-05, + "loss": 4.0189, + "step": 410150 + }, + { + "epoch": 3.53133608815427, + "grad_norm": 2.375, + "learning_rate": 3.976749552270218e-05, + "loss": 4.4259, + "step": 410200 + }, + { + "epoch": 3.53176652892562, + "grad_norm": 2.296875, + "learning_rate": 3.974587148952302e-05, + "loss": 4.2886, + "step": 410250 + }, + { + "epoch": 3.5321969696969697, + "grad_norm": 4.375, + "learning_rate": 3.972425187886724e-05, + "loss": 4.5847, + "step": 410300 + }, + { + "epoch": 3.5326274104683195, + "grad_norm": 3.359375, + "learning_rate": 3.970263669232175e-05, + "loss": 4.3887, + "step": 410350 + }, + { + "epoch": 3.5330578512396693, + "grad_norm": 2.515625, + "learning_rate": 3.968102593147295e-05, + "loss": 4.2217, + "step": 410400 + }, + { + "epoch": 3.533488292011019, + "grad_norm": 3.21875, + "learning_rate": 3.965941959790703e-05, + "loss": 3.98, + "step": 410450 + }, + { + "epoch": 3.533918732782369, + "grad_norm": 3.609375, + "learning_rate": 3.9637817693209936e-05, + "loss": 4.2807, + "step": 410500 + }, + { + "epoch": 3.534349173553719, + "grad_norm": 1.484375, + "learning_rate": 3.9616220218967134e-05, + "loss": 4.2255, + "step": 410550 + }, + { + "epoch": 3.534779614325069, + "grad_norm": 2.71875, + "learning_rate": 3.959462717676385e-05, + "loss": 4.3583, + "step": 410600 + }, + { + "epoch": 3.5352100550964187, + "grad_norm": 5.21875, + "learning_rate": 3.957303856818497e-05, + "loss": 4.314, + "step": 410650 + }, + { + "epoch": 3.5356404958677685, + "grad_norm": 4.65625, + "learning_rate": 3.955145439481502e-05, + "loss": 4.5294, + "step": 410700 + }, + { + "epoch": 3.5360709366391183, + "grad_norm": 3.84375, + "learning_rate": 3.9529874658238287e-05, + "loss": 4.3699, + "step": 410750 + }, + { + "epoch": 3.5365013774104685, + "grad_norm": 3.5625, + "learning_rate": 3.9508299360038645e-05, + "loss": 4.4296, + "step": 410800 + }, + { + "epoch": 3.5369318181818183, + "grad_norm": 1.9453125, + "learning_rate": 3.9486728501799685e-05, + "loss": 4.2292, + "step": 410850 + }, + { + "epoch": 3.537362258953168, + "grad_norm": 3.328125, + "learning_rate": 3.946516208510466e-05, + "loss": 4.158, + "step": 410900 + }, + { + "epoch": 3.537792699724518, + "grad_norm": 2.71875, + "learning_rate": 3.944360011153648e-05, + "loss": 4.2304, + "step": 410950 + }, + { + "epoch": 3.5382231404958677, + "grad_norm": 5.3125, + "learning_rate": 3.9422042582677743e-05, + "loss": 4.4766, + "step": 411000 + }, + { + "epoch": 3.5382231404958677, + "eval_loss": 4.992468357086182, + "eval_runtime": 23.829, + "eval_samples_per_second": 26.858, + "eval_steps_per_second": 13.429, + "eval_tts_loss": 7.48197030355066, + "step": 411000 + }, + { + "epoch": 3.5386535812672175, + "grad_norm": 2.859375, + "learning_rate": 3.940048950011077e-05, + "loss": 4.786, + "step": 411050 + }, + { + "epoch": 3.5390840220385673, + "grad_norm": 2.671875, + "learning_rate": 3.937894086541748e-05, + "loss": 4.2958, + "step": 411100 + }, + { + "epoch": 3.539514462809917, + "grad_norm": 4.34375, + "learning_rate": 3.9357396680179504e-05, + "loss": 4.6541, + "step": 411150 + }, + { + "epoch": 3.5399449035812673, + "grad_norm": 3.296875, + "learning_rate": 3.933585694597814e-05, + "loss": 4.1217, + "step": 411200 + }, + { + "epoch": 3.540375344352617, + "grad_norm": 3.40625, + "learning_rate": 3.931432166439435e-05, + "loss": 4.431, + "step": 411250 + }, + { + "epoch": 3.540805785123967, + "grad_norm": 3.265625, + "learning_rate": 3.929279083700875e-05, + "loss": 4.3732, + "step": 411300 + }, + { + "epoch": 3.5412362258953167, + "grad_norm": 3.015625, + "learning_rate": 3.9271264465401724e-05, + "loss": 4.3619, + "step": 411350 + }, + { + "epoch": 3.5416666666666665, + "grad_norm": 1.265625, + "learning_rate": 3.924974255115321e-05, + "loss": 4.2284, + "step": 411400 + }, + { + "epoch": 3.5420971074380168, + "grad_norm": 3.3125, + "learning_rate": 3.922822509584291e-05, + "loss": 4.2226, + "step": 411450 + }, + { + "epoch": 3.5425275482093666, + "grad_norm": 4.375, + "learning_rate": 3.9206712101050114e-05, + "loss": 4.5041, + "step": 411500 + }, + { + "epoch": 3.5429579889807163, + "grad_norm": 3.953125, + "learning_rate": 3.9185203568353856e-05, + "loss": 4.0905, + "step": 411550 + }, + { + "epoch": 3.543388429752066, + "grad_norm": 4.03125, + "learning_rate": 3.9163699499332776e-05, + "loss": 4.6507, + "step": 411600 + }, + { + "epoch": 3.543818870523416, + "grad_norm": 3.203125, + "learning_rate": 3.914219989556532e-05, + "loss": 4.4417, + "step": 411650 + }, + { + "epoch": 3.5442493112947657, + "grad_norm": 1.9765625, + "learning_rate": 3.91207047586294e-05, + "loss": 4.3321, + "step": 411700 + }, + { + "epoch": 3.5446797520661155, + "grad_norm": 4.71875, + "learning_rate": 3.909921409010279e-05, + "loss": 4.1627, + "step": 411750 + }, + { + "epoch": 3.5451101928374653, + "grad_norm": 2.8125, + "learning_rate": 3.907772789156283e-05, + "loss": 4.2878, + "step": 411800 + }, + { + "epoch": 3.5455406336088156, + "grad_norm": 2.625, + "learning_rate": 3.905624616458654e-05, + "loss": 3.9459, + "step": 411850 + }, + { + "epoch": 3.5459710743801653, + "grad_norm": 2.765625, + "learning_rate": 3.903476891075073e-05, + "loss": 4.1009, + "step": 411900 + }, + { + "epoch": 3.546401515151515, + "grad_norm": 1.40625, + "learning_rate": 3.901329613163166e-05, + "loss": 4.3531, + "step": 411950 + }, + { + "epoch": 3.546831955922865, + "grad_norm": 4.375, + "learning_rate": 3.899182782880542e-05, + "loss": 4.6201, + "step": 412000 + }, + { + "epoch": 3.5472623966942147, + "grad_norm": 4.5625, + "learning_rate": 3.897036400384778e-05, + "loss": 4.2447, + "step": 412050 + }, + { + "epoch": 3.547692837465565, + "grad_norm": 2.3125, + "learning_rate": 3.894890465833412e-05, + "loss": 4.6572, + "step": 412100 + }, + { + "epoch": 3.5481232782369148, + "grad_norm": 6.46875, + "learning_rate": 3.89274497938395e-05, + "loss": 4.2096, + "step": 412150 + }, + { + "epoch": 3.5485537190082646, + "grad_norm": 7.5, + "learning_rate": 3.8905999411938665e-05, + "loss": 4.158, + "step": 412200 + }, + { + "epoch": 3.5489841597796143, + "grad_norm": 3.53125, + "learning_rate": 3.888455351420602e-05, + "loss": 4.4452, + "step": 412250 + }, + { + "epoch": 3.549414600550964, + "grad_norm": 2.03125, + "learning_rate": 3.886311210221562e-05, + "loss": 4.3399, + "step": 412300 + }, + { + "epoch": 3.549845041322314, + "grad_norm": 1.578125, + "learning_rate": 3.88416751775413e-05, + "loss": 4.2448, + "step": 412350 + }, + { + "epoch": 3.5502754820936637, + "grad_norm": 3.421875, + "learning_rate": 3.882024274175643e-05, + "loss": 4.5586, + "step": 412400 + }, + { + "epoch": 3.5507059228650135, + "grad_norm": 3.296875, + "learning_rate": 3.87988147964341e-05, + "loss": 4.7373, + "step": 412450 + }, + { + "epoch": 3.5511363636363638, + "grad_norm": 4.28125, + "learning_rate": 3.8777391343147095e-05, + "loss": 4.2711, + "step": 412500 + }, + { + "epoch": 3.5515668044077136, + "grad_norm": 5.84375, + "learning_rate": 3.875597238346781e-05, + "loss": 4.3551, + "step": 412550 + }, + { + "epoch": 3.5519972451790633, + "grad_norm": 4.1875, + "learning_rate": 3.8734557918968415e-05, + "loss": 4.3224, + "step": 412600 + }, + { + "epoch": 3.552427685950413, + "grad_norm": 2.71875, + "learning_rate": 3.871314795122065e-05, + "loss": 4.2193, + "step": 412650 + }, + { + "epoch": 3.552858126721763, + "grad_norm": 1.703125, + "learning_rate": 3.869174248179597e-05, + "loss": 4.3477, + "step": 412700 + }, + { + "epoch": 3.553288567493113, + "grad_norm": 2.671875, + "learning_rate": 3.8670341512265476e-05, + "loss": 4.2872, + "step": 412750 + }, + { + "epoch": 3.553719008264463, + "grad_norm": 4.71875, + "learning_rate": 3.864894504419996e-05, + "loss": 4.5247, + "step": 412800 + }, + { + "epoch": 3.5541494490358128, + "grad_norm": 5.28125, + "learning_rate": 3.862755307916985e-05, + "loss": 4.4421, + "step": 412850 + }, + { + "epoch": 3.5545798898071626, + "grad_norm": 1.53125, + "learning_rate": 3.8606165618745324e-05, + "loss": 4.1341, + "step": 412900 + }, + { + "epoch": 3.5550103305785123, + "grad_norm": 2.859375, + "learning_rate": 3.8584782664496156e-05, + "loss": 4.5346, + "step": 412950 + }, + { + "epoch": 3.555440771349862, + "grad_norm": 5.75, + "learning_rate": 3.8563404217991805e-05, + "loss": 4.103, + "step": 413000 + }, + { + "epoch": 3.555871212121212, + "grad_norm": 2.875, + "learning_rate": 3.854203028080139e-05, + "loss": 4.3643, + "step": 413050 + }, + { + "epoch": 3.5563016528925617, + "grad_norm": 2.8125, + "learning_rate": 3.852066085449373e-05, + "loss": 4.3044, + "step": 413100 + }, + { + "epoch": 3.556732093663912, + "grad_norm": 2.75, + "learning_rate": 3.849929594063725e-05, + "loss": 4.7237, + "step": 413150 + }, + { + "epoch": 3.5571625344352618, + "grad_norm": 2.34375, + "learning_rate": 3.8477935540800195e-05, + "loss": 4.3593, + "step": 413200 + }, + { + "epoch": 3.5575929752066116, + "grad_norm": 3.296875, + "learning_rate": 3.845657965655023e-05, + "loss": 4.0331, + "step": 413250 + }, + { + "epoch": 3.5580234159779613, + "grad_norm": 2.125, + "learning_rate": 3.843522828945495e-05, + "loss": 4.5532, + "step": 413300 + }, + { + "epoch": 3.558453856749311, + "grad_norm": 3.09375, + "learning_rate": 3.8413881441081446e-05, + "loss": 4.3786, + "step": 413350 + }, + { + "epoch": 3.5588842975206614, + "grad_norm": 5.03125, + "learning_rate": 3.8392539112996494e-05, + "loss": 4.1899, + "step": 413400 + }, + { + "epoch": 3.559314738292011, + "grad_norm": 3.59375, + "learning_rate": 3.83712013067667e-05, + "loss": 4.7605, + "step": 413450 + }, + { + "epoch": 3.559745179063361, + "grad_norm": 5.78125, + "learning_rate": 3.83498680239581e-05, + "loss": 3.7052, + "step": 413500 + }, + { + "epoch": 3.5601756198347108, + "grad_norm": 5.125, + "learning_rate": 3.8328539266136496e-05, + "loss": 4.6599, + "step": 413550 + }, + { + "epoch": 3.5606060606060606, + "grad_norm": 2.53125, + "learning_rate": 3.8307215034867473e-05, + "loss": 4.3137, + "step": 413600 + }, + { + "epoch": 3.5610365013774103, + "grad_norm": 2.28125, + "learning_rate": 3.828589533171611e-05, + "loss": 4.1429, + "step": 413650 + }, + { + "epoch": 3.56146694214876, + "grad_norm": 1.8671875, + "learning_rate": 3.826458015824727e-05, + "loss": 4.2299, + "step": 413700 + }, + { + "epoch": 3.56189738292011, + "grad_norm": 3.40625, + "learning_rate": 3.82432695160254e-05, + "loss": 4.3519, + "step": 413750 + }, + { + "epoch": 3.56232782369146, + "grad_norm": 2.453125, + "learning_rate": 3.822196340661469e-05, + "loss": 4.2761, + "step": 413800 + }, + { + "epoch": 3.56275826446281, + "grad_norm": 3.109375, + "learning_rate": 3.820066183157891e-05, + "loss": 4.4955, + "step": 413850 + }, + { + "epoch": 3.5631887052341598, + "grad_norm": 4.03125, + "learning_rate": 3.817936479248162e-05, + "loss": 4.0771, + "step": 413900 + }, + { + "epoch": 3.5636191460055096, + "grad_norm": 3.421875, + "learning_rate": 3.815807229088594e-05, + "loss": 4.1555, + "step": 413950 + }, + { + "epoch": 3.5640495867768593, + "grad_norm": 3.65625, + "learning_rate": 3.8136784328354694e-05, + "loss": 4.2392, + "step": 414000 + }, + { + "epoch": 3.5640495867768593, + "eval_loss": 4.993220329284668, + "eval_runtime": 23.761, + "eval_samples_per_second": 26.935, + "eval_steps_per_second": 13.467, + "eval_tts_loss": 7.490751677720266, + "step": 414000 + }, + { + "epoch": 3.5644800275482096, + "grad_norm": 3.09375, + "learning_rate": 3.811550090645039e-05, + "loss": 4.0995, + "step": 414050 + }, + { + "epoch": 3.5649104683195594, + "grad_norm": 4.625, + "learning_rate": 3.8094222026735117e-05, + "loss": 4.399, + "step": 414100 + }, + { + "epoch": 3.565340909090909, + "grad_norm": 3.5625, + "learning_rate": 3.80729476907708e-05, + "loss": 4.14, + "step": 414150 + }, + { + "epoch": 3.565771349862259, + "grad_norm": 3.0625, + "learning_rate": 3.805167790011892e-05, + "loss": 4.3395, + "step": 414200 + }, + { + "epoch": 3.5662017906336088, + "grad_norm": 2.765625, + "learning_rate": 3.803041265634052e-05, + "loss": 4.046, + "step": 414250 + }, + { + "epoch": 3.5666322314049586, + "grad_norm": 1.3046875, + "learning_rate": 3.800915196099655e-05, + "loss": 4.0017, + "step": 414300 + }, + { + "epoch": 3.5670626721763083, + "grad_norm": 5.5, + "learning_rate": 3.798789581564743e-05, + "loss": 4.5304, + "step": 414350 + }, + { + "epoch": 3.567493112947658, + "grad_norm": 3.921875, + "learning_rate": 3.796664422185331e-05, + "loss": 4.2132, + "step": 414400 + }, + { + "epoch": 3.5679235537190084, + "grad_norm": 3.375, + "learning_rate": 3.794539718117407e-05, + "loss": 4.2827, + "step": 414450 + }, + { + "epoch": 3.568353994490358, + "grad_norm": 2.875, + "learning_rate": 3.792415469516919e-05, + "loss": 4.3844, + "step": 414500 + }, + { + "epoch": 3.568784435261708, + "grad_norm": 3.171875, + "learning_rate": 3.790291676539773e-05, + "loss": 4.9016, + "step": 414550 + }, + { + "epoch": 3.5692148760330578, + "grad_norm": 0.5859375, + "learning_rate": 3.788168339341862e-05, + "loss": 4.4461, + "step": 414600 + }, + { + "epoch": 3.5696453168044076, + "grad_norm": 1.828125, + "learning_rate": 3.786045458079028e-05, + "loss": 4.2271, + "step": 414650 + }, + { + "epoch": 3.570075757575758, + "grad_norm": 3.515625, + "learning_rate": 3.783923032907086e-05, + "loss": 4.3428, + "step": 414700 + }, + { + "epoch": 3.5705061983471076, + "grad_norm": 2.375, + "learning_rate": 3.781801063981826e-05, + "loss": 4.5283, + "step": 414750 + }, + { + "epoch": 3.5709366391184574, + "grad_norm": 3.25, + "learning_rate": 3.779679551458981e-05, + "loss": 4.3794, + "step": 414800 + }, + { + "epoch": 3.571367079889807, + "grad_norm": 4.8125, + "learning_rate": 3.7775584954942764e-05, + "loss": 4.3613, + "step": 414850 + }, + { + "epoch": 3.571797520661157, + "grad_norm": 5.1875, + "learning_rate": 3.775437896243392e-05, + "loss": 4.2454, + "step": 414900 + }, + { + "epoch": 3.5722279614325068, + "grad_norm": 3.203125, + "learning_rate": 3.773317753861969e-05, + "loss": 4.3172, + "step": 414950 + }, + { + "epoch": 3.5726584022038566, + "grad_norm": 2.859375, + "learning_rate": 3.7711980685056334e-05, + "loss": 4.1723, + "step": 415000 + }, + { + "epoch": 3.5730888429752063, + "grad_norm": 2.78125, + "learning_rate": 3.769078840329955e-05, + "loss": 4.3558, + "step": 415050 + }, + { + "epoch": 3.5735192837465566, + "grad_norm": 4.03125, + "learning_rate": 3.7669600694904796e-05, + "loss": 4.5202, + "step": 415100 + }, + { + "epoch": 3.5739497245179064, + "grad_norm": 8.5625, + "learning_rate": 3.764841756142727e-05, + "loss": 4.2215, + "step": 415150 + }, + { + "epoch": 3.574380165289256, + "grad_norm": 5.0625, + "learning_rate": 3.762723900442175e-05, + "loss": 3.8953, + "step": 415200 + }, + { + "epoch": 3.574810606060606, + "grad_norm": 4.5625, + "learning_rate": 3.7606065025442684e-05, + "loss": 4.4706, + "step": 415250 + }, + { + "epoch": 3.5752410468319558, + "grad_norm": 4.78125, + "learning_rate": 3.7584895626044206e-05, + "loss": 4.4278, + "step": 415300 + }, + { + "epoch": 3.575671487603306, + "grad_norm": 3.328125, + "learning_rate": 3.7563730807780095e-05, + "loss": 4.1582, + "step": 415350 + }, + { + "epoch": 3.576101928374656, + "grad_norm": 3.8125, + "learning_rate": 3.754257057220377e-05, + "loss": 4.0407, + "step": 415400 + }, + { + "epoch": 3.5765323691460056, + "grad_norm": 6.15625, + "learning_rate": 3.7521414920868424e-05, + "loss": 4.5944, + "step": 415450 + }, + { + "epoch": 3.5769628099173554, + "grad_norm": 2.515625, + "learning_rate": 3.75002638553268e-05, + "loss": 4.7636, + "step": 415500 + }, + { + "epoch": 3.577393250688705, + "grad_norm": 1.5546875, + "learning_rate": 3.7479117377131335e-05, + "loss": 4.4981, + "step": 415550 + }, + { + "epoch": 3.577823691460055, + "grad_norm": 2.15625, + "learning_rate": 3.745797548783414e-05, + "loss": 4.2684, + "step": 415600 + }, + { + "epoch": 3.5782541322314048, + "grad_norm": 3.8125, + "learning_rate": 3.7436838188986946e-05, + "loss": 4.6423, + "step": 415650 + }, + { + "epoch": 3.5786845730027546, + "grad_norm": 2.65625, + "learning_rate": 3.741570548214125e-05, + "loss": 4.3073, + "step": 415700 + }, + { + "epoch": 3.579115013774105, + "grad_norm": 2.0, + "learning_rate": 3.739457736884815e-05, + "loss": 4.863, + "step": 415750 + }, + { + "epoch": 3.5795454545454546, + "grad_norm": 2.765625, + "learning_rate": 3.737345385065831e-05, + "loss": 4.546, + "step": 415800 + }, + { + "epoch": 3.5799758953168044, + "grad_norm": 1.8359375, + "learning_rate": 3.735233492912225e-05, + "loss": 4.3051, + "step": 415850 + }, + { + "epoch": 3.580406336088154, + "grad_norm": 5.78125, + "learning_rate": 3.733122060579001e-05, + "loss": 4.361, + "step": 415900 + }, + { + "epoch": 3.580836776859504, + "grad_norm": 5.28125, + "learning_rate": 3.731011088221131e-05, + "loss": 4.1161, + "step": 415950 + }, + { + "epoch": 3.581267217630854, + "grad_norm": 4.5625, + "learning_rate": 3.7289005759935655e-05, + "loss": 4.4665, + "step": 416000 + }, + { + "epoch": 3.581697658402204, + "grad_norm": 3.03125, + "learning_rate": 3.726790524051203e-05, + "loss": 4.4466, + "step": 416050 + }, + { + "epoch": 3.582128099173554, + "grad_norm": 2.609375, + "learning_rate": 3.724680932548914e-05, + "loss": 4.4199, + "step": 416100 + }, + { + "epoch": 3.5825585399449036, + "grad_norm": 3.0625, + "learning_rate": 3.7225718016415466e-05, + "loss": 4.2236, + "step": 416150 + }, + { + "epoch": 3.5829889807162534, + "grad_norm": 3.03125, + "learning_rate": 3.7204631314839034e-05, + "loss": 4.3118, + "step": 416200 + }, + { + "epoch": 3.583419421487603, + "grad_norm": 3.046875, + "learning_rate": 3.718354922230751e-05, + "loss": 4.0586, + "step": 416250 + }, + { + "epoch": 3.583849862258953, + "grad_norm": 2.4375, + "learning_rate": 3.71624717403684e-05, + "loss": 4.3164, + "step": 416300 + }, + { + "epoch": 3.5842803030303028, + "grad_norm": 4.25, + "learning_rate": 3.714139887056859e-05, + "loss": 4.0762, + "step": 416350 + }, + { + "epoch": 3.584710743801653, + "grad_norm": 3.40625, + "learning_rate": 3.7120330614454905e-05, + "loss": 4.4412, + "step": 416400 + }, + { + "epoch": 3.585141184573003, + "grad_norm": 4.8125, + "learning_rate": 3.7099266973573645e-05, + "loss": 4.0982, + "step": 416450 + }, + { + "epoch": 3.5855716253443526, + "grad_norm": 4.96875, + "learning_rate": 3.707820794947087e-05, + "loss": 4.4131, + "step": 416500 + }, + { + "epoch": 3.5860020661157024, + "grad_norm": 3.734375, + "learning_rate": 3.705715354369225e-05, + "loss": 4.4848, + "step": 416550 + }, + { + "epoch": 3.5864325068870526, + "grad_norm": 1.3671875, + "learning_rate": 3.703610375778312e-05, + "loss": 4.0339, + "step": 416600 + }, + { + "epoch": 3.5868629476584024, + "grad_norm": 2.859375, + "learning_rate": 3.701505859328849e-05, + "loss": 4.3279, + "step": 416650 + }, + { + "epoch": 3.587293388429752, + "grad_norm": 4.875, + "learning_rate": 3.6994018051753064e-05, + "loss": 4.1989, + "step": 416700 + }, + { + "epoch": 3.587723829201102, + "grad_norm": 2.875, + "learning_rate": 3.697298213472116e-05, + "loss": 4.5478, + "step": 416750 + }, + { + "epoch": 3.588154269972452, + "grad_norm": 3.046875, + "learning_rate": 3.695195084373676e-05, + "loss": 4.4153, + "step": 416800 + }, + { + "epoch": 3.5885847107438016, + "grad_norm": 3.203125, + "learning_rate": 3.693092418034351e-05, + "loss": 4.0509, + "step": 416850 + }, + { + "epoch": 3.5890151515151514, + "grad_norm": 2.328125, + "learning_rate": 3.690990214608473e-05, + "loss": 4.5398, + "step": 416900 + }, + { + "epoch": 3.589445592286501, + "grad_norm": 3.234375, + "learning_rate": 3.688888474250336e-05, + "loss": 4.135, + "step": 416950 + }, + { + "epoch": 3.589876033057851, + "grad_norm": 2.453125, + "learning_rate": 3.68678719711421e-05, + "loss": 4.402, + "step": 417000 + }, + { + "epoch": 3.589876033057851, + "eval_loss": 4.99120569229126, + "eval_runtime": 24.1585, + "eval_samples_per_second": 26.492, + "eval_steps_per_second": 13.246, + "eval_tts_loss": 7.476320691126799, + "step": 417000 + }, + { + "epoch": 3.590306473829201, + "grad_norm": 2.65625, + "learning_rate": 3.68468638335432e-05, + "loss": 4.7107, + "step": 417050 + }, + { + "epoch": 3.590736914600551, + "grad_norm": 3.890625, + "learning_rate": 3.682586033124862e-05, + "loss": 4.182, + "step": 417100 + }, + { + "epoch": 3.591167355371901, + "grad_norm": 3.6875, + "learning_rate": 3.680486146579997e-05, + "loss": 4.2218, + "step": 417150 + }, + { + "epoch": 3.5915977961432506, + "grad_norm": 3.421875, + "learning_rate": 3.678386723873848e-05, + "loss": 4.2281, + "step": 417200 + }, + { + "epoch": 3.592028236914601, + "grad_norm": 6.78125, + "learning_rate": 3.6762877651605164e-05, + "loss": 4.2575, + "step": 417250 + }, + { + "epoch": 3.5924586776859506, + "grad_norm": 3.0, + "learning_rate": 3.674189270594061e-05, + "loss": 4.1099, + "step": 417300 + }, + { + "epoch": 3.5928891184573004, + "grad_norm": 5.1875, + "learning_rate": 3.6720912403284945e-05, + "loss": 4.5233, + "step": 417350 + }, + { + "epoch": 3.59331955922865, + "grad_norm": 3.078125, + "learning_rate": 3.66999367451782e-05, + "loss": 4.1764, + "step": 417400 + }, + { + "epoch": 3.59375, + "grad_norm": 3.375, + "learning_rate": 3.66789657331599e-05, + "loss": 3.8756, + "step": 417450 + }, + { + "epoch": 3.59418044077135, + "grad_norm": 3.390625, + "learning_rate": 3.665799936876925e-05, + "loss": 4.4614, + "step": 417500 + }, + { + "epoch": 3.5946108815426996, + "grad_norm": 3.28125, + "learning_rate": 3.663703765354523e-05, + "loss": 4.3267, + "step": 417550 + }, + { + "epoch": 3.5950413223140494, + "grad_norm": 3.328125, + "learning_rate": 3.661608058902628e-05, + "loss": 4.4989, + "step": 417600 + }, + { + "epoch": 3.595471763085399, + "grad_norm": 3.40625, + "learning_rate": 3.6595128176750604e-05, + "loss": 4.2691, + "step": 417650 + }, + { + "epoch": 3.5959022038567494, + "grad_norm": 3.828125, + "learning_rate": 3.6574180418256146e-05, + "loss": 4.4983, + "step": 417700 + }, + { + "epoch": 3.596332644628099, + "grad_norm": 2.359375, + "learning_rate": 3.655323731508037e-05, + "loss": 4.0985, + "step": 417750 + }, + { + "epoch": 3.596763085399449, + "grad_norm": 3.65625, + "learning_rate": 3.653229886876047e-05, + "loss": 4.1892, + "step": 417800 + }, + { + "epoch": 3.597193526170799, + "grad_norm": 2.890625, + "learning_rate": 3.651136508083327e-05, + "loss": 4.4907, + "step": 417850 + }, + { + "epoch": 3.597623966942149, + "grad_norm": 2.15625, + "learning_rate": 3.649043595283526e-05, + "loss": 4.2404, + "step": 417900 + }, + { + "epoch": 3.598054407713499, + "grad_norm": 3.09375, + "learning_rate": 3.646951148630262e-05, + "loss": 4.2375, + "step": 417950 + }, + { + "epoch": 3.5984848484848486, + "grad_norm": 2.28125, + "learning_rate": 3.644859168277116e-05, + "loss": 4.5455, + "step": 418000 + }, + { + "epoch": 3.5989152892561984, + "grad_norm": 2.515625, + "learning_rate": 3.642767654377633e-05, + "loss": 4.3991, + "step": 418050 + }, + { + "epoch": 3.599345730027548, + "grad_norm": 3.078125, + "learning_rate": 3.640676607085327e-05, + "loss": 4.578, + "step": 418100 + }, + { + "epoch": 3.599776170798898, + "grad_norm": 4.125, + "learning_rate": 3.638586026553674e-05, + "loss": 4.2867, + "step": 418150 + }, + { + "epoch": 3.600206611570248, + "grad_norm": 4.375, + "learning_rate": 3.636495912936118e-05, + "loss": 4.2437, + "step": 418200 + }, + { + "epoch": 3.6006370523415976, + "grad_norm": 4.65625, + "learning_rate": 3.634406266386074e-05, + "loss": 4.6503, + "step": 418250 + }, + { + "epoch": 3.6010674931129474, + "grad_norm": 4.28125, + "learning_rate": 3.6323170870569136e-05, + "loss": 4.0717, + "step": 418300 + }, + { + "epoch": 3.6014979338842976, + "grad_norm": 3.28125, + "learning_rate": 3.630228375101977e-05, + "loss": 4.0399, + "step": 418350 + }, + { + "epoch": 3.6019283746556474, + "grad_norm": 3.640625, + "learning_rate": 3.6281401306745756e-05, + "loss": 4.5156, + "step": 418400 + }, + { + "epoch": 3.602358815426997, + "grad_norm": 4.03125, + "learning_rate": 3.626052353927977e-05, + "loss": 4.09, + "step": 418450 + }, + { + "epoch": 3.602789256198347, + "grad_norm": 3.515625, + "learning_rate": 3.62396504501542e-05, + "loss": 4.2808, + "step": 418500 + }, + { + "epoch": 3.6032196969696972, + "grad_norm": 4.21875, + "learning_rate": 3.6218782040901124e-05, + "loss": 4.3584, + "step": 418550 + }, + { + "epoch": 3.603650137741047, + "grad_norm": 3.546875, + "learning_rate": 3.6197918313052214e-05, + "loss": 4.3041, + "step": 418600 + }, + { + "epoch": 3.604080578512397, + "grad_norm": 3.03125, + "learning_rate": 3.6177059268138834e-05, + "loss": 4.4148, + "step": 418650 + }, + { + "epoch": 3.6045110192837466, + "grad_norm": 2.375, + "learning_rate": 3.6156204907691994e-05, + "loss": 4.1302, + "step": 418700 + }, + { + "epoch": 3.6049414600550964, + "grad_norm": 2.359375, + "learning_rate": 3.613535523324234e-05, + "loss": 4.3817, + "step": 418750 + }, + { + "epoch": 3.605371900826446, + "grad_norm": 2.75, + "learning_rate": 3.6114510246320165e-05, + "loss": 4.2568, + "step": 418800 + }, + { + "epoch": 3.605802341597796, + "grad_norm": 2.84375, + "learning_rate": 3.609366994845555e-05, + "loss": 4.509, + "step": 418850 + }, + { + "epoch": 3.606232782369146, + "grad_norm": 4.09375, + "learning_rate": 3.607283434117802e-05, + "loss": 4.3664, + "step": 418900 + }, + { + "epoch": 3.606663223140496, + "grad_norm": 2.5625, + "learning_rate": 3.605200342601692e-05, + "loss": 4.2576, + "step": 418950 + }, + { + "epoch": 3.607093663911846, + "grad_norm": 4.8125, + "learning_rate": 3.6031177204501176e-05, + "loss": 4.2341, + "step": 419000 + }, + { + "epoch": 3.6075241046831956, + "grad_norm": 3.03125, + "learning_rate": 3.6010355678159365e-05, + "loss": 4.2752, + "step": 419050 + }, + { + "epoch": 3.6079545454545454, + "grad_norm": 1.21875, + "learning_rate": 3.5989538848519845e-05, + "loss": 4.3877, + "step": 419100 + }, + { + "epoch": 3.608384986225895, + "grad_norm": 2.96875, + "learning_rate": 3.596872671711041e-05, + "loss": 4.1981, + "step": 419150 + }, + { + "epoch": 3.6088154269972454, + "grad_norm": 1.953125, + "learning_rate": 3.594791928545864e-05, + "loss": 3.8554, + "step": 419200 + }, + { + "epoch": 3.6092458677685952, + "grad_norm": 3.1875, + "learning_rate": 3.592711655509181e-05, + "loss": 4.3153, + "step": 419250 + }, + { + "epoch": 3.609676308539945, + "grad_norm": 3.703125, + "learning_rate": 3.5906318527536776e-05, + "loss": 4.1656, + "step": 419300 + }, + { + "epoch": 3.610106749311295, + "grad_norm": 4.3125, + "learning_rate": 3.5885525204320057e-05, + "loss": 4.5029, + "step": 419350 + }, + { + "epoch": 3.6105371900826446, + "grad_norm": 3.1875, + "learning_rate": 3.586473658696784e-05, + "loss": 4.369, + "step": 419400 + }, + { + "epoch": 3.6109676308539944, + "grad_norm": 2.9375, + "learning_rate": 3.584395267700593e-05, + "loss": 3.7832, + "step": 419450 + }, + { + "epoch": 3.611398071625344, + "grad_norm": 2.71875, + "learning_rate": 3.58231734759599e-05, + "loss": 4.3527, + "step": 419500 + }, + { + "epoch": 3.611828512396694, + "grad_norm": 2.609375, + "learning_rate": 3.580239898535486e-05, + "loss": 4.0425, + "step": 419550 + }, + { + "epoch": 3.6122589531680442, + "grad_norm": 3.328125, + "learning_rate": 3.5781629206715595e-05, + "loss": 4.6007, + "step": 419600 + }, + { + "epoch": 3.612689393939394, + "grad_norm": 4.1875, + "learning_rate": 3.576086414156659e-05, + "loss": 4.528, + "step": 419650 + }, + { + "epoch": 3.613119834710744, + "grad_norm": 2.359375, + "learning_rate": 3.574010379143193e-05, + "loss": 4.3826, + "step": 419700 + }, + { + "epoch": 3.6135502754820936, + "grad_norm": 3.421875, + "learning_rate": 3.5719348157835374e-05, + "loss": 4.4953, + "step": 419750 + }, + { + "epoch": 3.6139807162534434, + "grad_norm": 3.984375, + "learning_rate": 3.569859724230038e-05, + "loss": 4.0398, + "step": 419800 + }, + { + "epoch": 3.6144111570247937, + "grad_norm": 2.15625, + "learning_rate": 3.567785104635004e-05, + "loss": 4.1638, + "step": 419850 + }, + { + "epoch": 3.6148415977961434, + "grad_norm": 3.8125, + "learning_rate": 3.565710957150697e-05, + "loss": 4.3688, + "step": 419900 + }, + { + "epoch": 3.6152720385674932, + "grad_norm": 4.59375, + "learning_rate": 3.563637281929366e-05, + "loss": 4.5083, + "step": 419950 + }, + { + "epoch": 3.615702479338843, + "grad_norm": 5.5625, + "learning_rate": 3.5615640791232105e-05, + "loss": 3.8894, + "step": 420000 + }, + { + "epoch": 3.615702479338843, + "eval_loss": 4.991680145263672, + "eval_runtime": 23.5326, + "eval_samples_per_second": 27.196, + "eval_steps_per_second": 13.598, + "eval_tts_loss": 7.4729306236027835, + "step": 420000 + }, + { + "epoch": 3.616132920110193, + "grad_norm": 4.9375, + "learning_rate": 3.559491348884395e-05, + "loss": 4.4534, + "step": 420050 + }, + { + "epoch": 3.6165633608815426, + "grad_norm": 2.609375, + "learning_rate": 3.557419091365062e-05, + "loss": 4.4996, + "step": 420100 + }, + { + "epoch": 3.6169938016528924, + "grad_norm": 6.1875, + "learning_rate": 3.555347306717305e-05, + "loss": 4.5165, + "step": 420150 + }, + { + "epoch": 3.617424242424242, + "grad_norm": 2.96875, + "learning_rate": 3.553275995093189e-05, + "loss": 4.0153, + "step": 420200 + }, + { + "epoch": 3.6178546831955924, + "grad_norm": 4.25, + "learning_rate": 3.551205156644746e-05, + "loss": 4.288, + "step": 420250 + }, + { + "epoch": 3.6182851239669422, + "grad_norm": 2.703125, + "learning_rate": 3.549134791523968e-05, + "loss": 4.447, + "step": 420300 + }, + { + "epoch": 3.618715564738292, + "grad_norm": 3.3125, + "learning_rate": 3.547064899882814e-05, + "loss": 4.284, + "step": 420350 + }, + { + "epoch": 3.619146005509642, + "grad_norm": 3.546875, + "learning_rate": 3.54499548187322e-05, + "loss": 4.1718, + "step": 420400 + }, + { + "epoch": 3.6195764462809916, + "grad_norm": 2.765625, + "learning_rate": 3.5429265376470625e-05, + "loss": 4.2339, + "step": 420450 + }, + { + "epoch": 3.620006887052342, + "grad_norm": 3.828125, + "learning_rate": 3.540858067356207e-05, + "loss": 4.7083, + "step": 420500 + }, + { + "epoch": 3.6204373278236917, + "grad_norm": 1.328125, + "learning_rate": 3.5387900711524725e-05, + "loss": 4.2373, + "step": 420550 + }, + { + "epoch": 3.6208677685950414, + "grad_norm": 4.375, + "learning_rate": 3.536722549187641e-05, + "loss": 4.2521, + "step": 420600 + }, + { + "epoch": 3.6212982093663912, + "grad_norm": 5.5, + "learning_rate": 3.5346555016134766e-05, + "loss": 4.6201, + "step": 420650 + }, + { + "epoch": 3.621728650137741, + "grad_norm": 3.484375, + "learning_rate": 3.5325889285816824e-05, + "loss": 4.067, + "step": 420700 + }, + { + "epoch": 3.622159090909091, + "grad_norm": 3.25, + "learning_rate": 3.530522830243944e-05, + "loss": 4.4582, + "step": 420750 + }, + { + "epoch": 3.6225895316804406, + "grad_norm": 2.890625, + "learning_rate": 3.528457206751914e-05, + "loss": 4.2861, + "step": 420800 + }, + { + "epoch": 3.6230199724517904, + "grad_norm": 2.8125, + "learning_rate": 3.526392058257201e-05, + "loss": 4.7673, + "step": 420850 + }, + { + "epoch": 3.6234504132231407, + "grad_norm": 3.328125, + "learning_rate": 3.5243273849113824e-05, + "loss": 4.2167, + "step": 420900 + }, + { + "epoch": 3.6238808539944904, + "grad_norm": 4.625, + "learning_rate": 3.5222631868660004e-05, + "loss": 3.787, + "step": 420950 + }, + { + "epoch": 3.6243112947658402, + "grad_norm": 1.53125, + "learning_rate": 3.5201994642725635e-05, + "loss": 4.4714, + "step": 421000 + }, + { + "epoch": 3.62474173553719, + "grad_norm": 2.578125, + "learning_rate": 3.5181362172825415e-05, + "loss": 4.3027, + "step": 421050 + }, + { + "epoch": 3.62517217630854, + "grad_norm": 6.84375, + "learning_rate": 3.516073446047378e-05, + "loss": 4.3151, + "step": 421100 + }, + { + "epoch": 3.62560261707989, + "grad_norm": 3.703125, + "learning_rate": 3.5140111507184724e-05, + "loss": 4.1441, + "step": 421150 + }, + { + "epoch": 3.62603305785124, + "grad_norm": 4.59375, + "learning_rate": 3.511949331447194e-05, + "loss": 4.2146, + "step": 421200 + }, + { + "epoch": 3.6264634986225897, + "grad_norm": 5.1875, + "learning_rate": 3.509887988384876e-05, + "loss": 4.6127, + "step": 421250 + }, + { + "epoch": 3.6268939393939394, + "grad_norm": 3.625, + "learning_rate": 3.5078271216828115e-05, + "loss": 4.4266, + "step": 421300 + }, + { + "epoch": 3.6273243801652892, + "grad_norm": 3.96875, + "learning_rate": 3.5057667314922714e-05, + "loss": 4.0089, + "step": 421350 + }, + { + "epoch": 3.627754820936639, + "grad_norm": 4.25, + "learning_rate": 3.503706817964485e-05, + "loss": 3.9434, + "step": 421400 + }, + { + "epoch": 3.628185261707989, + "grad_norm": 3.203125, + "learning_rate": 3.5016473812506324e-05, + "loss": 4.3359, + "step": 421450 + }, + { + "epoch": 3.6286157024793386, + "grad_norm": 2.8125, + "learning_rate": 3.499588421501885e-05, + "loss": 4.1194, + "step": 421500 + }, + { + "epoch": 3.629046143250689, + "grad_norm": 2.453125, + "learning_rate": 3.4975299388693615e-05, + "loss": 4.5715, + "step": 421550 + }, + { + "epoch": 3.6294765840220387, + "grad_norm": 2.78125, + "learning_rate": 3.495471933504146e-05, + "loss": 4.6143, + "step": 421600 + }, + { + "epoch": 3.6299070247933884, + "grad_norm": 2.234375, + "learning_rate": 3.4934144055573034e-05, + "loss": 4.4325, + "step": 421650 + }, + { + "epoch": 3.6303374655647382, + "grad_norm": 2.78125, + "learning_rate": 3.491357355179837e-05, + "loss": 4.3959, + "step": 421700 + }, + { + "epoch": 3.630767906336088, + "grad_norm": 1.4921875, + "learning_rate": 3.489300782522741e-05, + "loss": 4.4999, + "step": 421750 + }, + { + "epoch": 3.6311983471074383, + "grad_norm": 3.078125, + "learning_rate": 3.487244687736958e-05, + "loss": 4.3383, + "step": 421800 + }, + { + "epoch": 3.631628787878788, + "grad_norm": 3.0625, + "learning_rate": 3.485189070973404e-05, + "loss": 4.5649, + "step": 421850 + }, + { + "epoch": 3.632059228650138, + "grad_norm": 2.640625, + "learning_rate": 3.4831339323829505e-05, + "loss": 3.8946, + "step": 421900 + }, + { + "epoch": 3.6324896694214877, + "grad_norm": 2.734375, + "learning_rate": 3.481079272116452e-05, + "loss": 4.2906, + "step": 421950 + }, + { + "epoch": 3.6329201101928374, + "grad_norm": 3.46875, + "learning_rate": 3.479025090324702e-05, + "loss": 4.5026, + "step": 422000 + }, + { + "epoch": 3.6333505509641872, + "grad_norm": 3.046875, + "learning_rate": 3.4769713871584843e-05, + "loss": 4.1248, + "step": 422050 + }, + { + "epoch": 3.633780991735537, + "grad_norm": 3.09375, + "learning_rate": 3.4749181627685326e-05, + "loss": 4.4575, + "step": 422100 + }, + { + "epoch": 3.634211432506887, + "grad_norm": 2.484375, + "learning_rate": 3.4728654173055444e-05, + "loss": 4.3899, + "step": 422150 + }, + { + "epoch": 3.634641873278237, + "grad_norm": 1.6015625, + "learning_rate": 3.470813150920198e-05, + "loss": 3.8903, + "step": 422200 + }, + { + "epoch": 3.635072314049587, + "grad_norm": 3.640625, + "learning_rate": 3.468761363763114e-05, + "loss": 4.1154, + "step": 422250 + }, + { + "epoch": 3.6355027548209367, + "grad_norm": 2.53125, + "learning_rate": 3.466710055984892e-05, + "loss": 4.3853, + "step": 422300 + }, + { + "epoch": 3.6359331955922864, + "grad_norm": 2.578125, + "learning_rate": 3.464659227736098e-05, + "loss": 4.445, + "step": 422350 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 1.7109375, + "learning_rate": 3.462608879167255e-05, + "loss": 4.3955, + "step": 422400 + }, + { + "epoch": 3.6367940771349865, + "grad_norm": 2.953125, + "learning_rate": 3.460559010428855e-05, + "loss": 4.1305, + "step": 422450 + }, + { + "epoch": 3.6372245179063363, + "grad_norm": 1.4609375, + "learning_rate": 3.458509621671354e-05, + "loss": 4.2699, + "step": 422500 + }, + { + "epoch": 3.637654958677686, + "grad_norm": 2.453125, + "learning_rate": 3.456460713045172e-05, + "loss": 4.5622, + "step": 422550 + }, + { + "epoch": 3.638085399449036, + "grad_norm": 4.0625, + "learning_rate": 3.454412284700691e-05, + "loss": 4.1964, + "step": 422600 + }, + { + "epoch": 3.6385158402203857, + "grad_norm": 4.625, + "learning_rate": 3.45236433678827e-05, + "loss": 4.1271, + "step": 422650 + }, + { + "epoch": 3.6389462809917354, + "grad_norm": 4.375, + "learning_rate": 3.450316869458218e-05, + "loss": 4.4434, + "step": 422700 + }, + { + "epoch": 3.6393767217630852, + "grad_norm": 3.296875, + "learning_rate": 3.448269882860815e-05, + "loss": 4.5871, + "step": 422750 + }, + { + "epoch": 3.639807162534435, + "grad_norm": 4.125, + "learning_rate": 3.446223377146308e-05, + "loss": 4.3567, + "step": 422800 + }, + { + "epoch": 3.6402376033057853, + "grad_norm": 3.21875, + "learning_rate": 3.4441773524649e-05, + "loss": 4.5778, + "step": 422850 + }, + { + "epoch": 3.640668044077135, + "grad_norm": 4.28125, + "learning_rate": 3.442131808966772e-05, + "loss": 4.3927, + "step": 422900 + }, + { + "epoch": 3.641098484848485, + "grad_norm": 1.6328125, + "learning_rate": 3.4400867468020636e-05, + "loss": 4.1146, + "step": 422950 + }, + { + "epoch": 3.6415289256198347, + "grad_norm": 4.96875, + "learning_rate": 3.4380421661208675e-05, + "loss": 4.2752, + "step": 423000 + }, + { + "epoch": 3.6415289256198347, + "eval_loss": 4.99205207824707, + "eval_runtime": 24.1641, + "eval_samples_per_second": 26.486, + "eval_steps_per_second": 13.243, + "eval_tts_loss": 7.487585057957488, + "step": 423000 + }, + { + "epoch": 3.6419593663911844, + "grad_norm": 4.28125, + "learning_rate": 3.43599806707326e-05, + "loss": 4.4091, + "step": 423050 + }, + { + "epoch": 3.6423898071625347, + "grad_norm": 2.078125, + "learning_rate": 3.4339544498092736e-05, + "loss": 4.0776, + "step": 423100 + }, + { + "epoch": 3.6428202479338845, + "grad_norm": 4.28125, + "learning_rate": 3.431911314478898e-05, + "loss": 4.5538, + "step": 423150 + }, + { + "epoch": 3.6432506887052343, + "grad_norm": 2.578125, + "learning_rate": 3.4298686612321086e-05, + "loss": 4.2152, + "step": 423200 + }, + { + "epoch": 3.643681129476584, + "grad_norm": 3.03125, + "learning_rate": 3.42782649021882e-05, + "loss": 4.2584, + "step": 423250 + }, + { + "epoch": 3.644111570247934, + "grad_norm": 6.21875, + "learning_rate": 3.425784801588923e-05, + "loss": 4.2701, + "step": 423300 + }, + { + "epoch": 3.6445420110192837, + "grad_norm": 2.09375, + "learning_rate": 3.4237435954922794e-05, + "loss": 4.4707, + "step": 423350 + }, + { + "epoch": 3.6449724517906334, + "grad_norm": 2.1875, + "learning_rate": 3.421702872078707e-05, + "loss": 4.1121, + "step": 423400 + }, + { + "epoch": 3.6454028925619832, + "grad_norm": 3.890625, + "learning_rate": 3.419662631497991e-05, + "loss": 4.4922, + "step": 423450 + }, + { + "epoch": 3.6458333333333335, + "grad_norm": 4.6875, + "learning_rate": 3.4176228738998784e-05, + "loss": 4.2265, + "step": 423500 + }, + { + "epoch": 3.6462637741046833, + "grad_norm": 1.46875, + "learning_rate": 3.4155835994340825e-05, + "loss": 4.4298, + "step": 423550 + }, + { + "epoch": 3.646694214876033, + "grad_norm": 3.171875, + "learning_rate": 3.413544808250286e-05, + "loss": 4.4119, + "step": 423600 + }, + { + "epoch": 3.647124655647383, + "grad_norm": 3.765625, + "learning_rate": 3.4115065004981305e-05, + "loss": 4.3498, + "step": 423650 + }, + { + "epoch": 3.6475550964187327, + "grad_norm": 2.34375, + "learning_rate": 3.409468676327219e-05, + "loss": 3.8549, + "step": 423700 + }, + { + "epoch": 3.647985537190083, + "grad_norm": 4.46875, + "learning_rate": 3.407431335887132e-05, + "loss": 4.3146, + "step": 423750 + }, + { + "epoch": 3.6484159779614327, + "grad_norm": 5.34375, + "learning_rate": 3.4053944793273985e-05, + "loss": 4.7968, + "step": 423800 + }, + { + "epoch": 3.6488464187327825, + "grad_norm": 1.421875, + "learning_rate": 3.403358106797519e-05, + "loss": 4.5443, + "step": 423850 + }, + { + "epoch": 3.6492768595041323, + "grad_norm": 3.859375, + "learning_rate": 3.401322218446963e-05, + "loss": 4.2175, + "step": 423900 + }, + { + "epoch": 3.649707300275482, + "grad_norm": 3.75, + "learning_rate": 3.39928681442516e-05, + "loss": 4.1739, + "step": 423950 + }, + { + "epoch": 3.650137741046832, + "grad_norm": 2.890625, + "learning_rate": 3.3972518948815025e-05, + "loss": 4.5082, + "step": 424000 + }, + { + "epoch": 3.6505681818181817, + "grad_norm": 1.5859375, + "learning_rate": 3.39521745996535e-05, + "loss": 4.026, + "step": 424050 + }, + { + "epoch": 3.6509986225895315, + "grad_norm": 3.203125, + "learning_rate": 3.393183509826024e-05, + "loss": 4.3433, + "step": 424100 + }, + { + "epoch": 3.6514290633608817, + "grad_norm": 4.875, + "learning_rate": 3.391150044612812e-05, + "loss": 4.2548, + "step": 424150 + }, + { + "epoch": 3.6518595041322315, + "grad_norm": 4.0625, + "learning_rate": 3.3891170644749705e-05, + "loss": 4.6771, + "step": 424200 + }, + { + "epoch": 3.6522899449035813, + "grad_norm": 1.6328125, + "learning_rate": 3.387084569561712e-05, + "loss": 4.0842, + "step": 424250 + }, + { + "epoch": 3.652720385674931, + "grad_norm": 6.9375, + "learning_rate": 3.385052560022218e-05, + "loss": 4.2175, + "step": 424300 + }, + { + "epoch": 3.653150826446281, + "grad_norm": 2.890625, + "learning_rate": 3.383021036005634e-05, + "loss": 4.0116, + "step": 424350 + }, + { + "epoch": 3.653581267217631, + "grad_norm": 4.8125, + "learning_rate": 3.380989997661066e-05, + "loss": 4.3937, + "step": 424400 + }, + { + "epoch": 3.654011707988981, + "grad_norm": 5.4375, + "learning_rate": 3.378959445137594e-05, + "loss": 4.0663, + "step": 424450 + }, + { + "epoch": 3.6544421487603307, + "grad_norm": 3.46875, + "learning_rate": 3.376929378584257e-05, + "loss": 4.1896, + "step": 424500 + }, + { + "epoch": 3.6548725895316805, + "grad_norm": 2.453125, + "learning_rate": 3.374899798150046e-05, + "loss": 4.2375, + "step": 424550 + }, + { + "epoch": 3.6553030303030303, + "grad_norm": 5.0, + "learning_rate": 3.37287070398394e-05, + "loss": 4.3769, + "step": 424600 + }, + { + "epoch": 3.65573347107438, + "grad_norm": 3.265625, + "learning_rate": 3.3708420962348655e-05, + "loss": 4.132, + "step": 424650 + }, + { + "epoch": 3.65616391184573, + "grad_norm": 1.8828125, + "learning_rate": 3.368813975051715e-05, + "loss": 4.1329, + "step": 424700 + }, + { + "epoch": 3.6565943526170797, + "grad_norm": 3.609375, + "learning_rate": 3.366786340583358e-05, + "loss": 4.1082, + "step": 424750 + }, + { + "epoch": 3.65702479338843, + "grad_norm": 2.875, + "learning_rate": 3.364759192978607e-05, + "loss": 4.3278, + "step": 424800 + }, + { + "epoch": 3.6574552341597797, + "grad_norm": 3.953125, + "learning_rate": 3.3627325323862537e-05, + "loss": 4.2373, + "step": 424850 + }, + { + "epoch": 3.6578856749311295, + "grad_norm": 2.15625, + "learning_rate": 3.360706358955055e-05, + "loss": 4.2467, + "step": 424900 + }, + { + "epoch": 3.6583161157024793, + "grad_norm": 1.265625, + "learning_rate": 3.3586806728337236e-05, + "loss": 3.951, + "step": 424950 + }, + { + "epoch": 3.658746556473829, + "grad_norm": 2.28125, + "learning_rate": 3.3566554741709434e-05, + "loss": 4.3198, + "step": 425000 + }, + { + "epoch": 3.6591769972451793, + "grad_norm": 3.9375, + "learning_rate": 3.354630763115356e-05, + "loss": 4.327, + "step": 425050 + }, + { + "epoch": 3.659607438016529, + "grad_norm": 2.78125, + "learning_rate": 3.3526065398155706e-05, + "loss": 4.1036, + "step": 425100 + }, + { + "epoch": 3.660037878787879, + "grad_norm": 2.96875, + "learning_rate": 3.350582804420166e-05, + "loss": 4.4225, + "step": 425150 + }, + { + "epoch": 3.6604683195592287, + "grad_norm": 1.359375, + "learning_rate": 3.348559557077677e-05, + "loss": 4.3596, + "step": 425200 + }, + { + "epoch": 3.6608987603305785, + "grad_norm": 3.859375, + "learning_rate": 3.346536797936606e-05, + "loss": 4.3288, + "step": 425250 + }, + { + "epoch": 3.6613292011019283, + "grad_norm": 2.5, + "learning_rate": 3.3445145271454184e-05, + "loss": 4.4486, + "step": 425300 + }, + { + "epoch": 3.661759641873278, + "grad_norm": 1.4453125, + "learning_rate": 3.3424927448525446e-05, + "loss": 4.2422, + "step": 425350 + }, + { + "epoch": 3.662190082644628, + "grad_norm": 3.046875, + "learning_rate": 3.340471451206376e-05, + "loss": 4.6697, + "step": 425400 + }, + { + "epoch": 3.662620523415978, + "grad_norm": 3.828125, + "learning_rate": 3.3384506463552775e-05, + "loss": 4.5314, + "step": 425450 + }, + { + "epoch": 3.663050964187328, + "grad_norm": 2.296875, + "learning_rate": 3.33643033044757e-05, + "loss": 4.0822, + "step": 425500 + }, + { + "epoch": 3.6634814049586777, + "grad_norm": 2.25, + "learning_rate": 3.3344105036315384e-05, + "loss": 4.1879, + "step": 425550 + }, + { + "epoch": 3.6639118457300275, + "grad_norm": 3.25, + "learning_rate": 3.332391166055435e-05, + "loss": 4.4086, + "step": 425600 + }, + { + "epoch": 3.6643422865013773, + "grad_norm": 3.65625, + "learning_rate": 3.3303723178674736e-05, + "loss": 4.0846, + "step": 425650 + }, + { + "epoch": 3.6647727272727275, + "grad_norm": 3.453125, + "learning_rate": 3.328353959215831e-05, + "loss": 4.0723, + "step": 425700 + }, + { + "epoch": 3.6652031680440773, + "grad_norm": 2.296875, + "learning_rate": 3.326336090248656e-05, + "loss": 4.3801, + "step": 425750 + }, + { + "epoch": 3.665633608815427, + "grad_norm": 3.28125, + "learning_rate": 3.324318711114055e-05, + "loss": 4.4875, + "step": 425800 + }, + { + "epoch": 3.666064049586777, + "grad_norm": 1.765625, + "learning_rate": 3.322301821960097e-05, + "loss": 4.11, + "step": 425850 + }, + { + "epoch": 3.6664944903581267, + "grad_norm": 3.375, + "learning_rate": 3.320285422934817e-05, + "loss": 4.3325, + "step": 425900 + }, + { + "epoch": 3.6669249311294765, + "grad_norm": 3.4375, + "learning_rate": 3.318269514186215e-05, + "loss": 4.3303, + "step": 425950 + }, + { + "epoch": 3.6673553719008263, + "grad_norm": 2.296875, + "learning_rate": 3.3162540958622514e-05, + "loss": 4.3698, + "step": 426000 + }, + { + "epoch": 3.6673553719008263, + "eval_loss": 4.991609573364258, + "eval_runtime": 23.8703, + "eval_samples_per_second": 26.812, + "eval_steps_per_second": 13.406, + "eval_tts_loss": 7.474097057115932, + "step": 426000 + }, + { + "epoch": 3.667785812672176, + "grad_norm": 2.25, + "learning_rate": 3.314239168110863e-05, + "loss": 4.4304, + "step": 426050 + }, + { + "epoch": 3.6682162534435263, + "grad_norm": 2.171875, + "learning_rate": 3.3122247310799293e-05, + "loss": 3.7824, + "step": 426100 + }, + { + "epoch": 3.668646694214876, + "grad_norm": 2.765625, + "learning_rate": 3.310210784917315e-05, + "loss": 4.4677, + "step": 426150 + }, + { + "epoch": 3.669077134986226, + "grad_norm": 7.0, + "learning_rate": 3.3081973297708336e-05, + "loss": 4.2738, + "step": 426200 + }, + { + "epoch": 3.6695075757575757, + "grad_norm": 3.015625, + "learning_rate": 3.3061843657882684e-05, + "loss": 4.1526, + "step": 426250 + }, + { + "epoch": 3.6699380165289255, + "grad_norm": 2.125, + "learning_rate": 3.3041718931173746e-05, + "loss": 4.3083, + "step": 426300 + }, + { + "epoch": 3.6703684573002757, + "grad_norm": 3.140625, + "learning_rate": 3.302159911905856e-05, + "loss": 4.2445, + "step": 426350 + }, + { + "epoch": 3.6707988980716255, + "grad_norm": 5.8125, + "learning_rate": 3.3001484223013836e-05, + "loss": 4.4528, + "step": 426400 + }, + { + "epoch": 3.6712293388429753, + "grad_norm": 1.3828125, + "learning_rate": 3.298137424451606e-05, + "loss": 4.3865, + "step": 426450 + }, + { + "epoch": 3.671659779614325, + "grad_norm": 2.640625, + "learning_rate": 3.296126918504122e-05, + "loss": 4.2466, + "step": 426500 + }, + { + "epoch": 3.672090220385675, + "grad_norm": 5.09375, + "learning_rate": 3.2941169046064976e-05, + "loss": 4.1227, + "step": 426550 + }, + { + "epoch": 3.6725206611570247, + "grad_norm": 3.765625, + "learning_rate": 3.2921073829062654e-05, + "loss": 4.5522, + "step": 426600 + }, + { + "epoch": 3.6729511019283745, + "grad_norm": 5.03125, + "learning_rate": 3.290098353550915e-05, + "loss": 4.3921, + "step": 426650 + }, + { + "epoch": 3.6733815426997243, + "grad_norm": 2.8125, + "learning_rate": 3.2880898166879125e-05, + "loss": 4.0194, + "step": 426700 + }, + { + "epoch": 3.6738119834710745, + "grad_norm": 5.46875, + "learning_rate": 3.2860817724646765e-05, + "loss": 4.365, + "step": 426750 + }, + { + "epoch": 3.6742424242424243, + "grad_norm": 3.421875, + "learning_rate": 3.2840742210285925e-05, + "loss": 4.1843, + "step": 426800 + }, + { + "epoch": 3.674672865013774, + "grad_norm": 3.109375, + "learning_rate": 3.2820671625270116e-05, + "loss": 4.4287, + "step": 426850 + }, + { + "epoch": 3.675103305785124, + "grad_norm": 4.09375, + "learning_rate": 3.2800605971072465e-05, + "loss": 4.2402, + "step": 426900 + }, + { + "epoch": 3.6755337465564737, + "grad_norm": 1.2578125, + "learning_rate": 3.278054524916572e-05, + "loss": 4.1549, + "step": 426950 + }, + { + "epoch": 3.675964187327824, + "grad_norm": 3.71875, + "learning_rate": 3.2760489461022356e-05, + "loss": 4.1764, + "step": 427000 + }, + { + "epoch": 3.6763946280991737, + "grad_norm": 4.21875, + "learning_rate": 3.274043860811444e-05, + "loss": 4.3668, + "step": 427050 + }, + { + "epoch": 3.6768250688705235, + "grad_norm": 1.328125, + "learning_rate": 3.272039269191355e-05, + "loss": 4.0412, + "step": 427100 + }, + { + "epoch": 3.6772555096418733, + "grad_norm": 4.40625, + "learning_rate": 3.2700351713891107e-05, + "loss": 4.38, + "step": 427150 + }, + { + "epoch": 3.677685950413223, + "grad_norm": 2.984375, + "learning_rate": 3.2680315675518046e-05, + "loss": 3.9532, + "step": 427200 + }, + { + "epoch": 3.678116391184573, + "grad_norm": 2.953125, + "learning_rate": 3.2660284578264954e-05, + "loss": 4.3657, + "step": 427250 + }, + { + "epoch": 3.6785468319559227, + "grad_norm": 4.4375, + "learning_rate": 3.264025842360211e-05, + "loss": 4.3934, + "step": 427300 + }, + { + "epoch": 3.6789772727272725, + "grad_norm": 3.875, + "learning_rate": 3.2620237212999375e-05, + "loss": 4.365, + "step": 427350 + }, + { + "epoch": 3.6794077134986227, + "grad_norm": 3.0, + "learning_rate": 3.2600220947926255e-05, + "loss": 4.0804, + "step": 427400 + }, + { + "epoch": 3.6798381542699725, + "grad_norm": 2.671875, + "learning_rate": 3.25802096298519e-05, + "loss": 4.2792, + "step": 427450 + }, + { + "epoch": 3.6802685950413223, + "grad_norm": 2.09375, + "learning_rate": 3.25602032602451e-05, + "loss": 4.6233, + "step": 427500 + }, + { + "epoch": 3.680699035812672, + "grad_norm": 2.578125, + "learning_rate": 3.2540201840574246e-05, + "loss": 4.3971, + "step": 427550 + }, + { + "epoch": 3.681129476584022, + "grad_norm": 2.125, + "learning_rate": 3.252020537230751e-05, + "loss": 4.1721, + "step": 427600 + }, + { + "epoch": 3.681559917355372, + "grad_norm": 3.109375, + "learning_rate": 3.250021385691244e-05, + "loss": 4.462, + "step": 427650 + }, + { + "epoch": 3.681990358126722, + "grad_norm": 3.921875, + "learning_rate": 3.248022729585648e-05, + "loss": 4.2483, + "step": 427700 + }, + { + "epoch": 3.6824207988980717, + "grad_norm": 2.609375, + "learning_rate": 3.246024569060656e-05, + "loss": 4.359, + "step": 427750 + }, + { + "epoch": 3.6828512396694215, + "grad_norm": 2.78125, + "learning_rate": 3.2440269042629254e-05, + "loss": 4.6668, + "step": 427800 + }, + { + "epoch": 3.6832816804407713, + "grad_norm": 2.671875, + "learning_rate": 3.242029735339092e-05, + "loss": 4.3851, + "step": 427850 + }, + { + "epoch": 3.683712121212121, + "grad_norm": 2.71875, + "learning_rate": 3.2400330624357314e-05, + "loss": 4.3193, + "step": 427900 + }, + { + "epoch": 3.684142561983471, + "grad_norm": 1.5390625, + "learning_rate": 3.238036885699397e-05, + "loss": 4.1023, + "step": 427950 + }, + { + "epoch": 3.6845730027548207, + "grad_norm": 2.84375, + "learning_rate": 3.2360412052766096e-05, + "loss": 4.6179, + "step": 428000 + }, + { + "epoch": 3.685003443526171, + "grad_norm": 4.03125, + "learning_rate": 3.2340460213138443e-05, + "loss": 4.2287, + "step": 428050 + }, + { + "epoch": 3.6854338842975207, + "grad_norm": 3.3125, + "learning_rate": 3.232051333957544e-05, + "loss": 4.1586, + "step": 428100 + }, + { + "epoch": 3.6858643250688705, + "grad_norm": 3.5625, + "learning_rate": 3.230057143354114e-05, + "loss": 4.2369, + "step": 428150 + }, + { + "epoch": 3.6862947658402203, + "grad_norm": 3.90625, + "learning_rate": 3.228063449649924e-05, + "loss": 4.5667, + "step": 428200 + }, + { + "epoch": 3.68672520661157, + "grad_norm": 3.765625, + "learning_rate": 3.2260702529913034e-05, + "loss": 4.7902, + "step": 428250 + }, + { + "epoch": 3.6871556473829203, + "grad_norm": 4.53125, + "learning_rate": 3.224077553524555e-05, + "loss": 4.1604, + "step": 428300 + }, + { + "epoch": 3.68758608815427, + "grad_norm": 2.6875, + "learning_rate": 3.2220853513959345e-05, + "loss": 4.2735, + "step": 428350 + }, + { + "epoch": 3.68801652892562, + "grad_norm": 2.984375, + "learning_rate": 3.2200936467516675e-05, + "loss": 4.2855, + "step": 428400 + }, + { + "epoch": 3.6884469696969697, + "grad_norm": 3.84375, + "learning_rate": 3.218102439737939e-05, + "loss": 4.4194, + "step": 428450 + }, + { + "epoch": 3.6888774104683195, + "grad_norm": 2.171875, + "learning_rate": 3.216111730500896e-05, + "loss": 4.3832, + "step": 428500 + }, + { + "epoch": 3.6893078512396693, + "grad_norm": 3.140625, + "learning_rate": 3.21412151918666e-05, + "loss": 4.1877, + "step": 428550 + }, + { + "epoch": 3.689738292011019, + "grad_norm": 2.953125, + "learning_rate": 3.2121318059413076e-05, + "loss": 4.7643, + "step": 428600 + }, + { + "epoch": 3.690168732782369, + "grad_norm": 4.3125, + "learning_rate": 3.21014259091087e-05, + "loss": 4.0911, + "step": 428650 + }, + { + "epoch": 3.690599173553719, + "grad_norm": 2.140625, + "learning_rate": 3.2081538742413596e-05, + "loss": 4.1611, + "step": 428700 + }, + { + "epoch": 3.691029614325069, + "grad_norm": 3.9375, + "learning_rate": 3.2061656560787424e-05, + "loss": 4.243, + "step": 428750 + }, + { + "epoch": 3.6914600550964187, + "grad_norm": 1.5703125, + "learning_rate": 3.204177936568945e-05, + "loss": 4.2006, + "step": 428800 + }, + { + "epoch": 3.6918904958677685, + "grad_norm": 2.578125, + "learning_rate": 3.202190715857872e-05, + "loss": 4.3707, + "step": 428850 + }, + { + "epoch": 3.6923209366391183, + "grad_norm": 4.53125, + "learning_rate": 3.2002039940913684e-05, + "loss": 4.3832, + "step": 428900 + }, + { + "epoch": 3.6927513774104685, + "grad_norm": 3.53125, + "learning_rate": 3.198217771415265e-05, + "loss": 4.2319, + "step": 428950 + }, + { + "epoch": 3.6931818181818183, + "grad_norm": 3.296875, + "learning_rate": 3.196232047975343e-05, + "loss": 4.0473, + "step": 429000 + }, + { + "epoch": 3.6931818181818183, + "eval_loss": 4.99127197265625, + "eval_runtime": 24.3883, + "eval_samples_per_second": 26.242, + "eval_steps_per_second": 13.121, + "eval_tts_loss": 7.473439829219363, + "step": 429000 + }, + { + "epoch": 3.693612258953168, + "grad_norm": 4.875, + "learning_rate": 3.1942468239173495e-05, + "loss": 4.2664, + "step": 429050 + }, + { + "epoch": 3.694042699724518, + "grad_norm": 3.9375, + "learning_rate": 3.192262099386992e-05, + "loss": 4.5117, + "step": 429100 + }, + { + "epoch": 3.6944731404958677, + "grad_norm": 3.5625, + "learning_rate": 3.1902778745299576e-05, + "loss": 4.3189, + "step": 429150 + }, + { + "epoch": 3.6949035812672175, + "grad_norm": 2.53125, + "learning_rate": 3.1882941494918686e-05, + "loss": 4.1603, + "step": 429200 + }, + { + "epoch": 3.6953340220385673, + "grad_norm": 3.59375, + "learning_rate": 3.186310924418335e-05, + "loss": 4.0825, + "step": 429250 + }, + { + "epoch": 3.695764462809917, + "grad_norm": 3.171875, + "learning_rate": 3.184328199454921e-05, + "loss": 4.4443, + "step": 429300 + }, + { + "epoch": 3.6961949035812673, + "grad_norm": 2.875, + "learning_rate": 3.1823459747471494e-05, + "loss": 4.2424, + "step": 429350 + }, + { + "epoch": 3.696625344352617, + "grad_norm": 4.4375, + "learning_rate": 3.180364250440521e-05, + "loss": 4.4861, + "step": 429400 + }, + { + "epoch": 3.697055785123967, + "grad_norm": 3.109375, + "learning_rate": 3.17838302668048e-05, + "loss": 4.1681, + "step": 429450 + }, + { + "epoch": 3.6974862258953167, + "grad_norm": 4.25, + "learning_rate": 3.176402303612446e-05, + "loss": 4.7728, + "step": 429500 + }, + { + "epoch": 3.6979166666666665, + "grad_norm": 4.375, + "learning_rate": 3.1744220813818036e-05, + "loss": 4.06, + "step": 429550 + }, + { + "epoch": 3.6983471074380168, + "grad_norm": 1.9609375, + "learning_rate": 3.172442360133896e-05, + "loss": 4.3757, + "step": 429600 + }, + { + "epoch": 3.6987775482093666, + "grad_norm": 3.46875, + "learning_rate": 3.170463140014028e-05, + "loss": 4.4362, + "step": 429650 + }, + { + "epoch": 3.6992079889807163, + "grad_norm": 5.0, + "learning_rate": 3.168484421167471e-05, + "loss": 4.3744, + "step": 429700 + }, + { + "epoch": 3.699638429752066, + "grad_norm": 3.609375, + "learning_rate": 3.166506203739461e-05, + "loss": 4.4303, + "step": 429750 + }, + { + "epoch": 3.700068870523416, + "grad_norm": 3.625, + "learning_rate": 3.164528487875188e-05, + "loss": 4.492, + "step": 429800 + }, + { + "epoch": 3.7004993112947657, + "grad_norm": 3.6875, + "learning_rate": 3.1625512737198216e-05, + "loss": 4.1798, + "step": 429850 + }, + { + "epoch": 3.7009297520661155, + "grad_norm": 5.75, + "learning_rate": 3.16057456141848e-05, + "loss": 4.3871, + "step": 429900 + }, + { + "epoch": 3.7013601928374653, + "grad_norm": 4.71875, + "learning_rate": 3.1585983511162495e-05, + "loss": 4.2744, + "step": 429950 + }, + { + "epoch": 3.7017906336088156, + "grad_norm": 2.75, + "learning_rate": 3.156622642958181e-05, + "loss": 4.2747, + "step": 430000 + }, + { + "epoch": 3.7022210743801653, + "grad_norm": 3.78125, + "learning_rate": 3.154647437089284e-05, + "loss": 4.485, + "step": 430050 + }, + { + "epoch": 3.702651515151515, + "grad_norm": 1.7421875, + "learning_rate": 3.1526727336545405e-05, + "loss": 4.765, + "step": 430100 + }, + { + "epoch": 3.703081955922865, + "grad_norm": 2.15625, + "learning_rate": 3.150698532798888e-05, + "loss": 4.0358, + "step": 430150 + }, + { + "epoch": 3.7035123966942147, + "grad_norm": 3.15625, + "learning_rate": 3.1487248346672205e-05, + "loss": 3.9946, + "step": 430200 + }, + { + "epoch": 3.703942837465565, + "grad_norm": 1.9453125, + "learning_rate": 3.146751639404412e-05, + "loss": 4.348, + "step": 430250 + }, + { + "epoch": 3.7043732782369148, + "grad_norm": 3.421875, + "learning_rate": 3.144778947155288e-05, + "loss": 4.3902, + "step": 430300 + }, + { + "epoch": 3.7048037190082646, + "grad_norm": 3.1875, + "learning_rate": 3.142806758064637e-05, + "loss": 4.1994, + "step": 430350 + }, + { + "epoch": 3.7052341597796143, + "grad_norm": 2.359375, + "learning_rate": 3.140835072277223e-05, + "loss": 4.4339, + "step": 430400 + }, + { + "epoch": 3.705664600550964, + "grad_norm": 5.0, + "learning_rate": 3.1388638899377534e-05, + "loss": 4.2263, + "step": 430450 + }, + { + "epoch": 3.706095041322314, + "grad_norm": 2.3125, + "learning_rate": 3.136893211190909e-05, + "loss": 4.3503, + "step": 430500 + }, + { + "epoch": 3.7065254820936637, + "grad_norm": 2.859375, + "learning_rate": 3.13492303618134e-05, + "loss": 3.8462, + "step": 430550 + }, + { + "epoch": 3.7069559228650135, + "grad_norm": 3.09375, + "learning_rate": 3.132953365053649e-05, + "loss": 4.2801, + "step": 430600 + }, + { + "epoch": 3.7073863636363638, + "grad_norm": 3.3125, + "learning_rate": 3.130984197952407e-05, + "loss": 4.2556, + "step": 430650 + }, + { + "epoch": 3.7078168044077136, + "grad_norm": 2.5, + "learning_rate": 3.129015535022145e-05, + "loss": 4.0256, + "step": 430700 + }, + { + "epoch": 3.7082472451790633, + "grad_norm": 2.546875, + "learning_rate": 3.127047376407356e-05, + "loss": 4.0735, + "step": 430750 + }, + { + "epoch": 3.708677685950413, + "grad_norm": 3.78125, + "learning_rate": 3.125079722252506e-05, + "loss": 4.3669, + "step": 430800 + }, + { + "epoch": 3.709108126721763, + "grad_norm": 5.84375, + "learning_rate": 3.123112572702013e-05, + "loss": 4.2698, + "step": 430850 + }, + { + "epoch": 3.709538567493113, + "grad_norm": 2.53125, + "learning_rate": 3.121145927900256e-05, + "loss": 4.3271, + "step": 430900 + }, + { + "epoch": 3.709969008264463, + "grad_norm": 3.140625, + "learning_rate": 3.1191797879915966e-05, + "loss": 4.0091, + "step": 430950 + }, + { + "epoch": 3.7103994490358128, + "grad_norm": 2.65625, + "learning_rate": 3.1172141531203314e-05, + "loss": 4.0807, + "step": 431000 + }, + { + "epoch": 3.7108298898071626, + "grad_norm": 2.9375, + "learning_rate": 3.115249023430736e-05, + "loss": 4.6708, + "step": 431050 + }, + { + "epoch": 3.7112603305785123, + "grad_norm": 1.609375, + "learning_rate": 3.113284399067052e-05, + "loss": 4.4226, + "step": 431100 + }, + { + "epoch": 3.711690771349862, + "grad_norm": 2.390625, + "learning_rate": 3.111320280173476e-05, + "loss": 4.3794, + "step": 431150 + }, + { + "epoch": 3.712121212121212, + "grad_norm": 4.59375, + "learning_rate": 3.109356666894171e-05, + "loss": 4.4494, + "step": 431200 + }, + { + "epoch": 3.7125516528925617, + "grad_norm": 1.90625, + "learning_rate": 3.1073935593732606e-05, + "loss": 4.2612, + "step": 431250 + }, + { + "epoch": 3.712982093663912, + "grad_norm": 3.015625, + "learning_rate": 3.1054309577548334e-05, + "loss": 4.3012, + "step": 431300 + }, + { + "epoch": 3.7134125344352618, + "grad_norm": 2.625, + "learning_rate": 3.103468862182935e-05, + "loss": 4.1803, + "step": 431350 + }, + { + "epoch": 3.7138429752066116, + "grad_norm": 2.578125, + "learning_rate": 3.101507272801589e-05, + "loss": 4.4064, + "step": 431400 + }, + { + "epoch": 3.7142734159779613, + "grad_norm": 2.390625, + "learning_rate": 3.099546189754766e-05, + "loss": 4.3279, + "step": 431450 + }, + { + "epoch": 3.714703856749311, + "grad_norm": 1.828125, + "learning_rate": 3.0975856131864065e-05, + "loss": 3.821, + "step": 431500 + }, + { + "epoch": 3.7151342975206614, + "grad_norm": 2.765625, + "learning_rate": 3.0956255432404117e-05, + "loss": 4.0931, + "step": 431550 + }, + { + "epoch": 3.715564738292011, + "grad_norm": 2.03125, + "learning_rate": 3.093665980060644e-05, + "loss": 4.6108, + "step": 431600 + }, + { + "epoch": 3.715995179063361, + "grad_norm": 2.53125, + "learning_rate": 3.091706923790936e-05, + "loss": 4.1827, + "step": 431650 + }, + { + "epoch": 3.7164256198347108, + "grad_norm": 2.515625, + "learning_rate": 3.08974837457508e-05, + "loss": 4.2806, + "step": 431700 + }, + { + "epoch": 3.7168560606060606, + "grad_norm": 3.515625, + "learning_rate": 3.087790332556819e-05, + "loss": 4.4051, + "step": 431750 + }, + { + "epoch": 3.7172865013774103, + "grad_norm": 3.40625, + "learning_rate": 3.08583279787988e-05, + "loss": 4.7127, + "step": 431800 + }, + { + "epoch": 3.71771694214876, + "grad_norm": 3.640625, + "learning_rate": 3.083875770687936e-05, + "loss": 4.0419, + "step": 431850 + }, + { + "epoch": 3.71814738292011, + "grad_norm": 3.0625, + "learning_rate": 3.0819192511246274e-05, + "loss": 4.2122, + "step": 431900 + }, + { + "epoch": 3.71857782369146, + "grad_norm": 3.5, + "learning_rate": 3.0799632393335677e-05, + "loss": 4.3132, + "step": 431950 + }, + { + "epoch": 3.71900826446281, + "grad_norm": 4.21875, + "learning_rate": 3.0780077354583134e-05, + "loss": 4.4361, + "step": 432000 + }, + { + "epoch": 3.71900826446281, + "eval_loss": 4.991272449493408, + "eval_runtime": 23.5652, + "eval_samples_per_second": 27.159, + "eval_steps_per_second": 13.579, + "eval_tts_loss": 7.483763048702864, + "step": 432000 + }, + { + "epoch": 3.7194387052341598, + "grad_norm": 3.3125, + "learning_rate": 3.0760527396423954e-05, + "loss": 4.3255, + "step": 432050 + }, + { + "epoch": 3.7198691460055096, + "grad_norm": 6.53125, + "learning_rate": 3.0740982520293125e-05, + "loss": 3.975, + "step": 432100 + }, + { + "epoch": 3.7202995867768593, + "grad_norm": 3.53125, + "learning_rate": 3.072144272762516e-05, + "loss": 4.3928, + "step": 432150 + }, + { + "epoch": 3.7207300275482096, + "grad_norm": 3.171875, + "learning_rate": 3.0701908019854256e-05, + "loss": 4.3393, + "step": 432200 + }, + { + "epoch": 3.7211604683195594, + "grad_norm": 2.5, + "learning_rate": 3.0682378398414194e-05, + "loss": 4.1379, + "step": 432250 + }, + { + "epoch": 3.721590909090909, + "grad_norm": 1.9609375, + "learning_rate": 3.066285386473838e-05, + "loss": 4.005, + "step": 432300 + }, + { + "epoch": 3.722021349862259, + "grad_norm": 1.9609375, + "learning_rate": 3.064333442025995e-05, + "loss": 4.2459, + "step": 432350 + }, + { + "epoch": 3.7224517906336088, + "grad_norm": 2.921875, + "learning_rate": 3.062382006641155e-05, + "loss": 3.925, + "step": 432400 + }, + { + "epoch": 3.7228822314049586, + "grad_norm": 3.28125, + "learning_rate": 3.060431080462548e-05, + "loss": 4.2413, + "step": 432450 + }, + { + "epoch": 3.7233126721763083, + "grad_norm": 5.0, + "learning_rate": 3.0584806636333693e-05, + "loss": 4.5442, + "step": 432500 + }, + { + "epoch": 3.723743112947658, + "grad_norm": 2.9375, + "learning_rate": 3.0565307562967746e-05, + "loss": 4.1942, + "step": 432550 + }, + { + "epoch": 3.7241735537190084, + "grad_norm": 5.71875, + "learning_rate": 3.05458135859588e-05, + "loss": 4.1234, + "step": 432600 + }, + { + "epoch": 3.724603994490358, + "grad_norm": 3.5625, + "learning_rate": 3.052632470673774e-05, + "loss": 4.0179, + "step": 432650 + }, + { + "epoch": 3.725034435261708, + "grad_norm": 2.171875, + "learning_rate": 3.050684092673497e-05, + "loss": 4.4334, + "step": 432700 + }, + { + "epoch": 3.7254648760330578, + "grad_norm": 3.171875, + "learning_rate": 3.0487362247380558e-05, + "loss": 4.1771, + "step": 432750 + }, + { + "epoch": 3.7258953168044076, + "grad_norm": 4.6875, + "learning_rate": 3.0467888670104194e-05, + "loss": 4.2968, + "step": 432800 + }, + { + "epoch": 3.726325757575758, + "grad_norm": 3.0625, + "learning_rate": 3.0448420196335203e-05, + "loss": 4.3296, + "step": 432850 + }, + { + "epoch": 3.7267561983471076, + "grad_norm": 1.890625, + "learning_rate": 3.0428956827502488e-05, + "loss": 4.0443, + "step": 432900 + }, + { + "epoch": 3.7271866391184574, + "grad_norm": 3.34375, + "learning_rate": 3.0409498565034688e-05, + "loss": 4.1331, + "step": 432950 + }, + { + "epoch": 3.727617079889807, + "grad_norm": 4.34375, + "learning_rate": 3.039004541035997e-05, + "loss": 4.5738, + "step": 433000 + }, + { + "epoch": 3.728047520661157, + "grad_norm": 2.96875, + "learning_rate": 3.037059736490614e-05, + "loss": 4.1179, + "step": 433050 + }, + { + "epoch": 3.7284779614325068, + "grad_norm": 2.421875, + "learning_rate": 3.035115443010066e-05, + "loss": 4.2804, + "step": 433100 + }, + { + "epoch": 3.7289084022038566, + "grad_norm": 3.03125, + "learning_rate": 3.0331716607370552e-05, + "loss": 4.435, + "step": 433150 + }, + { + "epoch": 3.7293388429752063, + "grad_norm": 3.171875, + "learning_rate": 3.031228389814259e-05, + "loss": 4.3343, + "step": 433200 + }, + { + "epoch": 3.7297692837465566, + "grad_norm": 6.9375, + "learning_rate": 3.0292856303843087e-05, + "loss": 4.3997, + "step": 433250 + }, + { + "epoch": 3.7301997245179064, + "grad_norm": 1.9921875, + "learning_rate": 3.027343382589788e-05, + "loss": 4.5484, + "step": 433300 + }, + { + "epoch": 3.730630165289256, + "grad_norm": 5.0, + "learning_rate": 3.025401646573265e-05, + "loss": 4.3565, + "step": 433350 + }, + { + "epoch": 3.731060606060606, + "grad_norm": 3.734375, + "learning_rate": 3.0234604224772543e-05, + "loss": 4.313, + "step": 433400 + }, + { + "epoch": 3.7314910468319558, + "grad_norm": 3.390625, + "learning_rate": 3.021519710444235e-05, + "loss": 4.0807, + "step": 433450 + }, + { + "epoch": 3.731921487603306, + "grad_norm": 3.15625, + "learning_rate": 3.01957951061666e-05, + "loss": 4.1778, + "step": 433500 + }, + { + "epoch": 3.732351928374656, + "grad_norm": 2.515625, + "learning_rate": 3.0176398231369285e-05, + "loss": 4.3013, + "step": 433550 + }, + { + "epoch": 3.7327823691460056, + "grad_norm": 6.28125, + "learning_rate": 3.0157006481474072e-05, + "loss": 4.745, + "step": 433600 + }, + { + "epoch": 3.7332128099173554, + "grad_norm": 2.859375, + "learning_rate": 3.0137619857904344e-05, + "loss": 4.2788, + "step": 433650 + }, + { + "epoch": 3.733643250688705, + "grad_norm": 4.8125, + "learning_rate": 3.0118238362083007e-05, + "loss": 4.1373, + "step": 433700 + }, + { + "epoch": 3.734073691460055, + "grad_norm": 2.46875, + "learning_rate": 3.0098861995432616e-05, + "loss": 4.4103, + "step": 433750 + }, + { + "epoch": 3.7345041322314048, + "grad_norm": 3.265625, + "learning_rate": 3.0079490759375362e-05, + "loss": 4.1113, + "step": 433800 + }, + { + "epoch": 3.7349345730027546, + "grad_norm": 3.078125, + "learning_rate": 3.0060124655333023e-05, + "loss": 4.3222, + "step": 433850 + }, + { + "epoch": 3.735365013774105, + "grad_norm": 2.0, + "learning_rate": 3.004076368472707e-05, + "loss": 4.2851, + "step": 433900 + }, + { + "epoch": 3.7357954545454546, + "grad_norm": 3.46875, + "learning_rate": 3.0021407848978554e-05, + "loss": 4.3138, + "step": 433950 + }, + { + "epoch": 3.7362258953168044, + "grad_norm": 4.0, + "learning_rate": 3.0002057149508133e-05, + "loss": 4.1978, + "step": 434000 + }, + { + "epoch": 3.736656336088154, + "grad_norm": 2.96875, + "learning_rate": 2.998271158773611e-05, + "loss": 4.6576, + "step": 434050 + }, + { + "epoch": 3.737086776859504, + "grad_norm": 2.875, + "learning_rate": 2.996337116508242e-05, + "loss": 4.314, + "step": 434100 + }, + { + "epoch": 3.737517217630854, + "grad_norm": 3.953125, + "learning_rate": 2.994403588296656e-05, + "loss": 4.3415, + "step": 434150 + }, + { + "epoch": 3.737947658402204, + "grad_norm": 4.0625, + "learning_rate": 2.9924705742807778e-05, + "loss": 4.3394, + "step": 434200 + }, + { + "epoch": 3.738378099173554, + "grad_norm": 1.7578125, + "learning_rate": 2.9905380746024846e-05, + "loss": 4.1042, + "step": 434250 + }, + { + "epoch": 3.7388085399449036, + "grad_norm": 1.671875, + "learning_rate": 2.9886060894036095e-05, + "loss": 4.4118, + "step": 434300 + }, + { + "epoch": 3.7392389807162534, + "grad_norm": 4.75, + "learning_rate": 2.9866746188259655e-05, + "loss": 4.0821, + "step": 434350 + }, + { + "epoch": 3.739669421487603, + "grad_norm": 4.65625, + "learning_rate": 2.9847436630113136e-05, + "loss": 4.2308, + "step": 434400 + }, + { + "epoch": 3.740099862258953, + "grad_norm": 3.8125, + "learning_rate": 2.9828132221013817e-05, + "loss": 4.1144, + "step": 434450 + }, + { + "epoch": 3.7405303030303028, + "grad_norm": 3.890625, + "learning_rate": 2.9808832962378664e-05, + "loss": 4.2559, + "step": 434500 + }, + { + "epoch": 3.740960743801653, + "grad_norm": 5.53125, + "learning_rate": 2.9789538855624098e-05, + "loss": 4.3842, + "step": 434550 + }, + { + "epoch": 3.741391184573003, + "grad_norm": 1.9609375, + "learning_rate": 2.9770249902166338e-05, + "loss": 4.4714, + "step": 434600 + }, + { + "epoch": 3.7418216253443526, + "grad_norm": 2.3125, + "learning_rate": 2.975096610342114e-05, + "loss": 4.2406, + "step": 434650 + }, + { + "epoch": 3.7422520661157024, + "grad_norm": 2.609375, + "learning_rate": 2.9731687460803894e-05, + "loss": 4.2844, + "step": 434700 + }, + { + "epoch": 3.7426825068870526, + "grad_norm": 2.21875, + "learning_rate": 2.9712413975729557e-05, + "loss": 4.4126, + "step": 434750 + }, + { + "epoch": 3.7431129476584024, + "grad_norm": 1.984375, + "learning_rate": 2.9693145649612876e-05, + "loss": 4.1002, + "step": 434800 + }, + { + "epoch": 3.743543388429752, + "grad_norm": 3.125, + "learning_rate": 2.9673882483867966e-05, + "loss": 4.5453, + "step": 434850 + }, + { + "epoch": 3.743973829201102, + "grad_norm": 4.21875, + "learning_rate": 2.965462447990881e-05, + "loss": 4.7175, + "step": 434900 + }, + { + "epoch": 3.744404269972452, + "grad_norm": 3.421875, + "learning_rate": 2.9635371639148858e-05, + "loss": 4.2061, + "step": 434950 + }, + { + "epoch": 3.7448347107438016, + "grad_norm": 4.84375, + "learning_rate": 2.9616123963001206e-05, + "loss": 3.9491, + "step": 435000 + }, + { + "epoch": 3.7448347107438016, + "eval_loss": 4.991069316864014, + "eval_runtime": 24.8181, + "eval_samples_per_second": 25.788, + "eval_steps_per_second": 12.894, + "eval_tts_loss": 7.4790225853498375, + "step": 435000 + }, + { + "epoch": 3.7452651515151514, + "grad_norm": 2.0625, + "learning_rate": 2.9596881452878687e-05, + "loss": 3.9077, + "step": 435050 + }, + { + "epoch": 3.745695592286501, + "grad_norm": 4.03125, + "learning_rate": 2.957764411019356e-05, + "loss": 4.2705, + "step": 435100 + }, + { + "epoch": 3.746126033057851, + "grad_norm": 3.140625, + "learning_rate": 2.95584119363578e-05, + "loss": 4.2979, + "step": 435150 + }, + { + "epoch": 3.746556473829201, + "grad_norm": 3.015625, + "learning_rate": 2.9539184932783083e-05, + "loss": 4.5898, + "step": 435200 + }, + { + "epoch": 3.746986914600551, + "grad_norm": 2.1875, + "learning_rate": 2.9519963100880588e-05, + "loss": 4.0995, + "step": 435250 + }, + { + "epoch": 3.747417355371901, + "grad_norm": 3.40625, + "learning_rate": 2.9500746442061165e-05, + "loss": 3.8345, + "step": 435300 + }, + { + "epoch": 3.7478477961432506, + "grad_norm": 5.59375, + "learning_rate": 2.948153495773527e-05, + "loss": 3.8499, + "step": 435350 + }, + { + "epoch": 3.748278236914601, + "grad_norm": 2.859375, + "learning_rate": 2.9462328649312944e-05, + "loss": 4.2361, + "step": 435400 + }, + { + "epoch": 3.7487086776859506, + "grad_norm": 2.484375, + "learning_rate": 2.9443127518203962e-05, + "loss": 4.0381, + "step": 435450 + }, + { + "epoch": 3.7491391184573004, + "grad_norm": 2.0625, + "learning_rate": 2.942393156581762e-05, + "loss": 4.11, + "step": 435500 + }, + { + "epoch": 3.74956955922865, + "grad_norm": 5.1875, + "learning_rate": 2.9404740793562857e-05, + "loss": 4.2611, + "step": 435550 + }, + { + "epoch": 3.75, + "grad_norm": 3.734375, + "learning_rate": 2.9385555202848215e-05, + "loss": 4.25, + "step": 435600 + }, + { + "epoch": 3.75043044077135, + "grad_norm": 4.4375, + "learning_rate": 2.936637479508191e-05, + "loss": 3.9644, + "step": 435650 + }, + { + "epoch": 3.7508608815426996, + "grad_norm": 3.296875, + "learning_rate": 2.9347199571671667e-05, + "loss": 4.2064, + "step": 435700 + }, + { + "epoch": 3.7512913223140494, + "grad_norm": 3.015625, + "learning_rate": 2.9328029534025014e-05, + "loss": 4.1764, + "step": 435750 + }, + { + "epoch": 3.751721763085399, + "grad_norm": 2.359375, + "learning_rate": 2.9308864683548966e-05, + "loss": 4.4975, + "step": 435800 + }, + { + "epoch": 3.7521522038567494, + "grad_norm": 2.828125, + "learning_rate": 2.9289705021650083e-05, + "loss": 4.2121, + "step": 435850 + }, + { + "epoch": 3.752582644628099, + "grad_norm": 2.890625, + "learning_rate": 2.9270550549734754e-05, + "loss": 4.3651, + "step": 435900 + }, + { + "epoch": 3.753013085399449, + "grad_norm": 2.03125, + "learning_rate": 2.9251401269208835e-05, + "loss": 4.3243, + "step": 435950 + }, + { + "epoch": 3.753443526170799, + "grad_norm": 8.875, + "learning_rate": 2.9232257181477805e-05, + "loss": 4.2977, + "step": 436000 + }, + { + "epoch": 3.753873966942149, + "grad_norm": 2.546875, + "learning_rate": 2.9213118287946905e-05, + "loss": 4.0136, + "step": 436050 + }, + { + "epoch": 3.754304407713499, + "grad_norm": 3.40625, + "learning_rate": 2.919398459002076e-05, + "loss": 3.9184, + "step": 436100 + }, + { + "epoch": 3.7547348484848486, + "grad_norm": 4.09375, + "learning_rate": 2.917485608910383e-05, + "loss": 3.9648, + "step": 436150 + }, + { + "epoch": 3.7551652892561984, + "grad_norm": 5.625, + "learning_rate": 2.9155732786600088e-05, + "loss": 4.4635, + "step": 436200 + }, + { + "epoch": 3.755595730027548, + "grad_norm": 4.09375, + "learning_rate": 2.913661468391312e-05, + "loss": 4.3398, + "step": 436250 + }, + { + "epoch": 3.756026170798898, + "grad_norm": 2.6875, + "learning_rate": 2.9117501782446177e-05, + "loss": 4.1965, + "step": 436300 + }, + { + "epoch": 3.756456611570248, + "grad_norm": 2.515625, + "learning_rate": 2.9098394083602097e-05, + "loss": 4.6177, + "step": 436350 + }, + { + "epoch": 3.7568870523415976, + "grad_norm": 3.09375, + "learning_rate": 2.9079291588783308e-05, + "loss": 4.3223, + "step": 436400 + }, + { + "epoch": 3.7573174931129474, + "grad_norm": 7.28125, + "learning_rate": 2.906019429939195e-05, + "loss": 4.4316, + "step": 436450 + }, + { + "epoch": 3.7577479338842976, + "grad_norm": 3.0625, + "learning_rate": 2.904110221682971e-05, + "loss": 3.8981, + "step": 436500 + }, + { + "epoch": 3.7581783746556474, + "grad_norm": 3.328125, + "learning_rate": 2.9022015342497866e-05, + "loss": 4.5275, + "step": 436550 + }, + { + "epoch": 3.758608815426997, + "grad_norm": 4.0, + "learning_rate": 2.9002933677797438e-05, + "loss": 4.8012, + "step": 436600 + }, + { + "epoch": 3.759039256198347, + "grad_norm": 4.0, + "learning_rate": 2.8983857224128897e-05, + "loss": 4.2455, + "step": 436650 + }, + { + "epoch": 3.7594696969696972, + "grad_norm": 1.6484375, + "learning_rate": 2.8964785982892407e-05, + "loss": 3.99, + "step": 436700 + }, + { + "epoch": 3.759900137741047, + "grad_norm": 4.71875, + "learning_rate": 2.8945719955487817e-05, + "loss": 4.393, + "step": 436750 + }, + { + "epoch": 3.760330578512397, + "grad_norm": 3.890625, + "learning_rate": 2.8926659143314515e-05, + "loss": 4.4553, + "step": 436800 + }, + { + "epoch": 3.7607610192837466, + "grad_norm": 3.21875, + "learning_rate": 2.89076035477715e-05, + "loss": 4.7125, + "step": 436850 + }, + { + "epoch": 3.7611914600550964, + "grad_norm": 3.765625, + "learning_rate": 2.888855317025744e-05, + "loss": 4.4291, + "step": 436900 + }, + { + "epoch": 3.761621900826446, + "grad_norm": 1.90625, + "learning_rate": 2.8869508012170566e-05, + "loss": 4.5483, + "step": 436950 + }, + { + "epoch": 3.762052341597796, + "grad_norm": 3.1875, + "learning_rate": 2.8850468074908745e-05, + "loss": 4.337, + "step": 437000 + }, + { + "epoch": 3.762482782369146, + "grad_norm": 3.859375, + "learning_rate": 2.8831433359869496e-05, + "loss": 4.722, + "step": 437050 + }, + { + "epoch": 3.762913223140496, + "grad_norm": 1.2421875, + "learning_rate": 2.8812403868449932e-05, + "loss": 4.155, + "step": 437100 + }, + { + "epoch": 3.763343663911846, + "grad_norm": 4.71875, + "learning_rate": 2.8793379602046753e-05, + "loss": 4.2119, + "step": 437150 + }, + { + "epoch": 3.7637741046831956, + "grad_norm": 4.75, + "learning_rate": 2.8774360562056312e-05, + "loss": 4.1054, + "step": 437200 + }, + { + "epoch": 3.7642045454545454, + "grad_norm": 3.4375, + "learning_rate": 2.8755346749874522e-05, + "loss": 3.8826, + "step": 437250 + }, + { + "epoch": 3.764634986225895, + "grad_norm": 5.53125, + "learning_rate": 2.8736338166897026e-05, + "loss": 4.2956, + "step": 437300 + }, + { + "epoch": 3.7650654269972454, + "grad_norm": 3.25, + "learning_rate": 2.8717334814519015e-05, + "loss": 4.2503, + "step": 437350 + }, + { + "epoch": 3.7654958677685952, + "grad_norm": 4.75, + "learning_rate": 2.8698336694135197e-05, + "loss": 4.3729, + "step": 437400 + }, + { + "epoch": 3.765926308539945, + "grad_norm": 5.0, + "learning_rate": 2.867934380714008e-05, + "loss": 4.1687, + "step": 437450 + }, + { + "epoch": 3.766356749311295, + "grad_norm": 5.03125, + "learning_rate": 2.8660356154927693e-05, + "loss": 4.1661, + "step": 437500 + }, + { + "epoch": 3.7667871900826446, + "grad_norm": 3.1875, + "learning_rate": 2.864137373889163e-05, + "loss": 4.561, + "step": 437550 + }, + { + "epoch": 3.7672176308539944, + "grad_norm": 2.546875, + "learning_rate": 2.862239656042527e-05, + "loss": 4.07, + "step": 437600 + }, + { + "epoch": 3.767648071625344, + "grad_norm": 2.828125, + "learning_rate": 2.8603424620921392e-05, + "loss": 4.0358, + "step": 437650 + }, + { + "epoch": 3.768078512396694, + "grad_norm": 3.8125, + "learning_rate": 2.85844579217725e-05, + "loss": 4.1929, + "step": 437700 + }, + { + "epoch": 3.7685089531680442, + "grad_norm": 3.078125, + "learning_rate": 2.856549646437078e-05, + "loss": 4.3856, + "step": 437750 + }, + { + "epoch": 3.768939393939394, + "grad_norm": 2.53125, + "learning_rate": 2.8546540250107922e-05, + "loss": 4.3432, + "step": 437800 + }, + { + "epoch": 3.769369834710744, + "grad_norm": 4.03125, + "learning_rate": 2.852758928037528e-05, + "loss": 4.416, + "step": 437850 + }, + { + "epoch": 3.7698002754820936, + "grad_norm": 2.296875, + "learning_rate": 2.8508643556563795e-05, + "loss": 4.2531, + "step": 437900 + }, + { + "epoch": 3.7702307162534434, + "grad_norm": 2.359375, + "learning_rate": 2.8489703080064035e-05, + "loss": 4.179, + "step": 437950 + }, + { + "epoch": 3.7706611570247937, + "grad_norm": 4.125, + "learning_rate": 2.8470767852266244e-05, + "loss": 4.3769, + "step": 438000 + }, + { + "epoch": 3.7706611570247937, + "eval_loss": 4.991232395172119, + "eval_runtime": 23.3738, + "eval_samples_per_second": 27.381, + "eval_steps_per_second": 13.691, + "eval_tts_loss": 7.4769983337970585, + "step": 438000 + }, + { + "epoch": 3.7710915977961434, + "grad_norm": 2.265625, + "learning_rate": 2.8451837874560182e-05, + "loss": 4.5347, + "step": 438050 + }, + { + "epoch": 3.7715220385674932, + "grad_norm": 1.90625, + "learning_rate": 2.8432913148335295e-05, + "loss": 4.492, + "step": 438100 + }, + { + "epoch": 3.771952479338843, + "grad_norm": 2.8125, + "learning_rate": 2.841399367498059e-05, + "loss": 4.6457, + "step": 438150 + }, + { + "epoch": 3.772382920110193, + "grad_norm": 3.0625, + "learning_rate": 2.839507945588473e-05, + "loss": 4.1685, + "step": 438200 + }, + { + "epoch": 3.7728133608815426, + "grad_norm": 2.5625, + "learning_rate": 2.837617049243595e-05, + "loss": 4.1531, + "step": 438250 + }, + { + "epoch": 3.7732438016528924, + "grad_norm": 3.453125, + "learning_rate": 2.8357266786022174e-05, + "loss": 4.1016, + "step": 438300 + }, + { + "epoch": 3.773674242424242, + "grad_norm": 3.546875, + "learning_rate": 2.8338368338030885e-05, + "loss": 4.4416, + "step": 438350 + }, + { + "epoch": 3.7741046831955924, + "grad_norm": 4.0625, + "learning_rate": 2.831947514984916e-05, + "loss": 4.3861, + "step": 438400 + }, + { + "epoch": 3.7745351239669422, + "grad_norm": 2.53125, + "learning_rate": 2.830058722286375e-05, + "loss": 4.3383, + "step": 438450 + }, + { + "epoch": 3.774965564738292, + "grad_norm": 4.03125, + "learning_rate": 2.8281704558460963e-05, + "loss": 4.3597, + "step": 438500 + }, + { + "epoch": 3.775396005509642, + "grad_norm": 2.921875, + "learning_rate": 2.826282715802674e-05, + "loss": 4.188, + "step": 438550 + }, + { + "epoch": 3.7758264462809916, + "grad_norm": 2.390625, + "learning_rate": 2.8243955022946668e-05, + "loss": 4.4031, + "step": 438600 + }, + { + "epoch": 3.776256887052342, + "grad_norm": 3.65625, + "learning_rate": 2.8225088154605928e-05, + "loss": 3.9243, + "step": 438650 + }, + { + "epoch": 3.7766873278236917, + "grad_norm": 3.796875, + "learning_rate": 2.820622655438927e-05, + "loss": 4.5401, + "step": 438700 + }, + { + "epoch": 3.7771177685950414, + "grad_norm": 2.71875, + "learning_rate": 2.8187370223681132e-05, + "loss": 4.2161, + "step": 438750 + }, + { + "epoch": 3.7775482093663912, + "grad_norm": 4.5625, + "learning_rate": 2.816851916386547e-05, + "loss": 4.5565, + "step": 438800 + }, + { + "epoch": 3.777978650137741, + "grad_norm": 2.984375, + "learning_rate": 2.814967337632599e-05, + "loss": 4.1442, + "step": 438850 + }, + { + "epoch": 3.778409090909091, + "grad_norm": 2.25, + "learning_rate": 2.813083286244592e-05, + "loss": 4.2059, + "step": 438900 + }, + { + "epoch": 3.7788395316804406, + "grad_norm": 3.453125, + "learning_rate": 2.811199762360802e-05, + "loss": 4.1219, + "step": 438950 + }, + { + "epoch": 3.7792699724517904, + "grad_norm": 3.671875, + "learning_rate": 2.8093167661194864e-05, + "loss": 4.454, + "step": 439000 + }, + { + "epoch": 3.7797004132231407, + "grad_norm": 4.46875, + "learning_rate": 2.8074342976588485e-05, + "loss": 4.2363, + "step": 439050 + }, + { + "epoch": 3.7801308539944904, + "grad_norm": 2.03125, + "learning_rate": 2.805552357117056e-05, + "loss": 4.4363, + "step": 439100 + }, + { + "epoch": 3.7805612947658402, + "grad_norm": 4.1875, + "learning_rate": 2.8036709446322463e-05, + "loss": 4.3007, + "step": 439150 + }, + { + "epoch": 3.78099173553719, + "grad_norm": 5.125, + "learning_rate": 2.801790060342503e-05, + "loss": 4.4281, + "step": 439200 + }, + { + "epoch": 3.78142217630854, + "grad_norm": 3.625, + "learning_rate": 2.799909704385879e-05, + "loss": 4.4363, + "step": 439250 + }, + { + "epoch": 3.78185261707989, + "grad_norm": 3.84375, + "learning_rate": 2.7980298769003942e-05, + "loss": 4.2758, + "step": 439300 + }, + { + "epoch": 3.78228305785124, + "grad_norm": 3.25, + "learning_rate": 2.7961505780240205e-05, + "loss": 4.338, + "step": 439350 + }, + { + "epoch": 3.7827134986225897, + "grad_norm": 3.421875, + "learning_rate": 2.7942718078946962e-05, + "loss": 4.2511, + "step": 439400 + }, + { + "epoch": 3.7831439393939394, + "grad_norm": 2.34375, + "learning_rate": 2.7923935666503166e-05, + "loss": 4.0931, + "step": 439450 + }, + { + "epoch": 3.7835743801652892, + "grad_norm": 3.296875, + "learning_rate": 2.790515854428739e-05, + "loss": 3.9331, + "step": 439500 + }, + { + "epoch": 3.784004820936639, + "grad_norm": 3.046875, + "learning_rate": 2.7886386713677892e-05, + "loss": 4.2368, + "step": 439550 + }, + { + "epoch": 3.784435261707989, + "grad_norm": 5.3125, + "learning_rate": 2.7867620176052456e-05, + "loss": 4.5077, + "step": 439600 + }, + { + "epoch": 3.7848657024793386, + "grad_norm": 2.484375, + "learning_rate": 2.784885893278849e-05, + "loss": 4.1218, + "step": 439650 + }, + { + "epoch": 3.785296143250689, + "grad_norm": 3.3125, + "learning_rate": 2.783010298526305e-05, + "loss": 4.2164, + "step": 439700 + }, + { + "epoch": 3.7857265840220387, + "grad_norm": 2.84375, + "learning_rate": 2.781135233485277e-05, + "loss": 4.6578, + "step": 439750 + }, + { + "epoch": 3.7861570247933884, + "grad_norm": 3.640625, + "learning_rate": 2.779260698293389e-05, + "loss": 4.5096, + "step": 439800 + }, + { + "epoch": 3.7865874655647382, + "grad_norm": 4.6875, + "learning_rate": 2.777386693088232e-05, + "loss": 4.2233, + "step": 439850 + }, + { + "epoch": 3.787017906336088, + "grad_norm": 6.34375, + "learning_rate": 2.7755132180073552e-05, + "loss": 4.2595, + "step": 439900 + }, + { + "epoch": 3.7874483471074383, + "grad_norm": 2.84375, + "learning_rate": 2.7736402731882582e-05, + "loss": 3.8967, + "step": 439950 + }, + { + "epoch": 3.787878787878788, + "grad_norm": 5.3125, + "learning_rate": 2.7717678587684205e-05, + "loss": 4.4487, + "step": 440000 + }, + { + "epoch": 3.788309228650138, + "grad_norm": 2.53125, + "learning_rate": 2.7698959748852692e-05, + "loss": 4.2939, + "step": 440050 + }, + { + "epoch": 3.7887396694214877, + "grad_norm": 2.59375, + "learning_rate": 2.768024621676195e-05, + "loss": 4.1282, + "step": 440100 + }, + { + "epoch": 3.7891701101928374, + "grad_norm": 4.34375, + "learning_rate": 2.766153799278557e-05, + "loss": 4.4936, + "step": 440150 + }, + { + "epoch": 3.7896005509641872, + "grad_norm": 5.4375, + "learning_rate": 2.7642835078296647e-05, + "loss": 4.5217, + "step": 440200 + }, + { + "epoch": 3.790030991735537, + "grad_norm": 2.234375, + "learning_rate": 2.7624137474667966e-05, + "loss": 4.4994, + "step": 440250 + }, + { + "epoch": 3.790461432506887, + "grad_norm": 3.359375, + "learning_rate": 2.760544518327186e-05, + "loss": 4.2678, + "step": 440300 + }, + { + "epoch": 3.790891873278237, + "grad_norm": 3.0, + "learning_rate": 2.758675820548029e-05, + "loss": 4.1454, + "step": 440350 + }, + { + "epoch": 3.791322314049587, + "grad_norm": 4.65625, + "learning_rate": 2.75680765426649e-05, + "loss": 4.0165, + "step": 440400 + }, + { + "epoch": 3.7917527548209367, + "grad_norm": 2.140625, + "learning_rate": 2.7549400196196884e-05, + "loss": 4.1414, + "step": 440450 + }, + { + "epoch": 3.7921831955922864, + "grad_norm": 2.71875, + "learning_rate": 2.7530729167446945e-05, + "loss": 4.3019, + "step": 440500 + }, + { + "epoch": 3.7926136363636362, + "grad_norm": 4.28125, + "learning_rate": 2.7512063457785596e-05, + "loss": 4.6083, + "step": 440550 + }, + { + "epoch": 3.7930440771349865, + "grad_norm": 3.265625, + "learning_rate": 2.7493403068582823e-05, + "loss": 4.026, + "step": 440600 + }, + { + "epoch": 3.7934745179063363, + "grad_norm": 5.34375, + "learning_rate": 2.747474800120824e-05, + "loss": 3.9298, + "step": 440650 + }, + { + "epoch": 3.793904958677686, + "grad_norm": 1.7421875, + "learning_rate": 2.745609825703117e-05, + "loss": 4.2406, + "step": 440700 + }, + { + "epoch": 3.794335399449036, + "grad_norm": 3.953125, + "learning_rate": 2.7437453837420378e-05, + "loss": 4.3084, + "step": 440750 + }, + { + "epoch": 3.7947658402203857, + "grad_norm": 4.1875, + "learning_rate": 2.7418814743744314e-05, + "loss": 4.2397, + "step": 440800 + }, + { + "epoch": 3.7951962809917354, + "grad_norm": 2.40625, + "learning_rate": 2.7400180977371126e-05, + "loss": 4.3035, + "step": 440850 + }, + { + "epoch": 3.7956267217630852, + "grad_norm": 3.65625, + "learning_rate": 2.7381552539668442e-05, + "loss": 4.3716, + "step": 440900 + }, + { + "epoch": 3.796057162534435, + "grad_norm": 2.359375, + "learning_rate": 2.736292943200356e-05, + "loss": 4.4056, + "step": 440950 + }, + { + "epoch": 3.7964876033057853, + "grad_norm": 3.625, + "learning_rate": 2.7344311655743382e-05, + "loss": 4.2118, + "step": 441000 + }, + { + "epoch": 3.7964876033057853, + "eval_loss": 4.9910712242126465, + "eval_runtime": 24.0559, + "eval_samples_per_second": 26.605, + "eval_steps_per_second": 13.302, + "eval_tts_loss": 7.471262435327908, + "step": 441000 + }, + { + "epoch": 3.796918044077135, + "grad_norm": 3.375, + "learning_rate": 2.7325699212254363e-05, + "loss": 4.5749, + "step": 441050 + }, + { + "epoch": 3.797348484848485, + "grad_norm": 3.65625, + "learning_rate": 2.73070921029027e-05, + "loss": 4.0224, + "step": 441100 + }, + { + "epoch": 3.7977789256198347, + "grad_norm": 4.125, + "learning_rate": 2.7288490329054073e-05, + "loss": 4.1009, + "step": 441150 + }, + { + "epoch": 3.7982093663911844, + "grad_norm": 2.6875, + "learning_rate": 2.7269893892073804e-05, + "loss": 4.4229, + "step": 441200 + }, + { + "epoch": 3.7986398071625347, + "grad_norm": 2.484375, + "learning_rate": 2.725130279332685e-05, + "loss": 4.2913, + "step": 441250 + }, + { + "epoch": 3.7990702479338845, + "grad_norm": 4.25, + "learning_rate": 2.7232717034177746e-05, + "loss": 4.1804, + "step": 441300 + }, + { + "epoch": 3.7995006887052343, + "grad_norm": 1.8671875, + "learning_rate": 2.7214136615990626e-05, + "loss": 3.6511, + "step": 441350 + }, + { + "epoch": 3.799931129476584, + "grad_norm": 1.875, + "learning_rate": 2.71955615401293e-05, + "loss": 3.7034, + "step": 441400 + }, + { + "epoch": 3.800361570247934, + "grad_norm": 4.34375, + "learning_rate": 2.7176991807957142e-05, + "loss": 4.2727, + "step": 441450 + }, + { + "epoch": 3.8007920110192837, + "grad_norm": 5.25, + "learning_rate": 2.7158427420837042e-05, + "loss": 4.4436, + "step": 441500 + }, + { + "epoch": 3.8012224517906334, + "grad_norm": 2.34375, + "learning_rate": 2.7139868380131673e-05, + "loss": 4.2093, + "step": 441550 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 2.515625, + "learning_rate": 2.7121314687203203e-05, + "loss": 4.1859, + "step": 441600 + }, + { + "epoch": 3.8020833333333335, + "grad_norm": 2.53125, + "learning_rate": 2.710276634341341e-05, + "loss": 4.2751, + "step": 441650 + }, + { + "epoch": 3.8025137741046833, + "grad_norm": 3.078125, + "learning_rate": 2.7084223350123783e-05, + "loss": 4.4252, + "step": 441700 + }, + { + "epoch": 3.802944214876033, + "grad_norm": 4.53125, + "learning_rate": 2.706568570869521e-05, + "loss": 4.5279, + "step": 441750 + }, + { + "epoch": 3.803374655647383, + "grad_norm": 1.875, + "learning_rate": 2.7047153420488425e-05, + "loss": 4.2245, + "step": 441800 + }, + { + "epoch": 3.8038050964187327, + "grad_norm": 2.6875, + "learning_rate": 2.7028626486863617e-05, + "loss": 4.3283, + "step": 441850 + }, + { + "epoch": 3.804235537190083, + "grad_norm": 3.140625, + "learning_rate": 2.7010104909180624e-05, + "loss": 4.2854, + "step": 441900 + }, + { + "epoch": 3.8046659779614327, + "grad_norm": 5.0625, + "learning_rate": 2.6991588688798853e-05, + "loss": 4.2609, + "step": 441950 + }, + { + "epoch": 3.8050964187327825, + "grad_norm": 3.6875, + "learning_rate": 2.6973077827077442e-05, + "loss": 4.4615, + "step": 442000 + }, + { + "epoch": 3.8055268595041323, + "grad_norm": 5.09375, + "learning_rate": 2.6954572325374948e-05, + "loss": 4.2627, + "step": 442050 + }, + { + "epoch": 3.805957300275482, + "grad_norm": 4.96875, + "learning_rate": 2.693607218504972e-05, + "loss": 4.6391, + "step": 442100 + }, + { + "epoch": 3.806387741046832, + "grad_norm": 3.5, + "learning_rate": 2.6917577407459583e-05, + "loss": 4.4179, + "step": 442150 + }, + { + "epoch": 3.8068181818181817, + "grad_norm": 2.828125, + "learning_rate": 2.6899087993961992e-05, + "loss": 4.4048, + "step": 442200 + }, + { + "epoch": 3.8072486225895315, + "grad_norm": 2.109375, + "learning_rate": 2.688060394591413e-05, + "loss": 4.4438, + "step": 442250 + }, + { + "epoch": 3.8076790633608817, + "grad_norm": 2.75, + "learning_rate": 2.6862125264672578e-05, + "loss": 4.5975, + "step": 442300 + }, + { + "epoch": 3.8081095041322315, + "grad_norm": 1.5234375, + "learning_rate": 2.684365195159365e-05, + "loss": 4.197, + "step": 442350 + }, + { + "epoch": 3.8085399449035813, + "grad_norm": 3.203125, + "learning_rate": 2.682518400803329e-05, + "loss": 4.0516, + "step": 442400 + }, + { + "epoch": 3.808970385674931, + "grad_norm": 1.7265625, + "learning_rate": 2.6806721435346992e-05, + "loss": 4.6244, + "step": 442450 + }, + { + "epoch": 3.809400826446281, + "grad_norm": 3.484375, + "learning_rate": 2.678826423488986e-05, + "loss": 4.2073, + "step": 442500 + }, + { + "epoch": 3.809831267217631, + "grad_norm": 2.984375, + "learning_rate": 2.6769812408016616e-05, + "loss": 4.5374, + "step": 442550 + }, + { + "epoch": 3.810261707988981, + "grad_norm": 3.203125, + "learning_rate": 2.675136595608154e-05, + "loss": 4.2841, + "step": 442600 + }, + { + "epoch": 3.8106921487603307, + "grad_norm": 4.4375, + "learning_rate": 2.6732924880438648e-05, + "loss": 4.1611, + "step": 442650 + }, + { + "epoch": 3.8111225895316805, + "grad_norm": 3.390625, + "learning_rate": 2.6714489182441427e-05, + "loss": 4.5294, + "step": 442700 + }, + { + "epoch": 3.8115530303030303, + "grad_norm": 2.75, + "learning_rate": 2.669605886344303e-05, + "loss": 4.4323, + "step": 442750 + }, + { + "epoch": 3.81198347107438, + "grad_norm": 5.375, + "learning_rate": 2.6677633924796186e-05, + "loss": 4.4475, + "step": 442800 + }, + { + "epoch": 3.81241391184573, + "grad_norm": 2.984375, + "learning_rate": 2.6659214367853248e-05, + "loss": 4.5494, + "step": 442850 + }, + { + "epoch": 3.8128443526170797, + "grad_norm": 4.5, + "learning_rate": 2.6640800193966165e-05, + "loss": 4.3748, + "step": 442900 + }, + { + "epoch": 3.81327479338843, + "grad_norm": 4.4375, + "learning_rate": 2.6622391404486523e-05, + "loss": 4.3392, + "step": 442950 + }, + { + "epoch": 3.8137052341597797, + "grad_norm": 2.84375, + "learning_rate": 2.6603988000765523e-05, + "loss": 4.3399, + "step": 443000 + }, + { + "epoch": 3.8141356749311295, + "grad_norm": 2.015625, + "learning_rate": 2.6585589984153824e-05, + "loss": 4.4275, + "step": 443050 + }, + { + "epoch": 3.8145661157024793, + "grad_norm": 4.21875, + "learning_rate": 2.6567197356001883e-05, + "loss": 4.0843, + "step": 443100 + }, + { + "epoch": 3.814996556473829, + "grad_norm": 3.21875, + "learning_rate": 2.6548810117659673e-05, + "loss": 4.4453, + "step": 443150 + }, + { + "epoch": 3.8154269972451793, + "grad_norm": 3.203125, + "learning_rate": 2.653042827047674e-05, + "loss": 4.1612, + "step": 443200 + }, + { + "epoch": 3.815857438016529, + "grad_norm": 3.40625, + "learning_rate": 2.6512051815802354e-05, + "loss": 4.4573, + "step": 443250 + }, + { + "epoch": 3.816287878787879, + "grad_norm": 4.03125, + "learning_rate": 2.6493680754985184e-05, + "loss": 4.8011, + "step": 443300 + }, + { + "epoch": 3.8167183195592287, + "grad_norm": 4.71875, + "learning_rate": 2.647531508937373e-05, + "loss": 4.6141, + "step": 443350 + }, + { + "epoch": 3.8171487603305785, + "grad_norm": 1.7578125, + "learning_rate": 2.6456954820315948e-05, + "loss": 4.0716, + "step": 443400 + }, + { + "epoch": 3.8175792011019283, + "grad_norm": 3.515625, + "learning_rate": 2.643859994915946e-05, + "loss": 4.3812, + "step": 443450 + }, + { + "epoch": 3.818009641873278, + "grad_norm": 1.078125, + "learning_rate": 2.6420250477251463e-05, + "loss": 4.2029, + "step": 443500 + }, + { + "epoch": 3.818440082644628, + "grad_norm": 4.0, + "learning_rate": 2.640190640593877e-05, + "loss": 4.2339, + "step": 443550 + }, + { + "epoch": 3.818870523415978, + "grad_norm": 1.5, + "learning_rate": 2.638356773656776e-05, + "loss": 4.5809, + "step": 443600 + }, + { + "epoch": 3.819300964187328, + "grad_norm": 3.546875, + "learning_rate": 2.6365234470484524e-05, + "loss": 4.0893, + "step": 443650 + }, + { + "epoch": 3.8197314049586777, + "grad_norm": 4.0625, + "learning_rate": 2.634690660903465e-05, + "loss": 4.9535, + "step": 443700 + }, + { + "epoch": 3.8201618457300275, + "grad_norm": 4.8125, + "learning_rate": 2.632858415356333e-05, + "loss": 4.1552, + "step": 443750 + }, + { + "epoch": 3.8205922865013773, + "grad_norm": 3.9375, + "learning_rate": 2.6310267105415477e-05, + "loss": 4.5734, + "step": 443800 + }, + { + "epoch": 3.8210227272727275, + "grad_norm": 4.21875, + "learning_rate": 2.629195546593545e-05, + "loss": 4.5253, + "step": 443850 + }, + { + "epoch": 3.8214531680440773, + "grad_norm": 2.65625, + "learning_rate": 2.6273649236467278e-05, + "loss": 4.3029, + "step": 443900 + }, + { + "epoch": 3.821883608815427, + "grad_norm": 4.9375, + "learning_rate": 2.6255348418354652e-05, + "loss": 4.3019, + "step": 443950 + }, + { + "epoch": 3.822314049586777, + "grad_norm": 1.828125, + "learning_rate": 2.623705301294078e-05, + "loss": 4.3868, + "step": 444000 + }, + { + "epoch": 3.822314049586777, + "eval_loss": 4.990033149719238, + "eval_runtime": 24.0036, + "eval_samples_per_second": 26.663, + "eval_steps_per_second": 13.331, + "eval_tts_loss": 7.464097043278323, + "step": 444000 + }, + { + "epoch": 3.8227444903581267, + "grad_norm": 2.296875, + "learning_rate": 2.6218763021568514e-05, + "loss": 3.7023, + "step": 444050 + }, + { + "epoch": 3.8231749311294765, + "grad_norm": 2.75, + "learning_rate": 2.62004784455803e-05, + "loss": 4.2649, + "step": 444100 + }, + { + "epoch": 3.8236053719008263, + "grad_norm": 4.84375, + "learning_rate": 2.618219928631819e-05, + "loss": 4.4702, + "step": 444150 + }, + { + "epoch": 3.824035812672176, + "grad_norm": 4.1875, + "learning_rate": 2.616392554512379e-05, + "loss": 4.4969, + "step": 444200 + }, + { + "epoch": 3.8244662534435263, + "grad_norm": 1.71875, + "learning_rate": 2.614565722333844e-05, + "loss": 4.12, + "step": 444250 + }, + { + "epoch": 3.824896694214876, + "grad_norm": 2.984375, + "learning_rate": 2.6127394322302935e-05, + "loss": 4.0162, + "step": 444300 + }, + { + "epoch": 3.825327134986226, + "grad_norm": 2.34375, + "learning_rate": 2.610913684335774e-05, + "loss": 3.9894, + "step": 444350 + }, + { + "epoch": 3.8257575757575757, + "grad_norm": 3.71875, + "learning_rate": 2.609088478784293e-05, + "loss": 4.4528, + "step": 444400 + }, + { + "epoch": 3.8261880165289255, + "grad_norm": 3.625, + "learning_rate": 2.6072638157098115e-05, + "loss": 4.411, + "step": 444450 + }, + { + "epoch": 3.8266184573002757, + "grad_norm": 3.8125, + "learning_rate": 2.6054396952462645e-05, + "loss": 4.6046, + "step": 444500 + }, + { + "epoch": 3.8270488980716255, + "grad_norm": 2.65625, + "learning_rate": 2.6036161175275365e-05, + "loss": 4.5418, + "step": 444550 + }, + { + "epoch": 3.8274793388429753, + "grad_norm": 4.28125, + "learning_rate": 2.601793082687466e-05, + "loss": 4.1446, + "step": 444600 + }, + { + "epoch": 3.827909779614325, + "grad_norm": 2.125, + "learning_rate": 2.599970590859868e-05, + "loss": 4.7732, + "step": 444650 + }, + { + "epoch": 3.828340220385675, + "grad_norm": 2.0, + "learning_rate": 2.5981486421785074e-05, + "loss": 4.4962, + "step": 444700 + }, + { + "epoch": 3.8287706611570247, + "grad_norm": 4.09375, + "learning_rate": 2.5963272367771073e-05, + "loss": 4.4409, + "step": 444750 + }, + { + "epoch": 3.8292011019283745, + "grad_norm": 3.59375, + "learning_rate": 2.594506374789365e-05, + "loss": 4.2017, + "step": 444800 + }, + { + "epoch": 3.8296315426997243, + "grad_norm": 3.234375, + "learning_rate": 2.5926860563489164e-05, + "loss": 3.9821, + "step": 444850 + }, + { + "epoch": 3.8300619834710745, + "grad_norm": 3.734375, + "learning_rate": 2.5908662815893757e-05, + "loss": 3.932, + "step": 444900 + }, + { + "epoch": 3.8304924242424243, + "grad_norm": 2.5, + "learning_rate": 2.58904705064431e-05, + "loss": 4.1595, + "step": 444950 + }, + { + "epoch": 3.830922865013774, + "grad_norm": 4.90625, + "learning_rate": 2.587228363647245e-05, + "loss": 4.1939, + "step": 445000 + }, + { + "epoch": 3.831353305785124, + "grad_norm": 4.875, + "learning_rate": 2.585410220731669e-05, + "loss": 3.9153, + "step": 445050 + }, + { + "epoch": 3.8317837465564737, + "grad_norm": 3.390625, + "learning_rate": 2.5835926220310304e-05, + "loss": 4.3097, + "step": 445100 + }, + { + "epoch": 3.832214187327824, + "grad_norm": 2.625, + "learning_rate": 2.5817755676787327e-05, + "loss": 4.4918, + "step": 445150 + }, + { + "epoch": 3.8326446280991737, + "grad_norm": 6.6875, + "learning_rate": 2.5799590578081513e-05, + "loss": 4.1698, + "step": 445200 + }, + { + "epoch": 3.8330750688705235, + "grad_norm": 3.75, + "learning_rate": 2.5781430925526107e-05, + "loss": 4.4249, + "step": 445250 + }, + { + "epoch": 3.8335055096418733, + "grad_norm": 3.0, + "learning_rate": 2.5763276720453975e-05, + "loss": 4.2956, + "step": 445300 + }, + { + "epoch": 3.833935950413223, + "grad_norm": 2.109375, + "learning_rate": 2.5745127964197613e-05, + "loss": 4.1823, + "step": 445350 + }, + { + "epoch": 3.834366391184573, + "grad_norm": 5.1875, + "learning_rate": 2.572698465808909e-05, + "loss": 4.2439, + "step": 445400 + }, + { + "epoch": 3.8347968319559227, + "grad_norm": 2.625, + "learning_rate": 2.5708846803460074e-05, + "loss": 4.2578, + "step": 445450 + }, + { + "epoch": 3.8352272727272725, + "grad_norm": 1.6875, + "learning_rate": 2.569071440164188e-05, + "loss": 4.0689, + "step": 445500 + }, + { + "epoch": 3.8356577134986227, + "grad_norm": 2.046875, + "learning_rate": 2.567258745396537e-05, + "loss": 4.2629, + "step": 445550 + }, + { + "epoch": 3.8360881542699725, + "grad_norm": 2.640625, + "learning_rate": 2.565446596176102e-05, + "loss": 4.3783, + "step": 445600 + }, + { + "epoch": 3.8365185950413223, + "grad_norm": 3.265625, + "learning_rate": 2.5636349926358916e-05, + "loss": 4.3314, + "step": 445650 + }, + { + "epoch": 3.836949035812672, + "grad_norm": 6.875, + "learning_rate": 2.5618239349088723e-05, + "loss": 4.6774, + "step": 445700 + }, + { + "epoch": 3.837379476584022, + "grad_norm": 2.21875, + "learning_rate": 2.5600134231279706e-05, + "loss": 4.1236, + "step": 445750 + }, + { + "epoch": 3.837809917355372, + "grad_norm": 4.53125, + "learning_rate": 2.5582034574260782e-05, + "loss": 4.3099, + "step": 445800 + }, + { + "epoch": 3.838240358126722, + "grad_norm": 3.40625, + "learning_rate": 2.5563940379360406e-05, + "loss": 4.1484, + "step": 445850 + }, + { + "epoch": 3.8386707988980717, + "grad_norm": 3.078125, + "learning_rate": 2.554585164790666e-05, + "loss": 4.4937, + "step": 445900 + }, + { + "epoch": 3.8391012396694215, + "grad_norm": 1.3359375, + "learning_rate": 2.55277683812272e-05, + "loss": 4.2706, + "step": 445950 + }, + { + "epoch": 3.8395316804407713, + "grad_norm": 3.21875, + "learning_rate": 2.550969058064928e-05, + "loss": 4.4372, + "step": 446000 + }, + { + "epoch": 3.839962121212121, + "grad_norm": 2.390625, + "learning_rate": 2.5491618247499837e-05, + "loss": 4.0736, + "step": 446050 + }, + { + "epoch": 3.840392561983471, + "grad_norm": 3.03125, + "learning_rate": 2.5473551383105344e-05, + "loss": 4.0506, + "step": 446100 + }, + { + "epoch": 3.8408230027548207, + "grad_norm": 2.640625, + "learning_rate": 2.5455489988791758e-05, + "loss": 4.1714, + "step": 446150 + }, + { + "epoch": 3.841253443526171, + "grad_norm": 4.21875, + "learning_rate": 2.5437434065884856e-05, + "loss": 4.0006, + "step": 446200 + }, + { + "epoch": 3.8416838842975207, + "grad_norm": 3.40625, + "learning_rate": 2.5419383615709868e-05, + "loss": 4.2765, + "step": 446250 + }, + { + "epoch": 3.8421143250688705, + "grad_norm": 2.703125, + "learning_rate": 2.540133863959162e-05, + "loss": 4.3294, + "step": 446300 + }, + { + "epoch": 3.8425447658402203, + "grad_norm": 4.84375, + "learning_rate": 2.538329913885469e-05, + "loss": 4.0776, + "step": 446350 + }, + { + "epoch": 3.84297520661157, + "grad_norm": 3.75, + "learning_rate": 2.536526511482301e-05, + "loss": 4.3104, + "step": 446400 + }, + { + "epoch": 3.8434056473829203, + "grad_norm": 2.453125, + "learning_rate": 2.5347236568820275e-05, + "loss": 4.222, + "step": 446450 + }, + { + "epoch": 3.84383608815427, + "grad_norm": 1.0703125, + "learning_rate": 2.5329213502169792e-05, + "loss": 4.2919, + "step": 446500 + }, + { + "epoch": 3.84426652892562, + "grad_norm": 1.4609375, + "learning_rate": 2.5311195916194362e-05, + "loss": 4.271, + "step": 446550 + }, + { + "epoch": 3.8446969696969697, + "grad_norm": 4.28125, + "learning_rate": 2.5293183812216458e-05, + "loss": 4.2928, + "step": 446600 + }, + { + "epoch": 3.8451274104683195, + "grad_norm": 3.09375, + "learning_rate": 2.5275177191558143e-05, + "loss": 3.933, + "step": 446650 + }, + { + "epoch": 3.8455578512396693, + "grad_norm": 4.84375, + "learning_rate": 2.5257176055540998e-05, + "loss": 3.9075, + "step": 446700 + }, + { + "epoch": 3.845988292011019, + "grad_norm": 4.25, + "learning_rate": 2.523918040548635e-05, + "loss": 4.2498, + "step": 446750 + }, + { + "epoch": 3.846418732782369, + "grad_norm": 3.21875, + "learning_rate": 2.5221190242715022e-05, + "loss": 4.2309, + "step": 446800 + }, + { + "epoch": 3.846849173553719, + "grad_norm": 3.515625, + "learning_rate": 2.5203205568547427e-05, + "loss": 4.3565, + "step": 446850 + }, + { + "epoch": 3.847279614325069, + "grad_norm": 2.1875, + "learning_rate": 2.5185226384303617e-05, + "loss": 4.1702, + "step": 446900 + }, + { + "epoch": 3.8477100550964187, + "grad_norm": 4.3125, + "learning_rate": 2.5167252691303223e-05, + "loss": 4.217, + "step": 446950 + }, + { + "epoch": 3.8481404958677685, + "grad_norm": 2.25, + "learning_rate": 2.5149284490865445e-05, + "loss": 4.3949, + "step": 447000 + }, + { + "epoch": 3.8481404958677685, + "eval_loss": 4.990220546722412, + "eval_runtime": 23.9495, + "eval_samples_per_second": 26.723, + "eval_steps_per_second": 13.361, + "eval_tts_loss": 7.46313589566455, + "step": 447000 + }, + { + "epoch": 3.8485709366391183, + "grad_norm": 4.09375, + "learning_rate": 2.513132178430917e-05, + "loss": 4.2625, + "step": 447050 + }, + { + "epoch": 3.8490013774104685, + "grad_norm": 3.453125, + "learning_rate": 2.511336457295279e-05, + "loss": 4.3678, + "step": 447100 + }, + { + "epoch": 3.8494318181818183, + "grad_norm": 3.375, + "learning_rate": 2.509541285811432e-05, + "loss": 4.1511, + "step": 447150 + }, + { + "epoch": 3.849862258953168, + "grad_norm": 4.59375, + "learning_rate": 2.5077466641111403e-05, + "loss": 4.3171, + "step": 447200 + }, + { + "epoch": 3.850292699724518, + "grad_norm": 3.59375, + "learning_rate": 2.5059525923261218e-05, + "loss": 4.447, + "step": 447250 + }, + { + "epoch": 3.8507231404958677, + "grad_norm": 3.5625, + "learning_rate": 2.504159070588057e-05, + "loss": 4.1131, + "step": 447300 + }, + { + "epoch": 3.8511535812672175, + "grad_norm": 4.625, + "learning_rate": 2.5023660990285947e-05, + "loss": 4.2023, + "step": 447350 + }, + { + "epoch": 3.8515840220385673, + "grad_norm": 5.1875, + "learning_rate": 2.500573677779323e-05, + "loss": 3.9092, + "step": 447400 + }, + { + "epoch": 3.852014462809917, + "grad_norm": 2.796875, + "learning_rate": 2.4987818069718104e-05, + "loss": 4.4807, + "step": 447450 + }, + { + "epoch": 3.8524449035812673, + "grad_norm": 3.28125, + "learning_rate": 2.496990486737575e-05, + "loss": 4.4532, + "step": 447500 + }, + { + "epoch": 3.852875344352617, + "grad_norm": 5.21875, + "learning_rate": 2.495199717208091e-05, + "loss": 4.6168, + "step": 447550 + }, + { + "epoch": 3.853305785123967, + "grad_norm": 4.15625, + "learning_rate": 2.4934094985148038e-05, + "loss": 4.3396, + "step": 447600 + }, + { + "epoch": 3.8537362258953167, + "grad_norm": 2.0, + "learning_rate": 2.4916198307891115e-05, + "loss": 4.4078, + "step": 447650 + }, + { + "epoch": 3.8541666666666665, + "grad_norm": 2.125, + "learning_rate": 2.4898307141623632e-05, + "loss": 4.1776, + "step": 447700 + }, + { + "epoch": 3.8545971074380168, + "grad_norm": 2.59375, + "learning_rate": 2.4880421487658844e-05, + "loss": 4.1043, + "step": 447750 + }, + { + "epoch": 3.8550275482093666, + "grad_norm": 4.59375, + "learning_rate": 2.48625413473095e-05, + "loss": 4.1875, + "step": 447800 + }, + { + "epoch": 3.8554579889807163, + "grad_norm": 3.90625, + "learning_rate": 2.4844666721887932e-05, + "loss": 4.36, + "step": 447850 + }, + { + "epoch": 3.855888429752066, + "grad_norm": 3.71875, + "learning_rate": 2.4826797612706178e-05, + "loss": 4.6485, + "step": 447900 + }, + { + "epoch": 3.856318870523416, + "grad_norm": 5.28125, + "learning_rate": 2.4808934021075713e-05, + "loss": 4.0192, + "step": 447950 + }, + { + "epoch": 3.8567493112947657, + "grad_norm": 2.921875, + "learning_rate": 2.4791075948307685e-05, + "loss": 4.0292, + "step": 448000 + }, + { + "epoch": 3.8571797520661155, + "grad_norm": 2.078125, + "learning_rate": 2.4773223395712896e-05, + "loss": 4.2331, + "step": 448050 + }, + { + "epoch": 3.8576101928374653, + "grad_norm": 3.46875, + "learning_rate": 2.4755376364601647e-05, + "loss": 4.1477, + "step": 448100 + }, + { + "epoch": 3.8580406336088156, + "grad_norm": 3.625, + "learning_rate": 2.4737534856283893e-05, + "loss": 4.175, + "step": 448150 + }, + { + "epoch": 3.8584710743801653, + "grad_norm": 4.46875, + "learning_rate": 2.471969887206914e-05, + "loss": 4.117, + "step": 448200 + }, + { + "epoch": 3.858901515151515, + "grad_norm": 3.09375, + "learning_rate": 2.4701868413266495e-05, + "loss": 4.342, + "step": 448250 + }, + { + "epoch": 3.859331955922865, + "grad_norm": 3.609375, + "learning_rate": 2.4684043481184727e-05, + "loss": 4.3347, + "step": 448300 + }, + { + "epoch": 3.8597623966942147, + "grad_norm": 3.53125, + "learning_rate": 2.4666224077132118e-05, + "loss": 4.3601, + "step": 448350 + }, + { + "epoch": 3.860192837465565, + "grad_norm": 2.109375, + "learning_rate": 2.464841020241658e-05, + "loss": 4.233, + "step": 448400 + }, + { + "epoch": 3.8606232782369148, + "grad_norm": 3.34375, + "learning_rate": 2.4630601858345615e-05, + "loss": 4.203, + "step": 448450 + }, + { + "epoch": 3.8610537190082646, + "grad_norm": 2.875, + "learning_rate": 2.4612799046226308e-05, + "loss": 4.5164, + "step": 448500 + }, + { + "epoch": 3.8614841597796143, + "grad_norm": 3.546875, + "learning_rate": 2.459500176736532e-05, + "loss": 4.2748, + "step": 448550 + }, + { + "epoch": 3.861914600550964, + "grad_norm": 4.34375, + "learning_rate": 2.4577210023068996e-05, + "loss": 4.1038, + "step": 448600 + }, + { + "epoch": 3.862345041322314, + "grad_norm": 3.15625, + "learning_rate": 2.455942381464321e-05, + "loss": 4.2417, + "step": 448650 + }, + { + "epoch": 3.8627754820936637, + "grad_norm": 2.953125, + "learning_rate": 2.4541643143393344e-05, + "loss": 4.1249, + "step": 448700 + }, + { + "epoch": 3.8632059228650135, + "grad_norm": 1.6796875, + "learning_rate": 2.452386801062454e-05, + "loss": 4.2709, + "step": 448750 + }, + { + "epoch": 3.8636363636363638, + "grad_norm": 3.3125, + "learning_rate": 2.4506098417641443e-05, + "loss": 4.4672, + "step": 448800 + }, + { + "epoch": 3.8640668044077136, + "grad_norm": 3.453125, + "learning_rate": 2.4488334365748254e-05, + "loss": 4.2192, + "step": 448850 + }, + { + "epoch": 3.8644972451790633, + "grad_norm": 2.0625, + "learning_rate": 2.4470575856248924e-05, + "loss": 3.8423, + "step": 448900 + }, + { + "epoch": 3.864927685950413, + "grad_norm": 3.109375, + "learning_rate": 2.4452822890446758e-05, + "loss": 4.5047, + "step": 448950 + }, + { + "epoch": 3.865358126721763, + "grad_norm": 2.296875, + "learning_rate": 2.4435075469644876e-05, + "loss": 4.6898, + "step": 449000 + }, + { + "epoch": 3.865788567493113, + "grad_norm": 2.421875, + "learning_rate": 2.4417333595145875e-05, + "loss": 4.2188, + "step": 449050 + }, + { + "epoch": 3.866219008264463, + "grad_norm": 4.34375, + "learning_rate": 2.4399597268251927e-05, + "loss": 4.6614, + "step": 449100 + }, + { + "epoch": 3.8666494490358128, + "grad_norm": 3.96875, + "learning_rate": 2.4381866490264948e-05, + "loss": 3.9614, + "step": 449150 + }, + { + "epoch": 3.8670798898071626, + "grad_norm": 3.859375, + "learning_rate": 2.4364141262486252e-05, + "loss": 4.0229, + "step": 449200 + }, + { + "epoch": 3.8675103305785123, + "grad_norm": 3.125, + "learning_rate": 2.4346421586216807e-05, + "loss": 4.5416, + "step": 449250 + }, + { + "epoch": 3.867940771349862, + "grad_norm": 5.34375, + "learning_rate": 2.4328707462757283e-05, + "loss": 4.5289, + "step": 449300 + }, + { + "epoch": 3.868371212121212, + "grad_norm": 3.375, + "learning_rate": 2.431099889340782e-05, + "loss": 4.4172, + "step": 449350 + }, + { + "epoch": 3.8688016528925617, + "grad_norm": 1.9921875, + "learning_rate": 2.429329587946816e-05, + "loss": 4.161, + "step": 449400 + }, + { + "epoch": 3.869232093663912, + "grad_norm": 3.171875, + "learning_rate": 2.4275598422237745e-05, + "loss": 4.1826, + "step": 449450 + }, + { + "epoch": 3.8696625344352618, + "grad_norm": 3.015625, + "learning_rate": 2.4257906523015462e-05, + "loss": 4.1467, + "step": 449500 + }, + { + "epoch": 3.8700929752066116, + "grad_norm": 4.375, + "learning_rate": 2.4240220183099836e-05, + "loss": 4.3832, + "step": 449550 + }, + { + "epoch": 3.8705234159779613, + "grad_norm": 3.875, + "learning_rate": 2.422253940378909e-05, + "loss": 4.4008, + "step": 449600 + }, + { + "epoch": 3.870953856749311, + "grad_norm": 5.75, + "learning_rate": 2.4204864186380904e-05, + "loss": 4.3042, + "step": 449650 + }, + { + "epoch": 3.8713842975206614, + "grad_norm": 2.84375, + "learning_rate": 2.4187194532172618e-05, + "loss": 4.382, + "step": 449700 + }, + { + "epoch": 3.871814738292011, + "grad_norm": 5.96875, + "learning_rate": 2.416953044246113e-05, + "loss": 4.2328, + "step": 449750 + }, + { + "epoch": 3.872245179063361, + "grad_norm": 4.125, + "learning_rate": 2.4151871918542923e-05, + "loss": 4.2891, + "step": 449800 + }, + { + "epoch": 3.8726756198347108, + "grad_norm": 5.53125, + "learning_rate": 2.413421896171416e-05, + "loss": 4.2952, + "step": 449850 + }, + { + "epoch": 3.8731060606060606, + "grad_norm": 2.484375, + "learning_rate": 2.4116571573270495e-05, + "loss": 4.3608, + "step": 449900 + }, + { + "epoch": 3.8735365013774103, + "grad_norm": 2.828125, + "learning_rate": 2.4098929754507215e-05, + "loss": 4.4103, + "step": 449950 + }, + { + "epoch": 3.87396694214876, + "grad_norm": 1.3125, + "learning_rate": 2.4081293506719172e-05, + "loss": 4.0878, + "step": 450000 + }, + { + "epoch": 3.87396694214876, + "eval_loss": 4.990390777587891, + "eval_runtime": 24.439, + "eval_samples_per_second": 26.188, + "eval_steps_per_second": 13.094, + "eval_tts_loss": 7.472823273668061, + "step": 450000 + }, + { + "epoch": 3.87439738292011, + "grad_norm": 2.09375, + "learning_rate": 2.4063662831200852e-05, + "loss": 4.4684, + "step": 450050 + }, + { + "epoch": 3.87482782369146, + "grad_norm": 3.9375, + "learning_rate": 2.404603772924626e-05, + "loss": 4.207, + "step": 450100 + }, + { + "epoch": 3.87525826446281, + "grad_norm": 4.09375, + "learning_rate": 2.4028418202149127e-05, + "loss": 4.1974, + "step": 450150 + }, + { + "epoch": 3.8756887052341598, + "grad_norm": 2.09375, + "learning_rate": 2.4010804251202655e-05, + "loss": 4.3152, + "step": 450200 + }, + { + "epoch": 3.8761191460055096, + "grad_norm": 1.3046875, + "learning_rate": 2.3993195877699605e-05, + "loss": 4.1947, + "step": 450250 + }, + { + "epoch": 3.8765495867768593, + "grad_norm": 4.625, + "learning_rate": 2.397559308293247e-05, + "loss": 4.3928, + "step": 450300 + }, + { + "epoch": 3.8769800275482096, + "grad_norm": 3.46875, + "learning_rate": 2.395799586819323e-05, + "loss": 4.4172, + "step": 450350 + }, + { + "epoch": 3.8774104683195594, + "grad_norm": 3.34375, + "learning_rate": 2.394040423477346e-05, + "loss": 4.3648, + "step": 450400 + }, + { + "epoch": 3.877840909090909, + "grad_norm": 4.375, + "learning_rate": 2.3922818183964423e-05, + "loss": 4.2479, + "step": 450450 + }, + { + "epoch": 3.878271349862259, + "grad_norm": 3.171875, + "learning_rate": 2.3905237717056796e-05, + "loss": 4.3042, + "step": 450500 + }, + { + "epoch": 3.8787017906336088, + "grad_norm": 3.21875, + "learning_rate": 2.388766283534103e-05, + "loss": 4.3755, + "step": 450550 + }, + { + "epoch": 3.8791322314049586, + "grad_norm": 3.265625, + "learning_rate": 2.3870093540107053e-05, + "loss": 4.7025, + "step": 450600 + }, + { + "epoch": 3.8795626721763083, + "grad_norm": 2.71875, + "learning_rate": 2.38525298326444e-05, + "loss": 3.9741, + "step": 450650 + }, + { + "epoch": 3.879993112947658, + "grad_norm": 3.0, + "learning_rate": 2.383497171424224e-05, + "loss": 3.7651, + "step": 450700 + }, + { + "epoch": 3.8804235537190084, + "grad_norm": 4.34375, + "learning_rate": 2.3817419186189262e-05, + "loss": 4.0537, + "step": 450750 + }, + { + "epoch": 3.880853994490358, + "grad_norm": 3.53125, + "learning_rate": 2.3799872249773793e-05, + "loss": 4.4243, + "step": 450800 + }, + { + "epoch": 3.881284435261708, + "grad_norm": 3.390625, + "learning_rate": 2.378233090628377e-05, + "loss": 3.9407, + "step": 450850 + }, + { + "epoch": 3.8817148760330578, + "grad_norm": 3.90625, + "learning_rate": 2.376479515700668e-05, + "loss": 4.3903, + "step": 450900 + }, + { + "epoch": 3.8821453168044076, + "grad_norm": 2.90625, + "learning_rate": 2.374726500322959e-05, + "loss": 4.3274, + "step": 450950 + }, + { + "epoch": 3.882575757575758, + "grad_norm": 2.390625, + "learning_rate": 2.37297404462392e-05, + "loss": 4.4075, + "step": 451000 + }, + { + "epoch": 3.8830061983471076, + "grad_norm": 5.28125, + "learning_rate": 2.371222148732175e-05, + "loss": 4.1965, + "step": 451050 + }, + { + "epoch": 3.8834366391184574, + "grad_norm": 3.71875, + "learning_rate": 2.369470812776309e-05, + "loss": 4.1834, + "step": 451100 + }, + { + "epoch": 3.883867079889807, + "grad_norm": 2.234375, + "learning_rate": 2.3677200368848697e-05, + "loss": 4.2672, + "step": 451150 + }, + { + "epoch": 3.884297520661157, + "grad_norm": 2.734375, + "learning_rate": 2.3659698211863592e-05, + "loss": 4.3593, + "step": 451200 + }, + { + "epoch": 3.8847279614325068, + "grad_norm": 2.875, + "learning_rate": 2.3642201658092377e-05, + "loss": 4.5176, + "step": 451250 + }, + { + "epoch": 3.8851584022038566, + "grad_norm": 3.21875, + "learning_rate": 2.3624710708819276e-05, + "loss": 4.2402, + "step": 451300 + }, + { + "epoch": 3.8855888429752063, + "grad_norm": 5.0, + "learning_rate": 2.360722536532809e-05, + "loss": 3.8982, + "step": 451350 + }, + { + "epoch": 3.8860192837465566, + "grad_norm": 2.828125, + "learning_rate": 2.358974562890216e-05, + "loss": 4.46, + "step": 451400 + }, + { + "epoch": 3.8864497245179064, + "grad_norm": 2.953125, + "learning_rate": 2.357227150082454e-05, + "loss": 4.2613, + "step": 451450 + }, + { + "epoch": 3.886880165289256, + "grad_norm": 3.359375, + "learning_rate": 2.3554802982377742e-05, + "loss": 4.3461, + "step": 451500 + }, + { + "epoch": 3.887310606060606, + "grad_norm": 3.03125, + "learning_rate": 2.3537340074843938e-05, + "loss": 4.2654, + "step": 451550 + }, + { + "epoch": 3.8877410468319558, + "grad_norm": 5.28125, + "learning_rate": 2.3519882779504854e-05, + "loss": 4.5663, + "step": 451600 + }, + { + "epoch": 3.888171487603306, + "grad_norm": 3.734375, + "learning_rate": 2.3502431097641796e-05, + "loss": 4.2362, + "step": 451650 + }, + { + "epoch": 3.888601928374656, + "grad_norm": 2.46875, + "learning_rate": 2.3484985030535733e-05, + "loss": 4.3701, + "step": 451700 + }, + { + "epoch": 3.8890323691460056, + "grad_norm": 1.5859375, + "learning_rate": 2.346754457946717e-05, + "loss": 4.3001, + "step": 451750 + }, + { + "epoch": 3.8894628099173554, + "grad_norm": 1.6640625, + "learning_rate": 2.345010974571612e-05, + "loss": 4.0906, + "step": 451800 + }, + { + "epoch": 3.889893250688705, + "grad_norm": 5.90625, + "learning_rate": 2.3432680530562335e-05, + "loss": 4.6358, + "step": 451850 + }, + { + "epoch": 3.890323691460055, + "grad_norm": 3.296875, + "learning_rate": 2.341525693528507e-05, + "loss": 4.1502, + "step": 451900 + }, + { + "epoch": 3.8907541322314048, + "grad_norm": 3.796875, + "learning_rate": 2.339783896116313e-05, + "loss": 4.323, + "step": 451950 + }, + { + "epoch": 3.8911845730027546, + "grad_norm": 3.0625, + "learning_rate": 2.3380426609475058e-05, + "loss": 4.4724, + "step": 452000 + }, + { + "epoch": 3.891615013774105, + "grad_norm": 2.8125, + "learning_rate": 2.336301988149876e-05, + "loss": 4.3725, + "step": 452050 + }, + { + "epoch": 3.8920454545454546, + "grad_norm": 2.921875, + "learning_rate": 2.3345618778511947e-05, + "loss": 4.1476, + "step": 452100 + }, + { + "epoch": 3.8924758953168044, + "grad_norm": 2.53125, + "learning_rate": 2.3328223301791796e-05, + "loss": 4.4637, + "step": 452150 + }, + { + "epoch": 3.892906336088154, + "grad_norm": 6.25, + "learning_rate": 2.331083345261508e-05, + "loss": 4.5984, + "step": 452200 + }, + { + "epoch": 3.893336776859504, + "grad_norm": 4.46875, + "learning_rate": 2.329344923225819e-05, + "loss": 4.0695, + "step": 452250 + }, + { + "epoch": 3.893767217630854, + "grad_norm": 2.34375, + "learning_rate": 2.3276070641997093e-05, + "loss": 4.1813, + "step": 452300 + }, + { + "epoch": 3.894197658402204, + "grad_norm": 5.15625, + "learning_rate": 2.32586976831073e-05, + "loss": 4.3672, + "step": 452350 + }, + { + "epoch": 3.894628099173554, + "grad_norm": 5.0, + "learning_rate": 2.3241330356864023e-05, + "loss": 4.2152, + "step": 452400 + }, + { + "epoch": 3.8950585399449036, + "grad_norm": 2.390625, + "learning_rate": 2.322396866454194e-05, + "loss": 4.3901, + "step": 452450 + }, + { + "epoch": 3.8954889807162534, + "grad_norm": 2.953125, + "learning_rate": 2.320661260741537e-05, + "loss": 4.0748, + "step": 452500 + }, + { + "epoch": 3.895919421487603, + "grad_norm": 2.25, + "learning_rate": 2.318926218675821e-05, + "loss": 4.2602, + "step": 452550 + }, + { + "epoch": 3.896349862258953, + "grad_norm": 5.625, + "learning_rate": 2.3171917403843934e-05, + "loss": 4.8482, + "step": 452600 + }, + { + "epoch": 3.8967803030303028, + "grad_norm": 3.203125, + "learning_rate": 2.3154578259945602e-05, + "loss": 4.2367, + "step": 452650 + }, + { + "epoch": 3.897210743801653, + "grad_norm": 4.8125, + "learning_rate": 2.313724475633592e-05, + "loss": 4.4015, + "step": 452700 + }, + { + "epoch": 3.897641184573003, + "grad_norm": 1.4296875, + "learning_rate": 2.31199168942871e-05, + "loss": 4.5502, + "step": 452750 + }, + { + "epoch": 3.8980716253443526, + "grad_norm": 4.125, + "learning_rate": 2.3102594675070956e-05, + "loss": 4.15, + "step": 452800 + }, + { + "epoch": 3.8985020661157024, + "grad_norm": 1.6484375, + "learning_rate": 2.3085278099958918e-05, + "loss": 4.4132, + "step": 452850 + }, + { + "epoch": 3.8989325068870526, + "grad_norm": 1.8671875, + "learning_rate": 2.3067967170221984e-05, + "loss": 4.6712, + "step": 452900 + }, + { + "epoch": 3.8993629476584024, + "grad_norm": 2.0625, + "learning_rate": 2.305066188713071e-05, + "loss": 4.1893, + "step": 452950 + }, + { + "epoch": 3.899793388429752, + "grad_norm": 3.421875, + "learning_rate": 2.3033362251955315e-05, + "loss": 4.1439, + "step": 453000 + }, + { + "epoch": 3.899793388429752, + "eval_loss": 4.99050235748291, + "eval_runtime": 24.212, + "eval_samples_per_second": 26.433, + "eval_steps_per_second": 13.217, + "eval_tts_loss": 7.474577586485744, + "step": 453000 + }, + { + "epoch": 3.900223829201102, + "grad_norm": 3.046875, + "learning_rate": 2.301606826596553e-05, + "loss": 4.1552, + "step": 453050 + }, + { + "epoch": 3.900654269972452, + "grad_norm": 3.015625, + "learning_rate": 2.2998779930430703e-05, + "loss": 4.2096, + "step": 453100 + }, + { + "epoch": 3.9010847107438016, + "grad_norm": 4.125, + "learning_rate": 2.2981497246619767e-05, + "loss": 4.3103, + "step": 453150 + }, + { + "epoch": 3.9015151515151514, + "grad_norm": 2.921875, + "learning_rate": 2.296422021580118e-05, + "loss": 4.1876, + "step": 453200 + }, + { + "epoch": 3.901945592286501, + "grad_norm": 2.953125, + "learning_rate": 2.294694883924312e-05, + "loss": 4.2966, + "step": 453250 + }, + { + "epoch": 3.902376033057851, + "grad_norm": 3.671875, + "learning_rate": 2.292968311821325e-05, + "loss": 4.3407, + "step": 453300 + }, + { + "epoch": 3.902806473829201, + "grad_norm": 1.328125, + "learning_rate": 2.291242305397877e-05, + "loss": 3.5915, + "step": 453350 + }, + { + "epoch": 3.903236914600551, + "grad_norm": 4.65625, + "learning_rate": 2.289516864780661e-05, + "loss": 4.1179, + "step": 453400 + }, + { + "epoch": 3.903667355371901, + "grad_norm": 1.296875, + "learning_rate": 2.2877919900963174e-05, + "loss": 4.1593, + "step": 453450 + }, + { + "epoch": 3.9040977961432506, + "grad_norm": 3.21875, + "learning_rate": 2.2860676814714453e-05, + "loss": 4.4113, + "step": 453500 + }, + { + "epoch": 3.904528236914601, + "grad_norm": 3.296875, + "learning_rate": 2.2843439390326137e-05, + "loss": 4.5458, + "step": 453550 + }, + { + "epoch": 3.9049586776859506, + "grad_norm": 2.21875, + "learning_rate": 2.2826207629063344e-05, + "loss": 4.3653, + "step": 453600 + }, + { + "epoch": 3.9053891184573004, + "grad_norm": 4.9375, + "learning_rate": 2.280898153219083e-05, + "loss": 4.6557, + "step": 453650 + }, + { + "epoch": 3.90581955922865, + "grad_norm": 3.453125, + "learning_rate": 2.279176110097303e-05, + "loss": 4.3007, + "step": 453700 + }, + { + "epoch": 3.90625, + "grad_norm": 4.46875, + "learning_rate": 2.277454633667384e-05, + "loss": 4.5776, + "step": 453750 + }, + { + "epoch": 3.90668044077135, + "grad_norm": 3.578125, + "learning_rate": 2.27573372405568e-05, + "loss": 4.2157, + "step": 453800 + }, + { + "epoch": 3.9071108815426996, + "grad_norm": 5.125, + "learning_rate": 2.2740133813885e-05, + "loss": 4.2166, + "step": 453850 + }, + { + "epoch": 3.9075413223140494, + "grad_norm": 4.84375, + "learning_rate": 2.272293605792114e-05, + "loss": 4.2229, + "step": 453900 + }, + { + "epoch": 3.907971763085399, + "grad_norm": 3.46875, + "learning_rate": 2.270574397392753e-05, + "loss": 4.0801, + "step": 453950 + }, + { + "epoch": 3.9084022038567494, + "grad_norm": 2.46875, + "learning_rate": 2.2688557563166003e-05, + "loss": 4.2192, + "step": 454000 + }, + { + "epoch": 3.908832644628099, + "grad_norm": 5.1875, + "learning_rate": 2.2671376826898017e-05, + "loss": 4.4716, + "step": 454050 + }, + { + "epoch": 3.909263085399449, + "grad_norm": 1.109375, + "learning_rate": 2.2654201766384586e-05, + "loss": 4.3365, + "step": 454100 + }, + { + "epoch": 3.909693526170799, + "grad_norm": 3.1875, + "learning_rate": 2.2637032382886337e-05, + "loss": 4.5118, + "step": 454150 + }, + { + "epoch": 3.910123966942149, + "grad_norm": 1.8203125, + "learning_rate": 2.2619868677663437e-05, + "loss": 4.3146, + "step": 454200 + }, + { + "epoch": 3.910554407713499, + "grad_norm": 5.21875, + "learning_rate": 2.260271065197571e-05, + "loss": 4.2847, + "step": 454250 + }, + { + "epoch": 3.9109848484848486, + "grad_norm": 3.46875, + "learning_rate": 2.25855583070825e-05, + "loss": 4.5388, + "step": 454300 + }, + { + "epoch": 3.9114152892561984, + "grad_norm": 3.5, + "learning_rate": 2.256841164424275e-05, + "loss": 4.0971, + "step": 454350 + }, + { + "epoch": 3.911845730027548, + "grad_norm": 4.34375, + "learning_rate": 2.2551270664714984e-05, + "loss": 3.8285, + "step": 454400 + }, + { + "epoch": 3.912276170798898, + "grad_norm": 3.9375, + "learning_rate": 2.2534135369757316e-05, + "loss": 4.4057, + "step": 454450 + }, + { + "epoch": 3.912706611570248, + "grad_norm": 1.953125, + "learning_rate": 2.251700576062742e-05, + "loss": 4.1583, + "step": 454500 + }, + { + "epoch": 3.9131370523415976, + "grad_norm": 2.734375, + "learning_rate": 2.2499881838582637e-05, + "loss": 4.3322, + "step": 454550 + }, + { + "epoch": 3.9135674931129474, + "grad_norm": 4.09375, + "learning_rate": 2.248276360487973e-05, + "loss": 4.31, + "step": 454600 + }, + { + "epoch": 3.9139979338842976, + "grad_norm": 1.6953125, + "learning_rate": 2.2465651060775217e-05, + "loss": 4.0418, + "step": 454650 + }, + { + "epoch": 3.9144283746556474, + "grad_norm": 1.34375, + "learning_rate": 2.2448544207525102e-05, + "loss": 4.7029, + "step": 454700 + }, + { + "epoch": 3.914858815426997, + "grad_norm": 2.546875, + "learning_rate": 2.2431443046384948e-05, + "loss": 4.0764, + "step": 454750 + }, + { + "epoch": 3.915289256198347, + "grad_norm": 3.625, + "learning_rate": 2.2414347578610017e-05, + "loss": 3.9479, + "step": 454800 + }, + { + "epoch": 3.9157196969696972, + "grad_norm": 1.984375, + "learning_rate": 2.2397257805455074e-05, + "loss": 4.3572, + "step": 454850 + }, + { + "epoch": 3.916150137741047, + "grad_norm": 4.78125, + "learning_rate": 2.238017372817438e-05, + "loss": 4.5265, + "step": 454900 + }, + { + "epoch": 3.916580578512397, + "grad_norm": 3.25, + "learning_rate": 2.2363095348021968e-05, + "loss": 4.1992, + "step": 454950 + }, + { + "epoch": 3.9170110192837466, + "grad_norm": 2.390625, + "learning_rate": 2.234602266625131e-05, + "loss": 4.07, + "step": 455000 + }, + { + "epoch": 3.9174414600550964, + "grad_norm": 3.046875, + "learning_rate": 2.2328955684115483e-05, + "loss": 3.9802, + "step": 455050 + }, + { + "epoch": 3.917871900826446, + "grad_norm": 2.75, + "learning_rate": 2.231189440286725e-05, + "loss": 4.4272, + "step": 455100 + }, + { + "epoch": 3.918302341597796, + "grad_norm": 3.90625, + "learning_rate": 2.229483882375879e-05, + "loss": 4.3121, + "step": 455150 + }, + { + "epoch": 3.918732782369146, + "grad_norm": 4.03125, + "learning_rate": 2.2277788948041944e-05, + "loss": 4.4964, + "step": 455200 + }, + { + "epoch": 3.919163223140496, + "grad_norm": 2.84375, + "learning_rate": 2.22607447769682e-05, + "loss": 4.1009, + "step": 455250 + }, + { + "epoch": 3.919593663911846, + "grad_norm": 3.90625, + "learning_rate": 2.2243706311788527e-05, + "loss": 4.0371, + "step": 455300 + }, + { + "epoch": 3.9200241046831956, + "grad_norm": 3.546875, + "learning_rate": 2.2226673553753506e-05, + "loss": 4.6052, + "step": 455350 + }, + { + "epoch": 3.9204545454545454, + "grad_norm": 3.296875, + "learning_rate": 2.2209646504113323e-05, + "loss": 4.0969, + "step": 455400 + }, + { + "epoch": 3.920884986225895, + "grad_norm": 4.25, + "learning_rate": 2.2192625164117685e-05, + "loss": 4.1325, + "step": 455450 + }, + { + "epoch": 3.9213154269972454, + "grad_norm": 4.1875, + "learning_rate": 2.2175609535015984e-05, + "loss": 4.3922, + "step": 455500 + }, + { + "epoch": 3.9217458677685952, + "grad_norm": 3.21875, + "learning_rate": 2.2158599618057098e-05, + "loss": 4.2107, + "step": 455550 + }, + { + "epoch": 3.922176308539945, + "grad_norm": 3.125, + "learning_rate": 2.2141595414489513e-05, + "loss": 4.263, + "step": 455600 + }, + { + "epoch": 3.922606749311295, + "grad_norm": 1.3828125, + "learning_rate": 2.2124596925561315e-05, + "loss": 4.0325, + "step": 455650 + }, + { + "epoch": 3.9230371900826446, + "grad_norm": 3.25, + "learning_rate": 2.2107604152520147e-05, + "loss": 4.6308, + "step": 455700 + }, + { + "epoch": 3.9234676308539944, + "grad_norm": 3.046875, + "learning_rate": 2.209061709661322e-05, + "loss": 4.3302, + "step": 455750 + }, + { + "epoch": 3.923898071625344, + "grad_norm": 3.15625, + "learning_rate": 2.2073635759087397e-05, + "loss": 4.3726, + "step": 455800 + }, + { + "epoch": 3.924328512396694, + "grad_norm": 2.921875, + "learning_rate": 2.2056660141189068e-05, + "loss": 4.2595, + "step": 455850 + }, + { + "epoch": 3.9247589531680442, + "grad_norm": 4.9375, + "learning_rate": 2.2039690244164136e-05, + "loss": 3.8418, + "step": 455900 + }, + { + "epoch": 3.925189393939394, + "grad_norm": 2.703125, + "learning_rate": 2.2022726069258238e-05, + "loss": 4.1195, + "step": 455950 + }, + { + "epoch": 3.925619834710744, + "grad_norm": 3.96875, + "learning_rate": 2.2005767617716465e-05, + "loss": 4.5111, + "step": 456000 + }, + { + "epoch": 3.925619834710744, + "eval_loss": 4.989672660827637, + "eval_runtime": 24.7502, + "eval_samples_per_second": 25.858, + "eval_steps_per_second": 12.929, + "eval_tts_loss": 7.461466540797048, + "step": 456000 + }, + { + "epoch": 3.9260502754820936, + "grad_norm": 5.0, + "learning_rate": 2.1988814890783517e-05, + "loss": 4.4894, + "step": 456050 + }, + { + "epoch": 3.9264807162534434, + "grad_norm": 2.5, + "learning_rate": 2.1971867889703755e-05, + "loss": 4.5378, + "step": 456100 + }, + { + "epoch": 3.9269111570247937, + "grad_norm": 1.8125, + "learning_rate": 2.195492661572096e-05, + "loss": 4.2555, + "step": 456150 + }, + { + "epoch": 3.9273415977961434, + "grad_norm": 4.875, + "learning_rate": 2.193799107007867e-05, + "loss": 4.3128, + "step": 456200 + }, + { + "epoch": 3.9277720385674932, + "grad_norm": 1.2421875, + "learning_rate": 2.192106125401987e-05, + "loss": 4.3103, + "step": 456250 + }, + { + "epoch": 3.928202479338843, + "grad_norm": 2.21875, + "learning_rate": 2.1904137168787155e-05, + "loss": 4.1488, + "step": 456300 + }, + { + "epoch": 3.928632920110193, + "grad_norm": 4.09375, + "learning_rate": 2.18872188156228e-05, + "loss": 4.0685, + "step": 456350 + }, + { + "epoch": 3.9290633608815426, + "grad_norm": 1.46875, + "learning_rate": 2.1870306195768497e-05, + "loss": 4.4033, + "step": 456400 + }, + { + "epoch": 3.9294938016528924, + "grad_norm": 3.21875, + "learning_rate": 2.1853399310465595e-05, + "loss": 4.2641, + "step": 456450 + }, + { + "epoch": 3.929924242424242, + "grad_norm": 2.21875, + "learning_rate": 2.1836498160955065e-05, + "loss": 4.382, + "step": 456500 + }, + { + "epoch": 3.9303546831955924, + "grad_norm": 1.6875, + "learning_rate": 2.1819602748477397e-05, + "loss": 4.3428, + "step": 456550 + }, + { + "epoch": 3.9307851239669422, + "grad_norm": 4.125, + "learning_rate": 2.1802713074272653e-05, + "loss": 4.3731, + "step": 456600 + }, + { + "epoch": 3.931215564738292, + "grad_norm": 5.15625, + "learning_rate": 2.1785829139580584e-05, + "loss": 4.4775, + "step": 456650 + }, + { + "epoch": 3.931646005509642, + "grad_norm": 2.078125, + "learning_rate": 2.1768950945640343e-05, + "loss": 4.4326, + "step": 456700 + }, + { + "epoch": 3.9320764462809916, + "grad_norm": 1.7578125, + "learning_rate": 2.1752078493690752e-05, + "loss": 4.5349, + "step": 456750 + }, + { + "epoch": 3.932506887052342, + "grad_norm": 2.796875, + "learning_rate": 2.173521178497028e-05, + "loss": 4.2966, + "step": 456800 + }, + { + "epoch": 3.9329373278236917, + "grad_norm": 3.09375, + "learning_rate": 2.171835082071686e-05, + "loss": 4.4935, + "step": 456850 + }, + { + "epoch": 3.9333677685950414, + "grad_norm": 3.21875, + "learning_rate": 2.1701495602168077e-05, + "loss": 4.2956, + "step": 456900 + }, + { + "epoch": 3.9337982093663912, + "grad_norm": 2.9375, + "learning_rate": 2.1684646130561047e-05, + "loss": 4.4077, + "step": 456950 + }, + { + "epoch": 3.934228650137741, + "grad_norm": 3.03125, + "learning_rate": 2.1667802407132466e-05, + "loss": 3.9705, + "step": 457000 + }, + { + "epoch": 3.934659090909091, + "grad_norm": 2.078125, + "learning_rate": 2.165096443311867e-05, + "loss": 4.5346, + "step": 457050 + }, + { + "epoch": 3.9350895316804406, + "grad_norm": 3.234375, + "learning_rate": 2.163413220975552e-05, + "loss": 4.1243, + "step": 457100 + }, + { + "epoch": 3.9355199724517904, + "grad_norm": 3.21875, + "learning_rate": 2.1617305738278458e-05, + "loss": 4.3102, + "step": 457150 + }, + { + "epoch": 3.9359504132231407, + "grad_norm": 2.921875, + "learning_rate": 2.16004850199225e-05, + "loss": 3.713, + "step": 457200 + }, + { + "epoch": 3.9363808539944904, + "grad_norm": 4.5625, + "learning_rate": 2.1583670055922277e-05, + "loss": 4.5762, + "step": 457250 + }, + { + "epoch": 3.9368112947658402, + "grad_norm": 3.0625, + "learning_rate": 2.1566860847511916e-05, + "loss": 4.5289, + "step": 457300 + }, + { + "epoch": 3.93724173553719, + "grad_norm": 1.703125, + "learning_rate": 2.155005739592525e-05, + "loss": 4.4189, + "step": 457350 + }, + { + "epoch": 3.93767217630854, + "grad_norm": 5.5, + "learning_rate": 2.1533259702395604e-05, + "loss": 4.616, + "step": 457400 + }, + { + "epoch": 3.93810261707989, + "grad_norm": 2.015625, + "learning_rate": 2.1516467768155812e-05, + "loss": 4.2837, + "step": 457450 + }, + { + "epoch": 3.93853305785124, + "grad_norm": 3.0625, + "learning_rate": 2.1499681594438457e-05, + "loss": 4.4649, + "step": 457500 + }, + { + "epoch": 3.9389634986225897, + "grad_norm": 2.671875, + "learning_rate": 2.148290118247558e-05, + "loss": 4.1393, + "step": 457550 + }, + { + "epoch": 3.9393939393939394, + "grad_norm": 5.90625, + "learning_rate": 2.1466126533498788e-05, + "loss": 4.0418, + "step": 457600 + }, + { + "epoch": 3.9398243801652892, + "grad_norm": 2.15625, + "learning_rate": 2.1449357648739398e-05, + "loss": 4.2382, + "step": 457650 + }, + { + "epoch": 3.940254820936639, + "grad_norm": 2.9375, + "learning_rate": 2.1432594529428087e-05, + "loss": 4.2597, + "step": 457700 + }, + { + "epoch": 3.940685261707989, + "grad_norm": 3.90625, + "learning_rate": 2.141583717679533e-05, + "loss": 4.3176, + "step": 457750 + }, + { + "epoch": 3.9411157024793386, + "grad_norm": 3.0, + "learning_rate": 2.1399085592071045e-05, + "loss": 4.5763, + "step": 457800 + }, + { + "epoch": 3.941546143250689, + "grad_norm": 4.34375, + "learning_rate": 2.1382339776484763e-05, + "loss": 4.7258, + "step": 457850 + }, + { + "epoch": 3.9419765840220387, + "grad_norm": 3.171875, + "learning_rate": 2.1365599731265595e-05, + "loss": 4.09, + "step": 457900 + }, + { + "epoch": 3.9424070247933884, + "grad_norm": 3.078125, + "learning_rate": 2.1348865457642207e-05, + "loss": 4.3835, + "step": 457950 + }, + { + "epoch": 3.9428374655647382, + "grad_norm": 2.40625, + "learning_rate": 2.133213695684286e-05, + "loss": 4.4301, + "step": 458000 + }, + { + "epoch": 3.943267906336088, + "grad_norm": 2.765625, + "learning_rate": 2.131541423009542e-05, + "loss": 4.5665, + "step": 458050 + }, + { + "epoch": 3.9436983471074383, + "grad_norm": 7.78125, + "learning_rate": 2.129869727862729e-05, + "loss": 4.2329, + "step": 458100 + }, + { + "epoch": 3.944128787878788, + "grad_norm": 2.75, + "learning_rate": 2.128198610366544e-05, + "loss": 4.1698, + "step": 458150 + }, + { + "epoch": 3.944559228650138, + "grad_norm": 2.34375, + "learning_rate": 2.1265280706436452e-05, + "loss": 4.2915, + "step": 458200 + }, + { + "epoch": 3.9449896694214877, + "grad_norm": 4.28125, + "learning_rate": 2.1248581088166453e-05, + "loss": 4.5038, + "step": 458250 + }, + { + "epoch": 3.9454201101928374, + "grad_norm": 2.328125, + "learning_rate": 2.1231887250081138e-05, + "loss": 4.23, + "step": 458300 + }, + { + "epoch": 3.9458505509641872, + "grad_norm": 3.3125, + "learning_rate": 2.1215199193405856e-05, + "loss": 4.443, + "step": 458350 + }, + { + "epoch": 3.946280991735537, + "grad_norm": 5.34375, + "learning_rate": 2.1198516919365428e-05, + "loss": 4.4481, + "step": 458400 + }, + { + "epoch": 3.946711432506887, + "grad_norm": 2.296875, + "learning_rate": 2.118184042918433e-05, + "loss": 4.4335, + "step": 458450 + }, + { + "epoch": 3.947141873278237, + "grad_norm": 3.5625, + "learning_rate": 2.1165169724086552e-05, + "loss": 4.648, + "step": 458500 + }, + { + "epoch": 3.947572314049587, + "grad_norm": 2.921875, + "learning_rate": 2.1148504805295677e-05, + "loss": 4.4054, + "step": 458550 + }, + { + "epoch": 3.9480027548209367, + "grad_norm": 3.25, + "learning_rate": 2.113184567403492e-05, + "loss": 4.26, + "step": 458600 + }, + { + "epoch": 3.9484331955922864, + "grad_norm": 1.71875, + "learning_rate": 2.111519233152701e-05, + "loss": 4.0183, + "step": 458650 + }, + { + "epoch": 3.9488636363636362, + "grad_norm": 8.75, + "learning_rate": 2.1098544778994255e-05, + "loss": 4.4697, + "step": 458700 + }, + { + "epoch": 3.9492940771349865, + "grad_norm": 1.3671875, + "learning_rate": 2.1081903017658543e-05, + "loss": 4.1582, + "step": 458750 + }, + { + "epoch": 3.9497245179063363, + "grad_norm": 3.21875, + "learning_rate": 2.1065267048741365e-05, + "loss": 3.7962, + "step": 458800 + }, + { + "epoch": 3.950154958677686, + "grad_norm": 4.9375, + "learning_rate": 2.104863687346371e-05, + "loss": 4.2527, + "step": 458850 + }, + { + "epoch": 3.950585399449036, + "grad_norm": 4.03125, + "learning_rate": 2.1032012493046282e-05, + "loss": 4.2759, + "step": 458900 + }, + { + "epoch": 3.9510158402203857, + "grad_norm": 4.1875, + "learning_rate": 2.1015393908709258e-05, + "loss": 4.2437, + "step": 458950 + }, + { + "epoch": 3.9514462809917354, + "grad_norm": 4.21875, + "learning_rate": 2.0998781121672316e-05, + "loss": 4.3284, + "step": 459000 + }, + { + "epoch": 3.9514462809917354, + "eval_loss": 4.989922046661377, + "eval_runtime": 23.8491, + "eval_samples_per_second": 26.835, + "eval_steps_per_second": 13.418, + "eval_tts_loss": 7.461940387165521, + "step": 459000 + }, + { + "epoch": 3.9518767217630852, + "grad_norm": 3.515625, + "learning_rate": 2.0982174133154907e-05, + "loss": 4.2978, + "step": 459050 + }, + { + "epoch": 3.952307162534435, + "grad_norm": 0.80078125, + "learning_rate": 2.09655729443759e-05, + "loss": 4.4284, + "step": 459100 + }, + { + "epoch": 3.9527376033057853, + "grad_norm": 1.90625, + "learning_rate": 2.094897755655376e-05, + "loss": 4.2241, + "step": 459150 + }, + { + "epoch": 3.953168044077135, + "grad_norm": 3.03125, + "learning_rate": 2.093238797090664e-05, + "loss": 4.4189, + "step": 459200 + }, + { + "epoch": 3.953598484848485, + "grad_norm": 2.984375, + "learning_rate": 2.0915804188652077e-05, + "loss": 4.0136, + "step": 459250 + }, + { + "epoch": 3.9540289256198347, + "grad_norm": 5.875, + "learning_rate": 2.089922621100735e-05, + "loss": 4.0621, + "step": 459300 + }, + { + "epoch": 3.9544593663911844, + "grad_norm": 2.515625, + "learning_rate": 2.088265403918923e-05, + "loss": 4.2556, + "step": 459350 + }, + { + "epoch": 3.9548898071625347, + "grad_norm": 3.75, + "learning_rate": 2.086608767441409e-05, + "loss": 4.1928, + "step": 459400 + }, + { + "epoch": 3.9553202479338845, + "grad_norm": 3.6875, + "learning_rate": 2.0849527117897836e-05, + "loss": 4.7572, + "step": 459450 + }, + { + "epoch": 3.9557506887052343, + "grad_norm": 3.453125, + "learning_rate": 2.0832972370856007e-05, + "loss": 4.3422, + "step": 459500 + }, + { + "epoch": 3.956181129476584, + "grad_norm": 4.21875, + "learning_rate": 2.0816423434503652e-05, + "loss": 4.443, + "step": 459550 + }, + { + "epoch": 3.956611570247934, + "grad_norm": 2.953125, + "learning_rate": 2.0799880310055464e-05, + "loss": 4.3839, + "step": 459600 + }, + { + "epoch": 3.9570420110192837, + "grad_norm": 3.953125, + "learning_rate": 2.0783342998725665e-05, + "loss": 4.6666, + "step": 459650 + }, + { + "epoch": 3.9574724517906334, + "grad_norm": 3.515625, + "learning_rate": 2.0766811501728046e-05, + "loss": 4.3964, + "step": 459700 + }, + { + "epoch": 3.9579028925619832, + "grad_norm": 1.8671875, + "learning_rate": 2.0750285820275996e-05, + "loss": 4.1742, + "step": 459750 + }, + { + "epoch": 3.9583333333333335, + "grad_norm": 3.484375, + "learning_rate": 2.073376595558245e-05, + "loss": 4.2851, + "step": 459800 + }, + { + "epoch": 3.9587637741046833, + "grad_norm": 1.484375, + "learning_rate": 2.0717251908859903e-05, + "loss": 4.3001, + "step": 459850 + }, + { + "epoch": 3.959194214876033, + "grad_norm": 2.921875, + "learning_rate": 2.0700743681320523e-05, + "loss": 4.4291, + "step": 459900 + }, + { + "epoch": 3.959624655647383, + "grad_norm": 3.796875, + "learning_rate": 2.0684241274175942e-05, + "loss": 4.3904, + "step": 459950 + }, + { + "epoch": 3.9600550964187327, + "grad_norm": 4.0625, + "learning_rate": 2.0667744688637392e-05, + "loss": 4.5331, + "step": 460000 + }, + { + "epoch": 3.960485537190083, + "grad_norm": 1.8125, + "learning_rate": 2.0651253925915694e-05, + "loss": 4.2643, + "step": 460050 + }, + { + "epoch": 3.9609159779614327, + "grad_norm": 1.25, + "learning_rate": 2.0634768987221244e-05, + "loss": 4.6239, + "step": 460100 + }, + { + "epoch": 3.9613464187327825, + "grad_norm": 2.0625, + "learning_rate": 2.061828987376395e-05, + "loss": 4.4271, + "step": 460150 + }, + { + "epoch": 3.9617768595041323, + "grad_norm": 2.890625, + "learning_rate": 2.060181658675345e-05, + "loss": 4.3318, + "step": 460200 + }, + { + "epoch": 3.962207300275482, + "grad_norm": 1.75, + "learning_rate": 2.0585349127398724e-05, + "loss": 4.1366, + "step": 460250 + }, + { + "epoch": 3.962637741046832, + "grad_norm": 3.1875, + "learning_rate": 2.056888749690854e-05, + "loss": 4.5682, + "step": 460300 + }, + { + "epoch": 3.9630681818181817, + "grad_norm": 3.09375, + "learning_rate": 2.0552431696491104e-05, + "loss": 4.5183, + "step": 460350 + }, + { + "epoch": 3.9634986225895315, + "grad_norm": 2.515625, + "learning_rate": 2.0535981727354225e-05, + "loss": 4.1679, + "step": 460400 + }, + { + "epoch": 3.9639290633608817, + "grad_norm": 3.125, + "learning_rate": 2.051953759070534e-05, + "loss": 4.325, + "step": 460450 + }, + { + "epoch": 3.9643595041322315, + "grad_norm": 4.75, + "learning_rate": 2.0503099287751415e-05, + "loss": 4.0851, + "step": 460500 + }, + { + "epoch": 3.9647899449035813, + "grad_norm": 2.0625, + "learning_rate": 2.0486666819698908e-05, + "loss": 4.4, + "step": 460550 + }, + { + "epoch": 3.965220385674931, + "grad_norm": 4.90625, + "learning_rate": 2.047024018775401e-05, + "loss": 4.413, + "step": 460600 + }, + { + "epoch": 3.965650826446281, + "grad_norm": 2.5625, + "learning_rate": 2.0453819393122366e-05, + "loss": 4.2945, + "step": 460650 + }, + { + "epoch": 3.966081267217631, + "grad_norm": 1.5078125, + "learning_rate": 2.04374044370092e-05, + "loss": 4.3787, + "step": 460700 + }, + { + "epoch": 3.966511707988981, + "grad_norm": 2.234375, + "learning_rate": 2.042099532061943e-05, + "loss": 4.2596, + "step": 460750 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 4.3125, + "learning_rate": 2.040459204515732e-05, + "loss": 4.3229, + "step": 460800 + }, + { + "epoch": 3.9673725895316805, + "grad_norm": 1.7265625, + "learning_rate": 2.0388194611826927e-05, + "loss": 4.5099, + "step": 460850 + }, + { + "epoch": 3.9678030303030303, + "grad_norm": 1.9609375, + "learning_rate": 2.037180302183177e-05, + "loss": 3.9855, + "step": 460900 + }, + { + "epoch": 3.96823347107438, + "grad_norm": 2.390625, + "learning_rate": 2.0355417276374943e-05, + "loss": 4.3092, + "step": 460950 + }, + { + "epoch": 3.96866391184573, + "grad_norm": 4.84375, + "learning_rate": 2.033903737665912e-05, + "loss": 4.1461, + "step": 461000 + }, + { + "epoch": 3.9690943526170797, + "grad_norm": 3.59375, + "learning_rate": 2.032266332388657e-05, + "loss": 4.4845, + "step": 461050 + }, + { + "epoch": 3.96952479338843, + "grad_norm": 6.46875, + "learning_rate": 2.0306295119259066e-05, + "loss": 4.2084, + "step": 461100 + }, + { + "epoch": 3.9699552341597797, + "grad_norm": 2.625, + "learning_rate": 2.028993276397807e-05, + "loss": 4.5187, + "step": 461150 + }, + { + "epoch": 3.9703856749311295, + "grad_norm": 2.28125, + "learning_rate": 2.02735762592445e-05, + "loss": 4.0516, + "step": 461200 + }, + { + "epoch": 3.9708161157024793, + "grad_norm": 3.546875, + "learning_rate": 2.02572256062589e-05, + "loss": 4.0382, + "step": 461250 + }, + { + "epoch": 3.971246556473829, + "grad_norm": 2.9375, + "learning_rate": 2.0240880806221375e-05, + "loss": 4.4745, + "step": 461300 + }, + { + "epoch": 3.9716769972451793, + "grad_norm": 3.3125, + "learning_rate": 2.022454186033158e-05, + "loss": 4.272, + "step": 461350 + }, + { + "epoch": 3.972107438016529, + "grad_norm": 3.078125, + "learning_rate": 2.020820876978875e-05, + "loss": 4.1312, + "step": 461400 + }, + { + "epoch": 3.972537878787879, + "grad_norm": 4.84375, + "learning_rate": 2.019188153579176e-05, + "loss": 4.3924, + "step": 461450 + }, + { + "epoch": 3.9729683195592287, + "grad_norm": 3.875, + "learning_rate": 2.017556015953893e-05, + "loss": 4.3548, + "step": 461500 + }, + { + "epoch": 3.9733987603305785, + "grad_norm": 4.59375, + "learning_rate": 2.015924464222826e-05, + "loss": 4.5869, + "step": 461550 + }, + { + "epoch": 3.9738292011019283, + "grad_norm": 4.71875, + "learning_rate": 2.0142934985057237e-05, + "loss": 4.448, + "step": 461600 + }, + { + "epoch": 3.974259641873278, + "grad_norm": 3.140625, + "learning_rate": 2.0126631189222965e-05, + "loss": 4.0536, + "step": 461650 + }, + { + "epoch": 3.974690082644628, + "grad_norm": 3.09375, + "learning_rate": 2.0110333255922097e-05, + "loss": 4.1835, + "step": 461700 + }, + { + "epoch": 3.975120523415978, + "grad_norm": 7.3125, + "learning_rate": 2.009404118635093e-05, + "loss": 4.3824, + "step": 461750 + }, + { + "epoch": 3.975550964187328, + "grad_norm": 2.59375, + "learning_rate": 2.0077754981705165e-05, + "loss": 4.5331, + "step": 461800 + }, + { + "epoch": 3.9759814049586777, + "grad_norm": 2.890625, + "learning_rate": 2.0061474643180255e-05, + "loss": 4.3659, + "step": 461850 + }, + { + "epoch": 3.9764118457300275, + "grad_norm": 5.625, + "learning_rate": 2.0045200171971113e-05, + "loss": 4.4113, + "step": 461900 + }, + { + "epoch": 3.9768422865013773, + "grad_norm": 1.6953125, + "learning_rate": 2.0028931569272225e-05, + "loss": 4.5685, + "step": 461950 + }, + { + "epoch": 3.9772727272727275, + "grad_norm": 3.453125, + "learning_rate": 2.0012668836277748e-05, + "loss": 4.7479, + "step": 462000 + }, + { + "epoch": 3.9772727272727275, + "eval_loss": 4.989577293395996, + "eval_runtime": 24.1819, + "eval_samples_per_second": 26.466, + "eval_steps_per_second": 13.233, + "eval_tts_loss": 7.4560609404651474, + "step": 462000 + }, + { + "epoch": 3.9777031680440773, + "grad_norm": 3.734375, + "learning_rate": 1.999641197418126e-05, + "loss": 4.0643, + "step": 462050 + }, + { + "epoch": 3.978133608815427, + "grad_norm": 3.734375, + "learning_rate": 1.9980160984175977e-05, + "loss": 4.1987, + "step": 462100 + }, + { + "epoch": 3.978564049586777, + "grad_norm": 4.875, + "learning_rate": 1.996391586745473e-05, + "loss": 4.4418, + "step": 462150 + }, + { + "epoch": 3.9789944903581267, + "grad_norm": 3.078125, + "learning_rate": 1.9947676625209855e-05, + "loss": 4.1893, + "step": 462200 + }, + { + "epoch": 3.9794249311294765, + "grad_norm": 3.84375, + "learning_rate": 1.9931443258633255e-05, + "loss": 4.4415, + "step": 462250 + }, + { + "epoch": 3.9798553719008263, + "grad_norm": 1.7109375, + "learning_rate": 1.9915215768916506e-05, + "loss": 4.2932, + "step": 462300 + }, + { + "epoch": 3.980285812672176, + "grad_norm": 3.96875, + "learning_rate": 1.9898994157250573e-05, + "loss": 4.2352, + "step": 462350 + }, + { + "epoch": 3.9807162534435263, + "grad_norm": 2.078125, + "learning_rate": 1.988277842482611e-05, + "loss": 4.3896, + "step": 462400 + }, + { + "epoch": 3.981146694214876, + "grad_norm": 2.6875, + "learning_rate": 1.9866568572833366e-05, + "loss": 4.3079, + "step": 462450 + }, + { + "epoch": 3.981577134986226, + "grad_norm": 5.71875, + "learning_rate": 1.9850364602462067e-05, + "loss": 4.3261, + "step": 462500 + }, + { + "epoch": 3.9820075757575757, + "grad_norm": 3.453125, + "learning_rate": 1.9834166514901565e-05, + "loss": 4.3331, + "step": 462550 + }, + { + "epoch": 3.9824380165289255, + "grad_norm": 3.125, + "learning_rate": 1.9817974311340757e-05, + "loss": 4.0866, + "step": 462600 + }, + { + "epoch": 3.9828684573002757, + "grad_norm": 3.203125, + "learning_rate": 1.980178799296809e-05, + "loss": 4.253, + "step": 462650 + }, + { + "epoch": 3.9832988980716255, + "grad_norm": 3.375, + "learning_rate": 1.9785607560971664e-05, + "loss": 4.3319, + "step": 462700 + }, + { + "epoch": 3.9837293388429753, + "grad_norm": 3.140625, + "learning_rate": 1.976943301653905e-05, + "loss": 4.4869, + "step": 462750 + }, + { + "epoch": 3.984159779614325, + "grad_norm": 3.15625, + "learning_rate": 1.975326436085745e-05, + "loss": 4.4756, + "step": 462800 + }, + { + "epoch": 3.984590220385675, + "grad_norm": 1.4453125, + "learning_rate": 1.9737101595113572e-05, + "loss": 4.0252, + "step": 462850 + }, + { + "epoch": 3.9850206611570247, + "grad_norm": 3.140625, + "learning_rate": 1.972094472049375e-05, + "loss": 4.3611, + "step": 462900 + }, + { + "epoch": 3.9854511019283745, + "grad_norm": 4.28125, + "learning_rate": 1.9704793738183846e-05, + "loss": 3.9808, + "step": 462950 + }, + { + "epoch": 3.9858815426997243, + "grad_norm": 3.578125, + "learning_rate": 1.9688648649369333e-05, + "loss": 4.3338, + "step": 463000 + }, + { + "epoch": 3.9863119834710745, + "grad_norm": 2.15625, + "learning_rate": 1.9672509455235255e-05, + "loss": 4.602, + "step": 463050 + }, + { + "epoch": 3.9867424242424243, + "grad_norm": 2.953125, + "learning_rate": 1.9656376156966093e-05, + "loss": 4.7624, + "step": 463100 + }, + { + "epoch": 3.987172865013774, + "grad_norm": 2.796875, + "learning_rate": 1.9640248755746082e-05, + "loss": 4.2008, + "step": 463150 + }, + { + "epoch": 3.987603305785124, + "grad_norm": 3.15625, + "learning_rate": 1.962412725275892e-05, + "loss": 4.4183, + "step": 463200 + }, + { + "epoch": 3.9880337465564737, + "grad_norm": 4.28125, + "learning_rate": 1.9608011649187853e-05, + "loss": 3.9213, + "step": 463250 + }, + { + "epoch": 3.988464187327824, + "grad_norm": 4.25, + "learning_rate": 1.9591901946215817e-05, + "loss": 4.0634, + "step": 463300 + }, + { + "epoch": 3.9888946280991737, + "grad_norm": 2.859375, + "learning_rate": 1.9575798145025127e-05, + "loss": 3.9527, + "step": 463350 + }, + { + "epoch": 3.9893250688705235, + "grad_norm": 3.375, + "learning_rate": 1.955970024679784e-05, + "loss": 4.4453, + "step": 463400 + }, + { + "epoch": 3.9897555096418733, + "grad_norm": 2.84375, + "learning_rate": 1.9543608252715484e-05, + "loss": 4.7095, + "step": 463450 + }, + { + "epoch": 3.990185950413223, + "grad_norm": 2.765625, + "learning_rate": 1.9527522163959144e-05, + "loss": 4.0063, + "step": 463500 + }, + { + "epoch": 3.990616391184573, + "grad_norm": 2.96875, + "learning_rate": 1.951144198170959e-05, + "loss": 4.5552, + "step": 463550 + }, + { + "epoch": 3.9910468319559227, + "grad_norm": 3.0, + "learning_rate": 1.9495367707146994e-05, + "loss": 4.3213, + "step": 463600 + }, + { + "epoch": 3.9914772727272725, + "grad_norm": 3.046875, + "learning_rate": 1.9479299341451174e-05, + "loss": 4.4042, + "step": 463650 + }, + { + "epoch": 3.9919077134986227, + "grad_norm": 2.875, + "learning_rate": 1.9463236885801573e-05, + "loss": 4.0999, + "step": 463700 + }, + { + "epoch": 3.9923381542699725, + "grad_norm": 2.875, + "learning_rate": 1.944718034137709e-05, + "loss": 4.734, + "step": 463750 + }, + { + "epoch": 3.9927685950413223, + "grad_norm": 2.25, + "learning_rate": 1.9431129709356267e-05, + "loss": 4.2549, + "step": 463800 + }, + { + "epoch": 3.993199035812672, + "grad_norm": 2.71875, + "learning_rate": 1.9415084990917166e-05, + "loss": 3.871, + "step": 463850 + }, + { + "epoch": 3.993629476584022, + "grad_norm": 4.84375, + "learning_rate": 1.9399046187237447e-05, + "loss": 4.1596, + "step": 463900 + }, + { + "epoch": 3.994059917355372, + "grad_norm": 2.578125, + "learning_rate": 1.9383013299494292e-05, + "loss": 4.3716, + "step": 463950 + }, + { + "epoch": 3.994490358126722, + "grad_norm": 5.65625, + "learning_rate": 1.9366986328864534e-05, + "loss": 4.2075, + "step": 464000 + }, + { + "epoch": 3.9949207988980717, + "grad_norm": 2.859375, + "learning_rate": 1.9350965276524503e-05, + "loss": 4.0072, + "step": 464050 + }, + { + "epoch": 3.9953512396694215, + "grad_norm": 4.125, + "learning_rate": 1.9334950143650086e-05, + "loss": 4.2279, + "step": 464100 + }, + { + "epoch": 3.9957816804407713, + "grad_norm": 5.15625, + "learning_rate": 1.9318940931416784e-05, + "loss": 4.3637, + "step": 464150 + }, + { + "epoch": 3.996212121212121, + "grad_norm": 2.625, + "learning_rate": 1.9302937640999595e-05, + "loss": 4.2214, + "step": 464200 + }, + { + "epoch": 3.996642561983471, + "grad_norm": 3.765625, + "learning_rate": 1.9286940273573196e-05, + "loss": 3.7824, + "step": 464250 + }, + { + "epoch": 3.9970730027548207, + "grad_norm": 3.109375, + "learning_rate": 1.9270948830311707e-05, + "loss": 4.3371, + "step": 464300 + }, + { + "epoch": 3.997503443526171, + "grad_norm": 4.96875, + "learning_rate": 1.9254963312388895e-05, + "loss": 4.7692, + "step": 464350 + }, + { + "epoch": 3.9979338842975207, + "grad_norm": 3.609375, + "learning_rate": 1.9238983720978043e-05, + "loss": 4.0711, + "step": 464400 + }, + { + "epoch": 3.9983643250688705, + "grad_norm": 3.765625, + "learning_rate": 1.9223010057252024e-05, + "loss": 4.3957, + "step": 464450 + }, + { + "epoch": 3.9987947658402203, + "grad_norm": 3.453125, + "learning_rate": 1.920704232238324e-05, + "loss": 4.3544, + "step": 464500 + }, + { + "epoch": 3.99922520661157, + "grad_norm": 3.109375, + "learning_rate": 1.9191080517543746e-05, + "loss": 4.6815, + "step": 464550 + }, + { + "epoch": 3.9996556473829203, + "grad_norm": 5.0625, + "learning_rate": 1.9175124643905107e-05, + "loss": 4.306, + "step": 464600 + }, + { + "epoch": 4.00008608815427, + "grad_norm": 2.359375, + "learning_rate": 1.915917470263837e-05, + "loss": 4.0637, + "step": 464650 + }, + { + "epoch": 4.00051652892562, + "grad_norm": 3.40625, + "learning_rate": 1.9143230694914292e-05, + "loss": 4.2511, + "step": 464700 + }, + { + "epoch": 4.00094696969697, + "grad_norm": 3.03125, + "learning_rate": 1.9127292621903127e-05, + "loss": 4.1813, + "step": 464750 + }, + { + "epoch": 4.0013774104683195, + "grad_norm": 3.40625, + "learning_rate": 1.9111360484774644e-05, + "loss": 4.3493, + "step": 464800 + }, + { + "epoch": 4.001807851239669, + "grad_norm": 3.5, + "learning_rate": 1.909543428469832e-05, + "loss": 4.4086, + "step": 464850 + }, + { + "epoch": 4.002238292011019, + "grad_norm": 3.984375, + "learning_rate": 1.9079514022843002e-05, + "loss": 4.2353, + "step": 464900 + }, + { + "epoch": 4.002668732782369, + "grad_norm": 1.578125, + "learning_rate": 1.9063599700377265e-05, + "loss": 4.4949, + "step": 464950 + }, + { + "epoch": 4.003099173553719, + "grad_norm": 4.9375, + "learning_rate": 1.9047691318469185e-05, + "loss": 4.3271, + "step": 465000 + }, + { + "epoch": 4.003099173553719, + "eval_loss": 4.989533424377441, + "eval_runtime": 23.9105, + "eval_samples_per_second": 26.766, + "eval_steps_per_second": 13.383, + "eval_tts_loss": 7.465740052456312, + "step": 465000 + }, + { + "epoch": 4.0035296143250685, + "grad_norm": 6.53125, + "learning_rate": 1.903178887828636e-05, + "loss": 4.218, + "step": 465050 + }, + { + "epoch": 4.003960055096419, + "grad_norm": 2.15625, + "learning_rate": 1.901589238099607e-05, + "loss": 4.3344, + "step": 465100 + }, + { + "epoch": 4.004390495867769, + "grad_norm": 4.25, + "learning_rate": 1.900000182776501e-05, + "loss": 4.4278, + "step": 465150 + }, + { + "epoch": 4.004820936639119, + "grad_norm": 3.609375, + "learning_rate": 1.8984117219759513e-05, + "loss": 4.2559, + "step": 465200 + }, + { + "epoch": 4.0052513774104685, + "grad_norm": 4.5, + "learning_rate": 1.8968238558145533e-05, + "loss": 4.1277, + "step": 465250 + }, + { + "epoch": 4.005681818181818, + "grad_norm": 4.78125, + "learning_rate": 1.8952365844088504e-05, + "loss": 4.68, + "step": 465300 + }, + { + "epoch": 4.006112258953168, + "grad_norm": 3.828125, + "learning_rate": 1.8936499078753435e-05, + "loss": 4.2194, + "step": 465350 + }, + { + "epoch": 4.006542699724518, + "grad_norm": 4.53125, + "learning_rate": 1.8920638263304925e-05, + "loss": 4.0065, + "step": 465400 + }, + { + "epoch": 4.006973140495868, + "grad_norm": 3.328125, + "learning_rate": 1.890478339890711e-05, + "loss": 4.3619, + "step": 465450 + }, + { + "epoch": 4.0074035812672175, + "grad_norm": 4.0625, + "learning_rate": 1.8888934486723696e-05, + "loss": 4.5314, + "step": 465500 + }, + { + "epoch": 4.007834022038567, + "grad_norm": 3.0, + "learning_rate": 1.8873091527917997e-05, + "loss": 4.3445, + "step": 465550 + }, + { + "epoch": 4.008264462809917, + "grad_norm": 2.390625, + "learning_rate": 1.8857254523652835e-05, + "loss": 4.4336, + "step": 465600 + }, + { + "epoch": 4.008694903581267, + "grad_norm": 2.484375, + "learning_rate": 1.8841423475090614e-05, + "loss": 4.507, + "step": 465650 + }, + { + "epoch": 4.009125344352617, + "grad_norm": 1.46875, + "learning_rate": 1.8825598383393285e-05, + "loss": 4.1518, + "step": 465700 + }, + { + "epoch": 4.009555785123967, + "grad_norm": 3.703125, + "learning_rate": 1.880977924972236e-05, + "loss": 4.3511, + "step": 465750 + }, + { + "epoch": 4.009986225895317, + "grad_norm": 3.6875, + "learning_rate": 1.8793966075238966e-05, + "loss": 4.4737, + "step": 465800 + }, + { + "epoch": 4.010416666666667, + "grad_norm": 5.875, + "learning_rate": 1.8778158861103755e-05, + "loss": 4.3586, + "step": 465850 + }, + { + "epoch": 4.010847107438017, + "grad_norm": 2.25, + "learning_rate": 1.8762357608476933e-05, + "loss": 4.0145, + "step": 465900 + }, + { + "epoch": 4.0112775482093666, + "grad_norm": 2.328125, + "learning_rate": 1.8746562318518267e-05, + "loss": 4.1285, + "step": 465950 + }, + { + "epoch": 4.011707988980716, + "grad_norm": 4.875, + "learning_rate": 1.8730772992387115e-05, + "loss": 4.1862, + "step": 466000 + }, + { + "epoch": 4.012138429752066, + "grad_norm": 3.6875, + "learning_rate": 1.871498963124233e-05, + "loss": 3.9745, + "step": 466050 + }, + { + "epoch": 4.012568870523416, + "grad_norm": 2.40625, + "learning_rate": 1.8699212236242458e-05, + "loss": 4.3076, + "step": 466100 + }, + { + "epoch": 4.012999311294766, + "grad_norm": 5.21875, + "learning_rate": 1.8683440808545506e-05, + "loss": 4.5248, + "step": 466150 + }, + { + "epoch": 4.0134297520661155, + "grad_norm": 3.921875, + "learning_rate": 1.8667675349308987e-05, + "loss": 4.3498, + "step": 466200 + }, + { + "epoch": 4.013860192837465, + "grad_norm": 2.84375, + "learning_rate": 1.8651915859690128e-05, + "loss": 3.9982, + "step": 466250 + }, + { + "epoch": 4.014290633608815, + "grad_norm": 4.21875, + "learning_rate": 1.863616234084563e-05, + "loss": 4.78, + "step": 466300 + }, + { + "epoch": 4.014721074380165, + "grad_norm": 2.640625, + "learning_rate": 1.8620414793931727e-05, + "loss": 4.9355, + "step": 466350 + }, + { + "epoch": 4.015151515151516, + "grad_norm": 3.578125, + "learning_rate": 1.8604673220104328e-05, + "loss": 3.9323, + "step": 466400 + }, + { + "epoch": 4.015581955922865, + "grad_norm": 2.25, + "learning_rate": 1.8588937620518755e-05, + "loss": 4.6762, + "step": 466450 + }, + { + "epoch": 4.016012396694215, + "grad_norm": 3.375, + "learning_rate": 1.8573207996330012e-05, + "loss": 4.3824, + "step": 466500 + }, + { + "epoch": 4.016442837465565, + "grad_norm": 3.671875, + "learning_rate": 1.855748434869261e-05, + "loss": 4.2368, + "step": 466550 + }, + { + "epoch": 4.016873278236915, + "grad_norm": 3.5, + "learning_rate": 1.854176667876063e-05, + "loss": 4.5086, + "step": 466600 + }, + { + "epoch": 4.0173037190082646, + "grad_norm": 6.125, + "learning_rate": 1.8526054987687714e-05, + "loss": 4.1508, + "step": 466650 + }, + { + "epoch": 4.017734159779614, + "grad_norm": 3.03125, + "learning_rate": 1.851034927662707e-05, + "loss": 4.2153, + "step": 466700 + }, + { + "epoch": 4.018164600550964, + "grad_norm": 3.078125, + "learning_rate": 1.8494649546731434e-05, + "loss": 4.1755, + "step": 466750 + }, + { + "epoch": 4.018595041322314, + "grad_norm": 1.6484375, + "learning_rate": 1.8478955799153174e-05, + "loss": 4.0992, + "step": 466800 + }, + { + "epoch": 4.019025482093664, + "grad_norm": 3.15625, + "learning_rate": 1.8463268035044168e-05, + "loss": 4.564, + "step": 466850 + }, + { + "epoch": 4.0194559228650135, + "grad_norm": 4.34375, + "learning_rate": 1.844758625555586e-05, + "loss": 4.1945, + "step": 466900 + }, + { + "epoch": 4.019886363636363, + "grad_norm": 2.984375, + "learning_rate": 1.8431910461839252e-05, + "loss": 4.3524, + "step": 466950 + }, + { + "epoch": 4.020316804407713, + "grad_norm": 3.9375, + "learning_rate": 1.8416240655044915e-05, + "loss": 4.2614, + "step": 467000 + }, + { + "epoch": 4.020747245179064, + "grad_norm": 3.40625, + "learning_rate": 1.840057683632297e-05, + "loss": 4.0348, + "step": 467050 + }, + { + "epoch": 4.021177685950414, + "grad_norm": 4.96875, + "learning_rate": 1.838491900682313e-05, + "loss": 4.5726, + "step": 467100 + }, + { + "epoch": 4.021608126721763, + "grad_norm": 4.53125, + "learning_rate": 1.8369267167694638e-05, + "loss": 4.1148, + "step": 467150 + }, + { + "epoch": 4.022038567493113, + "grad_norm": 3.8125, + "learning_rate": 1.835362132008631e-05, + "loss": 3.9623, + "step": 467200 + }, + { + "epoch": 4.022469008264463, + "grad_norm": 1.71875, + "learning_rate": 1.8337981465146504e-05, + "loss": 4.1407, + "step": 467250 + }, + { + "epoch": 4.022899449035813, + "grad_norm": 2.890625, + "learning_rate": 1.832234760402316e-05, + "loss": 4.5119, + "step": 467300 + }, + { + "epoch": 4.0233298898071626, + "grad_norm": 1.875, + "learning_rate": 1.8306719737863744e-05, + "loss": 4.2315, + "step": 467350 + }, + { + "epoch": 4.023760330578512, + "grad_norm": 3.3125, + "learning_rate": 1.8291097867815375e-05, + "loss": 4.6522, + "step": 467400 + }, + { + "epoch": 4.024190771349862, + "grad_norm": 4.09375, + "learning_rate": 1.8275481995024578e-05, + "loss": 4.2538, + "step": 467450 + }, + { + "epoch": 4.024621212121212, + "grad_norm": 2.453125, + "learning_rate": 1.82598721206376e-05, + "loss": 4.0893, + "step": 467500 + }, + { + "epoch": 4.025051652892562, + "grad_norm": 3.359375, + "learning_rate": 1.8244268245800135e-05, + "loss": 3.8504, + "step": 467550 + }, + { + "epoch": 4.0254820936639115, + "grad_norm": 1.953125, + "learning_rate": 1.8228670371657442e-05, + "loss": 4.1312, + "step": 467600 + }, + { + "epoch": 4.025912534435261, + "grad_norm": 5.0, + "learning_rate": 1.821307849935444e-05, + "loss": 4.145, + "step": 467650 + }, + { + "epoch": 4.026342975206612, + "grad_norm": 2.9375, + "learning_rate": 1.819749263003554e-05, + "loss": 4.2722, + "step": 467700 + }, + { + "epoch": 4.026773415977962, + "grad_norm": 3.703125, + "learning_rate": 1.818191276484462e-05, + "loss": 4.7459, + "step": 467750 + }, + { + "epoch": 4.027203856749312, + "grad_norm": 5.3125, + "learning_rate": 1.8166338904925294e-05, + "loss": 4.6664, + "step": 467800 + }, + { + "epoch": 4.027634297520661, + "grad_norm": 2.78125, + "learning_rate": 1.8150771051420622e-05, + "loss": 4.482, + "step": 467850 + }, + { + "epoch": 4.028064738292011, + "grad_norm": 3.75, + "learning_rate": 1.8135209205473225e-05, + "loss": 4.5145, + "step": 467900 + }, + { + "epoch": 4.028495179063361, + "grad_norm": 1.609375, + "learning_rate": 1.8119653368225385e-05, + "loss": 4.4687, + "step": 467950 + }, + { + "epoch": 4.028925619834711, + "grad_norm": 5.875, + "learning_rate": 1.8104103540818773e-05, + "loss": 3.8254, + "step": 468000 + }, + { + "epoch": 4.028925619834711, + "eval_loss": 4.990596771240234, + "eval_runtime": 24.0267, + "eval_samples_per_second": 26.637, + "eval_steps_per_second": 13.319, + "eval_tts_loss": 7.479180711330167, + "step": 468000 + }, + { + "epoch": 4.0293560606060606, + "grad_norm": 2.703125, + "learning_rate": 1.8088559724394783e-05, + "loss": 4.6154, + "step": 468050 + }, + { + "epoch": 4.02978650137741, + "grad_norm": 3.328125, + "learning_rate": 1.8073021920094267e-05, + "loss": 4.4653, + "step": 468100 + }, + { + "epoch": 4.03021694214876, + "grad_norm": 1.96875, + "learning_rate": 1.8057490129057674e-05, + "loss": 4.156, + "step": 468150 + }, + { + "epoch": 4.03064738292011, + "grad_norm": 3.4375, + "learning_rate": 1.8041964352425e-05, + "loss": 4.272, + "step": 468200 + }, + { + "epoch": 4.03107782369146, + "grad_norm": 2.78125, + "learning_rate": 1.8026444591335813e-05, + "loss": 4.5182, + "step": 468250 + }, + { + "epoch": 4.0315082644628095, + "grad_norm": 1.4921875, + "learning_rate": 1.8010930846929185e-05, + "loss": 4.3646, + "step": 468300 + }, + { + "epoch": 4.03193870523416, + "grad_norm": 3.609375, + "learning_rate": 1.7995423120343856e-05, + "loss": 4.3485, + "step": 468350 + }, + { + "epoch": 4.03236914600551, + "grad_norm": 1.4140625, + "learning_rate": 1.7979921412718048e-05, + "loss": 4.5566, + "step": 468400 + }, + { + "epoch": 4.03279958677686, + "grad_norm": 2.96875, + "learning_rate": 1.7964425725189527e-05, + "loss": 4.4085, + "step": 468450 + }, + { + "epoch": 4.03323002754821, + "grad_norm": 8.25, + "learning_rate": 1.794893605889565e-05, + "loss": 4.7689, + "step": 468500 + }, + { + "epoch": 4.033660468319559, + "grad_norm": 4.28125, + "learning_rate": 1.793345241497333e-05, + "loss": 4.1303, + "step": 468550 + }, + { + "epoch": 4.034090909090909, + "grad_norm": 4.28125, + "learning_rate": 1.791797479455901e-05, + "loss": 4.1237, + "step": 468600 + }, + { + "epoch": 4.034521349862259, + "grad_norm": 3.328125, + "learning_rate": 1.7902503198788755e-05, + "loss": 4.2392, + "step": 468650 + }, + { + "epoch": 4.034951790633609, + "grad_norm": 3.3125, + "learning_rate": 1.7887037628798132e-05, + "loss": 4.1755, + "step": 468700 + }, + { + "epoch": 4.0353822314049586, + "grad_norm": 3.109375, + "learning_rate": 1.7871578085722285e-05, + "loss": 4.1829, + "step": 468750 + }, + { + "epoch": 4.035812672176308, + "grad_norm": 2.28125, + "learning_rate": 1.7856124570695888e-05, + "loss": 3.9362, + "step": 468800 + }, + { + "epoch": 4.036243112947658, + "grad_norm": 3.375, + "learning_rate": 1.784067708485322e-05, + "loss": 4.1969, + "step": 468850 + }, + { + "epoch": 4.036673553719008, + "grad_norm": 3.90625, + "learning_rate": 1.782523562932805e-05, + "loss": 4.0791, + "step": 468900 + }, + { + "epoch": 4.037103994490358, + "grad_norm": 3.78125, + "learning_rate": 1.7809800205253847e-05, + "loss": 4.5571, + "step": 468950 + }, + { + "epoch": 4.037534435261708, + "grad_norm": 2.484375, + "learning_rate": 1.7794370813763406e-05, + "loss": 3.9455, + "step": 469000 + }, + { + "epoch": 4.037964876033058, + "grad_norm": 4.21875, + "learning_rate": 1.7778947455989313e-05, + "loss": 4.0368, + "step": 469050 + }, + { + "epoch": 4.038395316804408, + "grad_norm": 3.75, + "learning_rate": 1.7763530133063566e-05, + "loss": 4.4333, + "step": 469100 + }, + { + "epoch": 4.038825757575758, + "grad_norm": 1.8515625, + "learning_rate": 1.7748118846117745e-05, + "loss": 4.5984, + "step": 469150 + }, + { + "epoch": 4.039256198347108, + "grad_norm": 3.203125, + "learning_rate": 1.773271359628309e-05, + "loss": 4.1432, + "step": 469200 + }, + { + "epoch": 4.039686639118457, + "grad_norm": 3.03125, + "learning_rate": 1.771731438469022e-05, + "loss": 4.0816, + "step": 469250 + }, + { + "epoch": 4.040117079889807, + "grad_norm": 5.1875, + "learning_rate": 1.7701921212469417e-05, + "loss": 3.943, + "step": 469300 + }, + { + "epoch": 4.040547520661157, + "grad_norm": 2.265625, + "learning_rate": 1.7686534080750562e-05, + "loss": 4.3908, + "step": 469350 + }, + { + "epoch": 4.040977961432507, + "grad_norm": 1.9375, + "learning_rate": 1.7671152990662986e-05, + "loss": 3.9232, + "step": 469400 + }, + { + "epoch": 4.0414084022038566, + "grad_norm": 3.1875, + "learning_rate": 1.7655777943335638e-05, + "loss": 4.3269, + "step": 469450 + }, + { + "epoch": 4.041838842975206, + "grad_norm": 4.53125, + "learning_rate": 1.764040893989706e-05, + "loss": 4.583, + "step": 469500 + }, + { + "epoch": 4.042269283746556, + "grad_norm": 2.90625, + "learning_rate": 1.762504598147524e-05, + "loss": 4.1703, + "step": 469550 + }, + { + "epoch": 4.042699724517906, + "grad_norm": 3.625, + "learning_rate": 1.760968906919779e-05, + "loss": 4.4236, + "step": 469600 + }, + { + "epoch": 4.043130165289257, + "grad_norm": 4.40625, + "learning_rate": 1.7594338204191918e-05, + "loss": 4.3865, + "step": 469650 + }, + { + "epoch": 4.043560606060606, + "grad_norm": 3.171875, + "learning_rate": 1.757899338758432e-05, + "loss": 4.0767, + "step": 469700 + }, + { + "epoch": 4.043991046831956, + "grad_norm": 1.1640625, + "learning_rate": 1.756365462050128e-05, + "loss": 4.114, + "step": 469750 + }, + { + "epoch": 4.044421487603306, + "grad_norm": 1.3515625, + "learning_rate": 1.7548321904068622e-05, + "loss": 4.2026, + "step": 469800 + }, + { + "epoch": 4.044851928374656, + "grad_norm": 2.421875, + "learning_rate": 1.753299523941171e-05, + "loss": 4.3217, + "step": 469850 + }, + { + "epoch": 4.045282369146006, + "grad_norm": 3.546875, + "learning_rate": 1.7517674627655533e-05, + "loss": 4.171, + "step": 469900 + }, + { + "epoch": 4.045712809917355, + "grad_norm": 3.703125, + "learning_rate": 1.7502360069924583e-05, + "loss": 4.3414, + "step": 469950 + }, + { + "epoch": 4.046143250688705, + "grad_norm": 2.8125, + "learning_rate": 1.7487051567342905e-05, + "loss": 4.2661, + "step": 470000 + }, + { + "epoch": 4.046573691460055, + "grad_norm": 3.953125, + "learning_rate": 1.7471749121034108e-05, + "loss": 4.5303, + "step": 470050 + }, + { + "epoch": 4.047004132231405, + "grad_norm": 6.5, + "learning_rate": 1.7456452732121364e-05, + "loss": 3.9519, + "step": 470100 + }, + { + "epoch": 4.0474345730027546, + "grad_norm": 2.5, + "learning_rate": 1.7441162401727362e-05, + "loss": 4.3258, + "step": 470150 + }, + { + "epoch": 4.047865013774104, + "grad_norm": 3.71875, + "learning_rate": 1.7425878130974436e-05, + "loss": 4.2178, + "step": 470200 + }, + { + "epoch": 4.048295454545454, + "grad_norm": 3.890625, + "learning_rate": 1.7410599920984395e-05, + "loss": 4.4268, + "step": 470250 + }, + { + "epoch": 4.048725895316805, + "grad_norm": 2.578125, + "learning_rate": 1.739532777287862e-05, + "loss": 4.0618, + "step": 470300 + }, + { + "epoch": 4.049156336088155, + "grad_norm": 5.5, + "learning_rate": 1.738006168777806e-05, + "loss": 4.2749, + "step": 470350 + }, + { + "epoch": 4.049586776859504, + "grad_norm": 2.734375, + "learning_rate": 1.736480166680322e-05, + "loss": 4.4164, + "step": 470400 + }, + { + "epoch": 4.050017217630854, + "grad_norm": 3.484375, + "learning_rate": 1.734954771107411e-05, + "loss": 4.2764, + "step": 470450 + }, + { + "epoch": 4.050447658402204, + "grad_norm": 5.34375, + "learning_rate": 1.733429982171042e-05, + "loss": 4.5184, + "step": 470500 + }, + { + "epoch": 4.050878099173554, + "grad_norm": 1.984375, + "learning_rate": 1.7319057999831213e-05, + "loss": 4.2812, + "step": 470550 + }, + { + "epoch": 4.051308539944904, + "grad_norm": 2.625, + "learning_rate": 1.7303822246555277e-05, + "loss": 4.2761, + "step": 470600 + }, + { + "epoch": 4.051738980716253, + "grad_norm": 2.078125, + "learning_rate": 1.7288592563000873e-05, + "loss": 4.4205, + "step": 470650 + }, + { + "epoch": 4.052169421487603, + "grad_norm": 3.21875, + "learning_rate": 1.7273368950285785e-05, + "loss": 4.4832, + "step": 470700 + }, + { + "epoch": 4.052599862258953, + "grad_norm": 2.765625, + "learning_rate": 1.725815140952748e-05, + "loss": 4.5096, + "step": 470750 + }, + { + "epoch": 4.053030303030303, + "grad_norm": 3.734375, + "learning_rate": 1.7242939941842816e-05, + "loss": 4.2955, + "step": 470800 + }, + { + "epoch": 4.0534607438016526, + "grad_norm": 3.125, + "learning_rate": 1.7227734548348273e-05, + "loss": 3.8665, + "step": 470850 + }, + { + "epoch": 4.053891184573002, + "grad_norm": 1.7734375, + "learning_rate": 1.7212535230159944e-05, + "loss": 4.5488, + "step": 470900 + }, + { + "epoch": 4.054321625344353, + "grad_norm": 3.234375, + "learning_rate": 1.7197341988393413e-05, + "loss": 4.3105, + "step": 470950 + }, + { + "epoch": 4.054752066115703, + "grad_norm": 3.8125, + "learning_rate": 1.718215482416383e-05, + "loss": 4.155, + "step": 471000 + }, + { + "epoch": 4.054752066115703, + "eval_loss": 4.990444183349609, + "eval_runtime": 24.202, + "eval_samples_per_second": 26.444, + "eval_steps_per_second": 13.222, + "eval_tts_loss": 7.478690195086511, + "step": 471000 + }, + { + "epoch": 4.055182506887053, + "grad_norm": 2.8125, + "learning_rate": 1.716697373858589e-05, + "loss": 4.5042, + "step": 471050 + }, + { + "epoch": 4.055612947658402, + "grad_norm": 3.53125, + "learning_rate": 1.715179873277386e-05, + "loss": 4.3599, + "step": 471100 + }, + { + "epoch": 4.056043388429752, + "grad_norm": 2.875, + "learning_rate": 1.713662980784153e-05, + "loss": 4.3022, + "step": 471150 + }, + { + "epoch": 4.056473829201102, + "grad_norm": 2.75, + "learning_rate": 1.71214669649023e-05, + "loss": 4.16, + "step": 471200 + }, + { + "epoch": 4.056904269972452, + "grad_norm": 5.53125, + "learning_rate": 1.710631020506909e-05, + "loss": 4.3182, + "step": 471250 + }, + { + "epoch": 4.057334710743802, + "grad_norm": 3.84375, + "learning_rate": 1.7091159529454347e-05, + "loss": 4.296, + "step": 471300 + }, + { + "epoch": 4.057765151515151, + "grad_norm": 2.765625, + "learning_rate": 1.7076014939170127e-05, + "loss": 4.2559, + "step": 471350 + }, + { + "epoch": 4.058195592286501, + "grad_norm": 3.765625, + "learning_rate": 1.7060876435327954e-05, + "loss": 4.0452, + "step": 471400 + }, + { + "epoch": 4.058626033057851, + "grad_norm": 4.1875, + "learning_rate": 1.7045744019039046e-05, + "loss": 4.3682, + "step": 471450 + }, + { + "epoch": 4.059056473829201, + "grad_norm": 4.03125, + "learning_rate": 1.703061769141403e-05, + "loss": 3.894, + "step": 471500 + }, + { + "epoch": 4.0594869146005506, + "grad_norm": 3.84375, + "learning_rate": 1.7015497453563167e-05, + "loss": 4.6356, + "step": 471550 + }, + { + "epoch": 4.059917355371901, + "grad_norm": 5.875, + "learning_rate": 1.7000383306596244e-05, + "loss": 4.1306, + "step": 471600 + }, + { + "epoch": 4.060347796143251, + "grad_norm": 3.09375, + "learning_rate": 1.6985275251622612e-05, + "loss": 3.6677, + "step": 471650 + }, + { + "epoch": 4.060778236914601, + "grad_norm": 7.21875, + "learning_rate": 1.6970173289751145e-05, + "loss": 4.3977, + "step": 471700 + }, + { + "epoch": 4.061208677685951, + "grad_norm": 2.6875, + "learning_rate": 1.6955077422090328e-05, + "loss": 4.1676, + "step": 471750 + }, + { + "epoch": 4.0616391184573, + "grad_norm": 1.59375, + "learning_rate": 1.693998764974819e-05, + "loss": 4.1295, + "step": 471800 + }, + { + "epoch": 4.06206955922865, + "grad_norm": 3.34375, + "learning_rate": 1.6924903973832183e-05, + "loss": 4.3328, + "step": 471850 + }, + { + "epoch": 4.0625, + "grad_norm": 3.078125, + "learning_rate": 1.6909826395449525e-05, + "loss": 4.5923, + "step": 471900 + }, + { + "epoch": 4.06293044077135, + "grad_norm": 4.9375, + "learning_rate": 1.6894754915706835e-05, + "loss": 4.7769, + "step": 471950 + }, + { + "epoch": 4.0633608815427, + "grad_norm": 4.0625, + "learning_rate": 1.6879689535710297e-05, + "loss": 4.2948, + "step": 472000 + }, + { + "epoch": 4.063791322314049, + "grad_norm": 3.59375, + "learning_rate": 1.686463025656576e-05, + "loss": 4.1265, + "step": 472050 + }, + { + "epoch": 4.064221763085399, + "grad_norm": 0.8125, + "learning_rate": 1.684957707937844e-05, + "loss": 4.1848, + "step": 472100 + }, + { + "epoch": 4.064652203856749, + "grad_norm": 2.59375, + "learning_rate": 1.683453000525329e-05, + "loss": 4.441, + "step": 472150 + }, + { + "epoch": 4.065082644628099, + "grad_norm": 3.046875, + "learning_rate": 1.68194890352947e-05, + "loss": 4.8487, + "step": 472200 + }, + { + "epoch": 4.0655130853994494, + "grad_norm": 3.546875, + "learning_rate": 1.680445417060663e-05, + "loss": 4.3647, + "step": 472250 + }, + { + "epoch": 4.065943526170799, + "grad_norm": 3.59375, + "learning_rate": 1.6789425412292657e-05, + "loss": 4.3761, + "step": 472300 + }, + { + "epoch": 4.066373966942149, + "grad_norm": 3.296875, + "learning_rate": 1.677440276145582e-05, + "loss": 4.1368, + "step": 472350 + }, + { + "epoch": 4.066804407713499, + "grad_norm": 3.40625, + "learning_rate": 1.6759386219198726e-05, + "loss": 4.2209, + "step": 472400 + }, + { + "epoch": 4.067234848484849, + "grad_norm": 5.125, + "learning_rate": 1.67443757866236e-05, + "loss": 4.2474, + "step": 472450 + }, + { + "epoch": 4.067665289256198, + "grad_norm": 1.703125, + "learning_rate": 1.6729371464832176e-05, + "loss": 3.9704, + "step": 472500 + }, + { + "epoch": 4.068095730027548, + "grad_norm": 3.40625, + "learning_rate": 1.671437325492572e-05, + "loss": 4.3687, + "step": 472550 + }, + { + "epoch": 4.068526170798898, + "grad_norm": 2.703125, + "learning_rate": 1.6699381158005068e-05, + "loss": 4.474, + "step": 472600 + }, + { + "epoch": 4.068956611570248, + "grad_norm": 4.0, + "learning_rate": 1.668439517517062e-05, + "loss": 4.1668, + "step": 472650 + }, + { + "epoch": 4.069387052341598, + "grad_norm": 2.953125, + "learning_rate": 1.6669415307522285e-05, + "loss": 4.2232, + "step": 472700 + }, + { + "epoch": 4.069817493112947, + "grad_norm": 2.03125, + "learning_rate": 1.6654441556159593e-05, + "loss": 4.3866, + "step": 472750 + }, + { + "epoch": 4.070247933884297, + "grad_norm": 7.5625, + "learning_rate": 1.663947392218157e-05, + "loss": 4.2389, + "step": 472800 + }, + { + "epoch": 4.070678374655647, + "grad_norm": 9.0, + "learning_rate": 1.662451240668681e-05, + "loss": 4.4785, + "step": 472850 + }, + { + "epoch": 4.071108815426998, + "grad_norm": 2.96875, + "learning_rate": 1.6609557010773446e-05, + "loss": 4.1316, + "step": 472900 + }, + { + "epoch": 4.0715392561983474, + "grad_norm": 8.25, + "learning_rate": 1.659460773553917e-05, + "loss": 4.4142, + "step": 472950 + }, + { + "epoch": 4.071969696969697, + "grad_norm": 2.421875, + "learning_rate": 1.6579664582081246e-05, + "loss": 4.3803, + "step": 473000 + }, + { + "epoch": 4.072400137741047, + "grad_norm": 4.125, + "learning_rate": 1.6564727551496485e-05, + "loss": 4.1947, + "step": 473050 + }, + { + "epoch": 4.072830578512397, + "grad_norm": 4.5, + "learning_rate": 1.6549796644881156e-05, + "loss": 4.3879, + "step": 473100 + }, + { + "epoch": 4.073261019283747, + "grad_norm": 2.640625, + "learning_rate": 1.6534871863331224e-05, + "loss": 4.4808, + "step": 473150 + }, + { + "epoch": 4.073691460055096, + "grad_norm": 4.125, + "learning_rate": 1.651995320794213e-05, + "loss": 4.2729, + "step": 473200 + }, + { + "epoch": 4.074121900826446, + "grad_norm": 6.75, + "learning_rate": 1.6505040679808824e-05, + "loss": 4.5253, + "step": 473250 + }, + { + "epoch": 4.074552341597796, + "grad_norm": 2.09375, + "learning_rate": 1.6490134280025914e-05, + "loss": 3.7976, + "step": 473300 + }, + { + "epoch": 4.074982782369146, + "grad_norm": 3.015625, + "learning_rate": 1.64752340096875e-05, + "loss": 4.2162, + "step": 473350 + }, + { + "epoch": 4.075413223140496, + "grad_norm": 3.953125, + "learning_rate": 1.6460339869887152e-05, + "loss": 3.8813, + "step": 473400 + }, + { + "epoch": 4.075843663911845, + "grad_norm": 2.453125, + "learning_rate": 1.6445451861718143e-05, + "loss": 4.3434, + "step": 473450 + }, + { + "epoch": 4.076274104683195, + "grad_norm": 4.3125, + "learning_rate": 1.6430569986273194e-05, + "loss": 4.0991, + "step": 473500 + }, + { + "epoch": 4.076704545454546, + "grad_norm": 3.484375, + "learning_rate": 1.6415694244644575e-05, + "loss": 4.443, + "step": 473550 + }, + { + "epoch": 4.077134986225896, + "grad_norm": 3.875, + "learning_rate": 1.6400824637924216e-05, + "loss": 4.1666, + "step": 473600 + }, + { + "epoch": 4.0775654269972454, + "grad_norm": 3.21875, + "learning_rate": 1.638596116720341e-05, + "loss": 4.3432, + "step": 473650 + }, + { + "epoch": 4.077995867768595, + "grad_norm": 5.9375, + "learning_rate": 1.637110383357319e-05, + "loss": 4.1673, + "step": 473700 + }, + { + "epoch": 4.078426308539945, + "grad_norm": 4.375, + "learning_rate": 1.6356252638123994e-05, + "loss": 4.4722, + "step": 473750 + }, + { + "epoch": 4.078856749311295, + "grad_norm": 3.59375, + "learning_rate": 1.63414075819459e-05, + "loss": 4.5102, + "step": 473800 + }, + { + "epoch": 4.079287190082645, + "grad_norm": 3.203125, + "learning_rate": 1.6326568666128495e-05, + "loss": 4.3715, + "step": 473850 + }, + { + "epoch": 4.079717630853994, + "grad_norm": 3.53125, + "learning_rate": 1.6311735891760926e-05, + "loss": 4.0467, + "step": 473900 + }, + { + "epoch": 4.080148071625344, + "grad_norm": 7.71875, + "learning_rate": 1.6296909259931858e-05, + "loss": 4.4708, + "step": 473950 + }, + { + "epoch": 4.080578512396694, + "grad_norm": 0.71484375, + "learning_rate": 1.6282088771729575e-05, + "loss": 4.1761, + "step": 474000 + }, + { + "epoch": 4.080578512396694, + "eval_loss": 4.990285396575928, + "eval_runtime": 24.0607, + "eval_samples_per_second": 26.599, + "eval_steps_per_second": 13.3, + "eval_tts_loss": 7.4766609914280675, + "step": 474000 + }, + { + "epoch": 4.081008953168044, + "grad_norm": 2.09375, + "learning_rate": 1.6267274428241852e-05, + "loss": 4.3962, + "step": 474050 + }, + { + "epoch": 4.081439393939394, + "grad_norm": 1.0234375, + "learning_rate": 1.625246623055604e-05, + "loss": 4.3978, + "step": 474100 + }, + { + "epoch": 4.081869834710743, + "grad_norm": 4.1875, + "learning_rate": 1.6237664179759016e-05, + "loss": 4.2482, + "step": 474150 + }, + { + "epoch": 4.082300275482094, + "grad_norm": 3.0, + "learning_rate": 1.622286827693722e-05, + "loss": 4.098, + "step": 474200 + }, + { + "epoch": 4.082730716253444, + "grad_norm": 2.71875, + "learning_rate": 1.6208078523176617e-05, + "loss": 4.0942, + "step": 474250 + }, + { + "epoch": 4.083161157024794, + "grad_norm": 2.703125, + "learning_rate": 1.6193294919562786e-05, + "loss": 4.4851, + "step": 474300 + }, + { + "epoch": 4.0835915977961434, + "grad_norm": 3.03125, + "learning_rate": 1.61785174671808e-05, + "loss": 4.2229, + "step": 474350 + }, + { + "epoch": 4.084022038567493, + "grad_norm": 3.296875, + "learning_rate": 1.616374616711528e-05, + "loss": 4.3572, + "step": 474400 + }, + { + "epoch": 4.084452479338843, + "grad_norm": 3.375, + "learning_rate": 1.6148981020450417e-05, + "loss": 4.332, + "step": 474450 + }, + { + "epoch": 4.084882920110193, + "grad_norm": 1.8828125, + "learning_rate": 1.613422202826992e-05, + "loss": 4.1032, + "step": 474500 + }, + { + "epoch": 4.085313360881543, + "grad_norm": 2.328125, + "learning_rate": 1.61194691916571e-05, + "loss": 4.1243, + "step": 474550 + }, + { + "epoch": 4.085743801652892, + "grad_norm": 3.296875, + "learning_rate": 1.610472251169479e-05, + "loss": 4.5435, + "step": 474600 + }, + { + "epoch": 4.086174242424242, + "grad_norm": 2.90625, + "learning_rate": 1.6089981989465296e-05, + "loss": 4.3172, + "step": 474650 + }, + { + "epoch": 4.086604683195592, + "grad_norm": 2.953125, + "learning_rate": 1.6075247626050617e-05, + "loss": 3.9243, + "step": 474700 + }, + { + "epoch": 4.087035123966942, + "grad_norm": 2.453125, + "learning_rate": 1.6060519422532182e-05, + "loss": 4.3399, + "step": 474750 + }, + { + "epoch": 4.087465564738292, + "grad_norm": 3.734375, + "learning_rate": 1.6045797379991e-05, + "loss": 3.9782, + "step": 474800 + }, + { + "epoch": 4.087896005509642, + "grad_norm": 2.875, + "learning_rate": 1.6031081499507717e-05, + "loss": 4.2701, + "step": 474850 + }, + { + "epoch": 4.088326446280992, + "grad_norm": 3.265625, + "learning_rate": 1.6016371782162355e-05, + "loss": 3.9421, + "step": 474900 + }, + { + "epoch": 4.088756887052342, + "grad_norm": 3.6875, + "learning_rate": 1.6001668229034584e-05, + "loss": 4.3552, + "step": 474950 + }, + { + "epoch": 4.089187327823692, + "grad_norm": 4.25, + "learning_rate": 1.5986970841203662e-05, + "loss": 4.0449, + "step": 475000 + }, + { + "epoch": 4.0896177685950414, + "grad_norm": 3.5, + "learning_rate": 1.597227961974833e-05, + "loss": 4.399, + "step": 475050 + }, + { + "epoch": 4.090048209366391, + "grad_norm": 2.359375, + "learning_rate": 1.5957594565746846e-05, + "loss": 4.2658, + "step": 475100 + }, + { + "epoch": 4.090478650137741, + "grad_norm": 2.96875, + "learning_rate": 1.5942915680277158e-05, + "loss": 4.0079, + "step": 475150 + }, + { + "epoch": 4.090909090909091, + "grad_norm": 3.265625, + "learning_rate": 1.592824296441655e-05, + "loss": 4.3937, + "step": 475200 + }, + { + "epoch": 4.091339531680441, + "grad_norm": 3.625, + "learning_rate": 1.591357641924206e-05, + "loss": 4.4162, + "step": 475250 + }, + { + "epoch": 4.09176997245179, + "grad_norm": 3.203125, + "learning_rate": 1.5898916045830136e-05, + "loss": 4.5583, + "step": 475300 + }, + { + "epoch": 4.09220041322314, + "grad_norm": 4.375, + "learning_rate": 1.588426184525682e-05, + "loss": 4.4763, + "step": 475350 + }, + { + "epoch": 4.09263085399449, + "grad_norm": 2.328125, + "learning_rate": 1.5869613818597716e-05, + "loss": 4.2226, + "step": 475400 + }, + { + "epoch": 4.09306129476584, + "grad_norm": 4.0, + "learning_rate": 1.5854971966927934e-05, + "loss": 4.3951, + "step": 475450 + }, + { + "epoch": 4.0934917355371905, + "grad_norm": 3.765625, + "learning_rate": 1.5840336291322156e-05, + "loss": 4.2528, + "step": 475500 + }, + { + "epoch": 4.09392217630854, + "grad_norm": 2.59375, + "learning_rate": 1.5825706792854633e-05, + "loss": 4.5994, + "step": 475550 + }, + { + "epoch": 4.09435261707989, + "grad_norm": 2.984375, + "learning_rate": 1.581108347259913e-05, + "loss": 4.1948, + "step": 475600 + }, + { + "epoch": 4.09478305785124, + "grad_norm": 3.453125, + "learning_rate": 1.5796466331628968e-05, + "loss": 4.2568, + "step": 475650 + }, + { + "epoch": 4.09521349862259, + "grad_norm": 2.921875, + "learning_rate": 1.5781855371017008e-05, + "loss": 4.6507, + "step": 475700 + }, + { + "epoch": 4.0956439393939394, + "grad_norm": 3.078125, + "learning_rate": 1.576725059183566e-05, + "loss": 4.0814, + "step": 475750 + }, + { + "epoch": 4.096074380165289, + "grad_norm": 2.96875, + "learning_rate": 1.5752651995156863e-05, + "loss": 4.6558, + "step": 475800 + }, + { + "epoch": 4.096504820936639, + "grad_norm": 3.9375, + "learning_rate": 1.5738059582052166e-05, + "loss": 4.064, + "step": 475850 + }, + { + "epoch": 4.096935261707989, + "grad_norm": 2.765625, + "learning_rate": 1.5723473353592612e-05, + "loss": 4.1358, + "step": 475900 + }, + { + "epoch": 4.097365702479339, + "grad_norm": 3.546875, + "learning_rate": 1.570889331084878e-05, + "loss": 4.6223, + "step": 475950 + }, + { + "epoch": 4.097796143250688, + "grad_norm": 6.46875, + "learning_rate": 1.5694319454890827e-05, + "loss": 4.4869, + "step": 476000 + }, + { + "epoch": 4.098226584022038, + "grad_norm": 3.5, + "learning_rate": 1.567975178678843e-05, + "loss": 4.2434, + "step": 476050 + }, + { + "epoch": 4.098657024793388, + "grad_norm": 3.34375, + "learning_rate": 1.5665190307610812e-05, + "loss": 3.9627, + "step": 476100 + }, + { + "epoch": 4.099087465564739, + "grad_norm": 5.375, + "learning_rate": 1.565063501842683e-05, + "loss": 4.4755, + "step": 476150 + }, + { + "epoch": 4.0995179063360885, + "grad_norm": 3.65625, + "learning_rate": 1.56360859203047e-05, + "loss": 4.613, + "step": 476200 + }, + { + "epoch": 4.099948347107438, + "grad_norm": 5.625, + "learning_rate": 1.5621543014312377e-05, + "loss": 4.2559, + "step": 476250 + }, + { + "epoch": 4.100378787878788, + "grad_norm": 2.359375, + "learning_rate": 1.5607006301517245e-05, + "loss": 4.3416, + "step": 476300 + }, + { + "epoch": 4.100809228650138, + "grad_norm": 2.359375, + "learning_rate": 1.559247578298626e-05, + "loss": 4.5287, + "step": 476350 + }, + { + "epoch": 4.101239669421488, + "grad_norm": 1.8359375, + "learning_rate": 1.557795145978599e-05, + "loss": 4.1401, + "step": 476400 + }, + { + "epoch": 4.1016701101928374, + "grad_norm": 3.671875, + "learning_rate": 1.5563433332982412e-05, + "loss": 4.5392, + "step": 476450 + }, + { + "epoch": 4.102100550964187, + "grad_norm": 4.125, + "learning_rate": 1.5548921403641137e-05, + "loss": 4.6783, + "step": 476500 + }, + { + "epoch": 4.102530991735537, + "grad_norm": 2.828125, + "learning_rate": 1.5534415672827363e-05, + "loss": 4.5235, + "step": 476550 + }, + { + "epoch": 4.102961432506887, + "grad_norm": 2.296875, + "learning_rate": 1.551991614160574e-05, + "loss": 4.2722, + "step": 476600 + }, + { + "epoch": 4.103391873278237, + "grad_norm": 2.96875, + "learning_rate": 1.5505422811040504e-05, + "loss": 4.4376, + "step": 476650 + }, + { + "epoch": 4.103822314049586, + "grad_norm": 3.203125, + "learning_rate": 1.549093568219544e-05, + "loss": 4.2473, + "step": 476700 + }, + { + "epoch": 4.104252754820936, + "grad_norm": 5.1875, + "learning_rate": 1.5476454756133875e-05, + "loss": 4.3745, + "step": 476750 + }, + { + "epoch": 4.104683195592287, + "grad_norm": 2.15625, + "learning_rate": 1.5461980033918656e-05, + "loss": 4.4971, + "step": 476800 + }, + { + "epoch": 4.105113636363637, + "grad_norm": 6.28125, + "learning_rate": 1.5447511516612222e-05, + "loss": 4.1005, + "step": 476850 + }, + { + "epoch": 4.1055440771349865, + "grad_norm": 2.65625, + "learning_rate": 1.543304920527654e-05, + "loss": 3.9515, + "step": 476900 + }, + { + "epoch": 4.105974517906336, + "grad_norm": 3.34375, + "learning_rate": 1.541859310097309e-05, + "loss": 4.7117, + "step": 476950 + }, + { + "epoch": 4.106404958677686, + "grad_norm": 1.828125, + "learning_rate": 1.5404143204762922e-05, + "loss": 4.2185, + "step": 477000 + }, + { + "epoch": 4.106404958677686, + "eval_loss": 4.99039363861084, + "eval_runtime": 24.7587, + "eval_samples_per_second": 25.85, + "eval_steps_per_second": 12.925, + "eval_tts_loss": 7.485888761083619, + "step": 477000 + }, + { + "epoch": 4.106835399449036, + "grad_norm": 6.90625, + "learning_rate": 1.5389699517706602e-05, + "loss": 4.4078, + "step": 477050 + }, + { + "epoch": 4.107265840220386, + "grad_norm": 3.796875, + "learning_rate": 1.5375262040864323e-05, + "loss": 4.4557, + "step": 477100 + }, + { + "epoch": 4.1076962809917354, + "grad_norm": 2.046875, + "learning_rate": 1.5360830775295744e-05, + "loss": 4.2413, + "step": 477150 + }, + { + "epoch": 4.108126721763085, + "grad_norm": 3.359375, + "learning_rate": 1.5346405722060066e-05, + "loss": 4.5365, + "step": 477200 + }, + { + "epoch": 4.108557162534435, + "grad_norm": 4.28125, + "learning_rate": 1.5331986882216077e-05, + "loss": 4.1829, + "step": 477250 + }, + { + "epoch": 4.108987603305785, + "grad_norm": 2.515625, + "learning_rate": 1.5317574256822086e-05, + "loss": 4.3858, + "step": 477300 + }, + { + "epoch": 4.109418044077135, + "grad_norm": 5.78125, + "learning_rate": 1.5303167846935917e-05, + "loss": 4.3307, + "step": 477350 + }, + { + "epoch": 4.109848484848484, + "grad_norm": 3.125, + "learning_rate": 1.528876765361502e-05, + "loss": 4.0777, + "step": 477400 + }, + { + "epoch": 4.110278925619835, + "grad_norm": 3.6875, + "learning_rate": 1.527437367791632e-05, + "loss": 4.2276, + "step": 477450 + }, + { + "epoch": 4.110709366391185, + "grad_norm": 2.96875, + "learning_rate": 1.5259985920896302e-05, + "loss": 4.0667, + "step": 477500 + }, + { + "epoch": 4.111139807162535, + "grad_norm": 3.765625, + "learning_rate": 1.5245604383610989e-05, + "loss": 4.4329, + "step": 477550 + }, + { + "epoch": 4.1115702479338845, + "grad_norm": 2.796875, + "learning_rate": 1.523122906711596e-05, + "loss": 4.1659, + "step": 477600 + }, + { + "epoch": 4.112000688705234, + "grad_norm": 3.046875, + "learning_rate": 1.5216859972466314e-05, + "loss": 4.0445, + "step": 477650 + }, + { + "epoch": 4.112431129476584, + "grad_norm": 3.203125, + "learning_rate": 1.5202497100716784e-05, + "loss": 4.7465, + "step": 477700 + }, + { + "epoch": 4.112861570247934, + "grad_norm": 3.890625, + "learning_rate": 1.5188140452921462e-05, + "loss": 4.2884, + "step": 477750 + }, + { + "epoch": 4.113292011019284, + "grad_norm": 2.140625, + "learning_rate": 1.5173790030134183e-05, + "loss": 4.2021, + "step": 477800 + }, + { + "epoch": 4.1137224517906334, + "grad_norm": 1.9375, + "learning_rate": 1.5159445833408203e-05, + "loss": 4.1972, + "step": 477850 + }, + { + "epoch": 4.114152892561983, + "grad_norm": 1.84375, + "learning_rate": 1.5145107863796337e-05, + "loss": 4.2857, + "step": 477900 + }, + { + "epoch": 4.114583333333333, + "grad_norm": 3.828125, + "learning_rate": 1.5130776122351031e-05, + "loss": 4.6469, + "step": 477950 + }, + { + "epoch": 4.115013774104683, + "grad_norm": 4.03125, + "learning_rate": 1.5116450610124134e-05, + "loss": 4.3185, + "step": 478000 + }, + { + "epoch": 4.115444214876033, + "grad_norm": 1.2734375, + "learning_rate": 1.5102131328167114e-05, + "loss": 4.2037, + "step": 478050 + }, + { + "epoch": 4.115874655647383, + "grad_norm": 3.375, + "learning_rate": 1.5087818277531007e-05, + "loss": 4.3135, + "step": 478100 + }, + { + "epoch": 4.116305096418733, + "grad_norm": 5.21875, + "learning_rate": 1.5073511459266344e-05, + "loss": 4.3542, + "step": 478150 + }, + { + "epoch": 4.116735537190083, + "grad_norm": 4.71875, + "learning_rate": 1.5059210874423213e-05, + "loss": 4.2297, + "step": 478200 + }, + { + "epoch": 4.117165977961433, + "grad_norm": 3.46875, + "learning_rate": 1.5044916524051256e-05, + "loss": 4.3947, + "step": 478250 + }, + { + "epoch": 4.1175964187327825, + "grad_norm": 5.0, + "learning_rate": 1.5030628409199643e-05, + "loss": 4.2481, + "step": 478300 + }, + { + "epoch": 4.118026859504132, + "grad_norm": 2.875, + "learning_rate": 1.5016346530917058e-05, + "loss": 4.3567, + "step": 478350 + }, + { + "epoch": 4.118457300275482, + "grad_norm": 2.984375, + "learning_rate": 1.5002070890251819e-05, + "loss": 4.5786, + "step": 478400 + }, + { + "epoch": 4.118887741046832, + "grad_norm": 2.125, + "learning_rate": 1.4987801488251695e-05, + "loss": 4.3133, + "step": 478450 + }, + { + "epoch": 4.119318181818182, + "grad_norm": 2.59375, + "learning_rate": 1.4973538325964042e-05, + "loss": 4.5338, + "step": 478500 + }, + { + "epoch": 4.1197486225895315, + "grad_norm": 3.84375, + "learning_rate": 1.4959281404435732e-05, + "loss": 4.4301, + "step": 478550 + }, + { + "epoch": 4.120179063360881, + "grad_norm": 5.9375, + "learning_rate": 1.4945030724713172e-05, + "loss": 4.2069, + "step": 478600 + }, + { + "epoch": 4.120609504132231, + "grad_norm": 3.34375, + "learning_rate": 1.493078628784238e-05, + "loss": 3.7939, + "step": 478650 + }, + { + "epoch": 4.121039944903581, + "grad_norm": 3.0, + "learning_rate": 1.4916548094868844e-05, + "loss": 4.4275, + "step": 478700 + }, + { + "epoch": 4.1214703856749315, + "grad_norm": 3.828125, + "learning_rate": 1.4902316146837613e-05, + "loss": 4.2769, + "step": 478750 + }, + { + "epoch": 4.121900826446281, + "grad_norm": 3.0, + "learning_rate": 1.4888090444793279e-05, + "loss": 4.2704, + "step": 478800 + }, + { + "epoch": 4.122331267217631, + "grad_norm": 3.640625, + "learning_rate": 1.4873870989779993e-05, + "loss": 4.0391, + "step": 478850 + }, + { + "epoch": 4.122761707988981, + "grad_norm": 2.984375, + "learning_rate": 1.4859657782841396e-05, + "loss": 4.0243, + "step": 478900 + }, + { + "epoch": 4.123192148760331, + "grad_norm": 1.5, + "learning_rate": 1.4845450825020757e-05, + "loss": 4.2848, + "step": 478950 + }, + { + "epoch": 4.1236225895316805, + "grad_norm": 2.046875, + "learning_rate": 1.483125011736084e-05, + "loss": 4.4641, + "step": 479000 + }, + { + "epoch": 4.12405303030303, + "grad_norm": 1.4375, + "learning_rate": 1.4817055660903867e-05, + "loss": 4.1726, + "step": 479050 + }, + { + "epoch": 4.12448347107438, + "grad_norm": 3.40625, + "learning_rate": 1.480286745669176e-05, + "loss": 4.4247, + "step": 479100 + }, + { + "epoch": 4.12491391184573, + "grad_norm": 1.1171875, + "learning_rate": 1.4788685505765875e-05, + "loss": 4.6344, + "step": 479150 + }, + { + "epoch": 4.12534435261708, + "grad_norm": 3.109375, + "learning_rate": 1.4774509809167114e-05, + "loss": 4.4806, + "step": 479200 + }, + { + "epoch": 4.1257747933884295, + "grad_norm": 4.28125, + "learning_rate": 1.4760340367936009e-05, + "loss": 4.2063, + "step": 479250 + }, + { + "epoch": 4.126205234159779, + "grad_norm": 2.046875, + "learning_rate": 1.4746177183112475e-05, + "loss": 4.0203, + "step": 479300 + }, + { + "epoch": 4.12663567493113, + "grad_norm": 3.5, + "learning_rate": 1.4732020255736134e-05, + "loss": 4.6891, + "step": 479350 + }, + { + "epoch": 4.12706611570248, + "grad_norm": 2.921875, + "learning_rate": 1.4717869586846056e-05, + "loss": 4.6682, + "step": 479400 + }, + { + "epoch": 4.1274965564738295, + "grad_norm": 1.8125, + "learning_rate": 1.470372517748082e-05, + "loss": 4.1853, + "step": 479450 + }, + { + "epoch": 4.127926997245179, + "grad_norm": 2.234375, + "learning_rate": 1.4689587028678698e-05, + "loss": 4.5874, + "step": 479500 + }, + { + "epoch": 4.128357438016529, + "grad_norm": 3.078125, + "learning_rate": 1.4675455141477312e-05, + "loss": 4.1844, + "step": 479550 + }, + { + "epoch": 4.128787878787879, + "grad_norm": 5.15625, + "learning_rate": 1.4661329516913913e-05, + "loss": 4.3014, + "step": 479600 + }, + { + "epoch": 4.129218319559229, + "grad_norm": 3.015625, + "learning_rate": 1.4647210156025337e-05, + "loss": 4.2146, + "step": 479650 + }, + { + "epoch": 4.1296487603305785, + "grad_norm": 4.53125, + "learning_rate": 1.463309705984789e-05, + "loss": 4.2865, + "step": 479700 + }, + { + "epoch": 4.130079201101928, + "grad_norm": 3.265625, + "learning_rate": 1.461899022941745e-05, + "loss": 4.5302, + "step": 479750 + }, + { + "epoch": 4.130509641873278, + "grad_norm": 3.4375, + "learning_rate": 1.460488966576944e-05, + "loss": 4.5474, + "step": 479800 + }, + { + "epoch": 4.130940082644628, + "grad_norm": 2.890625, + "learning_rate": 1.459079536993878e-05, + "loss": 4.2235, + "step": 479850 + }, + { + "epoch": 4.131370523415978, + "grad_norm": 2.78125, + "learning_rate": 1.457670734295996e-05, + "loss": 4.3429, + "step": 479900 + }, + { + "epoch": 4.1318009641873275, + "grad_norm": 2.28125, + "learning_rate": 1.4562625585867052e-05, + "loss": 4.317, + "step": 479950 + }, + { + "epoch": 4.132231404958677, + "grad_norm": 2.359375, + "learning_rate": 1.4548550099693592e-05, + "loss": 4.3332, + "step": 480000 + }, + { + "epoch": 4.132231404958677, + "eval_loss": 4.990168571472168, + "eval_runtime": 23.9078, + "eval_samples_per_second": 26.769, + "eval_steps_per_second": 13.385, + "eval_tts_loss": 7.47751308258016, + "step": 480000 + }, + { + "epoch": 4.132661845730028, + "grad_norm": 4.6875, + "learning_rate": 1.4534480885472712e-05, + "loss": 4.2705, + "step": 480050 + }, + { + "epoch": 4.133092286501378, + "grad_norm": 2.015625, + "learning_rate": 1.4520417944237053e-05, + "loss": 4.1762, + "step": 480100 + }, + { + "epoch": 4.1335227272727275, + "grad_norm": 2.15625, + "learning_rate": 1.4506361277018777e-05, + "loss": 4.1225, + "step": 480150 + }, + { + "epoch": 4.133953168044077, + "grad_norm": 3.828125, + "learning_rate": 1.4492310884849657e-05, + "loss": 4.1823, + "step": 480200 + }, + { + "epoch": 4.134383608815427, + "grad_norm": 3.921875, + "learning_rate": 1.4478266768760972e-05, + "loss": 4.3453, + "step": 480250 + }, + { + "epoch": 4.134814049586777, + "grad_norm": 3.53125, + "learning_rate": 1.4464228929783453e-05, + "loss": 4.2912, + "step": 480300 + }, + { + "epoch": 4.135244490358127, + "grad_norm": 3.171875, + "learning_rate": 1.4450197368947516e-05, + "loss": 4.2596, + "step": 480350 + }, + { + "epoch": 4.1356749311294765, + "grad_norm": 2.71875, + "learning_rate": 1.443617208728303e-05, + "loss": 4.5021, + "step": 480400 + }, + { + "epoch": 4.136105371900826, + "grad_norm": 2.296875, + "learning_rate": 1.4422153085819402e-05, + "loss": 4.2144, + "step": 480450 + }, + { + "epoch": 4.136535812672176, + "grad_norm": 2.5625, + "learning_rate": 1.440814036558562e-05, + "loss": 4.2672, + "step": 480500 + }, + { + "epoch": 4.136966253443526, + "grad_norm": 2.109375, + "learning_rate": 1.439413392761021e-05, + "loss": 4.4934, + "step": 480550 + }, + { + "epoch": 4.137396694214876, + "grad_norm": 2.5, + "learning_rate": 1.4380133772921134e-05, + "loss": 4.3124, + "step": 480600 + }, + { + "epoch": 4.137827134986226, + "grad_norm": 4.125, + "learning_rate": 1.4366139902546061e-05, + "loss": 4.3709, + "step": 480650 + }, + { + "epoch": 4.138257575757576, + "grad_norm": 3.0, + "learning_rate": 1.4352152317512057e-05, + "loss": 4.1758, + "step": 480700 + }, + { + "epoch": 4.138688016528926, + "grad_norm": 1.6171875, + "learning_rate": 1.433817101884578e-05, + "loss": 4.1738, + "step": 480750 + }, + { + "epoch": 4.139118457300276, + "grad_norm": 6.28125, + "learning_rate": 1.4324196007573487e-05, + "loss": 4.467, + "step": 480800 + }, + { + "epoch": 4.1395488980716255, + "grad_norm": 4.28125, + "learning_rate": 1.4310227284720823e-05, + "loss": 4.6783, + "step": 480850 + }, + { + "epoch": 4.139979338842975, + "grad_norm": 2.65625, + "learning_rate": 1.4296264851313146e-05, + "loss": 4.1553, + "step": 480900 + }, + { + "epoch": 4.140409779614325, + "grad_norm": 2.921875, + "learning_rate": 1.4282308708375225e-05, + "loss": 4.4313, + "step": 480950 + }, + { + "epoch": 4.140840220385675, + "grad_norm": 4.8125, + "learning_rate": 1.4268358856931407e-05, + "loss": 4.198, + "step": 481000 + }, + { + "epoch": 4.141270661157025, + "grad_norm": 2.765625, + "learning_rate": 1.4254415298005608e-05, + "loss": 4.0836, + "step": 481050 + }, + { + "epoch": 4.1417011019283745, + "grad_norm": 2.796875, + "learning_rate": 1.424047803262123e-05, + "loss": 3.8872, + "step": 481100 + }, + { + "epoch": 4.142131542699724, + "grad_norm": 3.78125, + "learning_rate": 1.4226547061801232e-05, + "loss": 4.2321, + "step": 481150 + }, + { + "epoch": 4.142561983471074, + "grad_norm": 2.953125, + "learning_rate": 1.4212622386568153e-05, + "loss": 4.5142, + "step": 481200 + }, + { + "epoch": 4.142992424242424, + "grad_norm": 2.515625, + "learning_rate": 1.419870400794402e-05, + "loss": 4.2966, + "step": 481250 + }, + { + "epoch": 4.143422865013774, + "grad_norm": 3.484375, + "learning_rate": 1.4184791926950403e-05, + "loss": 4.3673, + "step": 481300 + }, + { + "epoch": 4.143853305785124, + "grad_norm": 1.125, + "learning_rate": 1.417088614460842e-05, + "loss": 4.1101, + "step": 481350 + }, + { + "epoch": 4.144283746556474, + "grad_norm": 3.75, + "learning_rate": 1.415698666193872e-05, + "loss": 4.4379, + "step": 481400 + }, + { + "epoch": 4.144714187327824, + "grad_norm": 5.03125, + "learning_rate": 1.41430934799615e-05, + "loss": 4.2962, + "step": 481450 + }, + { + "epoch": 4.145144628099174, + "grad_norm": 3.078125, + "learning_rate": 1.41292065996965e-05, + "loss": 4.308, + "step": 481500 + }, + { + "epoch": 4.1455750688705235, + "grad_norm": 4.09375, + "learning_rate": 1.411532602216299e-05, + "loss": 4.4051, + "step": 481550 + }, + { + "epoch": 4.146005509641873, + "grad_norm": 3.109375, + "learning_rate": 1.4101451748379757e-05, + "loss": 4.8246, + "step": 481600 + }, + { + "epoch": 4.146435950413223, + "grad_norm": 4.21875, + "learning_rate": 1.4087583779365143e-05, + "loss": 4.2333, + "step": 481650 + }, + { + "epoch": 4.146866391184573, + "grad_norm": 2.6875, + "learning_rate": 1.4073722116137022e-05, + "loss": 4.2659, + "step": 481700 + }, + { + "epoch": 4.147296831955923, + "grad_norm": 2.15625, + "learning_rate": 1.4059866759712847e-05, + "loss": 4.1725, + "step": 481750 + }, + { + "epoch": 4.1477272727272725, + "grad_norm": 2.390625, + "learning_rate": 1.404601771110956e-05, + "loss": 3.9544, + "step": 481800 + }, + { + "epoch": 4.148157713498622, + "grad_norm": 3.5, + "learning_rate": 1.4032174971343604e-05, + "loss": 4.4572, + "step": 481850 + }, + { + "epoch": 4.148588154269972, + "grad_norm": 4.9375, + "learning_rate": 1.4018338541431064e-05, + "loss": 4.2602, + "step": 481900 + }, + { + "epoch": 4.149018595041323, + "grad_norm": 4.03125, + "learning_rate": 1.4004508422387475e-05, + "loss": 4.3454, + "step": 481950 + }, + { + "epoch": 4.1494490358126725, + "grad_norm": 4.125, + "learning_rate": 1.3990684615227912e-05, + "loss": 4.4533, + "step": 482000 + }, + { + "epoch": 4.149879476584022, + "grad_norm": 2.703125, + "learning_rate": 1.3976867120967108e-05, + "loss": 4.576, + "step": 482050 + }, + { + "epoch": 4.150309917355372, + "grad_norm": 3.765625, + "learning_rate": 1.3963055940619141e-05, + "loss": 4.0701, + "step": 482100 + }, + { + "epoch": 4.150740358126722, + "grad_norm": 3.203125, + "learning_rate": 1.3949251075197733e-05, + "loss": 4.2049, + "step": 482150 + }, + { + "epoch": 4.151170798898072, + "grad_norm": 4.46875, + "learning_rate": 1.3935452525716175e-05, + "loss": 4.3496, + "step": 482200 + }, + { + "epoch": 4.1516012396694215, + "grad_norm": 1.640625, + "learning_rate": 1.3921660293187222e-05, + "loss": 4.5934, + "step": 482250 + }, + { + "epoch": 4.152031680440771, + "grad_norm": 3.21875, + "learning_rate": 1.3907874378623187e-05, + "loss": 4.7418, + "step": 482300 + }, + { + "epoch": 4.152462121212121, + "grad_norm": 2.328125, + "learning_rate": 1.3894094783035993e-05, + "loss": 3.9849, + "step": 482350 + }, + { + "epoch": 4.152892561983471, + "grad_norm": 2.640625, + "learning_rate": 1.3880321507436923e-05, + "loss": 4.3936, + "step": 482400 + }, + { + "epoch": 4.153323002754821, + "grad_norm": 2.15625, + "learning_rate": 1.3866554552836986e-05, + "loss": 3.9804, + "step": 482450 + }, + { + "epoch": 4.1537534435261705, + "grad_norm": 1.453125, + "learning_rate": 1.3852793920246631e-05, + "loss": 4.3132, + "step": 482500 + }, + { + "epoch": 4.15418388429752, + "grad_norm": 3.1875, + "learning_rate": 1.3839039610675853e-05, + "loss": 4.3411, + "step": 482550 + }, + { + "epoch": 4.15461432506887, + "grad_norm": 2.53125, + "learning_rate": 1.3825291625134184e-05, + "loss": 4.0502, + "step": 482600 + }, + { + "epoch": 4.155044765840221, + "grad_norm": 3.40625, + "learning_rate": 1.38115499646307e-05, + "loss": 4.3411, + "step": 482650 + }, + { + "epoch": 4.1554752066115705, + "grad_norm": 5.65625, + "learning_rate": 1.3797814630173978e-05, + "loss": 4.5538, + "step": 482700 + }, + { + "epoch": 4.15590564738292, + "grad_norm": 3.328125, + "learning_rate": 1.3784085622772224e-05, + "loss": 4.63, + "step": 482750 + }, + { + "epoch": 4.15633608815427, + "grad_norm": 3.578125, + "learning_rate": 1.377036294343309e-05, + "loss": 4.3575, + "step": 482800 + }, + { + "epoch": 4.15676652892562, + "grad_norm": 3.90625, + "learning_rate": 1.375664659316378e-05, + "loss": 4.3019, + "step": 482850 + }, + { + "epoch": 4.15719696969697, + "grad_norm": 2.40625, + "learning_rate": 1.3742936572971065e-05, + "loss": 4.2185, + "step": 482900 + }, + { + "epoch": 4.1576274104683195, + "grad_norm": 2.1875, + "learning_rate": 1.372923288386121e-05, + "loss": 4.8507, + "step": 482950 + }, + { + "epoch": 4.158057851239669, + "grad_norm": 2.703125, + "learning_rate": 1.371553552684003e-05, + "loss": 4.0664, + "step": 483000 + }, + { + "epoch": 4.158057851239669, + "eval_loss": 4.990303993225098, + "eval_runtime": 24.971, + "eval_samples_per_second": 25.63, + "eval_steps_per_second": 12.815, + "eval_tts_loss": 7.480562996970848, + "step": 483000 + }, + { + "epoch": 4.158488292011019, + "grad_norm": 3.828125, + "learning_rate": 1.3701844502912909e-05, + "loss": 4.4465, + "step": 483050 + }, + { + "epoch": 4.158918732782369, + "grad_norm": 3.828125, + "learning_rate": 1.3688159813084722e-05, + "loss": 4.5435, + "step": 483100 + }, + { + "epoch": 4.159349173553719, + "grad_norm": 3.359375, + "learning_rate": 1.3674481458359911e-05, + "loss": 4.33, + "step": 483150 + }, + { + "epoch": 4.1597796143250685, + "grad_norm": 2.8125, + "learning_rate": 1.3660809439742428e-05, + "loss": 3.9643, + "step": 483200 + }, + { + "epoch": 4.160210055096419, + "grad_norm": 3.359375, + "learning_rate": 1.3647143758235759e-05, + "loss": 4.0003, + "step": 483250 + }, + { + "epoch": 4.160640495867769, + "grad_norm": 2.9375, + "learning_rate": 1.3633484414842935e-05, + "loss": 4.4465, + "step": 483300 + }, + { + "epoch": 4.161070936639119, + "grad_norm": 1.6171875, + "learning_rate": 1.3619831410566564e-05, + "loss": 3.862, + "step": 483350 + }, + { + "epoch": 4.1615013774104685, + "grad_norm": 3.265625, + "learning_rate": 1.3606184746408679e-05, + "loss": 4.0066, + "step": 483400 + }, + { + "epoch": 4.161931818181818, + "grad_norm": 5.59375, + "learning_rate": 1.3592544423370978e-05, + "loss": 4.3242, + "step": 483450 + }, + { + "epoch": 4.162362258953168, + "grad_norm": 2.8125, + "learning_rate": 1.3578910442454596e-05, + "loss": 4.3814, + "step": 483500 + }, + { + "epoch": 4.162792699724518, + "grad_norm": 3.203125, + "learning_rate": 1.3565282804660229e-05, + "loss": 4.3254, + "step": 483550 + }, + { + "epoch": 4.163223140495868, + "grad_norm": 3.3125, + "learning_rate": 1.355166151098819e-05, + "loss": 4.5882, + "step": 483600 + }, + { + "epoch": 4.1636535812672175, + "grad_norm": 2.703125, + "learning_rate": 1.3538046562438167e-05, + "loss": 4.061, + "step": 483650 + }, + { + "epoch": 4.164084022038567, + "grad_norm": 2.828125, + "learning_rate": 1.3524437960009473e-05, + "loss": 4.3966, + "step": 483700 + }, + { + "epoch": 4.164514462809917, + "grad_norm": 1.75, + "learning_rate": 1.3510835704700997e-05, + "loss": 4.697, + "step": 483750 + }, + { + "epoch": 4.164944903581267, + "grad_norm": 3.234375, + "learning_rate": 1.3497239797511097e-05, + "loss": 3.862, + "step": 483800 + }, + { + "epoch": 4.165375344352617, + "grad_norm": 3.859375, + "learning_rate": 1.3483650239437684e-05, + "loss": 4.3522, + "step": 483850 + }, + { + "epoch": 4.1658057851239665, + "grad_norm": 3.640625, + "learning_rate": 1.3470067031478195e-05, + "loss": 4.2348, + "step": 483900 + }, + { + "epoch": 4.166236225895317, + "grad_norm": 4.09375, + "learning_rate": 1.34564901746296e-05, + "loss": 4.3267, + "step": 483950 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 2.796875, + "learning_rate": 1.3442919669888432e-05, + "loss": 3.9724, + "step": 484000 + }, + { + "epoch": 4.167097107438017, + "grad_norm": 4.03125, + "learning_rate": 1.342935551825073e-05, + "loss": 3.7752, + "step": 484050 + }, + { + "epoch": 4.1675275482093666, + "grad_norm": 3.109375, + "learning_rate": 1.3415797720712076e-05, + "loss": 4.7967, + "step": 484100 + }, + { + "epoch": 4.167957988980716, + "grad_norm": 7.0, + "learning_rate": 1.3402246278267571e-05, + "loss": 4.2417, + "step": 484150 + }, + { + "epoch": 4.168388429752066, + "grad_norm": 4.6875, + "learning_rate": 1.338870119191188e-05, + "loss": 4.252, + "step": 484200 + }, + { + "epoch": 4.168818870523416, + "grad_norm": 2.046875, + "learning_rate": 1.3375162462639146e-05, + "loss": 4.4715, + "step": 484250 + }, + { + "epoch": 4.169249311294766, + "grad_norm": 2.90625, + "learning_rate": 1.3361630091443122e-05, + "loss": 4.1569, + "step": 484300 + }, + { + "epoch": 4.1696797520661155, + "grad_norm": 3.4375, + "learning_rate": 1.3348104079317059e-05, + "loss": 4.0755, + "step": 484350 + }, + { + "epoch": 4.170110192837465, + "grad_norm": 4.28125, + "learning_rate": 1.3334584427253704e-05, + "loss": 4.3337, + "step": 484400 + }, + { + "epoch": 4.170540633608815, + "grad_norm": 1.8671875, + "learning_rate": 1.33210711362454e-05, + "loss": 4.2487, + "step": 484450 + }, + { + "epoch": 4.170971074380165, + "grad_norm": 3.171875, + "learning_rate": 1.3307564207283974e-05, + "loss": 4.3619, + "step": 484500 + }, + { + "epoch": 4.171401515151516, + "grad_norm": 2.84375, + "learning_rate": 1.3294063641360787e-05, + "loss": 4.2338, + "step": 484550 + }, + { + "epoch": 4.171831955922865, + "grad_norm": 3.578125, + "learning_rate": 1.3280569439466795e-05, + "loss": 4.2768, + "step": 484600 + }, + { + "epoch": 4.172262396694215, + "grad_norm": 4.65625, + "learning_rate": 1.3267081602592436e-05, + "loss": 4.048, + "step": 484650 + }, + { + "epoch": 4.172692837465565, + "grad_norm": 3.8125, + "learning_rate": 1.3253600131727672e-05, + "loss": 4.5198, + "step": 484700 + }, + { + "epoch": 4.173123278236915, + "grad_norm": 3.65625, + "learning_rate": 1.3240125027862016e-05, + "loss": 4.1501, + "step": 484750 + }, + { + "epoch": 4.1735537190082646, + "grad_norm": 5.3125, + "learning_rate": 1.3226656291984518e-05, + "loss": 4.1201, + "step": 484800 + }, + { + "epoch": 4.173984159779614, + "grad_norm": 3.75, + "learning_rate": 1.3213193925083721e-05, + "loss": 4.5307, + "step": 484850 + }, + { + "epoch": 4.174414600550964, + "grad_norm": 4.3125, + "learning_rate": 1.3199737928147815e-05, + "loss": 4.5901, + "step": 484900 + }, + { + "epoch": 4.174845041322314, + "grad_norm": 3.4375, + "learning_rate": 1.318628830216434e-05, + "loss": 4.4692, + "step": 484950 + }, + { + "epoch": 4.175275482093664, + "grad_norm": 1.78125, + "learning_rate": 1.3172845048120542e-05, + "loss": 4.0747, + "step": 485000 + }, + { + "epoch": 4.1757059228650135, + "grad_norm": 3.046875, + "learning_rate": 1.3159408167003106e-05, + "loss": 4.7405, + "step": 485050 + }, + { + "epoch": 4.176136363636363, + "grad_norm": 1.7421875, + "learning_rate": 1.3145977659798247e-05, + "loss": 4.0364, + "step": 485100 + }, + { + "epoch": 4.176566804407713, + "grad_norm": 2.078125, + "learning_rate": 1.3132553527491797e-05, + "loss": 4.2683, + "step": 485150 + }, + { + "epoch": 4.176997245179064, + "grad_norm": 3.28125, + "learning_rate": 1.3119135771069002e-05, + "loss": 4.0564, + "step": 485200 + }, + { + "epoch": 4.177427685950414, + "grad_norm": 1.2265625, + "learning_rate": 1.3105724391514696e-05, + "loss": 4.5556, + "step": 485250 + }, + { + "epoch": 4.177858126721763, + "grad_norm": 2.453125, + "learning_rate": 1.3092319389813268e-05, + "loss": 4.1933, + "step": 485300 + }, + { + "epoch": 4.178288567493113, + "grad_norm": 3.84375, + "learning_rate": 1.307892076694862e-05, + "loss": 4.4607, + "step": 485350 + }, + { + "epoch": 4.178719008264463, + "grad_norm": 2.9375, + "learning_rate": 1.306552852390417e-05, + "loss": 4.5075, + "step": 485400 + }, + { + "epoch": 4.179149449035813, + "grad_norm": 2.6875, + "learning_rate": 1.305214266166288e-05, + "loss": 3.9504, + "step": 485450 + }, + { + "epoch": 4.1795798898071626, + "grad_norm": 3.875, + "learning_rate": 1.3038763181207248e-05, + "loss": 4.7029, + "step": 485500 + }, + { + "epoch": 4.180010330578512, + "grad_norm": 3.046875, + "learning_rate": 1.3025390083519273e-05, + "loss": 4.0459, + "step": 485550 + }, + { + "epoch": 4.180440771349862, + "grad_norm": 7.59375, + "learning_rate": 1.3012023369580551e-05, + "loss": 4.2071, + "step": 485600 + }, + { + "epoch": 4.180871212121212, + "grad_norm": 3.9375, + "learning_rate": 1.299866304037216e-05, + "loss": 4.1605, + "step": 485650 + }, + { + "epoch": 4.181301652892562, + "grad_norm": 4.65625, + "learning_rate": 1.298530909687472e-05, + "loss": 4.1318, + "step": 485700 + }, + { + "epoch": 4.1817320936639115, + "grad_norm": 4.4375, + "learning_rate": 1.2971961540068367e-05, + "loss": 4.0893, + "step": 485750 + }, + { + "epoch": 4.182162534435261, + "grad_norm": 3.1875, + "learning_rate": 1.2958620370932773e-05, + "loss": 4.3281, + "step": 485800 + }, + { + "epoch": 4.182592975206612, + "grad_norm": 2.953125, + "learning_rate": 1.2945285590447188e-05, + "loss": 4.2348, + "step": 485850 + }, + { + "epoch": 4.183023415977962, + "grad_norm": 2.859375, + "learning_rate": 1.2931957199590361e-05, + "loss": 4.2268, + "step": 485900 + }, + { + "epoch": 4.183453856749312, + "grad_norm": 5.09375, + "learning_rate": 1.2918635199340501e-05, + "loss": 4.1926, + "step": 485950 + }, + { + "epoch": 4.183884297520661, + "grad_norm": 3.203125, + "learning_rate": 1.290531959067548e-05, + "loss": 4.3034, + "step": 486000 + }, + { + "epoch": 4.183884297520661, + "eval_loss": 4.99005651473999, + "eval_runtime": 23.9809, + "eval_samples_per_second": 26.688, + "eval_steps_per_second": 13.344, + "eval_tts_loss": 7.475855944563067, + "step": 486000 + }, + { + "epoch": 4.184314738292011, + "grad_norm": 4.0, + "learning_rate": 1.2892010374572616e-05, + "loss": 4.6245, + "step": 486050 + }, + { + "epoch": 4.184745179063361, + "grad_norm": 1.953125, + "learning_rate": 1.287870755200874e-05, + "loss": 4.6211, + "step": 486100 + }, + { + "epoch": 4.185175619834711, + "grad_norm": 3.734375, + "learning_rate": 1.2865411123960313e-05, + "loss": 4.3865, + "step": 486150 + }, + { + "epoch": 4.1856060606060606, + "grad_norm": 3.46875, + "learning_rate": 1.2852121091403235e-05, + "loss": 4.2504, + "step": 486200 + }, + { + "epoch": 4.18603650137741, + "grad_norm": 3.609375, + "learning_rate": 1.2838837455312958e-05, + "loss": 4.6313, + "step": 486250 + }, + { + "epoch": 4.18646694214876, + "grad_norm": 5.4375, + "learning_rate": 1.2825560216664489e-05, + "loss": 4.4578, + "step": 486300 + }, + { + "epoch": 4.18689738292011, + "grad_norm": 1.9609375, + "learning_rate": 1.2812289376432329e-05, + "loss": 4.2719, + "step": 486350 + }, + { + "epoch": 4.18732782369146, + "grad_norm": 3.609375, + "learning_rate": 1.2799024935590531e-05, + "loss": 4.4011, + "step": 486400 + }, + { + "epoch": 4.1877582644628095, + "grad_norm": 3.53125, + "learning_rate": 1.2785766895112717e-05, + "loss": 4.1507, + "step": 486450 + }, + { + "epoch": 4.18818870523416, + "grad_norm": 3.515625, + "learning_rate": 1.2772515255971928e-05, + "loss": 4.0321, + "step": 486500 + }, + { + "epoch": 4.18861914600551, + "grad_norm": 2.8125, + "learning_rate": 1.2759270019140867e-05, + "loss": 4.1574, + "step": 486550 + }, + { + "epoch": 4.18904958677686, + "grad_norm": 2.3125, + "learning_rate": 1.2746031185591677e-05, + "loss": 4.403, + "step": 486600 + }, + { + "epoch": 4.18948002754821, + "grad_norm": 1.578125, + "learning_rate": 1.273279875629605e-05, + "loss": 4.1491, + "step": 486650 + }, + { + "epoch": 4.189910468319559, + "grad_norm": 3.125, + "learning_rate": 1.2719572732225283e-05, + "loss": 4.235, + "step": 486700 + }, + { + "epoch": 4.190340909090909, + "grad_norm": 3.28125, + "learning_rate": 1.2706353114350055e-05, + "loss": 4.3334, + "step": 486750 + }, + { + "epoch": 4.190771349862259, + "grad_norm": 3.8125, + "learning_rate": 1.2693139903640683e-05, + "loss": 4.5982, + "step": 486800 + }, + { + "epoch": 4.191201790633609, + "grad_norm": 5.3125, + "learning_rate": 1.267993310106701e-05, + "loss": 4.2806, + "step": 486850 + }, + { + "epoch": 4.1916322314049586, + "grad_norm": 3.453125, + "learning_rate": 1.2666732707598372e-05, + "loss": 4.1032, + "step": 486900 + }, + { + "epoch": 4.192062672176308, + "grad_norm": 4.78125, + "learning_rate": 1.265353872420365e-05, + "loss": 4.1981, + "step": 486950 + }, + { + "epoch": 4.192493112947658, + "grad_norm": 3.25, + "learning_rate": 1.264035115185126e-05, + "loss": 4.3563, + "step": 487000 + }, + { + "epoch": 4.192923553719008, + "grad_norm": 2.703125, + "learning_rate": 1.2627169991509136e-05, + "loss": 4.1019, + "step": 487050 + }, + { + "epoch": 4.193353994490358, + "grad_norm": 0.92578125, + "learning_rate": 1.2613995244144727e-05, + "loss": 4.1691, + "step": 487100 + }, + { + "epoch": 4.193784435261708, + "grad_norm": 3.40625, + "learning_rate": 1.2600826910725072e-05, + "loss": 4.3536, + "step": 487150 + }, + { + "epoch": 4.194214876033058, + "grad_norm": 3.125, + "learning_rate": 1.2587664992216674e-05, + "loss": 4.2473, + "step": 487200 + }, + { + "epoch": 4.194645316804408, + "grad_norm": 3.203125, + "learning_rate": 1.2574509489585595e-05, + "loss": 4.6249, + "step": 487250 + }, + { + "epoch": 4.195075757575758, + "grad_norm": 2.890625, + "learning_rate": 1.2561360403797418e-05, + "loss": 4.3506, + "step": 487300 + }, + { + "epoch": 4.195506198347108, + "grad_norm": 2.21875, + "learning_rate": 1.2548217735817236e-05, + "loss": 4.4289, + "step": 487350 + }, + { + "epoch": 4.195936639118457, + "grad_norm": 3.40625, + "learning_rate": 1.2535081486609735e-05, + "loss": 4.3421, + "step": 487400 + }, + { + "epoch": 4.196367079889807, + "grad_norm": 2.921875, + "learning_rate": 1.2521951657139086e-05, + "loss": 4.3659, + "step": 487450 + }, + { + "epoch": 4.196797520661157, + "grad_norm": 3.53125, + "learning_rate": 1.2508828248368942e-05, + "loss": 4.1716, + "step": 487500 + }, + { + "epoch": 4.197227961432507, + "grad_norm": 3.03125, + "learning_rate": 1.2495711261262566e-05, + "loss": 4.5417, + "step": 487550 + }, + { + "epoch": 4.1976584022038566, + "grad_norm": 2.796875, + "learning_rate": 1.2482600696782731e-05, + "loss": 4.2121, + "step": 487600 + }, + { + "epoch": 4.198088842975206, + "grad_norm": 2.671875, + "learning_rate": 1.2469496555891669e-05, + "loss": 4.2866, + "step": 487650 + }, + { + "epoch": 4.198519283746556, + "grad_norm": 2.96875, + "learning_rate": 1.2456398839551286e-05, + "loss": 3.9799, + "step": 487700 + }, + { + "epoch": 4.198949724517906, + "grad_norm": 4.125, + "learning_rate": 1.2443307548722848e-05, + "loss": 4.2645, + "step": 487750 + }, + { + "epoch": 4.199380165289257, + "grad_norm": 4.8125, + "learning_rate": 1.2430222684367233e-05, + "loss": 4.319, + "step": 487800 + }, + { + "epoch": 4.199810606060606, + "grad_norm": 4.40625, + "learning_rate": 1.2417144247444879e-05, + "loss": 4.1868, + "step": 487850 + }, + { + "epoch": 4.200241046831956, + "grad_norm": 3.296875, + "learning_rate": 1.240407223891571e-05, + "loss": 4.6528, + "step": 487900 + }, + { + "epoch": 4.200671487603306, + "grad_norm": 3.859375, + "learning_rate": 1.2391006659739135e-05, + "loss": 4.1551, + "step": 487950 + }, + { + "epoch": 4.201101928374656, + "grad_norm": 3.296875, + "learning_rate": 1.2377947510874233e-05, + "loss": 3.7787, + "step": 488000 + }, + { + "epoch": 4.201532369146006, + "grad_norm": 3.3125, + "learning_rate": 1.2364894793279402e-05, + "loss": 4.6407, + "step": 488050 + }, + { + "epoch": 4.201962809917355, + "grad_norm": 6.21875, + "learning_rate": 1.2351848507912767e-05, + "loss": 4.4595, + "step": 488100 + }, + { + "epoch": 4.202393250688705, + "grad_norm": 3.078125, + "learning_rate": 1.2338808655731882e-05, + "loss": 4.3777, + "step": 488150 + }, + { + "epoch": 4.202823691460055, + "grad_norm": 3.828125, + "learning_rate": 1.2325775237693804e-05, + "loss": 3.9588, + "step": 488200 + }, + { + "epoch": 4.203254132231405, + "grad_norm": 4.40625, + "learning_rate": 1.2312748254755225e-05, + "loss": 3.8889, + "step": 488250 + }, + { + "epoch": 4.2036845730027546, + "grad_norm": 4.625, + "learning_rate": 1.2299727707872243e-05, + "loss": 4.2328, + "step": 488300 + }, + { + "epoch": 4.204115013774104, + "grad_norm": 3.46875, + "learning_rate": 1.2286713598000544e-05, + "loss": 4.4372, + "step": 488350 + }, + { + "epoch": 4.204545454545454, + "grad_norm": 3.34375, + "learning_rate": 1.2273705926095358e-05, + "loss": 4.3798, + "step": 488400 + }, + { + "epoch": 4.204975895316805, + "grad_norm": 7.90625, + "learning_rate": 1.2260704693111413e-05, + "loss": 4.2295, + "step": 488450 + }, + { + "epoch": 4.205406336088155, + "grad_norm": 2.28125, + "learning_rate": 1.2247709900002957e-05, + "loss": 3.9938, + "step": 488500 + }, + { + "epoch": 4.205836776859504, + "grad_norm": 2.28125, + "learning_rate": 1.2234721547723803e-05, + "loss": 4.1058, + "step": 488550 + }, + { + "epoch": 4.206267217630854, + "grad_norm": 1.53125, + "learning_rate": 1.2221739637227247e-05, + "loss": 4.3419, + "step": 488600 + }, + { + "epoch": 4.206697658402204, + "grad_norm": 2.46875, + "learning_rate": 1.2208764169466125e-05, + "loss": 4.1758, + "step": 488650 + }, + { + "epoch": 4.207128099173554, + "grad_norm": 2.953125, + "learning_rate": 1.2195795145392852e-05, + "loss": 4.3812, + "step": 488700 + }, + { + "epoch": 4.207558539944904, + "grad_norm": 3.984375, + "learning_rate": 1.218283256595929e-05, + "loss": 4.1588, + "step": 488750 + }, + { + "epoch": 4.207988980716253, + "grad_norm": 3.0625, + "learning_rate": 1.2169876432116878e-05, + "loss": 4.2296, + "step": 488800 + }, + { + "epoch": 4.208419421487603, + "grad_norm": 4.1875, + "learning_rate": 1.2156926744816565e-05, + "loss": 4.2307, + "step": 488850 + }, + { + "epoch": 4.208849862258953, + "grad_norm": 2.84375, + "learning_rate": 1.2143983505008815e-05, + "loss": 4.2124, + "step": 488900 + }, + { + "epoch": 4.209280303030303, + "grad_norm": 3.328125, + "learning_rate": 1.2131046713643656e-05, + "loss": 4.29, + "step": 488950 + }, + { + "epoch": 4.2097107438016526, + "grad_norm": 8.125, + "learning_rate": 1.211811637167064e-05, + "loss": 3.9951, + "step": 489000 + }, + { + "epoch": 4.2097107438016526, + "eval_loss": 4.99033260345459, + "eval_runtime": 22.4777, + "eval_samples_per_second": 28.473, + "eval_steps_per_second": 14.236, + "eval_tts_loss": 7.48369155709714, + "step": 489000 + }, + { + "epoch": 4.210141184573002, + "grad_norm": 3.03125, + "learning_rate": 1.2105192480038762e-05, + "loss": 4.3183, + "step": 489050 + }, + { + "epoch": 4.210571625344353, + "grad_norm": 3.75, + "learning_rate": 1.2092275039696666e-05, + "loss": 4.1129, + "step": 489100 + }, + { + "epoch": 4.211002066115703, + "grad_norm": 3.34375, + "learning_rate": 1.2079364051592446e-05, + "loss": 4.1003, + "step": 489150 + }, + { + "epoch": 4.211432506887053, + "grad_norm": 4.21875, + "learning_rate": 1.2066459516673711e-05, + "loss": 4.3532, + "step": 489200 + }, + { + "epoch": 4.211862947658402, + "grad_norm": 4.65625, + "learning_rate": 1.2053561435887694e-05, + "loss": 4.6279, + "step": 489250 + }, + { + "epoch": 4.212293388429752, + "grad_norm": 2.765625, + "learning_rate": 1.2040669810181027e-05, + "loss": 4.5556, + "step": 489300 + }, + { + "epoch": 4.212723829201102, + "grad_norm": 3.609375, + "learning_rate": 1.2027784640499928e-05, + "loss": 4.3745, + "step": 489350 + }, + { + "epoch": 4.213154269972452, + "grad_norm": 2.625, + "learning_rate": 1.2014905927790176e-05, + "loss": 4.2876, + "step": 489400 + }, + { + "epoch": 4.213584710743802, + "grad_norm": 1.609375, + "learning_rate": 1.2002033672997026e-05, + "loss": 3.9901, + "step": 489450 + }, + { + "epoch": 4.214015151515151, + "grad_norm": 4.03125, + "learning_rate": 1.1989167877065276e-05, + "loss": 4.4062, + "step": 489500 + }, + { + "epoch": 4.214445592286501, + "grad_norm": 3.59375, + "learning_rate": 1.1976308540939241e-05, + "loss": 3.9376, + "step": 489550 + }, + { + "epoch": 4.214876033057851, + "grad_norm": 2.625, + "learning_rate": 1.196345566556275e-05, + "loss": 4.2519, + "step": 489600 + }, + { + "epoch": 4.215306473829201, + "grad_norm": 4.0, + "learning_rate": 1.195060925187922e-05, + "loss": 4.4874, + "step": 489650 + }, + { + "epoch": 4.2157369146005506, + "grad_norm": 4.28125, + "learning_rate": 1.1937769300831525e-05, + "loss": 4.5443, + "step": 489700 + }, + { + "epoch": 4.216167355371901, + "grad_norm": 5.71875, + "learning_rate": 1.1924935813362092e-05, + "loss": 4.2705, + "step": 489750 + }, + { + "epoch": 4.216597796143251, + "grad_norm": 1.671875, + "learning_rate": 1.191210879041288e-05, + "loss": 4.2869, + "step": 489800 + }, + { + "epoch": 4.217028236914601, + "grad_norm": 2.953125, + "learning_rate": 1.1899288232925343e-05, + "loss": 4.2483, + "step": 489850 + }, + { + "epoch": 4.217458677685951, + "grad_norm": 5.1875, + "learning_rate": 1.1886474141840486e-05, + "loss": 4.6351, + "step": 489900 + }, + { + "epoch": 4.2178891184573, + "grad_norm": 3.140625, + "learning_rate": 1.1873666518098857e-05, + "loss": 4.4643, + "step": 489950 + }, + { + "epoch": 4.21831955922865, + "grad_norm": 4.34375, + "learning_rate": 1.1860865362640505e-05, + "loss": 4.3536, + "step": 490000 + }, + { + "epoch": 4.21875, + "grad_norm": 2.890625, + "learning_rate": 1.1848070676404998e-05, + "loss": 4.6972, + "step": 490050 + }, + { + "epoch": 4.21918044077135, + "grad_norm": 2.84375, + "learning_rate": 1.1835282460331443e-05, + "loss": 4.7093, + "step": 490100 + }, + { + "epoch": 4.2196108815427, + "grad_norm": 3.15625, + "learning_rate": 1.1822500715358464e-05, + "loss": 4.4186, + "step": 490150 + }, + { + "epoch": 4.220041322314049, + "grad_norm": 2.734375, + "learning_rate": 1.180972544242418e-05, + "loss": 4.4695, + "step": 490200 + }, + { + "epoch": 4.220471763085399, + "grad_norm": 6.8125, + "learning_rate": 1.1796956642466339e-05, + "loss": 4.7324, + "step": 490250 + }, + { + "epoch": 4.220902203856749, + "grad_norm": 2.828125, + "learning_rate": 1.1784194316422092e-05, + "loss": 3.6999, + "step": 490300 + }, + { + "epoch": 4.221332644628099, + "grad_norm": 3.0625, + "learning_rate": 1.177143846522819e-05, + "loss": 4.292, + "step": 490350 + }, + { + "epoch": 4.2217630853994494, + "grad_norm": 5.5, + "learning_rate": 1.1758689089820873e-05, + "loss": 4.0894, + "step": 490400 + }, + { + "epoch": 4.222193526170799, + "grad_norm": 3.875, + "learning_rate": 1.1745946191135903e-05, + "loss": 4.2006, + "step": 490450 + }, + { + "epoch": 4.222623966942149, + "grad_norm": 2.921875, + "learning_rate": 1.1733209770108621e-05, + "loss": 4.4199, + "step": 490500 + }, + { + "epoch": 4.223054407713499, + "grad_norm": 3.328125, + "learning_rate": 1.1720479827673847e-05, + "loss": 4.5137, + "step": 490550 + }, + { + "epoch": 4.223484848484849, + "grad_norm": 3.0625, + "learning_rate": 1.1707756364765877e-05, + "loss": 4.4945, + "step": 490600 + }, + { + "epoch": 4.223915289256198, + "grad_norm": 2.53125, + "learning_rate": 1.1695039382318652e-05, + "loss": 4.2744, + "step": 490650 + }, + { + "epoch": 4.224345730027548, + "grad_norm": 1.9140625, + "learning_rate": 1.1682328881265536e-05, + "loss": 4.1861, + "step": 490700 + }, + { + "epoch": 4.224776170798898, + "grad_norm": 3.890625, + "learning_rate": 1.166962486253943e-05, + "loss": 4.3473, + "step": 490750 + }, + { + "epoch": 4.225206611570248, + "grad_norm": 3.828125, + "learning_rate": 1.1656927327072864e-05, + "loss": 4.2845, + "step": 490800 + }, + { + "epoch": 4.225637052341598, + "grad_norm": 1.8125, + "learning_rate": 1.1644236275797737e-05, + "loss": 4.7198, + "step": 490850 + }, + { + "epoch": 4.226067493112947, + "grad_norm": 2.0625, + "learning_rate": 1.1631551709645538e-05, + "loss": 4.3826, + "step": 490900 + }, + { + "epoch": 4.226497933884297, + "grad_norm": 3.25, + "learning_rate": 1.1618873629547334e-05, + "loss": 4.6071, + "step": 490950 + }, + { + "epoch": 4.226928374655647, + "grad_norm": 4.53125, + "learning_rate": 1.1606202036433655e-05, + "loss": 4.3886, + "step": 491000 + }, + { + "epoch": 4.227358815426998, + "grad_norm": 4.28125, + "learning_rate": 1.1593536931234561e-05, + "loss": 4.6401, + "step": 491050 + }, + { + "epoch": 4.2277892561983474, + "grad_norm": 7.46875, + "learning_rate": 1.1580878314879639e-05, + "loss": 4.5303, + "step": 491100 + }, + { + "epoch": 4.228219696969697, + "grad_norm": 3.078125, + "learning_rate": 1.1568226188298004e-05, + "loss": 4.3496, + "step": 491150 + }, + { + "epoch": 4.228650137741047, + "grad_norm": 2.890625, + "learning_rate": 1.155558055241831e-05, + "loss": 4.3177, + "step": 491200 + }, + { + "epoch": 4.229080578512397, + "grad_norm": 3.25, + "learning_rate": 1.1542941408168706e-05, + "loss": 4.6797, + "step": 491250 + }, + { + "epoch": 4.229511019283747, + "grad_norm": 3.515625, + "learning_rate": 1.1530308756476893e-05, + "loss": 4.4444, + "step": 491300 + }, + { + "epoch": 4.229941460055096, + "grad_norm": 4.25, + "learning_rate": 1.1517682598270063e-05, + "loss": 4.6788, + "step": 491350 + }, + { + "epoch": 4.230371900826446, + "grad_norm": 4.125, + "learning_rate": 1.1505062934474952e-05, + "loss": 4.2895, + "step": 491400 + }, + { + "epoch": 4.230802341597796, + "grad_norm": 6.25, + "learning_rate": 1.149244976601781e-05, + "loss": 4.0844, + "step": 491450 + }, + { + "epoch": 4.231232782369146, + "grad_norm": 3.84375, + "learning_rate": 1.1479843093824438e-05, + "loss": 3.9535, + "step": 491500 + }, + { + "epoch": 4.231663223140496, + "grad_norm": 2.890625, + "learning_rate": 1.1467242918820132e-05, + "loss": 4.4283, + "step": 491550 + }, + { + "epoch": 4.232093663911845, + "grad_norm": 3.4375, + "learning_rate": 1.1454649241929716e-05, + "loss": 4.6304, + "step": 491600 + }, + { + "epoch": 4.232524104683195, + "grad_norm": 3.046875, + "learning_rate": 1.144206206407753e-05, + "loss": 4.277, + "step": 491650 + }, + { + "epoch": 4.232954545454546, + "grad_norm": 1.640625, + "learning_rate": 1.1429481386187446e-05, + "loss": 4.1858, + "step": 491700 + }, + { + "epoch": 4.233384986225896, + "grad_norm": 2.78125, + "learning_rate": 1.1416907209182858e-05, + "loss": 4.4552, + "step": 491750 + }, + { + "epoch": 4.2338154269972454, + "grad_norm": 3.25, + "learning_rate": 1.1404339533986697e-05, + "loss": 4.4078, + "step": 491800 + }, + { + "epoch": 4.234245867768595, + "grad_norm": 3.484375, + "learning_rate": 1.1391778361521398e-05, + "loss": 4.3713, + "step": 491850 + }, + { + "epoch": 4.234676308539945, + "grad_norm": 2.796875, + "learning_rate": 1.1379223692708929e-05, + "loss": 4.0514, + "step": 491900 + }, + { + "epoch": 4.235106749311295, + "grad_norm": 2.046875, + "learning_rate": 1.136667552847076e-05, + "loss": 4.1963, + "step": 491950 + }, + { + "epoch": 4.235537190082645, + "grad_norm": 4.0625, + "learning_rate": 1.1354133869727912e-05, + "loss": 4.4756, + "step": 492000 + }, + { + "epoch": 4.235537190082645, + "eval_loss": 4.990378379821777, + "eval_runtime": 22.1678, + "eval_samples_per_second": 28.871, + "eval_steps_per_second": 14.435, + "eval_tts_loss": 7.481186896345774, + "step": 492000 + }, + { + "epoch": 4.235967630853994, + "grad_norm": 2.671875, + "learning_rate": 1.1341598717400892e-05, + "loss": 4.5046, + "step": 492050 + }, + { + "epoch": 4.236398071625344, + "grad_norm": 2.515625, + "learning_rate": 1.1329070072409808e-05, + "loss": 4.0987, + "step": 492100 + }, + { + "epoch": 4.236828512396694, + "grad_norm": 1.953125, + "learning_rate": 1.131654793567416e-05, + "loss": 4.2011, + "step": 492150 + }, + { + "epoch": 4.237258953168044, + "grad_norm": 4.09375, + "learning_rate": 1.130403230811311e-05, + "loss": 3.7776, + "step": 492200 + }, + { + "epoch": 4.237689393939394, + "grad_norm": 4.21875, + "learning_rate": 1.129152319064526e-05, + "loss": 4.0129, + "step": 492250 + }, + { + "epoch": 4.238119834710743, + "grad_norm": 2.609375, + "learning_rate": 1.1279020584188715e-05, + "loss": 4.2839, + "step": 492300 + }, + { + "epoch": 4.238550275482094, + "grad_norm": 3.03125, + "learning_rate": 1.1266524489661212e-05, + "loss": 3.9612, + "step": 492350 + }, + { + "epoch": 4.238980716253444, + "grad_norm": 3.3125, + "learning_rate": 1.1254034907979882e-05, + "loss": 4.2487, + "step": 492400 + }, + { + "epoch": 4.239411157024794, + "grad_norm": 3.984375, + "learning_rate": 1.1241551840061426e-05, + "loss": 4.2714, + "step": 492450 + }, + { + "epoch": 4.2398415977961434, + "grad_norm": 5.21875, + "learning_rate": 1.1229075286822121e-05, + "loss": 4.0773, + "step": 492500 + }, + { + "epoch": 4.240272038567493, + "grad_norm": 3.140625, + "learning_rate": 1.1216605249177692e-05, + "loss": 3.8335, + "step": 492550 + }, + { + "epoch": 4.240702479338843, + "grad_norm": 2.890625, + "learning_rate": 1.1204141728043404e-05, + "loss": 4.1616, + "step": 492600 + }, + { + "epoch": 4.241132920110193, + "grad_norm": 3.265625, + "learning_rate": 1.1191684724334083e-05, + "loss": 4.4248, + "step": 492650 + }, + { + "epoch": 4.241563360881543, + "grad_norm": 1.6484375, + "learning_rate": 1.117923423896401e-05, + "loss": 4.0175, + "step": 492700 + }, + { + "epoch": 4.241993801652892, + "grad_norm": 2.3125, + "learning_rate": 1.1166790272847028e-05, + "loss": 4.2557, + "step": 492750 + }, + { + "epoch": 4.242424242424242, + "grad_norm": 4.15625, + "learning_rate": 1.1154352826896531e-05, + "loss": 4.7157, + "step": 492800 + }, + { + "epoch": 4.242854683195592, + "grad_norm": 3.1875, + "learning_rate": 1.1141921902025387e-05, + "loss": 4.437, + "step": 492850 + }, + { + "epoch": 4.243285123966942, + "grad_norm": 3.671875, + "learning_rate": 1.112949749914598e-05, + "loss": 4.1353, + "step": 492900 + }, + { + "epoch": 4.243715564738292, + "grad_norm": 3.796875, + "learning_rate": 1.1117079619170256e-05, + "loss": 4.5801, + "step": 492950 + }, + { + "epoch": 4.244146005509642, + "grad_norm": 4.0625, + "learning_rate": 1.1104668263009622e-05, + "loss": 4.5561, + "step": 493000 + }, + { + "epoch": 4.244576446280992, + "grad_norm": 3.296875, + "learning_rate": 1.10922634315751e-05, + "loss": 4.7425, + "step": 493050 + }, + { + "epoch": 4.245006887052342, + "grad_norm": 3.921875, + "learning_rate": 1.1079865125777177e-05, + "loss": 4.5495, + "step": 493100 + }, + { + "epoch": 4.245437327823692, + "grad_norm": 2.90625, + "learning_rate": 1.1067473346525791e-05, + "loss": 3.8888, + "step": 493150 + }, + { + "epoch": 4.2458677685950414, + "grad_norm": 5.34375, + "learning_rate": 1.1055088094730538e-05, + "loss": 4.0591, + "step": 493200 + }, + { + "epoch": 4.246298209366391, + "grad_norm": 3.4375, + "learning_rate": 1.1042709371300453e-05, + "loss": 4.4691, + "step": 493250 + }, + { + "epoch": 4.246728650137741, + "grad_norm": 1.21875, + "learning_rate": 1.103033717714408e-05, + "loss": 4.0431, + "step": 493300 + }, + { + "epoch": 4.247159090909091, + "grad_norm": 3.09375, + "learning_rate": 1.1017971513169555e-05, + "loss": 4.3939, + "step": 493350 + }, + { + "epoch": 4.247589531680441, + "grad_norm": 4.8125, + "learning_rate": 1.1005612380284469e-05, + "loss": 4.3246, + "step": 493400 + }, + { + "epoch": 4.24801997245179, + "grad_norm": 3.0625, + "learning_rate": 1.099325977939597e-05, + "loss": 4.3051, + "step": 493450 + }, + { + "epoch": 4.24845041322314, + "grad_norm": 2.171875, + "learning_rate": 1.098091371141069e-05, + "loss": 4.3707, + "step": 493500 + }, + { + "epoch": 4.24888085399449, + "grad_norm": 3.15625, + "learning_rate": 1.0968574177234815e-05, + "loss": 4.4762, + "step": 493550 + }, + { + "epoch": 4.24931129476584, + "grad_norm": 5.5625, + "learning_rate": 1.0956241177774019e-05, + "loss": 4.3958, + "step": 493600 + }, + { + "epoch": 4.2497417355371905, + "grad_norm": 2.8125, + "learning_rate": 1.0943914713933578e-05, + "loss": 4.351, + "step": 493650 + }, + { + "epoch": 4.25017217630854, + "grad_norm": 3.234375, + "learning_rate": 1.0931594786618149e-05, + "loss": 4.0912, + "step": 493700 + }, + { + "epoch": 4.25060261707989, + "grad_norm": 4.9375, + "learning_rate": 1.091928139673204e-05, + "loss": 4.5503, + "step": 493750 + }, + { + "epoch": 4.25103305785124, + "grad_norm": 3.4375, + "learning_rate": 1.0906974545179027e-05, + "loss": 4.2763, + "step": 493800 + }, + { + "epoch": 4.25146349862259, + "grad_norm": 3.28125, + "learning_rate": 1.0894674232862367e-05, + "loss": 4.3316, + "step": 493850 + }, + { + "epoch": 4.2518939393939394, + "grad_norm": 1.21875, + "learning_rate": 1.0882380460684938e-05, + "loss": 3.9106, + "step": 493900 + }, + { + "epoch": 4.252324380165289, + "grad_norm": 3.171875, + "learning_rate": 1.0870093229549028e-05, + "loss": 4.0961, + "step": 493950 + }, + { + "epoch": 4.252754820936639, + "grad_norm": 4.8125, + "learning_rate": 1.0857812540356494e-05, + "loss": 4.435, + "step": 494000 + }, + { + "epoch": 4.253185261707989, + "grad_norm": 3.5, + "learning_rate": 1.0845538394008736e-05, + "loss": 4.5434, + "step": 494050 + }, + { + "epoch": 4.253615702479339, + "grad_norm": 4.0, + "learning_rate": 1.0833270791406635e-05, + "loss": 4.1598, + "step": 494100 + }, + { + "epoch": 4.254046143250688, + "grad_norm": 3.34375, + "learning_rate": 1.0821009733450616e-05, + "loss": 4.1035, + "step": 494150 + }, + { + "epoch": 4.254476584022038, + "grad_norm": 3.0, + "learning_rate": 1.0808755221040613e-05, + "loss": 4.4697, + "step": 494200 + }, + { + "epoch": 4.254907024793388, + "grad_norm": 3.109375, + "learning_rate": 1.079650725507607e-05, + "loss": 4.1072, + "step": 494250 + }, + { + "epoch": 4.255337465564739, + "grad_norm": 2.734375, + "learning_rate": 1.0784265836455954e-05, + "loss": 4.5201, + "step": 494300 + }, + { + "epoch": 4.2557679063360885, + "grad_norm": 3.21875, + "learning_rate": 1.0772030966078795e-05, + "loss": 4.5165, + "step": 494350 + }, + { + "epoch": 4.256198347107438, + "grad_norm": 4.25, + "learning_rate": 1.0759802644842576e-05, + "loss": 4.6163, + "step": 494400 + }, + { + "epoch": 4.256628787878788, + "grad_norm": 1.46875, + "learning_rate": 1.0747580873644848e-05, + "loss": 4.406, + "step": 494450 + }, + { + "epoch": 4.257059228650138, + "grad_norm": 2.078125, + "learning_rate": 1.0735365653382645e-05, + "loss": 4.2456, + "step": 494500 + }, + { + "epoch": 4.257489669421488, + "grad_norm": 3.90625, + "learning_rate": 1.072315698495252e-05, + "loss": 4.0858, + "step": 494550 + }, + { + "epoch": 4.2579201101928374, + "grad_norm": 4.46875, + "learning_rate": 1.0710954869250622e-05, + "loss": 4.5307, + "step": 494600 + }, + { + "epoch": 4.258350550964187, + "grad_norm": 4.28125, + "learning_rate": 1.0698759307172534e-05, + "loss": 4.0033, + "step": 494650 + }, + { + "epoch": 4.258780991735537, + "grad_norm": 3.359375, + "learning_rate": 1.0686570299613341e-05, + "loss": 3.7, + "step": 494700 + }, + { + "epoch": 4.259211432506887, + "grad_norm": 4.75, + "learning_rate": 1.0674387847467748e-05, + "loss": 4.5149, + "step": 494750 + }, + { + "epoch": 4.259641873278237, + "grad_norm": 2.703125, + "learning_rate": 1.0662211951629896e-05, + "loss": 4.3212, + "step": 494800 + }, + { + "epoch": 4.260072314049586, + "grad_norm": 3.296875, + "learning_rate": 1.0650042612993449e-05, + "loss": 4.1083, + "step": 494850 + }, + { + "epoch": 4.260502754820937, + "grad_norm": 4.65625, + "learning_rate": 1.063787983245167e-05, + "loss": 4.3666, + "step": 494900 + }, + { + "epoch": 4.260933195592287, + "grad_norm": 2.546875, + "learning_rate": 1.0625723610897231e-05, + "loss": 4.482, + "step": 494950 + }, + { + "epoch": 4.261363636363637, + "grad_norm": 3.109375, + "learning_rate": 1.0613573949222367e-05, + "loss": 4.7658, + "step": 495000 + }, + { + "epoch": 4.261363636363637, + "eval_loss": 4.990317344665527, + "eval_runtime": 22.5933, + "eval_samples_per_second": 28.327, + "eval_steps_per_second": 14.163, + "eval_tts_loss": 7.480617183203461, + "step": 495000 + }, + { + "epoch": 4.2617940771349865, + "grad_norm": 3.9375, + "learning_rate": 1.060143084831886e-05, + "loss": 4.3694, + "step": 495050 + }, + { + "epoch": 4.262224517906336, + "grad_norm": 3.765625, + "learning_rate": 1.0589294309078002e-05, + "loss": 3.9857, + "step": 495100 + }, + { + "epoch": 4.262654958677686, + "grad_norm": 1.7265625, + "learning_rate": 1.0577164332390532e-05, + "loss": 4.3198, + "step": 495150 + }, + { + "epoch": 4.263085399449036, + "grad_norm": 3.9375, + "learning_rate": 1.0565040919146851e-05, + "loss": 4.4039, + "step": 495200 + }, + { + "epoch": 4.263515840220386, + "grad_norm": 5.1875, + "learning_rate": 1.0552924070236703e-05, + "loss": 4.6953, + "step": 495250 + }, + { + "epoch": 4.2639462809917354, + "grad_norm": 5.40625, + "learning_rate": 1.0540813786549497e-05, + "loss": 4.1689, + "step": 495300 + }, + { + "epoch": 4.264376721763085, + "grad_norm": 5.34375, + "learning_rate": 1.052871006897409e-05, + "loss": 4.4301, + "step": 495350 + }, + { + "epoch": 4.264807162534435, + "grad_norm": 3.296875, + "learning_rate": 1.0516612918398827e-05, + "loss": 4.3754, + "step": 495400 + }, + { + "epoch": 4.265237603305785, + "grad_norm": 3.5, + "learning_rate": 1.0504522335711698e-05, + "loss": 4.3632, + "step": 495450 + }, + { + "epoch": 4.265668044077135, + "grad_norm": 4.71875, + "learning_rate": 1.0492438321800046e-05, + "loss": 4.1471, + "step": 495500 + }, + { + "epoch": 4.266098484848484, + "grad_norm": 3.796875, + "learning_rate": 1.0480360877550832e-05, + "loss": 4.4319, + "step": 495550 + }, + { + "epoch": 4.266528925619835, + "grad_norm": 2.1875, + "learning_rate": 1.0468290003850534e-05, + "loss": 4.1945, + "step": 495600 + }, + { + "epoch": 4.266959366391185, + "grad_norm": 5.9375, + "learning_rate": 1.045622570158512e-05, + "loss": 4.5434, + "step": 495650 + }, + { + "epoch": 4.267389807162535, + "grad_norm": 5.59375, + "learning_rate": 1.0444167971640085e-05, + "loss": 4.1925, + "step": 495700 + }, + { + "epoch": 4.2678202479338845, + "grad_norm": 2.9375, + "learning_rate": 1.0432116814900439e-05, + "loss": 3.9483, + "step": 495750 + }, + { + "epoch": 4.268250688705234, + "grad_norm": 4.375, + "learning_rate": 1.04200722322507e-05, + "loss": 4.3413, + "step": 495800 + }, + { + "epoch": 4.268681129476584, + "grad_norm": 3.578125, + "learning_rate": 1.0408034224574903e-05, + "loss": 4.1517, + "step": 495850 + }, + { + "epoch": 4.269111570247934, + "grad_norm": 1.8515625, + "learning_rate": 1.0396002792756654e-05, + "loss": 4.3048, + "step": 495900 + }, + { + "epoch": 4.269542011019284, + "grad_norm": 1.921875, + "learning_rate": 1.0383977937679013e-05, + "loss": 3.8577, + "step": 495950 + }, + { + "epoch": 4.2699724517906334, + "grad_norm": 3.703125, + "learning_rate": 1.0371959660224574e-05, + "loss": 4.1211, + "step": 496000 + }, + { + "epoch": 4.270402892561983, + "grad_norm": 2.140625, + "learning_rate": 1.0359947961275463e-05, + "loss": 4.0847, + "step": 496050 + }, + { + "epoch": 4.270833333333333, + "grad_norm": 3.0, + "learning_rate": 1.0347942841713287e-05, + "loss": 4.2619, + "step": 496100 + }, + { + "epoch": 4.271263774104683, + "grad_norm": 5.71875, + "learning_rate": 1.0335944302419232e-05, + "loss": 4.6816, + "step": 496150 + }, + { + "epoch": 4.2716942148760335, + "grad_norm": 2.15625, + "learning_rate": 1.0323952344273956e-05, + "loss": 4.1899, + "step": 496200 + }, + { + "epoch": 4.272124655647383, + "grad_norm": 3.0625, + "learning_rate": 1.0311966968157604e-05, + "loss": 4.5647, + "step": 496250 + }, + { + "epoch": 4.272555096418733, + "grad_norm": 4.65625, + "learning_rate": 1.0299988174949937e-05, + "loss": 4.0187, + "step": 496300 + }, + { + "epoch": 4.272985537190083, + "grad_norm": 5.375, + "learning_rate": 1.0288015965530129e-05, + "loss": 4.3155, + "step": 496350 + }, + { + "epoch": 4.273415977961433, + "grad_norm": 3.5, + "learning_rate": 1.0276050340776921e-05, + "loss": 4.5485, + "step": 496400 + }, + { + "epoch": 4.2738464187327825, + "grad_norm": 2.84375, + "learning_rate": 1.02640913015686e-05, + "loss": 4.3729, + "step": 496450 + }, + { + "epoch": 4.274276859504132, + "grad_norm": 2.5625, + "learning_rate": 1.0252138848782899e-05, + "loss": 4.3204, + "step": 496500 + }, + { + "epoch": 4.274707300275482, + "grad_norm": 1.890625, + "learning_rate": 1.0240192983297081e-05, + "loss": 4.1798, + "step": 496550 + }, + { + "epoch": 4.275137741046832, + "grad_norm": 3.828125, + "learning_rate": 1.0228253705988e-05, + "loss": 4.3045, + "step": 496600 + }, + { + "epoch": 4.275568181818182, + "grad_norm": 4.21875, + "learning_rate": 1.0216321017731945e-05, + "loss": 4.5311, + "step": 496650 + }, + { + "epoch": 4.2759986225895315, + "grad_norm": 4.21875, + "learning_rate": 1.0204394919404747e-05, + "loss": 4.5882, + "step": 496700 + }, + { + "epoch": 4.276429063360881, + "grad_norm": 1.5625, + "learning_rate": 1.0192475411881764e-05, + "loss": 4.1879, + "step": 496750 + }, + { + "epoch": 4.276859504132231, + "grad_norm": 4.59375, + "learning_rate": 1.0180562496037849e-05, + "loss": 4.5052, + "step": 496800 + }, + { + "epoch": 4.277289944903581, + "grad_norm": 4.5625, + "learning_rate": 1.0168656172747404e-05, + "loss": 4.51, + "step": 496850 + }, + { + "epoch": 4.2777203856749315, + "grad_norm": 4.40625, + "learning_rate": 1.015675644288432e-05, + "loss": 4.0977, + "step": 496900 + }, + { + "epoch": 4.278150826446281, + "grad_norm": 5.4375, + "learning_rate": 1.0144863307322017e-05, + "loss": 4.4966, + "step": 496950 + }, + { + "epoch": 4.278581267217631, + "grad_norm": 4.4375, + "learning_rate": 1.013297676693341e-05, + "loss": 4.6574, + "step": 497000 + }, + { + "epoch": 4.279011707988981, + "grad_norm": 1.71875, + "learning_rate": 1.0121096822590969e-05, + "loss": 4.5093, + "step": 497050 + }, + { + "epoch": 4.279442148760331, + "grad_norm": 4.9375, + "learning_rate": 1.0109223475166607e-05, + "loss": 4.3858, + "step": 497100 + }, + { + "epoch": 4.2798725895316805, + "grad_norm": 6.09375, + "learning_rate": 1.009735672553187e-05, + "loss": 4.2499, + "step": 497150 + }, + { + "epoch": 4.28030303030303, + "grad_norm": 3.328125, + "learning_rate": 1.0085496574557708e-05, + "loss": 4.4491, + "step": 497200 + }, + { + "epoch": 4.28073347107438, + "grad_norm": 3.25, + "learning_rate": 1.0073643023114642e-05, + "loss": 4.2579, + "step": 497250 + }, + { + "epoch": 4.28116391184573, + "grad_norm": 2.796875, + "learning_rate": 1.0061796072072705e-05, + "loss": 4.5695, + "step": 497300 + }, + { + "epoch": 4.28159435261708, + "grad_norm": 1.9609375, + "learning_rate": 1.0049955722301419e-05, + "loss": 3.7701, + "step": 497350 + }, + { + "epoch": 4.2820247933884295, + "grad_norm": 2.96875, + "learning_rate": 1.0038121974669834e-05, + "loss": 4.6326, + "step": 497400 + }, + { + "epoch": 4.282455234159779, + "grad_norm": 4.09375, + "learning_rate": 1.0026294830046556e-05, + "loss": 4.4966, + "step": 497450 + }, + { + "epoch": 4.28288567493113, + "grad_norm": 3.28125, + "learning_rate": 1.0014474289299657e-05, + "loss": 4.0949, + "step": 497500 + }, + { + "epoch": 4.28331611570248, + "grad_norm": 4.0625, + "learning_rate": 1.0002660353296745e-05, + "loss": 4.2209, + "step": 497550 + }, + { + "epoch": 4.2837465564738295, + "grad_norm": 2.75, + "learning_rate": 9.990853022904922e-06, + "loss": 4.1878, + "step": 497600 + }, + { + "epoch": 4.284176997245179, + "grad_norm": 2.640625, + "learning_rate": 9.979052298990798e-06, + "loss": 4.1017, + "step": 497650 + }, + { + "epoch": 4.284607438016529, + "grad_norm": 1.8671875, + "learning_rate": 9.96725818242058e-06, + "loss": 4.2861, + "step": 497700 + }, + { + "epoch": 4.285037878787879, + "grad_norm": 4.8125, + "learning_rate": 9.95547067405992e-06, + "loss": 4.4236, + "step": 497750 + }, + { + "epoch": 4.285468319559229, + "grad_norm": 3.375, + "learning_rate": 9.943689774773935e-06, + "loss": 4.0353, + "step": 497800 + }, + { + "epoch": 4.2858987603305785, + "grad_norm": 3.21875, + "learning_rate": 9.931915485427379e-06, + "loss": 4.2317, + "step": 497850 + }, + { + "epoch": 4.286329201101928, + "grad_norm": 2.578125, + "learning_rate": 9.92014780688444e-06, + "loss": 4.4292, + "step": 497900 + }, + { + "epoch": 4.286759641873278, + "grad_norm": 3.46875, + "learning_rate": 9.908386740008823e-06, + "loss": 4.6995, + "step": 497950 + }, + { + "epoch": 4.287190082644628, + "grad_norm": 3.4375, + "learning_rate": 9.89663228566382e-06, + "loss": 4.3831, + "step": 498000 + }, + { + "epoch": 4.287190082644628, + "eval_loss": 4.990382194519043, + "eval_runtime": 23.7313, + "eval_samples_per_second": 26.969, + "eval_steps_per_second": 13.484, + "eval_tts_loss": 7.480655207959233, + "step": 498000 + }, + { + "epoch": 4.287620523415978, + "grad_norm": 1.9375, + "learning_rate": 9.884884444712128e-06, + "loss": 4.4527, + "step": 498050 + }, + { + "epoch": 4.2880509641873275, + "grad_norm": 2.625, + "learning_rate": 9.873143218016013e-06, + "loss": 4.3169, + "step": 498100 + }, + { + "epoch": 4.288481404958677, + "grad_norm": 6.15625, + "learning_rate": 9.861408606437283e-06, + "loss": 4.1037, + "step": 498150 + }, + { + "epoch": 4.288911845730028, + "grad_norm": 1.734375, + "learning_rate": 9.84968061083723e-06, + "loss": 4.3089, + "step": 498200 + }, + { + "epoch": 4.289342286501378, + "grad_norm": 1.2890625, + "learning_rate": 9.837959232076654e-06, + "loss": 4.4333, + "step": 498250 + }, + { + "epoch": 4.2897727272727275, + "grad_norm": 1.5234375, + "learning_rate": 9.826244471015877e-06, + "loss": 4.0469, + "step": 498300 + }, + { + "epoch": 4.290203168044077, + "grad_norm": 4.28125, + "learning_rate": 9.814536328514723e-06, + "loss": 4.1831, + "step": 498350 + }, + { + "epoch": 4.290633608815427, + "grad_norm": 5.3125, + "learning_rate": 9.802834805432593e-06, + "loss": 4.4076, + "step": 498400 + }, + { + "epoch": 4.291064049586777, + "grad_norm": 1.765625, + "learning_rate": 9.79113990262832e-06, + "loss": 4.666, + "step": 498450 + }, + { + "epoch": 4.291494490358127, + "grad_norm": 3.078125, + "learning_rate": 9.77945162096029e-06, + "loss": 4.378, + "step": 498500 + }, + { + "epoch": 4.2919249311294765, + "grad_norm": 6.40625, + "learning_rate": 9.767769961286387e-06, + "loss": 4.38, + "step": 498550 + }, + { + "epoch": 4.292355371900826, + "grad_norm": 2.203125, + "learning_rate": 9.756094924464032e-06, + "loss": 4.2723, + "step": 498600 + }, + { + "epoch": 4.292785812672176, + "grad_norm": 4.25, + "learning_rate": 9.744426511350135e-06, + "loss": 4.2122, + "step": 498650 + }, + { + "epoch": 4.293216253443526, + "grad_norm": 3.140625, + "learning_rate": 9.732764722801157e-06, + "loss": 4.5124, + "step": 498700 + }, + { + "epoch": 4.293646694214876, + "grad_norm": 5.4375, + "learning_rate": 9.721109559673026e-06, + "loss": 4.2672, + "step": 498750 + }, + { + "epoch": 4.294077134986226, + "grad_norm": 2.078125, + "learning_rate": 9.70946102282122e-06, + "loss": 4.1701, + "step": 498800 + }, + { + "epoch": 4.294507575757576, + "grad_norm": 4.875, + "learning_rate": 9.697819113100714e-06, + "loss": 3.9554, + "step": 498850 + }, + { + "epoch": 4.294938016528926, + "grad_norm": 5.1875, + "learning_rate": 9.686183831365991e-06, + "loss": 4.3497, + "step": 498900 + }, + { + "epoch": 4.295368457300276, + "grad_norm": 1.65625, + "learning_rate": 9.674555178471034e-06, + "loss": 4.0724, + "step": 498950 + }, + { + "epoch": 4.2957988980716255, + "grad_norm": 2.09375, + "learning_rate": 9.662933155269415e-06, + "loss": 4.3112, + "step": 499000 + }, + { + "epoch": 4.296229338842975, + "grad_norm": 3.8125, + "learning_rate": 9.651317762614132e-06, + "loss": 4.4373, + "step": 499050 + }, + { + "epoch": 4.296659779614325, + "grad_norm": 6.34375, + "learning_rate": 9.639709001357733e-06, + "loss": 4.3362, + "step": 499100 + }, + { + "epoch": 4.297090220385675, + "grad_norm": 2.84375, + "learning_rate": 9.628106872352283e-06, + "loss": 3.9074, + "step": 499150 + }, + { + "epoch": 4.297520661157025, + "grad_norm": 4.46875, + "learning_rate": 9.616511376449355e-06, + "loss": 4.5121, + "step": 499200 + }, + { + "epoch": 4.2979511019283745, + "grad_norm": 2.75, + "learning_rate": 9.604922514499993e-06, + "loss": 4.1561, + "step": 499250 + }, + { + "epoch": 4.298381542699724, + "grad_norm": 3.359375, + "learning_rate": 9.59334028735488e-06, + "loss": 4.252, + "step": 499300 + }, + { + "epoch": 4.298811983471074, + "grad_norm": 3.4375, + "learning_rate": 9.581764695864026e-06, + "loss": 4.5887, + "step": 499350 + }, + { + "epoch": 4.299242424242424, + "grad_norm": 3.109375, + "learning_rate": 9.57019574087713e-06, + "loss": 4.2836, + "step": 499400 + }, + { + "epoch": 4.299672865013774, + "grad_norm": 2.28125, + "learning_rate": 9.55863342324329e-06, + "loss": 4.3688, + "step": 499450 + }, + { + "epoch": 4.300103305785124, + "grad_norm": 2.921875, + "learning_rate": 9.547077743811162e-06, + "loss": 4.263, + "step": 499500 + }, + { + "epoch": 4.300533746556474, + "grad_norm": 2.90625, + "learning_rate": 9.535528703428942e-06, + "loss": 3.8768, + "step": 499550 + }, + { + "epoch": 4.300964187327824, + "grad_norm": 1.3359375, + "learning_rate": 9.523986302944265e-06, + "loss": 4.1092, + "step": 499600 + }, + { + "epoch": 4.301394628099174, + "grad_norm": 2.078125, + "learning_rate": 9.51245054320431e-06, + "loss": 4.65, + "step": 499650 + }, + { + "epoch": 4.3018250688705235, + "grad_norm": 5.78125, + "learning_rate": 9.500921425055809e-06, + "loss": 4.3907, + "step": 499700 + }, + { + "epoch": 4.302255509641873, + "grad_norm": 3.078125, + "learning_rate": 9.489398949344963e-06, + "loss": 4.0317, + "step": 499750 + }, + { + "epoch": 4.302685950413223, + "grad_norm": 6.0625, + "learning_rate": 9.477883116917496e-06, + "loss": 4.2088, + "step": 499800 + }, + { + "epoch": 4.303116391184573, + "grad_norm": 5.4375, + "learning_rate": 9.466373928618643e-06, + "loss": 4.0614, + "step": 499850 + }, + { + "epoch": 4.303546831955923, + "grad_norm": 2.765625, + "learning_rate": 9.45487138529314e-06, + "loss": 4.3172, + "step": 499900 + }, + { + "epoch": 4.3039772727272725, + "grad_norm": 2.96875, + "learning_rate": 9.443375487785278e-06, + "loss": 4.2179, + "step": 499950 + }, + { + "epoch": 4.304407713498622, + "grad_norm": 4.59375, + "learning_rate": 9.431886236938826e-06, + "loss": 4.1902, + "step": 500000 + }, + { + "epoch": 4.304838154269972, + "grad_norm": 3.90625, + "learning_rate": 9.420403633597052e-06, + "loss": 4.277, + "step": 500050 + }, + { + "epoch": 4.305268595041323, + "grad_norm": 5.125, + "learning_rate": 9.408927678602775e-06, + "loss": 4.1179, + "step": 500100 + }, + { + "epoch": 4.3056990358126725, + "grad_norm": 2.625, + "learning_rate": 9.397458372798285e-06, + "loss": 4.484, + "step": 500150 + }, + { + "epoch": 4.306129476584022, + "grad_norm": 2.328125, + "learning_rate": 9.385995717025408e-06, + "loss": 4.38, + "step": 500200 + }, + { + "epoch": 4.306559917355372, + "grad_norm": 2.390625, + "learning_rate": 9.374539712125484e-06, + "loss": 3.9738, + "step": 500250 + }, + { + "epoch": 4.306990358126722, + "grad_norm": 3.375, + "learning_rate": 9.363090358939398e-06, + "loss": 4.6081, + "step": 500300 + }, + { + "epoch": 4.307420798898072, + "grad_norm": 6.75, + "learning_rate": 9.351647658307416e-06, + "loss": 4.644, + "step": 500350 + }, + { + "epoch": 4.3078512396694215, + "grad_norm": 2.71875, + "learning_rate": 9.340211611069493e-06, + "loss": 3.9021, + "step": 500400 + }, + { + "epoch": 4.308281680440771, + "grad_norm": 3.46875, + "learning_rate": 9.328782218064968e-06, + "loss": 4.2323, + "step": 500450 + }, + { + "epoch": 4.308712121212121, + "grad_norm": 2.125, + "learning_rate": 9.317359480132726e-06, + "loss": 4.2335, + "step": 500500 + }, + { + "epoch": 4.309142561983471, + "grad_norm": 2.234375, + "learning_rate": 9.30594339811124e-06, + "loss": 4.1903, + "step": 500550 + }, + { + "epoch": 4.309573002754821, + "grad_norm": 2.703125, + "learning_rate": 9.294533972838327e-06, + "loss": 4.4114, + "step": 500600 + }, + { + "epoch": 4.3100034435261705, + "grad_norm": 3.3125, + "learning_rate": 9.283131205151474e-06, + "loss": 4.5617, + "step": 500650 + }, + { + "epoch": 4.31043388429752, + "grad_norm": 2.75, + "learning_rate": 9.271735095887623e-06, + "loss": 4.2109, + "step": 500700 + }, + { + "epoch": 4.31086432506887, + "grad_norm": 2.078125, + "learning_rate": 9.260345645883195e-06, + "loss": 4.5076, + "step": 500750 + }, + { + "epoch": 4.311294765840221, + "grad_norm": 5.21875, + "learning_rate": 9.248962855974153e-06, + "loss": 4.0628, + "step": 500800 + }, + { + "epoch": 4.3117252066115705, + "grad_norm": 6.03125, + "learning_rate": 9.237586726996028e-06, + "loss": 3.9696, + "step": 500850 + }, + { + "epoch": 4.31215564738292, + "grad_norm": 4.75, + "learning_rate": 9.226217259783709e-06, + "loss": 4.4315, + "step": 500900 + }, + { + "epoch": 4.31258608815427, + "grad_norm": 2.765625, + "learning_rate": 9.214854455171751e-06, + "loss": 4.5924, + "step": 500950 + }, + { + "epoch": 4.31301652892562, + "grad_norm": 2.625, + "learning_rate": 9.203498313994164e-06, + "loss": 4.3333, + "step": 501000 + }, + { + "epoch": 4.31301652892562, + "eval_loss": 4.990403175354004, + "eval_runtime": 24.1551, + "eval_samples_per_second": 26.495, + "eval_steps_per_second": 13.248, + "eval_tts_loss": 7.480433057473844, + "step": 501000 + }, + { + "epoch": 4.31344696969697, + "grad_norm": 2.578125, + "learning_rate": 9.192148837084414e-06, + "loss": 4.2487, + "step": 501050 + }, + { + "epoch": 4.3138774104683195, + "grad_norm": 2.578125, + "learning_rate": 9.1808060252756e-06, + "loss": 4.3544, + "step": 501100 + }, + { + "epoch": 4.314307851239669, + "grad_norm": 3.0625, + "learning_rate": 9.169469879400205e-06, + "loss": 4.3581, + "step": 501150 + }, + { + "epoch": 4.314738292011019, + "grad_norm": 2.71875, + "learning_rate": 9.158140400290282e-06, + "loss": 4.299, + "step": 501200 + }, + { + "epoch": 4.315168732782369, + "grad_norm": 1.984375, + "learning_rate": 9.146817588777413e-06, + "loss": 4.455, + "step": 501250 + }, + { + "epoch": 4.315599173553719, + "grad_norm": 2.90625, + "learning_rate": 9.135501445692662e-06, + "loss": 4.3407, + "step": 501300 + }, + { + "epoch": 4.3160296143250685, + "grad_norm": 3.296875, + "learning_rate": 9.124191971866603e-06, + "loss": 4.2062, + "step": 501350 + }, + { + "epoch": 4.316460055096419, + "grad_norm": 2.4375, + "learning_rate": 9.112889168129323e-06, + "loss": 4.9257, + "step": 501400 + }, + { + "epoch": 4.316890495867769, + "grad_norm": 2.578125, + "learning_rate": 9.10159303531044e-06, + "loss": 4.5144, + "step": 501450 + }, + { + "epoch": 4.317320936639119, + "grad_norm": 3.59375, + "learning_rate": 9.090303574239034e-06, + "loss": 4.3472, + "step": 501500 + }, + { + "epoch": 4.3177513774104685, + "grad_norm": 4.5, + "learning_rate": 9.079020785743765e-06, + "loss": 4.2618, + "step": 501550 + }, + { + "epoch": 4.318181818181818, + "grad_norm": 3.6875, + "learning_rate": 9.067744670652744e-06, + "loss": 4.5121, + "step": 501600 + }, + { + "epoch": 4.318612258953168, + "grad_norm": 1.5, + "learning_rate": 9.056475229793627e-06, + "loss": 4.1174, + "step": 501650 + }, + { + "epoch": 4.319042699724518, + "grad_norm": 3.25, + "learning_rate": 9.045212463993546e-06, + "loss": 4.4511, + "step": 501700 + }, + { + "epoch": 4.319473140495868, + "grad_norm": 4.0625, + "learning_rate": 9.033956374079167e-06, + "loss": 4.0417, + "step": 501750 + }, + { + "epoch": 4.3199035812672175, + "grad_norm": 2.25, + "learning_rate": 9.022706960876692e-06, + "loss": 4.1759, + "step": 501800 + }, + { + "epoch": 4.320334022038567, + "grad_norm": 3.875, + "learning_rate": 9.011464225211797e-06, + "loss": 4.2091, + "step": 501850 + }, + { + "epoch": 4.320764462809917, + "grad_norm": 5.40625, + "learning_rate": 9.00022816790963e-06, + "loss": 4.0665, + "step": 501900 + }, + { + "epoch": 4.321194903581267, + "grad_norm": 3.828125, + "learning_rate": 8.988998789794934e-06, + "loss": 4.1778, + "step": 501950 + }, + { + "epoch": 4.321625344352617, + "grad_norm": 4.03125, + "learning_rate": 8.977776091691915e-06, + "loss": 4.1469, + "step": 502000 + }, + { + "epoch": 4.3220557851239665, + "grad_norm": 3.0, + "learning_rate": 8.96656007442428e-06, + "loss": 4.1407, + "step": 502050 + }, + { + "epoch": 4.322486225895317, + "grad_norm": 3.234375, + "learning_rate": 8.955350738815293e-06, + "loss": 4.3537, + "step": 502100 + }, + { + "epoch": 4.322916666666667, + "grad_norm": 2.46875, + "learning_rate": 8.944148085687665e-06, + "loss": 4.0011, + "step": 502150 + }, + { + "epoch": 4.323347107438017, + "grad_norm": 3.46875, + "learning_rate": 8.932952115863646e-06, + "loss": 4.2977, + "step": 502200 + }, + { + "epoch": 4.3237775482093666, + "grad_norm": 2.984375, + "learning_rate": 8.921762830165015e-06, + "loss": 4.244, + "step": 502250 + }, + { + "epoch": 4.324207988980716, + "grad_norm": 1.4453125, + "learning_rate": 8.910580229413035e-06, + "loss": 4.4479, + "step": 502300 + }, + { + "epoch": 4.324638429752066, + "grad_norm": 1.71875, + "learning_rate": 8.899404314428472e-06, + "loss": 4.1162, + "step": 502350 + }, + { + "epoch": 4.325068870523416, + "grad_norm": 2.359375, + "learning_rate": 8.888235086031637e-06, + "loss": 4.3571, + "step": 502400 + }, + { + "epoch": 4.325499311294766, + "grad_norm": 3.53125, + "learning_rate": 8.877072545042298e-06, + "loss": 4.4373, + "step": 502450 + }, + { + "epoch": 4.3259297520661155, + "grad_norm": 3.1875, + "learning_rate": 8.865916692279785e-06, + "loss": 4.0097, + "step": 502500 + }, + { + "epoch": 4.326360192837465, + "grad_norm": 4.625, + "learning_rate": 8.854767528562913e-06, + "loss": 4.1214, + "step": 502550 + }, + { + "epoch": 4.326790633608815, + "grad_norm": 2.796875, + "learning_rate": 8.843625054709981e-06, + "loss": 4.4159, + "step": 502600 + }, + { + "epoch": 4.327221074380165, + "grad_norm": 2.734375, + "learning_rate": 8.83248927153888e-06, + "loss": 4.5618, + "step": 502650 + }, + { + "epoch": 4.327651515151516, + "grad_norm": 4.21875, + "learning_rate": 8.8213601798669e-06, + "loss": 4.5627, + "step": 502700 + }, + { + "epoch": 4.328081955922865, + "grad_norm": 2.6875, + "learning_rate": 8.810237780510878e-06, + "loss": 4.3087, + "step": 502750 + }, + { + "epoch": 4.328512396694215, + "grad_norm": 3.8125, + "learning_rate": 8.799122074287224e-06, + "loss": 4.0904, + "step": 502800 + }, + { + "epoch": 4.328942837465565, + "grad_norm": 3.828125, + "learning_rate": 8.78801306201179e-06, + "loss": 4.023, + "step": 502850 + }, + { + "epoch": 4.329373278236915, + "grad_norm": 2.875, + "learning_rate": 8.776910744499956e-06, + "loss": 4.3179, + "step": 502900 + }, + { + "epoch": 4.3298037190082646, + "grad_norm": 3.1875, + "learning_rate": 8.76581512256659e-06, + "loss": 4.3492, + "step": 502950 + }, + { + "epoch": 4.330234159779614, + "grad_norm": 1.765625, + "learning_rate": 8.754726197026108e-06, + "loss": 4.1772, + "step": 503000 + }, + { + "epoch": 4.330664600550964, + "grad_norm": 3.0, + "learning_rate": 8.743643968692384e-06, + "loss": 4.321, + "step": 503050 + }, + { + "epoch": 4.331095041322314, + "grad_norm": 3.140625, + "learning_rate": 8.732568438378864e-06, + "loss": 4.4251, + "step": 503100 + }, + { + "epoch": 4.331525482093664, + "grad_norm": 2.53125, + "learning_rate": 8.721499606898453e-06, + "loss": 4.6072, + "step": 503150 + }, + { + "epoch": 4.3319559228650135, + "grad_norm": 4.21875, + "learning_rate": 8.710437475063582e-06, + "loss": 4.4484, + "step": 503200 + }, + { + "epoch": 4.332386363636363, + "grad_norm": 1.7265625, + "learning_rate": 8.699382043686177e-06, + "loss": 4.3609, + "step": 503250 + }, + { + "epoch": 4.332816804407713, + "grad_norm": 1.2890625, + "learning_rate": 8.68833331357769e-06, + "loss": 4.4118, + "step": 503300 + }, + { + "epoch": 4.333247245179063, + "grad_norm": 3.796875, + "learning_rate": 8.677291285549084e-06, + "loss": 4.3946, + "step": 503350 + }, + { + "epoch": 4.333677685950414, + "grad_norm": 2.75, + "learning_rate": 8.66625596041083e-06, + "loss": 4.3638, + "step": 503400 + }, + { + "epoch": 4.334108126721763, + "grad_norm": 4.3125, + "learning_rate": 8.655227338972848e-06, + "loss": 4.5647, + "step": 503450 + }, + { + "epoch": 4.334538567493113, + "grad_norm": 3.8125, + "learning_rate": 8.64420542204466e-06, + "loss": 3.8277, + "step": 503500 + }, + { + "epoch": 4.334969008264463, + "grad_norm": 1.3046875, + "learning_rate": 8.633190210435237e-06, + "loss": 4.1098, + "step": 503550 + }, + { + "epoch": 4.335399449035813, + "grad_norm": 3.640625, + "learning_rate": 8.622181704953047e-06, + "loss": 4.2239, + "step": 503600 + }, + { + "epoch": 4.3358298898071626, + "grad_norm": 2.625, + "learning_rate": 8.611179906406165e-06, + "loss": 4.2217, + "step": 503650 + }, + { + "epoch": 4.336260330578512, + "grad_norm": 2.609375, + "learning_rate": 8.60018481560202e-06, + "loss": 4.1941, + "step": 503700 + }, + { + "epoch": 4.336690771349862, + "grad_norm": 2.703125, + "learning_rate": 8.589196433347646e-06, + "loss": 4.3679, + "step": 503750 + }, + { + "epoch": 4.337121212121212, + "grad_norm": 1.484375, + "learning_rate": 8.578214760449599e-06, + "loss": 3.8968, + "step": 503800 + }, + { + "epoch": 4.337551652892562, + "grad_norm": 2.015625, + "learning_rate": 8.567239797713888e-06, + "loss": 4.3936, + "step": 503850 + }, + { + "epoch": 4.3379820936639115, + "grad_norm": 2.6875, + "learning_rate": 8.556271545946049e-06, + "loss": 4.4982, + "step": 503900 + }, + { + "epoch": 4.338412534435261, + "grad_norm": 3.59375, + "learning_rate": 8.545310005951135e-06, + "loss": 4.4685, + "step": 503950 + }, + { + "epoch": 4.338842975206612, + "grad_norm": 4.15625, + "learning_rate": 8.53435517853367e-06, + "loss": 4.1723, + "step": 504000 + }, + { + "epoch": 4.338842975206612, + "eval_loss": 4.990280628204346, + "eval_runtime": 24.3293, + "eval_samples_per_second": 26.306, + "eval_steps_per_second": 13.153, + "eval_tts_loss": 7.478574276202835, + "step": 504000 + }, + { + "epoch": 4.339273415977962, + "grad_norm": 3.5, + "learning_rate": 8.523407064497769e-06, + "loss": 4.7386, + "step": 504050 + }, + { + "epoch": 4.339703856749312, + "grad_norm": 4.1875, + "learning_rate": 8.512465664646973e-06, + "loss": 4.3512, + "step": 504100 + }, + { + "epoch": 4.340134297520661, + "grad_norm": 1.9453125, + "learning_rate": 8.501530979784356e-06, + "loss": 4.2346, + "step": 504150 + }, + { + "epoch": 4.340564738292011, + "grad_norm": 3.671875, + "learning_rate": 8.490603010712505e-06, + "loss": 4.8081, + "step": 504200 + }, + { + "epoch": 4.340995179063361, + "grad_norm": 4.25, + "learning_rate": 8.479681758233494e-06, + "loss": 4.301, + "step": 504250 + }, + { + "epoch": 4.341425619834711, + "grad_norm": 4.125, + "learning_rate": 8.468767223148921e-06, + "loss": 4.3199, + "step": 504300 + }, + { + "epoch": 4.3418560606060606, + "grad_norm": 3.75, + "learning_rate": 8.457859406259916e-06, + "loss": 3.8378, + "step": 504350 + }, + { + "epoch": 4.34228650137741, + "grad_norm": 1.3046875, + "learning_rate": 8.44695830836707e-06, + "loss": 3.7013, + "step": 504400 + }, + { + "epoch": 4.34271694214876, + "grad_norm": 1.9765625, + "learning_rate": 8.43606393027051e-06, + "loss": 4.3266, + "step": 504450 + }, + { + "epoch": 4.34314738292011, + "grad_norm": 4.125, + "learning_rate": 8.425176272769842e-06, + "loss": 4.3817, + "step": 504500 + }, + { + "epoch": 4.34357782369146, + "grad_norm": 3.265625, + "learning_rate": 8.414295336664214e-06, + "loss": 4.4488, + "step": 504550 + }, + { + "epoch": 4.3440082644628095, + "grad_norm": 6.34375, + "learning_rate": 8.403421122752242e-06, + "loss": 3.9959, + "step": 504600 + }, + { + "epoch": 4.344438705234159, + "grad_norm": 3.484375, + "learning_rate": 8.39255363183209e-06, + "loss": 4.2442, + "step": 504650 + }, + { + "epoch": 4.34486914600551, + "grad_norm": 2.484375, + "learning_rate": 8.381692864701419e-06, + "loss": 4.776, + "step": 504700 + }, + { + "epoch": 4.34529958677686, + "grad_norm": 3.375, + "learning_rate": 8.370838822157357e-06, + "loss": 4.2805, + "step": 504750 + }, + { + "epoch": 4.34573002754821, + "grad_norm": 5.1875, + "learning_rate": 8.359991504996578e-06, + "loss": 4.1398, + "step": 504800 + }, + { + "epoch": 4.346160468319559, + "grad_norm": 1.171875, + "learning_rate": 8.349150914015247e-06, + "loss": 4.4873, + "step": 504850 + }, + { + "epoch": 4.346590909090909, + "grad_norm": 3.21875, + "learning_rate": 8.338317050009058e-06, + "loss": 4.1217, + "step": 504900 + }, + { + "epoch": 4.347021349862259, + "grad_norm": 2.484375, + "learning_rate": 8.327489913773212e-06, + "loss": 4.4885, + "step": 504950 + }, + { + "epoch": 4.347451790633609, + "grad_norm": 2.828125, + "learning_rate": 8.316669506102314e-06, + "loss": 4.3804, + "step": 505000 + }, + { + "epoch": 4.3478822314049586, + "grad_norm": 5.0625, + "learning_rate": 8.305855827790642e-06, + "loss": 4.2444, + "step": 505050 + }, + { + "epoch": 4.348312672176308, + "grad_norm": 3.390625, + "learning_rate": 8.295048879631861e-06, + "loss": 4.4931, + "step": 505100 + }, + { + "epoch": 4.348743112947658, + "grad_norm": 5.75, + "learning_rate": 8.28424866241917e-06, + "loss": 4.6092, + "step": 505150 + }, + { + "epoch": 4.349173553719008, + "grad_norm": 3.359375, + "learning_rate": 8.273455176945333e-06, + "loss": 3.9988, + "step": 505200 + }, + { + "epoch": 4.349603994490358, + "grad_norm": 6.5, + "learning_rate": 8.262668424002517e-06, + "loss": 4.4437, + "step": 505250 + }, + { + "epoch": 4.350034435261708, + "grad_norm": 4.1875, + "learning_rate": 8.251888404382435e-06, + "loss": 4.4969, + "step": 505300 + }, + { + "epoch": 4.350464876033058, + "grad_norm": 2.5625, + "learning_rate": 8.241115118876364e-06, + "loss": 4.2591, + "step": 505350 + }, + { + "epoch": 4.350895316804408, + "grad_norm": 3.59375, + "learning_rate": 8.230348568275015e-06, + "loss": 4.0655, + "step": 505400 + }, + { + "epoch": 4.351325757575758, + "grad_norm": 2.46875, + "learning_rate": 8.219588753368634e-06, + "loss": 4.7343, + "step": 505450 + }, + { + "epoch": 4.351756198347108, + "grad_norm": 3.328125, + "learning_rate": 8.208835674946968e-06, + "loss": 4.2602, + "step": 505500 + }, + { + "epoch": 4.352186639118457, + "grad_norm": 6.59375, + "learning_rate": 8.198089333799264e-06, + "loss": 4.5904, + "step": 505550 + }, + { + "epoch": 4.352617079889807, + "grad_norm": 3.15625, + "learning_rate": 8.18734973071429e-06, + "loss": 4.3814, + "step": 505600 + }, + { + "epoch": 4.353047520661157, + "grad_norm": 3.796875, + "learning_rate": 8.176616866480313e-06, + "loss": 4.1741, + "step": 505650 + }, + { + "epoch": 4.353477961432507, + "grad_norm": 6.53125, + "learning_rate": 8.165890741885084e-06, + "loss": 4.6812, + "step": 505700 + }, + { + "epoch": 4.3539084022038566, + "grad_norm": 3.265625, + "learning_rate": 8.155171357715897e-06, + "loss": 4.4283, + "step": 505750 + }, + { + "epoch": 4.354338842975206, + "grad_norm": 5.9375, + "learning_rate": 8.144458714759528e-06, + "loss": 4.1386, + "step": 505800 + }, + { + "epoch": 4.354769283746556, + "grad_norm": 1.625, + "learning_rate": 8.13375281380223e-06, + "loss": 4.093, + "step": 505850 + }, + { + "epoch": 4.355199724517906, + "grad_norm": 3.5625, + "learning_rate": 8.123053655629842e-06, + "loss": 4.3499, + "step": 505900 + }, + { + "epoch": 4.355630165289256, + "grad_norm": 3.515625, + "learning_rate": 8.112361241027656e-06, + "loss": 4.5391, + "step": 505950 + }, + { + "epoch": 4.356060606060606, + "grad_norm": 2.78125, + "learning_rate": 8.10167557078042e-06, + "loss": 4.3041, + "step": 506000 + }, + { + "epoch": 4.356491046831956, + "grad_norm": 3.0625, + "learning_rate": 8.090996645672477e-06, + "loss": 3.7522, + "step": 506050 + }, + { + "epoch": 4.356921487603306, + "grad_norm": 4.84375, + "learning_rate": 8.080324466487643e-06, + "loss": 4.3118, + "step": 506100 + }, + { + "epoch": 4.357351928374656, + "grad_norm": 2.890625, + "learning_rate": 8.069659034009214e-06, + "loss": 4.3671, + "step": 506150 + }, + { + "epoch": 4.357782369146006, + "grad_norm": 3.828125, + "learning_rate": 8.05900034902003e-06, + "loss": 4.1839, + "step": 506200 + }, + { + "epoch": 4.358212809917355, + "grad_norm": 3.78125, + "learning_rate": 8.048348412302398e-06, + "loss": 4.3766, + "step": 506250 + }, + { + "epoch": 4.358643250688705, + "grad_norm": 2.84375, + "learning_rate": 8.03770322463816e-06, + "loss": 4.229, + "step": 506300 + }, + { + "epoch": 4.359073691460055, + "grad_norm": 2.109375, + "learning_rate": 8.027064786808635e-06, + "loss": 4.2059, + "step": 506350 + }, + { + "epoch": 4.359504132231405, + "grad_norm": 3.28125, + "learning_rate": 8.016433099594678e-06, + "loss": 4.2764, + "step": 506400 + }, + { + "epoch": 4.3599345730027546, + "grad_norm": 3.578125, + "learning_rate": 8.005808163776596e-06, + "loss": 4.1407, + "step": 506450 + }, + { + "epoch": 4.360365013774104, + "grad_norm": 0.78515625, + "learning_rate": 7.99518998013431e-06, + "loss": 4.2728, + "step": 506500 + }, + { + "epoch": 4.360795454545454, + "grad_norm": 4.0, + "learning_rate": 7.984578549447086e-06, + "loss": 4.5802, + "step": 506550 + }, + { + "epoch": 4.361225895316805, + "grad_norm": 5.125, + "learning_rate": 7.973973872493846e-06, + "loss": 4.2125, + "step": 506600 + }, + { + "epoch": 4.361656336088155, + "grad_norm": 4.09375, + "learning_rate": 7.96337595005291e-06, + "loss": 4.5784, + "step": 506650 + }, + { + "epoch": 4.362086776859504, + "grad_norm": 3.65625, + "learning_rate": 7.952784782902156e-06, + "loss": 4.2129, + "step": 506700 + }, + { + "epoch": 4.362517217630854, + "grad_norm": 4.625, + "learning_rate": 7.942200371818975e-06, + "loss": 3.9921, + "step": 506750 + }, + { + "epoch": 4.362947658402204, + "grad_norm": 3.46875, + "learning_rate": 7.931622717580211e-06, + "loss": 4.2852, + "step": 506800 + }, + { + "epoch": 4.363378099173554, + "grad_norm": 5.09375, + "learning_rate": 7.921051820962222e-06, + "loss": 4.3809, + "step": 506850 + }, + { + "epoch": 4.363808539944904, + "grad_norm": 4.5625, + "learning_rate": 7.910487682740942e-06, + "loss": 4.6271, + "step": 506900 + }, + { + "epoch": 4.364238980716253, + "grad_norm": 5.75, + "learning_rate": 7.899930303691728e-06, + "loss": 4.0671, + "step": 506950 + }, + { + "epoch": 4.364669421487603, + "grad_norm": 3.125, + "learning_rate": 7.889379684589482e-06, + "loss": 4.063, + "step": 507000 + }, + { + "epoch": 4.364669421487603, + "eval_loss": 4.990376949310303, + "eval_runtime": 24.8331, + "eval_samples_per_second": 25.772, + "eval_steps_per_second": 12.886, + "eval_tts_loss": 7.482996264546997, + "step": 507000 + }, + { + "epoch": 4.365099862258953, + "grad_norm": 2.59375, + "learning_rate": 7.878835826208575e-06, + "loss": 4.074, + "step": 507050 + }, + { + "epoch": 4.365530303030303, + "grad_norm": 3.71875, + "learning_rate": 7.868298729322898e-06, + "loss": 4.291, + "step": 507100 + }, + { + "epoch": 4.3659607438016526, + "grad_norm": 3.484375, + "learning_rate": 7.857768394705878e-06, + "loss": 4.2775, + "step": 507150 + }, + { + "epoch": 4.366391184573002, + "grad_norm": 4.28125, + "learning_rate": 7.847244823130418e-06, + "loss": 4.8031, + "step": 507200 + }, + { + "epoch": 4.366821625344353, + "grad_norm": 2.3125, + "learning_rate": 7.836728015368922e-06, + "loss": 4.7203, + "step": 507250 + }, + { + "epoch": 4.367252066115703, + "grad_norm": 7.46875, + "learning_rate": 7.826217972193285e-06, + "loss": 4.3567, + "step": 507300 + }, + { + "epoch": 4.367682506887053, + "grad_norm": 4.46875, + "learning_rate": 7.815714694374932e-06, + "loss": 4.5966, + "step": 507350 + }, + { + "epoch": 4.368112947658402, + "grad_norm": 4.25, + "learning_rate": 7.80521818268477e-06, + "loss": 4.1714, + "step": 507400 + }, + { + "epoch": 4.368543388429752, + "grad_norm": 4.875, + "learning_rate": 7.79472843789324e-06, + "loss": 4.0177, + "step": 507450 + }, + { + "epoch": 4.368973829201102, + "grad_norm": 2.109375, + "learning_rate": 7.78424546077028e-06, + "loss": 4.0039, + "step": 507500 + }, + { + "epoch": 4.369404269972452, + "grad_norm": 4.375, + "learning_rate": 7.77376925208526e-06, + "loss": 4.2144, + "step": 507550 + }, + { + "epoch": 4.369834710743802, + "grad_norm": 3.671875, + "learning_rate": 7.763299812607161e-06, + "loss": 4.1329, + "step": 507600 + }, + { + "epoch": 4.370265151515151, + "grad_norm": 3.15625, + "learning_rate": 7.752837143104408e-06, + "loss": 4.112, + "step": 507650 + }, + { + "epoch": 4.370695592286501, + "grad_norm": 4.78125, + "learning_rate": 7.742381244344921e-06, + "loss": 3.8912, + "step": 507700 + }, + { + "epoch": 4.371126033057851, + "grad_norm": 2.75, + "learning_rate": 7.731932117096185e-06, + "loss": 4.1174, + "step": 507750 + }, + { + "epoch": 4.371556473829201, + "grad_norm": 2.96875, + "learning_rate": 7.721489762125078e-06, + "loss": 4.2519, + "step": 507800 + }, + { + "epoch": 4.3719869146005506, + "grad_norm": 3.328125, + "learning_rate": 7.711054180198085e-06, + "loss": 4.6386, + "step": 507850 + }, + { + "epoch": 4.372417355371901, + "grad_norm": 3.34375, + "learning_rate": 7.70062537208116e-06, + "loss": 4.3162, + "step": 507900 + }, + { + "epoch": 4.372847796143251, + "grad_norm": 4.03125, + "learning_rate": 7.690203338539748e-06, + "loss": 4.3752, + "step": 507950 + }, + { + "epoch": 4.373278236914601, + "grad_norm": 1.9921875, + "learning_rate": 7.67978808033879e-06, + "loss": 4.3337, + "step": 508000 + }, + { + "epoch": 4.373708677685951, + "grad_norm": 3.3125, + "learning_rate": 7.669379598242776e-06, + "loss": 4.1397, + "step": 508050 + }, + { + "epoch": 4.3741391184573, + "grad_norm": 5.375, + "learning_rate": 7.658977893015618e-06, + "loss": 4.2318, + "step": 508100 + }, + { + "epoch": 4.37456955922865, + "grad_norm": 4.65625, + "learning_rate": 7.648582965420813e-06, + "loss": 4.4174, + "step": 508150 + }, + { + "epoch": 4.375, + "grad_norm": 4.65625, + "learning_rate": 7.638194816221334e-06, + "loss": 4.4217, + "step": 508200 + }, + { + "epoch": 4.37543044077135, + "grad_norm": 3.015625, + "learning_rate": 7.62781344617961e-06, + "loss": 4.1805, + "step": 508250 + }, + { + "epoch": 4.3758608815427, + "grad_norm": 2.71875, + "learning_rate": 7.617438856057668e-06, + "loss": 4.4549, + "step": 508300 + }, + { + "epoch": 4.376291322314049, + "grad_norm": 2.359375, + "learning_rate": 7.607071046616931e-06, + "loss": 4.2371, + "step": 508350 + }, + { + "epoch": 4.376721763085399, + "grad_norm": 3.953125, + "learning_rate": 7.596710018618369e-06, + "loss": 4.1101, + "step": 508400 + }, + { + "epoch": 4.377152203856749, + "grad_norm": 4.625, + "learning_rate": 7.586355772822506e-06, + "loss": 4.3206, + "step": 508450 + }, + { + "epoch": 4.377582644628099, + "grad_norm": 3.09375, + "learning_rate": 7.576008309989291e-06, + "loss": 4.4091, + "step": 508500 + }, + { + "epoch": 4.3780130853994494, + "grad_norm": 2.671875, + "learning_rate": 7.565667630878215e-06, + "loss": 4.2373, + "step": 508550 + }, + { + "epoch": 4.378443526170799, + "grad_norm": 1.5703125, + "learning_rate": 7.55533373624826e-06, + "loss": 4.4331, + "step": 508600 + }, + { + "epoch": 4.378873966942149, + "grad_norm": 4.5, + "learning_rate": 7.545006626857898e-06, + "loss": 4.6513, + "step": 508650 + }, + { + "epoch": 4.379304407713499, + "grad_norm": 3.515625, + "learning_rate": 7.534686303465122e-06, + "loss": 4.445, + "step": 508700 + }, + { + "epoch": 4.379734848484849, + "grad_norm": 3.109375, + "learning_rate": 7.524372766827437e-06, + "loss": 4.3127, + "step": 508750 + }, + { + "epoch": 4.380165289256198, + "grad_norm": 2.765625, + "learning_rate": 7.5140660177018266e-06, + "loss": 4.6362, + "step": 508800 + }, + { + "epoch": 4.380595730027548, + "grad_norm": 1.8671875, + "learning_rate": 7.5037660568447745e-06, + "loss": 4.3959, + "step": 508850 + }, + { + "epoch": 4.381026170798898, + "grad_norm": 3.5625, + "learning_rate": 7.493472885012298e-06, + "loss": 4.13, + "step": 508900 + }, + { + "epoch": 4.381456611570248, + "grad_norm": 4.625, + "learning_rate": 7.483186502959849e-06, + "loss": 4.0516, + "step": 508950 + }, + { + "epoch": 4.381887052341598, + "grad_norm": 3.34375, + "learning_rate": 7.472906911442478e-06, + "loss": 4.4367, + "step": 509000 + }, + { + "epoch": 4.382317493112947, + "grad_norm": 2.53125, + "learning_rate": 7.462634111214684e-06, + "loss": 4.1198, + "step": 509050 + }, + { + "epoch": 4.382747933884297, + "grad_norm": 4.71875, + "learning_rate": 7.452368103030405e-06, + "loss": 4.4191, + "step": 509100 + }, + { + "epoch": 4.383178374655647, + "grad_norm": 2.53125, + "learning_rate": 7.442108887643206e-06, + "loss": 4.0874, + "step": 509150 + }, + { + "epoch": 4.383608815426998, + "grad_norm": 2.921875, + "learning_rate": 7.431856465806064e-06, + "loss": 4.0599, + "step": 509200 + }, + { + "epoch": 4.3840392561983474, + "grad_norm": 7.15625, + "learning_rate": 7.421610838271487e-06, + "loss": 4.2546, + "step": 509250 + }, + { + "epoch": 4.384469696969697, + "grad_norm": 2.5, + "learning_rate": 7.4113720057915174e-06, + "loss": 4.4595, + "step": 509300 + }, + { + "epoch": 4.384900137741047, + "grad_norm": 2.5625, + "learning_rate": 7.4011399691175985e-06, + "loss": 4.3373, + "step": 509350 + }, + { + "epoch": 4.385330578512397, + "grad_norm": 3.5625, + "learning_rate": 7.390914729000786e-06, + "loss": 4.3771, + "step": 509400 + }, + { + "epoch": 4.385761019283747, + "grad_norm": 4.59375, + "learning_rate": 7.38069628619158e-06, + "loss": 4.1849, + "step": 509450 + }, + { + "epoch": 4.386191460055096, + "grad_norm": 3.09375, + "learning_rate": 7.370484641440001e-06, + "loss": 4.3355, + "step": 509500 + }, + { + "epoch": 4.386621900826446, + "grad_norm": 3.078125, + "learning_rate": 7.360279795495539e-06, + "loss": 4.4696, + "step": 509550 + }, + { + "epoch": 4.387052341597796, + "grad_norm": 1.53125, + "learning_rate": 7.350081749107229e-06, + "loss": 4.197, + "step": 509600 + }, + { + "epoch": 4.387482782369146, + "grad_norm": 4.5, + "learning_rate": 7.339890503023561e-06, + "loss": 4.1877, + "step": 509650 + }, + { + "epoch": 4.387913223140496, + "grad_norm": 1.1796875, + "learning_rate": 7.32970605799258e-06, + "loss": 3.9881, + "step": 509700 + }, + { + "epoch": 4.388343663911845, + "grad_norm": 4.6875, + "learning_rate": 7.319528414761789e-06, + "loss": 4.2678, + "step": 509750 + }, + { + "epoch": 4.388774104683195, + "grad_norm": 5.3125, + "learning_rate": 7.309357574078191e-06, + "loss": 4.2218, + "step": 509800 + }, + { + "epoch": 4.389204545454546, + "grad_norm": 4.375, + "learning_rate": 7.299193536688343e-06, + "loss": 4.1335, + "step": 509850 + }, + { + "epoch": 4.389634986225896, + "grad_norm": 3.109375, + "learning_rate": 7.2890363033382254e-06, + "loss": 4.0931, + "step": 509900 + }, + { + "epoch": 4.3900654269972454, + "grad_norm": 3.875, + "learning_rate": 7.278885874773356e-06, + "loss": 4.263, + "step": 509950 + }, + { + "epoch": 4.390495867768595, + "grad_norm": 4.90625, + "learning_rate": 7.268742251738792e-06, + "loss": 4.4627, + "step": 510000 + }, + { + "epoch": 4.390495867768595, + "eval_loss": 4.990321159362793, + "eval_runtime": 25.1056, + "eval_samples_per_second": 25.492, + "eval_steps_per_second": 12.746, + "eval_tts_loss": 7.480910927547624, + "step": 510000 + }, + { + "epoch": 4.390926308539945, + "grad_norm": 4.1875, + "learning_rate": 7.258605434979026e-06, + "loss": 4.5134, + "step": 510050 + }, + { + "epoch": 4.391356749311295, + "grad_norm": 3.859375, + "learning_rate": 7.2484754252380775e-06, + "loss": 4.4191, + "step": 510100 + }, + { + "epoch": 4.391787190082645, + "grad_norm": 4.46875, + "learning_rate": 7.23835222325947e-06, + "loss": 3.9403, + "step": 510150 + }, + { + "epoch": 4.392217630853994, + "grad_norm": 4.34375, + "learning_rate": 7.228235829786234e-06, + "loss": 4.4877, + "step": 510200 + }, + { + "epoch": 4.392648071625344, + "grad_norm": 3.453125, + "learning_rate": 7.2181262455608745e-06, + "loss": 3.9123, + "step": 510250 + }, + { + "epoch": 4.393078512396694, + "grad_norm": 3.859375, + "learning_rate": 7.208023471325431e-06, + "loss": 4.4258, + "step": 510300 + }, + { + "epoch": 4.393508953168044, + "grad_norm": 3.953125, + "learning_rate": 7.197927507821422e-06, + "loss": 4.3587, + "step": 510350 + }, + { + "epoch": 4.393939393939394, + "grad_norm": 4.9375, + "learning_rate": 7.187838355789855e-06, + "loss": 4.2157, + "step": 510400 + }, + { + "epoch": 4.394369834710744, + "grad_norm": 2.859375, + "learning_rate": 7.177756015971271e-06, + "loss": 4.2579, + "step": 510450 + }, + { + "epoch": 4.394800275482094, + "grad_norm": 2.203125, + "learning_rate": 7.1676804891056655e-06, + "loss": 4.2333, + "step": 510500 + }, + { + "epoch": 4.395230716253444, + "grad_norm": 3.109375, + "learning_rate": 7.157611775932593e-06, + "loss": 4.4928, + "step": 510550 + }, + { + "epoch": 4.395661157024794, + "grad_norm": 5.375, + "learning_rate": 7.147549877191084e-06, + "loss": 4.4959, + "step": 510600 + }, + { + "epoch": 4.3960915977961434, + "grad_norm": 3.75, + "learning_rate": 7.137494793619593e-06, + "loss": 4.1776, + "step": 510650 + }, + { + "epoch": 4.396522038567493, + "grad_norm": 3.40625, + "learning_rate": 7.127446525956205e-06, + "loss": 4.5399, + "step": 510700 + }, + { + "epoch": 4.396952479338843, + "grad_norm": 5.21875, + "learning_rate": 7.117405074938432e-06, + "loss": 4.5661, + "step": 510750 + }, + { + "epoch": 4.397382920110193, + "grad_norm": 3.5625, + "learning_rate": 7.107370441303252e-06, + "loss": 4.3027, + "step": 510800 + }, + { + "epoch": 4.397813360881543, + "grad_norm": 3.09375, + "learning_rate": 7.097342625787262e-06, + "loss": 4.3491, + "step": 510850 + }, + { + "epoch": 4.398243801652892, + "grad_norm": 5.46875, + "learning_rate": 7.08732162912642e-06, + "loss": 4.1872, + "step": 510900 + }, + { + "epoch": 4.398674242424242, + "grad_norm": 2.96875, + "learning_rate": 7.0773074520562476e-06, + "loss": 4.3698, + "step": 510950 + }, + { + "epoch": 4.399104683195592, + "grad_norm": 7.09375, + "learning_rate": 7.06730009531179e-06, + "loss": 4.5419, + "step": 511000 + }, + { + "epoch": 4.399535123966942, + "grad_norm": 3.546875, + "learning_rate": 7.057299559627562e-06, + "loss": 4.5538, + "step": 511050 + }, + { + "epoch": 4.399965564738292, + "grad_norm": 4.21875, + "learning_rate": 7.047305845737573e-06, + "loss": 4.0321, + "step": 511100 + }, + { + "epoch": 4.400396005509642, + "grad_norm": 3.453125, + "learning_rate": 7.037318954375338e-06, + "loss": 4.2875, + "step": 511150 + }, + { + "epoch": 4.400826446280992, + "grad_norm": 5.15625, + "learning_rate": 7.027338886273871e-06, + "loss": 4.3006, + "step": 511200 + }, + { + "epoch": 4.401256887052342, + "grad_norm": 3.265625, + "learning_rate": 7.017365642165696e-06, + "loss": 4.4986, + "step": 511250 + }, + { + "epoch": 4.401687327823692, + "grad_norm": 3.203125, + "learning_rate": 7.00739922278284e-06, + "loss": 4.2387, + "step": 511300 + }, + { + "epoch": 4.4021177685950414, + "grad_norm": 5.71875, + "learning_rate": 6.9974396288567945e-06, + "loss": 4.4865, + "step": 511350 + }, + { + "epoch": 4.402548209366391, + "grad_norm": 4.0625, + "learning_rate": 6.987486861118575e-06, + "loss": 4.08, + "step": 511400 + }, + { + "epoch": 4.402978650137741, + "grad_norm": 3.96875, + "learning_rate": 6.977540920298709e-06, + "loss": 4.1794, + "step": 511450 + }, + { + "epoch": 4.403409090909091, + "grad_norm": 3.65625, + "learning_rate": 6.967601807127177e-06, + "loss": 4.4802, + "step": 511500 + }, + { + "epoch": 4.403839531680441, + "grad_norm": 7.71875, + "learning_rate": 6.957669522333521e-06, + "loss": 4.2602, + "step": 511550 + }, + { + "epoch": 4.40426997245179, + "grad_norm": 1.8125, + "learning_rate": 6.947744066646744e-06, + "loss": 4.1937, + "step": 511600 + }, + { + "epoch": 4.40470041322314, + "grad_norm": 4.53125, + "learning_rate": 6.937825440795332e-06, + "loss": 4.2611, + "step": 511650 + }, + { + "epoch": 4.40513085399449, + "grad_norm": 2.8125, + "learning_rate": 6.927913645507311e-06, + "loss": 4.3111, + "step": 511700 + }, + { + "epoch": 4.405561294765841, + "grad_norm": 3.296875, + "learning_rate": 6.91800868151018e-06, + "loss": 4.3315, + "step": 511750 + }, + { + "epoch": 4.4059917355371905, + "grad_norm": 3.171875, + "learning_rate": 6.908110549530933e-06, + "loss": 4.5461, + "step": 511800 + }, + { + "epoch": 4.40642217630854, + "grad_norm": 3.046875, + "learning_rate": 6.898219250296078e-06, + "loss": 4.3747, + "step": 511850 + }, + { + "epoch": 4.40685261707989, + "grad_norm": 2.640625, + "learning_rate": 6.888334784531625e-06, + "loss": 4.4115, + "step": 511900 + }, + { + "epoch": 4.40728305785124, + "grad_norm": 5.125, + "learning_rate": 6.8784571529630696e-06, + "loss": 4.4447, + "step": 511950 + }, + { + "epoch": 4.40771349862259, + "grad_norm": 2.234375, + "learning_rate": 6.8685863563153985e-06, + "loss": 4.2815, + "step": 512000 + }, + { + "epoch": 4.4081439393939394, + "grad_norm": 4.0625, + "learning_rate": 6.858722395313089e-06, + "loss": 4.2215, + "step": 512050 + }, + { + "epoch": 4.408574380165289, + "grad_norm": 3.0, + "learning_rate": 6.848865270680183e-06, + "loss": 4.5206, + "step": 512100 + }, + { + "epoch": 4.409004820936639, + "grad_norm": 3.390625, + "learning_rate": 6.839014983140158e-06, + "loss": 4.0088, + "step": 512150 + }, + { + "epoch": 4.409435261707989, + "grad_norm": 3.0625, + "learning_rate": 6.8291715334159564e-06, + "loss": 4.0994, + "step": 512200 + }, + { + "epoch": 4.409865702479339, + "grad_norm": 2.328125, + "learning_rate": 6.819334922230125e-06, + "loss": 4.4212, + "step": 512250 + }, + { + "epoch": 4.410296143250688, + "grad_norm": 3.765625, + "learning_rate": 6.8095051503046185e-06, + "loss": 4.3757, + "step": 512300 + }, + { + "epoch": 4.410726584022038, + "grad_norm": 3.140625, + "learning_rate": 6.799682218360926e-06, + "loss": 4.3514, + "step": 512350 + }, + { + "epoch": 4.411157024793388, + "grad_norm": 2.9375, + "learning_rate": 6.789866127120048e-06, + "loss": 4.4842, + "step": 512400 + }, + { + "epoch": 4.411587465564739, + "grad_norm": 2.15625, + "learning_rate": 6.780056877302443e-06, + "loss": 4.2474, + "step": 512450 + }, + { + "epoch": 4.4120179063360885, + "grad_norm": 2.84375, + "learning_rate": 6.770254469628079e-06, + "loss": 4.1866, + "step": 512500 + }, + { + "epoch": 4.412448347107438, + "grad_norm": 5.25, + "learning_rate": 6.760458904816469e-06, + "loss": 4.5259, + "step": 512550 + }, + { + "epoch": 4.412878787878788, + "grad_norm": 4.25, + "learning_rate": 6.7506701835865605e-06, + "loss": 4.3188, + "step": 512600 + }, + { + "epoch": 4.413309228650138, + "grad_norm": 1.8515625, + "learning_rate": 6.740888306656823e-06, + "loss": 4.1352, + "step": 512650 + }, + { + "epoch": 4.413739669421488, + "grad_norm": 2.03125, + "learning_rate": 6.731113274745249e-06, + "loss": 4.028, + "step": 512700 + }, + { + "epoch": 4.4141701101928374, + "grad_norm": 5.25, + "learning_rate": 6.721345088569253e-06, + "loss": 4.2058, + "step": 512750 + }, + { + "epoch": 4.414600550964187, + "grad_norm": 3.90625, + "learning_rate": 6.7115837488458625e-06, + "loss": 4.5951, + "step": 512800 + }, + { + "epoch": 4.415030991735537, + "grad_norm": 3.328125, + "learning_rate": 6.701829256291503e-06, + "loss": 4.5285, + "step": 512850 + }, + { + "epoch": 4.415461432506887, + "grad_norm": 4.03125, + "learning_rate": 6.6920816116221365e-06, + "loss": 4.1794, + "step": 512900 + }, + { + "epoch": 4.415891873278237, + "grad_norm": 3.8125, + "learning_rate": 6.682340815553223e-06, + "loss": 4.1023, + "step": 512950 + }, + { + "epoch": 4.416322314049586, + "grad_norm": 4.0625, + "learning_rate": 6.672606868799725e-06, + "loss": 4.3001, + "step": 513000 + }, + { + "epoch": 4.416322314049586, + "eval_loss": 4.990197658538818, + "eval_runtime": 23.9965, + "eval_samples_per_second": 26.671, + "eval_steps_per_second": 13.335, + "eval_tts_loss": 7.4809472596331625, + "step": 513000 + }, + { + "epoch": 4.416752754820937, + "grad_norm": 3.109375, + "learning_rate": 6.6628797720760586e-06, + "loss": 4.5626, + "step": 513050 + }, + { + "epoch": 4.417183195592287, + "grad_norm": 2.171875, + "learning_rate": 6.65315952609622e-06, + "loss": 4.6766, + "step": 513100 + }, + { + "epoch": 4.417613636363637, + "grad_norm": 2.625, + "learning_rate": 6.643446131573638e-06, + "loss": 4.4865, + "step": 513150 + }, + { + "epoch": 4.4180440771349865, + "grad_norm": 2.546875, + "learning_rate": 6.633739589221221e-06, + "loss": 4.2308, + "step": 513200 + }, + { + "epoch": 4.418474517906336, + "grad_norm": 3.515625, + "learning_rate": 6.624039899751455e-06, + "loss": 4.2097, + "step": 513250 + }, + { + "epoch": 4.418904958677686, + "grad_norm": 2.109375, + "learning_rate": 6.614347063876259e-06, + "loss": 4.3102, + "step": 513300 + }, + { + "epoch": 4.419335399449036, + "grad_norm": 4.625, + "learning_rate": 6.60466108230704e-06, + "loss": 4.4591, + "step": 513350 + }, + { + "epoch": 4.419765840220386, + "grad_norm": 2.015625, + "learning_rate": 6.594981955754786e-06, + "loss": 4.2024, + "step": 513400 + }, + { + "epoch": 4.4201962809917354, + "grad_norm": 3.109375, + "learning_rate": 6.585309684929864e-06, + "loss": 4.412, + "step": 513450 + }, + { + "epoch": 4.420626721763085, + "grad_norm": 4.125, + "learning_rate": 6.575644270542236e-06, + "loss": 4.2866, + "step": 513500 + }, + { + "epoch": 4.421057162534435, + "grad_norm": 3.9375, + "learning_rate": 6.565985713301315e-06, + "loss": 4.4881, + "step": 513550 + }, + { + "epoch": 4.421487603305785, + "grad_norm": 3.78125, + "learning_rate": 6.556334013915988e-06, + "loss": 4.2589, + "step": 513600 + }, + { + "epoch": 4.421918044077135, + "grad_norm": 3.6875, + "learning_rate": 6.546689173094722e-06, + "loss": 4.1817, + "step": 513650 + }, + { + "epoch": 4.422348484848484, + "grad_norm": 1.6171875, + "learning_rate": 6.537051191545407e-06, + "loss": 4.0419, + "step": 513700 + }, + { + "epoch": 4.422778925619835, + "grad_norm": 4.4375, + "learning_rate": 6.5274200699754205e-06, + "loss": 4.0975, + "step": 513750 + }, + { + "epoch": 4.423209366391185, + "grad_norm": 2.765625, + "learning_rate": 6.517795809091698e-06, + "loss": 4.0693, + "step": 513800 + }, + { + "epoch": 4.423639807162535, + "grad_norm": 2.96875, + "learning_rate": 6.50817840960064e-06, + "loss": 3.9669, + "step": 513850 + }, + { + "epoch": 4.4240702479338845, + "grad_norm": 3.171875, + "learning_rate": 6.498567872208105e-06, + "loss": 4.1007, + "step": 513900 + }, + { + "epoch": 4.424500688705234, + "grad_norm": 3.34375, + "learning_rate": 6.488964197619563e-06, + "loss": 4.3688, + "step": 513950 + }, + { + "epoch": 4.424931129476584, + "grad_norm": 2.5, + "learning_rate": 6.479367386539825e-06, + "loss": 4.3292, + "step": 514000 + }, + { + "epoch": 4.425361570247934, + "grad_norm": 5.53125, + "learning_rate": 6.469777439673297e-06, + "loss": 4.3969, + "step": 514050 + }, + { + "epoch": 4.425792011019284, + "grad_norm": 3.4375, + "learning_rate": 6.460194357723881e-06, + "loss": 4.7134, + "step": 514100 + }, + { + "epoch": 4.4262224517906334, + "grad_norm": 2.71875, + "learning_rate": 6.450618141394949e-06, + "loss": 4.1469, + "step": 514150 + }, + { + "epoch": 4.426652892561983, + "grad_norm": 2.578125, + "learning_rate": 6.441048791389359e-06, + "loss": 4.4025, + "step": 514200 + }, + { + "epoch": 4.427083333333333, + "grad_norm": 3.8125, + "learning_rate": 6.431486308409496e-06, + "loss": 4.1695, + "step": 514250 + }, + { + "epoch": 4.427513774104683, + "grad_norm": 3.203125, + "learning_rate": 6.421930693157207e-06, + "loss": 4.3792, + "step": 514300 + }, + { + "epoch": 4.4279442148760335, + "grad_norm": 3.84375, + "learning_rate": 6.4123819463338785e-06, + "loss": 4.2667, + "step": 514350 + }, + { + "epoch": 4.428374655647383, + "grad_norm": 3.34375, + "learning_rate": 6.402840068640359e-06, + "loss": 3.8351, + "step": 514400 + }, + { + "epoch": 4.428805096418733, + "grad_norm": 3.109375, + "learning_rate": 6.393305060777e-06, + "loss": 4.3111, + "step": 514450 + }, + { + "epoch": 4.429235537190083, + "grad_norm": 6.0, + "learning_rate": 6.383776923443652e-06, + "loss": 4.532, + "step": 514500 + }, + { + "epoch": 4.429665977961433, + "grad_norm": 3.53125, + "learning_rate": 6.374255657339645e-06, + "loss": 4.8584, + "step": 514550 + }, + { + "epoch": 4.4300964187327825, + "grad_norm": 3.859375, + "learning_rate": 6.364741263163831e-06, + "loss": 4.0418, + "step": 514600 + }, + { + "epoch": 4.430526859504132, + "grad_norm": 5.40625, + "learning_rate": 6.355233741614552e-06, + "loss": 4.04, + "step": 514650 + }, + { + "epoch": 4.430957300275482, + "grad_norm": 1.4765625, + "learning_rate": 6.345733093389661e-06, + "loss": 4.2641, + "step": 514700 + }, + { + "epoch": 4.431387741046832, + "grad_norm": 4.71875, + "learning_rate": 6.33623931918641e-06, + "loss": 4.5212, + "step": 514750 + }, + { + "epoch": 4.431818181818182, + "grad_norm": 1.8359375, + "learning_rate": 6.326752419701698e-06, + "loss": 4.4224, + "step": 514800 + }, + { + "epoch": 4.4322486225895315, + "grad_norm": 4.03125, + "learning_rate": 6.3172723956318124e-06, + "loss": 4.221, + "step": 514850 + }, + { + "epoch": 4.432679063360881, + "grad_norm": 3.546875, + "learning_rate": 6.307799247672552e-06, + "loss": 4.101, + "step": 514900 + }, + { + "epoch": 4.433109504132231, + "grad_norm": 5.75, + "learning_rate": 6.298332976519283e-06, + "loss": 4.5273, + "step": 514950 + }, + { + "epoch": 4.433539944903581, + "grad_norm": 2.828125, + "learning_rate": 6.2888735828667275e-06, + "loss": 4.3606, + "step": 515000 + }, + { + "epoch": 4.4339703856749315, + "grad_norm": 1.15625, + "learning_rate": 6.279421067409252e-06, + "loss": 3.9342, + "step": 515050 + }, + { + "epoch": 4.434400826446281, + "grad_norm": 4.0625, + "learning_rate": 6.269975430840613e-06, + "loss": 4.5658, + "step": 515100 + }, + { + "epoch": 4.434831267217631, + "grad_norm": 4.34375, + "learning_rate": 6.260536673854123e-06, + "loss": 4.3632, + "step": 515150 + }, + { + "epoch": 4.435261707988981, + "grad_norm": 4.6875, + "learning_rate": 6.2511047971425485e-06, + "loss": 4.1757, + "step": 515200 + }, + { + "epoch": 4.435692148760331, + "grad_norm": 4.21875, + "learning_rate": 6.2416798013981815e-06, + "loss": 4.3564, + "step": 515250 + }, + { + "epoch": 4.4361225895316805, + "grad_norm": 1.703125, + "learning_rate": 6.232261687312768e-06, + "loss": 4.2246, + "step": 515300 + }, + { + "epoch": 4.43655303030303, + "grad_norm": 4.8125, + "learning_rate": 6.222850455577622e-06, + "loss": 4.1892, + "step": 515350 + }, + { + "epoch": 4.43698347107438, + "grad_norm": 6.65625, + "learning_rate": 6.213446106883481e-06, + "loss": 4.3059, + "step": 515400 + }, + { + "epoch": 4.43741391184573, + "grad_norm": 4.40625, + "learning_rate": 6.20404864192059e-06, + "loss": 4.1826, + "step": 515450 + }, + { + "epoch": 4.43784435261708, + "grad_norm": 2.609375, + "learning_rate": 6.1946580613787664e-06, + "loss": 4.3087, + "step": 515500 + }, + { + "epoch": 4.4382747933884295, + "grad_norm": 3.125, + "learning_rate": 6.185274365947191e-06, + "loss": 4.1832, + "step": 515550 + }, + { + "epoch": 4.438705234159779, + "grad_norm": 6.625, + "learning_rate": 6.175897556314603e-06, + "loss": 4.4377, + "step": 515600 + }, + { + "epoch": 4.43913567493113, + "grad_norm": 3.46875, + "learning_rate": 6.166527633169295e-06, + "loss": 4.245, + "step": 515650 + }, + { + "epoch": 4.43956611570248, + "grad_norm": 4.28125, + "learning_rate": 6.157164597198973e-06, + "loss": 3.8926, + "step": 515700 + }, + { + "epoch": 4.4399965564738295, + "grad_norm": 0.875, + "learning_rate": 6.147808449090853e-06, + "loss": 4.3383, + "step": 515750 + }, + { + "epoch": 4.440426997245179, + "grad_norm": 3.46875, + "learning_rate": 6.138459189531665e-06, + "loss": 4.3676, + "step": 515800 + }, + { + "epoch": 4.440857438016529, + "grad_norm": 4.625, + "learning_rate": 6.129116819207614e-06, + "loss": 4.2748, + "step": 515850 + }, + { + "epoch": 4.441287878787879, + "grad_norm": 2.546875, + "learning_rate": 6.1197813388044315e-06, + "loss": 4.4972, + "step": 515900 + }, + { + "epoch": 4.441718319559229, + "grad_norm": 3.671875, + "learning_rate": 6.110452749007311e-06, + "loss": 4.1191, + "step": 515950 + }, + { + "epoch": 4.4421487603305785, + "grad_norm": 1.9140625, + "learning_rate": 6.101131050500952e-06, + "loss": 4.3234, + "step": 516000 + }, + { + "epoch": 4.4421487603305785, + "eval_loss": 4.990259647369385, + "eval_runtime": 24.5592, + "eval_samples_per_second": 26.06, + "eval_steps_per_second": 13.03, + "eval_tts_loss": 7.480381709480102, + "step": 516000 + }, + { + "epoch": 4.442579201101928, + "grad_norm": 4.375, + "learning_rate": 6.09181624396955e-06, + "loss": 4.116, + "step": 516050 + }, + { + "epoch": 4.443009641873278, + "grad_norm": 4.46875, + "learning_rate": 6.082508330096781e-06, + "loss": 4.3434, + "step": 516100 + }, + { + "epoch": 4.443440082644628, + "grad_norm": 3.328125, + "learning_rate": 6.073207309565809e-06, + "loss": 4.6337, + "step": 516150 + }, + { + "epoch": 4.443870523415978, + "grad_norm": 4.59375, + "learning_rate": 6.0639131830593556e-06, + "loss": 4.404, + "step": 516200 + }, + { + "epoch": 4.4443009641873275, + "grad_norm": 4.25, + "learning_rate": 6.0546259512595845e-06, + "loss": 4.4904, + "step": 516250 + }, + { + "epoch": 4.444731404958677, + "grad_norm": 3.609375, + "learning_rate": 6.045345614848108e-06, + "loss": 4.1054, + "step": 516300 + }, + { + "epoch": 4.445161845730028, + "grad_norm": 2.71875, + "learning_rate": 6.036072174506124e-06, + "loss": 4.309, + "step": 516350 + }, + { + "epoch": 4.445592286501378, + "grad_norm": 2.265625, + "learning_rate": 6.026805630914279e-06, + "loss": 4.3595, + "step": 516400 + }, + { + "epoch": 4.4460227272727275, + "grad_norm": 2.375, + "learning_rate": 6.0175459847526925e-06, + "loss": 4.3329, + "step": 516450 + }, + { + "epoch": 4.446453168044077, + "grad_norm": 2.671875, + "learning_rate": 6.008293236701057e-06, + "loss": 4.5525, + "step": 516500 + }, + { + "epoch": 4.446883608815427, + "grad_norm": 2.484375, + "learning_rate": 5.999047387438439e-06, + "loss": 4.2963, + "step": 516550 + }, + { + "epoch": 4.447314049586777, + "grad_norm": 2.765625, + "learning_rate": 5.989808437643507e-06, + "loss": 4.2487, + "step": 516600 + }, + { + "epoch": 4.447744490358127, + "grad_norm": 2.125, + "learning_rate": 5.980576387994363e-06, + "loss": 4.5549, + "step": 516650 + }, + { + "epoch": 4.4481749311294765, + "grad_norm": 4.03125, + "learning_rate": 5.971351239168632e-06, + "loss": 4.1427, + "step": 516700 + }, + { + "epoch": 4.448605371900826, + "grad_norm": 3.96875, + "learning_rate": 5.962132991843405e-06, + "loss": 4.1719, + "step": 516750 + }, + { + "epoch": 4.449035812672176, + "grad_norm": 3.515625, + "learning_rate": 5.952921646695297e-06, + "loss": 3.8879, + "step": 516800 + }, + { + "epoch": 4.449466253443526, + "grad_norm": 2.453125, + "learning_rate": 5.943717204400379e-06, + "loss": 3.9986, + "step": 516850 + }, + { + "epoch": 4.449896694214876, + "grad_norm": 3.359375, + "learning_rate": 5.9345196656342525e-06, + "loss": 4.3833, + "step": 516900 + }, + { + "epoch": 4.450327134986226, + "grad_norm": 4.3125, + "learning_rate": 5.925329031072014e-06, + "loss": 4.1024, + "step": 516950 + }, + { + "epoch": 4.450757575757576, + "grad_norm": 2.90625, + "learning_rate": 5.9161453013881985e-06, + "loss": 4.5635, + "step": 517000 + }, + { + "epoch": 4.451188016528926, + "grad_norm": 0.8125, + "learning_rate": 5.906968477256902e-06, + "loss": 3.7934, + "step": 517050 + }, + { + "epoch": 4.451618457300276, + "grad_norm": 2.359375, + "learning_rate": 5.897798559351675e-06, + "loss": 4.347, + "step": 517100 + }, + { + "epoch": 4.4520488980716255, + "grad_norm": 5.28125, + "learning_rate": 5.888635548345545e-06, + "loss": 4.3105, + "step": 517150 + }, + { + "epoch": 4.452479338842975, + "grad_norm": 2.359375, + "learning_rate": 5.8794794449111065e-06, + "loss": 4.2003, + "step": 517200 + }, + { + "epoch": 4.452909779614325, + "grad_norm": 2.734375, + "learning_rate": 5.870330249720357e-06, + "loss": 4.359, + "step": 517250 + }, + { + "epoch": 4.453340220385675, + "grad_norm": 3.046875, + "learning_rate": 5.861187963444848e-06, + "loss": 4.2676, + "step": 517300 + }, + { + "epoch": 4.453770661157025, + "grad_norm": 4.6875, + "learning_rate": 5.852052586755597e-06, + "loss": 4.2297, + "step": 517350 + }, + { + "epoch": 4.4542011019283745, + "grad_norm": 5.03125, + "learning_rate": 5.842924120323123e-06, + "loss": 4.1846, + "step": 517400 + }, + { + "epoch": 4.454631542699724, + "grad_norm": 1.3203125, + "learning_rate": 5.833802564817425e-06, + "loss": 4.3206, + "step": 517450 + }, + { + "epoch": 4.455061983471074, + "grad_norm": 2.59375, + "learning_rate": 5.824687920908023e-06, + "loss": 4.6782, + "step": 517500 + }, + { + "epoch": 4.455492424242424, + "grad_norm": 2.875, + "learning_rate": 5.815580189263903e-06, + "loss": 4.3217, + "step": 517550 + }, + { + "epoch": 4.455922865013774, + "grad_norm": 3.15625, + "learning_rate": 5.806479370553553e-06, + "loss": 4.4238, + "step": 517600 + }, + { + "epoch": 4.456353305785124, + "grad_norm": 3.234375, + "learning_rate": 5.79738546544496e-06, + "loss": 4.302, + "step": 517650 + }, + { + "epoch": 4.456783746556474, + "grad_norm": 3.3125, + "learning_rate": 5.788298474605581e-06, + "loss": 4.2024, + "step": 517700 + }, + { + "epoch": 4.457214187327824, + "grad_norm": 3.671875, + "learning_rate": 5.779218398702402e-06, + "loss": 4.2905, + "step": 517750 + }, + { + "epoch": 4.457644628099174, + "grad_norm": 2.03125, + "learning_rate": 5.770145238401881e-06, + "loss": 4.1912, + "step": 517800 + }, + { + "epoch": 4.4580750688705235, + "grad_norm": 4.0, + "learning_rate": 5.7610789943699285e-06, + "loss": 4.4892, + "step": 517850 + }, + { + "epoch": 4.458505509641873, + "grad_norm": 5.28125, + "learning_rate": 5.752019667272046e-06, + "loss": 4.5276, + "step": 517900 + }, + { + "epoch": 4.458935950413223, + "grad_norm": 4.03125, + "learning_rate": 5.742967257773124e-06, + "loss": 4.5385, + "step": 517950 + }, + { + "epoch": 4.459366391184573, + "grad_norm": 2.59375, + "learning_rate": 5.733921766537598e-06, + "loss": 4.0946, + "step": 518000 + }, + { + "epoch": 4.459796831955923, + "grad_norm": 3.53125, + "learning_rate": 5.724883194229425e-06, + "loss": 4.5273, + "step": 518050 + }, + { + "epoch": 4.4602272727272725, + "grad_norm": 2.6875, + "learning_rate": 5.715851541511974e-06, + "loss": 4.1284, + "step": 518100 + }, + { + "epoch": 4.460657713498622, + "grad_norm": 2.90625, + "learning_rate": 5.706826809048149e-06, + "loss": 3.9941, + "step": 518150 + }, + { + "epoch": 4.461088154269972, + "grad_norm": 2.171875, + "learning_rate": 5.697808997500376e-06, + "loss": 4.1627, + "step": 518200 + }, + { + "epoch": 4.461518595041323, + "grad_norm": 3.046875, + "learning_rate": 5.688798107530513e-06, + "loss": 4.3969, + "step": 518250 + }, + { + "epoch": 4.4619490358126725, + "grad_norm": 2.96875, + "learning_rate": 5.679794139799965e-06, + "loss": 4.7208, + "step": 518300 + }, + { + "epoch": 4.462379476584022, + "grad_norm": 3.421875, + "learning_rate": 5.670797094969593e-06, + "loss": 4.33, + "step": 518350 + }, + { + "epoch": 4.462809917355372, + "grad_norm": 2.875, + "learning_rate": 5.661806973699735e-06, + "loss": 4.2921, + "step": 518400 + }, + { + "epoch": 4.463240358126722, + "grad_norm": 1.2734375, + "learning_rate": 5.652823776650295e-06, + "loss": 4.2804, + "step": 518450 + }, + { + "epoch": 4.463670798898072, + "grad_norm": 3.796875, + "learning_rate": 5.6438475044805925e-06, + "loss": 4.0299, + "step": 518500 + }, + { + "epoch": 4.4641012396694215, + "grad_norm": 1.2265625, + "learning_rate": 5.634878157849477e-06, + "loss": 4.5831, + "step": 518550 + }, + { + "epoch": 4.464531680440771, + "grad_norm": 3.859375, + "learning_rate": 5.625915737415266e-06, + "loss": 4.331, + "step": 518600 + }, + { + "epoch": 4.464962121212121, + "grad_norm": 2.65625, + "learning_rate": 5.616960243835789e-06, + "loss": 4.189, + "step": 518650 + }, + { + "epoch": 4.465392561983471, + "grad_norm": 3.15625, + "learning_rate": 5.608011677768343e-06, + "loss": 4.2639, + "step": 518700 + }, + { + "epoch": 4.465823002754821, + "grad_norm": 3.484375, + "learning_rate": 5.59907003986977e-06, + "loss": 4.5521, + "step": 518750 + }, + { + "epoch": 4.4662534435261705, + "grad_norm": 6.03125, + "learning_rate": 5.590135330796342e-06, + "loss": 4.2766, + "step": 518800 + }, + { + "epoch": 4.46668388429752, + "grad_norm": 1.34375, + "learning_rate": 5.58120755120386e-06, + "loss": 4.2728, + "step": 518850 + }, + { + "epoch": 4.46711432506887, + "grad_norm": 0.84375, + "learning_rate": 5.572286701747598e-06, + "loss": 4.0894, + "step": 518900 + }, + { + "epoch": 4.467544765840221, + "grad_norm": 4.625, + "learning_rate": 5.5633727830823215e-06, + "loss": 4.4455, + "step": 518950 + }, + { + "epoch": 4.4679752066115705, + "grad_norm": 2.09375, + "learning_rate": 5.554465795862274e-06, + "loss": 4.1913, + "step": 519000 + }, + { + "epoch": 4.4679752066115705, + "eval_loss": 4.990195274353027, + "eval_runtime": 25.0097, + "eval_samples_per_second": 25.59, + "eval_steps_per_second": 12.795, + "eval_tts_loss": 7.48109567921881, + "step": 519000 + }, + { + "epoch": 4.46840564738292, + "grad_norm": 3.171875, + "learning_rate": 5.545565740741254e-06, + "loss": 4.2293, + "step": 519050 + }, + { + "epoch": 4.46883608815427, + "grad_norm": 3.40625, + "learning_rate": 5.536672618372485e-06, + "loss": 4.7192, + "step": 519100 + }, + { + "epoch": 4.46926652892562, + "grad_norm": 4.59375, + "learning_rate": 5.52778642940871e-06, + "loss": 3.7329, + "step": 519150 + }, + { + "epoch": 4.46969696969697, + "grad_norm": 2.4375, + "learning_rate": 5.518907174502141e-06, + "loss": 4.2293, + "step": 519200 + }, + { + "epoch": 4.4701274104683195, + "grad_norm": 3.140625, + "learning_rate": 5.510034854304491e-06, + "loss": 4.3989, + "step": 519250 + }, + { + "epoch": 4.470557851239669, + "grad_norm": 3.453125, + "learning_rate": 5.501169469466994e-06, + "loss": 4.4038, + "step": 519300 + }, + { + "epoch": 4.470988292011019, + "grad_norm": 4.90625, + "learning_rate": 5.492311020640351e-06, + "loss": 4.3744, + "step": 519350 + }, + { + "epoch": 4.471418732782369, + "grad_norm": 2.5, + "learning_rate": 5.48345950847472e-06, + "loss": 4.3404, + "step": 519400 + }, + { + "epoch": 4.471849173553719, + "grad_norm": 2.8125, + "learning_rate": 5.474614933619815e-06, + "loss": 4.1944, + "step": 519450 + }, + { + "epoch": 4.4722796143250685, + "grad_norm": 3.59375, + "learning_rate": 5.465777296724794e-06, + "loss": 4.2664, + "step": 519500 + }, + { + "epoch": 4.472710055096419, + "grad_norm": 2.390625, + "learning_rate": 5.456946598438306e-06, + "loss": 3.9764, + "step": 519550 + }, + { + "epoch": 4.473140495867769, + "grad_norm": 6.875, + "learning_rate": 5.4481228394085646e-06, + "loss": 4.4265, + "step": 519600 + }, + { + "epoch": 4.473570936639119, + "grad_norm": 4.625, + "learning_rate": 5.439306020283141e-06, + "loss": 4.3629, + "step": 519650 + }, + { + "epoch": 4.4740013774104685, + "grad_norm": 3.328125, + "learning_rate": 5.430496141709207e-06, + "loss": 4.652, + "step": 519700 + }, + { + "epoch": 4.474431818181818, + "grad_norm": 3.640625, + "learning_rate": 5.421693204333389e-06, + "loss": 4.3145, + "step": 519750 + }, + { + "epoch": 4.474862258953168, + "grad_norm": 4.21875, + "learning_rate": 5.412897208801803e-06, + "loss": 4.3534, + "step": 519800 + }, + { + "epoch": 4.475292699724518, + "grad_norm": 2.796875, + "learning_rate": 5.404108155760046e-06, + "loss": 4.0033, + "step": 519850 + }, + { + "epoch": 4.475723140495868, + "grad_norm": 2.578125, + "learning_rate": 5.3953260458532215e-06, + "loss": 4.1615, + "step": 519900 + }, + { + "epoch": 4.4761535812672175, + "grad_norm": 6.5625, + "learning_rate": 5.386550879725915e-06, + "loss": 4.3982, + "step": 519950 + }, + { + "epoch": 4.476584022038567, + "grad_norm": 3.09375, + "learning_rate": 5.3777826580222126e-06, + "loss": 4.2962, + "step": 520000 + }, + { + "epoch": 4.477014462809917, + "grad_norm": 4.78125, + "learning_rate": 5.369021381385686e-06, + "loss": 4.2393, + "step": 520050 + }, + { + "epoch": 4.477444903581267, + "grad_norm": 2.59375, + "learning_rate": 5.360267050459378e-06, + "loss": 4.3622, + "step": 520100 + }, + { + "epoch": 4.477875344352617, + "grad_norm": 2.8125, + "learning_rate": 5.351519665885852e-06, + "loss": 4.4195, + "step": 520150 + }, + { + "epoch": 4.4783057851239665, + "grad_norm": 2.0, + "learning_rate": 5.342779228307138e-06, + "loss": 4.4245, + "step": 520200 + }, + { + "epoch": 4.478736225895317, + "grad_norm": 2.90625, + "learning_rate": 5.334045738364757e-06, + "loss": 4.5866, + "step": 520250 + }, + { + "epoch": 4.479166666666667, + "grad_norm": 1.0390625, + "learning_rate": 5.325319196699752e-06, + "loss": 4.5064, + "step": 520300 + }, + { + "epoch": 4.479597107438017, + "grad_norm": 3.140625, + "learning_rate": 5.316599603952632e-06, + "loss": 4.1147, + "step": 520350 + }, + { + "epoch": 4.4800275482093666, + "grad_norm": 3.390625, + "learning_rate": 5.307886960763353e-06, + "loss": 4.499, + "step": 520400 + }, + { + "epoch": 4.480457988980716, + "grad_norm": 4.6875, + "learning_rate": 5.299181267771458e-06, + "loss": 4.1368, + "step": 520450 + }, + { + "epoch": 4.480888429752066, + "grad_norm": 3.75, + "learning_rate": 5.290482525615903e-06, + "loss": 4.385, + "step": 520500 + }, + { + "epoch": 4.481318870523416, + "grad_norm": 10.0, + "learning_rate": 5.2817907349351435e-06, + "loss": 4.1886, + "step": 520550 + }, + { + "epoch": 4.481749311294766, + "grad_norm": 4.375, + "learning_rate": 5.273105896367181e-06, + "loss": 4.3442, + "step": 520600 + }, + { + "epoch": 4.4821797520661155, + "grad_norm": 1.9453125, + "learning_rate": 5.264428010549405e-06, + "loss": 3.8715, + "step": 520650 + }, + { + "epoch": 4.482610192837465, + "grad_norm": 4.9375, + "learning_rate": 5.255757078118795e-06, + "loss": 4.2651, + "step": 520700 + }, + { + "epoch": 4.483040633608815, + "grad_norm": 4.5, + "learning_rate": 5.247093099711775e-06, + "loss": 3.9816, + "step": 520750 + }, + { + "epoch": 4.483471074380165, + "grad_norm": 4.125, + "learning_rate": 5.238436075964237e-06, + "loss": 4.3572, + "step": 520800 + }, + { + "epoch": 4.483901515151516, + "grad_norm": 5.3125, + "learning_rate": 5.2297860075116164e-06, + "loss": 4.1667, + "step": 520850 + }, + { + "epoch": 4.484331955922865, + "grad_norm": 2.9375, + "learning_rate": 5.221142894988818e-06, + "loss": 3.9319, + "step": 520900 + }, + { + "epoch": 4.484762396694215, + "grad_norm": 3.609375, + "learning_rate": 5.212506739030187e-06, + "loss": 4.2496, + "step": 520950 + }, + { + "epoch": 4.485192837465565, + "grad_norm": 3.515625, + "learning_rate": 5.20387754026962e-06, + "loss": 4.4324, + "step": 521000 + }, + { + "epoch": 4.485623278236915, + "grad_norm": 3.515625, + "learning_rate": 5.195255299340496e-06, + "loss": 4.5426, + "step": 521050 + }, + { + "epoch": 4.4860537190082646, + "grad_norm": 2.875, + "learning_rate": 5.186640016875632e-06, + "loss": 4.3715, + "step": 521100 + }, + { + "epoch": 4.486484159779614, + "grad_norm": 3.0625, + "learning_rate": 5.1780316935074345e-06, + "loss": 3.9493, + "step": 521150 + }, + { + "epoch": 4.486914600550964, + "grad_norm": 4.65625, + "learning_rate": 5.169430329867675e-06, + "loss": 3.9616, + "step": 521200 + }, + { + "epoch": 4.487345041322314, + "grad_norm": 0.95703125, + "learning_rate": 5.1608359265876815e-06, + "loss": 4.6169, + "step": 521250 + }, + { + "epoch": 4.487775482093664, + "grad_norm": 2.03125, + "learning_rate": 5.152248484298294e-06, + "loss": 4.55, + "step": 521300 + }, + { + "epoch": 4.4882059228650135, + "grad_norm": 2.53125, + "learning_rate": 5.143668003629798e-06, + "loss": 4.5889, + "step": 521350 + }, + { + "epoch": 4.488636363636363, + "grad_norm": 3.203125, + "learning_rate": 5.135094485211977e-06, + "loss": 4.2215, + "step": 521400 + }, + { + "epoch": 4.489066804407713, + "grad_norm": 4.125, + "learning_rate": 5.126527929674119e-06, + "loss": 4.5214, + "step": 521450 + }, + { + "epoch": 4.489497245179063, + "grad_norm": 4.90625, + "learning_rate": 5.117968337644974e-06, + "loss": 4.1815, + "step": 521500 + }, + { + "epoch": 4.489927685950414, + "grad_norm": 2.765625, + "learning_rate": 5.109415709752818e-06, + "loss": 4.3825, + "step": 521550 + }, + { + "epoch": 4.490358126721763, + "grad_norm": 3.3125, + "learning_rate": 5.100870046625384e-06, + "loss": 4.3089, + "step": 521600 + }, + { + "epoch": 4.490788567493113, + "grad_norm": 3.796875, + "learning_rate": 5.092331348889912e-06, + "loss": 4.0753, + "step": 521650 + }, + { + "epoch": 4.491219008264463, + "grad_norm": 4.0625, + "learning_rate": 5.0837996171731235e-06, + "loss": 4.1591, + "step": 521700 + }, + { + "epoch": 4.491649449035813, + "grad_norm": 3.234375, + "learning_rate": 5.07527485210122e-06, + "loss": 4.533, + "step": 521750 + }, + { + "epoch": 4.4920798898071626, + "grad_norm": 3.34375, + "learning_rate": 5.066757054299898e-06, + "loss": 4.6478, + "step": 521800 + }, + { + "epoch": 4.492510330578512, + "grad_norm": 2.71875, + "learning_rate": 5.058246224394358e-06, + "loss": 4.3133, + "step": 521850 + }, + { + "epoch": 4.492940771349862, + "grad_norm": 3.03125, + "learning_rate": 5.049742363009291e-06, + "loss": 4.0999, + "step": 521900 + }, + { + "epoch": 4.493371212121212, + "grad_norm": 2.15625, + "learning_rate": 5.041245470768818e-06, + "loss": 4.4241, + "step": 521950 + }, + { + "epoch": 4.493801652892562, + "grad_norm": 5.40625, + "learning_rate": 5.0327555482966305e-06, + "loss": 4.5213, + "step": 522000 + }, + { + "epoch": 4.493801652892562, + "eval_loss": 4.990315914154053, + "eval_runtime": 24.6409, + "eval_samples_per_second": 25.973, + "eval_steps_per_second": 12.987, + "eval_tts_loss": 7.481205293532995, + "step": 522000 + }, + { + "epoch": 4.4942320936639115, + "grad_norm": 4.46875, + "learning_rate": 5.024272596215862e-06, + "loss": 4.48, + "step": 522050 + }, + { + "epoch": 4.494662534435261, + "grad_norm": 2.296875, + "learning_rate": 5.0157966151491265e-06, + "loss": 4.5593, + "step": 522100 + }, + { + "epoch": 4.495092975206612, + "grad_norm": 3.09375, + "learning_rate": 5.007327605718582e-06, + "loss": 4.3271, + "step": 522150 + }, + { + "epoch": 4.495523415977962, + "grad_norm": 3.40625, + "learning_rate": 4.998865568545774e-06, + "loss": 3.9936, + "step": 522200 + }, + { + "epoch": 4.495953856749312, + "grad_norm": 3.375, + "learning_rate": 4.990410504251863e-06, + "loss": 4.3553, + "step": 522250 + }, + { + "epoch": 4.496384297520661, + "grad_norm": 3.421875, + "learning_rate": 4.981962413457386e-06, + "loss": 4.653, + "step": 522300 + }, + { + "epoch": 4.496814738292011, + "grad_norm": 3.953125, + "learning_rate": 4.973521296782435e-06, + "loss": 4.3257, + "step": 522350 + }, + { + "epoch": 4.497245179063361, + "grad_norm": 4.21875, + "learning_rate": 4.96508715484657e-06, + "loss": 4.2562, + "step": 522400 + }, + { + "epoch": 4.497675619834711, + "grad_norm": 3.0625, + "learning_rate": 4.956659988268841e-06, + "loss": 4.2002, + "step": 522450 + }, + { + "epoch": 4.4981060606060606, + "grad_norm": 3.4375, + "learning_rate": 4.948239797667753e-06, + "loss": 4.2381, + "step": 522500 + }, + { + "epoch": 4.49853650137741, + "grad_norm": 2.203125, + "learning_rate": 4.939826583661378e-06, + "loss": 4.2369, + "step": 522550 + }, + { + "epoch": 4.49896694214876, + "grad_norm": 6.96875, + "learning_rate": 4.931420346867188e-06, + "loss": 4.3375, + "step": 522600 + }, + { + "epoch": 4.49939738292011, + "grad_norm": 3.671875, + "learning_rate": 4.923021087902202e-06, + "loss": 4.5467, + "step": 522650 + }, + { + "epoch": 4.49982782369146, + "grad_norm": 3.109375, + "learning_rate": 4.914628807382926e-06, + "loss": 4.049, + "step": 522700 + }, + { + "epoch": 4.5002582644628095, + "grad_norm": 3.140625, + "learning_rate": 4.906243505925301e-06, + "loss": 4.6011, + "step": 522750 + }, + { + "epoch": 4.500688705234159, + "grad_norm": 2.3125, + "learning_rate": 4.89786518414479e-06, + "loss": 4.0835, + "step": 522800 + }, + { + "epoch": 4.50111914600551, + "grad_norm": 2.890625, + "learning_rate": 4.889493842656367e-06, + "loss": 4.1293, + "step": 522850 + }, + { + "epoch": 4.50154958677686, + "grad_norm": 5.6875, + "learning_rate": 4.881129482074465e-06, + "loss": 4.5677, + "step": 522900 + }, + { + "epoch": 4.50198002754821, + "grad_norm": 2.375, + "learning_rate": 4.872772103013001e-06, + "loss": 3.8966, + "step": 522950 + }, + { + "epoch": 4.502410468319559, + "grad_norm": 2.328125, + "learning_rate": 4.864421706085398e-06, + "loss": 4.9562, + "step": 523000 + }, + { + "epoch": 4.502840909090909, + "grad_norm": 3.125, + "learning_rate": 4.856078291904542e-06, + "loss": 4.3891, + "step": 523050 + }, + { + "epoch": 4.503271349862259, + "grad_norm": 2.9375, + "learning_rate": 4.8477418610828325e-06, + "loss": 4.5279, + "step": 523100 + }, + { + "epoch": 4.503701790633609, + "grad_norm": 3.3125, + "learning_rate": 4.839412414232158e-06, + "loss": 4.1265, + "step": 523150 + }, + { + "epoch": 4.5041322314049586, + "grad_norm": 2.890625, + "learning_rate": 4.831089951963863e-06, + "loss": 4.112, + "step": 523200 + }, + { + "epoch": 4.504562672176308, + "grad_norm": 3.21875, + "learning_rate": 4.822774474888803e-06, + "loss": 4.0946, + "step": 523250 + }, + { + "epoch": 4.504993112947658, + "grad_norm": 5.125, + "learning_rate": 4.814465983617322e-06, + "loss": 4.2552, + "step": 523300 + }, + { + "epoch": 4.505423553719008, + "grad_norm": 5.5, + "learning_rate": 4.806164478759223e-06, + "loss": 4.4739, + "step": 523350 + }, + { + "epoch": 4.505853994490359, + "grad_norm": 3.40625, + "learning_rate": 4.797869960923851e-06, + "loss": 4.2649, + "step": 523400 + }, + { + "epoch": 4.506284435261708, + "grad_norm": 3.859375, + "learning_rate": 4.7895824307200075e-06, + "loss": 4.1929, + "step": 523450 + }, + { + "epoch": 4.506714876033058, + "grad_norm": 2.8125, + "learning_rate": 4.78130188875594e-06, + "loss": 4.3796, + "step": 523500 + }, + { + "epoch": 4.507145316804408, + "grad_norm": 2.265625, + "learning_rate": 4.773028335639451e-06, + "loss": 4.2762, + "step": 523550 + }, + { + "epoch": 4.507575757575758, + "grad_norm": 1.5, + "learning_rate": 4.7647617719778e-06, + "loss": 4.6251, + "step": 523600 + }, + { + "epoch": 4.508006198347108, + "grad_norm": 4.75, + "learning_rate": 4.7565021983777105e-06, + "loss": 4.4283, + "step": 523650 + }, + { + "epoch": 4.508436639118457, + "grad_norm": 1.9765625, + "learning_rate": 4.748249615445477e-06, + "loss": 3.6475, + "step": 523700 + }, + { + "epoch": 4.508867079889807, + "grad_norm": 1.3984375, + "learning_rate": 4.740004023786748e-06, + "loss": 4.4065, + "step": 523750 + }, + { + "epoch": 4.509297520661157, + "grad_norm": 1.4765625, + "learning_rate": 4.731765424006773e-06, + "loss": 4.0777, + "step": 523800 + }, + { + "epoch": 4.509727961432507, + "grad_norm": 2.171875, + "learning_rate": 4.723533816710257e-06, + "loss": 4.337, + "step": 523850 + }, + { + "epoch": 4.5101584022038566, + "grad_norm": 2.046875, + "learning_rate": 4.71530920250135e-06, + "loss": 4.0588, + "step": 523900 + }, + { + "epoch": 4.510588842975206, + "grad_norm": 2.734375, + "learning_rate": 4.707091581983747e-06, + "loss": 4.8354, + "step": 523950 + }, + { + "epoch": 4.511019283746556, + "grad_norm": 2.640625, + "learning_rate": 4.698880955760598e-06, + "loss": 4.1805, + "step": 524000 + }, + { + "epoch": 4.511449724517906, + "grad_norm": 1.6015625, + "learning_rate": 4.690677324434511e-06, + "loss": 4.2731, + "step": 524050 + }, + { + "epoch": 4.511880165289256, + "grad_norm": 3.421875, + "learning_rate": 4.6824806886076715e-06, + "loss": 4.5869, + "step": 524100 + }, + { + "epoch": 4.512310606060606, + "grad_norm": 2.296875, + "learning_rate": 4.674291048881674e-06, + "loss": 4.1706, + "step": 524150 + }, + { + "epoch": 4.512741046831956, + "grad_norm": 2.1875, + "learning_rate": 4.666108405857606e-06, + "loss": 4.1391, + "step": 524200 + }, + { + "epoch": 4.513171487603306, + "grad_norm": 4.15625, + "learning_rate": 4.657932760136063e-06, + "loss": 4.2125, + "step": 524250 + }, + { + "epoch": 4.513601928374656, + "grad_norm": 2.953125, + "learning_rate": 4.649764112317123e-06, + "loss": 4.2078, + "step": 524300 + }, + { + "epoch": 4.514032369146006, + "grad_norm": 3.578125, + "learning_rate": 4.641602463000339e-06, + "loss": 4.1623, + "step": 524350 + }, + { + "epoch": 4.514462809917355, + "grad_norm": 3.75, + "learning_rate": 4.633447812784775e-06, + "loss": 4.2372, + "step": 524400 + }, + { + "epoch": 4.514893250688705, + "grad_norm": 3.0625, + "learning_rate": 4.625300162268953e-06, + "loss": 4.406, + "step": 524450 + }, + { + "epoch": 4.515323691460055, + "grad_norm": 6.5, + "learning_rate": 4.617159512050895e-06, + "loss": 4.2278, + "step": 524500 + }, + { + "epoch": 4.515754132231405, + "grad_norm": 3.703125, + "learning_rate": 4.6090258627281115e-06, + "loss": 4.6562, + "step": 524550 + }, + { + "epoch": 4.5161845730027546, + "grad_norm": 6.875, + "learning_rate": 4.600899214897581e-06, + "loss": 4.3026, + "step": 524600 + }, + { + "epoch": 4.516615013774104, + "grad_norm": 3.734375, + "learning_rate": 4.592779569155792e-06, + "loss": 4.4567, + "step": 524650 + }, + { + "epoch": 4.517045454545455, + "grad_norm": 2.65625, + "learning_rate": 4.584666926098713e-06, + "loss": 4.2037, + "step": 524700 + }, + { + "epoch": 4.517475895316805, + "grad_norm": 4.15625, + "learning_rate": 4.57656128632179e-06, + "loss": 4.385, + "step": 524750 + }, + { + "epoch": 4.517906336088155, + "grad_norm": 5.15625, + "learning_rate": 4.568462650419958e-06, + "loss": 4.381, + "step": 524800 + }, + { + "epoch": 4.518336776859504, + "grad_norm": 2.3125, + "learning_rate": 4.560371018987652e-06, + "loss": 4.2931, + "step": 524850 + }, + { + "epoch": 4.518767217630854, + "grad_norm": 2.984375, + "learning_rate": 4.552286392618743e-06, + "loss": 4.4834, + "step": 524900 + }, + { + "epoch": 4.519197658402204, + "grad_norm": 5.4375, + "learning_rate": 4.544208771906677e-06, + "loss": 4.1684, + "step": 524950 + }, + { + "epoch": 4.519628099173554, + "grad_norm": 3.484375, + "learning_rate": 4.536138157444314e-06, + "loss": 4.1747, + "step": 525000 + }, + { + "epoch": 4.519628099173554, + "eval_loss": 4.99021053314209, + "eval_runtime": 24.1038, + "eval_samples_per_second": 26.552, + "eval_steps_per_second": 13.276, + "eval_tts_loss": 7.480371839671666, + "step": 525000 + }, + { + "epoch": 4.520058539944904, + "grad_norm": 3.5, + "learning_rate": 4.528074549823991e-06, + "loss": 4.3075, + "step": 525050 + }, + { + "epoch": 4.520488980716253, + "grad_norm": 4.34375, + "learning_rate": 4.5200179496376e-06, + "loss": 4.2856, + "step": 525100 + }, + { + "epoch": 4.520919421487603, + "grad_norm": 2.375, + "learning_rate": 4.511968357476448e-06, + "loss": 4.3544, + "step": 525150 + }, + { + "epoch": 4.521349862258953, + "grad_norm": 1.953125, + "learning_rate": 4.5039257739313724e-06, + "loss": 4.3256, + "step": 525200 + }, + { + "epoch": 4.521780303030303, + "grad_norm": 1.1484375, + "learning_rate": 4.4958901995927004e-06, + "loss": 4.2391, + "step": 525250 + }, + { + "epoch": 4.5222107438016526, + "grad_norm": 4.875, + "learning_rate": 4.487861635050172e-06, + "loss": 4.1394, + "step": 525300 + }, + { + "epoch": 4.522641184573002, + "grad_norm": 3.671875, + "learning_rate": 4.479840080893127e-06, + "loss": 4.0337, + "step": 525350 + }, + { + "epoch": 4.523071625344352, + "grad_norm": 4.71875, + "learning_rate": 4.471825537710295e-06, + "loss": 4.1654, + "step": 525400 + }, + { + "epoch": 4.523502066115703, + "grad_norm": 3.03125, + "learning_rate": 4.463818006089937e-06, + "loss": 4.1178, + "step": 525450 + }, + { + "epoch": 4.523932506887053, + "grad_norm": 3.515625, + "learning_rate": 4.455817486619784e-06, + "loss": 4.0843, + "step": 525500 + }, + { + "epoch": 4.524362947658402, + "grad_norm": 2.09375, + "learning_rate": 4.447823979887056e-06, + "loss": 4.2138, + "step": 525550 + }, + { + "epoch": 4.524793388429752, + "grad_norm": 2.125, + "learning_rate": 4.439837486478448e-06, + "loss": 4.466, + "step": 525600 + }, + { + "epoch": 4.525223829201102, + "grad_norm": 3.359375, + "learning_rate": 4.431858006980183e-06, + "loss": 4.3179, + "step": 525650 + }, + { + "epoch": 4.525654269972452, + "grad_norm": 7.375, + "learning_rate": 4.423885541977923e-06, + "loss": 4.0016, + "step": 525700 + }, + { + "epoch": 4.526084710743802, + "grad_norm": 5.0625, + "learning_rate": 4.415920092056824e-06, + "loss": 4.7607, + "step": 525750 + }, + { + "epoch": 4.526515151515151, + "grad_norm": 3.296875, + "learning_rate": 4.40796165780153e-06, + "loss": 4.3584, + "step": 525800 + }, + { + "epoch": 4.526945592286501, + "grad_norm": 2.390625, + "learning_rate": 4.400010239796193e-06, + "loss": 4.1827, + "step": 525850 + }, + { + "epoch": 4.527376033057851, + "grad_norm": 4.40625, + "learning_rate": 4.392065838624393e-06, + "loss": 4.6886, + "step": 525900 + }, + { + "epoch": 4.527806473829201, + "grad_norm": 4.15625, + "learning_rate": 4.384128454869263e-06, + "loss": 4.6378, + "step": 525950 + }, + { + "epoch": 4.528236914600551, + "grad_norm": 3.0625, + "learning_rate": 4.376198089113392e-06, + "loss": 4.4241, + "step": 526000 + }, + { + "epoch": 4.528667355371901, + "grad_norm": 3.140625, + "learning_rate": 4.3682747419388385e-06, + "loss": 4.1943, + "step": 526050 + }, + { + "epoch": 4.529097796143251, + "grad_norm": 4.84375, + "learning_rate": 4.360358413927157e-06, + "loss": 4.5571, + "step": 526100 + }, + { + "epoch": 4.529528236914601, + "grad_norm": 2.125, + "learning_rate": 4.3524491056593954e-06, + "loss": 4.146, + "step": 526150 + }, + { + "epoch": 4.529958677685951, + "grad_norm": 1.0390625, + "learning_rate": 4.344546817716067e-06, + "loss": 4.4104, + "step": 526200 + }, + { + "epoch": 4.5303891184573, + "grad_norm": 1.7265625, + "learning_rate": 4.336651550677217e-06, + "loss": 4.6521, + "step": 526250 + }, + { + "epoch": 4.53081955922865, + "grad_norm": 1.953125, + "learning_rate": 4.3287633051222965e-06, + "loss": 4.1393, + "step": 526300 + }, + { + "epoch": 4.53125, + "grad_norm": 3.5, + "learning_rate": 4.320882081630317e-06, + "loss": 4.0434, + "step": 526350 + }, + { + "epoch": 4.53168044077135, + "grad_norm": 3.546875, + "learning_rate": 4.31300788077974e-06, + "loss": 4.302, + "step": 526400 + }, + { + "epoch": 4.5321108815427, + "grad_norm": 3.53125, + "learning_rate": 4.305140703148491e-06, + "loss": 4.4763, + "step": 526450 + }, + { + "epoch": 4.532541322314049, + "grad_norm": 1.3515625, + "learning_rate": 4.2972805493140315e-06, + "loss": 4.5437, + "step": 526500 + }, + { + "epoch": 4.532971763085399, + "grad_norm": 3.15625, + "learning_rate": 4.289427419853287e-06, + "loss": 4.1178, + "step": 526550 + }, + { + "epoch": 4.533402203856749, + "grad_norm": 5.5, + "learning_rate": 4.28158131534262e-06, + "loss": 4.2925, + "step": 526600 + }, + { + "epoch": 4.533832644628099, + "grad_norm": 3.609375, + "learning_rate": 4.273742236357947e-06, + "loss": 4.2234, + "step": 526650 + }, + { + "epoch": 4.534263085399449, + "grad_norm": 2.53125, + "learning_rate": 4.265910183474642e-06, + "loss": 4.0474, + "step": 526700 + }, + { + "epoch": 4.534693526170799, + "grad_norm": 2.65625, + "learning_rate": 4.258085157267533e-06, + "loss": 4.4759, + "step": 526750 + }, + { + "epoch": 4.535123966942149, + "grad_norm": 3.4375, + "learning_rate": 4.250267158311005e-06, + "loss": 4.3425, + "step": 526800 + }, + { + "epoch": 4.535554407713499, + "grad_norm": 3.515625, + "learning_rate": 4.242456187178845e-06, + "loss": 4.5469, + "step": 526850 + }, + { + "epoch": 4.535984848484849, + "grad_norm": 2.796875, + "learning_rate": 4.234652244444359e-06, + "loss": 4.3049, + "step": 526900 + }, + { + "epoch": 4.536415289256198, + "grad_norm": 2.828125, + "learning_rate": 4.22685533068038e-06, + "loss": 4.1405, + "step": 526950 + }, + { + "epoch": 4.536845730027548, + "grad_norm": 3.171875, + "learning_rate": 4.219065446459136e-06, + "loss": 4.2781, + "step": 527000 + }, + { + "epoch": 4.537276170798898, + "grad_norm": 2.6875, + "learning_rate": 4.211282592352428e-06, + "loss": 4.5228, + "step": 527050 + }, + { + "epoch": 4.537706611570248, + "grad_norm": 3.09375, + "learning_rate": 4.203506768931476e-06, + "loss": 4.2064, + "step": 527100 + }, + { + "epoch": 4.538137052341598, + "grad_norm": 1.953125, + "learning_rate": 4.195737976767e-06, + "loss": 4.243, + "step": 527150 + }, + { + "epoch": 4.538567493112947, + "grad_norm": 3.3125, + "learning_rate": 4.1879762164292455e-06, + "loss": 4.3149, + "step": 527200 + }, + { + "epoch": 4.538997933884297, + "grad_norm": 3.03125, + "learning_rate": 4.1802214884878895e-06, + "loss": 4.4569, + "step": 527250 + }, + { + "epoch": 4.539428374655648, + "grad_norm": 4.71875, + "learning_rate": 4.172473793512122e-06, + "loss": 4.3919, + "step": 527300 + }, + { + "epoch": 4.539858815426998, + "grad_norm": 4.71875, + "learning_rate": 4.164733132070586e-06, + "loss": 4.926, + "step": 527350 + }, + { + "epoch": 4.5402892561983474, + "grad_norm": 2.859375, + "learning_rate": 4.156999504731451e-06, + "loss": 4.1797, + "step": 527400 + }, + { + "epoch": 4.540719696969697, + "grad_norm": 3.453125, + "learning_rate": 4.14927291206233e-06, + "loss": 4.5263, + "step": 527450 + }, + { + "epoch": 4.541150137741047, + "grad_norm": 4.34375, + "learning_rate": 4.141553354630367e-06, + "loss": 4.1936, + "step": 527500 + }, + { + "epoch": 4.541580578512397, + "grad_norm": 2.96875, + "learning_rate": 4.133840833002134e-06, + "loss": 3.9898, + "step": 527550 + }, + { + "epoch": 4.542011019283747, + "grad_norm": 1.8515625, + "learning_rate": 4.12613534774372e-06, + "loss": 4.4491, + "step": 527600 + }, + { + "epoch": 4.542441460055096, + "grad_norm": 2.390625, + "learning_rate": 4.118436899420697e-06, + "loss": 4.3879, + "step": 527650 + }, + { + "epoch": 4.542871900826446, + "grad_norm": 0.953125, + "learning_rate": 4.110745488598111e-06, + "loss": 4.3766, + "step": 527700 + }, + { + "epoch": 4.543302341597796, + "grad_norm": 3.03125, + "learning_rate": 4.10306111584049e-06, + "loss": 4.4666, + "step": 527750 + }, + { + "epoch": 4.543732782369146, + "grad_norm": 4.1875, + "learning_rate": 4.09538378171187e-06, + "loss": 4.2749, + "step": 527800 + }, + { + "epoch": 4.544163223140496, + "grad_norm": 4.59375, + "learning_rate": 4.087713486775713e-06, + "loss": 4.4894, + "step": 527850 + }, + { + "epoch": 4.544593663911845, + "grad_norm": 3.515625, + "learning_rate": 4.080050231595045e-06, + "loss": 4.5309, + "step": 527900 + }, + { + "epoch": 4.545024104683195, + "grad_norm": 2.640625, + "learning_rate": 4.072394016732306e-06, + "loss": 4.1269, + "step": 527950 + }, + { + "epoch": 4.545454545454545, + "grad_norm": 3.828125, + "learning_rate": 4.064744842749446e-06, + "loss": 4.1829, + "step": 528000 + }, + { + "epoch": 4.545454545454545, + "eval_loss": 4.990324974060059, + "eval_runtime": 24.3963, + "eval_samples_per_second": 26.234, + "eval_steps_per_second": 13.117, + "eval_tts_loss": 7.480024797597445, + "step": 528000 + }, + { + "epoch": 4.545884986225896, + "grad_norm": 3.4375, + "learning_rate": 4.0571027102079275e-06, + "loss": 4.5549, + "step": 528050 + }, + { + "epoch": 4.5463154269972454, + "grad_norm": 5.09375, + "learning_rate": 4.0494676196686236e-06, + "loss": 4.2084, + "step": 528100 + }, + { + "epoch": 4.546745867768595, + "grad_norm": 2.515625, + "learning_rate": 4.041839571691952e-06, + "loss": 4.2983, + "step": 528150 + }, + { + "epoch": 4.547176308539945, + "grad_norm": 3.359375, + "learning_rate": 4.0342185668377995e-06, + "loss": 4.6226, + "step": 528200 + }, + { + "epoch": 4.547606749311295, + "grad_norm": 1.6796875, + "learning_rate": 4.026604605665529e-06, + "loss": 4.3992, + "step": 528250 + }, + { + "epoch": 4.548037190082645, + "grad_norm": 3.140625, + "learning_rate": 4.018997688733983e-06, + "loss": 4.6786, + "step": 528300 + }, + { + "epoch": 4.548467630853994, + "grad_norm": 4.65625, + "learning_rate": 4.0113978166015255e-06, + "loss": 4.5706, + "step": 528350 + }, + { + "epoch": 4.548898071625344, + "grad_norm": 3.78125, + "learning_rate": 4.003804989825921e-06, + "loss": 3.9249, + "step": 528400 + }, + { + "epoch": 4.549328512396694, + "grad_norm": 2.8125, + "learning_rate": 3.99621920896448e-06, + "loss": 4.002, + "step": 528450 + }, + { + "epoch": 4.549758953168044, + "grad_norm": 3.375, + "learning_rate": 3.9886404745740125e-06, + "loss": 3.8839, + "step": 528500 + }, + { + "epoch": 4.550189393939394, + "grad_norm": 2.0625, + "learning_rate": 3.981068787210751e-06, + "loss": 4.2406, + "step": 528550 + }, + { + "epoch": 4.550619834710744, + "grad_norm": 2.53125, + "learning_rate": 3.973504147430451e-06, + "loss": 4.4745, + "step": 528600 + }, + { + "epoch": 4.551050275482094, + "grad_norm": 4.46875, + "learning_rate": 3.965946555788336e-06, + "loss": 4.4182, + "step": 528650 + }, + { + "epoch": 4.551480716253444, + "grad_norm": 2.203125, + "learning_rate": 3.958396012839116e-06, + "loss": 4.3205, + "step": 528700 + }, + { + "epoch": 4.551911157024794, + "grad_norm": 3.0625, + "learning_rate": 3.950852519137005e-06, + "loss": 4.4146, + "step": 528750 + }, + { + "epoch": 4.5523415977961434, + "grad_norm": 5.59375, + "learning_rate": 3.943316075235659e-06, + "loss": 4.5433, + "step": 528800 + }, + { + "epoch": 4.552772038567493, + "grad_norm": 3.734375, + "learning_rate": 3.935786681688247e-06, + "loss": 4.9725, + "step": 528850 + }, + { + "epoch": 4.553202479338843, + "grad_norm": 2.96875, + "learning_rate": 3.928264339047394e-06, + "loss": 4.2192, + "step": 528900 + }, + { + "epoch": 4.553632920110193, + "grad_norm": 3.59375, + "learning_rate": 3.920749047865247e-06, + "loss": 4.189, + "step": 528950 + }, + { + "epoch": 4.554063360881543, + "grad_norm": 1.9453125, + "learning_rate": 3.913240808693375e-06, + "loss": 4.0661, + "step": 529000 + }, + { + "epoch": 4.554493801652892, + "grad_norm": 5.53125, + "learning_rate": 3.905739622082916e-06, + "loss": 4.4467, + "step": 529050 + }, + { + "epoch": 4.554924242424242, + "grad_norm": 3.84375, + "learning_rate": 3.898245488584418e-06, + "loss": 4.4869, + "step": 529100 + }, + { + "epoch": 4.555354683195592, + "grad_norm": 3.859375, + "learning_rate": 3.890758408747919e-06, + "loss": 4.2002, + "step": 529150 + }, + { + "epoch": 4.555785123966942, + "grad_norm": 1.921875, + "learning_rate": 3.883278383122979e-06, + "loss": 4.3617, + "step": 529200 + }, + { + "epoch": 4.556215564738292, + "grad_norm": 5.125, + "learning_rate": 3.875805412258604e-06, + "loss": 3.9994, + "step": 529250 + }, + { + "epoch": 4.556646005509641, + "grad_norm": 2.015625, + "learning_rate": 3.868339496703277e-06, + "loss": 3.85, + "step": 529300 + }, + { + "epoch": 4.557076446280992, + "grad_norm": 6.59375, + "learning_rate": 3.860880637005027e-06, + "loss": 4.3406, + "step": 529350 + }, + { + "epoch": 4.557506887052342, + "grad_norm": 3.09375, + "learning_rate": 3.85342883371127e-06, + "loss": 4.1945, + "step": 529400 + }, + { + "epoch": 4.557937327823692, + "grad_norm": 4.71875, + "learning_rate": 3.84598408736897e-06, + "loss": 4.6027, + "step": 529450 + }, + { + "epoch": 4.5583677685950414, + "grad_norm": 3.296875, + "learning_rate": 3.838546398524567e-06, + "loss": 4.1592, + "step": 529500 + }, + { + "epoch": 4.558798209366391, + "grad_norm": 3.765625, + "learning_rate": 3.831115767723959e-06, + "loss": 4.2978, + "step": 529550 + }, + { + "epoch": 4.559228650137741, + "grad_norm": 4.1875, + "learning_rate": 3.823692195512552e-06, + "loss": 4.3131, + "step": 529600 + }, + { + "epoch": 4.559659090909091, + "grad_norm": 4.25, + "learning_rate": 3.8162756824351995e-06, + "loss": 4.3137, + "step": 529650 + }, + { + "epoch": 4.560089531680441, + "grad_norm": 2.625, + "learning_rate": 3.8088662290362563e-06, + "loss": 4.0229, + "step": 529700 + }, + { + "epoch": 4.56051997245179, + "grad_norm": 4.1875, + "learning_rate": 3.8014638358595865e-06, + "loss": 4.444, + "step": 529750 + }, + { + "epoch": 4.56095041322314, + "grad_norm": 2.84375, + "learning_rate": 3.794068503448489e-06, + "loss": 3.9654, + "step": 529800 + }, + { + "epoch": 4.56138085399449, + "grad_norm": 2.921875, + "learning_rate": 3.786680232345774e-06, + "loss": 4.5227, + "step": 529850 + }, + { + "epoch": 4.561811294765841, + "grad_norm": 3.828125, + "learning_rate": 3.7792990230937186e-06, + "loss": 4.4995, + "step": 529900 + }, + { + "epoch": 4.5622417355371905, + "grad_norm": 2.9375, + "learning_rate": 3.7719248762340897e-06, + "loss": 4.3621, + "step": 529950 + }, + { + "epoch": 4.56267217630854, + "grad_norm": 3.453125, + "learning_rate": 3.764557792308132e-06, + "loss": 4.357, + "step": 530000 + }, + { + "epoch": 4.56310261707989, + "grad_norm": 3.34375, + "learning_rate": 3.7571977718565797e-06, + "loss": 4.2724, + "step": 530050 + }, + { + "epoch": 4.56353305785124, + "grad_norm": 5.5, + "learning_rate": 3.7498448154196452e-06, + "loss": 4.7383, + "step": 530100 + }, + { + "epoch": 4.56396349862259, + "grad_norm": 3.3125, + "learning_rate": 3.7424989235370076e-06, + "loss": 4.4627, + "step": 530150 + }, + { + "epoch": 4.5643939393939394, + "grad_norm": 2.734375, + "learning_rate": 3.7351600967478474e-06, + "loss": 4.4491, + "step": 530200 + }, + { + "epoch": 4.564824380165289, + "grad_norm": 2.734375, + "learning_rate": 3.7278283355908107e-06, + "loss": 4.312, + "step": 530250 + }, + { + "epoch": 4.565254820936639, + "grad_norm": 2.21875, + "learning_rate": 3.720503640604045e-06, + "loss": 4.1889, + "step": 530300 + }, + { + "epoch": 4.565685261707989, + "grad_norm": 3.203125, + "learning_rate": 3.713186012325165e-06, + "loss": 4.3733, + "step": 530350 + }, + { + "epoch": 4.566115702479339, + "grad_norm": 3.359375, + "learning_rate": 3.705875451291263e-06, + "loss": 4.5894, + "step": 530400 + }, + { + "epoch": 4.566546143250688, + "grad_norm": 3.140625, + "learning_rate": 3.698571958038921e-06, + "loss": 4.1079, + "step": 530450 + }, + { + "epoch": 4.566976584022038, + "grad_norm": 3.609375, + "learning_rate": 3.6912755331041992e-06, + "loss": 4.0916, + "step": 530500 + }, + { + "epoch": 4.567407024793388, + "grad_norm": 2.765625, + "learning_rate": 3.6839861770226246e-06, + "loss": 4.2991, + "step": 530550 + }, + { + "epoch": 4.567837465564738, + "grad_norm": 3.171875, + "learning_rate": 3.676703890329247e-06, + "loss": 4.4796, + "step": 530600 + }, + { + "epoch": 4.5682679063360885, + "grad_norm": 6.34375, + "learning_rate": 3.6694286735585836e-06, + "loss": 4.0997, + "step": 530650 + }, + { + "epoch": 4.568698347107438, + "grad_norm": 2.265625, + "learning_rate": 3.6621605272445625e-06, + "loss": 4.3753, + "step": 530700 + }, + { + "epoch": 4.569128787878788, + "grad_norm": 4.84375, + "learning_rate": 3.6548994519206903e-06, + "loss": 4.5602, + "step": 530750 + }, + { + "epoch": 4.569559228650138, + "grad_norm": 3.6875, + "learning_rate": 3.6476454481199074e-06, + "loss": 4.3599, + "step": 530800 + }, + { + "epoch": 4.569989669421488, + "grad_norm": 2.875, + "learning_rate": 3.6403985163746325e-06, + "loss": 4.4335, + "step": 530850 + }, + { + "epoch": 4.5704201101928374, + "grad_norm": 2.796875, + "learning_rate": 3.633158657216806e-06, + "loss": 4.1776, + "step": 530900 + }, + { + "epoch": 4.570850550964187, + "grad_norm": 4.28125, + "learning_rate": 3.625925871177771e-06, + "loss": 4.2518, + "step": 530950 + }, + { + "epoch": 4.571280991735537, + "grad_norm": 6.28125, + "learning_rate": 3.6187001587884463e-06, + "loss": 4.5516, + "step": 531000 + }, + { + "epoch": 4.571280991735537, + "eval_loss": 4.990326404571533, + "eval_runtime": 25.3331, + "eval_samples_per_second": 25.263, + "eval_steps_per_second": 12.632, + "eval_tts_loss": 7.480250930217977, + "step": 531000 + }, + { + "epoch": 4.571711432506887, + "grad_norm": 3.625, + "learning_rate": 3.611481520579141e-06, + "loss": 4.7823, + "step": 531050 + }, + { + "epoch": 4.572141873278237, + "grad_norm": 4.875, + "learning_rate": 3.60426995707972e-06, + "loss": 4.4082, + "step": 531100 + }, + { + "epoch": 4.572572314049586, + "grad_norm": 3.203125, + "learning_rate": 3.5970654688194827e-06, + "loss": 4.1592, + "step": 531150 + }, + { + "epoch": 4.573002754820937, + "grad_norm": 4.4375, + "learning_rate": 3.5898680563272168e-06, + "loss": 4.378, + "step": 531200 + }, + { + "epoch": 4.573433195592287, + "grad_norm": 1.9296875, + "learning_rate": 3.5826777201312e-06, + "loss": 3.7464, + "step": 531250 + }, + { + "epoch": 4.573863636363637, + "grad_norm": 5.5, + "learning_rate": 3.5754944607591987e-06, + "loss": 4.4324, + "step": 531300 + }, + { + "epoch": 4.5742940771349865, + "grad_norm": 4.625, + "learning_rate": 3.5683182787384364e-06, + "loss": 4.1571, + "step": 531350 + }, + { + "epoch": 4.574724517906336, + "grad_norm": 2.953125, + "learning_rate": 3.561149174595646e-06, + "loss": 4.153, + "step": 531400 + }, + { + "epoch": 4.575154958677686, + "grad_norm": 3.640625, + "learning_rate": 3.5539871488570078e-06, + "loss": 4.4161, + "step": 531450 + }, + { + "epoch": 4.575585399449036, + "grad_norm": 5.0, + "learning_rate": 3.5468322020482e-06, + "loss": 4.055, + "step": 531500 + }, + { + "epoch": 4.576015840220386, + "grad_norm": 3.53125, + "learning_rate": 3.539684334694371e-06, + "loss": 4.0948, + "step": 531550 + }, + { + "epoch": 4.5764462809917354, + "grad_norm": 2.890625, + "learning_rate": 3.5325435473201884e-06, + "loss": 4.2058, + "step": 531600 + }, + { + "epoch": 4.576876721763085, + "grad_norm": 4.59375, + "learning_rate": 3.525409840449756e-06, + "loss": 4.8002, + "step": 531650 + }, + { + "epoch": 4.577307162534435, + "grad_norm": 3.390625, + "learning_rate": 3.518283214606666e-06, + "loss": 4.1627, + "step": 531700 + }, + { + "epoch": 4.577737603305785, + "grad_norm": 3.078125, + "learning_rate": 3.5111636703140104e-06, + "loss": 4.1937, + "step": 531750 + }, + { + "epoch": 4.578168044077135, + "grad_norm": 1.96875, + "learning_rate": 3.5040512080943278e-06, + "loss": 4.2847, + "step": 531800 + }, + { + "epoch": 4.578598484848484, + "grad_norm": 4.0625, + "learning_rate": 3.4969458284696664e-06, + "loss": 4.2713, + "step": 531850 + }, + { + "epoch": 4.579028925619834, + "grad_norm": 3.75, + "learning_rate": 3.4898475319615655e-06, + "loss": 4.2822, + "step": 531900 + }, + { + "epoch": 4.579459366391185, + "grad_norm": 5.875, + "learning_rate": 3.4827563190909963e-06, + "loss": 4.3862, + "step": 531950 + }, + { + "epoch": 4.579889807162535, + "grad_norm": 2.296875, + "learning_rate": 3.475672190378465e-06, + "loss": 4.3589, + "step": 532000 + }, + { + "epoch": 4.5803202479338845, + "grad_norm": 3.234375, + "learning_rate": 3.4685951463439116e-06, + "loss": 4.254, + "step": 532050 + }, + { + "epoch": 4.580750688705234, + "grad_norm": 2.5, + "learning_rate": 3.4615251875067644e-06, + "loss": 4.0744, + "step": 532100 + }, + { + "epoch": 4.581181129476584, + "grad_norm": 6.96875, + "learning_rate": 3.4544623143859756e-06, + "loss": 4.2891, + "step": 532150 + }, + { + "epoch": 4.581611570247934, + "grad_norm": 2.5625, + "learning_rate": 3.447406527499941e-06, + "loss": 4.3272, + "step": 532200 + }, + { + "epoch": 4.582042011019284, + "grad_norm": 3.234375, + "learning_rate": 3.440357827366514e-06, + "loss": 4.3425, + "step": 532250 + }, + { + "epoch": 4.5824724517906334, + "grad_norm": 5.15625, + "learning_rate": 3.4333162145030685e-06, + "loss": 4.5651, + "step": 532300 + }, + { + "epoch": 4.582902892561983, + "grad_norm": 6.125, + "learning_rate": 3.4262816894264473e-06, + "loss": 4.2723, + "step": 532350 + }, + { + "epoch": 4.583333333333333, + "grad_norm": 2.484375, + "learning_rate": 3.419254252652959e-06, + "loss": 3.9293, + "step": 532400 + }, + { + "epoch": 4.583763774104683, + "grad_norm": 2.203125, + "learning_rate": 3.4122339046984366e-06, + "loss": 4.4241, + "step": 532450 + }, + { + "epoch": 4.5841942148760335, + "grad_norm": 4.9375, + "learning_rate": 3.4052206460781e-06, + "loss": 4.3243, + "step": 532500 + }, + { + "epoch": 4.584624655647383, + "grad_norm": 1.6875, + "learning_rate": 3.3982144773067604e-06, + "loss": 4.1942, + "step": 532550 + }, + { + "epoch": 4.585055096418733, + "grad_norm": 2.890625, + "learning_rate": 3.3912153988986172e-06, + "loss": 4.5547, + "step": 532600 + }, + { + "epoch": 4.585485537190083, + "grad_norm": 5.5625, + "learning_rate": 3.3842234113674154e-06, + "loss": 4.2556, + "step": 532650 + }, + { + "epoch": 4.585915977961433, + "grad_norm": 5.09375, + "learning_rate": 3.3772385152263442e-06, + "loss": 4.3848, + "step": 532700 + }, + { + "epoch": 4.5863464187327825, + "grad_norm": 4.1875, + "learning_rate": 3.3702607109880713e-06, + "loss": 4.346, + "step": 532750 + }, + { + "epoch": 4.586776859504132, + "grad_norm": 3.078125, + "learning_rate": 3.3632899991647537e-06, + "loss": 4.4291, + "step": 532800 + }, + { + "epoch": 4.587207300275482, + "grad_norm": 4.53125, + "learning_rate": 3.356326380268038e-06, + "loss": 4.4228, + "step": 532850 + }, + { + "epoch": 4.587637741046832, + "grad_norm": 5.15625, + "learning_rate": 3.349369854809026e-06, + "loss": 4.3154, + "step": 532900 + }, + { + "epoch": 4.588068181818182, + "grad_norm": 2.984375, + "learning_rate": 3.3424204232983204e-06, + "loss": 3.9473, + "step": 532950 + }, + { + "epoch": 4.5884986225895315, + "grad_norm": 3.515625, + "learning_rate": 3.335478086245991e-06, + "loss": 4.0014, + "step": 533000 + }, + { + "epoch": 4.588929063360881, + "grad_norm": 6.5, + "learning_rate": 3.3285428441615973e-06, + "loss": 4.4765, + "step": 533050 + }, + { + "epoch": 4.589359504132231, + "grad_norm": 2.65625, + "learning_rate": 3.321614697554154e-06, + "loss": 4.3522, + "step": 533100 + }, + { + "epoch": 4.589789944903581, + "grad_norm": 3.625, + "learning_rate": 3.3146936469321875e-06, + "loss": 4.1383, + "step": 533150 + }, + { + "epoch": 4.5902203856749315, + "grad_norm": 3.875, + "learning_rate": 3.307779692803681e-06, + "loss": 4.6708, + "step": 533200 + }, + { + "epoch": 4.590650826446281, + "grad_norm": 3.625, + "learning_rate": 3.3008728356761164e-06, + "loss": 4.6503, + "step": 533250 + }, + { + "epoch": 4.591081267217631, + "grad_norm": 3.4375, + "learning_rate": 3.2939730760564224e-06, + "loss": 4.3016, + "step": 533300 + }, + { + "epoch": 4.591511707988981, + "grad_norm": 4.40625, + "learning_rate": 3.287080414451038e-06, + "loss": 4.2083, + "step": 533350 + }, + { + "epoch": 4.591942148760331, + "grad_norm": 3.4375, + "learning_rate": 3.2801948513658585e-06, + "loss": 4.3242, + "step": 533400 + }, + { + "epoch": 4.5923725895316805, + "grad_norm": 3.09375, + "learning_rate": 3.2733163873063023e-06, + "loss": 4.2407, + "step": 533450 + }, + { + "epoch": 4.59280303030303, + "grad_norm": 3.125, + "learning_rate": 3.2664450227771983e-06, + "loss": 4.5062, + "step": 533500 + }, + { + "epoch": 4.59323347107438, + "grad_norm": 3.421875, + "learning_rate": 3.2595807582828984e-06, + "loss": 4.3134, + "step": 533550 + }, + { + "epoch": 4.59366391184573, + "grad_norm": 6.65625, + "learning_rate": 3.252723594327234e-06, + "loss": 4.2919, + "step": 533600 + }, + { + "epoch": 4.59409435261708, + "grad_norm": 3.796875, + "learning_rate": 3.24587353141349e-06, + "loss": 4.4213, + "step": 533650 + }, + { + "epoch": 4.5945247933884295, + "grad_norm": 3.75, + "learning_rate": 3.239030570044466e-06, + "loss": 4.3427, + "step": 533700 + }, + { + "epoch": 4.594955234159779, + "grad_norm": 4.1875, + "learning_rate": 3.232194710722425e-06, + "loss": 4.2234, + "step": 533750 + }, + { + "epoch": 4.59538567493113, + "grad_norm": 2.046875, + "learning_rate": 3.2253659539490667e-06, + "loss": 3.8704, + "step": 533800 + }, + { + "epoch": 4.59581611570248, + "grad_norm": 5.53125, + "learning_rate": 3.2185443002256343e-06, + "loss": 4.2281, + "step": 533850 + }, + { + "epoch": 4.5962465564738295, + "grad_norm": 2.78125, + "learning_rate": 3.2117297500528276e-06, + "loss": 4.1042, + "step": 533900 + }, + { + "epoch": 4.596676997245179, + "grad_norm": 6.75, + "learning_rate": 3.2049223039308017e-06, + "loss": 3.956, + "step": 533950 + }, + { + "epoch": 4.597107438016529, + "grad_norm": 3.015625, + "learning_rate": 3.1981219623592463e-06, + "loss": 4.5374, + "step": 534000 + }, + { + "epoch": 4.597107438016529, + "eval_loss": 4.990235805511475, + "eval_runtime": 24.5558, + "eval_samples_per_second": 26.063, + "eval_steps_per_second": 13.032, + "eval_tts_loss": 7.479965384752732, + "step": 534000 + } + ], + "logging_steps": 50, + "max_steps": 580800, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 3000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.925328502150398e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}