diff --git "a/checkpoint-19000/trainer_state.json" "b/checkpoint-19000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-19000/trainer_state.json" @@ -0,0 +1,4747 @@ +{ + "best_metric": 9.778974739970282, + "best_model_checkpoint": "./checkpoint-9000", + "epoch": 1116.8235294117646, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 2.78, + "learning_rate": 5.0453611334320685e-06, + "loss": 0.6804, + "step": 25 + }, + { + "epoch": 5.56, + "learning_rate": 6.229195710491767e-06, + "loss": 0.1847, + "step": 50 + }, + { + "epoch": 8.33, + "learning_rate": 6.903829450223392e-06, + "loss": 0.0821, + "step": 75 + }, + { + "epoch": 11.11, + "learning_rate": 7.377725845391017e-06, + "loss": 0.0485, + "step": 100 + }, + { + "epoch": 13.89, + "learning_rate": 7.743343231239583e-06, + "loss": 0.0432, + "step": 125 + }, + { + "epoch": 16.67, + "learning_rate": 8.041073861170494e-06, + "loss": 0.0328, + "step": 150 + }, + { + "epoch": 19.44, + "learning_rate": 8.292222957399574e-06, + "loss": 0.0291, + "step": 175 + }, + { + "epoch": 22.22, + "learning_rate": 8.509413541357755e-06, + "loss": 0.0298, + "step": 200 + }, + { + "epoch": 25.0, + "learning_rate": 8.700744577655557e-06, + "loss": 0.0269, + "step": 225 + }, + { + "epoch": 27.78, + "learning_rate": 8.871723942761204e-06, + "loss": 0.0272, + "step": 250 + }, + { + "epoch": 30.56, + "learning_rate": 9.026267958246849e-06, + "loss": 0.027, + "step": 275 + }, + { + "epoch": 33.33, + "learning_rate": 9.16726106663399e-06, + "loss": 0.0213, + "step": 300 + }, + { + "epoch": 36.11, + "learning_rate": 9.296889251455016e-06, + "loss": 0.0215, + "step": 325 + }, + { + "epoch": 38.89, + "learning_rate": 9.416848797368692e-06, + "loss": 0.0195, + "step": 350 + }, + { + "epoch": 41.67, + "learning_rate": 9.528482449516371e-06, + "loss": 0.0167, + "step": 375 + }, + { + "epoch": 44.44, + "learning_rate": 9.632871309784314e-06, + "loss": 0.0184, + "step": 400 + }, + { + "epoch": 47.22, + "learning_rate": 9.73089868785391e-06, + "loss": 0.0159, + "step": 425 + }, + { + "epoch": 50.0, + "learning_rate": 9.823295589572114e-06, + "loss": 0.0172, + "step": 450 + }, + { + "epoch": 52.78, + "learning_rate": 9.910673836465484e-06, + "loss": 0.0123, + "step": 475 + }, + { + "epoch": 55.56, + "learning_rate": 9.993550644973805e-06, + "loss": 0.0144, + "step": 500 + }, + { + "epoch": 58.33, + "learning_rate": 9.951111111111111e-06, + "loss": 0.0135, + "step": 525 + }, + { + "epoch": 61.11, + "learning_rate": 9.895555555555557e-06, + "loss": 0.0128, + "step": 550 + }, + { + "epoch": 63.89, + "learning_rate": 9.84e-06, + "loss": 0.0115, + "step": 575 + }, + { + "epoch": 66.67, + "learning_rate": 9.784444444444445e-06, + "loss": 0.0105, + "step": 600 + }, + { + "epoch": 69.44, + "learning_rate": 9.72888888888889e-06, + "loss": 0.0104, + "step": 625 + }, + { + "epoch": 72.22, + "learning_rate": 9.673333333333334e-06, + "loss": 0.0087, + "step": 650 + }, + { + "epoch": 75.0, + "learning_rate": 9.617777777777778e-06, + "loss": 0.0091, + "step": 675 + }, + { + "epoch": 77.78, + "learning_rate": 9.562222222222223e-06, + "loss": 0.0085, + "step": 700 + }, + { + "epoch": 80.56, + "learning_rate": 9.506666666666667e-06, + "loss": 0.011, + "step": 725 + }, + { + "epoch": 83.33, + "learning_rate": 9.451111111111112e-06, + "loss": 0.0117, + "step": 750 + }, + { + "epoch": 86.11, + "learning_rate": 9.395555555555556e-06, + "loss": 0.0088, + "step": 775 + }, + { + "epoch": 88.89, + "learning_rate": 9.340000000000002e-06, + "loss": 0.0077, + "step": 800 + }, + { + "epoch": 91.67, + "learning_rate": 9.284444444444444e-06, + "loss": 0.0091, + "step": 825 + }, + { + "epoch": 94.44, + "learning_rate": 9.22888888888889e-06, + "loss": 0.0067, + "step": 850 + }, + { + "epoch": 97.22, + "learning_rate": 9.173333333333334e-06, + "loss": 0.0082, + "step": 875 + }, + { + "epoch": 100.0, + "learning_rate": 9.117777777777778e-06, + "loss": 0.0055, + "step": 900 + }, + { + "epoch": 102.78, + "learning_rate": 9.062222222222224e-06, + "loss": 0.0077, + "step": 925 + }, + { + "epoch": 105.56, + "learning_rate": 9.006666666666666e-06, + "loss": 0.0055, + "step": 950 + }, + { + "epoch": 108.33, + "learning_rate": 8.951111111111112e-06, + "loss": 0.005, + "step": 975 + }, + { + "epoch": 111.11, + "learning_rate": 8.895555555555556e-06, + "loss": 0.0066, + "step": 1000 + }, + { + "epoch": 111.11, + "eval_loss": 0.2357177734375, + "eval_runtime": 64.7785, + "eval_samples_per_second": 2.022, + "eval_steps_per_second": 0.139, + "eval_wer": 23.044096728307252, + "step": 1000 + }, + { + "epoch": 113.89, + "learning_rate": 8.844444444444445e-06, + "loss": 0.0057, + "step": 1025 + }, + { + "epoch": 116.67, + "learning_rate": 8.788888888888891e-06, + "loss": 0.0096, + "step": 1050 + }, + { + "epoch": 119.44, + "learning_rate": 8.733333333333333e-06, + "loss": 0.0063, + "step": 1075 + }, + { + "epoch": 122.22, + "learning_rate": 8.677777777777779e-06, + "loss": 0.0069, + "step": 1100 + }, + { + "epoch": 125.0, + "learning_rate": 8.622222222222223e-06, + "loss": 0.0069, + "step": 1125 + }, + { + "epoch": 127.78, + "learning_rate": 8.566666666666667e-06, + "loss": 0.0046, + "step": 1150 + }, + { + "epoch": 130.56, + "learning_rate": 8.511111111111113e-06, + "loss": 0.0051, + "step": 1175 + }, + { + "epoch": 133.33, + "learning_rate": 8.455555555555555e-06, + "loss": 0.0055, + "step": 1200 + }, + { + "epoch": 136.11, + "learning_rate": 8.400000000000001e-06, + "loss": 0.0042, + "step": 1225 + }, + { + "epoch": 138.89, + "learning_rate": 8.344444444444445e-06, + "loss": 0.0042, + "step": 1250 + }, + { + "epoch": 141.67, + "learning_rate": 8.288888888888889e-06, + "loss": 0.005, + "step": 1275 + }, + { + "epoch": 144.44, + "learning_rate": 8.233333333333335e-06, + "loss": 0.0054, + "step": 1300 + }, + { + "epoch": 147.22, + "learning_rate": 8.177777777777779e-06, + "loss": 0.0052, + "step": 1325 + }, + { + "epoch": 150.0, + "learning_rate": 8.122222222222223e-06, + "loss": 0.0057, + "step": 1350 + }, + { + "epoch": 152.78, + "learning_rate": 8.066666666666667e-06, + "loss": 0.0039, + "step": 1375 + }, + { + "epoch": 155.56, + "learning_rate": 8.011111111111113e-06, + "loss": 0.0032, + "step": 1400 + }, + { + "epoch": 158.33, + "learning_rate": 7.955555555555557e-06, + "loss": 0.0034, + "step": 1425 + }, + { + "epoch": 161.11, + "learning_rate": 7.902222222222223e-06, + "loss": 0.0068, + "step": 1450 + }, + { + "epoch": 163.89, + "learning_rate": 7.846666666666667e-06, + "loss": 0.0034, + "step": 1475 + }, + { + "epoch": 166.67, + "learning_rate": 7.791111111111111e-06, + "loss": 0.0026, + "step": 1500 + }, + { + "epoch": 169.44, + "learning_rate": 7.735555555555557e-06, + "loss": 0.0036, + "step": 1525 + }, + { + "epoch": 172.22, + "learning_rate": 7.680000000000001e-06, + "loss": 0.0033, + "step": 1550 + }, + { + "epoch": 175.0, + "learning_rate": 7.624444444444445e-06, + "loss": 0.0021, + "step": 1575 + }, + { + "epoch": 177.78, + "learning_rate": 7.56888888888889e-06, + "loss": 0.0033, + "step": 1600 + }, + { + "epoch": 180.56, + "learning_rate": 7.513333333333334e-06, + "loss": 0.0037, + "step": 1625 + }, + { + "epoch": 183.33, + "learning_rate": 7.457777777777778e-06, + "loss": 0.0032, + "step": 1650 + }, + { + "epoch": 186.11, + "learning_rate": 7.402222222222223e-06, + "loss": 0.0037, + "step": 1675 + }, + { + "epoch": 188.89, + "learning_rate": 7.346666666666668e-06, + "loss": 0.0022, + "step": 1700 + }, + { + "epoch": 191.67, + "learning_rate": 7.291111111111112e-06, + "loss": 0.0024, + "step": 1725 + }, + { + "epoch": 194.44, + "learning_rate": 7.235555555555556e-06, + "loss": 0.0026, + "step": 1750 + }, + { + "epoch": 197.22, + "learning_rate": 7.180000000000001e-06, + "loss": 0.0022, + "step": 1775 + }, + { + "epoch": 200.0, + "learning_rate": 7.124444444444445e-06, + "loss": 0.0026, + "step": 1800 + }, + { + "epoch": 202.78, + "learning_rate": 7.06888888888889e-06, + "loss": 0.0032, + "step": 1825 + }, + { + "epoch": 205.56, + "learning_rate": 7.0133333333333345e-06, + "loss": 0.0033, + "step": 1850 + }, + { + "epoch": 208.33, + "learning_rate": 6.9577777777777785e-06, + "loss": 0.0027, + "step": 1875 + }, + { + "epoch": 211.11, + "learning_rate": 6.902222222222223e-06, + "loss": 0.0043, + "step": 1900 + }, + { + "epoch": 213.89, + "learning_rate": 6.846666666666667e-06, + "loss": 0.0028, + "step": 1925 + }, + { + "epoch": 216.67, + "learning_rate": 6.7911111111111115e-06, + "loss": 0.0012, + "step": 1950 + }, + { + "epoch": 219.44, + "learning_rate": 6.735555555555556e-06, + "loss": 0.0015, + "step": 1975 + }, + { + "epoch": 222.22, + "learning_rate": 6.680000000000001e-06, + "loss": 0.0024, + "step": 2000 + }, + { + "epoch": 222.22, + "eval_loss": 0.2607421875, + "eval_runtime": 57.0802, + "eval_samples_per_second": 2.295, + "eval_steps_per_second": 0.158, + "eval_wer": 19.665718349928877, + "step": 2000 + }, + { + "epoch": 225.0, + "learning_rate": 6.6244444444444445e-06, + "loss": 0.0029, + "step": 2025 + }, + { + "epoch": 227.78, + "learning_rate": 6.568888888888889e-06, + "loss": 0.0021, + "step": 2050 + }, + { + "epoch": 230.56, + "learning_rate": 6.513333333333333e-06, + "loss": 0.0022, + "step": 2075 + }, + { + "epoch": 233.33, + "learning_rate": 6.457777777777778e-06, + "loss": 0.0022, + "step": 2100 + }, + { + "epoch": 236.11, + "learning_rate": 6.402222222222223e-06, + "loss": 0.0011, + "step": 2125 + }, + { + "epoch": 238.89, + "learning_rate": 6.346666666666668e-06, + "loss": 0.0026, + "step": 2150 + }, + { + "epoch": 241.67, + "learning_rate": 6.291111111111111e-06, + "loss": 0.0021, + "step": 2175 + }, + { + "epoch": 244.44, + "learning_rate": 6.235555555555556e-06, + "loss": 0.0016, + "step": 2200 + }, + { + "epoch": 247.22, + "learning_rate": 6.18e-06, + "loss": 0.0024, + "step": 2225 + }, + { + "epoch": 250.0, + "learning_rate": 6.124444444444445e-06, + "loss": 0.0046, + "step": 2250 + }, + { + "epoch": 252.78, + "learning_rate": 6.06888888888889e-06, + "loss": 0.0018, + "step": 2275 + }, + { + "epoch": 255.56, + "learning_rate": 6.013333333333335e-06, + "loss": 0.0012, + "step": 2300 + }, + { + "epoch": 258.33, + "learning_rate": 5.957777777777778e-06, + "loss": 0.0014, + "step": 2325 + }, + { + "epoch": 261.11, + "learning_rate": 5.902222222222223e-06, + "loss": 0.0007, + "step": 2350 + }, + { + "epoch": 263.89, + "learning_rate": 5.846666666666667e-06, + "loss": 0.0014, + "step": 2375 + }, + { + "epoch": 266.67, + "learning_rate": 5.791111111111112e-06, + "loss": 0.0009, + "step": 2400 + }, + { + "epoch": 269.44, + "learning_rate": 5.735555555555557e-06, + "loss": 0.0008, + "step": 2425 + }, + { + "epoch": 272.22, + "learning_rate": 5.68e-06, + "loss": 0.0028, + "step": 2450 + }, + { + "epoch": 275.0, + "learning_rate": 5.624444444444445e-06, + "loss": 0.002, + "step": 2475 + }, + { + "epoch": 277.78, + "learning_rate": 5.56888888888889e-06, + "loss": 0.0011, + "step": 2500 + }, + { + "epoch": 280.56, + "learning_rate": 5.513333333333334e-06, + "loss": 0.001, + "step": 2525 + }, + { + "epoch": 283.33, + "learning_rate": 5.4577777777777785e-06, + "loss": 0.0007, + "step": 2550 + }, + { + "epoch": 286.11, + "learning_rate": 5.402222222222223e-06, + "loss": 0.0007, + "step": 2575 + }, + { + "epoch": 288.89, + "learning_rate": 5.346666666666667e-06, + "loss": 0.0008, + "step": 2600 + }, + { + "epoch": 291.67, + "learning_rate": 5.2911111111111115e-06, + "loss": 0.0012, + "step": 2625 + }, + { + "epoch": 294.44, + "learning_rate": 5.235555555555556e-06, + "loss": 0.0016, + "step": 2650 + }, + { + "epoch": 297.22, + "learning_rate": 5.18e-06, + "loss": 0.0012, + "step": 2675 + }, + { + "epoch": 300.0, + "learning_rate": 5.124444444444445e-06, + "loss": 0.001, + "step": 2700 + }, + { + "epoch": 302.78, + "learning_rate": 5.06888888888889e-06, + "loss": 0.0012, + "step": 2725 + }, + { + "epoch": 305.56, + "learning_rate": 5.013333333333333e-06, + "loss": 0.001, + "step": 2750 + }, + { + "epoch": 308.33, + "learning_rate": 4.957777777777778e-06, + "loss": 0.0013, + "step": 2775 + }, + { + "epoch": 311.11, + "learning_rate": 4.902222222222222e-06, + "loss": 0.0015, + "step": 2800 + }, + { + "epoch": 313.89, + "learning_rate": 4.846666666666667e-06, + "loss": 0.0014, + "step": 2825 + }, + { + "epoch": 316.67, + "learning_rate": 4.791111111111111e-06, + "loss": 0.0007, + "step": 2850 + }, + { + "epoch": 319.44, + "learning_rate": 4.735555555555556e-06, + "loss": 0.0009, + "step": 2875 + }, + { + "epoch": 322.22, + "learning_rate": 4.680000000000001e-06, + "loss": 0.0021, + "step": 2900 + }, + { + "epoch": 325.0, + "learning_rate": 4.624444444444445e-06, + "loss": 0.0015, + "step": 2925 + }, + { + "epoch": 327.78, + "learning_rate": 4.568888888888889e-06, + "loss": 0.0012, + "step": 2950 + }, + { + "epoch": 330.56, + "learning_rate": 4.513333333333333e-06, + "loss": 0.0009, + "step": 2975 + }, + { + "epoch": 333.33, + "learning_rate": 4.457777777777778e-06, + "loss": 0.0011, + "step": 3000 + }, + { + "epoch": 333.33, + "eval_loss": 0.277099609375, + "eval_runtime": 58.1634, + "eval_samples_per_second": 2.252, + "eval_steps_per_second": 0.155, + "eval_wer": 20.874822190611663, + "step": 3000 + }, + { + "epoch": 177.47, + "learning_rate": 1.760888888888889e-06, + "loss": 0.5801, + "step": 3025 + }, + { + "epoch": 178.94, + "learning_rate": 1.7386666666666666e-06, + "loss": 0.1501, + "step": 3050 + }, + { + "epoch": 180.41, + "learning_rate": 1.7164444444444444e-06, + "loss": 0.0789, + "step": 3075 + }, + { + "epoch": 181.88, + "learning_rate": 1.6942222222222222e-06, + "loss": 0.0531, + "step": 3100 + }, + { + "epoch": 183.35, + "learning_rate": 1.6719999999999998e-06, + "loss": 0.0409, + "step": 3125 + }, + { + "epoch": 184.82, + "learning_rate": 1.6497777777777777e-06, + "loss": 0.032, + "step": 3150 + }, + { + "epoch": 186.29, + "learning_rate": 1.6275555555555555e-06, + "loss": 0.0251, + "step": 3175 + }, + { + "epoch": 187.76, + "learning_rate": 1.6053333333333333e-06, + "loss": 0.0203, + "step": 3200 + }, + { + "epoch": 189.24, + "learning_rate": 1.5831111111111111e-06, + "loss": 0.0167, + "step": 3225 + }, + { + "epoch": 190.71, + "learning_rate": 1.560888888888889e-06, + "loss": 0.0159, + "step": 3250 + }, + { + "epoch": 192.18, + "learning_rate": 1.5386666666666666e-06, + "loss": 0.0137, + "step": 3275 + }, + { + "epoch": 193.65, + "learning_rate": 1.5164444444444444e-06, + "loss": 0.0122, + "step": 3300 + }, + { + "epoch": 195.12, + "learning_rate": 1.494222222222222e-06, + "loss": 0.0106, + "step": 3325 + }, + { + "epoch": 196.59, + "learning_rate": 1.4719999999999998e-06, + "loss": 0.0094, + "step": 3350 + }, + { + "epoch": 198.06, + "learning_rate": 1.4497777777777777e-06, + "loss": 0.009, + "step": 3375 + }, + { + "epoch": 199.53, + "learning_rate": 1.4275555555555555e-06, + "loss": 0.0104, + "step": 3400 + }, + { + "epoch": 201.0, + "learning_rate": 1.4053333333333333e-06, + "loss": 0.0069, + "step": 3425 + }, + { + "epoch": 202.47, + "learning_rate": 1.3848888888888889e-06, + "loss": 0.0073, + "step": 3450 + }, + { + "epoch": 203.94, + "learning_rate": 1.3626666666666667e-06, + "loss": 0.0073, + "step": 3475 + }, + { + "epoch": 205.41, + "learning_rate": 1.3404444444444445e-06, + "loss": 0.0063, + "step": 3500 + }, + { + "epoch": 206.88, + "learning_rate": 1.3182222222222221e-06, + "loss": 0.007, + "step": 3525 + }, + { + "epoch": 208.35, + "learning_rate": 1.296e-06, + "loss": 0.0061, + "step": 3550 + }, + { + "epoch": 209.82, + "learning_rate": 1.2737777777777776e-06, + "loss": 0.0053, + "step": 3575 + }, + { + "epoch": 211.29, + "learning_rate": 1.2515555555555554e-06, + "loss": 0.0056, + "step": 3600 + }, + { + "epoch": 212.76, + "learning_rate": 1.2293333333333334e-06, + "loss": 0.005, + "step": 3625 + }, + { + "epoch": 214.24, + "learning_rate": 1.207111111111111e-06, + "loss": 0.0047, + "step": 3650 + }, + { + "epoch": 215.71, + "learning_rate": 1.1848888888888889e-06, + "loss": 0.0052, + "step": 3675 + }, + { + "epoch": 217.18, + "learning_rate": 1.1626666666666667e-06, + "loss": 0.0044, + "step": 3700 + }, + { + "epoch": 218.65, + "learning_rate": 1.1404444444444443e-06, + "loss": 0.0046, + "step": 3725 + }, + { + "epoch": 220.12, + "learning_rate": 1.1182222222222221e-06, + "loss": 0.0045, + "step": 3750 + }, + { + "epoch": 221.59, + "learning_rate": 1.096e-06, + "loss": 0.0041, + "step": 3775 + }, + { + "epoch": 223.06, + "learning_rate": 1.0737777777777776e-06, + "loss": 0.0054, + "step": 3800 + }, + { + "epoch": 224.53, + "learning_rate": 1.0515555555555556e-06, + "loss": 0.0038, + "step": 3825 + }, + { + "epoch": 226.0, + "learning_rate": 1.0293333333333334e-06, + "loss": 0.0038, + "step": 3850 + }, + { + "epoch": 227.47, + "learning_rate": 1.007111111111111e-06, + "loss": 0.004, + "step": 3875 + }, + { + "epoch": 228.94, + "learning_rate": 9.848888888888889e-07, + "loss": 0.0036, + "step": 3900 + }, + { + "epoch": 230.41, + "learning_rate": 9.626666666666667e-07, + "loss": 0.0041, + "step": 3925 + }, + { + "epoch": 231.88, + "learning_rate": 9.404444444444443e-07, + "loss": 0.0032, + "step": 3950 + }, + { + "epoch": 233.35, + "learning_rate": 9.182222222222223e-07, + "loss": 0.0038, + "step": 3975 + }, + { + "epoch": 234.82, + "learning_rate": 8.96e-07, + "loss": 0.0043, + "step": 4000 + }, + { + "epoch": 234.82, + "eval_loss": 0.45361328125, + "eval_runtime": 157.593, + "eval_samples_per_second": 1.726, + "eval_steps_per_second": 0.108, + "eval_wer": 10.707652303120357, + "step": 4000 + }, + { + "epoch": 236.29, + "learning_rate": 8.737777777777777e-07, + "loss": 0.004, + "step": 4025 + }, + { + "epoch": 237.76, + "learning_rate": 8.515555555555555e-07, + "loss": 0.0029, + "step": 4050 + }, + { + "epoch": 239.24, + "learning_rate": 8.293333333333333e-07, + "loss": 0.0034, + "step": 4075 + }, + { + "epoch": 240.71, + "learning_rate": 8.071111111111111e-07, + "loss": 0.0032, + "step": 4100 + }, + { + "epoch": 242.18, + "learning_rate": 7.848888888888888e-07, + "loss": 0.003, + "step": 4125 + }, + { + "epoch": 243.65, + "learning_rate": 7.626666666666667e-07, + "loss": 0.0034, + "step": 4150 + }, + { + "epoch": 245.12, + "learning_rate": 7.404444444444444e-07, + "loss": 0.0032, + "step": 4175 + }, + { + "epoch": 246.59, + "learning_rate": 7.182222222222222e-07, + "loss": 0.0032, + "step": 4200 + }, + { + "epoch": 248.06, + "learning_rate": 6.959999999999999e-07, + "loss": 0.0028, + "step": 4225 + }, + { + "epoch": 249.53, + "learning_rate": 6.737777777777778e-07, + "loss": 0.0028, + "step": 4250 + }, + { + "epoch": 251.0, + "learning_rate": 6.515555555555555e-07, + "loss": 0.0025, + "step": 4275 + }, + { + "epoch": 252.47, + "learning_rate": 6.293333333333333e-07, + "loss": 0.0026, + "step": 4300 + }, + { + "epoch": 253.94, + "learning_rate": 6.071111111111111e-07, + "loss": 0.003, + "step": 4325 + }, + { + "epoch": 255.41, + "learning_rate": 5.848888888888889e-07, + "loss": 0.0026, + "step": 4350 + }, + { + "epoch": 256.88, + "learning_rate": 5.626666666666666e-07, + "loss": 0.0027, + "step": 4375 + }, + { + "epoch": 258.35, + "learning_rate": 5.404444444444443e-07, + "loss": 0.003, + "step": 4400 + }, + { + "epoch": 259.82, + "learning_rate": 5.182222222222223e-07, + "loss": 0.0027, + "step": 4425 + }, + { + "epoch": 261.29, + "learning_rate": 4.977777777777777e-07, + "loss": 0.0026, + "step": 4450 + }, + { + "epoch": 262.76, + "learning_rate": 4.7555555555555554e-07, + "loss": 0.0023, + "step": 4475 + }, + { + "epoch": 264.24, + "learning_rate": 4.5333333333333326e-07, + "loss": 0.0021, + "step": 4500 + }, + { + "epoch": 265.71, + "learning_rate": 4.311111111111111e-07, + "loss": 0.0022, + "step": 4525 + }, + { + "epoch": 267.18, + "learning_rate": 4.088888888888889e-07, + "loss": 0.0034, + "step": 4550 + }, + { + "epoch": 268.65, + "learning_rate": 3.8666666666666664e-07, + "loss": 0.0023, + "step": 4575 + }, + { + "epoch": 270.12, + "learning_rate": 3.6444444444444446e-07, + "loss": 0.0022, + "step": 4600 + }, + { + "epoch": 271.59, + "learning_rate": 3.422222222222222e-07, + "loss": 0.0022, + "step": 4625 + }, + { + "epoch": 273.06, + "learning_rate": 3.2e-07, + "loss": 0.0024, + "step": 4650 + }, + { + "epoch": 274.53, + "learning_rate": 2.9777777777777773e-07, + "loss": 0.0031, + "step": 4675 + }, + { + "epoch": 276.0, + "learning_rate": 2.7555555555555555e-07, + "loss": 0.0022, + "step": 4700 + }, + { + "epoch": 277.47, + "learning_rate": 2.533333333333333e-07, + "loss": 0.0022, + "step": 4725 + }, + { + "epoch": 278.94, + "learning_rate": 2.311111111111111e-07, + "loss": 0.0021, + "step": 4750 + }, + { + "epoch": 280.41, + "learning_rate": 2.088888888888889e-07, + "loss": 0.0023, + "step": 4775 + }, + { + "epoch": 281.88, + "learning_rate": 1.8666666666666667e-07, + "loss": 0.0025, + "step": 4800 + }, + { + "epoch": 283.35, + "learning_rate": 1.6444444444444444e-07, + "loss": 0.0022, + "step": 4825 + }, + { + "epoch": 284.82, + "learning_rate": 1.4222222222222222e-07, + "loss": 0.0022, + "step": 4850 + }, + { + "epoch": 286.29, + "learning_rate": 1.2e-07, + "loss": 0.0021, + "step": 4875 + }, + { + "epoch": 287.76, + "learning_rate": 9.777777777777778e-08, + "loss": 0.0023, + "step": 4900 + }, + { + "epoch": 289.24, + "learning_rate": 7.555555555555555e-08, + "loss": 0.002, + "step": 4925 + }, + { + "epoch": 290.71, + "learning_rate": 5.3333333333333334e-08, + "loss": 0.0025, + "step": 4950 + }, + { + "epoch": 292.18, + "learning_rate": 3.111111111111111e-08, + "loss": 0.002, + "step": 4975 + }, + { + "epoch": 293.65, + "learning_rate": 8.888888888888889e-09, + "loss": 0.0024, + "step": 5000 + }, + { + "epoch": 293.65, + "eval_loss": 0.465576171875, + "eval_runtime": 158.123, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.108, + "eval_wer": 10.642644873699851, + "step": 5000 + }, + { + "epoch": 295.47, + "learning_rate": 2.7544827586206896e-06, + "loss": 0.0021, + "step": 5025 + }, + { + "epoch": 296.94, + "learning_rate": 2.7475862068965512e-06, + "loss": 0.0024, + "step": 5050 + }, + { + "epoch": 298.41, + "learning_rate": 2.7406896551724137e-06, + "loss": 0.0025, + "step": 5075 + }, + { + "epoch": 299.88, + "learning_rate": 2.7337931034482757e-06, + "loss": 0.0022, + "step": 5100 + }, + { + "epoch": 301.35, + "learning_rate": 2.7268965517241378e-06, + "loss": 0.0027, + "step": 5125 + }, + { + "epoch": 302.82, + "learning_rate": 2.7200000000000002e-06, + "loss": 0.0024, + "step": 5150 + }, + { + "epoch": 304.29, + "learning_rate": 2.713103448275862e-06, + "loss": 0.0024, + "step": 5175 + }, + { + "epoch": 305.76, + "learning_rate": 2.7062068965517243e-06, + "loss": 0.0023, + "step": 5200 + }, + { + "epoch": 307.24, + "learning_rate": 2.699310344827586e-06, + "loss": 0.0027, + "step": 5225 + }, + { + "epoch": 308.71, + "learning_rate": 2.6924137931034483e-06, + "loss": 0.0023, + "step": 5250 + }, + { + "epoch": 310.18, + "learning_rate": 2.68551724137931e-06, + "loss": 0.0021, + "step": 5275 + }, + { + "epoch": 311.65, + "learning_rate": 2.6786206896551724e-06, + "loss": 0.0025, + "step": 5300 + }, + { + "epoch": 313.12, + "learning_rate": 2.6717241379310344e-06, + "loss": 0.0021, + "step": 5325 + }, + { + "epoch": 314.59, + "learning_rate": 2.6648275862068965e-06, + "loss": 0.0019, + "step": 5350 + }, + { + "epoch": 316.06, + "learning_rate": 2.6579310344827585e-06, + "loss": 0.0019, + "step": 5375 + }, + { + "epoch": 317.53, + "learning_rate": 2.6510344827586205e-06, + "loss": 0.0018, + "step": 5400 + }, + { + "epoch": 319.0, + "learning_rate": 2.6441379310344826e-06, + "loss": 0.0022, + "step": 5425 + }, + { + "epoch": 320.47, + "learning_rate": 2.6377931034482757e-06, + "loss": 0.0019, + "step": 5450 + }, + { + "epoch": 321.94, + "learning_rate": 2.6308965517241377e-06, + "loss": 0.0016, + "step": 5475 + }, + { + "epoch": 323.41, + "learning_rate": 2.624e-06, + "loss": 0.0013, + "step": 5500 + }, + { + "epoch": 324.88, + "learning_rate": 2.617103448275862e-06, + "loss": 0.0019, + "step": 5525 + }, + { + "epoch": 326.35, + "learning_rate": 2.6102068965517243e-06, + "loss": 0.0017, + "step": 5550 + }, + { + "epoch": 327.82, + "learning_rate": 2.603310344827586e-06, + "loss": 0.0018, + "step": 5575 + }, + { + "epoch": 329.29, + "learning_rate": 2.5964137931034483e-06, + "loss": 0.0013, + "step": 5600 + }, + { + "epoch": 330.76, + "learning_rate": 2.58951724137931e-06, + "loss": 0.0016, + "step": 5625 + }, + { + "epoch": 332.24, + "learning_rate": 2.5826206896551724e-06, + "loss": 0.0013, + "step": 5650 + }, + { + "epoch": 333.71, + "learning_rate": 2.575724137931034e-06, + "loss": 0.0018, + "step": 5675 + }, + { + "epoch": 335.18, + "learning_rate": 2.5688275862068965e-06, + "loss": 0.0014, + "step": 5700 + }, + { + "epoch": 336.65, + "learning_rate": 2.561931034482759e-06, + "loss": 0.0013, + "step": 5725 + }, + { + "epoch": 338.12, + "learning_rate": 2.5550344827586205e-06, + "loss": 0.0011, + "step": 5750 + }, + { + "epoch": 339.59, + "learning_rate": 2.548137931034483e-06, + "loss": 0.0018, + "step": 5775 + }, + { + "epoch": 341.06, + "learning_rate": 2.5412413793103446e-06, + "loss": 0.0013, + "step": 5800 + }, + { + "epoch": 342.53, + "learning_rate": 2.534344827586207e-06, + "loss": 0.0012, + "step": 5825 + }, + { + "epoch": 344.0, + "learning_rate": 2.5274482758620687e-06, + "loss": 0.0014, + "step": 5850 + }, + { + "epoch": 345.47, + "learning_rate": 2.520551724137931e-06, + "loss": 0.001, + "step": 5875 + }, + { + "epoch": 346.94, + "learning_rate": 2.5136551724137927e-06, + "loss": 0.0012, + "step": 5900 + }, + { + "epoch": 348.41, + "learning_rate": 2.506758620689655e-06, + "loss": 0.0012, + "step": 5925 + }, + { + "epoch": 349.88, + "learning_rate": 2.499862068965517e-06, + "loss": 0.0012, + "step": 5950 + }, + { + "epoch": 351.35, + "learning_rate": 2.4929655172413792e-06, + "loss": 0.0013, + "step": 5975 + }, + { + "epoch": 352.82, + "learning_rate": 2.4860689655172413e-06, + "loss": 0.0015, + "step": 6000 + }, + { + "epoch": 352.82, + "eval_loss": 0.497802734375, + "eval_runtime": 156.7207, + "eval_samples_per_second": 1.736, + "eval_steps_per_second": 0.108, + "eval_wer": 10.503343239227341, + "step": 6000 + }, + { + "epoch": 354.29, + "learning_rate": 2.4791724137931033e-06, + "loss": 0.0013, + "step": 6025 + }, + { + "epoch": 355.76, + "learning_rate": 2.4722758620689653e-06, + "loss": 0.0012, + "step": 6050 + }, + { + "epoch": 357.24, + "learning_rate": 2.4653793103448274e-06, + "loss": 0.0011, + "step": 6075 + }, + { + "epoch": 358.71, + "learning_rate": 2.4584827586206894e-06, + "loss": 0.0008, + "step": 6100 + }, + { + "epoch": 360.18, + "learning_rate": 2.4515862068965514e-06, + "loss": 0.0008, + "step": 6125 + }, + { + "epoch": 361.65, + "learning_rate": 2.444689655172414e-06, + "loss": 0.0011, + "step": 6150 + }, + { + "epoch": 363.12, + "learning_rate": 2.4377931034482755e-06, + "loss": 0.0012, + "step": 6175 + }, + { + "epoch": 364.59, + "learning_rate": 2.430896551724138e-06, + "loss": 0.0013, + "step": 6200 + }, + { + "epoch": 366.06, + "learning_rate": 2.424e-06, + "loss": 0.0011, + "step": 6225 + }, + { + "epoch": 367.53, + "learning_rate": 2.417103448275862e-06, + "loss": 0.0012, + "step": 6250 + }, + { + "epoch": 369.0, + "learning_rate": 2.410206896551724e-06, + "loss": 0.0011, + "step": 6275 + }, + { + "epoch": 370.47, + "learning_rate": 2.403310344827586e-06, + "loss": 0.0009, + "step": 6300 + }, + { + "epoch": 371.94, + "learning_rate": 2.396413793103448e-06, + "loss": 0.0014, + "step": 6325 + }, + { + "epoch": 373.41, + "learning_rate": 2.38951724137931e-06, + "loss": 0.0018, + "step": 6350 + }, + { + "epoch": 374.88, + "learning_rate": 2.382620689655172e-06, + "loss": 0.0009, + "step": 6375 + }, + { + "epoch": 376.35, + "learning_rate": 2.3757241379310342e-06, + "loss": 0.001, + "step": 6400 + }, + { + "epoch": 377.82, + "learning_rate": 2.3688275862068963e-06, + "loss": 0.0009, + "step": 6425 + }, + { + "epoch": 379.29, + "learning_rate": 2.36248275862069e-06, + "loss": 0.0008, + "step": 6450 + }, + { + "epoch": 380.76, + "learning_rate": 2.3555862068965514e-06, + "loss": 0.0009, + "step": 6475 + }, + { + "epoch": 382.24, + "learning_rate": 2.348689655172414e-06, + "loss": 0.0009, + "step": 6500 + }, + { + "epoch": 383.71, + "learning_rate": 2.3417931034482755e-06, + "loss": 0.0011, + "step": 6525 + }, + { + "epoch": 385.18, + "learning_rate": 2.334896551724138e-06, + "loss": 0.0008, + "step": 6550 + }, + { + "epoch": 386.65, + "learning_rate": 2.3279999999999996e-06, + "loss": 0.0006, + "step": 6575 + }, + { + "epoch": 388.12, + "learning_rate": 2.321103448275862e-06, + "loss": 0.001, + "step": 6600 + }, + { + "epoch": 389.59, + "learning_rate": 2.314206896551724e-06, + "loss": 0.0009, + "step": 6625 + }, + { + "epoch": 391.06, + "learning_rate": 2.307310344827586e-06, + "loss": 0.0008, + "step": 6650 + }, + { + "epoch": 392.53, + "learning_rate": 2.300413793103448e-06, + "loss": 0.001, + "step": 6675 + }, + { + "epoch": 394.0, + "learning_rate": 2.29351724137931e-06, + "loss": 0.0009, + "step": 6700 + }, + { + "epoch": 395.47, + "learning_rate": 2.2866206896551726e-06, + "loss": 0.0011, + "step": 6725 + }, + { + "epoch": 396.94, + "learning_rate": 2.2797241379310342e-06, + "loss": 0.0008, + "step": 6750 + }, + { + "epoch": 398.41, + "learning_rate": 2.2728275862068967e-06, + "loss": 0.0007, + "step": 6775 + }, + { + "epoch": 399.88, + "learning_rate": 2.2659310344827583e-06, + "loss": 0.0006, + "step": 6800 + }, + { + "epoch": 401.35, + "learning_rate": 2.2590344827586207e-06, + "loss": 0.0007, + "step": 6825 + }, + { + "epoch": 402.82, + "learning_rate": 2.2521379310344828e-06, + "loss": 0.0011, + "step": 6850 + }, + { + "epoch": 404.29, + "learning_rate": 2.245241379310345e-06, + "loss": 0.001, + "step": 6875 + }, + { + "epoch": 405.76, + "learning_rate": 2.238344827586207e-06, + "loss": 0.0007, + "step": 6900 + }, + { + "epoch": 407.24, + "learning_rate": 2.231448275862069e-06, + "loss": 0.0008, + "step": 6925 + }, + { + "epoch": 408.71, + "learning_rate": 2.224551724137931e-06, + "loss": 0.0007, + "step": 6950 + }, + { + "epoch": 410.18, + "learning_rate": 2.217655172413793e-06, + "loss": 0.0008, + "step": 6975 + }, + { + "epoch": 411.65, + "learning_rate": 2.210758620689655e-06, + "loss": 0.0007, + "step": 7000 + }, + { + "epoch": 411.65, + "eval_loss": 0.5146484375, + "eval_runtime": 159.9051, + "eval_samples_per_second": 1.701, + "eval_steps_per_second": 0.106, + "eval_wer": 10.057578008915305, + "step": 7000 + }, + { + "epoch": 413.12, + "learning_rate": 2.203862068965517e-06, + "loss": 0.0007, + "step": 7025 + }, + { + "epoch": 414.59, + "learning_rate": 2.196965517241379e-06, + "loss": 0.0006, + "step": 7050 + }, + { + "epoch": 416.06, + "learning_rate": 2.1900689655172415e-06, + "loss": 0.0009, + "step": 7075 + }, + { + "epoch": 417.53, + "learning_rate": 2.183172413793103e-06, + "loss": 0.0008, + "step": 7100 + }, + { + "epoch": 419.0, + "learning_rate": 2.1762758620689656e-06, + "loss": 0.0007, + "step": 7125 + }, + { + "epoch": 420.47, + "learning_rate": 2.1693793103448276e-06, + "loss": 0.0008, + "step": 7150 + }, + { + "epoch": 421.94, + "learning_rate": 2.1624827586206896e-06, + "loss": 0.0007, + "step": 7175 + }, + { + "epoch": 423.41, + "learning_rate": 2.1555862068965517e-06, + "loss": 0.0005, + "step": 7200 + }, + { + "epoch": 424.88, + "learning_rate": 2.1486896551724137e-06, + "loss": 0.0008, + "step": 7225 + }, + { + "epoch": 426.35, + "learning_rate": 2.1417931034482757e-06, + "loss": 0.0009, + "step": 7250 + }, + { + "epoch": 427.82, + "learning_rate": 2.1348965517241378e-06, + "loss": 0.0009, + "step": 7275 + }, + { + "epoch": 429.29, + "learning_rate": 2.128e-06, + "loss": 0.0006, + "step": 7300 + }, + { + "epoch": 430.76, + "learning_rate": 2.121103448275862e-06, + "loss": 0.0006, + "step": 7325 + }, + { + "epoch": 432.24, + "learning_rate": 2.1142068965517243e-06, + "loss": 0.0006, + "step": 7350 + }, + { + "epoch": 433.71, + "learning_rate": 2.107310344827586e-06, + "loss": 0.0006, + "step": 7375 + }, + { + "epoch": 435.18, + "learning_rate": 2.1004137931034483e-06, + "loss": 0.0007, + "step": 7400 + }, + { + "epoch": 436.65, + "learning_rate": 2.09351724137931e-06, + "loss": 0.0006, + "step": 7425 + }, + { + "epoch": 438.12, + "learning_rate": 2.0871724137931035e-06, + "loss": 0.0007, + "step": 7450 + }, + { + "epoch": 439.59, + "learning_rate": 2.080275862068965e-06, + "loss": 0.0006, + "step": 7475 + }, + { + "epoch": 441.06, + "learning_rate": 2.0733793103448276e-06, + "loss": 0.0009, + "step": 7500 + }, + { + "epoch": 442.53, + "learning_rate": 2.0664827586206896e-06, + "loss": 0.0008, + "step": 7525 + }, + { + "epoch": 444.0, + "learning_rate": 2.0595862068965516e-06, + "loss": 0.0005, + "step": 7550 + }, + { + "epoch": 445.47, + "learning_rate": 2.0526896551724137e-06, + "loss": 0.0004, + "step": 7575 + }, + { + "epoch": 446.94, + "learning_rate": 2.0457931034482757e-06, + "loss": 0.0006, + "step": 7600 + }, + { + "epoch": 448.41, + "learning_rate": 2.0388965517241377e-06, + "loss": 0.0007, + "step": 7625 + }, + { + "epoch": 449.88, + "learning_rate": 2.0319999999999998e-06, + "loss": 0.0005, + "step": 7650 + }, + { + "epoch": 451.35, + "learning_rate": 2.025103448275862e-06, + "loss": 0.0005, + "step": 7675 + }, + { + "epoch": 452.82, + "learning_rate": 2.018206896551724e-06, + "loss": 0.0009, + "step": 7700 + }, + { + "epoch": 454.29, + "learning_rate": 2.0113103448275863e-06, + "loss": 0.0005, + "step": 7725 + }, + { + "epoch": 455.76, + "learning_rate": 2.0044137931034483e-06, + "loss": 0.0005, + "step": 7750 + }, + { + "epoch": 457.24, + "learning_rate": 1.9975172413793104e-06, + "loss": 0.0006, + "step": 7775 + }, + { + "epoch": 458.71, + "learning_rate": 1.9906206896551724e-06, + "loss": 0.0005, + "step": 7800 + }, + { + "epoch": 460.18, + "learning_rate": 1.9837241379310344e-06, + "loss": 0.0005, + "step": 7825 + }, + { + "epoch": 461.65, + "learning_rate": 1.9768275862068965e-06, + "loss": 0.0006, + "step": 7850 + }, + { + "epoch": 463.12, + "learning_rate": 1.9699310344827585e-06, + "loss": 0.0004, + "step": 7875 + }, + { + "epoch": 464.59, + "learning_rate": 1.9630344827586205e-06, + "loss": 0.0007, + "step": 7900 + }, + { + "epoch": 466.06, + "learning_rate": 1.956137931034483e-06, + "loss": 0.0005, + "step": 7925 + }, + { + "epoch": 467.53, + "learning_rate": 1.949241379310345e-06, + "loss": 0.0006, + "step": 7950 + }, + { + "epoch": 469.0, + "learning_rate": 1.942344827586207e-06, + "loss": 0.0006, + "step": 7975 + }, + { + "epoch": 470.47, + "learning_rate": 1.935448275862069e-06, + "loss": 0.0007, + "step": 8000 + }, + { + "epoch": 470.47, + "eval_loss": 0.53857421875, + "eval_runtime": 158.4391, + "eval_samples_per_second": 1.717, + "eval_steps_per_second": 0.107, + "eval_wer": 10.131872213967311, + "step": 8000 + }, + { + "epoch": 471.94, + "learning_rate": 1.928551724137931e-06, + "loss": 0.0005, + "step": 8025 + }, + { + "epoch": 473.41, + "learning_rate": 1.921655172413793e-06, + "loss": 0.0008, + "step": 8050 + }, + { + "epoch": 474.88, + "learning_rate": 1.914758620689655e-06, + "loss": 0.0005, + "step": 8075 + }, + { + "epoch": 476.35, + "learning_rate": 1.907862068965517e-06, + "loss": 0.0004, + "step": 8100 + }, + { + "epoch": 477.82, + "learning_rate": 1.9009655172413792e-06, + "loss": 0.0005, + "step": 8125 + }, + { + "epoch": 479.29, + "learning_rate": 1.8940689655172413e-06, + "loss": 0.0004, + "step": 8150 + }, + { + "epoch": 480.76, + "learning_rate": 1.8871724137931033e-06, + "loss": 0.0007, + "step": 8175 + }, + { + "epoch": 482.24, + "learning_rate": 1.8802758620689653e-06, + "loss": 0.0005, + "step": 8200 + }, + { + "epoch": 483.71, + "learning_rate": 1.8733793103448274e-06, + "loss": 0.0007, + "step": 8225 + }, + { + "epoch": 485.18, + "learning_rate": 1.8664827586206894e-06, + "loss": 0.0005, + "step": 8250 + }, + { + "epoch": 486.65, + "learning_rate": 1.8595862068965517e-06, + "loss": 0.0004, + "step": 8275 + }, + { + "epoch": 488.12, + "learning_rate": 1.8526896551724137e-06, + "loss": 0.0005, + "step": 8300 + }, + { + "epoch": 489.59, + "learning_rate": 1.845793103448276e-06, + "loss": 0.0004, + "step": 8325 + }, + { + "epoch": 491.06, + "learning_rate": 1.838896551724138e-06, + "loss": 0.0004, + "step": 8350 + }, + { + "epoch": 492.53, + "learning_rate": 1.832e-06, + "loss": 0.0005, + "step": 8375 + }, + { + "epoch": 494.0, + "learning_rate": 1.825103448275862e-06, + "loss": 0.0004, + "step": 8400 + }, + { + "epoch": 495.47, + "learning_rate": 1.818206896551724e-06, + "loss": 0.0007, + "step": 8425 + }, + { + "epoch": 496.94, + "learning_rate": 1.811862068965517e-06, + "loss": 0.0008, + "step": 8450 + }, + { + "epoch": 498.41, + "learning_rate": 1.8049655172413792e-06, + "loss": 0.0005, + "step": 8475 + }, + { + "epoch": 499.88, + "learning_rate": 1.7980689655172413e-06, + "loss": 0.0006, + "step": 8500 + }, + { + "epoch": 501.35, + "learning_rate": 1.7911724137931035e-06, + "loss": 0.0004, + "step": 8525 + }, + { + "epoch": 502.82, + "learning_rate": 1.7842758620689655e-06, + "loss": 0.0004, + "step": 8550 + }, + { + "epoch": 504.29, + "learning_rate": 1.7773793103448276e-06, + "loss": 0.0006, + "step": 8575 + }, + { + "epoch": 505.76, + "learning_rate": 1.7704827586206896e-06, + "loss": 0.0004, + "step": 8600 + }, + { + "epoch": 507.24, + "learning_rate": 1.7635862068965516e-06, + "loss": 0.0004, + "step": 8625 + }, + { + "epoch": 508.71, + "learning_rate": 1.7566896551724137e-06, + "loss": 0.0006, + "step": 8650 + }, + { + "epoch": 510.18, + "learning_rate": 1.7497931034482757e-06, + "loss": 0.0004, + "step": 8675 + }, + { + "epoch": 511.65, + "learning_rate": 1.742896551724138e-06, + "loss": 0.0005, + "step": 8700 + }, + { + "epoch": 513.12, + "learning_rate": 1.736e-06, + "loss": 0.0006, + "step": 8725 + }, + { + "epoch": 514.59, + "learning_rate": 1.729103448275862e-06, + "loss": 0.0006, + "step": 8750 + }, + { + "epoch": 516.06, + "learning_rate": 1.722206896551724e-06, + "loss": 0.0004, + "step": 8775 + }, + { + "epoch": 517.53, + "learning_rate": 1.715310344827586e-06, + "loss": 0.0003, + "step": 8800 + }, + { + "epoch": 519.0, + "learning_rate": 1.7084137931034481e-06, + "loss": 0.0003, + "step": 8825 + }, + { + "epoch": 520.47, + "learning_rate": 1.7015172413793101e-06, + "loss": 0.0004, + "step": 8850 + }, + { + "epoch": 521.94, + "learning_rate": 1.6946206896551722e-06, + "loss": 0.0006, + "step": 8875 + }, + { + "epoch": 523.41, + "learning_rate": 1.6877241379310342e-06, + "loss": 0.0005, + "step": 8900 + }, + { + "epoch": 524.88, + "learning_rate": 1.6808275862068967e-06, + "loss": 0.0029, + "step": 8925 + }, + { + "epoch": 526.35, + "learning_rate": 1.6739310344827587e-06, + "loss": 0.0004, + "step": 8950 + }, + { + "epoch": 527.82, + "learning_rate": 1.6670344827586207e-06, + "loss": 0.0003, + "step": 8975 + }, + { + "epoch": 529.29, + "learning_rate": 1.6601379310344828e-06, + "loss": 0.0004, + "step": 9000 + }, + { + "epoch": 529.29, + "eval_loss": 0.5361328125, + "eval_runtime": 156.9399, + "eval_samples_per_second": 1.733, + "eval_steps_per_second": 0.108, + "eval_wer": 9.778974739970282, + "step": 9000 + }, + { + "epoch": 530.76, + "learning_rate": 1.6532413793103448e-06, + "loss": 0.0006, + "step": 9025 + }, + { + "epoch": 532.24, + "learning_rate": 1.6463448275862068e-06, + "loss": 0.0003, + "step": 9050 + }, + { + "epoch": 533.71, + "learning_rate": 1.6394482758620689e-06, + "loss": 0.0003, + "step": 9075 + }, + { + "epoch": 535.18, + "learning_rate": 1.632551724137931e-06, + "loss": 0.0005, + "step": 9100 + }, + { + "epoch": 536.65, + "learning_rate": 1.625655172413793e-06, + "loss": 0.0006, + "step": 9125 + }, + { + "epoch": 538.12, + "learning_rate": 1.6187586206896552e-06, + "loss": 0.0003, + "step": 9150 + }, + { + "epoch": 539.59, + "learning_rate": 1.6118620689655172e-06, + "loss": 0.0004, + "step": 9175 + }, + { + "epoch": 541.06, + "learning_rate": 1.6049655172413792e-06, + "loss": 0.0003, + "step": 9200 + }, + { + "epoch": 542.53, + "learning_rate": 1.5980689655172413e-06, + "loss": 0.0004, + "step": 9225 + }, + { + "epoch": 544.0, + "learning_rate": 1.5911724137931033e-06, + "loss": 0.0006, + "step": 9250 + }, + { + "epoch": 545.47, + "learning_rate": 1.5842758620689653e-06, + "loss": 0.0002, + "step": 9275 + }, + { + "epoch": 546.94, + "learning_rate": 1.5773793103448274e-06, + "loss": 0.0003, + "step": 9300 + }, + { + "epoch": 548.41, + "learning_rate": 1.5704827586206896e-06, + "loss": 0.0003, + "step": 9325 + }, + { + "epoch": 549.88, + "learning_rate": 1.5635862068965516e-06, + "loss": 0.0003, + "step": 9350 + }, + { + "epoch": 551.35, + "learning_rate": 1.5566896551724139e-06, + "loss": 0.0004, + "step": 9375 + }, + { + "epoch": 552.82, + "learning_rate": 1.549793103448276e-06, + "loss": 0.0004, + "step": 9400 + }, + { + "epoch": 554.29, + "learning_rate": 1.542896551724138e-06, + "loss": 0.0005, + "step": 9425 + }, + { + "epoch": 555.76, + "learning_rate": 1.5365517241379309e-06, + "loss": 0.0004, + "step": 9450 + }, + { + "epoch": 557.24, + "learning_rate": 1.529655172413793e-06, + "loss": 0.0003, + "step": 9475 + }, + { + "epoch": 558.71, + "learning_rate": 1.522758620689655e-06, + "loss": 0.0003, + "step": 9500 + }, + { + "epoch": 560.18, + "learning_rate": 1.5158620689655172e-06, + "loss": 0.0003, + "step": 9525 + }, + { + "epoch": 561.65, + "learning_rate": 1.5089655172413792e-06, + "loss": 0.0005, + "step": 9550 + }, + { + "epoch": 563.12, + "learning_rate": 1.5020689655172415e-06, + "loss": 0.0004, + "step": 9575 + }, + { + "epoch": 564.59, + "learning_rate": 1.4951724137931035e-06, + "loss": 0.0004, + "step": 9600 + }, + { + "epoch": 566.06, + "learning_rate": 1.4882758620689655e-06, + "loss": 0.0003, + "step": 9625 + }, + { + "epoch": 567.53, + "learning_rate": 1.4813793103448276e-06, + "loss": 0.0005, + "step": 9650 + }, + { + "epoch": 569.0, + "learning_rate": 1.4744827586206896e-06, + "loss": 0.0003, + "step": 9675 + }, + { + "epoch": 570.47, + "learning_rate": 1.4675862068965516e-06, + "loss": 0.0003, + "step": 9700 + }, + { + "epoch": 571.94, + "learning_rate": 1.4606896551724137e-06, + "loss": 0.0003, + "step": 9725 + }, + { + "epoch": 573.41, + "learning_rate": 1.4537931034482757e-06, + "loss": 0.0002, + "step": 9750 + }, + { + "epoch": 574.88, + "learning_rate": 1.4468965517241377e-06, + "loss": 0.0002, + "step": 9775 + }, + { + "epoch": 576.35, + "learning_rate": 1.44e-06, + "loss": 0.0004, + "step": 9800 + }, + { + "epoch": 577.82, + "learning_rate": 1.433103448275862e-06, + "loss": 0.0002, + "step": 9825 + }, + { + "epoch": 579.29, + "learning_rate": 1.426206896551724e-06, + "loss": 0.0005, + "step": 9850 + }, + { + "epoch": 580.76, + "learning_rate": 1.419310344827586e-06, + "loss": 0.0004, + "step": 9875 + }, + { + "epoch": 582.24, + "learning_rate": 1.4124137931034481e-06, + "loss": 0.0003, + "step": 9900 + }, + { + "epoch": 583.71, + "learning_rate": 1.4055172413793104e-06, + "loss": 0.0004, + "step": 9925 + }, + { + "epoch": 585.18, + "learning_rate": 1.3986206896551724e-06, + "loss": 0.0004, + "step": 9950 + }, + { + "epoch": 586.65, + "learning_rate": 1.3917241379310344e-06, + "loss": 0.0004, + "step": 9975 + }, + { + "epoch": 588.12, + "learning_rate": 1.3848275862068965e-06, + "loss": 0.0003, + "step": 10000 + }, + { + "epoch": 588.12, + "eval_loss": 0.54296875, + "eval_runtime": 156.5622, + "eval_samples_per_second": 1.737, + "eval_steps_per_second": 0.109, + "eval_wer": 9.973997028231798, + "step": 10000 + }, + { + "epoch": 589.59, + "learning_rate": 1.3779310344827587e-06, + "loss": 0.0002, + "step": 10025 + }, + { + "epoch": 591.06, + "learning_rate": 1.3710344827586207e-06, + "loss": 0.0003, + "step": 10050 + }, + { + "epoch": 592.53, + "learning_rate": 1.3641379310344828e-06, + "loss": 0.0002, + "step": 10075 + }, + { + "epoch": 594.0, + "learning_rate": 1.3572413793103448e-06, + "loss": 0.0003, + "step": 10100 + }, + { + "epoch": 595.47, + "learning_rate": 1.3503448275862068e-06, + "loss": 0.0003, + "step": 10125 + }, + { + "epoch": 596.94, + "learning_rate": 1.3434482758620689e-06, + "loss": 0.0002, + "step": 10150 + }, + { + "epoch": 598.41, + "learning_rate": 1.3365517241379309e-06, + "loss": 0.0004, + "step": 10175 + }, + { + "epoch": 599.88, + "learning_rate": 1.329655172413793e-06, + "loss": 0.0002, + "step": 10200 + }, + { + "epoch": 601.35, + "learning_rate": 1.322758620689655e-06, + "loss": 0.0003, + "step": 10225 + }, + { + "epoch": 602.82, + "learning_rate": 1.3158620689655172e-06, + "loss": 0.0003, + "step": 10250 + }, + { + "epoch": 604.29, + "learning_rate": 1.3089655172413792e-06, + "loss": 0.0002, + "step": 10275 + }, + { + "epoch": 605.76, + "learning_rate": 1.3020689655172413e-06, + "loss": 0.0002, + "step": 10300 + }, + { + "epoch": 607.24, + "learning_rate": 1.2951724137931035e-06, + "loss": 0.0003, + "step": 10325 + }, + { + "epoch": 608.71, + "learning_rate": 1.2882758620689655e-06, + "loss": 0.0002, + "step": 10350 + }, + { + "epoch": 610.18, + "learning_rate": 1.2813793103448276e-06, + "loss": 0.0003, + "step": 10375 + }, + { + "epoch": 611.65, + "learning_rate": 1.2744827586206896e-06, + "loss": 0.0003, + "step": 10400 + }, + { + "epoch": 613.12, + "learning_rate": 1.2675862068965516e-06, + "loss": 0.0003, + "step": 10425 + }, + { + "epoch": 614.59, + "learning_rate": 1.2612413793103448e-06, + "loss": 0.0005, + "step": 10450 + }, + { + "epoch": 616.06, + "learning_rate": 1.2543448275862068e-06, + "loss": 0.0003, + "step": 10475 + }, + { + "epoch": 617.53, + "learning_rate": 1.2474482758620688e-06, + "loss": 0.0003, + "step": 10500 + }, + { + "epoch": 619.0, + "learning_rate": 1.240551724137931e-06, + "loss": 0.0001, + "step": 10525 + }, + { + "epoch": 620.47, + "learning_rate": 1.2336551724137931e-06, + "loss": 0.0002, + "step": 10550 + }, + { + "epoch": 621.94, + "learning_rate": 1.2267586206896552e-06, + "loss": 0.0005, + "step": 10575 + }, + { + "epoch": 623.41, + "learning_rate": 1.2198620689655172e-06, + "loss": 0.0002, + "step": 10600 + }, + { + "epoch": 624.88, + "learning_rate": 1.2129655172413792e-06, + "loss": 0.0003, + "step": 10625 + }, + { + "epoch": 626.35, + "learning_rate": 1.2060689655172413e-06, + "loss": 0.0002, + "step": 10650 + }, + { + "epoch": 627.82, + "learning_rate": 1.1991724137931035e-06, + "loss": 0.0003, + "step": 10675 + }, + { + "epoch": 629.29, + "learning_rate": 1.1922758620689655e-06, + "loss": 0.0003, + "step": 10700 + }, + { + "epoch": 630.76, + "learning_rate": 1.1853793103448276e-06, + "loss": 0.0003, + "step": 10725 + }, + { + "epoch": 632.24, + "learning_rate": 1.1784827586206896e-06, + "loss": 0.0002, + "step": 10750 + }, + { + "epoch": 633.71, + "learning_rate": 1.1715862068965516e-06, + "loss": 0.0002, + "step": 10775 + }, + { + "epoch": 635.18, + "learning_rate": 1.1646896551724137e-06, + "loss": 0.0004, + "step": 10800 + }, + { + "epoch": 636.65, + "learning_rate": 1.1577931034482757e-06, + "loss": 0.0003, + "step": 10825 + }, + { + "epoch": 638.12, + "learning_rate": 1.1508965517241377e-06, + "loss": 0.0002, + "step": 10850 + }, + { + "epoch": 639.59, + "learning_rate": 1.1439999999999998e-06, + "loss": 0.0002, + "step": 10875 + }, + { + "epoch": 641.06, + "learning_rate": 1.137103448275862e-06, + "loss": 0.0003, + "step": 10900 + }, + { + "epoch": 642.53, + "learning_rate": 1.1302068965517243e-06, + "loss": 0.0002, + "step": 10925 + }, + { + "epoch": 644.0, + "learning_rate": 1.1233103448275863e-06, + "loss": 0.0004, + "step": 10950 + }, + { + "epoch": 645.47, + "learning_rate": 1.1164137931034483e-06, + "loss": 0.0004, + "step": 10975 + }, + { + "epoch": 646.94, + "learning_rate": 1.1095172413793103e-06, + "loss": 0.0002, + "step": 11000 + }, + { + "epoch": 646.94, + "eval_loss": 0.5458984375, + "eval_runtime": 157.5866, + "eval_samples_per_second": 1.726, + "eval_steps_per_second": 0.108, + "eval_wer": 9.955423476968797, + "step": 11000 + }, + { + "epoch": 648.41, + "learning_rate": 1.1026206896551724e-06, + "loss": 0.0003, + "step": 11025 + }, + { + "epoch": 649.88, + "learning_rate": 1.0957241379310344e-06, + "loss": 0.0002, + "step": 11050 + }, + { + "epoch": 651.35, + "learning_rate": 1.0888275862068964e-06, + "loss": 0.0002, + "step": 11075 + }, + { + "epoch": 652.82, + "learning_rate": 1.0819310344827585e-06, + "loss": 0.0003, + "step": 11100 + }, + { + "epoch": 654.29, + "learning_rate": 1.0750344827586207e-06, + "loss": 0.0002, + "step": 11125 + }, + { + "epoch": 655.76, + "learning_rate": 1.0681379310344828e-06, + "loss": 0.0003, + "step": 11150 + }, + { + "epoch": 657.24, + "learning_rate": 1.0612413793103448e-06, + "loss": 0.0003, + "step": 11175 + }, + { + "epoch": 658.71, + "learning_rate": 1.0543448275862068e-06, + "loss": 0.0005, + "step": 11200 + }, + { + "epoch": 660.18, + "learning_rate": 1.0474482758620689e-06, + "loss": 0.0002, + "step": 11225 + }, + { + "epoch": 661.65, + "learning_rate": 1.0405517241379309e-06, + "loss": 0.0002, + "step": 11250 + }, + { + "epoch": 663.12, + "learning_rate": 1.033655172413793e-06, + "loss": 0.0003, + "step": 11275 + }, + { + "epoch": 664.59, + "learning_rate": 1.026758620689655e-06, + "loss": 0.0002, + "step": 11300 + }, + { + "epoch": 666.06, + "learning_rate": 1.0198620689655172e-06, + "loss": 0.0002, + "step": 11325 + }, + { + "epoch": 667.53, + "learning_rate": 1.0129655172413794e-06, + "loss": 0.0003, + "step": 11350 + }, + { + "epoch": 669.0, + "learning_rate": 1.0060689655172415e-06, + "loss": 0.0009, + "step": 11375 + }, + { + "epoch": 670.47, + "learning_rate": 9.991724137931033e-07, + "loss": 0.0002, + "step": 11400 + }, + { + "epoch": 671.94, + "learning_rate": 9.922758620689655e-07, + "loss": 0.0002, + "step": 11425 + }, + { + "epoch": 673.41, + "learning_rate": 9.859310344827587e-07, + "loss": 0.0003, + "step": 11450 + }, + { + "epoch": 674.88, + "learning_rate": 9.790344827586207e-07, + "loss": 0.0002, + "step": 11475 + }, + { + "epoch": 676.35, + "learning_rate": 9.721379310344827e-07, + "loss": 0.0002, + "step": 11500 + }, + { + "epoch": 677.82, + "learning_rate": 9.652413793103448e-07, + "loss": 0.0002, + "step": 11525 + }, + { + "epoch": 679.29, + "learning_rate": 9.583448275862068e-07, + "loss": 0.0003, + "step": 11550 + }, + { + "epoch": 680.76, + "learning_rate": 9.514482758620688e-07, + "loss": 0.0003, + "step": 11575 + }, + { + "epoch": 682.24, + "learning_rate": 9.44551724137931e-07, + "loss": 0.0003, + "step": 11600 + }, + { + "epoch": 683.71, + "learning_rate": 9.376551724137931e-07, + "loss": 0.0002, + "step": 11625 + }, + { + "epoch": 685.18, + "learning_rate": 9.307586206896552e-07, + "loss": 0.0002, + "step": 11650 + }, + { + "epoch": 686.65, + "learning_rate": 9.238620689655172e-07, + "loss": 0.0003, + "step": 11675 + }, + { + "epoch": 688.12, + "learning_rate": 9.169655172413792e-07, + "loss": 0.0003, + "step": 11700 + }, + { + "epoch": 689.59, + "learning_rate": 9.100689655172414e-07, + "loss": 0.0001, + "step": 11725 + }, + { + "epoch": 691.06, + "learning_rate": 9.031724137931034e-07, + "loss": 0.0004, + "step": 11750 + }, + { + "epoch": 692.53, + "learning_rate": 8.962758620689654e-07, + "loss": 0.0003, + "step": 11775 + }, + { + "epoch": 694.0, + "learning_rate": 8.893793103448275e-07, + "loss": 0.0005, + "step": 11800 + }, + { + "epoch": 695.47, + "learning_rate": 8.824827586206897e-07, + "loss": 0.0002, + "step": 11825 + }, + { + "epoch": 696.94, + "learning_rate": 8.755862068965517e-07, + "loss": 0.0002, + "step": 11850 + }, + { + "epoch": 698.41, + "learning_rate": 8.686896551724138e-07, + "loss": 0.0002, + "step": 11875 + }, + { + "epoch": 699.88, + "learning_rate": 8.617931034482758e-07, + "loss": 0.0002, + "step": 11900 + }, + { + "epoch": 701.35, + "learning_rate": 8.548965517241378e-07, + "loss": 0.0003, + "step": 11925 + }, + { + "epoch": 702.82, + "learning_rate": 8.48e-07, + "loss": 0.0002, + "step": 11950 + }, + { + "epoch": 704.29, + "learning_rate": 8.41103448275862e-07, + "loss": 0.0002, + "step": 11975 + }, + { + "epoch": 705.76, + "learning_rate": 8.34206896551724e-07, + "loss": 0.0003, + "step": 12000 + }, + { + "epoch": 705.76, + "eval_loss": 0.55615234375, + "eval_runtime": 158.1148, + "eval_samples_per_second": 1.72, + "eval_steps_per_second": 0.108, + "eval_wer": 9.9832838038633, + "step": 12000 + }, + { + "epoch": 706.47, + "learning_rate": 3.1968e-07, + "loss": 0.0002, + "step": 12025 + }, + { + "epoch": 707.94, + "learning_rate": 3.1168e-07, + "loss": 0.0003, + "step": 12050 + }, + { + "epoch": 709.41, + "learning_rate": 3.0368e-07, + "loss": 0.0002, + "step": 12075 + }, + { + "epoch": 710.88, + "learning_rate": 2.9568e-07, + "loss": 0.0002, + "step": 12100 + }, + { + "epoch": 712.35, + "learning_rate": 2.8768e-07, + "loss": 0.0003, + "step": 12125 + }, + { + "epoch": 713.82, + "learning_rate": 2.7968e-07, + "loss": 0.0002, + "step": 12150 + }, + { + "epoch": 715.29, + "learning_rate": 2.7167999999999996e-07, + "loss": 0.0005, + "step": 12175 + }, + { + "epoch": 716.76, + "learning_rate": 2.6368e-07, + "loss": 0.0002, + "step": 12200 + }, + { + "epoch": 718.24, + "learning_rate": 2.5568e-07, + "loss": 0.0002, + "step": 12225 + }, + { + "epoch": 719.71, + "learning_rate": 2.4768e-07, + "loss": 0.0002, + "step": 12250 + }, + { + "epoch": 721.18, + "learning_rate": 2.3968e-07, + "loss": 0.0003, + "step": 12275 + }, + { + "epoch": 722.65, + "learning_rate": 2.3168e-07, + "loss": 0.0002, + "step": 12300 + }, + { + "epoch": 724.12, + "learning_rate": 2.2367999999999998e-07, + "loss": 0.0002, + "step": 12325 + }, + { + "epoch": 725.59, + "learning_rate": 2.1568e-07, + "loss": 0.0002, + "step": 12350 + }, + { + "epoch": 727.06, + "learning_rate": 2.0768e-07, + "loss": 0.0001, + "step": 12375 + }, + { + "epoch": 728.53, + "learning_rate": 1.9968e-07, + "loss": 0.0002, + "step": 12400 + }, + { + "epoch": 730.0, + "learning_rate": 1.9167999999999998e-07, + "loss": 0.0002, + "step": 12425 + }, + { + "epoch": 731.47, + "learning_rate": 1.8432e-07, + "loss": 0.0003, + "step": 12450 + }, + { + "epoch": 732.94, + "learning_rate": 1.7632e-07, + "loss": 0.0002, + "step": 12475 + }, + { + "epoch": 734.41, + "learning_rate": 1.6832e-07, + "loss": 0.0001, + "step": 12500 + }, + { + "epoch": 735.88, + "learning_rate": 1.6032e-07, + "loss": 0.0001, + "step": 12525 + }, + { + "epoch": 737.35, + "learning_rate": 1.5232e-07, + "loss": 0.0001, + "step": 12550 + }, + { + "epoch": 738.82, + "learning_rate": 1.4431999999999998e-07, + "loss": 0.0002, + "step": 12575 + }, + { + "epoch": 740.29, + "learning_rate": 1.3632e-07, + "loss": 0.0002, + "step": 12600 + }, + { + "epoch": 741.76, + "learning_rate": 1.2831999999999997e-07, + "loss": 0.0001, + "step": 12625 + }, + { + "epoch": 743.24, + "learning_rate": 1.2031999999999998e-07, + "loss": 0.0003, + "step": 12650 + }, + { + "epoch": 744.71, + "learning_rate": 1.1232e-07, + "loss": 0.0002, + "step": 12675 + }, + { + "epoch": 746.18, + "learning_rate": 1.0432e-07, + "loss": 0.0002, + "step": 12700 + }, + { + "epoch": 747.65, + "learning_rate": 9.632e-08, + "loss": 0.0002, + "step": 12725 + }, + { + "epoch": 749.12, + "learning_rate": 8.831999999999999e-08, + "loss": 0.0002, + "step": 12750 + }, + { + "epoch": 750.59, + "learning_rate": 8.032e-08, + "loss": 0.0002, + "step": 12775 + }, + { + "epoch": 752.06, + "learning_rate": 7.231999999999999e-08, + "loss": 0.0002, + "step": 12800 + }, + { + "epoch": 753.53, + "learning_rate": 6.432e-08, + "loss": 0.0002, + "step": 12825 + }, + { + "epoch": 755.0, + "learning_rate": 5.632e-08, + "loss": 0.0002, + "step": 12850 + }, + { + "epoch": 756.47, + "learning_rate": 4.832e-08, + "loss": 0.0002, + "step": 12875 + }, + { + "epoch": 757.94, + "learning_rate": 4.032e-08, + "loss": 0.0002, + "step": 12900 + }, + { + "epoch": 759.41, + "learning_rate": 3.232e-08, + "loss": 0.0001, + "step": 12925 + }, + { + "epoch": 760.88, + "learning_rate": 2.432e-08, + "loss": 0.0002, + "step": 12950 + }, + { + "epoch": 762.35, + "learning_rate": 1.632e-08, + "loss": 0.0001, + "step": 12975 + }, + { + "epoch": 763.82, + "learning_rate": 8.32e-09, + "loss": 0.0001, + "step": 13000 + }, + { + "epoch": 763.82, + "eval_loss": 0.5546875, + "eval_runtime": 156.9741, + "eval_samples_per_second": 1.733, + "eval_steps_per_second": 0.108, + "eval_wer": 9.9925705794948, + "step": 13000 + }, + { + "epoch": 765.47, + "learning_rate": 2.965925925925926e-07, + "loss": 0.0002, + "step": 13025 + }, + { + "epoch": 766.94, + "learning_rate": 2.891851851851852e-07, + "loss": 0.0001, + "step": 13050 + }, + { + "epoch": 768.41, + "learning_rate": 2.817777777777778e-07, + "loss": 0.0002, + "step": 13075 + }, + { + "epoch": 769.88, + "learning_rate": 2.7437037037037035e-07, + "loss": 0.0001, + "step": 13100 + }, + { + "epoch": 771.35, + "learning_rate": 2.6696296296296296e-07, + "loss": 0.0001, + "step": 13125 + }, + { + "epoch": 772.82, + "learning_rate": 2.595555555555555e-07, + "loss": 0.0002, + "step": 13150 + }, + { + "epoch": 774.29, + "learning_rate": 2.521481481481481e-07, + "loss": 0.0002, + "step": 13175 + }, + { + "epoch": 775.76, + "learning_rate": 2.4474074074074073e-07, + "loss": 0.0001, + "step": 13200 + }, + { + "epoch": 777.24, + "learning_rate": 2.3733333333333334e-07, + "loss": 0.0001, + "step": 13225 + }, + { + "epoch": 778.71, + "learning_rate": 2.2992592592592592e-07, + "loss": 0.0001, + "step": 13250 + }, + { + "epoch": 780.18, + "learning_rate": 2.2251851851851853e-07, + "loss": 0.0003, + "step": 13275 + }, + { + "epoch": 781.65, + "learning_rate": 2.1511111111111111e-07, + "loss": 0.0001, + "step": 13300 + }, + { + "epoch": 783.12, + "learning_rate": 2.077037037037037e-07, + "loss": 0.0001, + "step": 13325 + }, + { + "epoch": 784.59, + "learning_rate": 2.002962962962963e-07, + "loss": 0.0001, + "step": 13350 + }, + { + "epoch": 786.06, + "learning_rate": 1.9288888888888889e-07, + "loss": 0.0001, + "step": 13375 + }, + { + "epoch": 787.53, + "learning_rate": 1.8548148148148147e-07, + "loss": 0.0002, + "step": 13400 + }, + { + "epoch": 789.0, + "learning_rate": 1.7807407407407408e-07, + "loss": 0.0002, + "step": 13425 + }, + { + "epoch": 790.47, + "learning_rate": 1.7066666666666666e-07, + "loss": 0.0002, + "step": 13450 + }, + { + "epoch": 791.94, + "learning_rate": 1.6385185185185184e-07, + "loss": 0.0001, + "step": 13475 + }, + { + "epoch": 793.41, + "learning_rate": 1.5644444444444442e-07, + "loss": 0.0003, + "step": 13500 + }, + { + "epoch": 794.88, + "learning_rate": 1.49037037037037e-07, + "loss": 0.0001, + "step": 13525 + }, + { + "epoch": 796.35, + "learning_rate": 1.4162962962962962e-07, + "loss": 0.0001, + "step": 13550 + }, + { + "epoch": 797.82, + "learning_rate": 1.342222222222222e-07, + "loss": 0.0001, + "step": 13575 + }, + { + "epoch": 799.29, + "learning_rate": 1.268148148148148e-07, + "loss": 0.0001, + "step": 13600 + }, + { + "epoch": 800.76, + "learning_rate": 1.194074074074074e-07, + "loss": 0.0002, + "step": 13625 + }, + { + "epoch": 802.24, + "learning_rate": 1.12e-07, + "loss": 0.0001, + "step": 13650 + }, + { + "epoch": 803.71, + "learning_rate": 1.0459259259259259e-07, + "loss": 0.0002, + "step": 13675 + }, + { + "epoch": 805.18, + "learning_rate": 9.718518518518517e-08, + "loss": 0.0002, + "step": 13700 + }, + { + "epoch": 806.65, + "learning_rate": 8.977777777777777e-08, + "loss": 0.0002, + "step": 13725 + }, + { + "epoch": 808.12, + "learning_rate": 8.237037037037037e-08, + "loss": 0.0002, + "step": 13750 + }, + { + "epoch": 809.59, + "learning_rate": 7.496296296296296e-08, + "loss": 0.0002, + "step": 13775 + }, + { + "epoch": 811.06, + "learning_rate": 6.755555555555554e-08, + "loss": 0.0001, + "step": 13800 + }, + { + "epoch": 812.53, + "learning_rate": 6.014814814814814e-08, + "loss": 0.0001, + "step": 13825 + }, + { + "epoch": 814.0, + "learning_rate": 5.274074074074074e-08, + "loss": 0.0002, + "step": 13850 + }, + { + "epoch": 815.47, + "learning_rate": 4.5333333333333336e-08, + "loss": 0.0001, + "step": 13875 + }, + { + "epoch": 816.94, + "learning_rate": 3.7925925925925924e-08, + "loss": 0.0002, + "step": 13900 + }, + { + "epoch": 818.41, + "learning_rate": 3.051851851851851e-08, + "loss": 0.0001, + "step": 13925 + }, + { + "epoch": 819.88, + "learning_rate": 2.311111111111111e-08, + "loss": 0.0002, + "step": 13950 + }, + { + "epoch": 821.35, + "learning_rate": 1.57037037037037e-08, + "loss": 0.0001, + "step": 13975 + }, + { + "epoch": 822.82, + "learning_rate": 8.296296296296296e-09, + "loss": 0.0001, + "step": 14000 + }, + { + "epoch": 822.82, + "eval_loss": 0.5576171875, + "eval_runtime": 157.6735, + "eval_samples_per_second": 1.725, + "eval_steps_per_second": 0.108, + "eval_wer": 9.899702823179792, + "step": 14000 + }, + { + "epoch": 824.47, + "learning_rate": 0.00012324102564102563, + "loss": 7.1148, + "step": 14025 + }, + { + "epoch": 825.94, + "learning_rate": 0.00012272820512820512, + "loss": 5.3802, + "step": 14050 + }, + { + "epoch": 827.41, + "learning_rate": 0.00012221538461538463, + "loss": 4.0038, + "step": 14075 + }, + { + "epoch": 828.88, + "learning_rate": 0.0001217025641025641, + "loss": 3.0771, + "step": 14100 + }, + { + "epoch": 830.35, + "learning_rate": 0.00012118974358974359, + "loss": 2.4888, + "step": 14125 + }, + { + "epoch": 831.82, + "learning_rate": 0.0001206769230769231, + "loss": 2.0454, + "step": 14150 + }, + { + "epoch": 833.29, + "learning_rate": 0.00012016410256410258, + "loss": 1.6123, + "step": 14175 + }, + { + "epoch": 834.76, + "learning_rate": 0.00011965128205128207, + "loss": 1.1082, + "step": 14200 + }, + { + "epoch": 836.24, + "learning_rate": 0.00011913846153846155, + "loss": 0.6733, + "step": 14225 + }, + { + "epoch": 837.71, + "learning_rate": 0.00011862564102564103, + "loss": 0.4108, + "step": 14250 + }, + { + "epoch": 839.18, + "learning_rate": 0.00011811282051282051, + "loss": 0.2879, + "step": 14275 + }, + { + "epoch": 840.65, + "learning_rate": 0.0001176, + "loss": 0.2274, + "step": 14300 + }, + { + "epoch": 842.12, + "learning_rate": 0.00011708717948717949, + "loss": 0.1869, + "step": 14325 + }, + { + "epoch": 843.59, + "learning_rate": 0.00011657435897435897, + "loss": 0.1548, + "step": 14350 + }, + { + "epoch": 845.06, + "learning_rate": 0.00011606153846153847, + "loss": 2.892, + "step": 14375 + }, + { + "epoch": 846.53, + "learning_rate": 0.00011556923076923078, + "loss": 4.4433, + "step": 14400 + }, + { + "epoch": 848.0, + "learning_rate": 0.00011505641025641026, + "loss": 0.9719, + "step": 14425 + }, + { + "epoch": 849.47, + "learning_rate": 0.00011454358974358974, + "loss": 0.0969, + "step": 14450 + }, + { + "epoch": 850.94, + "learning_rate": 0.00011403076923076923, + "loss": 0.0932, + "step": 14475 + }, + { + "epoch": 852.41, + "learning_rate": 0.00011351794871794871, + "loss": 0.0829, + "step": 14500 + }, + { + "epoch": 853.88, + "learning_rate": 0.0001130051282051282, + "loss": 0.0785, + "step": 14525 + }, + { + "epoch": 855.35, + "learning_rate": 0.0001124923076923077, + "loss": 0.0679, + "step": 14550 + }, + { + "epoch": 856.82, + "learning_rate": 0.00011197948717948719, + "loss": 0.0656, + "step": 14575 + }, + { + "epoch": 858.29, + "learning_rate": 0.00011146666666666667, + "loss": 0.064, + "step": 14600 + }, + { + "epoch": 859.76, + "learning_rate": 0.00011095384615384616, + "loss": 0.0614, + "step": 14625 + }, + { + "epoch": 861.24, + "learning_rate": 0.00011044102564102565, + "loss": 0.0612, + "step": 14650 + }, + { + "epoch": 862.71, + "learning_rate": 0.00010992820512820515, + "loss": 0.0609, + "step": 14675 + }, + { + "epoch": 864.18, + "learning_rate": 0.00010941538461538463, + "loss": 0.0586, + "step": 14700 + }, + { + "epoch": 865.65, + "learning_rate": 0.0001089025641025641, + "loss": 0.0581, + "step": 14725 + }, + { + "epoch": 867.12, + "learning_rate": 0.00010838974358974358, + "loss": 0.0569, + "step": 14750 + }, + { + "epoch": 868.59, + "learning_rate": 0.00010787692307692308, + "loss": 0.0573, + "step": 14775 + }, + { + "epoch": 870.06, + "learning_rate": 0.00010736410256410257, + "loss": 0.0555, + "step": 14800 + }, + { + "epoch": 871.53, + "learning_rate": 0.00010685128205128205, + "loss": 0.0546, + "step": 14825 + }, + { + "epoch": 873.0, + "learning_rate": 0.00010633846153846154, + "loss": 0.0548, + "step": 14850 + }, + { + "epoch": 874.47, + "learning_rate": 0.00010582564102564103, + "loss": 0.0541, + "step": 14875 + }, + { + "epoch": 875.94, + "learning_rate": 0.00010531282051282053, + "loss": 0.0526, + "step": 14900 + }, + { + "epoch": 877.41, + "learning_rate": 0.00010480000000000001, + "loss": 0.0521, + "step": 14925 + }, + { + "epoch": 878.88, + "learning_rate": 0.0001042871794871795, + "loss": 0.0539, + "step": 14950 + }, + { + "epoch": 880.35, + "learning_rate": 0.00010377435897435899, + "loss": 0.0535, + "step": 14975 + }, + { + "epoch": 881.82, + "learning_rate": 0.00010326153846153847, + "loss": 0.0538, + "step": 15000 + }, + { + "epoch": 881.82, + "eval_loss": 5.33984375, + "eval_runtime": 102.1523, + "eval_samples_per_second": 2.663, + "eval_steps_per_second": 0.166, + "eval_wer": 99.87927191679049, + "step": 15000 + }, + { + "epoch": 883.29, + "learning_rate": 0.00010274871794871795, + "loss": 0.0535, + "step": 15025 + }, + { + "epoch": 884.76, + "learning_rate": 0.00010223589743589743, + "loss": 0.0516, + "step": 15050 + }, + { + "epoch": 886.24, + "learning_rate": 0.00010172307692307692, + "loss": 0.0503, + "step": 15075 + }, + { + "epoch": 887.71, + "learning_rate": 0.0001012102564102564, + "loss": 0.05, + "step": 15100 + }, + { + "epoch": 889.18, + "learning_rate": 0.0001006974358974359, + "loss": 0.0512, + "step": 15125 + }, + { + "epoch": 890.65, + "learning_rate": 0.00010018461538461539, + "loss": 0.0503, + "step": 15150 + }, + { + "epoch": 892.12, + "learning_rate": 9.967179487179488e-05, + "loss": 0.0516, + "step": 15175 + }, + { + "epoch": 893.59, + "learning_rate": 9.915897435897436e-05, + "loss": 0.0518, + "step": 15200 + }, + { + "epoch": 895.06, + "learning_rate": 9.864615384615385e-05, + "loss": 0.0521, + "step": 15225 + }, + { + "epoch": 896.53, + "learning_rate": 9.813333333333334e-05, + "loss": 0.0508, + "step": 15250 + }, + { + "epoch": 898.0, + "learning_rate": 9.762051282051282e-05, + "loss": 0.0507, + "step": 15275 + }, + { + "epoch": 899.47, + "learning_rate": 9.710769230769231e-05, + "loss": 0.0506, + "step": 15300 + }, + { + "epoch": 900.94, + "learning_rate": 9.65948717948718e-05, + "loss": 0.0496, + "step": 15325 + }, + { + "epoch": 902.41, + "learning_rate": 9.608205128205128e-05, + "loss": 0.052, + "step": 15350 + }, + { + "epoch": 903.88, + "learning_rate": 9.556923076923078e-05, + "loss": 0.05, + "step": 15375 + }, + { + "epoch": 905.35, + "learning_rate": 9.505641025641026e-05, + "loss": 0.0498, + "step": 15400 + }, + { + "epoch": 906.82, + "learning_rate": 9.454358974358974e-05, + "loss": 0.0501, + "step": 15425 + }, + { + "epoch": 908.29, + "learning_rate": 9.403076923076923e-05, + "loss": 0.0512, + "step": 15450 + }, + { + "epoch": 909.76, + "learning_rate": 9.351794871794872e-05, + "loss": 0.0499, + "step": 15475 + }, + { + "epoch": 911.24, + "learning_rate": 9.300512820512822e-05, + "loss": 0.05, + "step": 15500 + }, + { + "epoch": 912.71, + "learning_rate": 9.24923076923077e-05, + "loss": 0.0516, + "step": 15525 + }, + { + "epoch": 914.18, + "learning_rate": 9.197948717948719e-05, + "loss": 0.0517, + "step": 15550 + }, + { + "epoch": 915.65, + "learning_rate": 9.146666666666666e-05, + "loss": 0.0499, + "step": 15575 + }, + { + "epoch": 917.12, + "learning_rate": 9.095384615384616e-05, + "loss": 0.0531, + "step": 15600 + }, + { + "epoch": 918.59, + "learning_rate": 9.044102564102565e-05, + "loss": 0.0502, + "step": 15625 + }, + { + "epoch": 920.06, + "learning_rate": 8.992820512820514e-05, + "loss": 0.0495, + "step": 15650 + }, + { + "epoch": 921.53, + "learning_rate": 8.941538461538462e-05, + "loss": 0.0499, + "step": 15675 + }, + { + "epoch": 923.0, + "learning_rate": 8.890256410256411e-05, + "loss": 0.0515, + "step": 15700 + }, + { + "epoch": 924.47, + "learning_rate": 8.83897435897436e-05, + "loss": 0.0491, + "step": 15725 + }, + { + "epoch": 925.94, + "learning_rate": 8.787692307692308e-05, + "loss": 0.0491, + "step": 15750 + }, + { + "epoch": 927.41, + "learning_rate": 8.736410256410257e-05, + "loss": 0.0482, + "step": 15775 + }, + { + "epoch": 928.88, + "learning_rate": 8.685128205128206e-05, + "loss": 0.0487, + "step": 15800 + }, + { + "epoch": 930.35, + "learning_rate": 8.633846153846154e-05, + "loss": 0.0494, + "step": 15825 + }, + { + "epoch": 931.82, + "learning_rate": 8.582564102564103e-05, + "loss": 0.0491, + "step": 15850 + }, + { + "epoch": 933.29, + "learning_rate": 8.531282051282051e-05, + "loss": 0.0483, + "step": 15875 + }, + { + "epoch": 934.76, + "learning_rate": 8.48e-05, + "loss": 0.048, + "step": 15900 + }, + { + "epoch": 936.24, + "learning_rate": 8.428717948717949e-05, + "loss": 0.0488, + "step": 15925 + }, + { + "epoch": 937.71, + "learning_rate": 8.377435897435897e-05, + "loss": 0.0494, + "step": 15950 + }, + { + "epoch": 939.18, + "learning_rate": 8.326153846153847e-05, + "loss": 0.0491, + "step": 15975 + }, + { + "epoch": 940.65, + "learning_rate": 8.274871794871796e-05, + "loss": 0.0482, + "step": 16000 + }, + { + "epoch": 940.65, + "eval_loss": 5.62109375, + "eval_runtime": 164.5773, + "eval_samples_per_second": 1.653, + "eval_steps_per_second": 0.103, + "eval_wer": 136.06983655274888, + "step": 16000 + }, + { + "epoch": 942.12, + "learning_rate": 8.223589743589743e-05, + "loss": 0.0492, + "step": 16025 + }, + { + "epoch": 943.59, + "learning_rate": 8.172307692307692e-05, + "loss": 0.0485, + "step": 16050 + }, + { + "epoch": 945.06, + "learning_rate": 8.121025641025641e-05, + "loss": 0.0489, + "step": 16075 + }, + { + "epoch": 946.53, + "learning_rate": 8.069743589743591e-05, + "loss": 0.0494, + "step": 16100 + }, + { + "epoch": 948.0, + "learning_rate": 8.01846153846154e-05, + "loss": 0.0487, + "step": 16125 + }, + { + "epoch": 949.47, + "learning_rate": 7.967179487179488e-05, + "loss": 0.0473, + "step": 16150 + }, + { + "epoch": 950.94, + "learning_rate": 7.915897435897435e-05, + "loss": 0.0489, + "step": 16175 + }, + { + "epoch": 952.41, + "learning_rate": 7.864615384615385e-05, + "loss": 0.048, + "step": 16200 + }, + { + "epoch": 953.88, + "learning_rate": 7.813333333333334e-05, + "loss": 0.0479, + "step": 16225 + }, + { + "epoch": 955.35, + "learning_rate": 7.762051282051283e-05, + "loss": 0.0549, + "step": 16250 + }, + { + "epoch": 956.82, + "learning_rate": 7.710769230769231e-05, + "loss": 0.0479, + "step": 16275 + }, + { + "epoch": 958.29, + "learning_rate": 7.65948717948718e-05, + "loss": 0.0468, + "step": 16300 + }, + { + "epoch": 959.76, + "learning_rate": 7.608205128205129e-05, + "loss": 0.0477, + "step": 16325 + }, + { + "epoch": 961.24, + "learning_rate": 7.556923076923077e-05, + "loss": 0.0482, + "step": 16350 + }, + { + "epoch": 962.71, + "learning_rate": 7.505641025641026e-05, + "loss": 0.0493, + "step": 16375 + }, + { + "epoch": 964.18, + "learning_rate": 7.454358974358975e-05, + "loss": 0.0499, + "step": 16400 + }, + { + "epoch": 965.65, + "learning_rate": 7.403076923076923e-05, + "loss": 0.0516, + "step": 16425 + }, + { + "epoch": 967.12, + "learning_rate": 7.351794871794873e-05, + "loss": 0.052, + "step": 16450 + }, + { + "epoch": 968.59, + "learning_rate": 7.30051282051282e-05, + "loss": 0.0495, + "step": 16475 + }, + { + "epoch": 970.06, + "learning_rate": 7.249230769230769e-05, + "loss": 0.0495, + "step": 16500 + }, + { + "epoch": 971.53, + "learning_rate": 7.197948717948718e-05, + "loss": 0.0482, + "step": 16525 + }, + { + "epoch": 973.0, + "learning_rate": 7.146666666666666e-05, + "loss": 0.0511, + "step": 16550 + }, + { + "epoch": 974.47, + "learning_rate": 7.095384615384616e-05, + "loss": 0.0487, + "step": 16575 + }, + { + "epoch": 975.94, + "learning_rate": 7.044102564102565e-05, + "loss": 0.049, + "step": 16600 + }, + { + "epoch": 977.41, + "learning_rate": 6.992820512820512e-05, + "loss": 0.048, + "step": 16625 + }, + { + "epoch": 978.88, + "learning_rate": 6.941538461538461e-05, + "loss": 0.0485, + "step": 16650 + }, + { + "epoch": 980.35, + "learning_rate": 6.890256410256411e-05, + "loss": 0.0525, + "step": 16675 + }, + { + "epoch": 981.82, + "learning_rate": 6.83897435897436e-05, + "loss": 0.0478, + "step": 16700 + }, + { + "epoch": 983.29, + "learning_rate": 6.787692307692308e-05, + "loss": 0.0481, + "step": 16725 + }, + { + "epoch": 984.76, + "learning_rate": 6.736410256410257e-05, + "loss": 0.0494, + "step": 16750 + }, + { + "epoch": 986.24, + "learning_rate": 6.685128205128204e-05, + "loss": 0.0468, + "step": 16775 + }, + { + "epoch": 987.71, + "learning_rate": 6.633846153846154e-05, + "loss": 0.0631, + "step": 16800 + }, + { + "epoch": 989.18, + "learning_rate": 6.582564102564103e-05, + "loss": 0.0468, + "step": 16825 + }, + { + "epoch": 990.65, + "learning_rate": 6.531282051282052e-05, + "loss": 0.0464, + "step": 16850 + }, + { + "epoch": 992.12, + "learning_rate": 6.48e-05, + "loss": 0.0625, + "step": 16875 + }, + { + "epoch": 993.59, + "learning_rate": 6.428717948717949e-05, + "loss": 0.0497, + "step": 16900 + }, + { + "epoch": 995.06, + "learning_rate": 6.377435897435898e-05, + "loss": 0.0481, + "step": 16925 + }, + { + "epoch": 996.53, + "learning_rate": 6.326153846153846e-05, + "loss": 0.0484, + "step": 16950 + }, + { + "epoch": 998.0, + "learning_rate": 6.274871794871795e-05, + "loss": 0.0506, + "step": 16975 + }, + { + "epoch": 999.47, + "learning_rate": 6.223589743589744e-05, + "loss": 0.0471, + "step": 17000 + }, + { + "epoch": 999.47, + "eval_loss": 5.6484375, + "eval_runtime": 155.9288, + "eval_samples_per_second": 1.744, + "eval_steps_per_second": 0.109, + "eval_wer": 121.2481426448737, + "step": 17000 + }, + { + "epoch": 1000.94, + "learning_rate": 6.172307692307692e-05, + "loss": 0.0499, + "step": 17025 + }, + { + "epoch": 1002.41, + "learning_rate": 6.121025641025642e-05, + "loss": 0.0476, + "step": 17050 + }, + { + "epoch": 1003.88, + "learning_rate": 6.069743589743591e-05, + "loss": 0.0482, + "step": 17075 + }, + { + "epoch": 1005.35, + "learning_rate": 6.018461538461538e-05, + "loss": 0.0471, + "step": 17100 + }, + { + "epoch": 1006.82, + "learning_rate": 5.9671794871794875e-05, + "loss": 0.0461, + "step": 17125 + }, + { + "epoch": 1008.29, + "learning_rate": 5.915897435897436e-05, + "loss": 0.046, + "step": 17150 + }, + { + "epoch": 1009.76, + "learning_rate": 5.864615384615385e-05, + "loss": 0.0466, + "step": 17175 + }, + { + "epoch": 1011.24, + "learning_rate": 5.813333333333334e-05, + "loss": 0.0462, + "step": 17200 + }, + { + "epoch": 1012.71, + "learning_rate": 5.762051282051283e-05, + "loss": 0.0468, + "step": 17225 + }, + { + "epoch": 1014.18, + "learning_rate": 5.710769230769231e-05, + "loss": 0.0463, + "step": 17250 + }, + { + "epoch": 1015.65, + "learning_rate": 5.6594871794871794e-05, + "loss": 0.0463, + "step": 17275 + }, + { + "epoch": 1017.12, + "learning_rate": 5.608205128205129e-05, + "loss": 0.0451, + "step": 17300 + }, + { + "epoch": 1018.59, + "learning_rate": 5.5569230769230774e-05, + "loss": 0.0465, + "step": 17325 + }, + { + "epoch": 1020.06, + "learning_rate": 5.505641025641026e-05, + "loss": 0.0473, + "step": 17350 + }, + { + "epoch": 1021.53, + "learning_rate": 5.4543589743589754e-05, + "loss": 0.0456, + "step": 17375 + }, + { + "epoch": 1023.0, + "learning_rate": 5.403076923076923e-05, + "loss": 0.0466, + "step": 17400 + }, + { + "epoch": 1024.47, + "learning_rate": 5.351794871794872e-05, + "loss": 0.046, + "step": 17425 + }, + { + "epoch": 1025.94, + "learning_rate": 5.300512820512821e-05, + "loss": 0.0475, + "step": 17450 + }, + { + "epoch": 1027.41, + "learning_rate": 5.249230769230769e-05, + "loss": 0.0461, + "step": 17475 + }, + { + "epoch": 1028.88, + "learning_rate": 5.1979487179487187e-05, + "loss": 0.0467, + "step": 17500 + }, + { + "epoch": 1030.35, + "learning_rate": 5.146666666666667e-05, + "loss": 0.0457, + "step": 17525 + }, + { + "epoch": 1031.82, + "learning_rate": 5.095384615384615e-05, + "loss": 0.0454, + "step": 17550 + }, + { + "epoch": 1033.29, + "learning_rate": 5.044102564102564e-05, + "loss": 0.0458, + "step": 17575 + }, + { + "epoch": 1034.76, + "learning_rate": 4.992820512820513e-05, + "loss": 0.0445, + "step": 17600 + }, + { + "epoch": 1036.24, + "learning_rate": 4.941538461538462e-05, + "loss": 0.0456, + "step": 17625 + }, + { + "epoch": 1037.71, + "learning_rate": 4.8902564102564106e-05, + "loss": 0.0442, + "step": 17650 + }, + { + "epoch": 1039.18, + "learning_rate": 4.838974358974359e-05, + "loss": 0.0438, + "step": 17675 + }, + { + "epoch": 1040.65, + "learning_rate": 4.787692307692308e-05, + "loss": 0.0443, + "step": 17700 + }, + { + "epoch": 1042.12, + "learning_rate": 4.7364102564102565e-05, + "loss": 0.0439, + "step": 17725 + }, + { + "epoch": 1043.59, + "learning_rate": 4.685128205128205e-05, + "loss": 0.0428, + "step": 17750 + }, + { + "epoch": 1045.06, + "learning_rate": 4.633846153846154e-05, + "loss": 0.0427, + "step": 17775 + }, + { + "epoch": 1046.53, + "learning_rate": 4.5825641025641025e-05, + "loss": 0.0418, + "step": 17800 + }, + { + "epoch": 1048.0, + "learning_rate": 4.531282051282051e-05, + "loss": 0.0416, + "step": 17825 + }, + { + "epoch": 1049.47, + "learning_rate": 4.4800000000000005e-05, + "loss": 0.0417, + "step": 17850 + }, + { + "epoch": 1050.94, + "learning_rate": 4.428717948717949e-05, + "loss": 0.0418, + "step": 17875 + }, + { + "epoch": 1052.41, + "learning_rate": 4.377435897435898e-05, + "loss": 0.0408, + "step": 17900 + }, + { + "epoch": 1053.88, + "learning_rate": 4.3261538461538464e-05, + "loss": 0.0412, + "step": 17925 + }, + { + "epoch": 1055.35, + "learning_rate": 4.274871794871795e-05, + "loss": 0.0407, + "step": 17950 + }, + { + "epoch": 1056.82, + "learning_rate": 4.223589743589744e-05, + "loss": 0.0408, + "step": 17975 + }, + { + "epoch": 1058.29, + "learning_rate": 4.1723076923076924e-05, + "loss": 0.0405, + "step": 18000 + }, + { + "epoch": 1058.29, + "eval_loss": 5.7265625, + "eval_runtime": 164.1653, + "eval_samples_per_second": 1.657, + "eval_steps_per_second": 0.104, + "eval_wer": 119.38150074294205, + "step": 18000 + }, + { + "epoch": 1059.47, + "learning_rate": 0.00014558545454545455, + "loss": 0.0491, + "step": 18025 + }, + { + "epoch": 1060.94, + "learning_rate": 0.00014522181818181818, + "loss": 0.1303, + "step": 18050 + }, + { + "epoch": 1062.41, + "learning_rate": 0.00014485818181818184, + "loss": 0.2144, + "step": 18075 + }, + { + "epoch": 1063.88, + "learning_rate": 0.00014449454545454547, + "loss": 0.1902, + "step": 18100 + }, + { + "epoch": 1065.35, + "learning_rate": 0.0001441309090909091, + "loss": 0.1509, + "step": 18125 + }, + { + "epoch": 1066.82, + "learning_rate": 0.00014376727272727273, + "loss": 0.136, + "step": 18150 + }, + { + "epoch": 1068.29, + "learning_rate": 0.00014340363636363636, + "loss": 0.1177, + "step": 18175 + }, + { + "epoch": 1069.76, + "learning_rate": 0.00014303999999999999, + "loss": 0.1049, + "step": 18200 + }, + { + "epoch": 1071.24, + "learning_rate": 0.00014267636363636364, + "loss": 0.0927, + "step": 18225 + }, + { + "epoch": 1072.71, + "learning_rate": 0.0001423127272727273, + "loss": 0.0821, + "step": 18250 + }, + { + "epoch": 1074.18, + "learning_rate": 0.00014194909090909093, + "loss": 0.07, + "step": 18275 + }, + { + "epoch": 1075.65, + "learning_rate": 0.00014158545454545456, + "loss": 0.0688, + "step": 18300 + }, + { + "epoch": 1077.12, + "learning_rate": 0.0001412218181818182, + "loss": 0.0611, + "step": 18325 + }, + { + "epoch": 1078.59, + "learning_rate": 0.00014085818181818182, + "loss": 0.0577, + "step": 18350 + }, + { + "epoch": 1080.06, + "learning_rate": 0.00014049454545454545, + "loss": 0.0576, + "step": 18375 + }, + { + "epoch": 1081.53, + "learning_rate": 0.00014013090909090908, + "loss": 0.0519, + "step": 18400 + }, + { + "epoch": 1083.0, + "learning_rate": 0.00013976727272727274, + "loss": 0.0576, + "step": 18425 + }, + { + "epoch": 1084.47, + "learning_rate": 0.0001394036363636364, + "loss": 0.0557, + "step": 18450 + }, + { + "epoch": 1085.94, + "learning_rate": 0.00013904000000000002, + "loss": 0.0524, + "step": 18475 + }, + { + "epoch": 1087.41, + "learning_rate": 0.00013867636363636365, + "loss": 0.0501, + "step": 18500 + }, + { + "epoch": 1088.88, + "learning_rate": 0.00013831272727272728, + "loss": 0.0485, + "step": 18525 + }, + { + "epoch": 1090.35, + "learning_rate": 0.00013794909090909091, + "loss": 0.0494, + "step": 18550 + }, + { + "epoch": 1091.82, + "learning_rate": 0.00013758545454545454, + "loss": 0.0484, + "step": 18575 + }, + { + "epoch": 1093.29, + "learning_rate": 0.00013722181818181817, + "loss": 0.0475, + "step": 18600 + }, + { + "epoch": 1094.76, + "learning_rate": 0.00013685818181818183, + "loss": 0.0474, + "step": 18625 + }, + { + "epoch": 1096.24, + "learning_rate": 0.00013649454545454546, + "loss": 0.0457, + "step": 18650 + }, + { + "epoch": 1097.71, + "learning_rate": 0.0001361309090909091, + "loss": 0.0463, + "step": 18675 + }, + { + "epoch": 1099.18, + "learning_rate": 0.00013576727272727275, + "loss": 0.0466, + "step": 18700 + }, + { + "epoch": 1100.65, + "learning_rate": 0.00013540363636363638, + "loss": 0.0457, + "step": 18725 + }, + { + "epoch": 1102.12, + "learning_rate": 0.00013504, + "loss": 0.0448, + "step": 18750 + }, + { + "epoch": 1103.59, + "learning_rate": 0.00013467636363636364, + "loss": 0.0439, + "step": 18775 + }, + { + "epoch": 1105.06, + "learning_rate": 0.00013431272727272727, + "loss": 0.0448, + "step": 18800 + }, + { + "epoch": 1106.53, + "learning_rate": 0.00013394909090909093, + "loss": 0.0444, + "step": 18825 + }, + { + "epoch": 1108.0, + "learning_rate": 0.00013358545454545456, + "loss": 0.044, + "step": 18850 + }, + { + "epoch": 1109.47, + "learning_rate": 0.00013322181818181819, + "loss": 0.0469, + "step": 18875 + }, + { + "epoch": 1110.94, + "learning_rate": 0.00013285818181818182, + "loss": 0.0454, + "step": 18900 + }, + { + "epoch": 1112.41, + "learning_rate": 0.00013249454545454547, + "loss": 0.0461, + "step": 18925 + }, + { + "epoch": 1113.88, + "learning_rate": 0.0001321309090909091, + "loss": 0.0456, + "step": 18950 + }, + { + "epoch": 1115.35, + "learning_rate": 0.00013176727272727273, + "loss": 0.0435, + "step": 18975 + }, + { + "epoch": 1116.82, + "learning_rate": 0.00013140363636363636, + "loss": 0.043, + "step": 19000 + }, + { + "epoch": 1116.82, + "eval_loss": 6.1875, + "eval_runtime": 182.8966, + "eval_samples_per_second": 1.487, + "eval_steps_per_second": 0.093, + "eval_wer": 121.69390787518573, + "step": 19000 + } + ], + "max_steps": 28000, + "num_train_epochs": 1648, + "total_flos": 5.868039282110591e+20, + "trial_name": null, + "trial_params": null +}