whisper_base_fr / trainer_state.json
dearyoungjo's picture
Initial clean upload of whisper_base_fr
8cf428b
raw
history blame
26.2 kB
{
"best_metric": 23.795498749652683,
"best_model_checkpoint": "./checkpoint-4000",
"epoch": 526.3157894736842,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.63,
"learning_rate": 4.4e-09,
"loss": 0.8459,
"step": 25
},
{
"epoch": 5.26,
"learning_rate": 9.4e-09,
"loss": 0.8681,
"step": 50
},
{
"epoch": 7.89,
"learning_rate": 1.4399999999999998e-08,
"loss": 0.8349,
"step": 75
},
{
"epoch": 10.53,
"learning_rate": 1.94e-08,
"loss": 0.8471,
"step": 100
},
{
"epoch": 13.16,
"learning_rate": 2.4399999999999997e-08,
"loss": 0.8299,
"step": 125
},
{
"epoch": 15.79,
"learning_rate": 2.94e-08,
"loss": 0.8228,
"step": 150
},
{
"epoch": 18.42,
"learning_rate": 3.44e-08,
"loss": 0.8015,
"step": 175
},
{
"epoch": 21.05,
"learning_rate": 3.94e-08,
"loss": 0.8041,
"step": 200
},
{
"epoch": 23.68,
"learning_rate": 4.44e-08,
"loss": 0.797,
"step": 225
},
{
"epoch": 26.32,
"learning_rate": 4.94e-08,
"loss": 0.7771,
"step": 250
},
{
"epoch": 28.95,
"learning_rate": 5.4400000000000004e-08,
"loss": 0.7623,
"step": 275
},
{
"epoch": 31.58,
"learning_rate": 5.9399999999999996e-08,
"loss": 0.7524,
"step": 300
},
{
"epoch": 34.21,
"learning_rate": 6.44e-08,
"loss": 0.7384,
"step": 325
},
{
"epoch": 36.84,
"learning_rate": 6.939999999999999e-08,
"loss": 0.715,
"step": 350
},
{
"epoch": 39.47,
"learning_rate": 7.439999999999999e-08,
"loss": 0.6994,
"step": 375
},
{
"epoch": 42.11,
"learning_rate": 7.94e-08,
"loss": 0.6721,
"step": 400
},
{
"epoch": 44.74,
"learning_rate": 8.439999999999999e-08,
"loss": 0.676,
"step": 425
},
{
"epoch": 47.37,
"learning_rate": 8.939999999999999e-08,
"loss": 0.6614,
"step": 450
},
{
"epoch": 50.0,
"learning_rate": 9.439999999999999e-08,
"loss": 0.625,
"step": 475
},
{
"epoch": 52.63,
"learning_rate": 9.939999999999999e-08,
"loss": 0.5845,
"step": 500
},
{
"epoch": 55.26,
"learning_rate": 9.95111111111111e-08,
"loss": 0.5787,
"step": 525
},
{
"epoch": 57.89,
"learning_rate": 9.895555555555555e-08,
"loss": 0.5624,
"step": 550
},
{
"epoch": 60.53,
"learning_rate": 9.84e-08,
"loss": 0.5237,
"step": 575
},
{
"epoch": 63.16,
"learning_rate": 9.784444444444444e-08,
"loss": 0.5334,
"step": 600
},
{
"epoch": 65.79,
"learning_rate": 9.728888888888889e-08,
"loss": 0.5064,
"step": 625
},
{
"epoch": 68.42,
"learning_rate": 9.673333333333334e-08,
"loss": 0.5078,
"step": 650
},
{
"epoch": 71.05,
"learning_rate": 9.617777777777777e-08,
"loss": 0.485,
"step": 675
},
{
"epoch": 73.68,
"learning_rate": 9.562222222222221e-08,
"loss": 0.4678,
"step": 700
},
{
"epoch": 76.32,
"learning_rate": 9.506666666666666e-08,
"loss": 0.4636,
"step": 725
},
{
"epoch": 78.95,
"learning_rate": 9.45111111111111e-08,
"loss": 0.4707,
"step": 750
},
{
"epoch": 81.58,
"learning_rate": 9.395555555555555e-08,
"loss": 0.4725,
"step": 775
},
{
"epoch": 84.21,
"learning_rate": 9.34e-08,
"loss": 0.4595,
"step": 800
},
{
"epoch": 86.84,
"learning_rate": 9.284444444444444e-08,
"loss": 0.4449,
"step": 825
},
{
"epoch": 89.47,
"learning_rate": 9.228888888888889e-08,
"loss": 0.4254,
"step": 850
},
{
"epoch": 92.11,
"learning_rate": 9.173333333333333e-08,
"loss": 0.4222,
"step": 875
},
{
"epoch": 94.74,
"learning_rate": 9.117777777777777e-08,
"loss": 0.4247,
"step": 900
},
{
"epoch": 97.37,
"learning_rate": 9.062222222222222e-08,
"loss": 0.4306,
"step": 925
},
{
"epoch": 100.0,
"learning_rate": 9.006666666666665e-08,
"loss": 0.4017,
"step": 950
},
{
"epoch": 102.63,
"learning_rate": 8.95111111111111e-08,
"loss": 0.4176,
"step": 975
},
{
"epoch": 105.26,
"learning_rate": 8.895555555555555e-08,
"loss": 0.3835,
"step": 1000
},
{
"epoch": 105.26,
"eval_loss": 0.5891541838645935,
"eval_runtime": 160.0944,
"eval_samples_per_second": 4.223,
"eval_steps_per_second": 0.137,
"eval_wer": 25.423728813559322,
"step": 1000
},
{
"epoch": 107.89,
"learning_rate": 8.84e-08,
"loss": 0.4049,
"step": 1025
},
{
"epoch": 110.53,
"learning_rate": 8.784444444444445e-08,
"loss": 0.396,
"step": 1050
},
{
"epoch": 113.16,
"learning_rate": 8.72888888888889e-08,
"loss": 0.3819,
"step": 1075
},
{
"epoch": 115.79,
"learning_rate": 8.673333333333333e-08,
"loss": 0.3813,
"step": 1100
},
{
"epoch": 118.42,
"learning_rate": 8.617777777777778e-08,
"loss": 0.3786,
"step": 1125
},
{
"epoch": 121.05,
"learning_rate": 8.562222222222221e-08,
"loss": 0.3623,
"step": 1150
},
{
"epoch": 123.68,
"learning_rate": 8.506666666666666e-08,
"loss": 0.3702,
"step": 1175
},
{
"epoch": 126.32,
"learning_rate": 8.451111111111111e-08,
"loss": 0.3578,
"step": 1200
},
{
"epoch": 128.95,
"learning_rate": 8.395555555555556e-08,
"loss": 0.3559,
"step": 1225
},
{
"epoch": 131.58,
"learning_rate": 8.339999999999999e-08,
"loss": 0.3548,
"step": 1250
},
{
"epoch": 134.21,
"learning_rate": 8.284444444444444e-08,
"loss": 0.345,
"step": 1275
},
{
"epoch": 136.84,
"learning_rate": 8.228888888888889e-08,
"loss": 0.3574,
"step": 1300
},
{
"epoch": 139.47,
"learning_rate": 8.173333333333334e-08,
"loss": 0.3467,
"step": 1325
},
{
"epoch": 142.11,
"learning_rate": 8.117777777777777e-08,
"loss": 0.3511,
"step": 1350
},
{
"epoch": 144.74,
"learning_rate": 8.062222222222221e-08,
"loss": 0.3376,
"step": 1375
},
{
"epoch": 147.37,
"learning_rate": 8.006666666666666e-08,
"loss": 0.3369,
"step": 1400
},
{
"epoch": 150.0,
"learning_rate": 7.95111111111111e-08,
"loss": 0.3302,
"step": 1425
},
{
"epoch": 152.63,
"learning_rate": 7.895555555555555e-08,
"loss": 0.3339,
"step": 1450
},
{
"epoch": 155.26,
"learning_rate": 7.84e-08,
"loss": 0.33,
"step": 1475
},
{
"epoch": 157.89,
"learning_rate": 7.784444444444445e-08,
"loss": 0.3285,
"step": 1500
},
{
"epoch": 160.53,
"learning_rate": 7.728888888888888e-08,
"loss": 0.3236,
"step": 1525
},
{
"epoch": 163.16,
"learning_rate": 7.673333333333333e-08,
"loss": 0.3167,
"step": 1550
},
{
"epoch": 165.79,
"learning_rate": 7.617777777777778e-08,
"loss": 0.3117,
"step": 1575
},
{
"epoch": 168.42,
"learning_rate": 7.562222222222222e-08,
"loss": 0.3191,
"step": 1600
},
{
"epoch": 171.05,
"learning_rate": 7.506666666666666e-08,
"loss": 0.3058,
"step": 1625
},
{
"epoch": 173.68,
"learning_rate": 7.45111111111111e-08,
"loss": 0.3177,
"step": 1650
},
{
"epoch": 176.32,
"learning_rate": 7.395555555555555e-08,
"loss": 0.3017,
"step": 1675
},
{
"epoch": 178.95,
"learning_rate": 7.34e-08,
"loss": 0.3059,
"step": 1700
},
{
"epoch": 181.58,
"learning_rate": 7.284444444444444e-08,
"loss": 0.3124,
"step": 1725
},
{
"epoch": 184.21,
"learning_rate": 7.228888888888889e-08,
"loss": 0.2987,
"step": 1750
},
{
"epoch": 186.84,
"learning_rate": 7.173333333333334e-08,
"loss": 0.2981,
"step": 1775
},
{
"epoch": 189.47,
"learning_rate": 7.117777777777778e-08,
"loss": 0.3084,
"step": 1800
},
{
"epoch": 192.11,
"learning_rate": 7.062222222222221e-08,
"loss": 0.2858,
"step": 1825
},
{
"epoch": 194.74,
"learning_rate": 7.006666666666666e-08,
"loss": 0.2978,
"step": 1850
},
{
"epoch": 197.37,
"learning_rate": 6.951111111111111e-08,
"loss": 0.2818,
"step": 1875
},
{
"epoch": 200.0,
"learning_rate": 6.895555555555556e-08,
"loss": 0.2902,
"step": 1900
},
{
"epoch": 202.63,
"learning_rate": 6.84e-08,
"loss": 0.2838,
"step": 1925
},
{
"epoch": 205.26,
"learning_rate": 6.784444444444444e-08,
"loss": 0.2992,
"step": 1950
},
{
"epoch": 207.89,
"learning_rate": 6.728888888888889e-08,
"loss": 0.2774,
"step": 1975
},
{
"epoch": 210.53,
"learning_rate": 6.673333333333334e-08,
"loss": 0.2837,
"step": 2000
},
{
"epoch": 210.53,
"eval_loss": 0.5525617003440857,
"eval_runtime": 144.9276,
"eval_samples_per_second": 4.664,
"eval_steps_per_second": 0.152,
"eval_wer": 23.895526535148655,
"step": 2000
},
{
"epoch": 213.16,
"learning_rate": 6.617777777777777e-08,
"loss": 0.2805,
"step": 2025
},
{
"epoch": 215.79,
"learning_rate": 6.562222222222222e-08,
"loss": 0.2759,
"step": 2050
},
{
"epoch": 218.42,
"learning_rate": 6.506666666666665e-08,
"loss": 0.2667,
"step": 2075
},
{
"epoch": 221.05,
"learning_rate": 6.45111111111111e-08,
"loss": 0.2777,
"step": 2100
},
{
"epoch": 223.68,
"learning_rate": 6.395555555555555e-08,
"loss": 0.2672,
"step": 2125
},
{
"epoch": 226.32,
"learning_rate": 6.34e-08,
"loss": 0.2739,
"step": 2150
},
{
"epoch": 228.95,
"learning_rate": 6.284444444444445e-08,
"loss": 0.2819,
"step": 2175
},
{
"epoch": 231.58,
"learning_rate": 6.22888888888889e-08,
"loss": 0.2646,
"step": 2200
},
{
"epoch": 234.21,
"learning_rate": 6.173333333333333e-08,
"loss": 0.2618,
"step": 2225
},
{
"epoch": 236.84,
"learning_rate": 6.117777777777778e-08,
"loss": 0.2711,
"step": 2250
},
{
"epoch": 239.47,
"learning_rate": 6.062222222222221e-08,
"loss": 0.2644,
"step": 2275
},
{
"epoch": 242.11,
"learning_rate": 6.006666666666666e-08,
"loss": 0.2494,
"step": 2300
},
{
"epoch": 244.74,
"learning_rate": 5.951111111111111e-08,
"loss": 0.2601,
"step": 2325
},
{
"epoch": 247.37,
"learning_rate": 5.895555555555556e-08,
"loss": 0.2538,
"step": 2350
},
{
"epoch": 250.0,
"learning_rate": 5.8399999999999994e-08,
"loss": 0.2551,
"step": 2375
},
{
"epoch": 252.63,
"learning_rate": 5.784444444444444e-08,
"loss": 0.2494,
"step": 2400
},
{
"epoch": 255.26,
"learning_rate": 5.7288888888888884e-08,
"loss": 0.2518,
"step": 2425
},
{
"epoch": 257.89,
"learning_rate": 5.673333333333333e-08,
"loss": 0.2455,
"step": 2450
},
{
"epoch": 260.53,
"learning_rate": 5.617777777777778e-08,
"loss": 0.2606,
"step": 2475
},
{
"epoch": 263.16,
"learning_rate": 5.5622222222222215e-08,
"loss": 0.2533,
"step": 2500
},
{
"epoch": 265.79,
"learning_rate": 5.5066666666666663e-08,
"loss": 0.2436,
"step": 2525
},
{
"epoch": 268.42,
"learning_rate": 5.4511111111111105e-08,
"loss": 0.2346,
"step": 2550
},
{
"epoch": 271.05,
"learning_rate": 5.3955555555555553e-08,
"loss": 0.2456,
"step": 2575
},
{
"epoch": 273.68,
"learning_rate": 5.34e-08,
"loss": 0.2462,
"step": 2600
},
{
"epoch": 276.32,
"learning_rate": 5.284444444444445e-08,
"loss": 0.24,
"step": 2625
},
{
"epoch": 278.95,
"learning_rate": 5.2288888888888885e-08,
"loss": 0.2466,
"step": 2650
},
{
"epoch": 281.58,
"learning_rate": 5.173333333333333e-08,
"loss": 0.24,
"step": 2675
},
{
"epoch": 284.21,
"learning_rate": 5.1177777777777775e-08,
"loss": 0.2419,
"step": 2700
},
{
"epoch": 286.84,
"learning_rate": 5.0622222222222223e-08,
"loss": 0.2366,
"step": 2725
},
{
"epoch": 289.47,
"learning_rate": 5.0066666666666665e-08,
"loss": 0.232,
"step": 2750
},
{
"epoch": 292.11,
"learning_rate": 4.9511111111111107e-08,
"loss": 0.235,
"step": 2775
},
{
"epoch": 294.74,
"learning_rate": 4.8955555555555555e-08,
"loss": 0.234,
"step": 2800
},
{
"epoch": 297.37,
"learning_rate": 4.8399999999999997e-08,
"loss": 0.2368,
"step": 2825
},
{
"epoch": 300.0,
"learning_rate": 4.7844444444444445e-08,
"loss": 0.2231,
"step": 2850
},
{
"epoch": 302.63,
"learning_rate": 4.7288888888888887e-08,
"loss": 0.224,
"step": 2875
},
{
"epoch": 305.26,
"learning_rate": 4.673333333333333e-08,
"loss": 0.2285,
"step": 2900
},
{
"epoch": 307.89,
"learning_rate": 4.6177777777777777e-08,
"loss": 0.2265,
"step": 2925
},
{
"epoch": 310.53,
"learning_rate": 4.562222222222222e-08,
"loss": 0.2238,
"step": 2950
},
{
"epoch": 313.16,
"learning_rate": 4.5066666666666667e-08,
"loss": 0.2224,
"step": 2975
},
{
"epoch": 315.79,
"learning_rate": 4.451111111111111e-08,
"loss": 0.2323,
"step": 3000
},
{
"epoch": 315.79,
"eval_loss": 0.5431821942329407,
"eval_runtime": 144.0249,
"eval_samples_per_second": 4.694,
"eval_steps_per_second": 0.153,
"eval_wer": 24.012225618227284,
"step": 3000
},
{
"epoch": 318.42,
"learning_rate": 4.395555555555555e-08,
"loss": 0.2231,
"step": 3025
},
{
"epoch": 321.05,
"learning_rate": 4.34e-08,
"loss": 0.2132,
"step": 3050
},
{
"epoch": 323.68,
"learning_rate": 4.2844444444444446e-08,
"loss": 0.2275,
"step": 3075
},
{
"epoch": 326.32,
"learning_rate": 4.228888888888889e-08,
"loss": 0.2149,
"step": 3100
},
{
"epoch": 328.95,
"learning_rate": 4.173333333333333e-08,
"loss": 0.214,
"step": 3125
},
{
"epoch": 331.58,
"learning_rate": 4.117777777777778e-08,
"loss": 0.2181,
"step": 3150
},
{
"epoch": 334.21,
"learning_rate": 4.062222222222222e-08,
"loss": 0.2153,
"step": 3175
},
{
"epoch": 336.84,
"learning_rate": 4.006666666666667e-08,
"loss": 0.2266,
"step": 3200
},
{
"epoch": 339.47,
"learning_rate": 3.951111111111111e-08,
"loss": 0.2154,
"step": 3225
},
{
"epoch": 342.11,
"learning_rate": 3.895555555555555e-08,
"loss": 0.2088,
"step": 3250
},
{
"epoch": 344.74,
"learning_rate": 3.84e-08,
"loss": 0.2104,
"step": 3275
},
{
"epoch": 347.37,
"learning_rate": 3.784444444444445e-08,
"loss": 0.2115,
"step": 3300
},
{
"epoch": 350.0,
"learning_rate": 3.728888888888888e-08,
"loss": 0.2103,
"step": 3325
},
{
"epoch": 352.63,
"learning_rate": 3.673333333333333e-08,
"loss": 0.209,
"step": 3350
},
{
"epoch": 355.26,
"learning_rate": 3.617777777777777e-08,
"loss": 0.216,
"step": 3375
},
{
"epoch": 357.89,
"learning_rate": 3.562222222222222e-08,
"loss": 0.2125,
"step": 3400
},
{
"epoch": 360.53,
"learning_rate": 3.506666666666667e-08,
"loss": 0.2161,
"step": 3425
},
{
"epoch": 363.16,
"learning_rate": 3.4511111111111105e-08,
"loss": 0.2148,
"step": 3450
},
{
"epoch": 365.79,
"learning_rate": 3.395555555555555e-08,
"loss": 0.2093,
"step": 3475
},
{
"epoch": 368.42,
"learning_rate": 3.34e-08,
"loss": 0.215,
"step": 3500
},
{
"epoch": 371.05,
"learning_rate": 3.284444444444444e-08,
"loss": 0.2142,
"step": 3525
},
{
"epoch": 373.68,
"learning_rate": 3.2288888888888885e-08,
"loss": 0.2069,
"step": 3550
},
{
"epoch": 376.32,
"learning_rate": 3.173333333333333e-08,
"loss": 0.2067,
"step": 3575
},
{
"epoch": 378.95,
"learning_rate": 3.1177777777777775e-08,
"loss": 0.2084,
"step": 3600
},
{
"epoch": 381.58,
"learning_rate": 3.062222222222222e-08,
"loss": 0.209,
"step": 3625
},
{
"epoch": 384.21,
"learning_rate": 3.006666666666667e-08,
"loss": 0.2139,
"step": 3650
},
{
"epoch": 386.84,
"learning_rate": 2.951111111111111e-08,
"loss": 0.2019,
"step": 3675
},
{
"epoch": 389.47,
"learning_rate": 2.8955555555555555e-08,
"loss": 0.1983,
"step": 3700
},
{
"epoch": 392.11,
"learning_rate": 2.8399999999999996e-08,
"loss": 0.2043,
"step": 3725
},
{
"epoch": 394.74,
"learning_rate": 2.784444444444444e-08,
"loss": 0.2044,
"step": 3750
},
{
"epoch": 397.37,
"learning_rate": 2.728888888888889e-08,
"loss": 0.2025,
"step": 3775
},
{
"epoch": 400.0,
"learning_rate": 2.673333333333333e-08,
"loss": 0.2036,
"step": 3800
},
{
"epoch": 402.63,
"learning_rate": 2.6177777777777776e-08,
"loss": 0.1942,
"step": 3825
},
{
"epoch": 405.26,
"learning_rate": 2.5622222222222224e-08,
"loss": 0.1977,
"step": 3850
},
{
"epoch": 407.89,
"learning_rate": 2.5066666666666663e-08,
"loss": 0.2042,
"step": 3875
},
{
"epoch": 410.53,
"learning_rate": 2.451111111111111e-08,
"loss": 0.1933,
"step": 3900
},
{
"epoch": 413.16,
"learning_rate": 2.3955555555555553e-08,
"loss": 0.1988,
"step": 3925
},
{
"epoch": 415.79,
"learning_rate": 2.34e-08,
"loss": 0.1943,
"step": 3950
},
{
"epoch": 418.42,
"learning_rate": 2.2844444444444443e-08,
"loss": 0.2033,
"step": 3975
},
{
"epoch": 421.05,
"learning_rate": 2.2288888888888888e-08,
"loss": 0.1961,
"step": 4000
},
{
"epoch": 421.05,
"eval_loss": 0.5402421951293945,
"eval_runtime": 143.5894,
"eval_samples_per_second": 4.708,
"eval_steps_per_second": 0.153,
"eval_wer": 23.795498749652683,
"step": 4000
},
{
"epoch": 423.68,
"learning_rate": 2.1733333333333333e-08,
"loss": 0.2009,
"step": 4025
},
{
"epoch": 426.32,
"learning_rate": 2.1177777777777778e-08,
"loss": 0.1961,
"step": 4050
},
{
"epoch": 428.95,
"learning_rate": 2.0622222222222223e-08,
"loss": 0.1987,
"step": 4075
},
{
"epoch": 431.58,
"learning_rate": 2.0066666666666664e-08,
"loss": 0.1951,
"step": 4100
},
{
"epoch": 434.21,
"learning_rate": 1.9533333333333333e-08,
"loss": 0.1914,
"step": 4125
},
{
"epoch": 436.84,
"learning_rate": 1.8977777777777775e-08,
"loss": 0.1955,
"step": 4150
},
{
"epoch": 439.47,
"learning_rate": 1.8422222222222223e-08,
"loss": 0.1967,
"step": 4175
},
{
"epoch": 442.11,
"learning_rate": 1.7866666666666665e-08,
"loss": 0.2011,
"step": 4200
},
{
"epoch": 444.74,
"learning_rate": 1.731111111111111e-08,
"loss": 0.1953,
"step": 4225
},
{
"epoch": 447.37,
"learning_rate": 1.6755555555555554e-08,
"loss": 0.1927,
"step": 4250
},
{
"epoch": 450.0,
"learning_rate": 1.62e-08,
"loss": 0.1918,
"step": 4275
},
{
"epoch": 452.63,
"learning_rate": 1.5644444444444444e-08,
"loss": 0.195,
"step": 4300
},
{
"epoch": 455.26,
"learning_rate": 1.5088888888888886e-08,
"loss": 0.1939,
"step": 4325
},
{
"epoch": 457.89,
"learning_rate": 1.4533333333333333e-08,
"loss": 0.1916,
"step": 4350
},
{
"epoch": 460.53,
"learning_rate": 1.3977777777777778e-08,
"loss": 0.1968,
"step": 4375
},
{
"epoch": 463.16,
"learning_rate": 1.3422222222222221e-08,
"loss": 0.193,
"step": 4400
},
{
"epoch": 465.79,
"learning_rate": 1.2866666666666668e-08,
"loss": 0.1888,
"step": 4425
},
{
"epoch": 468.42,
"learning_rate": 1.2311111111111111e-08,
"loss": 0.1858,
"step": 4450
},
{
"epoch": 471.05,
"learning_rate": 1.1755555555555554e-08,
"loss": 0.1872,
"step": 4475
},
{
"epoch": 473.68,
"learning_rate": 1.12e-08,
"loss": 0.1919,
"step": 4500
},
{
"epoch": 476.32,
"learning_rate": 1.0644444444444443e-08,
"loss": 0.1919,
"step": 4525
},
{
"epoch": 478.95,
"learning_rate": 1.0088888888888888e-08,
"loss": 0.1901,
"step": 4550
},
{
"epoch": 481.58,
"learning_rate": 9.533333333333334e-09,
"loss": 0.1947,
"step": 4575
},
{
"epoch": 484.21,
"learning_rate": 8.977777777777778e-09,
"loss": 0.189,
"step": 4600
},
{
"epoch": 486.84,
"learning_rate": 8.422222222222223e-09,
"loss": 0.1872,
"step": 4625
},
{
"epoch": 489.47,
"learning_rate": 7.866666666666666e-09,
"loss": 0.1848,
"step": 4650
},
{
"epoch": 492.11,
"learning_rate": 7.311111111111111e-09,
"loss": 0.1876,
"step": 4675
},
{
"epoch": 494.74,
"learning_rate": 6.755555555555554e-09,
"loss": 0.1911,
"step": 4700
},
{
"epoch": 497.37,
"learning_rate": 6.199999999999999e-09,
"loss": 0.1882,
"step": 4725
},
{
"epoch": 500.0,
"learning_rate": 5.644444444444444e-09,
"loss": 0.1872,
"step": 4750
},
{
"epoch": 502.63,
"learning_rate": 5.088888888888888e-09,
"loss": 0.1895,
"step": 4775
},
{
"epoch": 505.26,
"learning_rate": 4.533333333333333e-09,
"loss": 0.1895,
"step": 4800
},
{
"epoch": 507.89,
"learning_rate": 3.9777777777777776e-09,
"loss": 0.1834,
"step": 4825
},
{
"epoch": 510.53,
"learning_rate": 3.422222222222222e-09,
"loss": 0.1923,
"step": 4850
},
{
"epoch": 513.16,
"learning_rate": 2.8666666666666667e-09,
"loss": 0.1884,
"step": 4875
},
{
"epoch": 515.79,
"learning_rate": 2.311111111111111e-09,
"loss": 0.1834,
"step": 4900
},
{
"epoch": 518.42,
"learning_rate": 1.7555555555555556e-09,
"loss": 0.1964,
"step": 4925
},
{
"epoch": 521.05,
"learning_rate": 1.2e-09,
"loss": 0.1923,
"step": 4950
},
{
"epoch": 523.68,
"learning_rate": 6.444444444444445e-10,
"loss": 0.1866,
"step": 4975
},
{
"epoch": 526.32,
"learning_rate": 8.888888888888889e-11,
"loss": 0.1863,
"step": 5000
},
{
"epoch": 526.32,
"eval_loss": 0.5395069122314453,
"eval_runtime": 143.5193,
"eval_samples_per_second": 4.71,
"eval_steps_per_second": 0.153,
"eval_wer": 23.795498749652683,
"step": 5000
},
{
"epoch": 526.32,
"step": 5000,
"total_flos": 1.97317080317952e+19,
"train_loss": 0.31816294193267824,
"train_runtime": 12909.7746,
"train_samples_per_second": 24.787,
"train_steps_per_second": 0.387
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 556,
"save_steps": 1000,
"total_flos": 1.97317080317952e+19,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}