|
{ |
|
"best_metric": 0.05613677854233956, |
|
"best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-500", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.033143997192383, |
|
"learning_rate": 8.333333333333334e-07, |
|
"loss": 3.7365, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.168129920959473, |
|
"learning_rate": 1.875e-06, |
|
"loss": 3.6756, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.131425857543945, |
|
"learning_rate": 2.916666666666667e-06, |
|
"loss": 3.6681, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.962166786193848, |
|
"learning_rate": 3.958333333333334e-06, |
|
"loss": 3.6567, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.637451171875, |
|
"learning_rate": 4.791666666666667e-06, |
|
"loss": 3.5903, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.27942943572998, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 3.4592, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.690427780151367, |
|
"learning_rate": 6.875e-06, |
|
"loss": 3.299, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.123926162719727, |
|
"learning_rate": 7.916666666666668e-06, |
|
"loss": 3.2058, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.938026428222656, |
|
"learning_rate": 8.958333333333334e-06, |
|
"loss": 3.0613, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.615925312042236, |
|
"learning_rate": 1e-05, |
|
"loss": 2.8859, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.712332725524902, |
|
"learning_rate": 1.1041666666666666e-05, |
|
"loss": 2.6746, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.229877471923828, |
|
"learning_rate": 1.2083333333333333e-05, |
|
"loss": 2.4948, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 3.9951322078704834, |
|
"learning_rate": 1.3125e-05, |
|
"loss": 2.3496, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.010512351989746, |
|
"learning_rate": 1.4166666666666666e-05, |
|
"loss": 2.2345, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.2869503498077393, |
|
"learning_rate": 1.5208333333333335e-05, |
|
"loss": 2.0418, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.47694993019104, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 1.8212, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.761810779571533, |
|
"learning_rate": 1.7291666666666666e-05, |
|
"loss": 1.7471, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.83661150932312, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 1.6647, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.7371621131896973, |
|
"learning_rate": 1.9375e-05, |
|
"loss": 1.5239, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.5980722904205322, |
|
"learning_rate": 2.0416666666666667e-05, |
|
"loss": 1.3501, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.8566689491271973, |
|
"learning_rate": 2.1458333333333334e-05, |
|
"loss": 1.3153, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.052793264389038, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 1.175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.08168363571167, |
|
"learning_rate": 2.3541666666666665e-05, |
|
"loss": 1.0395, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.830390453338623, |
|
"learning_rate": 2.4583333333333332e-05, |
|
"loss": 0.9517, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.9608633518218994, |
|
"learning_rate": 2.5625e-05, |
|
"loss": 0.8602, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.2221925258636475, |
|
"learning_rate": 2.666666666666667e-05, |
|
"loss": 0.7687, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.185246706008911, |
|
"learning_rate": 2.7708333333333334e-05, |
|
"loss": 0.6957, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.3892769813537598, |
|
"learning_rate": 2.875e-05, |
|
"loss": 0.6452, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.2923471927642822, |
|
"learning_rate": 2.9791666666666668e-05, |
|
"loss": 0.6001, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.059990882873535, |
|
"learning_rate": 3.0833333333333335e-05, |
|
"loss": 0.5505, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.1722524166107178, |
|
"learning_rate": 3.1875e-05, |
|
"loss": 0.513, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.4212610721588135, |
|
"learning_rate": 3.291666666666666e-05, |
|
"loss": 0.4868, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.297727584838867, |
|
"learning_rate": 3.3958333333333337e-05, |
|
"loss": 0.4696, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.7377690076828003, |
|
"learning_rate": 3.5000000000000004e-05, |
|
"loss": 0.4174, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.821341872215271, |
|
"learning_rate": 3.6041666666666664e-05, |
|
"loss": 0.4204, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.0993902683258057, |
|
"learning_rate": 3.708333333333334e-05, |
|
"loss": 0.3846, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.5224227905273438, |
|
"learning_rate": 3.8125e-05, |
|
"loss": 0.3499, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.8540211915969849, |
|
"learning_rate": 3.916666666666667e-05, |
|
"loss": 0.3414, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.9813562631607056, |
|
"learning_rate": 4.020833333333333e-05, |
|
"loss": 0.3274, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.385871171951294, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.2907, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.0511081218719482, |
|
"learning_rate": 4.229166666666667e-05, |
|
"loss": 0.2735, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.0850329399108887, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.2384, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.191450595855713, |
|
"learning_rate": 4.4375e-05, |
|
"loss": 0.2244, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.4809000492095947, |
|
"learning_rate": 4.541666666666667e-05, |
|
"loss": 0.223, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 1.4290976524353027, |
|
"learning_rate": 4.645833333333333e-05, |
|
"loss": 0.194, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 1.8528721332550049, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.1817, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.4630467891693115, |
|
"learning_rate": 4.854166666666666e-05, |
|
"loss": 0.1728, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.6305458545684814, |
|
"learning_rate": 4.958333333333334e-05, |
|
"loss": 0.1859, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 1.455244779586792, |
|
"learning_rate": 5.0625000000000004e-05, |
|
"loss": 0.1531, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.8049136400222778, |
|
"learning_rate": 5.1666666666666664e-05, |
|
"loss": 0.1412, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 1.577805519104004, |
|
"learning_rate": 5.270833333333334e-05, |
|
"loss": 0.1384, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 2.020803213119507, |
|
"learning_rate": 5.375e-05, |
|
"loss": 0.1398, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 1.4404263496398926, |
|
"learning_rate": 5.479166666666667e-05, |
|
"loss": 0.1484, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 2.497192621231079, |
|
"learning_rate": 5.583333333333333e-05, |
|
"loss": 0.1353, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 3.2407219409942627, |
|
"learning_rate": 5.6875e-05, |
|
"loss": 0.1433, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.4051156044006348, |
|
"learning_rate": 5.791666666666667e-05, |
|
"loss": 0.1181, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1400000000000001, |
|
"grad_norm": 1.4117530584335327, |
|
"learning_rate": 5.8958333333333334e-05, |
|
"loss": 0.1252, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.6360172033309937, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1233, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.225799560546875, |
|
"learning_rate": 6.104166666666667e-05, |
|
"loss": 0.1084, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.4874345064163208, |
|
"learning_rate": 6.208333333333333e-05, |
|
"loss": 0.125, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.3238331079483032, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1132, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.354384183883667, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0993, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 2.2216718196868896, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1325, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.026408076286316, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1035, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.0583767890930176, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1208, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 1.9303004741668701, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1119, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 1.7043157815933228, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1023, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 1.3245861530303955, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1008, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 1.541318655014038, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0977, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.8400285243988037, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0887, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 1.0839234590530396, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0912, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.269062876701355, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0959, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.0546581745147705, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0875, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 1.683465838432312, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0933, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.470189094543457, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0986, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 1.183585524559021, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0979, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 1.3886022567749023, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1031, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 1.105749487876892, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0995, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.0494953393936157, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0755, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.7028089761734009, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0981, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.5404858589172363, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0917, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.6400000000000001, |
|
"grad_norm": 1.6659576892852783, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0891, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.6600000000000001, |
|
"grad_norm": 1.353579044342041, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0889, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 2.1539247035980225, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.1092, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.4106309413909912, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0951, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.1167716979980469, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0899, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 1.2029541730880737, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0902, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 1.0979869365692139, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.089, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 1.1568419933319092, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0859, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.2472410202026367, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0877, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.8199999999999998, |
|
"grad_norm": 1.2323497533798218, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0865, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 1.2814995050430298, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0831, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.8599999999999999, |
|
"grad_norm": 0.912714421749115, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0813, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.2273714542388916, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0884, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.5928541421890259, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0852, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.9074931740760803, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0792, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 0.9795681834220886, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0781, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 1.4303114414215088, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0757, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 1.0313260555267334, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0881, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1294418573379517, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0785, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_cer": 0.05613677854233956, |
|
"eval_loss": 0.08199143409729004, |
|
"eval_runtime": 495.5562, |
|
"eval_samples_per_second": 2.018, |
|
"eval_steps_per_second": 0.504, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.376040517632e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|