{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "global_step": 27510,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.55,
      "learning_rate": 2.982e-05,
      "loss": 4.6277,
      "step": 500
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.9449092928544985e-05,
      "loss": 1.5086,
      "step": 1000
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.889374305812662e-05,
      "loss": 0.7041,
      "step": 1500
    },
    {
      "epoch": 2.18,
      "learning_rate": 2.834061458718993e-05,
      "loss": 0.5814,
      "step": 2000
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.7785264716771567e-05,
      "loss": 0.5086,
      "step": 2500
    },
    {
      "epoch": 3.27,
      "learning_rate": 2.7232136245834875e-05,
      "loss": 0.462,
      "step": 3000
    },
    {
      "epoch": 3.82,
      "learning_rate": 2.667789707515735e-05,
      "loss": 0.4247,
      "step": 3500
    },
    {
      "epoch": 4.36,
      "learning_rate": 2.6123657904479824e-05,
      "loss": 0.3919,
      "step": 4000
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.556830803406146e-05,
      "loss": 0.3743,
      "step": 4500
    },
    {
      "epoch": 5.45,
      "learning_rate": 2.5014068863383933e-05,
      "loss": 0.3452,
      "step": 5000
    },
    {
      "epoch": 5.45,
      "eval_loss": 0.38393500447273254,
      "eval_runtime": 207.46,
      "eval_samples_per_second": 59.009,
      "eval_steps_per_second": 1.846,
      "eval_wer": 0.4573501807782915,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "learning_rate": 2.445871899296557e-05,
      "loss": 0.3357,
      "step": 5500
    },
    {
      "epoch": 6.54,
      "learning_rate": 2.3904479822288042e-05,
      "loss": 0.3131,
      "step": 6000
    },
    {
      "epoch": 7.09,
      "learning_rate": 2.334912995186968e-05,
      "loss": 0.3039,
      "step": 6500
    },
    {
      "epoch": 7.63,
      "learning_rate": 2.279489078119215e-05,
      "loss": 0.2879,
      "step": 7000
    },
    {
      "epoch": 8.18,
      "learning_rate": 2.2240651610514624e-05,
      "loss": 0.2772,
      "step": 7500
    },
    {
      "epoch": 8.72,
      "learning_rate": 2.1686412439837097e-05,
      "loss": 0.2649,
      "step": 8000
    },
    {
      "epoch": 9.27,
      "learning_rate": 2.1132173269159573e-05,
      "loss": 0.2596,
      "step": 8500
    },
    {
      "epoch": 9.81,
      "learning_rate": 2.057682339874121e-05,
      "loss": 0.2526,
      "step": 9000
    },
    {
      "epoch": 10.36,
      "learning_rate": 2.0021473528322843e-05,
      "loss": 0.2451,
      "step": 9500
    },
    {
      "epoch": 10.91,
      "learning_rate": 1.946834505738615e-05,
      "loss": 0.2466,
      "step": 10000
    },
    {
      "epoch": 10.91,
      "eval_loss": 0.40105822682380676,
      "eval_runtime": 206.4585,
      "eval_samples_per_second": 59.295,
      "eval_steps_per_second": 1.855,
      "eval_wer": 0.4067448569520906,
      "step": 10000
    },
    {
      "epoch": 11.45,
      "learning_rate": 1.8914105886708627e-05,
      "loss": 0.2564,
      "step": 10500
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.8358756016290264e-05,
      "loss": 0.3219,
      "step": 11000
    },
    {
      "epoch": 12.54,
      "learning_rate": 1.7804516845612736e-05,
      "loss": 0.3501,
      "step": 11500
    },
    {
      "epoch": 13.09,
      "learning_rate": 1.7249166975194373e-05,
      "loss": 0.375,
      "step": 12000
    },
    {
      "epoch": 13.63,
      "learning_rate": 1.669381710477601e-05,
      "loss": 0.3621,
      "step": 12500
    },
    {
      "epoch": 14.18,
      "learning_rate": 1.614068863383932e-05,
      "loss": 0.4168,
      "step": 13000
    },
    {
      "epoch": 14.72,
      "learning_rate": 1.5585338763420955e-05,
      "loss": 0.515,
      "step": 13500
    },
    {
      "epoch": 15.27,
      "learning_rate": 1.5033320992225103e-05,
      "loss": 0.8026,
      "step": 14000
    },
    {
      "epoch": 15.81,
      "learning_rate": 1.4477971121806738e-05,
      "loss": 1.3623,
      "step": 14500
    },
    {
      "epoch": 16.36,
      "learning_rate": 1.3922621251388376e-05,
      "loss": 1.5753,
      "step": 15000
    },
    {
      "epoch": 16.36,
      "eval_loss": 1.2936806678771973,
      "eval_runtime": 205.5873,
      "eval_samples_per_second": 59.546,
      "eval_steps_per_second": 1.863,
      "eval_wer": 0.8843522846092795,
      "step": 15000
    },
    {
      "epoch": 16.9,
      "learning_rate": 1.3368382080710849e-05,
      "loss": 1.6596,
      "step": 15500
    },
    {
      "epoch": 17.45,
      "learning_rate": 1.2813032210292485e-05,
      "loss": 1.6553,
      "step": 16000
    },
    {
      "epoch": 17.99,
      "learning_rate": 1.2258793039614958e-05,
      "loss": 1.7339,
      "step": 16500
    },
    {
      "epoch": 18.54,
      "learning_rate": 1.1703443169196594e-05,
      "loss": 1.8133,
      "step": 17000
    },
    {
      "epoch": 19.08,
      "learning_rate": 1.1149203998519067e-05,
      "loss": 1.7555,
      "step": 17500
    },
    {
      "epoch": 19.63,
      "learning_rate": 1.059496482784154e-05,
      "loss": 1.7767,
      "step": 18000
    },
    {
      "epoch": 20.17,
      "learning_rate": 1.0039614957423176e-05,
      "loss": 1.8686,
      "step": 18500
    },
    {
      "epoch": 20.72,
      "learning_rate": 9.485375786745649e-06,
      "loss": 1.8748,
      "step": 19000
    },
    {
      "epoch": 21.26,
      "learning_rate": 8.931136616068122e-06,
      "loss": 1.9346,
      "step": 19500
    },
    {
      "epoch": 21.81,
      "learning_rate": 8.37578674564976e-06,
      "loss": 1.9454,
      "step": 20000
    },
    {
      "epoch": 21.81,
      "eval_loss": 1.822685718536377,
      "eval_runtime": 205.1767,
      "eval_samples_per_second": 59.666,
      "eval_steps_per_second": 1.867,
      "eval_wer": 0.9391778682019174,
      "step": 20000
    },
    {
      "epoch": 22.36,
      "learning_rate": 7.820436875231396e-06,
      "loss": 1.9335,
      "step": 20500
    },
    {
      "epoch": 22.9,
      "learning_rate": 7.267308404294706e-06,
      "loss": 1.9261,
      "step": 21000
    },
    {
      "epoch": 23.45,
      "learning_rate": 6.7130692336171785e-06,
      "loss": 1.9216,
      "step": 21500
    },
    {
      "epoch": 23.99,
      "learning_rate": 6.158830062939652e-06,
      "loss": 1.9228,
      "step": 22000
    },
    {
      "epoch": 24.54,
      "learning_rate": 5.604590892262125e-06,
      "loss": 1.9233,
      "step": 22500
    },
    {
      "epoch": 25.08,
      "learning_rate": 5.049241021843762e-06,
      "loss": 1.9197,
      "step": 23000
    },
    {
      "epoch": 25.63,
      "learning_rate": 4.495001851166235e-06,
      "loss": 1.9206,
      "step": 23500
    },
    {
      "epoch": 26.17,
      "learning_rate": 3.939651980747871e-06,
      "loss": 1.9235,
      "step": 24000
    },
    {
      "epoch": 26.72,
      "learning_rate": 3.384302110329508e-06,
      "loss": 1.9229,
      "step": 24500
    },
    {
      "epoch": 27.26,
      "learning_rate": 2.8311736393928178e-06,
      "loss": 1.922,
      "step": 25000
    },
    {
      "epoch": 27.26,
      "eval_loss": 1.8085108995437622,
      "eval_runtime": 206.76,
      "eval_samples_per_second": 59.209,
      "eval_steps_per_second": 1.852,
      "eval_wer": 0.9420627621915116,
      "step": 25000
    },
    {
      "epoch": 27.81,
      "learning_rate": 2.275823768974454e-06,
      "loss": 1.9211,
      "step": 25500
    },
    {
      "epoch": 28.35,
      "learning_rate": 1.7204738985560903e-06,
      "loss": 1.9205,
      "step": 26000
    },
    {
      "epoch": 28.9,
      "learning_rate": 1.1662347278785636e-06,
      "loss": 1.9253,
      "step": 26500
    },
    {
      "epoch": 29.44,
      "learning_rate": 6.119955572010366e-07,
      "loss": 1.92,
      "step": 27000
    },
    {
      "epoch": 29.99,
      "learning_rate": 5.6645686782673083e-08,
      "loss": 1.9231,
      "step": 27500
    },
    {
      "epoch": 30.0,
      "step": 27510,
      "total_flos": 6.935706045316176e+19,
      "train_loss": 1.198725398988214,
      "train_runtime": 26753.1928,
      "train_samples_per_second": 65.77,
      "train_steps_per_second": 1.028
    }
  ],
  "max_steps": 27510,
  "num_train_epochs": 30,
  "total_flos": 6.935706045316176e+19,
  "trial_name": null,
  "trial_params": null
}