{
  "best_metric": 0.5734274387359619,
  "best_model_checkpoint": "./Hubert-common_voice_JSUT-ja-demo-kana/checkpoint-10200",
  "epoch": 19.962245885769605,
  "eval_steps": 100,
  "global_step": 10320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1936108422071636,
      "eval_cer": 6.16957909562534,
      "eval_loss": 41.956024169921875,
      "eval_runtime": 229.6586,
      "eval_samples_per_second": 23.779,
      "eval_steps_per_second": 2.974,
      "eval_wer": 1.5293902215711408,
      "step": 100
    },
    {
      "epoch": 0.3872216844143272,
      "eval_cer": 5.972248763190861,
      "eval_loss": 41.465721130371094,
      "eval_runtime": 227.3403,
      "eval_samples_per_second": 24.021,
      "eval_steps_per_second": 3.004,
      "eval_wer": 1.4235488005859733,
      "step": 200
    },
    {
      "epoch": 0.5808325266214908,
      "eval_cer": 3.732693519049338,
      "eval_loss": 40.27690505981445,
      "eval_runtime": 227.0532,
      "eval_samples_per_second": 24.052,
      "eval_steps_per_second": 3.008,
      "eval_wer": 1.1847646951107855,
      "step": 300
    },
    {
      "epoch": 0.7744433688286544,
      "eval_cer": 0.9962559540239647,
      "eval_loss": 36.3010139465332,
      "eval_runtime": 226.5945,
      "eval_samples_per_second": 24.1,
      "eval_steps_per_second": 3.014,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 0.968054211035818,
      "grad_norm": 145.3476104736328,
      "learning_rate": 1.188e-06,
      "loss": 31.4419,
      "step": 500
    },
    {
      "epoch": 0.968054211035818,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 24.542619705200195,
      "eval_runtime": 252.7587,
      "eval_samples_per_second": 21.606,
      "eval_steps_per_second": 2.702,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 1.1606969990319458,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 18.864185333251953,
      "eval_runtime": 245.7063,
      "eval_samples_per_second": 22.226,
      "eval_steps_per_second": 2.78,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 1.3543078412391094,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 17.665119171142578,
      "eval_runtime": 248.5924,
      "eval_samples_per_second": 21.968,
      "eval_steps_per_second": 2.747,
      "eval_wer": 1.0,
      "step": 700
    },
    {
      "epoch": 1.547918683446273,
      "eval_cer": 0.9991719217784573,
      "eval_loss": 17.200698852539062,
      "eval_runtime": 253.0263,
      "eval_samples_per_second": 21.583,
      "eval_steps_per_second": 2.699,
      "eval_wer": 1.0,
      "step": 800
    },
    {
      "epoch": 1.7415295256534367,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 16.761735916137695,
      "eval_runtime": 259.4018,
      "eval_samples_per_second": 21.052,
      "eval_steps_per_second": 2.633,
      "eval_wer": 1.0,
      "step": 900
    },
    {
      "epoch": 1.9351403678606003,
      "grad_norm": 84.73486328125,
      "learning_rate": 2.3880000000000003e-06,
      "loss": 14.8315,
      "step": 1000
    },
    {
      "epoch": 1.9351403678606003,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 16.289474487304688,
      "eval_runtime": 251.3459,
      "eval_samples_per_second": 21.727,
      "eval_steps_per_second": 2.717,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 2.127783155856728,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 15.787718772888184,
      "eval_runtime": 256.1973,
      "eval_samples_per_second": 21.316,
      "eval_steps_per_second": 2.666,
      "eval_wer": 1.0,
      "step": 1100
    },
    {
      "epoch": 2.3213939980638916,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 15.248761177062988,
      "eval_runtime": 250.6433,
      "eval_samples_per_second": 21.788,
      "eval_steps_per_second": 2.725,
      "eval_wer": 1.0,
      "step": 1200
    },
    {
      "epoch": 2.515004840271055,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 14.66799259185791,
      "eval_runtime": 257.3763,
      "eval_samples_per_second": 21.218,
      "eval_steps_per_second": 2.654,
      "eval_wer": 1.0,
      "step": 1300
    },
    {
      "epoch": 2.708615682478219,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 14.063650131225586,
      "eval_runtime": 244.7195,
      "eval_samples_per_second": 22.315,
      "eval_steps_per_second": 2.791,
      "eval_wer": 1.0,
      "step": 1400
    },
    {
      "epoch": 2.9022265246853824,
      "grad_norm": 72.20648193359375,
      "learning_rate": 3.588e-06,
      "loss": 12.4363,
      "step": 1500
    },
    {
      "epoch": 2.9022265246853824,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 13.421667098999023,
      "eval_runtime": 256.8698,
      "eval_samples_per_second": 21.26,
      "eval_steps_per_second": 2.659,
      "eval_wer": 1.0,
      "step": 1500
    },
    {
      "epoch": 3.09486931268151,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 12.737384796142578,
      "eval_runtime": 223.924,
      "eval_samples_per_second": 24.388,
      "eval_steps_per_second": 3.05,
      "eval_wer": 1.0,
      "step": 1600
    },
    {
      "epoch": 3.2884801548886737,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 12.031902313232422,
      "eval_runtime": 222.2009,
      "eval_samples_per_second": 24.577,
      "eval_steps_per_second": 3.074,
      "eval_wer": 1.0,
      "step": 1700
    },
    {
      "epoch": 3.4820909970958374,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 11.298233032226562,
      "eval_runtime": 225.5639,
      "eval_samples_per_second": 24.21,
      "eval_steps_per_second": 3.028,
      "eval_wer": 1.0,
      "step": 1800
    },
    {
      "epoch": 3.675701839303001,
      "eval_cer": 0.9991719217784573,
      "eval_loss": 10.557957649230957,
      "eval_runtime": 227.931,
      "eval_samples_per_second": 23.959,
      "eval_steps_per_second": 2.997,
      "eval_wer": 1.0,
      "step": 1900
    },
    {
      "epoch": 3.8693126815101646,
      "grad_norm": 87.52015686035156,
      "learning_rate": 4.788e-06,
      "loss": 9.8267,
      "step": 2000
    },
    {
      "epoch": 3.8693126815101646,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 9.812894821166992,
      "eval_runtime": 227.4635,
      "eval_samples_per_second": 24.008,
      "eval_steps_per_second": 3.003,
      "eval_wer": 1.0,
      "step": 2000
    },
    {
      "epoch": 4.061955469506293,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 9.064043998718262,
      "eval_runtime": 223.8694,
      "eval_samples_per_second": 24.394,
      "eval_steps_per_second": 3.051,
      "eval_wer": 1.0,
      "step": 2100
    },
    {
      "epoch": 4.255566311713456,
      "eval_cer": 0.9991719217784573,
      "eval_loss": 8.337604522705078,
      "eval_runtime": 224.9536,
      "eval_samples_per_second": 24.276,
      "eval_steps_per_second": 3.036,
      "eval_wer": 1.0,
      "step": 2200
    },
    {
      "epoch": 4.44917715392062,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 7.628673076629639,
      "eval_runtime": 228.2923,
      "eval_samples_per_second": 23.921,
      "eval_steps_per_second": 2.992,
      "eval_wer": 1.0,
      "step": 2300
    },
    {
      "epoch": 4.642787996127783,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 6.967807769775391,
      "eval_runtime": 228.5115,
      "eval_samples_per_second": 23.898,
      "eval_steps_per_second": 2.989,
      "eval_wer": 1.0,
      "step": 2400
    },
    {
      "epoch": 4.836398838334947,
      "grad_norm": 34.805992126464844,
      "learning_rate": 5.988e-06,
      "loss": 6.9778,
      "step": 2500
    },
    {
      "epoch": 4.836398838334947,
      "eval_cer": 0.9991719217784573,
      "eval_loss": 6.363549709320068,
      "eval_runtime": 227.6007,
      "eval_samples_per_second": 23.994,
      "eval_steps_per_second": 3.001,
      "eval_wer": 1.0,
      "step": 2500
    },
    {
      "epoch": 5.029041626331074,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 5.8258442878723145,
      "eval_runtime": 265.7388,
      "eval_samples_per_second": 20.55,
      "eval_steps_per_second": 2.57,
      "eval_wer": 1.0,
      "step": 2600
    },
    {
      "epoch": 5.2226524685382385,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 5.367654800415039,
      "eval_runtime": 226.9427,
      "eval_samples_per_second": 24.063,
      "eval_steps_per_second": 3.01,
      "eval_wer": 1.0,
      "step": 2700
    },
    {
      "epoch": 5.416263310745402,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.988757133483887,
      "eval_runtime": 227.6207,
      "eval_samples_per_second": 23.992,
      "eval_steps_per_second": 3.001,
      "eval_wer": 1.0,
      "step": 2800
    },
    {
      "epoch": 5.609874152952566,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.695638656616211,
      "eval_runtime": 230.3237,
      "eval_samples_per_second": 23.71,
      "eval_steps_per_second": 2.965,
      "eval_wer": 1.0,
      "step": 2900
    },
    {
      "epoch": 5.803484995159729,
      "grad_norm": 3.1527926921844482,
      "learning_rate": 7.1880000000000005e-06,
      "loss": 4.8731,
      "step": 3000
    },
    {
      "epoch": 5.803484995159729,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.478826999664307,
      "eval_runtime": 234.3555,
      "eval_samples_per_second": 23.302,
      "eval_steps_per_second": 2.914,
      "eval_wer": 1.0,
      "step": 3000
    },
    {
      "epoch": 5.997095837366892,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.328735828399658,
      "eval_runtime": 226.0428,
      "eval_samples_per_second": 24.159,
      "eval_steps_per_second": 3.022,
      "eval_wer": 1.0,
      "step": 3100
    },
    {
      "epoch": 6.18973862536302,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.20569372177124,
      "eval_runtime": 227.3968,
      "eval_samples_per_second": 24.015,
      "eval_steps_per_second": 3.004,
      "eval_wer": 1.0,
      "step": 3200
    },
    {
      "epoch": 6.383349467570184,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.1447553634643555,
      "eval_runtime": 226.8138,
      "eval_samples_per_second": 24.077,
      "eval_steps_per_second": 3.011,
      "eval_wer": 1.0,
      "step": 3300
    },
    {
      "epoch": 6.5769603097773475,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.109466552734375,
      "eval_runtime": 223.6971,
      "eval_samples_per_second": 24.412,
      "eval_steps_per_second": 3.053,
      "eval_wer": 1.0,
      "step": 3400
    },
    {
      "epoch": 6.770571151984511,
      "grad_norm": 1.822492241859436,
      "learning_rate": 8.388e-06,
      "loss": 4.1216,
      "step": 3500
    },
    {
      "epoch": 6.770571151984511,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.085766792297363,
      "eval_runtime": 228.1097,
      "eval_samples_per_second": 23.94,
      "eval_steps_per_second": 2.994,
      "eval_wer": 1.0,
      "step": 3500
    },
    {
      "epoch": 6.964181994191675,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.072465896606445,
      "eval_runtime": 276.0608,
      "eval_samples_per_second": 19.782,
      "eval_steps_per_second": 2.474,
      "eval_wer": 1.0,
      "step": 3600
    },
    {
      "epoch": 7.156824782187803,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.064761161804199,
      "eval_runtime": 223.6339,
      "eval_samples_per_second": 24.419,
      "eval_steps_per_second": 3.054,
      "eval_wer": 1.0,
      "step": 3700
    },
    {
      "epoch": 7.350435624394966,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.057798385620117,
      "eval_runtime": 223.5104,
      "eval_samples_per_second": 24.433,
      "eval_steps_per_second": 3.056,
      "eval_wer": 1.0,
      "step": 3800
    },
    {
      "epoch": 7.54404646660213,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.0493927001953125,
      "eval_runtime": 227.6334,
      "eval_samples_per_second": 23.99,
      "eval_steps_per_second": 3.0,
      "eval_wer": 1.0,
      "step": 3900
    },
    {
      "epoch": 7.737657308809293,
      "grad_norm": 0.8575032949447632,
      "learning_rate": 9.588e-06,
      "loss": 4.0264,
      "step": 4000
    },
    {
      "epoch": 7.737657308809293,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.036725044250488,
      "eval_runtime": 232.0149,
      "eval_samples_per_second": 23.537,
      "eval_steps_per_second": 2.944,
      "eval_wer": 1.0,
      "step": 4000
    },
    {
      "epoch": 7.931268151016457,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.027583122253418,
      "eval_runtime": 228.3311,
      "eval_samples_per_second": 23.917,
      "eval_steps_per_second": 2.991,
      "eval_wer": 1.0,
      "step": 4100
    },
    {
      "epoch": 8.123910939012585,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 4.012051582336426,
      "eval_runtime": 230.306,
      "eval_samples_per_second": 23.712,
      "eval_steps_per_second": 2.966,
      "eval_wer": 1.0,
      "step": 4200
    },
    {
      "epoch": 8.317521781219748,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 3.9720146656036377,
      "eval_runtime": 232.5487,
      "eval_samples_per_second": 23.483,
      "eval_steps_per_second": 2.937,
      "eval_wer": 1.0,
      "step": 4300
    },
    {
      "epoch": 8.511132623426912,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 3.903093099594116,
      "eval_runtime": 225.8081,
      "eval_samples_per_second": 24.184,
      "eval_steps_per_second": 3.025,
      "eval_wer": 1.0,
      "step": 4400
    },
    {
      "epoch": 8.704743465634076,
      "grad_norm": 1.4530484676361084,
      "learning_rate": 1.0787999999999999e-05,
      "loss": 3.937,
      "step": 4500
    },
    {
      "epoch": 8.704743465634076,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 3.8090686798095703,
      "eval_runtime": 230.0214,
      "eval_samples_per_second": 23.741,
      "eval_steps_per_second": 2.969,
      "eval_wer": 1.0,
      "step": 4500
    },
    {
      "epoch": 8.89835430784124,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 3.6690022945404053,
      "eval_runtime": 229.4694,
      "eval_samples_per_second": 23.798,
      "eval_steps_per_second": 2.976,
      "eval_wer": 1.0,
      "step": 4600
    },
    {
      "epoch": 9.090997095837366,
      "eval_cer": 0.9991365338202717,
      "eval_loss": 3.475857973098755,
      "eval_runtime": 229.5875,
      "eval_samples_per_second": 23.786,
      "eval_steps_per_second": 2.975,
      "eval_wer": 1.0,
      "step": 4700
    },
    {
      "epoch": 9.28460793804453,
      "eval_cer": 0.998676490363859,
      "eval_loss": 3.2108352184295654,
      "eval_runtime": 224.0108,
      "eval_samples_per_second": 24.378,
      "eval_steps_per_second": 3.049,
      "eval_wer": 1.0,
      "step": 4800
    },
    {
      "epoch": 9.478218780251694,
      "eval_cer": 0.6453277278807568,
      "eval_loss": 2.681295394897461,
      "eval_runtime": 226.867,
      "eval_samples_per_second": 24.071,
      "eval_steps_per_second": 3.011,
      "eval_wer": 1.0,
      "step": 4900
    },
    {
      "epoch": 9.671829622458858,
      "grad_norm": 13.226855278015137,
      "learning_rate": 1.1988000000000001e-05,
      "loss": 3.1866,
      "step": 5000
    },
    {
      "epoch": 9.671829622458858,
      "eval_cer": 0.5372387483986949,
      "eval_loss": 2.3876163959503174,
      "eval_runtime": 226.669,
      "eval_samples_per_second": 24.092,
      "eval_steps_per_second": 3.013,
      "eval_wer": 1.000183116645303,
      "step": 5000
    },
    {
      "epoch": 9.86544046466602,
      "eval_cer": 0.4901869191951363,
      "eval_loss": 2.16782808303833,
      "eval_runtime": 225.6765,
      "eval_samples_per_second": 24.198,
      "eval_steps_per_second": 3.026,
      "eval_wer": 1.0,
      "step": 5100
    },
    {
      "epoch": 10.058083252662149,
      "eval_cer": 0.4530012527337198,
      "eval_loss": 1.9945032596588135,
      "eval_runtime": 231.2758,
      "eval_samples_per_second": 23.612,
      "eval_steps_per_second": 2.953,
      "eval_wer": 1.000183116645303,
      "step": 5200
    },
    {
      "epoch": 10.251694094869313,
      "eval_cer": 0.4269698706924008,
      "eval_loss": 1.8575785160064697,
      "eval_runtime": 226.1334,
      "eval_samples_per_second": 24.149,
      "eval_steps_per_second": 3.02,
      "eval_wer": 1.0,
      "step": 5300
    },
    {
      "epoch": 10.445304937076477,
      "eval_cer": 0.43989355302177774,
      "eval_loss": 1.7787507772445679,
      "eval_runtime": 226.9349,
      "eval_samples_per_second": 24.064,
      "eval_steps_per_second": 3.01,
      "eval_wer": 1.0,
      "step": 5400
    },
    {
      "epoch": 10.63891577928364,
      "grad_norm": 8.360028266906738,
      "learning_rate": 1.3188e-05,
      "loss": 1.9458,
      "step": 5500
    },
    {
      "epoch": 10.63891577928364,
      "eval_cer": 0.40938205547416323,
      "eval_loss": 1.6519718170166016,
      "eval_runtime": 227.1281,
      "eval_samples_per_second": 24.044,
      "eval_steps_per_second": 3.007,
      "eval_wer": 1.0,
      "step": 5500
    },
    {
      "epoch": 10.832526621490803,
      "eval_cer": 0.38743444380746117,
      "eval_loss": 1.5545194149017334,
      "eval_runtime": 225.52,
      "eval_samples_per_second": 24.215,
      "eval_steps_per_second": 3.029,
      "eval_wer": 1.0,
      "step": 5600
    },
    {
      "epoch": 11.025169409486931,
      "eval_cer": 0.3800312829550361,
      "eval_loss": 1.4698398113250732,
      "eval_runtime": 230.154,
      "eval_samples_per_second": 23.728,
      "eval_steps_per_second": 2.968,
      "eval_wer": 1.0,
      "step": 5700
    },
    {
      "epoch": 11.218780251694096,
      "eval_cer": 0.3777310656729728,
      "eval_loss": 1.4052294492721558,
      "eval_runtime": 229.123,
      "eval_samples_per_second": 23.834,
      "eval_steps_per_second": 2.981,
      "eval_wer": 1.0,
      "step": 5800
    },
    {
      "epoch": 11.412391093901258,
      "eval_cer": 0.3658124013560666,
      "eval_loss": 1.3275731801986694,
      "eval_runtime": 230.2917,
      "eval_samples_per_second": 23.713,
      "eval_steps_per_second": 2.966,
      "eval_wer": 1.0,
      "step": 5900
    },
    {
      "epoch": 11.606001936108422,
      "grad_norm": 4.284916877746582,
      "learning_rate": 1.4388000000000002e-05,
      "loss": 1.4263,
      "step": 6000
    },
    {
      "epoch": 11.606001936108422,
      "eval_cer": 0.36683865214344863,
      "eval_loss": 1.2710145711898804,
      "eval_runtime": 237.3574,
      "eval_samples_per_second": 23.008,
      "eval_steps_per_second": 2.878,
      "eval_wer": 1.0,
      "step": 6000
    },
    {
      "epoch": 11.799612778315586,
      "eval_cer": 0.35358940059876426,
      "eval_loss": 1.2150152921676636,
      "eval_runtime": 229.8802,
      "eval_samples_per_second": 23.756,
      "eval_steps_per_second": 2.971,
      "eval_wer": 1.0,
      "step": 6100
    },
    {
      "epoch": 11.99322362052275,
      "eval_cer": 0.3531364347339887,
      "eval_loss": 1.1585793495178223,
      "eval_runtime": 229.1811,
      "eval_samples_per_second": 23.828,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 6200
    },
    {
      "epoch": 12.185866408518876,
      "eval_cer": 0.3518553906476704,
      "eval_loss": 1.1155860424041748,
      "eval_runtime": 229.1686,
      "eval_samples_per_second": 23.83,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 6300
    },
    {
      "epoch": 12.37947725072604,
      "eval_cer": 0.34836613797057137,
      "eval_loss": 1.0729304552078247,
      "eval_runtime": 235.2642,
      "eval_samples_per_second": 23.212,
      "eval_steps_per_second": 2.903,
      "eval_wer": 1.0,
      "step": 6400
    },
    {
      "epoch": 12.573088092933205,
      "grad_norm": 7.104965686798096,
      "learning_rate": 1.5588e-05,
      "loss": 1.1212,
      "step": 6500
    },
    {
      "epoch": 12.573088092933205,
      "eval_cer": 0.3466533607943889,
      "eval_loss": 1.0344808101654053,
      "eval_runtime": 229.0405,
      "eval_samples_per_second": 23.843,
      "eval_steps_per_second": 2.982,
      "eval_wer": 1.0,
      "step": 6500
    },
    {
      "epoch": 12.766698935140369,
      "eval_cer": 0.3428314613103453,
      "eval_loss": 0.988746166229248,
      "eval_runtime": 228.7064,
      "eval_samples_per_second": 23.878,
      "eval_steps_per_second": 2.986,
      "eval_wer": 1.0,
      "step": 6600
    },
    {
      "epoch": 12.96030977734753,
      "eval_cer": 0.3416636586902209,
      "eval_loss": 0.963031530380249,
      "eval_runtime": 229.2204,
      "eval_samples_per_second": 23.824,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 6700
    },
    {
      "epoch": 13.152952565343659,
      "eval_cer": 0.3380753197302022,
      "eval_loss": 0.9259727597236633,
      "eval_runtime": 230.8473,
      "eval_samples_per_second": 23.656,
      "eval_steps_per_second": 2.959,
      "eval_wer": 1.0,
      "step": 6800
    },
    {
      "epoch": 13.346563407550823,
      "eval_cer": 0.3397102433983764,
      "eval_loss": 0.9005178809165955,
      "eval_runtime": 229.79,
      "eval_samples_per_second": 23.765,
      "eval_steps_per_second": 2.972,
      "eval_wer": 1.0,
      "step": 6900
    },
    {
      "epoch": 13.540174249757987,
      "grad_norm": 8.212745666503906,
      "learning_rate": 1.6788e-05,
      "loss": 0.9141,
      "step": 7000
    },
    {
      "epoch": 13.540174249757987,
      "eval_cer": 0.33693582747662626,
      "eval_loss": 0.8763672709465027,
      "eval_runtime": 233.0771,
      "eval_samples_per_second": 23.43,
      "eval_steps_per_second": 2.93,
      "eval_wer": 1.0,
      "step": 7000
    },
    {
      "epoch": 13.73378509196515,
      "eval_cer": 0.3362776114543743,
      "eval_loss": 0.8511508703231812,
      "eval_runtime": 228.4896,
      "eval_samples_per_second": 23.9,
      "eval_steps_per_second": 2.989,
      "eval_wer": 1.0,
      "step": 7100
    },
    {
      "epoch": 13.927395934172313,
      "eval_cer": 0.3351098088342499,
      "eval_loss": 0.8273207545280457,
      "eval_runtime": 228.9007,
      "eval_samples_per_second": 23.858,
      "eval_steps_per_second": 2.984,
      "eval_wer": 1.0,
      "step": 7200
    },
    {
      "epoch": 14.120038722168442,
      "eval_cer": 0.3328520571020093,
      "eval_loss": 0.8083305358886719,
      "eval_runtime": 230.4678,
      "eval_samples_per_second": 23.695,
      "eval_steps_per_second": 2.964,
      "eval_wer": 1.0,
      "step": 7300
    },
    {
      "epoch": 14.313649564375606,
      "eval_cer": 0.33002102044716225,
      "eval_loss": 0.7850707769393921,
      "eval_runtime": 234.2407,
      "eval_samples_per_second": 23.314,
      "eval_steps_per_second": 2.916,
      "eval_wer": 0.999816883354697,
      "step": 7400
    },
    {
      "epoch": 14.507260406582768,
      "grad_norm": 5.25850772857666,
      "learning_rate": 1.7988e-05,
      "loss": 0.7811,
      "step": 7500
    },
    {
      "epoch": 14.507260406582768,
      "eval_cer": 0.3312312886171094,
      "eval_loss": 0.7742574214935303,
      "eval_runtime": 228.7569,
      "eval_samples_per_second": 23.873,
      "eval_steps_per_second": 2.986,
      "eval_wer": 1.0,
      "step": 7500
    },
    {
      "epoch": 14.700871248789932,
      "eval_cer": 0.3272112165672265,
      "eval_loss": 0.7509779334068298,
      "eval_runtime": 225.7839,
      "eval_samples_per_second": 24.187,
      "eval_steps_per_second": 3.025,
      "eval_wer": 0.999816883354697,
      "step": 7600
    },
    {
      "epoch": 14.894482090997096,
      "eval_cer": 0.3267299403359025,
      "eval_loss": 0.7366129159927368,
      "eval_runtime": 234.848,
      "eval_samples_per_second": 23.253,
      "eval_steps_per_second": 2.908,
      "eval_wer": 1.0,
      "step": 7700
    },
    {
      "epoch": 15.087124878993224,
      "eval_cer": 0.3253427323750274,
      "eval_loss": 0.7289799451828003,
      "eval_runtime": 233.7629,
      "eval_samples_per_second": 23.361,
      "eval_steps_per_second": 2.922,
      "eval_wer": 1.0,
      "step": 7800
    },
    {
      "epoch": 15.280735721200386,
      "eval_cer": 0.3247199043109611,
      "eval_loss": 0.7132401466369629,
      "eval_runtime": 240.827,
      "eval_samples_per_second": 22.676,
      "eval_steps_per_second": 2.836,
      "eval_wer": 1.0,
      "step": 7900
    },
    {
      "epoch": 15.47434656340755,
      "grad_norm": 13.825493812561035,
      "learning_rate": 1.9188e-05,
      "loss": 0.6725,
      "step": 8000
    },
    {
      "epoch": 15.47434656340755,
      "eval_cer": 0.3276924927985505,
      "eval_loss": 0.7190116047859192,
      "eval_runtime": 231.4321,
      "eval_samples_per_second": 23.597,
      "eval_steps_per_second": 2.951,
      "eval_wer": 1.0,
      "step": 8000
    },
    {
      "epoch": 15.667957405614715,
      "eval_cer": 0.3241253866134432,
      "eval_loss": 0.7005703449249268,
      "eval_runtime": 229.0636,
      "eval_samples_per_second": 23.841,
      "eval_steps_per_second": 2.982,
      "eval_wer": 1.0,
      "step": 8100
    },
    {
      "epoch": 15.861568247821879,
      "eval_cer": 0.322554161270003,
      "eval_loss": 0.683487594127655,
      "eval_runtime": 230.5943,
      "eval_samples_per_second": 23.682,
      "eval_steps_per_second": 2.962,
      "eval_wer": 1.0,
      "step": 8200
    },
    {
      "epoch": 16.054211035818007,
      "eval_cer": 0.32088384964364325,
      "eval_loss": 0.6697654724121094,
      "eval_runtime": 253.1459,
      "eval_samples_per_second": 21.573,
      "eval_steps_per_second": 2.698,
      "eval_wer": 0.999816883354697,
      "step": 8300
    },
    {
      "epoch": 16.24782187802517,
      "eval_cer": 0.3213722034666044,
      "eval_loss": 0.6627610921859741,
      "eval_runtime": 228.1606,
      "eval_samples_per_second": 23.935,
      "eval_steps_per_second": 2.994,
      "eval_wer": 0.999816883354697,
      "step": 8400
    },
    {
      "epoch": 16.44143272023233,
      "grad_norm": 4.491298675537109,
      "learning_rate": 2.0388e-05,
      "loss": 0.606,
      "step": 8500
    },
    {
      "epoch": 16.44143272023233,
      "eval_cer": 0.320508737286876,
      "eval_loss": 0.6538371443748474,
      "eval_runtime": 229.6742,
      "eval_samples_per_second": 23.777,
      "eval_steps_per_second": 2.974,
      "eval_wer": 1.0,
      "step": 8500
    },
    {
      "epoch": 16.635043562439495,
      "eval_cer": 0.3186119427281285,
      "eval_loss": 0.652263343334198,
      "eval_runtime": 229.4944,
      "eval_samples_per_second": 23.796,
      "eval_steps_per_second": 2.976,
      "eval_wer": 1.0,
      "step": 8600
    },
    {
      "epoch": 16.82865440464666,
      "eval_cer": 0.3183005286960953,
      "eval_loss": 0.6448878049850464,
      "eval_runtime": 253.2088,
      "eval_samples_per_second": 21.567,
      "eval_steps_per_second": 2.697,
      "eval_wer": 1.0,
      "step": 8700
    },
    {
      "epoch": 17.021297192642788,
      "eval_cer": 0.3179466491142394,
      "eval_loss": 0.6400793194770813,
      "eval_runtime": 227.3314,
      "eval_samples_per_second": 24.022,
      "eval_steps_per_second": 3.004,
      "eval_wer": 1.0,
      "step": 8800
    },
    {
      "epoch": 17.21490803484995,
      "eval_cer": 0.3199849955057293,
      "eval_loss": 0.6333277821540833,
      "eval_runtime": 227.9774,
      "eval_samples_per_second": 23.954,
      "eval_steps_per_second": 2.996,
      "eval_wer": 1.0,
      "step": 8900
    },
    {
      "epoch": 17.408518877057116,
      "grad_norm": 3.5857622623443604,
      "learning_rate": 2.1588e-05,
      "loss": 0.5492,
      "step": 9000
    },
    {
      "epoch": 17.408518877057116,
      "eval_cer": 0.320147780113383,
      "eval_loss": 0.6332593560218811,
      "eval_runtime": 229.5302,
      "eval_samples_per_second": 23.792,
      "eval_steps_per_second": 2.976,
      "eval_wer": 1.0,
      "step": 9000
    },
    {
      "epoch": 17.60212971926428,
      "eval_cer": 0.3179324939309652,
      "eval_loss": 0.6219143867492676,
      "eval_runtime": 227.7602,
      "eval_samples_per_second": 23.977,
      "eval_steps_per_second": 2.999,
      "eval_wer": 1.0,
      "step": 9100
    },
    {
      "epoch": 17.795740561471444,
      "eval_cer": 0.32011239215519743,
      "eval_loss": 0.6189157366752625,
      "eval_runtime": 234.0727,
      "eval_samples_per_second": 23.33,
      "eval_steps_per_second": 2.918,
      "eval_wer": 1.0,
      "step": 9200
    },
    {
      "epoch": 17.989351403678604,
      "eval_cer": 0.3165523635617272,
      "eval_loss": 0.6023225784301758,
      "eval_runtime": 252.0388,
      "eval_samples_per_second": 21.667,
      "eval_steps_per_second": 2.71,
      "eval_wer": 0.999816883354697,
      "step": 9300
    },
    {
      "epoch": 18.181994191674733,
      "eval_cer": 0.31535625057505434,
      "eval_loss": 0.6083632707595825,
      "eval_runtime": 237.956,
      "eval_samples_per_second": 22.95,
      "eval_steps_per_second": 2.87,
      "eval_wer": 1.0,
      "step": 9400
    },
    {
      "epoch": 18.375605033881897,
      "grad_norm": 4.794497489929199,
      "learning_rate": 2.2788000000000003e-05,
      "loss": 0.5057,
      "step": 9500
    },
    {
      "epoch": 18.375605033881897,
      "eval_cer": 0.31467680177789104,
      "eval_loss": 0.6001758575439453,
      "eval_runtime": 227.7296,
      "eval_samples_per_second": 23.98,
      "eval_steps_per_second": 2.999,
      "eval_wer": 0.999816883354697,
      "step": 9500
    },
    {
      "epoch": 18.56921587608906,
      "eval_cer": 0.31280123999405485,
      "eval_loss": 0.5875076055526733,
      "eval_runtime": 226.4211,
      "eval_samples_per_second": 24.119,
      "eval_steps_per_second": 3.017,
      "eval_wer": 1.0,
      "step": 9600
    },
    {
      "epoch": 18.762826718296225,
      "eval_cer": 0.31375671486506573,
      "eval_loss": 0.5903081297874451,
      "eval_runtime": 227.027,
      "eval_samples_per_second": 24.054,
      "eval_steps_per_second": 3.008,
      "eval_wer": 0.999816883354697,
      "step": 9700
    },
    {
      "epoch": 18.95643756050339,
      "eval_cer": 0.3126738433445867,
      "eval_loss": 0.5929713249206543,
      "eval_runtime": 243.7928,
      "eval_samples_per_second": 22.4,
      "eval_steps_per_second": 2.802,
      "eval_wer": 1.0,
      "step": 9800
    },
    {
      "epoch": 19.149080348499517,
      "eval_cer": 0.31413890481347007,
      "eval_loss": 0.5854855179786682,
      "eval_runtime": 230.7398,
      "eval_samples_per_second": 23.667,
      "eval_steps_per_second": 2.96,
      "eval_wer": 1.0,
      "step": 9900
    },
    {
      "epoch": 19.34269119070668,
      "grad_norm": 4.687788009643555,
      "learning_rate": 2.39856e-05,
      "loss": 0.4709,
      "step": 10000
    },
    {
      "epoch": 19.34269119070668,
      "eval_cer": 0.3119943945474234,
      "eval_loss": 0.5880363583564758,
      "eval_runtime": 227.5508,
      "eval_samples_per_second": 23.999,
      "eval_steps_per_second": 3.002,
      "eval_wer": 1.0,
      "step": 10000
    },
    {
      "epoch": 19.53630203291384,
      "eval_cer": 0.3131197316177251,
      "eval_loss": 0.5854519605636597,
      "eval_runtime": 235.4586,
      "eval_samples_per_second": 23.193,
      "eval_steps_per_second": 2.901,
      "eval_wer": 1.0,
      "step": 10100
    },
    {
      "epoch": 19.729912875121006,
      "eval_cer": 0.310649652136371,
      "eval_loss": 0.5734274387359619,
      "eval_runtime": 227.5849,
      "eval_samples_per_second": 23.995,
      "eval_steps_per_second": 3.001,
      "eval_wer": 1.0,
      "step": 10200
    },
    {
      "epoch": 19.92352371732817,
      "eval_cer": 0.31090444543530726,
      "eval_loss": 0.5776930451393127,
      "eval_runtime": 227.4224,
      "eval_samples_per_second": 24.013,
      "eval_steps_per_second": 3.003,
      "eval_wer": 1.0,
      "step": 10300
    },
    {
      "epoch": 19.962245885769605,
      "step": 10320,
      "total_flos": 1.4589649587726338e+19,
      "train_loss": 5.084269817855007,
      "train_runtime": 64283.8792,
      "train_samples_per_second": 5.142,
      "train_steps_per_second": 0.161
    }
  ],
  "logging_steps": 500,
  "max_steps": 10320,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4589649587726338e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}