{
  "best_metric": 0.46736299991607666,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-combined-10hrs-model/checkpoint-1600",
  "epoch": 2.6247391952309984,
  "eval_steps": 200,
  "global_step": 2200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.029806259314456036,
      "grad_norm": 139.82965087890625,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 10.6377,
      "step": 25
    },
    {
      "epoch": 0.05961251862891207,
      "grad_norm": 98.2421646118164,
      "learning_rate": 8.8e-07,
      "loss": 8.4119,
      "step": 50
    },
    {
      "epoch": 0.08941877794336811,
      "grad_norm": 64.38908386230469,
      "learning_rate": 1.3800000000000001e-06,
      "loss": 5.9129,
      "step": 75
    },
    {
      "epoch": 0.11922503725782414,
      "grad_norm": 71.607421875,
      "learning_rate": 1.8800000000000002e-06,
      "loss": 4.4812,
      "step": 100
    },
    {
      "epoch": 0.14903129657228018,
      "grad_norm": 61.29523468017578,
      "learning_rate": 2.38e-06,
      "loss": 3.4448,
      "step": 125
    },
    {
      "epoch": 0.17883755588673622,
      "grad_norm": 65.6143798828125,
      "learning_rate": 2.88e-06,
      "loss": 3.1512,
      "step": 150
    },
    {
      "epoch": 0.20864381520119224,
      "grad_norm": 64.80217742919922,
      "learning_rate": 3.3800000000000007e-06,
      "loss": 3.125,
      "step": 175
    },
    {
      "epoch": 0.23845007451564829,
      "grad_norm": 57.09419631958008,
      "learning_rate": 3.88e-06,
      "loss": 2.6418,
      "step": 200
    },
    {
      "epoch": 0.23845007451564829,
      "eval_loss": 0.8311507105827332,
      "eval_runtime": 571.9983,
      "eval_samples_per_second": 1.984,
      "eval_steps_per_second": 0.993,
      "eval_wer": 0.48349360563101024,
      "step": 200
    },
    {
      "epoch": 0.26825633383010433,
      "grad_norm": 53.5373649597168,
      "learning_rate": 4.38e-06,
      "loss": 2.559,
      "step": 225
    },
    {
      "epoch": 0.29806259314456035,
      "grad_norm": 71.17430114746094,
      "learning_rate": 4.880000000000001e-06,
      "loss": 2.5908,
      "step": 250
    },
    {
      "epoch": 0.32786885245901637,
      "grad_norm": 46.32373809814453,
      "learning_rate": 5.380000000000001e-06,
      "loss": 2.1127,
      "step": 275
    },
    {
      "epoch": 0.35767511177347244,
      "grad_norm": 58.01958084106445,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 2.2759,
      "step": 300
    },
    {
      "epoch": 0.38748137108792846,
      "grad_norm": 68.27952575683594,
      "learning_rate": 6.380000000000001e-06,
      "loss": 2.2239,
      "step": 325
    },
    {
      "epoch": 0.4172876304023845,
      "grad_norm": 57.420711517333984,
      "learning_rate": 6.88e-06,
      "loss": 1.9789,
      "step": 350
    },
    {
      "epoch": 0.44709388971684055,
      "grad_norm": 56.379539489746094,
      "learning_rate": 7.3800000000000005e-06,
      "loss": 2.0375,
      "step": 375
    },
    {
      "epoch": 0.47690014903129657,
      "grad_norm": 65.61225128173828,
      "learning_rate": 7.88e-06,
      "loss": 1.9381,
      "step": 400
    },
    {
      "epoch": 0.47690014903129657,
      "eval_loss": 0.6573777794837952,
      "eval_runtime": 571.3807,
      "eval_samples_per_second": 1.986,
      "eval_steps_per_second": 0.994,
      "eval_wer": 0.40497670268662633,
      "step": 400
    },
    {
      "epoch": 0.5067064083457526,
      "grad_norm": 49.37194061279297,
      "learning_rate": 8.380000000000001e-06,
      "loss": 2.0002,
      "step": 425
    },
    {
      "epoch": 0.5365126676602087,
      "grad_norm": 55.09164810180664,
      "learning_rate": 8.880000000000001e-06,
      "loss": 1.9199,
      "step": 450
    },
    {
      "epoch": 0.5663189269746647,
      "grad_norm": 46.766258239746094,
      "learning_rate": 9.38e-06,
      "loss": 1.9346,
      "step": 475
    },
    {
      "epoch": 0.5961251862891207,
      "grad_norm": 66.73670196533203,
      "learning_rate": 9.88e-06,
      "loss": 1.8662,
      "step": 500
    },
    {
      "epoch": 0.6259314456035767,
      "grad_norm": 64.63770294189453,
      "learning_rate": 9.992288961038962e-06,
      "loss": 1.8393,
      "step": 525
    },
    {
      "epoch": 0.6557377049180327,
      "grad_norm": 59.25200271606445,
      "learning_rate": 9.982142857142858e-06,
      "loss": 1.6775,
      "step": 550
    },
    {
      "epoch": 0.6855439642324889,
      "grad_norm": 43.544105529785156,
      "learning_rate": 9.971996753246755e-06,
      "loss": 1.6314,
      "step": 575
    },
    {
      "epoch": 0.7153502235469449,
      "grad_norm": 37.862571716308594,
      "learning_rate": 9.96185064935065e-06,
      "loss": 1.7528,
      "step": 600
    },
    {
      "epoch": 0.7153502235469449,
      "eval_loss": 0.5705794095993042,
      "eval_runtime": 573.4955,
      "eval_samples_per_second": 1.979,
      "eval_steps_per_second": 0.99,
      "eval_wer": 0.35451571329433923,
      "step": 600
    },
    {
      "epoch": 0.7451564828614009,
      "grad_norm": 80.73561096191406,
      "learning_rate": 9.951704545454546e-06,
      "loss": 1.7955,
      "step": 625
    },
    {
      "epoch": 0.7749627421758569,
      "grad_norm": 59.763912200927734,
      "learning_rate": 9.941558441558441e-06,
      "loss": 1.5321,
      "step": 650
    },
    {
      "epoch": 0.8047690014903129,
      "grad_norm": 40.65838623046875,
      "learning_rate": 9.931412337662338e-06,
      "loss": 1.745,
      "step": 675
    },
    {
      "epoch": 0.834575260804769,
      "grad_norm": 38.0390625,
      "learning_rate": 9.921266233766234e-06,
      "loss": 1.5962,
      "step": 700
    },
    {
      "epoch": 0.8643815201192251,
      "grad_norm": 65.32721710205078,
      "learning_rate": 9.911120129870131e-06,
      "loss": 1.8341,
      "step": 725
    },
    {
      "epoch": 0.8941877794336811,
      "grad_norm": 59.77840805053711,
      "learning_rate": 9.900974025974028e-06,
      "loss": 1.7181,
      "step": 750
    },
    {
      "epoch": 0.9239940387481371,
      "grad_norm": 53.4656867980957,
      "learning_rate": 9.890827922077922e-06,
      "loss": 1.568,
      "step": 775
    },
    {
      "epoch": 0.9538002980625931,
      "grad_norm": 69.80867767333984,
      "learning_rate": 9.880681818181819e-06,
      "loss": 1.7421,
      "step": 800
    },
    {
      "epoch": 0.9538002980625931,
      "eval_loss": 0.5140205025672913,
      "eval_runtime": 584.6629,
      "eval_samples_per_second": 1.941,
      "eval_steps_per_second": 0.971,
      "eval_wer": 0.3503519381381977,
      "step": 800
    },
    {
      "epoch": 0.9836065573770492,
      "grad_norm": 58.31004333496094,
      "learning_rate": 9.870535714285716e-06,
      "loss": 1.4786,
      "step": 825
    },
    {
      "epoch": 1.0143070044709388,
      "grad_norm": 26.68948745727539,
      "learning_rate": 9.86038961038961e-06,
      "loss": 1.3642,
      "step": 850
    },
    {
      "epoch": 1.044113263785395,
      "grad_norm": 25.357746124267578,
      "learning_rate": 9.850243506493507e-06,
      "loss": 0.9665,
      "step": 875
    },
    {
      "epoch": 1.073919523099851,
      "grad_norm": 25.640474319458008,
      "learning_rate": 9.840097402597404e-06,
      "loss": 0.9145,
      "step": 900
    },
    {
      "epoch": 1.103725782414307,
      "grad_norm": 39.184608459472656,
      "learning_rate": 9.829951298701298e-06,
      "loss": 0.8596,
      "step": 925
    },
    {
      "epoch": 1.133532041728763,
      "grad_norm": 25.94022560119629,
      "learning_rate": 9.819805194805195e-06,
      "loss": 0.8788,
      "step": 950
    },
    {
      "epoch": 1.163338301043219,
      "grad_norm": 21.88936996459961,
      "learning_rate": 9.809659090909092e-06,
      "loss": 0.8949,
      "step": 975
    },
    {
      "epoch": 1.1931445603576751,
      "grad_norm": 34.77507400512695,
      "learning_rate": 9.799512987012988e-06,
      "loss": 0.9259,
      "step": 1000
    },
    {
      "epoch": 1.1931445603576751,
      "eval_loss": 0.5174579620361328,
      "eval_runtime": 573.7506,
      "eval_samples_per_second": 1.978,
      "eval_steps_per_second": 0.99,
      "eval_wer": 0.32616238723108953,
      "step": 1000
    },
    {
      "epoch": 1.222950819672131,
      "grad_norm": 32.29697799682617,
      "learning_rate": 9.789366883116885e-06,
      "loss": 0.776,
      "step": 1025
    },
    {
      "epoch": 1.2527570789865872,
      "grad_norm": 22.945188522338867,
      "learning_rate": 9.779220779220781e-06,
      "loss": 0.8614,
      "step": 1050
    },
    {
      "epoch": 1.2825633383010433,
      "grad_norm": 29.45697593688965,
      "learning_rate": 9.769074675324676e-06,
      "loss": 0.9076,
      "step": 1075
    },
    {
      "epoch": 1.3123695976154992,
      "grad_norm": 21.750009536743164,
      "learning_rate": 9.758928571428573e-06,
      "loss": 0.8417,
      "step": 1100
    },
    {
      "epoch": 1.3421758569299553,
      "grad_norm": 23.066518783569336,
      "learning_rate": 9.748782467532468e-06,
      "loss": 0.8405,
      "step": 1125
    },
    {
      "epoch": 1.3719821162444115,
      "grad_norm": 47.0228157043457,
      "learning_rate": 9.738636363636364e-06,
      "loss": 0.9124,
      "step": 1150
    },
    {
      "epoch": 1.4017883755588674,
      "grad_norm": 41.16919708251953,
      "learning_rate": 9.72849025974026e-06,
      "loss": 0.8915,
      "step": 1175
    },
    {
      "epoch": 1.4315946348733233,
      "grad_norm": 24.697216033935547,
      "learning_rate": 9.718344155844157e-06,
      "loss": 0.8254,
      "step": 1200
    },
    {
      "epoch": 1.4315946348733233,
      "eval_loss": 0.4977598488330841,
      "eval_runtime": 571.6911,
      "eval_samples_per_second": 1.985,
      "eval_steps_per_second": 0.994,
      "eval_wer": 0.3096064241102409,
      "step": 1200
    },
    {
      "epoch": 1.4614008941877794,
      "grad_norm": 27.103702545166016,
      "learning_rate": 9.708198051948052e-06,
      "loss": 0.8325,
      "step": 1225
    },
    {
      "epoch": 1.4912071535022355,
      "grad_norm": 37.062477111816406,
      "learning_rate": 9.698051948051949e-06,
      "loss": 0.9808,
      "step": 1250
    },
    {
      "epoch": 1.5210134128166914,
      "grad_norm": 29.10225486755371,
      "learning_rate": 9.687905844155845e-06,
      "loss": 0.8731,
      "step": 1275
    },
    {
      "epoch": 1.5508196721311476,
      "grad_norm": 33.52768325805664,
      "learning_rate": 9.677759740259742e-06,
      "loss": 0.8828,
      "step": 1300
    },
    {
      "epoch": 1.5806259314456037,
      "grad_norm": 27.66816520690918,
      "learning_rate": 9.667613636363637e-06,
      "loss": 0.9907,
      "step": 1325
    },
    {
      "epoch": 1.6104321907600596,
      "grad_norm": 41.44624710083008,
      "learning_rate": 9.657467532467533e-06,
      "loss": 0.8091,
      "step": 1350
    },
    {
      "epoch": 1.6402384500745155,
      "grad_norm": 39.36080551147461,
      "learning_rate": 9.64732142857143e-06,
      "loss": 0.9308,
      "step": 1375
    },
    {
      "epoch": 1.6700447093889716,
      "grad_norm": 47.41120147705078,
      "learning_rate": 9.637175324675325e-06,
      "loss": 0.9616,
      "step": 1400
    },
    {
      "epoch": 1.6700447093889716,
      "eval_loss": 0.4754635989665985,
      "eval_runtime": 570.3412,
      "eval_samples_per_second": 1.99,
      "eval_steps_per_second": 0.996,
      "eval_wer": 0.2998909487459106,
      "step": 1400
    },
    {
      "epoch": 1.6998509687034278,
      "grad_norm": 43.4237174987793,
      "learning_rate": 9.627029220779221e-06,
      "loss": 0.7285,
      "step": 1425
    },
    {
      "epoch": 1.7296572280178837,
      "grad_norm": 30.96293830871582,
      "learning_rate": 9.616883116883118e-06,
      "loss": 0.8987,
      "step": 1450
    },
    {
      "epoch": 1.7594634873323398,
      "grad_norm": 35.58831787109375,
      "learning_rate": 9.606737012987014e-06,
      "loss": 0.9382,
      "step": 1475
    },
    {
      "epoch": 1.789269746646796,
      "grad_norm": 25.283161163330078,
      "learning_rate": 9.596590909090911e-06,
      "loss": 0.7889,
      "step": 1500
    },
    {
      "epoch": 1.8190760059612519,
      "grad_norm": 25.010147094726562,
      "learning_rate": 9.586444805194806e-06,
      "loss": 0.886,
      "step": 1525
    },
    {
      "epoch": 1.8488822652757078,
      "grad_norm": 37.391361236572266,
      "learning_rate": 9.576298701298702e-06,
      "loss": 0.7991,
      "step": 1550
    },
    {
      "epoch": 1.8786885245901639,
      "grad_norm": 41.15829086303711,
      "learning_rate": 9.566152597402599e-06,
      "loss": 0.7902,
      "step": 1575
    },
    {
      "epoch": 1.90849478390462,
      "grad_norm": 33.89030838012695,
      "learning_rate": 9.556006493506494e-06,
      "loss": 0.7893,
      "step": 1600
    },
    {
      "epoch": 1.90849478390462,
      "eval_loss": 0.46736299991607666,
      "eval_runtime": 576.0496,
      "eval_samples_per_second": 1.97,
      "eval_steps_per_second": 0.986,
      "eval_wer": 0.3075245365321701,
      "step": 1600
    },
    {
      "epoch": 1.938301043219076,
      "grad_norm": 35.02192687988281,
      "learning_rate": 9.54586038961039e-06,
      "loss": 0.94,
      "step": 1625
    },
    {
      "epoch": 1.968107302533532,
      "grad_norm": 21.93513298034668,
      "learning_rate": 9.535714285714287e-06,
      "loss": 0.8729,
      "step": 1650
    },
    {
      "epoch": 1.9979135618479882,
      "grad_norm": 30.547515869140625,
      "learning_rate": 9.525568181818182e-06,
      "loss": 0.7377,
      "step": 1675
    },
    {
      "epoch": 2.0286140089418776,
      "grad_norm": 18.494037628173828,
      "learning_rate": 9.515422077922078e-06,
      "loss": 0.339,
      "step": 1700
    },
    {
      "epoch": 2.058420268256334,
      "grad_norm": 27.289072036743164,
      "learning_rate": 9.505275974025975e-06,
      "loss": 0.393,
      "step": 1725
    },
    {
      "epoch": 2.08822652757079,
      "grad_norm": 20.814571380615234,
      "learning_rate": 9.495129870129871e-06,
      "loss": 0.3645,
      "step": 1750
    },
    {
      "epoch": 2.1180327868852458,
      "grad_norm": 14.510347366333008,
      "learning_rate": 9.484983766233768e-06,
      "loss": 0.3043,
      "step": 1775
    },
    {
      "epoch": 2.147839046199702,
      "grad_norm": 30.937259674072266,
      "learning_rate": 9.474837662337663e-06,
      "loss": 0.3547,
      "step": 1800
    },
    {
      "epoch": 2.147839046199702,
      "eval_loss": 0.48483970761299133,
      "eval_runtime": 577.3872,
      "eval_samples_per_second": 1.966,
      "eval_steps_per_second": 0.984,
      "eval_wer": 0.3157529493407356,
      "step": 1800
    },
    {
      "epoch": 2.177645305514158,
      "grad_norm": 32.33222579956055,
      "learning_rate": 9.46469155844156e-06,
      "loss": 0.3615,
      "step": 1825
    },
    {
      "epoch": 2.207451564828614,
      "grad_norm": 19.55378532409668,
      "learning_rate": 9.454545454545456e-06,
      "loss": 0.4072,
      "step": 1850
    },
    {
      "epoch": 2.23725782414307,
      "grad_norm": 31.809528350830078,
      "learning_rate": 9.44439935064935e-06,
      "loss": 0.3636,
      "step": 1875
    },
    {
      "epoch": 2.267064083457526,
      "grad_norm": 16.221904754638672,
      "learning_rate": 9.434253246753247e-06,
      "loss": 0.3927,
      "step": 1900
    },
    {
      "epoch": 2.296870342771982,
      "grad_norm": 23.400074005126953,
      "learning_rate": 9.424107142857144e-06,
      "loss": 0.267,
      "step": 1925
    },
    {
      "epoch": 2.326676602086438,
      "grad_norm": 19.966495513916016,
      "learning_rate": 9.413961038961039e-06,
      "loss": 0.3991,
      "step": 1950
    },
    {
      "epoch": 2.3564828614008944,
      "grad_norm": 23.508337020874023,
      "learning_rate": 9.403814935064935e-06,
      "loss": 0.3535,
      "step": 1975
    },
    {
      "epoch": 2.3862891207153503,
      "grad_norm": 39.417266845703125,
      "learning_rate": 9.393668831168832e-06,
      "loss": 0.3568,
      "step": 2000
    },
    {
      "epoch": 2.3862891207153503,
      "eval_loss": 0.4912998676300049,
      "eval_runtime": 564.2324,
      "eval_samples_per_second": 2.012,
      "eval_steps_per_second": 1.007,
      "eval_wer": 0.26162387231089523,
      "step": 2000
    },
    {
      "epoch": 2.416095380029806,
      "grad_norm": 25.93727684020996,
      "learning_rate": 9.383522727272729e-06,
      "loss": 0.3906,
      "step": 2025
    },
    {
      "epoch": 2.445901639344262,
      "grad_norm": 22.80946159362793,
      "learning_rate": 9.373376623376625e-06,
      "loss": 0.3112,
      "step": 2050
    },
    {
      "epoch": 2.4757078986587184,
      "grad_norm": 9.392194747924805,
      "learning_rate": 9.36323051948052e-06,
      "loss": 0.3682,
      "step": 2075
    },
    {
      "epoch": 2.5055141579731743,
      "grad_norm": 21.888240814208984,
      "learning_rate": 9.353084415584416e-06,
      "loss": 0.3414,
      "step": 2100
    },
    {
      "epoch": 2.5353204172876302,
      "grad_norm": 17.335634231567383,
      "learning_rate": 9.342938311688311e-06,
      "loss": 0.3824,
      "step": 2125
    },
    {
      "epoch": 2.5651266766020866,
      "grad_norm": 37.35403060913086,
      "learning_rate": 9.332792207792208e-06,
      "loss": 0.3799,
      "step": 2150
    },
    {
      "epoch": 2.5949329359165425,
      "grad_norm": 15.803303718566895,
      "learning_rate": 9.322646103896104e-06,
      "loss": 0.39,
      "step": 2175
    },
    {
      "epoch": 2.6247391952309984,
      "grad_norm": 21.457035064697266,
      "learning_rate": 9.312500000000001e-06,
      "loss": 0.3759,
      "step": 2200
    },
    {
      "epoch": 2.6247391952309984,
      "eval_loss": 0.4685392677783966,
      "eval_runtime": 585.3696,
      "eval_samples_per_second": 1.939,
      "eval_steps_per_second": 0.97,
      "eval_wer": 0.3103995241399822,
      "step": 2200
    },
    {
      "epoch": 2.6247391952309984,
      "step": 2200,
      "total_flos": 1.797081121161216e+19,
      "train_loss": 1.4413999737392773,
      "train_runtime": 10621.5138,
      "train_samples_per_second": 18.952,
      "train_steps_per_second": 2.367
    }
  ],
  "logging_steps": 25,
  "max_steps": 25140,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.797081121161216e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}