|
{ |
|
"best_global_step": 13000, |
|
"best_metric": 0.44776962168266515, |
|
"best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v7/checkpoint-13000", |
|
"epoch": 400.0, |
|
"eval_steps": 1000, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 28.316560745239258, |
|
"learning_rate": 7.88e-07, |
|
"loss": 3.6257, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 19.273317337036133, |
|
"learning_rate": 1.588e-06, |
|
"loss": 2.0745, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 20.253143310546875, |
|
"learning_rate": 2.3880000000000003e-06, |
|
"loss": 1.5564, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 18.321447372436523, |
|
"learning_rate": 3.188e-06, |
|
"loss": 1.2332, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 15.124577522277832, |
|
"learning_rate": 3.988000000000001e-06, |
|
"loss": 0.9973, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.0427539348602295, |
|
"eval_runtime": 42.8057, |
|
"eval_samples_per_second": 5.84, |
|
"eval_steps_per_second": 1.472, |
|
"eval_wer": 0.6821005081874647, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 17.642391204833984, |
|
"learning_rate": 4.7880000000000006e-06, |
|
"loss": 0.7932, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"grad_norm": 12.182209014892578, |
|
"learning_rate": 5.588e-06, |
|
"loss": 0.6471, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"grad_norm": 14.519808769226074, |
|
"learning_rate": 6.3880000000000005e-06, |
|
"loss": 0.5151, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"grad_norm": 13.092401504516602, |
|
"learning_rate": 7.1880000000000005e-06, |
|
"loss": 0.3948, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 11.094600677490234, |
|
"learning_rate": 7.988e-06, |
|
"loss": 0.3139, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.8733022212982178, |
|
"eval_runtime": 46.221, |
|
"eval_samples_per_second": 5.409, |
|
"eval_steps_per_second": 1.363, |
|
"eval_wer": 0.6233766233766234, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"grad_norm": 13.814520835876465, |
|
"learning_rate": 8.788000000000001e-06, |
|
"loss": 0.2389, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"grad_norm": 10.147465705871582, |
|
"learning_rate": 9.588e-06, |
|
"loss": 0.1868, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"grad_norm": 6.512983798980713, |
|
"learning_rate": 1.0388e-05, |
|
"loss": 0.1454, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"grad_norm": 7.759674549102783, |
|
"learning_rate": 1.1188e-05, |
|
"loss": 0.1186, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 8.339158058166504, |
|
"learning_rate": 1.1988000000000001e-05, |
|
"loss": 0.0988, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.8925266861915588, |
|
"eval_runtime": 44.0477, |
|
"eval_samples_per_second": 5.676, |
|
"eval_steps_per_second": 1.43, |
|
"eval_wer": 0.6346696781479391, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"grad_norm": 4.570150375366211, |
|
"learning_rate": 1.2788e-05, |
|
"loss": 0.0821, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"grad_norm": 6.510232925415039, |
|
"learning_rate": 1.3588000000000001e-05, |
|
"loss": 0.0745, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"grad_norm": 6.720197677612305, |
|
"learning_rate": 1.4388000000000002e-05, |
|
"loss": 0.0664, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"grad_norm": 6.113977432250977, |
|
"learning_rate": 1.5188000000000001e-05, |
|
"loss": 0.0579, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 4.528390884399414, |
|
"learning_rate": 1.5988e-05, |
|
"loss": 0.0499, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.9389314651489258, |
|
"eval_runtime": 43.1155, |
|
"eval_samples_per_second": 5.798, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.6149068322981367, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"grad_norm": 8.023102760314941, |
|
"learning_rate": 1.6784e-05, |
|
"loss": 0.0488, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"grad_norm": 5.378859043121338, |
|
"learning_rate": 1.7584e-05, |
|
"loss": 0.0443, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"grad_norm": 6.565428256988525, |
|
"learning_rate": 1.8380000000000004e-05, |
|
"loss": 0.0415, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"grad_norm": 3.81632137298584, |
|
"learning_rate": 1.918e-05, |
|
"loss": 0.0387, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 1.75031316280365, |
|
"learning_rate": 1.9980000000000002e-05, |
|
"loss": 0.0349, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.9878742098808289, |
|
"eval_runtime": 41.8583, |
|
"eval_samples_per_second": 5.973, |
|
"eval_steps_per_second": 1.505, |
|
"eval_wer": 0.6205533596837944, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"grad_norm": 1.5769927501678467, |
|
"learning_rate": 1.9913333333333335e-05, |
|
"loss": 0.0335, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"grad_norm": 4.837329387664795, |
|
"learning_rate": 1.9824444444444445e-05, |
|
"loss": 0.0303, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"grad_norm": 4.526083469390869, |
|
"learning_rate": 1.9735555555555556e-05, |
|
"loss": 0.0282, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"grad_norm": 4.3648176193237305, |
|
"learning_rate": 1.9646666666666666e-05, |
|
"loss": 0.0285, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 4.617649078369141, |
|
"learning_rate": 1.955777777777778e-05, |
|
"loss": 0.0246, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 1.02446711063385, |
|
"eval_runtime": 41.3069, |
|
"eval_samples_per_second": 6.052, |
|
"eval_steps_per_second": 1.525, |
|
"eval_wer": 0.46809712027103334, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"grad_norm": 1.5408967733383179, |
|
"learning_rate": 1.946888888888889e-05, |
|
"loss": 0.0235, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"grad_norm": 4.78511381149292, |
|
"learning_rate": 1.938e-05, |
|
"loss": 0.0212, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"grad_norm": 0.8843356370925903, |
|
"learning_rate": 1.9291111111111115e-05, |
|
"loss": 0.0193, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"grad_norm": 2.4114930629730225, |
|
"learning_rate": 1.9202222222222225e-05, |
|
"loss": 0.0193, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 3.1046841144561768, |
|
"learning_rate": 1.9113333333333336e-05, |
|
"loss": 0.0164, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 1.103171706199646, |
|
"eval_runtime": 41.2052, |
|
"eval_samples_per_second": 6.067, |
|
"eval_steps_per_second": 1.529, |
|
"eval_wer": 0.47148503670242803, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"grad_norm": 2.163294792175293, |
|
"learning_rate": 1.902488888888889e-05, |
|
"loss": 0.0157, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"grad_norm": 4.016397953033447, |
|
"learning_rate": 1.8936e-05, |
|
"loss": 0.015, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"grad_norm": 3.4655325412750244, |
|
"learning_rate": 1.884711111111111e-05, |
|
"loss": 0.0124, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"grad_norm": 2.6923351287841797, |
|
"learning_rate": 1.8758222222222224e-05, |
|
"loss": 0.0121, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"grad_norm": 3.1834566593170166, |
|
"learning_rate": 1.8669333333333334e-05, |
|
"loss": 0.0118, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 1.1389034986495972, |
|
"eval_runtime": 42.003, |
|
"eval_samples_per_second": 5.952, |
|
"eval_steps_per_second": 1.5, |
|
"eval_wer": 0.598531902879729, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"grad_norm": 1.3980759382247925, |
|
"learning_rate": 1.8580444444444445e-05, |
|
"loss": 0.0126, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"grad_norm": 3.1268885135650635, |
|
"learning_rate": 1.849155555555556e-05, |
|
"loss": 0.0109, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"grad_norm": 2.908175468444824, |
|
"learning_rate": 1.840266666666667e-05, |
|
"loss": 0.0126, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"grad_norm": 3.613409996032715, |
|
"learning_rate": 1.831377777777778e-05, |
|
"loss": 0.0105, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"grad_norm": 2.127668619155884, |
|
"learning_rate": 1.822488888888889e-05, |
|
"loss": 0.0094, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 1.145694375038147, |
|
"eval_runtime": 41.9604, |
|
"eval_samples_per_second": 5.958, |
|
"eval_steps_per_second": 1.501, |
|
"eval_wer": 0.5793337097684924, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"grad_norm": 3.8510990142822266, |
|
"learning_rate": 1.8136000000000004e-05, |
|
"loss": 0.0106, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"grad_norm": 0.6357870697975159, |
|
"learning_rate": 1.8047111111111114e-05, |
|
"loss": 0.0077, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"grad_norm": 0.7974900603294373, |
|
"learning_rate": 1.7958222222222225e-05, |
|
"loss": 0.0092, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"grad_norm": 3.332366943359375, |
|
"learning_rate": 1.7869333333333335e-05, |
|
"loss": 0.0097, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 3.0786008834838867, |
|
"learning_rate": 1.7780444444444446e-05, |
|
"loss": 0.0086, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 1.1853593587875366, |
|
"eval_runtime": 42.1314, |
|
"eval_samples_per_second": 5.934, |
|
"eval_steps_per_second": 1.495, |
|
"eval_wer": 0.5375494071146245, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"grad_norm": 1.0725177526474, |
|
"learning_rate": 1.7691555555555556e-05, |
|
"loss": 0.0072, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"grad_norm": 0.27933818101882935, |
|
"learning_rate": 1.7602666666666667e-05, |
|
"loss": 0.0074, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"grad_norm": 1.2738243341445923, |
|
"learning_rate": 1.7513777777777777e-05, |
|
"loss": 0.0061, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"grad_norm": 6.480189323425293, |
|
"learning_rate": 1.742488888888889e-05, |
|
"loss": 0.0065, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"grad_norm": 0.33281826972961426, |
|
"learning_rate": 1.7336e-05, |
|
"loss": 0.0067, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 1.237168788909912, |
|
"eval_runtime": 40.9166, |
|
"eval_samples_per_second": 6.11, |
|
"eval_steps_per_second": 1.54, |
|
"eval_wer": 0.466403162055336, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 89.6, |
|
"grad_norm": 0.395781934261322, |
|
"learning_rate": 1.7247111111111112e-05, |
|
"loss": 0.0055, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 91.2, |
|
"grad_norm": 2.044802188873291, |
|
"learning_rate": 1.7158222222222222e-05, |
|
"loss": 0.0071, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"grad_norm": 0.7053709030151367, |
|
"learning_rate": 1.7069333333333336e-05, |
|
"loss": 0.0069, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 94.4, |
|
"grad_norm": 2.6809892654418945, |
|
"learning_rate": 1.6980444444444447e-05, |
|
"loss": 0.0059, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"grad_norm": 0.43073830008506775, |
|
"learning_rate": 1.6891555555555557e-05, |
|
"loss": 0.005, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 1.2564021348953247, |
|
"eval_runtime": 43.0999, |
|
"eval_samples_per_second": 5.8, |
|
"eval_steps_per_second": 1.462, |
|
"eval_wer": 0.4991530208921513, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 97.6, |
|
"grad_norm": 0.7439399361610413, |
|
"learning_rate": 1.6802666666666668e-05, |
|
"loss": 0.0052, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"grad_norm": 0.18939250707626343, |
|
"learning_rate": 1.671377777777778e-05, |
|
"loss": 0.005, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 100.8, |
|
"grad_norm": 2.827566385269165, |
|
"learning_rate": 1.6624888888888892e-05, |
|
"loss": 0.0052, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 102.4, |
|
"grad_norm": 3.124176502227783, |
|
"learning_rate": 1.6536000000000002e-05, |
|
"loss": 0.0041, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"grad_norm": 1.91826593875885, |
|
"learning_rate": 1.6447111111111113e-05, |
|
"loss": 0.0043, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 1.27247154712677, |
|
"eval_runtime": 41.9766, |
|
"eval_samples_per_second": 5.956, |
|
"eval_steps_per_second": 1.501, |
|
"eval_wer": 0.44776962168266515, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 105.6, |
|
"grad_norm": 0.8713739514350891, |
|
"learning_rate": 1.6358222222222223e-05, |
|
"loss": 0.0032, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 107.2, |
|
"grad_norm": 0.05066121742129326, |
|
"learning_rate": 1.6269333333333334e-05, |
|
"loss": 0.0042, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 108.8, |
|
"grad_norm": 1.1562632322311401, |
|
"learning_rate": 1.6180444444444444e-05, |
|
"loss": 0.0032, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 110.4, |
|
"grad_norm": 0.07406218349933624, |
|
"learning_rate": 1.6091555555555555e-05, |
|
"loss": 0.0037, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"grad_norm": 0.997751772403717, |
|
"learning_rate": 1.600266666666667e-05, |
|
"loss": 0.0039, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 1.3331384658813477, |
|
"eval_runtime": 43.8205, |
|
"eval_samples_per_second": 5.705, |
|
"eval_steps_per_second": 1.438, |
|
"eval_wer": 0.5307735742518351, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 113.6, |
|
"grad_norm": 0.19617311656475067, |
|
"learning_rate": 1.591377777777778e-05, |
|
"loss": 0.0043, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 115.2, |
|
"grad_norm": 3.2143056392669678, |
|
"learning_rate": 1.5825333333333336e-05, |
|
"loss": 0.0051, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 116.8, |
|
"grad_norm": 0.6758298873901367, |
|
"learning_rate": 1.5736444444444446e-05, |
|
"loss": 0.0033, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 118.4, |
|
"grad_norm": 2.0771877765655518, |
|
"learning_rate": 1.5647555555555557e-05, |
|
"loss": 0.0037, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"grad_norm": 2.100074529647827, |
|
"learning_rate": 1.5558666666666667e-05, |
|
"loss": 0.0036, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 1.3492019176483154, |
|
"eval_runtime": 43.5776, |
|
"eval_samples_per_second": 5.737, |
|
"eval_steps_per_second": 1.446, |
|
"eval_wer": 0.603613777526821, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 121.6, |
|
"grad_norm": 0.3508625030517578, |
|
"learning_rate": 1.5469777777777778e-05, |
|
"loss": 0.0026, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 123.2, |
|
"grad_norm": 0.0235364381223917, |
|
"learning_rate": 1.5380888888888888e-05, |
|
"loss": 0.0033, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 124.8, |
|
"grad_norm": 1.745226263999939, |
|
"learning_rate": 1.5292e-05, |
|
"loss": 0.0024, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 126.4, |
|
"grad_norm": 1.7244856357574463, |
|
"learning_rate": 1.5203111111111112e-05, |
|
"loss": 0.0027, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"grad_norm": 2.978816509246826, |
|
"learning_rate": 1.5114222222222223e-05, |
|
"loss": 0.0028, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 1.3909789323806763, |
|
"eval_runtime": 41.3413, |
|
"eval_samples_per_second": 6.047, |
|
"eval_steps_per_second": 1.524, |
|
"eval_wer": 0.45511010728402035, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 129.6, |
|
"grad_norm": 0.11152245104312897, |
|
"learning_rate": 1.5025333333333333e-05, |
|
"loss": 0.0023, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 131.2, |
|
"grad_norm": 0.12898045778274536, |
|
"learning_rate": 1.4936444444444447e-05, |
|
"loss": 0.0029, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 132.8, |
|
"grad_norm": 0.8840957880020142, |
|
"learning_rate": 1.4847555555555558e-05, |
|
"loss": 0.0026, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 134.4, |
|
"grad_norm": 0.07675749063491821, |
|
"learning_rate": 1.4758666666666668e-05, |
|
"loss": 0.0027, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"grad_norm": 0.5362949371337891, |
|
"learning_rate": 1.4669777777777779e-05, |
|
"loss": 0.0034, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 1.436603307723999, |
|
"eval_runtime": 41.9502, |
|
"eval_samples_per_second": 5.959, |
|
"eval_steps_per_second": 1.502, |
|
"eval_wer": 0.4906832298136646, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 137.6, |
|
"grad_norm": 1.4152299165725708, |
|
"learning_rate": 1.458088888888889e-05, |
|
"loss": 0.0024, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 139.2, |
|
"grad_norm": 0.1923665702342987, |
|
"learning_rate": 1.4492000000000001e-05, |
|
"loss": 0.0023, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 140.8, |
|
"grad_norm": 0.9098421931266785, |
|
"learning_rate": 1.4403111111111112e-05, |
|
"loss": 0.0018, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 142.4, |
|
"grad_norm": 0.06011312082409859, |
|
"learning_rate": 1.4314222222222222e-05, |
|
"loss": 0.0023, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"grad_norm": 0.4919435381889343, |
|
"learning_rate": 1.4225333333333336e-05, |
|
"loss": 0.0024, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 1.3797271251678467, |
|
"eval_runtime": 46.4998, |
|
"eval_samples_per_second": 5.376, |
|
"eval_steps_per_second": 1.355, |
|
"eval_wer": 0.6126482213438735, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 145.6, |
|
"grad_norm": 1.19382905960083, |
|
"learning_rate": 1.4136444444444446e-05, |
|
"loss": 0.0026, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 147.2, |
|
"grad_norm": 0.1949184685945511, |
|
"learning_rate": 1.4047555555555557e-05, |
|
"loss": 0.0017, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 148.8, |
|
"grad_norm": 0.030353059992194176, |
|
"learning_rate": 1.3959111111111112e-05, |
|
"loss": 0.002, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 150.4, |
|
"grad_norm": 0.8148756623268127, |
|
"learning_rate": 1.3870222222222223e-05, |
|
"loss": 0.0016, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"grad_norm": 0.1468518227338791, |
|
"learning_rate": 1.3781333333333335e-05, |
|
"loss": 0.0019, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_loss": 1.4368208646774292, |
|
"eval_runtime": 42.0557, |
|
"eval_samples_per_second": 5.945, |
|
"eval_steps_per_second": 1.498, |
|
"eval_wer": 0.5228684359119141, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 153.6, |
|
"grad_norm": 3.330853223800659, |
|
"learning_rate": 1.3692888888888892e-05, |
|
"loss": 0.0024, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 155.2, |
|
"grad_norm": 2.411620616912842, |
|
"learning_rate": 1.3604000000000002e-05, |
|
"loss": 0.0018, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 156.8, |
|
"grad_norm": 1.7824522256851196, |
|
"learning_rate": 1.3515111111111113e-05, |
|
"loss": 0.0019, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 158.4, |
|
"grad_norm": 0.05631573125720024, |
|
"learning_rate": 1.3426222222222223e-05, |
|
"loss": 0.0018, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"grad_norm": 0.20461511611938477, |
|
"learning_rate": 1.3337333333333335e-05, |
|
"loss": 0.0017, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 1.4499236345291138, |
|
"eval_runtime": 43.1323, |
|
"eval_samples_per_second": 5.796, |
|
"eval_steps_per_second": 1.461, |
|
"eval_wer": 0.5979672501411631, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 161.6, |
|
"grad_norm": 0.01897699199616909, |
|
"learning_rate": 1.3248444444444446e-05, |
|
"loss": 0.0028, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 163.2, |
|
"grad_norm": 0.9474685192108154, |
|
"learning_rate": 1.3160000000000001e-05, |
|
"loss": 0.0027, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 164.8, |
|
"grad_norm": 0.10914404690265656, |
|
"learning_rate": 1.3071111111111112e-05, |
|
"loss": 0.0026, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 166.4, |
|
"grad_norm": 0.6951639652252197, |
|
"learning_rate": 1.2982222222222222e-05, |
|
"loss": 0.0015, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"grad_norm": 0.591296911239624, |
|
"learning_rate": 1.2893333333333336e-05, |
|
"loss": 0.0014, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_loss": 1.4369308948516846, |
|
"eval_runtime": 43.7269, |
|
"eval_samples_per_second": 5.717, |
|
"eval_steps_per_second": 1.441, |
|
"eval_wer": 0.5940146809712027, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 169.6, |
|
"grad_norm": 0.07852908968925476, |
|
"learning_rate": 1.2804444444444446e-05, |
|
"loss": 0.0018, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 171.2, |
|
"grad_norm": 0.4466964304447174, |
|
"learning_rate": 1.2715555555555557e-05, |
|
"loss": 0.0016, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 172.8, |
|
"grad_norm": 0.6805703043937683, |
|
"learning_rate": 1.2626666666666667e-05, |
|
"loss": 0.001, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 174.4, |
|
"grad_norm": 0.14335931837558746, |
|
"learning_rate": 1.253777777777778e-05, |
|
"loss": 0.001, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"grad_norm": 1.0165632963180542, |
|
"learning_rate": 1.244888888888889e-05, |
|
"loss": 0.0015, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_loss": 1.4614824056625366, |
|
"eval_runtime": 42.489, |
|
"eval_samples_per_second": 5.884, |
|
"eval_steps_per_second": 1.483, |
|
"eval_wer": 0.5307735742518351, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 177.6, |
|
"grad_norm": 0.4234725832939148, |
|
"learning_rate": 1.236e-05, |
|
"loss": 0.0019, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 179.2, |
|
"grad_norm": 4.320431232452393, |
|
"learning_rate": 1.227111111111111e-05, |
|
"loss": 0.0027, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 180.8, |
|
"grad_norm": 0.04678142070770264, |
|
"learning_rate": 1.2182222222222225e-05, |
|
"loss": 0.0018, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 182.4, |
|
"grad_norm": 0.19874203205108643, |
|
"learning_rate": 1.2093333333333335e-05, |
|
"loss": 0.0013, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"grad_norm": 0.636381208896637, |
|
"learning_rate": 1.2004444444444445e-05, |
|
"loss": 0.001, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_loss": 1.445263147354126, |
|
"eval_runtime": 41.9644, |
|
"eval_samples_per_second": 5.957, |
|
"eval_steps_per_second": 1.501, |
|
"eval_wer": 0.49858836815358554, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 185.6, |
|
"grad_norm": 0.021843163296580315, |
|
"learning_rate": 1.1915555555555556e-05, |
|
"loss": 0.0017, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 187.2, |
|
"grad_norm": 0.0671222060918808, |
|
"learning_rate": 1.1826666666666668e-05, |
|
"loss": 0.0012, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 188.8, |
|
"grad_norm": 0.04560863971710205, |
|
"learning_rate": 1.1737777777777779e-05, |
|
"loss": 0.0019, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 190.4, |
|
"grad_norm": 0.17737437784671783, |
|
"learning_rate": 1.1648888888888889e-05, |
|
"loss": 0.0008, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"grad_norm": 0.05606027692556381, |
|
"learning_rate": 1.156e-05, |
|
"loss": 0.0009, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_loss": 1.4906095266342163, |
|
"eval_runtime": 43.2268, |
|
"eval_samples_per_second": 5.783, |
|
"eval_steps_per_second": 1.457, |
|
"eval_wer": 0.5471485036702428, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 193.6, |
|
"grad_norm": 2.6839418411254883, |
|
"learning_rate": 1.1471111111111113e-05, |
|
"loss": 0.0016, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 195.2, |
|
"grad_norm": 0.05472835153341293, |
|
"learning_rate": 1.1382222222222224e-05, |
|
"loss": 0.0014, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 196.8, |
|
"grad_norm": 0.4221028685569763, |
|
"learning_rate": 1.1293333333333334e-05, |
|
"loss": 0.001, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 198.4, |
|
"grad_norm": 0.025390515103936195, |
|
"learning_rate": 1.120488888888889e-05, |
|
"loss": 0.0017, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 0.02648915909230709, |
|
"learning_rate": 1.1116e-05, |
|
"loss": 0.0007, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 1.4574248790740967, |
|
"eval_runtime": 42.0773, |
|
"eval_samples_per_second": 5.941, |
|
"eval_steps_per_second": 1.497, |
|
"eval_wer": 0.49407114624505927, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 201.6, |
|
"grad_norm": 0.05941193923354149, |
|
"learning_rate": 1.1027111111111112e-05, |
|
"loss": 0.0011, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 203.2, |
|
"grad_norm": 0.02862560749053955, |
|
"learning_rate": 1.0938222222222223e-05, |
|
"loss": 0.0012, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 204.8, |
|
"grad_norm": 0.024127991870045662, |
|
"learning_rate": 1.0849333333333335e-05, |
|
"loss": 0.0011, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 206.4, |
|
"grad_norm": 0.08150752633810043, |
|
"learning_rate": 1.0760444444444445e-05, |
|
"loss": 0.0008, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"grad_norm": 0.05071726813912392, |
|
"learning_rate": 1.0671555555555557e-05, |
|
"loss": 0.0011, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_loss": 1.4995194673538208, |
|
"eval_runtime": 41.3368, |
|
"eval_samples_per_second": 6.048, |
|
"eval_steps_per_second": 1.524, |
|
"eval_wer": 0.4647092038396386, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 209.6, |
|
"grad_norm": 8.064842224121094, |
|
"learning_rate": 1.0582666666666668e-05, |
|
"loss": 0.001, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 211.2, |
|
"grad_norm": 0.02605404146015644, |
|
"learning_rate": 1.0493777777777778e-05, |
|
"loss": 0.001, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 212.8, |
|
"grad_norm": 0.06135524809360504, |
|
"learning_rate": 1.0404888888888889e-05, |
|
"loss": 0.0011, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 214.4, |
|
"grad_norm": 0.014893967658281326, |
|
"learning_rate": 1.0316e-05, |
|
"loss": 0.0008, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"grad_norm": 0.009221619926393032, |
|
"learning_rate": 1.0227111111111111e-05, |
|
"loss": 0.0007, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_loss": 1.5195382833480835, |
|
"eval_runtime": 42.2229, |
|
"eval_samples_per_second": 5.921, |
|
"eval_steps_per_second": 1.492, |
|
"eval_wer": 0.515527950310559, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 217.6, |
|
"grad_norm": 0.3534085154533386, |
|
"learning_rate": 1.0138222222222223e-05, |
|
"loss": 0.0012, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 219.2, |
|
"grad_norm": 0.09608615189790726, |
|
"learning_rate": 1.0049333333333334e-05, |
|
"loss": 0.0007, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 220.8, |
|
"grad_norm": 0.03040822595357895, |
|
"learning_rate": 9.960444444444444e-06, |
|
"loss": 0.0008, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 222.4, |
|
"grad_norm": 0.030991466715931892, |
|
"learning_rate": 9.871555555555557e-06, |
|
"loss": 0.0008, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"grad_norm": 0.270130455493927, |
|
"learning_rate": 9.783111111111112e-06, |
|
"loss": 0.0011, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_loss": 1.4928404092788696, |
|
"eval_runtime": 42.9045, |
|
"eval_samples_per_second": 5.827, |
|
"eval_steps_per_second": 1.468, |
|
"eval_wer": 0.5482778091473743, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 225.6, |
|
"grad_norm": 0.059342194348573685, |
|
"learning_rate": 9.694222222222222e-06, |
|
"loss": 0.0008, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 227.2, |
|
"grad_norm": 0.09850233793258667, |
|
"learning_rate": 9.605333333333334e-06, |
|
"loss": 0.001, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 228.8, |
|
"grad_norm": 0.012932940386235714, |
|
"learning_rate": 9.516444444444445e-06, |
|
"loss": 0.0007, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 230.4, |
|
"grad_norm": 0.4113960862159729, |
|
"learning_rate": 9.427555555555557e-06, |
|
"loss": 0.0008, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"grad_norm": 11.596172332763672, |
|
"learning_rate": 9.338666666666667e-06, |
|
"loss": 0.0011, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_loss": 1.5243357419967651, |
|
"eval_runtime": 42.099, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.496, |
|
"eval_wer": 0.5143986448334275, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 233.6, |
|
"grad_norm": 0.14402435719966888, |
|
"learning_rate": 9.249777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 235.2, |
|
"grad_norm": 0.037319615483284, |
|
"learning_rate": 9.160888888888888e-06, |
|
"loss": 0.0006, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 236.8, |
|
"grad_norm": 0.05602966248989105, |
|
"learning_rate": 9.072e-06, |
|
"loss": 0.0009, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 238.4, |
|
"grad_norm": 0.018490877002477646, |
|
"learning_rate": 8.983111111111111e-06, |
|
"loss": 0.0007, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"grad_norm": 3.7446510791778564, |
|
"learning_rate": 8.894222222222223e-06, |
|
"loss": 0.0007, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_loss": 1.5804709196090698, |
|
"eval_runtime": 41.044, |
|
"eval_samples_per_second": 6.091, |
|
"eval_steps_per_second": 1.535, |
|
"eval_wer": 0.48842461885940147, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 241.6, |
|
"grad_norm": 0.09505221247673035, |
|
"learning_rate": 8.805333333333334e-06, |
|
"loss": 0.0005, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 243.2, |
|
"grad_norm": 0.031215248629450798, |
|
"learning_rate": 8.716444444444446e-06, |
|
"loss": 0.0007, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 244.8, |
|
"grad_norm": 0.3624882996082306, |
|
"learning_rate": 8.627555555555556e-06, |
|
"loss": 0.0004, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 246.4, |
|
"grad_norm": 0.025587618350982666, |
|
"learning_rate": 8.538666666666667e-06, |
|
"loss": 0.0007, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"grad_norm": 0.8972709774971008, |
|
"learning_rate": 8.450222222222224e-06, |
|
"loss": 0.0005, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_loss": 1.5293784141540527, |
|
"eval_runtime": 42.2688, |
|
"eval_samples_per_second": 5.915, |
|
"eval_steps_per_second": 1.49, |
|
"eval_wer": 0.5115753811405985, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 249.6, |
|
"grad_norm": 0.02251521684229374, |
|
"learning_rate": 8.361333333333334e-06, |
|
"loss": 0.0007, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 251.2, |
|
"grad_norm": 0.007881904020905495, |
|
"learning_rate": 8.272444444444445e-06, |
|
"loss": 0.0004, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 252.8, |
|
"grad_norm": 0.0084000863134861, |
|
"learning_rate": 8.183555555555555e-06, |
|
"loss": 0.0005, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 254.4, |
|
"grad_norm": 0.44422510266304016, |
|
"learning_rate": 8.094666666666667e-06, |
|
"loss": 0.0004, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"grad_norm": 0.024682149291038513, |
|
"learning_rate": 8.00577777777778e-06, |
|
"loss": 0.0005, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_loss": 1.5940375328063965, |
|
"eval_runtime": 42.3505, |
|
"eval_samples_per_second": 5.903, |
|
"eval_steps_per_second": 1.488, |
|
"eval_wer": 0.49745906267645396, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 257.6, |
|
"grad_norm": 0.019801704213023186, |
|
"learning_rate": 7.91688888888889e-06, |
|
"loss": 0.0005, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 259.2, |
|
"grad_norm": 0.006728316657245159, |
|
"learning_rate": 7.828000000000002e-06, |
|
"loss": 0.0003, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 260.8, |
|
"grad_norm": 0.1475154608488083, |
|
"learning_rate": 7.739111111111112e-06, |
|
"loss": 0.0005, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 262.4, |
|
"grad_norm": 0.038090407848358154, |
|
"learning_rate": 7.650222222222223e-06, |
|
"loss": 0.0005, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"grad_norm": 1.3189762830734253, |
|
"learning_rate": 7.561333333333334e-06, |
|
"loss": 0.0003, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_loss": 1.5760223865509033, |
|
"eval_runtime": 41.6476, |
|
"eval_samples_per_second": 6.003, |
|
"eval_steps_per_second": 1.513, |
|
"eval_wer": 0.5002823263692829, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 265.6, |
|
"grad_norm": 0.005491426680237055, |
|
"learning_rate": 7.4728888888888895e-06, |
|
"loss": 0.0007, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 267.2, |
|
"grad_norm": 0.010416628792881966, |
|
"learning_rate": 7.384e-06, |
|
"loss": 0.0008, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 268.8, |
|
"grad_norm": 0.004440742079168558, |
|
"learning_rate": 7.295111111111112e-06, |
|
"loss": 0.0006, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 270.4, |
|
"grad_norm": 0.010887986980378628, |
|
"learning_rate": 7.206222222222223e-06, |
|
"loss": 0.0007, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"grad_norm": 0.02552446536719799, |
|
"learning_rate": 7.117333333333334e-06, |
|
"loss": 0.0004, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_loss": 1.5940096378326416, |
|
"eval_runtime": 41.5805, |
|
"eval_samples_per_second": 6.012, |
|
"eval_steps_per_second": 1.515, |
|
"eval_wer": 0.4872953133822699, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 273.6, |
|
"grad_norm": 0.0572277270257473, |
|
"learning_rate": 7.028444444444445e-06, |
|
"loss": 0.0006, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 275.2, |
|
"grad_norm": 0.10096795111894608, |
|
"learning_rate": 6.9395555555555565e-06, |
|
"loss": 0.0009, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 276.8, |
|
"grad_norm": 0.005929913371801376, |
|
"learning_rate": 6.850666666666668e-06, |
|
"loss": 0.0005, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 278.4, |
|
"grad_norm": 0.005341435316950083, |
|
"learning_rate": 6.761777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"grad_norm": 0.021666768938302994, |
|
"learning_rate": 6.6728888888888895e-06, |
|
"loss": 0.0003, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_loss": 1.6009721755981445, |
|
"eval_runtime": 41.0006, |
|
"eval_samples_per_second": 6.097, |
|
"eval_steps_per_second": 1.537, |
|
"eval_wer": 0.46809712027103334, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 281.6, |
|
"grad_norm": 0.019466301426291466, |
|
"learning_rate": 6.584444444444446e-06, |
|
"loss": 0.0005, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 283.2, |
|
"grad_norm": 0.007609171327203512, |
|
"learning_rate": 6.495555555555556e-06, |
|
"loss": 0.0006, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 284.8, |
|
"grad_norm": 0.006095775868743658, |
|
"learning_rate": 6.4066666666666674e-06, |
|
"loss": 0.0004, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 286.4, |
|
"grad_norm": 0.008476600050926208, |
|
"learning_rate": 6.317777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"grad_norm": 0.06994141638278961, |
|
"learning_rate": 6.22888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_loss": 1.5837336778640747, |
|
"eval_runtime": 42.3396, |
|
"eval_samples_per_second": 5.905, |
|
"eval_steps_per_second": 1.488, |
|
"eval_wer": 0.484472049689441, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 289.6, |
|
"grad_norm": 0.9534348845481873, |
|
"learning_rate": 6.1400000000000005e-06, |
|
"loss": 0.0003, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 291.2, |
|
"grad_norm": 0.00798807479441166, |
|
"learning_rate": 6.051111111111112e-06, |
|
"loss": 0.0003, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 292.8, |
|
"grad_norm": 0.4499492347240448, |
|
"learning_rate": 5.962222222222222e-06, |
|
"loss": 0.0004, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 294.4, |
|
"grad_norm": 0.00747877499088645, |
|
"learning_rate": 5.873333333333334e-06, |
|
"loss": 0.0006, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"grad_norm": 0.007594792637974024, |
|
"learning_rate": 5.784444444444445e-06, |
|
"loss": 0.0006, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_loss": 1.5838834047317505, |
|
"eval_runtime": 41.0456, |
|
"eval_samples_per_second": 6.091, |
|
"eval_steps_per_second": 1.535, |
|
"eval_wer": 0.47939017504234893, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 297.6, |
|
"grad_norm": 0.0062100524082779884, |
|
"learning_rate": 5.695555555555556e-06, |
|
"loss": 0.0003, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 299.2, |
|
"grad_norm": 1.5888539552688599, |
|
"learning_rate": 5.606666666666667e-06, |
|
"loss": 0.0006, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 300.8, |
|
"grad_norm": 0.004372656811028719, |
|
"learning_rate": 5.517777777777779e-06, |
|
"loss": 0.0003, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 302.4, |
|
"grad_norm": 0.004411238245666027, |
|
"learning_rate": 5.428888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"grad_norm": 0.007507936097681522, |
|
"learning_rate": 5.3400000000000005e-06, |
|
"loss": 0.0002, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_loss": 1.5652457475662231, |
|
"eval_runtime": 41.4549, |
|
"eval_samples_per_second": 6.031, |
|
"eval_steps_per_second": 1.52, |
|
"eval_wer": 0.4754376058723885, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 305.6, |
|
"grad_norm": 0.005323062185198069, |
|
"learning_rate": 5.251555555555557e-06, |
|
"loss": 0.0005, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 307.2, |
|
"grad_norm": 0.0021106544882059097, |
|
"learning_rate": 5.162666666666667e-06, |
|
"loss": 0.0005, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 308.8, |
|
"grad_norm": 0.0051486073061823845, |
|
"learning_rate": 5.0737777777777785e-06, |
|
"loss": 0.0007, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 310.4, |
|
"grad_norm": 0.06730670481920242, |
|
"learning_rate": 4.984888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"grad_norm": 0.005158218555152416, |
|
"learning_rate": 4.896e-06, |
|
"loss": 0.0003, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_loss": 1.6083264350891113, |
|
"eval_runtime": 41.1796, |
|
"eval_samples_per_second": 6.071, |
|
"eval_steps_per_second": 1.53, |
|
"eval_wer": 0.48334274421230944, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 313.6, |
|
"grad_norm": 0.007136846426874399, |
|
"learning_rate": 4.8071111111111115e-06, |
|
"loss": 0.0002, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 315.2, |
|
"grad_norm": 0.0384252667427063, |
|
"learning_rate": 4.718222222222222e-06, |
|
"loss": 0.0005, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 316.8, |
|
"grad_norm": 0.03144264966249466, |
|
"learning_rate": 4.629333333333333e-06, |
|
"loss": 0.0003, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 318.4, |
|
"grad_norm": 0.04305962473154068, |
|
"learning_rate": 4.540444444444445e-06, |
|
"loss": 0.0002, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"grad_norm": 0.011067772284150124, |
|
"learning_rate": 4.451555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_loss": 1.575042724609375, |
|
"eval_runtime": 42.1507, |
|
"eval_samples_per_second": 5.931, |
|
"eval_steps_per_second": 1.495, |
|
"eval_wer": 0.5189158667419537, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 321.6, |
|
"grad_norm": 0.002925801556557417, |
|
"learning_rate": 4.362666666666667e-06, |
|
"loss": 0.0004, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 323.2, |
|
"grad_norm": 0.03247452154755592, |
|
"learning_rate": 4.2737777777777785e-06, |
|
"loss": 0.0002, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 324.8, |
|
"grad_norm": 0.017400383949279785, |
|
"learning_rate": 4.18488888888889e-06, |
|
"loss": 0.0002, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 326.4, |
|
"grad_norm": 0.0033240150660276413, |
|
"learning_rate": 4.096e-06, |
|
"loss": 0.0001, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"grad_norm": 0.009032553061842918, |
|
"learning_rate": 4.0071111111111116e-06, |
|
"loss": 0.0004, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_loss": 1.619897484779358, |
|
"eval_runtime": 44.7114, |
|
"eval_samples_per_second": 5.591, |
|
"eval_steps_per_second": 1.409, |
|
"eval_wer": 0.5979672501411631, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 329.6, |
|
"grad_norm": 0.0036150990054011345, |
|
"learning_rate": 3.918222222222223e-06, |
|
"loss": 0.0002, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 331.2, |
|
"grad_norm": 0.029861323535442352, |
|
"learning_rate": 3.829333333333334e-06, |
|
"loss": 0.0002, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 332.8, |
|
"grad_norm": 0.02380683459341526, |
|
"learning_rate": 3.740444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 334.4, |
|
"grad_norm": 0.031281813979148865, |
|
"learning_rate": 3.651555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"grad_norm": 0.0028394584078341722, |
|
"learning_rate": 3.5626666666666672e-06, |
|
"loss": 0.0001, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_loss": 1.5782560110092163, |
|
"eval_runtime": 42.7351, |
|
"eval_samples_per_second": 5.85, |
|
"eval_steps_per_second": 1.474, |
|
"eval_wer": 0.5352907961603613, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 337.6, |
|
"grad_norm": 0.11405123770236969, |
|
"learning_rate": 3.473777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 339.2, |
|
"grad_norm": 0.022922609001398087, |
|
"learning_rate": 3.3848888888888894e-06, |
|
"loss": 0.0003, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 340.8, |
|
"grad_norm": 0.0035330464597791433, |
|
"learning_rate": 3.2960000000000003e-06, |
|
"loss": 0.0002, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 342.4, |
|
"grad_norm": 0.0057142325676977634, |
|
"learning_rate": 3.2071111111111116e-06, |
|
"loss": 0.0003, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"grad_norm": 0.009726474992930889, |
|
"learning_rate": 3.1182222222222225e-06, |
|
"loss": 0.0001, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_loss": 1.5897696018218994, |
|
"eval_runtime": 42.1045, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.496, |
|
"eval_wer": 0.5098814229249012, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 345.6, |
|
"grad_norm": 0.002822835696861148, |
|
"learning_rate": 3.0293333333333338e-06, |
|
"loss": 0.0001, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 347.2, |
|
"grad_norm": 0.18968430161476135, |
|
"learning_rate": 2.9404444444444447e-06, |
|
"loss": 0.0003, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 348.8, |
|
"grad_norm": 0.01890464872121811, |
|
"learning_rate": 2.8515555555555555e-06, |
|
"loss": 0.0004, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 350.4, |
|
"grad_norm": 0.01565505564212799, |
|
"learning_rate": 2.7631111111111113e-06, |
|
"loss": 0.0006, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"grad_norm": 0.031101664528250694, |
|
"learning_rate": 2.6742222222222226e-06, |
|
"loss": 0.0005, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_loss": 1.6004997491836548, |
|
"eval_runtime": 43.7134, |
|
"eval_samples_per_second": 5.719, |
|
"eval_steps_per_second": 1.441, |
|
"eval_wer": 0.5832862789384529, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 353.6, |
|
"grad_norm": 0.0026319867465645075, |
|
"learning_rate": 2.5853333333333335e-06, |
|
"loss": 0.0002, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 355.2, |
|
"grad_norm": 0.0039254105649888515, |
|
"learning_rate": 2.4964444444444448e-06, |
|
"loss": 0.0002, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 356.8, |
|
"grad_norm": 0.10020512342453003, |
|
"learning_rate": 2.4075555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 358.4, |
|
"grad_norm": 0.0036830666940659285, |
|
"learning_rate": 2.318666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"grad_norm": 0.008613605052232742, |
|
"learning_rate": 2.229777777777778e-06, |
|
"loss": 0.0002, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_loss": 1.5903429985046387, |
|
"eval_runtime": 41.6423, |
|
"eval_samples_per_second": 6.004, |
|
"eval_steps_per_second": 1.513, |
|
"eval_wer": 0.4872953133822699, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 361.6, |
|
"grad_norm": 0.0025289321783930063, |
|
"learning_rate": 2.140888888888889e-06, |
|
"loss": 0.0003, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 363.2, |
|
"grad_norm": 0.004556621424853802, |
|
"learning_rate": 2.052e-06, |
|
"loss": 0.0002, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 364.8, |
|
"grad_norm": 0.06288538128137589, |
|
"learning_rate": 1.9631111111111113e-06, |
|
"loss": 0.0002, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 366.4, |
|
"grad_norm": 0.002961724065244198, |
|
"learning_rate": 1.8742222222222222e-06, |
|
"loss": 0.0001, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"grad_norm": 0.0020394865423440933, |
|
"learning_rate": 1.7853333333333333e-06, |
|
"loss": 0.0002, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_loss": 1.619638442993164, |
|
"eval_runtime": 42.1006, |
|
"eval_samples_per_second": 5.938, |
|
"eval_steps_per_second": 1.496, |
|
"eval_wer": 0.5149632975719932, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 369.6, |
|
"grad_norm": 0.0018333194311708212, |
|
"learning_rate": 1.6964444444444444e-06, |
|
"loss": 0.0001, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 371.2, |
|
"grad_norm": 0.00915595330297947, |
|
"learning_rate": 1.6075555555555559e-06, |
|
"loss": 0.0001, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 372.8, |
|
"grad_norm": 0.006657126825302839, |
|
"learning_rate": 1.518666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 374.4, |
|
"grad_norm": 0.1112598404288292, |
|
"learning_rate": 1.429777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"grad_norm": 0.004303216468542814, |
|
"learning_rate": 1.3408888888888892e-06, |
|
"loss": 0.0001, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_loss": 1.6212029457092285, |
|
"eval_runtime": 42.453, |
|
"eval_samples_per_second": 5.889, |
|
"eval_steps_per_second": 1.484, |
|
"eval_wer": 0.5251270468661773, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 377.6, |
|
"grad_norm": 0.003241022815927863, |
|
"learning_rate": 1.2520000000000003e-06, |
|
"loss": 0.0001, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 379.2, |
|
"grad_norm": 0.008798100054264069, |
|
"learning_rate": 1.1631111111111113e-06, |
|
"loss": 0.0001, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 380.8, |
|
"grad_norm": 0.013746335171163082, |
|
"learning_rate": 1.0742222222222224e-06, |
|
"loss": 0.0001, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 382.4, |
|
"grad_norm": 0.0022988717537373304, |
|
"learning_rate": 9.853333333333333e-07, |
|
"loss": 0.0001, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"grad_norm": 0.0472625233232975, |
|
"learning_rate": 8.964444444444445e-07, |
|
"loss": 0.0002, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_loss": 1.618031620979309, |
|
"eval_runtime": 43.7587, |
|
"eval_samples_per_second": 5.713, |
|
"eval_steps_per_second": 1.44, |
|
"eval_wer": 0.5539243365330322, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 385.6, |
|
"grad_norm": 0.0067636617459356785, |
|
"learning_rate": 8.075555555555556e-07, |
|
"loss": 0.0001, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 387.2, |
|
"grad_norm": 0.0014746059896424413, |
|
"learning_rate": 7.186666666666667e-07, |
|
"loss": 0.0001, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 388.8, |
|
"grad_norm": 0.03201240673661232, |
|
"learning_rate": 6.297777777777778e-07, |
|
"loss": 0.0001, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 390.4, |
|
"grad_norm": 0.0024084942415356636, |
|
"learning_rate": 5.408888888888889e-07, |
|
"loss": 0.0001, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"grad_norm": 0.0010368505027145147, |
|
"learning_rate": 4.5244444444444445e-07, |
|
"loss": 0.0001, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_loss": 1.6104307174682617, |
|
"eval_runtime": 41.3895, |
|
"eval_samples_per_second": 6.04, |
|
"eval_steps_per_second": 1.522, |
|
"eval_wer": 0.49632975719932243, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 393.6, |
|
"grad_norm": 0.6977916359901428, |
|
"learning_rate": 3.635555555555556e-07, |
|
"loss": 0.0001, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 395.2, |
|
"grad_norm": 0.0007687339093536139, |
|
"learning_rate": 2.746666666666667e-07, |
|
"loss": 0.0001, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 396.8, |
|
"grad_norm": 0.009420102462172508, |
|
"learning_rate": 1.8577777777777778e-07, |
|
"loss": 0.0001, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 398.4, |
|
"grad_norm": 0.0071104601956903934, |
|
"learning_rate": 9.68888888888889e-08, |
|
"loss": 0.0001, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"grad_norm": 0.0014255736023187637, |
|
"learning_rate": 8e-09, |
|
"loss": 0.0001, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 1.6090388298034668, |
|
"eval_runtime": 42.8761, |
|
"eval_samples_per_second": 5.831, |
|
"eval_steps_per_second": 1.469, |
|
"eval_wer": 0.510446075663467, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"step": 50000, |
|
"total_flos": 1.9695108096e+19, |
|
"train_loss": 0.05668430684482679, |
|
"train_runtime": 102928.0936, |
|
"train_samples_per_second": 7.772, |
|
"train_steps_per_second": 0.486 |
|
} |
|
], |
|
"logging_steps": 200, |
|
"max_steps": 50000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 400, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9695108096e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|