|
{ |
|
"best_metric": 0.5404770374298096, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-female-model/checkpoint-1200", |
|
"epoch": 4.225616921269095, |
|
"eval_steps": 200, |
|
"global_step": 1800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05875440658049354, |
|
"grad_norm": 73.04696655273438, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 5.3434, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11750881316098707, |
|
"grad_norm": null,
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 4.1395, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1762632197414806, |
|
"grad_norm": 34.922523498535156, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 3.1888, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.23501762632197415, |
|
"grad_norm": 34.749629974365234, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 2.3154, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2937720329024677, |
|
"grad_norm": 33.44667053222656, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 1.691, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3525264394829612, |
|
"grad_norm": 38.09309768676758, |
|
"learning_rate": 2.9e-06, |
|
"loss": 1.5853, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4112808460634548, |
|
"grad_norm": 31.004915237426758, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 1.522, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4700352526439483, |
|
"grad_norm": 31.333637237548828, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 1.4291, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4700352526439483, |
|
"eval_loss": 0.8216601610183716, |
|
"eval_runtime": 224.1212, |
|
"eval_samples_per_second": 2.579, |
|
"eval_steps_per_second": 0.647, |
|
"eval_wer": 0.49672320740169623, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5287896592244419, |
|
"grad_norm": 34.124698638916016, |
|
"learning_rate": 4.4e-06, |
|
"loss": 1.2954, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5875440658049353, |
|
"grad_norm": 38.50987243652344, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 1.2541, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6462984723854289, |
|
"grad_norm": 38.04581069946289, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 1.1264, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7050528789659224, |
|
"grad_norm": 28.410049438476562, |
|
"learning_rate": 5.9e-06, |
|
"loss": 1.1311, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.763807285546416, |
|
"grad_norm": 30.25857925415039, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 1.0838, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8225616921269095, |
|
"grad_norm": 31.186445236206055, |
|
"learning_rate": 6.9e-06, |
|
"loss": 1.1503, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.881316098707403, |
|
"grad_norm": 24.111587524414062, |
|
"learning_rate": 7.4e-06, |
|
"loss": 1.0892, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9400705052878966, |
|
"grad_norm": 22.854475021362305, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.9176, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9400705052878966, |
|
"eval_loss": 0.634784460067749, |
|
"eval_runtime": 226.4974, |
|
"eval_samples_per_second": 2.552, |
|
"eval_steps_per_second": 0.64, |
|
"eval_wer": 0.42656129529683884, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9988249118683902, |
|
"grad_norm": 36.47032928466797, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 1.0234, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.0564042303172738, |
|
"grad_norm": 21.01776123046875, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.5964, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1151586368977673, |
|
"grad_norm": 17.60657501220703, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.7129, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.1739130434782608, |
|
"grad_norm": 19.744020462036133, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.5569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.2326674500587544, |
|
"grad_norm": 15.34373664855957, |
|
"learning_rate": 9.955555555555556e-06, |
|
"loss": 0.5868, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.291421856639248, |
|
"grad_norm": 26.74762535095215, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.6199, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.3501762632197414, |
|
"grad_norm": 13.160558700561523, |
|
"learning_rate": 9.844444444444446e-06, |
|
"loss": 0.557, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.408930669800235, |
|
"grad_norm": 14.749505043029785, |
|
"learning_rate": 9.78888888888889e-06, |
|
"loss": 0.5492, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.408930669800235, |
|
"eval_loss": 0.5867528319358826, |
|
"eval_runtime": 230.3509, |
|
"eval_samples_per_second": 2.509, |
|
"eval_steps_per_second": 0.629, |
|
"eval_wer": 0.40940632228218965, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4676850763807285, |
|
"grad_norm": 16.107746124267578, |
|
"learning_rate": 9.733333333333334e-06, |
|
"loss": 0.5442, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.526439482961222, |
|
"grad_norm": 29.692367553710938, |
|
"learning_rate": 9.677777777777778e-06, |
|
"loss": 0.6473, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5851938895417157, |
|
"grad_norm": 18.0280704498291, |
|
"learning_rate": 9.622222222222222e-06, |
|
"loss": 0.5739, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.6439482961222092, |
|
"grad_norm": 27.483768463134766, |
|
"learning_rate": 9.566666666666668e-06, |
|
"loss": 0.6705, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.7027027027027026, |
|
"grad_norm": 20.081012725830078, |
|
"learning_rate": 9.511111111111112e-06, |
|
"loss": 0.6338, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.7614571092831963, |
|
"grad_norm": 22.985151290893555, |
|
"learning_rate": 9.455555555555557e-06, |
|
"loss": 0.6409, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.8202115158636898, |
|
"grad_norm": 19.641082763671875, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.6777, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.8789659224441833, |
|
"grad_norm": 27.92637825012207, |
|
"learning_rate": 9.344444444444446e-06, |
|
"loss": 0.6243, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.8789659224441833, |
|
"eval_loss": 0.5535122752189636, |
|
"eval_runtime": 218.1215, |
|
"eval_samples_per_second": 2.65, |
|
"eval_steps_per_second": 0.665, |
|
"eval_wer": 0.3274865073245952, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.937720329024677, |
|
"grad_norm": 24.763235092163086, |
|
"learning_rate": 9.28888888888889e-06, |
|
"loss": 0.5296, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.9964747356051704, |
|
"grad_norm": 22.559097290039062, |
|
"learning_rate": 9.233333333333334e-06, |
|
"loss": 0.6109, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.054054054054054, |
|
"grad_norm": 9.163862228393555, |
|
"learning_rate": 9.17777777777778e-06, |
|
"loss": 0.2217, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.1128084606345476, |
|
"grad_norm": 14.249441146850586, |
|
"learning_rate": 9.122222222222223e-06, |
|
"loss": 0.2233, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.1715628672150413, |
|
"grad_norm": 12.58755111694336, |
|
"learning_rate": 9.066666666666667e-06, |
|
"loss": 0.2103, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.2303172737955346, |
|
"grad_norm": 12.163936614990234, |
|
"learning_rate": 9.011111111111111e-06, |
|
"loss": 0.2184, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2890716803760283, |
|
"grad_norm": 24.493022918701172, |
|
"learning_rate": 8.955555555555555e-06, |
|
"loss": 0.2056, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.3478260869565215, |
|
"grad_norm": 16.613021850585938, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.2196, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.3478260869565215, |
|
"eval_loss": 0.5643105506896973, |
|
"eval_runtime": 222.0189, |
|
"eval_samples_per_second": 2.603, |
|
"eval_steps_per_second": 0.653, |
|
"eval_wer": 0.35774865073245954, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.406580493537015, |
|
"grad_norm": 9.612153053283691, |
|
"learning_rate": 8.844444444444445e-06, |
|
"loss": 0.2273, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.465334900117509, |
|
"grad_norm": 8.410961151123047, |
|
"learning_rate": 8.788888888888891e-06, |
|
"loss": 0.2034, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.524089306698002, |
|
"grad_norm": 21.08755874633789, |
|
"learning_rate": 8.733333333333333e-06, |
|
"loss": 0.2239, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.582843713278496, |
|
"grad_norm": 14.133148193359375, |
|
"learning_rate": 8.677777777777779e-06, |
|
"loss": 0.2086, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.6415981198589895, |
|
"grad_norm": 6.469407558441162, |
|
"learning_rate": 8.622222222222223e-06, |
|
"loss": 0.2349, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.7003525264394828, |
|
"grad_norm": 12.72140121459961, |
|
"learning_rate": 8.566666666666667e-06, |
|
"loss": 0.2101, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.7591069330199764, |
|
"grad_norm": 7.142697334289551, |
|
"learning_rate": 8.511111111111113e-06, |
|
"loss": 0.2615, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.81786133960047, |
|
"grad_norm": 11.419723510742188, |
|
"learning_rate": 8.455555555555555e-06, |
|
"loss": 0.2211, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.81786133960047, |
|
"eval_loss": 0.5404770374298096, |
|
"eval_runtime": 217.6495, |
|
"eval_samples_per_second": 2.656, |
|
"eval_steps_per_second": 0.666, |
|
"eval_wer": 0.33982266769468006, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.8766157461809634, |
|
"grad_norm": 8.013970375061035, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.2023, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.935370152761457, |
|
"grad_norm": 11.708134651184082, |
|
"learning_rate": 8.344444444444445e-06, |
|
"loss": 0.2406, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.9941245593419508, |
|
"grad_norm": 13.683923721313477, |
|
"learning_rate": 8.288888888888889e-06, |
|
"loss": 0.2345, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.0517038777908345, |
|
"grad_norm": 4.092015743255615, |
|
"learning_rate": 8.233333333333335e-06, |
|
"loss": 0.0736, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.1104582843713278, |
|
"grad_norm": 7.464056015014648, |
|
"learning_rate": 8.177777777777779e-06, |
|
"loss": 0.0939, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.1692126909518215, |
|
"grad_norm": 6.3624677658081055, |
|
"learning_rate": 8.122222222222223e-06, |
|
"loss": 0.0829, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.227967097532315, |
|
"grad_norm": 9.64356803894043, |
|
"learning_rate": 8.066666666666667e-06, |
|
"loss": 0.0939, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 3.2867215041128084, |
|
"grad_norm": 9.63764476776123, |
|
"learning_rate": 8.011111111111113e-06, |
|
"loss": 0.0999, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.2867215041128084, |
|
"eval_loss": 0.5825645923614502, |
|
"eval_runtime": 218.2885, |
|
"eval_samples_per_second": 2.648, |
|
"eval_steps_per_second": 0.664, |
|
"eval_wer": 0.3282575173477255, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.345475910693302, |
|
"grad_norm": 6.401581764221191, |
|
"learning_rate": 7.955555555555557e-06, |
|
"loss": 0.0829, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 3.4042303172737958, |
|
"grad_norm": 6.341639041900635, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.0914, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.462984723854289, |
|
"grad_norm": 5.883626937866211, |
|
"learning_rate": 7.844444444444446e-06, |
|
"loss": 0.1057, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 3.5217391304347827, |
|
"grad_norm": 10.137381553649902, |
|
"learning_rate": 7.788888888888889e-06, |
|
"loss": 0.0948, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.5804935370152764, |
|
"grad_norm": 3.2198357582092285, |
|
"learning_rate": 7.733333333333334e-06, |
|
"loss": 0.0916, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 3.6392479435957696, |
|
"grad_norm": 4.958520412445068, |
|
"learning_rate": 7.677777777777778e-06, |
|
"loss": 0.0847, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 3.6980023501762633, |
|
"grad_norm": 7.396123886108398, |
|
"learning_rate": 7.622222222222223e-06, |
|
"loss": 0.1091, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 3.756756756756757, |
|
"grad_norm": 8.138400077819824, |
|
"learning_rate": 7.566666666666667e-06, |
|
"loss": 0.1111, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.756756756756757, |
|
"eval_loss": 0.5536873936653137, |
|
"eval_runtime": 217.469, |
|
"eval_samples_per_second": 2.658, |
|
"eval_steps_per_second": 0.667, |
|
"eval_wer": 0.3276792598303778, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.8155111633372503, |
|
"grad_norm": 7.745422840118408, |
|
"learning_rate": 7.511111111111111e-06, |
|
"loss": 0.1031, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 3.874265569917744, |
|
"grad_norm": 7.68823766708374, |
|
"learning_rate": 7.455555555555556e-06, |
|
"loss": 0.0919, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 3.933019976498237, |
|
"grad_norm": 4.057219505310059, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.1069, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 3.991774383078731, |
|
"grad_norm": 7.178942680358887, |
|
"learning_rate": 7.344444444444445e-06, |
|
"loss": 0.0951, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.049353701527615, |
|
"grad_norm": 5.237444877624512, |
|
"learning_rate": 7.28888888888889e-06, |
|
"loss": 0.0495, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 4.108108108108108, |
|
"grad_norm": 7.6568684577941895, |
|
"learning_rate": 7.233333333333334e-06, |
|
"loss": 0.0546, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.166862514688602, |
|
"grad_norm": 5.4397149085998535, |
|
"learning_rate": 7.177777777777778e-06, |
|
"loss": 0.0521, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 4.225616921269095, |
|
"grad_norm": 7.218142032623291, |
|
"learning_rate": 7.122222222222222e-06, |
|
"loss": 0.0423, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.225616921269095, |
|
"eval_loss": 0.6012063026428223, |
|
"eval_runtime": 217.9147, |
|
"eval_samples_per_second": 2.652, |
|
"eval_steps_per_second": 0.665, |
|
"eval_wer": 0.31881264456437935, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.225616921269095, |
|
"step": 1800, |
|
"total_flos": 1.468038098976768e+19, |
|
"train_loss": 0.6555014891094632, |
|
"train_runtime": 4980.8387, |
|
"train_samples_per_second": 8.031, |
|
"train_steps_per_second": 1.004 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.468038098976768e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|