{
"best_metric": 0.5404770374298096,
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-swagen-female-model/checkpoint-1200",
"epoch": 4.225616921269095,
"eval_steps": 200,
"global_step": 1800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05875440658049354,
"grad_norm": 73.04696655273438,
"learning_rate": 4.2000000000000006e-07,
"loss": 5.3434,
"step": 25
},
{
"epoch": 0.11750881316098707,
      "grad_norm": null,
"learning_rate": 9.000000000000001e-07,
"loss": 4.1395,
"step": 50
},
{
"epoch": 0.1762632197414806,
"grad_norm": 34.922523498535156,
"learning_rate": 1.4000000000000001e-06,
"loss": 3.1888,
"step": 75
},
{
"epoch": 0.23501762632197415,
"grad_norm": 34.749629974365234,
"learning_rate": 1.9000000000000002e-06,
"loss": 2.3154,
"step": 100
},
{
"epoch": 0.2937720329024677,
"grad_norm": 33.44667053222656,
"learning_rate": 2.4000000000000003e-06,
"loss": 1.691,
"step": 125
},
{
"epoch": 0.3525264394829612,
"grad_norm": 38.09309768676758,
"learning_rate": 2.9e-06,
"loss": 1.5853,
"step": 150
},
{
"epoch": 0.4112808460634548,
"grad_norm": 31.004915237426758,
"learning_rate": 3.4000000000000005e-06,
"loss": 1.522,
"step": 175
},
{
"epoch": 0.4700352526439483,
"grad_norm": 31.333637237548828,
"learning_rate": 3.900000000000001e-06,
"loss": 1.4291,
"step": 200
},
{
"epoch": 0.4700352526439483,
"eval_loss": 0.8216601610183716,
"eval_runtime": 224.1212,
"eval_samples_per_second": 2.579,
"eval_steps_per_second": 0.647,
"eval_wer": 0.49672320740169623,
"step": 200
},
{
"epoch": 0.5287896592244419,
"grad_norm": 34.124698638916016,
"learning_rate": 4.4e-06,
"loss": 1.2954,
"step": 225
},
{
"epoch": 0.5875440658049353,
"grad_norm": 38.50987243652344,
"learning_rate": 4.9000000000000005e-06,
"loss": 1.2541,
"step": 250
},
{
"epoch": 0.6462984723854289,
"grad_norm": 38.04581069946289,
"learning_rate": 5.400000000000001e-06,
"loss": 1.1264,
"step": 275
},
{
"epoch": 0.7050528789659224,
"grad_norm": 28.410049438476562,
"learning_rate": 5.9e-06,
"loss": 1.1311,
"step": 300
},
{
"epoch": 0.763807285546416,
"grad_norm": 30.25857925415039,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.0838,
"step": 325
},
{
"epoch": 0.8225616921269095,
"grad_norm": 31.186445236206055,
"learning_rate": 6.9e-06,
"loss": 1.1503,
"step": 350
},
{
"epoch": 0.881316098707403,
"grad_norm": 24.111587524414062,
"learning_rate": 7.4e-06,
"loss": 1.0892,
"step": 375
},
{
"epoch": 0.9400705052878966,
"grad_norm": 22.854475021362305,
"learning_rate": 7.9e-06,
"loss": 0.9176,
"step": 400
},
{
"epoch": 0.9400705052878966,
"eval_loss": 0.634784460067749,
"eval_runtime": 226.4974,
"eval_samples_per_second": 2.552,
"eval_steps_per_second": 0.64,
"eval_wer": 0.42656129529683884,
"step": 400
},
{
"epoch": 0.9988249118683902,
"grad_norm": 36.47032928466797,
"learning_rate": 8.400000000000001e-06,
"loss": 1.0234,
"step": 425
},
{
"epoch": 1.0564042303172738,
"grad_norm": 21.01776123046875,
"learning_rate": 8.900000000000001e-06,
"loss": 0.5964,
"step": 450
},
{
"epoch": 1.1151586368977673,
"grad_norm": 17.60657501220703,
"learning_rate": 9.4e-06,
"loss": 0.7129,
"step": 475
},
{
"epoch": 1.1739130434782608,
"grad_norm": 19.744020462036133,
"learning_rate": 9.9e-06,
"loss": 0.5569,
"step": 500
},
{
"epoch": 1.2326674500587544,
"grad_norm": 15.34373664855957,
"learning_rate": 9.955555555555556e-06,
"loss": 0.5868,
"step": 525
},
{
"epoch": 1.291421856639248,
"grad_norm": 26.74762535095215,
"learning_rate": 9.9e-06,
"loss": 0.6199,
"step": 550
},
{
"epoch": 1.3501762632197414,
"grad_norm": 13.160558700561523,
"learning_rate": 9.844444444444446e-06,
"loss": 0.557,
"step": 575
},
{
"epoch": 1.408930669800235,
"grad_norm": 14.749505043029785,
"learning_rate": 9.78888888888889e-06,
"loss": 0.5492,
"step": 600
},
{
"epoch": 1.408930669800235,
"eval_loss": 0.5867528319358826,
"eval_runtime": 230.3509,
"eval_samples_per_second": 2.509,
"eval_steps_per_second": 0.629,
"eval_wer": 0.40940632228218965,
"step": 600
},
{
"epoch": 1.4676850763807285,
"grad_norm": 16.107746124267578,
"learning_rate": 9.733333333333334e-06,
"loss": 0.5442,
"step": 625
},
{
"epoch": 1.526439482961222,
"grad_norm": 29.692367553710938,
"learning_rate": 9.677777777777778e-06,
"loss": 0.6473,
"step": 650
},
{
"epoch": 1.5851938895417157,
"grad_norm": 18.0280704498291,
"learning_rate": 9.622222222222222e-06,
"loss": 0.5739,
"step": 675
},
{
"epoch": 1.6439482961222092,
"grad_norm": 27.483768463134766,
"learning_rate": 9.566666666666668e-06,
"loss": 0.6705,
"step": 700
},
{
"epoch": 1.7027027027027026,
"grad_norm": 20.081012725830078,
"learning_rate": 9.511111111111112e-06,
"loss": 0.6338,
"step": 725
},
{
"epoch": 1.7614571092831963,
"grad_norm": 22.985151290893555,
"learning_rate": 9.455555555555557e-06,
"loss": 0.6409,
"step": 750
},
{
"epoch": 1.8202115158636898,
"grad_norm": 19.641082763671875,
"learning_rate": 9.4e-06,
"loss": 0.6777,
"step": 775
},
{
"epoch": 1.8789659224441833,
"grad_norm": 27.92637825012207,
"learning_rate": 9.344444444444446e-06,
"loss": 0.6243,
"step": 800
},
{
"epoch": 1.8789659224441833,
"eval_loss": 0.5535122752189636,
"eval_runtime": 218.1215,
"eval_samples_per_second": 2.65,
"eval_steps_per_second": 0.665,
"eval_wer": 0.3274865073245952,
"step": 800
},
{
"epoch": 1.937720329024677,
"grad_norm": 24.763235092163086,
"learning_rate": 9.28888888888889e-06,
"loss": 0.5296,
"step": 825
},
{
"epoch": 1.9964747356051704,
"grad_norm": 22.559097290039062,
"learning_rate": 9.233333333333334e-06,
"loss": 0.6109,
"step": 850
},
{
"epoch": 2.054054054054054,
"grad_norm": 9.163862228393555,
"learning_rate": 9.17777777777778e-06,
"loss": 0.2217,
"step": 875
},
{
"epoch": 2.1128084606345476,
"grad_norm": 14.249441146850586,
"learning_rate": 9.122222222222223e-06,
"loss": 0.2233,
"step": 900
},
{
"epoch": 2.1715628672150413,
"grad_norm": 12.58755111694336,
"learning_rate": 9.066666666666667e-06,
"loss": 0.2103,
"step": 925
},
{
"epoch": 2.2303172737955346,
"grad_norm": 12.163936614990234,
"learning_rate": 9.011111111111111e-06,
"loss": 0.2184,
"step": 950
},
{
"epoch": 2.2890716803760283,
"grad_norm": 24.493022918701172,
"learning_rate": 8.955555555555555e-06,
"loss": 0.2056,
"step": 975
},
{
"epoch": 2.3478260869565215,
"grad_norm": 16.613021850585938,
"learning_rate": 8.900000000000001e-06,
"loss": 0.2196,
"step": 1000
},
{
"epoch": 2.3478260869565215,
"eval_loss": 0.5643105506896973,
"eval_runtime": 222.0189,
"eval_samples_per_second": 2.603,
"eval_steps_per_second": 0.653,
"eval_wer": 0.35774865073245954,
"step": 1000
},
{
"epoch": 2.406580493537015,
"grad_norm": 9.612153053283691,
"learning_rate": 8.844444444444445e-06,
"loss": 0.2273,
"step": 1025
},
{
"epoch": 2.465334900117509,
"grad_norm": 8.410961151123047,
"learning_rate": 8.788888888888891e-06,
"loss": 0.2034,
"step": 1050
},
{
"epoch": 2.524089306698002,
"grad_norm": 21.08755874633789,
"learning_rate": 8.733333333333333e-06,
"loss": 0.2239,
"step": 1075
},
{
"epoch": 2.582843713278496,
"grad_norm": 14.133148193359375,
"learning_rate": 8.677777777777779e-06,
"loss": 0.2086,
"step": 1100
},
{
"epoch": 2.6415981198589895,
"grad_norm": 6.469407558441162,
"learning_rate": 8.622222222222223e-06,
"loss": 0.2349,
"step": 1125
},
{
"epoch": 2.7003525264394828,
"grad_norm": 12.72140121459961,
"learning_rate": 8.566666666666667e-06,
"loss": 0.2101,
"step": 1150
},
{
"epoch": 2.7591069330199764,
"grad_norm": 7.142697334289551,
"learning_rate": 8.511111111111113e-06,
"loss": 0.2615,
"step": 1175
},
{
"epoch": 2.81786133960047,
"grad_norm": 11.419723510742188,
"learning_rate": 8.455555555555555e-06,
"loss": 0.2211,
"step": 1200
},
{
"epoch": 2.81786133960047,
"eval_loss": 0.5404770374298096,
"eval_runtime": 217.6495,
"eval_samples_per_second": 2.656,
"eval_steps_per_second": 0.666,
"eval_wer": 0.33982266769468006,
"step": 1200
},
{
"epoch": 2.8766157461809634,
"grad_norm": 8.013970375061035,
"learning_rate": 8.400000000000001e-06,
"loss": 0.2023,
"step": 1225
},
{
"epoch": 2.935370152761457,
"grad_norm": 11.708134651184082,
"learning_rate": 8.344444444444445e-06,
"loss": 0.2406,
"step": 1250
},
{
"epoch": 2.9941245593419508,
"grad_norm": 13.683923721313477,
"learning_rate": 8.288888888888889e-06,
"loss": 0.2345,
"step": 1275
},
{
"epoch": 3.0517038777908345,
"grad_norm": 4.092015743255615,
"learning_rate": 8.233333333333335e-06,
"loss": 0.0736,
"step": 1300
},
{
"epoch": 3.1104582843713278,
"grad_norm": 7.464056015014648,
"learning_rate": 8.177777777777779e-06,
"loss": 0.0939,
"step": 1325
},
{
"epoch": 3.1692126909518215,
"grad_norm": 6.3624677658081055,
"learning_rate": 8.122222222222223e-06,
"loss": 0.0829,
"step": 1350
},
{
"epoch": 3.227967097532315,
"grad_norm": 9.64356803894043,
"learning_rate": 8.066666666666667e-06,
"loss": 0.0939,
"step": 1375
},
{
"epoch": 3.2867215041128084,
"grad_norm": 9.63764476776123,
"learning_rate": 8.011111111111113e-06,
"loss": 0.0999,
"step": 1400
},
{
"epoch": 3.2867215041128084,
"eval_loss": 0.5825645923614502,
"eval_runtime": 218.2885,
"eval_samples_per_second": 2.648,
"eval_steps_per_second": 0.664,
"eval_wer": 0.3282575173477255,
"step": 1400
},
{
"epoch": 3.345475910693302,
"grad_norm": 6.401581764221191,
"learning_rate": 7.955555555555557e-06,
"loss": 0.0829,
"step": 1425
},
{
"epoch": 3.4042303172737958,
"grad_norm": 6.341639041900635,
"learning_rate": 7.9e-06,
"loss": 0.0914,
"step": 1450
},
{
"epoch": 3.462984723854289,
"grad_norm": 5.883626937866211,
"learning_rate": 7.844444444444446e-06,
"loss": 0.1057,
"step": 1475
},
{
"epoch": 3.5217391304347827,
"grad_norm": 10.137381553649902,
"learning_rate": 7.788888888888889e-06,
"loss": 0.0948,
"step": 1500
},
{
"epoch": 3.5804935370152764,
"grad_norm": 3.2198357582092285,
"learning_rate": 7.733333333333334e-06,
"loss": 0.0916,
"step": 1525
},
{
"epoch": 3.6392479435957696,
"grad_norm": 4.958520412445068,
"learning_rate": 7.677777777777778e-06,
"loss": 0.0847,
"step": 1550
},
{
"epoch": 3.6980023501762633,
"grad_norm": 7.396123886108398,
"learning_rate": 7.622222222222223e-06,
"loss": 0.1091,
"step": 1575
},
{
"epoch": 3.756756756756757,
"grad_norm": 8.138400077819824,
"learning_rate": 7.566666666666667e-06,
"loss": 0.1111,
"step": 1600
},
{
"epoch": 3.756756756756757,
"eval_loss": 0.5536873936653137,
"eval_runtime": 217.469,
"eval_samples_per_second": 2.658,
"eval_steps_per_second": 0.667,
"eval_wer": 0.3276792598303778,
"step": 1600
},
{
"epoch": 3.8155111633372503,
"grad_norm": 7.745422840118408,
"learning_rate": 7.511111111111111e-06,
"loss": 0.1031,
"step": 1625
},
{
"epoch": 3.874265569917744,
"grad_norm": 7.68823766708374,
"learning_rate": 7.455555555555556e-06,
"loss": 0.0919,
"step": 1650
},
{
"epoch": 3.933019976498237,
"grad_norm": 4.057219505310059,
"learning_rate": 7.4e-06,
"loss": 0.1069,
"step": 1675
},
{
"epoch": 3.991774383078731,
"grad_norm": 7.178942680358887,
"learning_rate": 7.344444444444445e-06,
"loss": 0.0951,
"step": 1700
},
{
"epoch": 4.049353701527615,
"grad_norm": 5.237444877624512,
"learning_rate": 7.28888888888889e-06,
"loss": 0.0495,
"step": 1725
},
{
"epoch": 4.108108108108108,
"grad_norm": 7.6568684577941895,
"learning_rate": 7.233333333333334e-06,
"loss": 0.0546,
"step": 1750
},
{
"epoch": 4.166862514688602,
"grad_norm": 5.4397149085998535,
"learning_rate": 7.177777777777778e-06,
"loss": 0.0521,
"step": 1775
},
{
"epoch": 4.225616921269095,
"grad_norm": 7.218142032623291,
"learning_rate": 7.122222222222222e-06,
"loss": 0.0423,
"step": 1800
},
{
"epoch": 4.225616921269095,
"eval_loss": 0.6012063026428223,
"eval_runtime": 217.9147,
"eval_samples_per_second": 2.652,
"eval_steps_per_second": 0.665,
"eval_wer": 0.31881264456437935,
"step": 1800
},
{
"epoch": 4.225616921269095,
"step": 1800,
"total_flos": 1.468038098976768e+19,
"train_loss": 0.6555014891094632,
"train_runtime": 4980.8387,
"train_samples_per_second": 8.031,
"train_steps_per_second": 1.004
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 12,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.468038098976768e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}