whisper-small-splitted / trainer_state.json
End of training
84d172e verified
{
"best_global_step": 1500,
"best_metric": 0.10838954150676727,
"best_model_checkpoint": "./whisper-small-splitted/checkpoint-1500",
"epoch": 12.656151419558359,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10515247108307045,
"grad_norm": 37.98896408081055,
"learning_rate": 4.800000000000001e-07,
"loss": 4.0211,
"step": 25
},
{
"epoch": 0.2103049421661409,
"grad_norm": 28.073650360107422,
"learning_rate": 9.800000000000001e-07,
"loss": 3.1838,
"step": 50
},
{
"epoch": 0.31545741324921134,
"grad_norm": 12.10400676727295,
"learning_rate": 1.48e-06,
"loss": 2.0459,
"step": 75
},
{
"epoch": 0.4206098843322818,
"grad_norm": 8.05373477935791,
"learning_rate": 1.98e-06,
"loss": 1.3254,
"step": 100
},
{
"epoch": 0.5257623554153522,
"grad_norm": 6.276533126831055,
"learning_rate": 2.4800000000000004e-06,
"loss": 1.0124,
"step": 125
},
{
"epoch": 0.6309148264984227,
"grad_norm": 5.450071334838867,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.7972,
"step": 150
},
{
"epoch": 0.7360672975814931,
"grad_norm": 6.463825225830078,
"learning_rate": 3.48e-06,
"loss": 0.6987,
"step": 175
},
{
"epoch": 0.8412197686645636,
"grad_norm": 6.113511085510254,
"learning_rate": 3.980000000000001e-06,
"loss": 0.6371,
"step": 200
},
{
"epoch": 0.9463722397476341,
"grad_norm": 5.203503131866455,
"learning_rate": 4.48e-06,
"loss": 0.5403,
"step": 225
},
{
"epoch": 1.0546792849631967,
"grad_norm": 3.6247618198394775,
"learning_rate": 4.980000000000001e-06,
"loss": 0.4597,
"step": 250
},
{
"epoch": 1.159831756046267,
"grad_norm": 4.7159552574157715,
"learning_rate": 5.480000000000001e-06,
"loss": 0.4074,
"step": 275
},
{
"epoch": 1.2649842271293377,
"grad_norm": 5.012338638305664,
"learning_rate": 5.98e-06,
"loss": 0.3846,
"step": 300
},
{
"epoch": 1.370136698212408,
"grad_norm": 4.738580703735352,
"learning_rate": 6.480000000000001e-06,
"loss": 0.3306,
"step": 325
},
{
"epoch": 1.4752891692954784,
"grad_norm": 4.1813578605651855,
"learning_rate": 6.98e-06,
"loss": 0.3145,
"step": 350
},
{
"epoch": 1.5804416403785488,
"grad_norm": 5.1083855628967285,
"learning_rate": 7.48e-06,
"loss": 0.2901,
"step": 375
},
{
"epoch": 1.6855941114616193,
"grad_norm": 3.654137372970581,
"learning_rate": 7.980000000000002e-06,
"loss": 0.2473,
"step": 400
},
{
"epoch": 1.7907465825446898,
"grad_norm": 5.576925754547119,
"learning_rate": 8.48e-06,
"loss": 0.2408,
"step": 425
},
{
"epoch": 1.8958990536277602,
"grad_norm": 5.050299167633057,
"learning_rate": 8.98e-06,
"loss": 0.222,
"step": 450
},
{
"epoch": 2.004206098843323,
"grad_norm": 4.0772705078125,
"learning_rate": 9.48e-06,
"loss": 0.2311,
"step": 475
},
{
"epoch": 2.1093585699263935,
"grad_norm": 3.189274311065674,
"learning_rate": 9.980000000000001e-06,
"loss": 0.1324,
"step": 500
},
{
"epoch": 2.1093585699263935,
"eval_loss": 0.1909431368112564,
"eval_runtime": 1339.9903,
"eval_samples_per_second": 2.432,
"eval_steps_per_second": 0.608,
"eval_wer": 0.3306828586574141,
"step": 500
},
{
"epoch": 2.2145110410094637,
"grad_norm": 3.6630983352661133,
"learning_rate": 9.982507288629738e-06,
"loss": 0.1425,
"step": 525
},
{
"epoch": 2.319663512092534,
"grad_norm": 3.793405532836914,
"learning_rate": 9.964285714285714e-06,
"loss": 0.1394,
"step": 550
},
{
"epoch": 2.4248159831756047,
"grad_norm": 3.442934036254883,
"learning_rate": 9.94606413994169e-06,
"loss": 0.1445,
"step": 575
},
{
"epoch": 2.5299684542586753,
"grad_norm": 2.4245970249176025,
"learning_rate": 9.927842565597668e-06,
"loss": 0.1247,
"step": 600
},
{
"epoch": 2.6351209253417456,
"grad_norm": 2.134147882461548,
"learning_rate": 9.909620991253646e-06,
"loss": 0.1246,
"step": 625
},
{
"epoch": 2.740273396424816,
"grad_norm": 3.135070562362671,
"learning_rate": 9.891399416909622e-06,
"loss": 0.1233,
"step": 650
},
{
"epoch": 2.8454258675078865,
"grad_norm": 2.723932981491089,
"learning_rate": 9.873177842565598e-06,
"loss": 0.119,
"step": 675
},
{
"epoch": 2.9505783385909568,
"grad_norm": 3.9131784439086914,
"learning_rate": 9.854956268221574e-06,
"loss": 0.1334,
"step": 700
},
{
"epoch": 3.0588853838065195,
"grad_norm": 4.044662952423096,
"learning_rate": 9.836734693877552e-06,
"loss": 0.0902,
"step": 725
},
{
"epoch": 3.1640378548895898,
"grad_norm": 2.1829943656921387,
"learning_rate": 9.818513119533528e-06,
"loss": 0.0702,
"step": 750
},
{
"epoch": 3.2691903259726605,
"grad_norm": 2.548558235168457,
"learning_rate": 9.800291545189506e-06,
"loss": 0.0712,
"step": 775
},
{
"epoch": 3.3743427970557307,
"grad_norm": 1.3480279445648193,
"learning_rate": 9.782069970845482e-06,
"loss": 0.0654,
"step": 800
},
{
"epoch": 3.4794952681388014,
"grad_norm": 2.091961622238159,
"learning_rate": 9.763848396501458e-06,
"loss": 0.0658,
"step": 825
},
{
"epoch": 3.5846477392218716,
"grad_norm": 2.2356009483337402,
"learning_rate": 9.745626822157436e-06,
"loss": 0.0682,
"step": 850
},
{
"epoch": 3.6898002103049423,
"grad_norm": 2.5908384323120117,
"learning_rate": 9.727405247813412e-06,
"loss": 0.056,
"step": 875
},
{
"epoch": 3.7949526813880126,
"grad_norm": 2.618177890777588,
"learning_rate": 9.70918367346939e-06,
"loss": 0.0623,
"step": 900
},
{
"epoch": 3.900105152471083,
"grad_norm": 2.1948764324188232,
"learning_rate": 9.690962099125366e-06,
"loss": 0.0752,
"step": 925
},
{
"epoch": 4.008412197686646,
"grad_norm": 1.5540448427200317,
"learning_rate": 9.672740524781342e-06,
"loss": 0.0638,
"step": 950
},
{
"epoch": 4.113564668769716,
"grad_norm": 1.905039668083191,
"learning_rate": 9.654518950437318e-06,
"loss": 0.0397,
"step": 975
},
{
"epoch": 4.218717139852787,
"grad_norm": 2.011355400085449,
"learning_rate": 9.636297376093296e-06,
"loss": 0.0383,
"step": 1000
},
{
"epoch": 4.218717139852787,
"eval_loss": 0.1153440847992897,
"eval_runtime": 1137.0265,
"eval_samples_per_second": 2.866,
"eval_steps_per_second": 0.717,
"eval_wer": 0.11305479254011855,
"step": 1000
},
{
"epoch": 4.323869610935857,
"grad_norm": 1.983497142791748,
"learning_rate": 9.618075801749272e-06,
"loss": 0.0376,
"step": 1025
},
{
"epoch": 4.429022082018927,
"grad_norm": 2.7597391605377197,
"learning_rate": 9.599854227405248e-06,
"loss": 0.0384,
"step": 1050
},
{
"epoch": 4.534174553101998,
"grad_norm": 2.3190319538116455,
"learning_rate": 9.581632653061226e-06,
"loss": 0.041,
"step": 1075
},
{
"epoch": 4.639327024185068,
"grad_norm": 1.6487782001495361,
"learning_rate": 9.563411078717202e-06,
"loss": 0.0373,
"step": 1100
},
{
"epoch": 4.744479495268139,
"grad_norm": 1.493482232093811,
"learning_rate": 9.54518950437318e-06,
"loss": 0.0453,
"step": 1125
},
{
"epoch": 4.849631966351209,
"grad_norm": 1.9630647897720337,
"learning_rate": 9.526967930029156e-06,
"loss": 0.0447,
"step": 1150
},
{
"epoch": 4.95478443743428,
"grad_norm": 2.1700949668884277,
"learning_rate": 9.508746355685132e-06,
"loss": 0.0416,
"step": 1175
},
{
"epoch": 5.063091482649842,
"grad_norm": 2.0949151515960693,
"learning_rate": 9.490524781341108e-06,
"loss": 0.0296,
"step": 1200
},
{
"epoch": 5.168243953732913,
"grad_norm": 1.232323169708252,
"learning_rate": 9.472303206997086e-06,
"loss": 0.0227,
"step": 1225
},
{
"epoch": 5.273396424815983,
"grad_norm": 6.17132568359375,
"learning_rate": 9.454081632653062e-06,
"loss": 0.0284,
"step": 1250
},
{
"epoch": 5.378548895899054,
"grad_norm": 0.9878859519958496,
"learning_rate": 9.43586005830904e-06,
"loss": 0.0211,
"step": 1275
},
{
"epoch": 5.483701366982124,
"grad_norm": 2.1974642276763916,
"learning_rate": 9.417638483965016e-06,
"loss": 0.0203,
"step": 1300
},
{
"epoch": 5.588853838065194,
"grad_norm": 1.2606751918792725,
"learning_rate": 9.399416909620992e-06,
"loss": 0.0271,
"step": 1325
},
{
"epoch": 5.694006309148265,
"grad_norm": 1.851982593536377,
"learning_rate": 9.38119533527697e-06,
"loss": 0.0271,
"step": 1350
},
{
"epoch": 5.799158780231336,
"grad_norm": 1.9509834051132202,
"learning_rate": 9.362973760932945e-06,
"loss": 0.0261,
"step": 1375
},
{
"epoch": 5.904311251314406,
"grad_norm": 1.3138453960418701,
"learning_rate": 9.344752186588922e-06,
"loss": 0.0269,
"step": 1400
},
{
"epoch": 6.012618296529968,
"grad_norm": 1.0709035396575928,
"learning_rate": 9.326530612244898e-06,
"loss": 0.0292,
"step": 1425
},
{
"epoch": 6.117770767613039,
"grad_norm": 0.9309059381484985,
"learning_rate": 9.308309037900875e-06,
"loss": 0.0128,
"step": 1450
},
{
"epoch": 6.22292323869611,
"grad_norm": 1.078444004058838,
"learning_rate": 9.290087463556851e-06,
"loss": 0.0189,
"step": 1475
},
{
"epoch": 6.3280757097791795,
"grad_norm": 2.0890743732452393,
"learning_rate": 9.27186588921283e-06,
"loss": 0.0184,
"step": 1500
},
{
"epoch": 6.3280757097791795,
"eval_loss": 0.10838954150676727,
"eval_runtime": 1142.8117,
"eval_samples_per_second": 2.852,
"eval_steps_per_second": 0.713,
"eval_wer": 0.11257288805358778,
"step": 1500
},
{
"epoch": 6.43322818086225,
"grad_norm": 0.5473038554191589,
"learning_rate": 9.253644314868805e-06,
"loss": 0.0153,
"step": 1525
},
{
"epoch": 6.538380651945321,
"grad_norm": 1.075249433517456,
"learning_rate": 9.235422740524781e-06,
"loss": 0.0172,
"step": 1550
},
{
"epoch": 6.643533123028391,
"grad_norm": 1.237701177597046,
"learning_rate": 9.21720116618076e-06,
"loss": 0.0167,
"step": 1575
},
{
"epoch": 6.748685594111461,
"grad_norm": 1.9220877885818481,
"learning_rate": 9.198979591836735e-06,
"loss": 0.0175,
"step": 1600
},
{
"epoch": 6.853838065194532,
"grad_norm": 1.435571312904358,
"learning_rate": 9.180758017492713e-06,
"loss": 0.0182,
"step": 1625
},
{
"epoch": 6.958990536277603,
"grad_norm": 1.9936102628707886,
"learning_rate": 9.162536443148689e-06,
"loss": 0.0158,
"step": 1650
},
{
"epoch": 7.067297581493165,
"grad_norm": 1.101319670677185,
"learning_rate": 9.144314868804665e-06,
"loss": 0.0147,
"step": 1675
},
{
"epoch": 7.172450052576235,
"grad_norm": 1.3670756816864014,
"learning_rate": 9.126093294460641e-06,
"loss": 0.0127,
"step": 1700
},
{
"epoch": 7.277602523659306,
"grad_norm": 0.6474692225456238,
"learning_rate": 9.107871720116619e-06,
"loss": 0.0107,
"step": 1725
},
{
"epoch": 7.382754994742377,
"grad_norm": 0.941483736038208,
"learning_rate": 9.089650145772595e-06,
"loss": 0.0129,
"step": 1750
},
{
"epoch": 7.4879074658254465,
"grad_norm": 0.7835807204246521,
"learning_rate": 9.071428571428573e-06,
"loss": 0.015,
"step": 1775
},
{
"epoch": 7.593059936908517,
"grad_norm": 0.7577527761459351,
"learning_rate": 9.053206997084549e-06,
"loss": 0.0119,
"step": 1800
},
{
"epoch": 7.698212407991588,
"grad_norm": 1.8214268684387207,
"learning_rate": 9.034985422740525e-06,
"loss": 0.0116,
"step": 1825
},
{
"epoch": 7.803364879074659,
"grad_norm": 0.8769547343254089,
"learning_rate": 9.016763848396503e-06,
"loss": 0.0131,
"step": 1850
},
{
"epoch": 7.908517350157728,
"grad_norm": 1.227581262588501,
"learning_rate": 8.998542274052479e-06,
"loss": 0.012,
"step": 1875
},
{
"epoch": 8.016824395373291,
"grad_norm": 1.28843355178833,
"learning_rate": 8.980320699708455e-06,
"loss": 0.0134,
"step": 1900
},
{
"epoch": 8.121976866456361,
"grad_norm": 1.5814619064331055,
"learning_rate": 8.962099125364431e-06,
"loss": 0.0086,
"step": 1925
},
{
"epoch": 8.227129337539433,
"grad_norm": 0.8933718800544739,
"learning_rate": 8.943877551020409e-06,
"loss": 0.0089,
"step": 1950
},
{
"epoch": 8.332281808622502,
"grad_norm": 0.9096396565437317,
"learning_rate": 8.925655976676385e-06,
"loss": 0.0097,
"step": 1975
},
{
"epoch": 8.437434279705574,
"grad_norm": 0.42109328508377075,
"learning_rate": 8.907434402332363e-06,
"loss": 0.0098,
"step": 2000
},
{
"epoch": 8.437434279705574,
"eval_loss": 0.11219166219234467,
"eval_runtime": 1135.5499,
"eval_samples_per_second": 2.87,
"eval_steps_per_second": 0.718,
"eval_wer": 0.10139270396607393,
"step": 2000
},
{
"epoch": 8.542586750788644,
"grad_norm": 1.2390159368515015,
"learning_rate": 8.889212827988339e-06,
"loss": 0.0106,
"step": 2025
},
{
"epoch": 8.647739221871714,
"grad_norm": 0.9200573563575745,
"learning_rate": 8.870991253644315e-06,
"loss": 0.0082,
"step": 2050
},
{
"epoch": 8.752891692954785,
"grad_norm": 0.814224362373352,
"learning_rate": 8.852769679300293e-06,
"loss": 0.0105,
"step": 2075
},
{
"epoch": 8.858044164037855,
"grad_norm": 0.7412490248680115,
"learning_rate": 8.834548104956269e-06,
"loss": 0.0101,
"step": 2100
},
{
"epoch": 8.963196635120925,
"grad_norm": 0.8766764402389526,
"learning_rate": 8.816326530612247e-06,
"loss": 0.0092,
"step": 2125
},
{
"epoch": 9.071503680336487,
"grad_norm": 0.09623388946056366,
"learning_rate": 8.798104956268223e-06,
"loss": 0.0088,
"step": 2150
},
{
"epoch": 9.176656151419559,
"grad_norm": 0.6915100812911987,
"learning_rate": 8.779883381924199e-06,
"loss": 0.0073,
"step": 2175
},
{
"epoch": 9.281808622502629,
"grad_norm": 0.6448748707771301,
"learning_rate": 8.761661807580175e-06,
"loss": 0.0089,
"step": 2200
},
{
"epoch": 9.386961093585699,
"grad_norm": 1.2592213153839111,
"learning_rate": 8.743440233236153e-06,
"loss": 0.0096,
"step": 2225
},
{
"epoch": 9.49211356466877,
"grad_norm": 0.8664531111717224,
"learning_rate": 8.72521865889213e-06,
"loss": 0.0073,
"step": 2250
},
{
"epoch": 9.59726603575184,
"grad_norm": 0.49285972118377686,
"learning_rate": 8.706997084548106e-06,
"loss": 0.0078,
"step": 2275
},
{
"epoch": 9.70241850683491,
"grad_norm": 0.9507968425750732,
"learning_rate": 8.688775510204082e-06,
"loss": 0.0092,
"step": 2300
},
{
"epoch": 9.807570977917981,
"grad_norm": 1.0036101341247559,
"learning_rate": 8.670553935860059e-06,
"loss": 0.0103,
"step": 2325
},
{
"epoch": 9.912723449001051,
"grad_norm": 0.8855874538421631,
"learning_rate": 8.653061224489798e-06,
"loss": 0.0111,
"step": 2350
},
{
"epoch": 10.021030494216614,
"grad_norm": 0.918958306312561,
"learning_rate": 8.634839650145774e-06,
"loss": 0.0097,
"step": 2375
},
{
"epoch": 10.126182965299684,
"grad_norm": 1.442686915397644,
"learning_rate": 8.61661807580175e-06,
"loss": 0.0083,
"step": 2400
},
{
"epoch": 10.231335436382755,
"grad_norm": 1.1829726696014404,
"learning_rate": 8.598396501457726e-06,
"loss": 0.0076,
"step": 2425
},
{
"epoch": 10.336487907465825,
"grad_norm": 1.1316958665847778,
"learning_rate": 8.580174927113704e-06,
"loss": 0.0086,
"step": 2450
},
{
"epoch": 10.441640378548897,
"grad_norm": 0.5993586778640747,
"learning_rate": 8.56195335276968e-06,
"loss": 0.0085,
"step": 2475
},
{
"epoch": 10.546792849631967,
"grad_norm": 1.1639779806137085,
"learning_rate": 8.543731778425656e-06,
"loss": 0.0076,
"step": 2500
},
{
"epoch": 10.546792849631967,
"eval_loss": 0.1101309061050415,
"eval_runtime": 1133.8828,
"eval_samples_per_second": 2.874,
"eval_steps_per_second": 0.719,
"eval_wer": 0.09657365910076623,
"step": 2500
},
{
"epoch": 10.651945320715036,
"grad_norm": 1.1275087594985962,
"learning_rate": 8.525510204081632e-06,
"loss": 0.008,
"step": 2525
},
{
"epoch": 10.757097791798108,
"grad_norm": 1.17903470993042,
"learning_rate": 8.50728862973761e-06,
"loss": 0.0082,
"step": 2550
},
{
"epoch": 10.862250262881178,
"grad_norm": 0.35520169138908386,
"learning_rate": 8.489067055393588e-06,
"loss": 0.0085,
"step": 2575
},
{
"epoch": 10.967402733964247,
"grad_norm": 1.5375522375106812,
"learning_rate": 8.470845481049564e-06,
"loss": 0.0062,
"step": 2600
},
{
"epoch": 11.07570977917981,
"grad_norm": 0.49886855483055115,
"learning_rate": 8.45262390670554e-06,
"loss": 0.006,
"step": 2625
},
{
"epoch": 11.180862250262882,
"grad_norm": 0.5618315935134888,
"learning_rate": 8.434402332361516e-06,
"loss": 0.0053,
"step": 2650
},
{
"epoch": 11.286014721345952,
"grad_norm": 0.4988507032394409,
"learning_rate": 8.416180758017494e-06,
"loss": 0.006,
"step": 2675
},
{
"epoch": 11.391167192429021,
"grad_norm": 0.4689825773239136,
"learning_rate": 8.39795918367347e-06,
"loss": 0.0059,
"step": 2700
},
{
"epoch": 11.496319663512093,
"grad_norm": 0.637856662273407,
"learning_rate": 8.379737609329447e-06,
"loss": 0.0066,
"step": 2725
},
{
"epoch": 11.601472134595163,
"grad_norm": 0.46745094656944275,
"learning_rate": 8.361516034985424e-06,
"loss": 0.007,
"step": 2750
},
{
"epoch": 11.706624605678233,
"grad_norm": 1.153106689453125,
"learning_rate": 8.3432944606414e-06,
"loss": 0.0077,
"step": 2775
},
{
"epoch": 11.811777076761304,
"grad_norm": 1.1093589067459106,
"learning_rate": 8.325072886297377e-06,
"loss": 0.0068,
"step": 2800
},
{
"epoch": 11.916929547844374,
"grad_norm": 0.7349961400032043,
"learning_rate": 8.306851311953353e-06,
"loss": 0.0075,
"step": 2825
},
{
"epoch": 12.025236593059937,
"grad_norm": 3.071120500564575,
"learning_rate": 8.288629737609331e-06,
"loss": 0.0067,
"step": 2850
},
{
"epoch": 12.130389064143007,
"grad_norm": 1.33828604221344,
"learning_rate": 8.270408163265307e-06,
"loss": 0.0049,
"step": 2875
},
{
"epoch": 12.235541535226078,
"grad_norm": 0.5961343050003052,
"learning_rate": 8.252186588921283e-06,
"loss": 0.0058,
"step": 2900
},
{
"epoch": 12.340694006309148,
"grad_norm": 0.5771873593330383,
"learning_rate": 8.23396501457726e-06,
"loss": 0.0049,
"step": 2925
},
{
"epoch": 12.44584647739222,
"grad_norm": 1.0507683753967285,
"learning_rate": 8.215743440233237e-06,
"loss": 0.0065,
"step": 2950
},
{
"epoch": 12.55099894847529,
"grad_norm": 0.579768180847168,
"learning_rate": 8.197521865889213e-06,
"loss": 0.0058,
"step": 2975
},
{
"epoch": 12.656151419558359,
"grad_norm": 0.8235365748405457,
"learning_rate": 8.17930029154519e-06,
"loss": 0.0075,
"step": 3000
},
{
"epoch": 12.656151419558359,
"eval_loss": 0.11558376252651215,
"eval_runtime": 1148.2367,
"eval_samples_per_second": 2.838,
"eval_steps_per_second": 0.71,
"eval_wer": 0.09917594332803238,
"step": 3000
},
{
"epoch": 12.656151419558359,
"step": 3000,
"total_flos": 5.55469178535936e+19,
"train_loss": 0.17092564110457897,
"train_runtime": 92948.5763,
"train_samples_per_second": 9.82,
"train_steps_per_second": 0.153
}
],
"logging_steps": 25,
"max_steps": 14220,
"num_input_tokens_seen": 0,
"num_train_epochs": 60,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.55469178535936e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
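
The evaluation history above can be read back programmatically. Below is a minimal sketch, assuming the file has been saved locally as `trainer_state.json` (the path and script are illustrative, not part of this repository); it lists the eval loss and WER at each evaluation step and confirms the checkpoint recorded as best.

```python
import json

# Load the Trainer state written by transformers.Trainer (local path is an assumption).
with open("trainer_state.json") as f:
    state = json.load(f)

# Evaluation entries in log_history carry "eval_loss"; the rest are training log steps.
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]

# Print eval loss and word error rate at every evaluation step.
for entry in eval_logs:
    print(f"step {entry['step']:>5}: "
          f"eval_loss={entry['eval_loss']:.4f}  eval_wer={entry['eval_wer']:.4f}")

# The best checkpoint is the one with the lowest eval_loss (step 1500 in this run).
best = min(eval_logs, key=lambda entry: entry["eval_loss"])
print("best step:", best["step"], "->", state["best_model_checkpoint"])
```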