{ "best_global_step": 20000, "best_metric": 0.06115037298782882, "best_model_checkpoint": "/home/cluster-dgxa100/slp01/bagas-fine-tune-whisper/whisper-large-v2-javanese-openslr-v2/checkpoint-20000", "epoch": 8.0, "eval_steps": 1000, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 19.06084442138672, "learning_rate": 1.97e-06, "loss": 1.8907, "step": 200 }, { "epoch": 0.16, "grad_norm": 16.973052978515625, "learning_rate": 3.97e-06, "loss": 0.6026, "step": 400 }, { "epoch": 0.24, "grad_norm": 8.261700630187988, "learning_rate": 5.9700000000000004e-06, "loss": 0.5317, "step": 600 }, { "epoch": 0.32, "grad_norm": 7.87921667098999, "learning_rate": 7.970000000000002e-06, "loss": 0.491, "step": 800 }, { "epoch": 0.4, "grad_norm": 7.55085563659668, "learning_rate": 9.970000000000001e-06, "loss": 0.4821, "step": 1000 }, { "epoch": 0.4, "eval_loss": 0.3569938838481903, "eval_runtime": 880.4774, "eval_samples_per_second": 1.42, "eval_steps_per_second": 0.71, "eval_wer": 0.22683549273655282, "step": 1000 }, { "epoch": 0.48, "grad_norm": 5.795778751373291, "learning_rate": 1.1970000000000002e-05, "loss": 0.4608, "step": 1200 }, { "epoch": 0.56, "grad_norm": 8.08930492401123, "learning_rate": 1.3970000000000002e-05, "loss": 0.4761, "step": 1400 }, { "epoch": 0.64, "grad_norm": 7.31475305557251, "learning_rate": 1.597e-05, "loss": 0.4942, "step": 1600 }, { "epoch": 0.72, "grad_norm": 7.746321201324463, "learning_rate": 1.796e-05, "loss": 0.4758, "step": 1800 }, { "epoch": 0.8, "grad_norm": 5.482341289520264, "learning_rate": 1.9960000000000002e-05, "loss": 0.4761, "step": 2000 }, { "epoch": 0.8, "eval_loss": 0.34740468859672546, "eval_runtime": 901.2547, "eval_samples_per_second": 1.387, "eval_steps_per_second": 0.693, "eval_wer": 0.2396937573616019, "step": 2000 }, { "epoch": 0.88, "grad_norm": 7.196951866149902, "learning_rate": 1.9782222222222226e-05, "loss": 0.4924, "step": 2200 }, { "epoch": 0.96, "grad_norm": 6.221120834350586, "learning_rate": 1.9560000000000002e-05, "loss": 0.4779, "step": 2400 }, { "epoch": 1.04, "grad_norm": 6.380108833312988, "learning_rate": 1.933777777777778e-05, "loss": 0.3806, "step": 2600 }, { "epoch": 1.12, "grad_norm": 5.491684913635254, "learning_rate": 1.9115555555555555e-05, "loss": 0.3465, "step": 2800 }, { "epoch": 1.2, "grad_norm": 4.4581379890441895, "learning_rate": 1.8893333333333334e-05, "loss": 0.3099, "step": 3000 }, { "epoch": 1.2, "eval_loss": 0.30754217505455017, "eval_runtime": 891.4205, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.701, "eval_wer": 0.20259128386336867, "step": 3000 }, { "epoch": 1.28, "grad_norm": 7.791854381561279, "learning_rate": 1.8671111111111114e-05, "loss": 0.3275, "step": 3200 }, { "epoch": 1.3599999999999999, "grad_norm": 5.212815761566162, "learning_rate": 1.844888888888889e-05, "loss": 0.3296, "step": 3400 }, { "epoch": 1.44, "grad_norm": 9.706353187561035, "learning_rate": 1.822666666666667e-05, "loss": 0.3142, "step": 3600 }, { "epoch": 1.52, "grad_norm": 5.222635269165039, "learning_rate": 1.8004444444444446e-05, "loss": 0.3097, "step": 3800 }, { "epoch": 1.6, "grad_norm": 3.670520067214966, "learning_rate": 1.7782222222222222e-05, "loss": 0.3108, "step": 4000 }, { "epoch": 1.6, "eval_loss": 0.26660874485969543, "eval_runtime": 896.6083, "eval_samples_per_second": 1.394, "eval_steps_per_second": 0.697, "eval_wer": 0.19709462112288967, "step": 4000 }, { "epoch": 1.6800000000000002, "grad_norm": 5.417144298553467, "learning_rate": 1.756e-05, "loss": 0.3096, "step": 4200 }, { "epoch": 1.76, "grad_norm": 5.865799427032471, "learning_rate": 1.733777777777778e-05, "loss": 0.3266, "step": 4400 }, { "epoch": 1.8399999999999999, "grad_norm": 3.2357375621795654, "learning_rate": 1.7115555555555557e-05, "loss": 0.3134, "step": 4600 }, { "epoch": 1.92, "grad_norm": 6.072234630584717, "learning_rate": 1.6893333333333336e-05, "loss": 0.289, "step": 4800 }, { "epoch": 2.0, "grad_norm": 3.143366813659668, "learning_rate": 1.6671111111111113e-05, "loss": 0.3131, "step": 5000 }, { "epoch": 2.0, "eval_loss": 0.21725231409072876, "eval_runtime": 885.5009, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.706, "eval_wer": 0.15312131919905772, "step": 5000 }, { "epoch": 2.08, "grad_norm": 4.550586700439453, "learning_rate": 1.644888888888889e-05, "loss": 0.1525, "step": 5200 }, { "epoch": 2.16, "grad_norm": 3.604379892349243, "learning_rate": 1.6226666666666668e-05, "loss": 0.1656, "step": 5400 }, { "epoch": 2.24, "grad_norm": 3.664323568344116, "learning_rate": 1.6004444444444444e-05, "loss": 0.1743, "step": 5600 }, { "epoch": 2.32, "grad_norm": 2.5190961360931396, "learning_rate": 1.5782222222222224e-05, "loss": 0.175, "step": 5800 }, { "epoch": 2.4, "grad_norm": 3.1211793422698975, "learning_rate": 1.556e-05, "loss": 0.1796, "step": 6000 }, { "epoch": 2.4, "eval_loss": 0.2026291936635971, "eval_runtime": 894.7957, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.698, "eval_wer": 0.13839811542991756, "step": 6000 }, { "epoch": 2.48, "grad_norm": 3.203092336654663, "learning_rate": 1.533777777777778e-05, "loss": 0.1721, "step": 6200 }, { "epoch": 2.56, "grad_norm": 1.1959407329559326, "learning_rate": 1.5115555555555557e-05, "loss": 0.1722, "step": 6400 }, { "epoch": 2.64, "grad_norm": 4.8345160484313965, "learning_rate": 1.4893333333333335e-05, "loss": 0.17, "step": 6600 }, { "epoch": 2.7199999999999998, "grad_norm": 5.333383560180664, "learning_rate": 1.4671111111111111e-05, "loss": 0.1751, "step": 6800 }, { "epoch": 2.8, "grad_norm": 1.5021018981933594, "learning_rate": 1.444888888888889e-05, "loss": 0.17, "step": 7000 }, { "epoch": 2.8, "eval_loss": 0.19221611320972443, "eval_runtime": 891.3109, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.701, "eval_wer": 0.13780918727915195, "step": 7000 }, { "epoch": 2.88, "grad_norm": 3.0855863094329834, "learning_rate": 1.4226666666666669e-05, "loss": 0.1672, "step": 7200 }, { "epoch": 2.96, "grad_norm": 4.673288822174072, "learning_rate": 1.4004444444444445e-05, "loss": 0.1724, "step": 7400 }, { "epoch": 3.04, "grad_norm": 2.8060922622680664, "learning_rate": 1.3782222222222223e-05, "loss": 0.1262, "step": 7600 }, { "epoch": 3.12, "grad_norm": 1.5213590860366821, "learning_rate": 1.3560000000000002e-05, "loss": 0.094, "step": 7800 }, { "epoch": 3.2, "grad_norm": 5.938966751098633, "learning_rate": 1.3337777777777778e-05, "loss": 0.0995, "step": 8000 }, { "epoch": 3.2, "eval_loss": 0.17917467653751373, "eval_runtime": 900.0369, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.694, "eval_wer": 0.12063211621515509, "step": 8000 }, { "epoch": 3.2800000000000002, "grad_norm": 3.5296432971954346, "learning_rate": 1.3115555555555556e-05, "loss": 0.1002, "step": 8200 }, { "epoch": 3.36, "grad_norm": 5.334218502044678, "learning_rate": 1.2893333333333336e-05, "loss": 0.1061, "step": 8400 }, { "epoch": 3.44, "grad_norm": 5.724509239196777, "learning_rate": 1.2671111111111112e-05, "loss": 0.1012, "step": 8600 }, { "epoch": 3.52, "grad_norm": 2.5498409271240234, "learning_rate": 1.244888888888889e-05, "loss": 0.099, "step": 8800 }, { "epoch": 3.6, "grad_norm": 6.009134769439697, "learning_rate": 1.2226666666666666e-05, "loss": 0.0972, "step": 9000 }, { "epoch": 3.6, "eval_loss": 0.16701579093933105, "eval_runtime": 899.6835, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.695, "eval_wer": 0.11493914409108755, "step": 9000 }, { "epoch": 3.68, "grad_norm": 2.472468137741089, "learning_rate": 1.2004444444444445e-05, "loss": 0.0933, "step": 9200 }, { "epoch": 3.76, "grad_norm": 2.989001750946045, "learning_rate": 1.1782222222222223e-05, "loss": 0.0999, "step": 9400 }, { "epoch": 3.84, "grad_norm": 3.3118479251861572, "learning_rate": 1.156e-05, "loss": 0.0949, "step": 9600 }, { "epoch": 3.92, "grad_norm": 4.284938812255859, "learning_rate": 1.1337777777777779e-05, "loss": 0.0964, "step": 9800 }, { "epoch": 4.0, "grad_norm": 3.4380695819854736, "learning_rate": 1.1115555555555557e-05, "loss": 0.097, "step": 10000 }, { "epoch": 4.0, "eval_loss": 0.1545259952545166, "eval_runtime": 903.5859, "eval_samples_per_second": 1.383, "eval_steps_per_second": 0.692, "eval_wer": 0.10963879073419709, "step": 10000 }, { "epoch": 4.08, "grad_norm": 1.92518949508667, "learning_rate": 1.0893333333333333e-05, "loss": 0.0524, "step": 10200 }, { "epoch": 4.16, "grad_norm": 2.2278802394866943, "learning_rate": 1.0671111111111112e-05, "loss": 0.0523, "step": 10400 }, { "epoch": 4.24, "grad_norm": 0.5611541271209717, "learning_rate": 1.044888888888889e-05, "loss": 0.0521, "step": 10600 }, { "epoch": 4.32, "grad_norm": 1.7205854654312134, "learning_rate": 1.0226666666666666e-05, "loss": 0.0499, "step": 10800 }, { "epoch": 4.4, "grad_norm": 2.563293218612671, "learning_rate": 1.0004444444444446e-05, "loss": 0.0553, "step": 11000 }, { "epoch": 4.4, "eval_loss": 0.1574954092502594, "eval_runtime": 893.7485, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.699, "eval_wer": 0.10296427169218689, "step": 11000 }, { "epoch": 4.48, "grad_norm": 2.2215042114257812, "learning_rate": 9.783333333333335e-06, "loss": 0.0537, "step": 11200 }, { "epoch": 4.5600000000000005, "grad_norm": 1.360498070716858, "learning_rate": 9.561111111111113e-06, "loss": 0.0588, "step": 11400 }, { "epoch": 4.64, "grad_norm": 4.086482048034668, "learning_rate": 9.340000000000002e-06, "loss": 0.0537, "step": 11600 }, { "epoch": 4.72, "grad_norm": 2.1945817470550537, "learning_rate": 9.117777777777778e-06, "loss": 0.0599, "step": 11800 }, { "epoch": 4.8, "grad_norm": 2.7617948055267334, "learning_rate": 8.895555555555556e-06, "loss": 0.0526, "step": 12000 }, { "epoch": 4.8, "eval_loss": 0.14308780431747437, "eval_runtime": 905.6641, "eval_samples_per_second": 1.38, "eval_steps_per_second": 0.69, "eval_wer": 0.08902630545740087, "step": 12000 }, { "epoch": 4.88, "grad_norm": 0.3309612572193146, "learning_rate": 8.673333333333334e-06, "loss": 0.0527, "step": 12200 }, { "epoch": 4.96, "grad_norm": 0.3702127933502197, "learning_rate": 8.451111111111112e-06, "loss": 0.0574, "step": 12400 }, { "epoch": 5.04, "grad_norm": 1.9310526847839355, "learning_rate": 8.22888888888889e-06, "loss": 0.0385, "step": 12600 }, { "epoch": 5.12, "grad_norm": 0.8932788372039795, "learning_rate": 8.006666666666667e-06, "loss": 0.0369, "step": 12800 }, { "epoch": 5.2, "grad_norm": 2.246431589126587, "learning_rate": 7.784444444444445e-06, "loss": 0.0299, "step": 13000 }, { "epoch": 5.2, "eval_loss": 0.13719478249549866, "eval_runtime": 896.1487, "eval_samples_per_second": 1.395, "eval_steps_per_second": 0.697, "eval_wer": 0.08333333333333333, "step": 13000 }, { "epoch": 5.28, "grad_norm": 2.8669216632843018, "learning_rate": 7.562222222222223e-06, "loss": 0.0276, "step": 13200 }, { "epoch": 5.36, "grad_norm": 0.38674354553222656, "learning_rate": 7.341111111111112e-06, "loss": 0.0334, "step": 13400 }, { "epoch": 5.44, "grad_norm": 4.218255996704102, "learning_rate": 7.11888888888889e-06, "loss": 0.0253, "step": 13600 }, { "epoch": 5.52, "grad_norm": 2.424956798553467, "learning_rate": 6.896666666666667e-06, "loss": 0.0287, "step": 13800 }, { "epoch": 5.6, "grad_norm": 1.3083245754241943, "learning_rate": 6.674444444444445e-06, "loss": 0.0311, "step": 14000 }, { "epoch": 5.6, "eval_loss": 0.12584801018238068, "eval_runtime": 892.859, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.7, "eval_wer": 0.07803297997644287, "step": 14000 }, { "epoch": 5.68, "grad_norm": 1.7961013317108154, "learning_rate": 6.452222222222223e-06, "loss": 0.0291, "step": 14200 }, { "epoch": 5.76, "grad_norm": 0.16184474527835846, "learning_rate": 6.2300000000000005e-06, "loss": 0.0287, "step": 14400 }, { "epoch": 5.84, "grad_norm": 0.13447080552577972, "learning_rate": 6.007777777777778e-06, "loss": 0.0303, "step": 14600 }, { "epoch": 5.92, "grad_norm": 2.1226162910461426, "learning_rate": 5.785555555555556e-06, "loss": 0.0261, "step": 14800 }, { "epoch": 6.0, "grad_norm": 1.8055452108383179, "learning_rate": 5.563333333333334e-06, "loss": 0.0295, "step": 15000 }, { "epoch": 6.0, "eval_loss": 0.12012948095798492, "eval_runtime": 888.9428, "eval_samples_per_second": 1.406, "eval_steps_per_second": 0.703, "eval_wer": 0.07253631723596388, "step": 15000 }, { "epoch": 6.08, "grad_norm": 0.1165083572268486, "learning_rate": 5.341111111111111e-06, "loss": 0.0143, "step": 15200 }, { "epoch": 6.16, "grad_norm": 0.12827950716018677, "learning_rate": 5.118888888888889e-06, "loss": 0.0169, "step": 15400 }, { "epoch": 6.24, "grad_norm": 3.075695276260376, "learning_rate": 4.896666666666667e-06, "loss": 0.0144, "step": 15600 }, { "epoch": 6.32, "grad_norm": 0.4469108581542969, "learning_rate": 4.6744444444444445e-06, "loss": 0.0124, "step": 15800 }, { "epoch": 6.4, "grad_norm": 0.264642596244812, "learning_rate": 4.452222222222223e-06, "loss": 0.0151, "step": 16000 }, { "epoch": 6.4, "eval_loss": 0.12285350263118744, "eval_runtime": 883.7262, "eval_samples_per_second": 1.414, "eval_steps_per_second": 0.707, "eval_wer": 0.07420494699646643, "step": 16000 }, { "epoch": 6.48, "grad_norm": 3.0525002479553223, "learning_rate": 4.23e-06, "loss": 0.0122, "step": 16200 }, { "epoch": 6.5600000000000005, "grad_norm": 1.7435777187347412, "learning_rate": 4.007777777777778e-06, "loss": 0.0134, "step": 16400 }, { "epoch": 6.64, "grad_norm": 0.03412042185664177, "learning_rate": 3.785555555555556e-06, "loss": 0.0146, "step": 16600 }, { "epoch": 6.72, "grad_norm": 4.467521667480469, "learning_rate": 3.5633333333333337e-06, "loss": 0.0139, "step": 16800 }, { "epoch": 6.8, "grad_norm": 1.6740899085998535, "learning_rate": 3.3411111111111115e-06, "loss": 0.0163, "step": 17000 }, { "epoch": 6.8, "eval_loss": 0.1136574074625969, "eval_runtime": 890.0494, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.702, "eval_wer": 0.06576364350215941, "step": 17000 }, { "epoch": 6.88, "grad_norm": 0.25668439269065857, "learning_rate": 3.118888888888889e-06, "loss": 0.0143, "step": 17200 }, { "epoch": 6.96, "grad_norm": 1.1925643682479858, "learning_rate": 2.896666666666667e-06, "loss": 0.0132, "step": 17400 }, { "epoch": 7.04, "grad_norm": 0.45678913593292236, "learning_rate": 2.6744444444444446e-06, "loss": 0.0103, "step": 17600 }, { "epoch": 7.12, "grad_norm": 0.25028079748153687, "learning_rate": 2.4522222222222224e-06, "loss": 0.0089, "step": 17800 }, { "epoch": 7.2, "grad_norm": 0.04460795596241951, "learning_rate": 2.2300000000000002e-06, "loss": 0.0082, "step": 18000 }, { "epoch": 7.2, "eval_loss": 0.1142345443367958, "eval_runtime": 885.5644, "eval_samples_per_second": 1.412, "eval_steps_per_second": 0.706, "eval_wer": 0.06389870435806831, "step": 18000 }, { "epoch": 7.28, "grad_norm": 6.106534957885742, "learning_rate": 2.007777777777778e-06, "loss": 0.0063, "step": 18200 }, { "epoch": 7.36, "grad_norm": 0.5640923380851746, "learning_rate": 1.7855555555555557e-06, "loss": 0.0088, "step": 18400 }, { "epoch": 7.44, "grad_norm": 0.09705500304698944, "learning_rate": 1.5633333333333333e-06, "loss": 0.0091, "step": 18600 }, { "epoch": 7.52, "grad_norm": 0.042120128870010376, "learning_rate": 1.3411111111111112e-06, "loss": 0.0066, "step": 18800 }, { "epoch": 7.6, "grad_norm": 0.024996856227517128, "learning_rate": 1.118888888888889e-06, "loss": 0.0092, "step": 19000 }, { "epoch": 7.6, "eval_loss": 0.11208222806453705, "eval_runtime": 886.2816, "eval_samples_per_second": 1.41, "eval_steps_per_second": 0.705, "eval_wer": 0.0627208480565371, "step": 19000 }, { "epoch": 7.68, "grad_norm": 0.38041412830352783, "learning_rate": 8.966666666666668e-07, "loss": 0.0068, "step": 19200 }, { "epoch": 7.76, "grad_norm": 3.13415265083313, "learning_rate": 6.744444444444446e-07, "loss": 0.0062, "step": 19400 }, { "epoch": 7.84, "grad_norm": 0.7191887497901917, "learning_rate": 4.5222222222222224e-07, "loss": 0.0067, "step": 19600 }, { "epoch": 7.92, "grad_norm": 0.09878556430339813, "learning_rate": 2.3000000000000002e-07, "loss": 0.0077, "step": 19800 }, { "epoch": 8.0, "grad_norm": 0.007985732518136501, "learning_rate": 7.777777777777778e-09, "loss": 0.006, "step": 20000 }, { "epoch": 8.0, "eval_loss": 0.11124598234891891, "eval_runtime": 889.6592, "eval_samples_per_second": 1.405, "eval_steps_per_second": 0.703, "eval_wer": 0.06115037298782882, "step": 20000 }, { "epoch": 8.0, "step": 20000, "total_flos": 1.698366962958336e+20, "train_loss": 0.16200368287563324, "train_runtime": 40923.5162, "train_samples_per_second": 1.955, "train_steps_per_second": 0.489 } ], "logging_steps": 200, "max_steps": 20000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.698366962958336e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }