{ "best_metric": 41.511269513669795, "best_model_checkpoint": "./whisper-small-all-ka/checkpoint-8000", "epoch": 3.9623576027736505, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09905894006934125, "grad_norm": 2.2366678714752197, "learning_rate": 2.0000000000000003e-06, "loss": 1.2185, "step": 200 }, { "epoch": 0.1981178801386825, "grad_norm": 2.3154804706573486, "learning_rate": 4.000000000000001e-06, "loss": 0.3894, "step": 400 }, { "epoch": 0.2971768202080238, "grad_norm": 2.071636915206909, "learning_rate": 6e-06, "loss": 0.2868, "step": 600 }, { "epoch": 0.396235760277365, "grad_norm": 1.6677467823028564, "learning_rate": 8.000000000000001e-06, "loss": 0.241, "step": 800 }, { "epoch": 0.4952947003467063, "grad_norm": 2.1703829765319824, "learning_rate": 1e-05, "loss": 0.2149, "step": 1000 }, { "epoch": 0.4952947003467063, "eval_loss": 0.20653420686721802, "eval_runtime": 3478.9242, "eval_samples_per_second": 2.805, "eval_steps_per_second": 0.351, "eval_wer": 54.861029030703634, "step": 1000 }, { "epoch": 0.5943536404160475, "grad_norm": 1.6030035018920898, "learning_rate": 9.94921279837481e-06, "loss": 0.1983, "step": 1200 }, { "epoch": 0.6934125804853888, "grad_norm": 2.294896125793457, "learning_rate": 9.89842559674962e-06, "loss": 0.1817, "step": 1400 }, { "epoch": 0.79247152055473, "grad_norm": 1.2499381303787231, "learning_rate": 9.84763839512443e-06, "loss": 0.1717, "step": 1600 }, { "epoch": 0.8915304606240714, "grad_norm": 1.322660207748413, "learning_rate": 9.796851193499239e-06, "loss": 0.165, "step": 1800 }, { "epoch": 0.9905894006934126, "grad_norm": 1.1454402208328247, "learning_rate": 9.746063991874048e-06, "loss": 0.1589, "step": 2000 }, { "epoch": 0.9905894006934126, "eval_loss": 0.16001442074775696, "eval_runtime": 3476.6879, "eval_samples_per_second": 2.807, "eval_steps_per_second": 0.351, "eval_wer": 47.45924518352169, "step": 2000 }, { "epoch": 1.0896483407627537, "grad_norm": 1.3895001411437988, "learning_rate": 9.695276790248857e-06, "loss": 0.1459, "step": 2200 }, { "epoch": 1.188707280832095, "grad_norm": 1.2915339469909668, "learning_rate": 9.644489588623668e-06, "loss": 0.1432, "step": 2400 }, { "epoch": 1.2877662209014362, "grad_norm": 1.4027740955352783, "learning_rate": 9.593702386998477e-06, "loss": 0.1396, "step": 2600 }, { "epoch": 1.3868251609707776, "grad_norm": 1.3594259023666382, "learning_rate": 9.542915185373287e-06, "loss": 0.1359, "step": 2800 }, { "epoch": 1.485884101040119, "grad_norm": 1.2143176794052124, "learning_rate": 9.492127983748096e-06, "loss": 0.1332, "step": 3000 }, { "epoch": 1.485884101040119, "eval_loss": 0.14520840346813202, "eval_runtime": 3482.9046, "eval_samples_per_second": 2.802, "eval_steps_per_second": 0.35, "eval_wer": 45.08623746442761, "step": 3000 }, { "epoch": 1.5849430411094603, "grad_norm": 1.00477135181427, "learning_rate": 9.441340782122905e-06, "loss": 0.133, "step": 3200 }, { "epoch": 1.6840019811788014, "grad_norm": 1.0558192729949951, "learning_rate": 9.390553580497716e-06, "loss": 0.1294, "step": 3400 }, { "epoch": 1.7830609212481425, "grad_norm": 1.1081092357635498, "learning_rate": 9.339766378872525e-06, "loss": 0.1296, "step": 3600 }, { "epoch": 1.8821198613174839, "grad_norm": 1.0857079029083252, "learning_rate": 9.288979177247334e-06, "loss": 0.1262, "step": 3800 }, { "epoch": 1.9811788013868252, "grad_norm": 0.9101235866546631, "learning_rate": 9.238191975622143e-06, "loss": 0.1259, "step": 4000 }, { "epoch": 1.9811788013868252, "eval_loss": 0.13690388202667236, "eval_runtime": 3481.4796, "eval_samples_per_second": 2.803, "eval_steps_per_second": 0.35, "eval_wer": 43.51605189704238, "step": 4000 }, { "epoch": 2.0802377414561666, "grad_norm": 1.0484652519226074, "learning_rate": 9.18765871000508e-06, "loss": 0.1131, "step": 4200 }, { "epoch": 2.1792966815255075, "grad_norm": 1.1338391304016113, "learning_rate": 9.13687150837989e-06, "loss": 0.1112, "step": 4400 }, { "epoch": 2.278355621594849, "grad_norm": 0.9634003043174744, "learning_rate": 9.086084306754698e-06, "loss": 0.1106, "step": 4600 }, { "epoch": 2.37741456166419, "grad_norm": 0.9234340786933899, "learning_rate": 9.035297105129508e-06, "loss": 0.1094, "step": 4800 }, { "epoch": 2.4764735017335315, "grad_norm": 1.1325962543487549, "learning_rate": 8.984509903504318e-06, "loss": 0.11, "step": 5000 }, { "epoch": 2.4764735017335315, "eval_loss": 0.1342398077249527, "eval_runtime": 3538.4773, "eval_samples_per_second": 2.758, "eval_steps_per_second": 0.345, "eval_wer": 42.52138889141444, "step": 5000 }, { "epoch": 2.5755324418028724, "grad_norm": 0.9091247916221619, "learning_rate": 8.933722701879128e-06, "loss": 0.1098, "step": 5200 }, { "epoch": 2.6745913818722142, "grad_norm": 0.9756883978843689, "learning_rate": 8.882935500253937e-06, "loss": 0.107, "step": 5400 }, { "epoch": 2.773650321941555, "grad_norm": 1.0698374509811401, "learning_rate": 8.832148298628746e-06, "loss": 0.1076, "step": 5600 }, { "epoch": 2.8727092620108965, "grad_norm": 1.0525143146514893, "learning_rate": 8.781361097003555e-06, "loss": 0.1069, "step": 5800 }, { "epoch": 2.971768202080238, "grad_norm": 0.9000458717346191, "learning_rate": 8.730573895378366e-06, "loss": 0.1071, "step": 6000 }, { "epoch": 2.971768202080238, "eval_loss": 0.13021869957447052, "eval_runtime": 3466.5665, "eval_samples_per_second": 2.815, "eval_steps_per_second": 0.352, "eval_wer": 42.22862701955686, "step": 6000 }, { "epoch": 3.070827142149579, "grad_norm": 0.9356966018676758, "learning_rate": 8.679786693753175e-06, "loss": 0.0966, "step": 6200 }, { "epoch": 3.16988608221892, "grad_norm": 0.8852076530456543, "learning_rate": 8.628999492127984e-06, "loss": 0.0935, "step": 6400 }, { "epoch": 3.2689450222882614, "grad_norm": 0.8325952291488647, "learning_rate": 8.578212290502793e-06, "loss": 0.0928, "step": 6600 }, { "epoch": 3.368003962357603, "grad_norm": 0.9712433218955994, "learning_rate": 8.527425088877603e-06, "loss": 0.0925, "step": 6800 }, { "epoch": 3.467062902426944, "grad_norm": 0.8990651369094849, "learning_rate": 8.476637887252413e-06, "loss": 0.0932, "step": 7000 }, { "epoch": 3.467062902426944, "eval_loss": 0.1307862401008606, "eval_runtime": 3471.2099, "eval_samples_per_second": 2.811, "eval_steps_per_second": 0.351, "eval_wer": 41.745842690499785, "step": 7000 }, { "epoch": 3.566121842496285, "grad_norm": 0.9829621911048889, "learning_rate": 8.425850685627223e-06, "loss": 0.0923, "step": 7200 }, { "epoch": 3.6651807825656264, "grad_norm": 1.0099509954452515, "learning_rate": 8.375063484002032e-06, "loss": 0.0926, "step": 7400 }, { "epoch": 3.7642397226349678, "grad_norm": 0.8921763896942139, "learning_rate": 8.324276282376843e-06, "loss": 0.0921, "step": 7600 }, { "epoch": 3.863298662704309, "grad_norm": 0.9494955539703369, "learning_rate": 8.273489080751652e-06, "loss": 0.0925, "step": 7800 }, { "epoch": 3.9623576027736505, "grad_norm": 0.8222095966339111, "learning_rate": 8.222701879126461e-06, "loss": 0.0916, "step": 8000 }, { "epoch": 3.9623576027736505, "eval_loss": 0.12850907444953918, "eval_runtime": 3467.0941, "eval_samples_per_second": 2.815, "eval_steps_per_second": 0.352, "eval_wer": 41.511269513669795, "step": 8000 } ], "logging_steps": 200, "max_steps": 40380, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.9550365812187136e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }