{ "best_metric": 0.9724182168056447, "best_model_checkpoint": "/data3/akoudounas/speech_unlearning/models/fsc/facebook/wav2vec2-base/4/_retain/checkpoint-2800", "epoch": 3.872752420470263, "eval_steps": 400, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13831258644536654, "grad_norm": 10.856929779052734, "learning_rate": 3.571428571428572e-05, "loss": 13.3808, "step": 100 }, { "epoch": 0.2766251728907331, "grad_norm": 47.9949836730957, "learning_rate": 7.142857142857143e-05, "loss": 11.8806, "step": 200 }, { "epoch": 0.4149377593360996, "grad_norm": 24.35114097595215, "learning_rate": 9.920634920634922e-05, "loss": 8.8407, "step": 300 }, { "epoch": 0.5532503457814661, "grad_norm": 63.59002685546875, "learning_rate": 9.523809523809524e-05, "loss": 6.359, "step": 400 }, { "epoch": 0.5532503457814661, "eval_accuracy": 0.5381654906991661, "eval_f1_macro": 0.42880994677778567, "eval_loss": 1.3551914691925049, "eval_runtime": 15.4249, "eval_samples_per_second": 202.141, "eval_steps_per_second": 25.284, "step": 400 }, { "epoch": 0.6915629322268326, "grad_norm": 81.84086608886719, "learning_rate": 9.126984126984128e-05, "loss": 4.7754, "step": 500 }, { "epoch": 0.8298755186721992, "grad_norm": 30.993141174316406, "learning_rate": 8.730158730158731e-05, "loss": 3.5042, "step": 600 }, { "epoch": 0.9681881051175657, "grad_norm": 53.2839469909668, "learning_rate": 8.333333333333334e-05, "loss": 2.5084, "step": 700 }, { "epoch": 1.1065006915629323, "grad_norm": 19.756397247314453, "learning_rate": 7.936507936507937e-05, "loss": 1.6743, "step": 800 }, { "epoch": 1.1065006915629323, "eval_accuracy": 0.9339320076972418, "eval_f1_macro": 0.9281889642509323, "eval_loss": 0.3620944619178772, "eval_runtime": 15.3176, "eval_samples_per_second": 203.557, "eval_steps_per_second": 25.461, "step": 800 }, { "epoch": 1.2448132780082988, "grad_norm": 94.57241821289062, "learning_rate": 7.53968253968254e-05, "loss": 1.3345, "step": 900 }, { "epoch": 1.3831258644536653, "grad_norm": 33.056297302246094, "learning_rate": 7.142857142857143e-05, "loss": 1.2201, "step": 1000 }, { "epoch": 1.5214384508990317, "grad_norm": 59.42671585083008, "learning_rate": 6.746031746031747e-05, "loss": 1.0254, "step": 1100 }, { "epoch": 1.6597510373443982, "grad_norm": 42.64637756347656, "learning_rate": 6.349206349206349e-05, "loss": 0.955, "step": 1200 }, { "epoch": 1.6597510373443982, "eval_accuracy": 0.9464400256574728, "eval_f1_macro": 0.9447013212949815, "eval_loss": 0.2722650468349457, "eval_runtime": 15.1717, "eval_samples_per_second": 205.514, "eval_steps_per_second": 25.706, "step": 1200 }, { "epoch": 1.798063623789765, "grad_norm": 8.055658340454102, "learning_rate": 5.9523809523809524e-05, "loss": 0.6846, "step": 1300 }, { "epoch": 1.9363762102351314, "grad_norm": 49.374576568603516, "learning_rate": 5.555555555555556e-05, "loss": 0.671, "step": 1400 }, { "epoch": 2.074688796680498, "grad_norm": 31.296615600585938, "learning_rate": 5.158730158730159e-05, "loss": 0.7596, "step": 1500 }, { "epoch": 2.2130013831258646, "grad_norm": 25.859086990356445, "learning_rate": 4.761904761904762e-05, "loss": 0.6647, "step": 1600 }, { "epoch": 2.2130013831258646, "eval_accuracy": 0.9554201411161001, "eval_f1_macro": 0.9536541863078912, "eval_loss": 0.2484109252691269, "eval_runtime": 15.2579, "eval_samples_per_second": 204.353, "eval_steps_per_second": 25.561, "step": 1600 }, { "epoch": 2.351313969571231, "grad_norm": 1.0564467906951904, "learning_rate": 4.3650793650793655e-05, "loss": 0.5915, "step": 1700 }, { "epoch": 2.4896265560165975, "grad_norm": 5.982853412628174, "learning_rate": 3.968253968253968e-05, "loss": 0.5167, "step": 1800 }, { "epoch": 2.627939142461964, "grad_norm": 0.10894600301980972, "learning_rate": 3.571428571428572e-05, "loss": 0.4604, "step": 1900 }, { "epoch": 2.7662517289073305, "grad_norm": 29.308454513549805, "learning_rate": 3.1746031746031745e-05, "loss": 0.5138, "step": 2000 }, { "epoch": 2.7662517289073305, "eval_accuracy": 0.9541372674791533, "eval_f1_macro": 0.9495278002552265, "eval_loss": 0.25238117575645447, "eval_runtime": 14.8444, "eval_samples_per_second": 210.046, "eval_steps_per_second": 26.273, "step": 2000 }, { "epoch": 2.904564315352697, "grad_norm": 124.017578125, "learning_rate": 2.777777777777778e-05, "loss": 0.5587, "step": 2100 }, { "epoch": 3.0428769017980635, "grad_norm": 11.674728393554688, "learning_rate": 2.380952380952381e-05, "loss": 0.4878, "step": 2200 }, { "epoch": 3.18118948824343, "grad_norm": 0.5218621492385864, "learning_rate": 1.984126984126984e-05, "loss": 0.3506, "step": 2300 }, { "epoch": 3.3195020746887964, "grad_norm": 10.221122741699219, "learning_rate": 1.5873015873015872e-05, "loss": 0.3697, "step": 2400 }, { "epoch": 3.3195020746887964, "eval_accuracy": 0.9666452854393842, "eval_f1_macro": 0.9636798531055095, "eval_loss": 0.19276651740074158, "eval_runtime": 14.4155, "eval_samples_per_second": 216.295, "eval_steps_per_second": 27.054, "step": 2400 }, { "epoch": 3.4578146611341634, "grad_norm": 162.04151916503906, "learning_rate": 1.1904761904761905e-05, "loss": 0.2989, "step": 2500 }, { "epoch": 3.59612724757953, "grad_norm": 0.0918952003121376, "learning_rate": 7.936507936507936e-06, "loss": 0.3396, "step": 2600 }, { "epoch": 3.7344398340248963, "grad_norm": 0.07032816857099533, "learning_rate": 3.968253968253968e-06, "loss": 0.3549, "step": 2700 }, { "epoch": 3.872752420470263, "grad_norm": 0.07183802127838135, "learning_rate": 0.0, "loss": 0.3393, "step": 2800 }, { "epoch": 3.872752420470263, "eval_accuracy": 0.9724182168056447, "eval_f1_macro": 0.9697252951761609, "eval_loss": 0.15897579491138458, "eval_runtime": 15.3271, "eval_samples_per_second": 203.431, "eval_steps_per_second": 25.445, "step": 2800 } ], "logging_steps": 100, "max_steps": 2800, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 2800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.253602560365056e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }