{ "best_metric": 0.9445157152020526, "best_model_checkpoint": "results/facebook/hubert-base-ls960/42/checkpoint-2800", "epoch": 3.872752420470263, "eval_steps": 400, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13831258644536654, "grad_norm": 13.194905281066895, "learning_rate": 0.00017857142857142857, "loss": 3.2443, "step": 100 }, { "epoch": 0.2766251728907331, "grad_norm": 30.376169204711914, "learning_rate": 0.00035714285714285714, "loss": 2.3348, "step": 200 }, { "epoch": 0.4149377593360996, "grad_norm": 27.163475036621094, "learning_rate": 0.000496031746031746, "loss": 1.7975, "step": 300 }, { "epoch": 0.5532503457814661, "grad_norm": 36.59109115600586, "learning_rate": 0.0004761904761904762, "loss": 2.0247, "step": 400 }, { "epoch": 0.5532503457814661, "eval_accuracy": 0.3563181526619628, "eval_f1_macro": 0.22129907013787337, "eval_loss": 1.99261474609375, "eval_runtime": 12.572, "eval_samples_per_second": 248.011, "eval_steps_per_second": 31.021, "step": 400 }, { "epoch": 0.6915629322268326, "grad_norm": 35.70507049560547, "learning_rate": 0.0004563492063492063, "loss": 1.7503, "step": 500 }, { "epoch": 0.8298755186721992, "grad_norm": 39.92527389526367, "learning_rate": 0.0004365079365079365, "loss": 1.5947, "step": 600 }, { "epoch": 0.9681881051175657, "grad_norm": 56.568458557128906, "learning_rate": 0.0004166666666666667, "loss": 1.5551, "step": 700 }, { "epoch": 1.1065006915629323, "grad_norm": 13.754646301269531, "learning_rate": 0.0003968253968253968, "loss": 1.4236, "step": 800 }, { "epoch": 1.1065006915629323, "eval_accuracy": 0.47241821680564466, "eval_f1_macro": 0.347385003863519, "eval_loss": 1.1670863628387451, "eval_runtime": 12.5271, "eval_samples_per_second": 248.899, "eval_steps_per_second": 31.132, "step": 800 }, { "epoch": 1.2448132780082988, "grad_norm": 71.1982421875, "learning_rate": 0.000376984126984127, "loss": 1.3711, "step": 900 }, { "epoch": 1.3831258644536653, "grad_norm": 9.379792213439941, "learning_rate": 0.00035714285714285714, "loss": 1.3099, "step": 1000 }, { "epoch": 1.5214384508990317, "grad_norm": 90.52821350097656, "learning_rate": 0.00033730158730158733, "loss": 1.2128, "step": 1100 }, { "epoch": 1.6597510373443982, "grad_norm": 42.78731155395508, "learning_rate": 0.00031746031746031746, "loss": 1.1781, "step": 1200 }, { "epoch": 1.6597510373443982, "eval_accuracy": 0.5016035920461834, "eval_f1_macro": 0.37959579544420224, "eval_loss": 1.1146905422210693, "eval_runtime": 12.5371, "eval_samples_per_second": 248.703, "eval_steps_per_second": 31.108, "step": 1200 }, { "epoch": 1.798063623789765, "grad_norm": 8.767428398132324, "learning_rate": 0.00029761904761904765, "loss": 1.0924, "step": 1300 }, { "epoch": 1.9363762102351314, "grad_norm": 35.30613708496094, "learning_rate": 0.0002777777777777778, "loss": 1.0372, "step": 1400 }, { "epoch": 2.074688796680498, "grad_norm": 13.835848808288574, "learning_rate": 0.00025793650793650796, "loss": 0.9892, "step": 1500 }, { "epoch": 2.2130013831258646, "grad_norm": 22.654861450195312, "learning_rate": 0.0002380952380952381, "loss": 0.9702, "step": 1600 }, { "epoch": 2.2130013831258646, "eval_accuracy": 0.5651058370750481, "eval_f1_macro": 0.4853545303034886, "eval_loss": 0.9121200442314148, "eval_runtime": 12.5089, "eval_samples_per_second": 249.263, "eval_steps_per_second": 31.178, "step": 1600 }, { "epoch": 2.351313969571231, "grad_norm": 14.196218490600586, "learning_rate": 0.00021825396825396825, "loss": 0.9265, "step": 1700 }, { "epoch": 2.4896265560165975, "grad_norm": 16.53809928894043, "learning_rate": 0.0001984126984126984, "loss": 0.8034, "step": 1800 }, { "epoch": 2.627939142461964, "grad_norm": 36.136844635009766, "learning_rate": 0.00017857142857142857, "loss": 0.6985, "step": 1900 }, { "epoch": 2.7662517289073305, "grad_norm": 17.420682907104492, "learning_rate": 0.00015873015873015873, "loss": 0.67, "step": 2000 }, { "epoch": 2.7662517289073305, "eval_accuracy": 0.7514432328415651, "eval_f1_macro": 0.763988729870444, "eval_loss": 0.8275340795516968, "eval_runtime": 12.5495, "eval_samples_per_second": 248.457, "eval_steps_per_second": 31.077, "step": 2000 }, { "epoch": 2.904564315352697, "grad_norm": 18.59613800048828, "learning_rate": 0.0001388888888888889, "loss": 0.6103, "step": 2100 }, { "epoch": 3.0428769017980635, "grad_norm": 65.93004608154297, "learning_rate": 0.00011904761904761905, "loss": 0.4783, "step": 2200 }, { "epoch": 3.18118948824343, "grad_norm": 36.451961517333984, "learning_rate": 9.92063492063492e-05, "loss": 0.3912, "step": 2300 }, { "epoch": 3.3195020746887964, "grad_norm": 27.90683364868164, "learning_rate": 7.936507936507937e-05, "loss": 0.3462, "step": 2400 }, { "epoch": 3.3195020746887964, "eval_accuracy": 0.9313662604233482, "eval_f1_macro": 0.9296400757951072, "eval_loss": 0.31274116039276123, "eval_runtime": 12.7469, "eval_samples_per_second": 244.608, "eval_steps_per_second": 30.596, "step": 2400 }, { "epoch": 3.4578146611341634, "grad_norm": 22.907320022583008, "learning_rate": 5.9523809523809524e-05, "loss": 0.2888, "step": 2500 }, { "epoch": 3.59612724757953, "grad_norm": 40.974483489990234, "learning_rate": 3.968253968253968e-05, "loss": 0.2711, "step": 2600 }, { "epoch": 3.7344398340248963, "grad_norm": 1.7515863180160522, "learning_rate": 1.984126984126984e-05, "loss": 0.2292, "step": 2700 }, { "epoch": 3.872752420470263, "grad_norm": 26.21529197692871, "learning_rate": 0.0, "loss": 0.2277, "step": 2800 }, { "epoch": 3.872752420470263, "eval_accuracy": 0.9445157152020526, "eval_f1_macro": 0.9452855408900767, "eval_loss": 0.28722265362739563, "eval_runtime": 12.7658, "eval_samples_per_second": 244.245, "eval_steps_per_second": 30.55, "step": 2800 } ], "logging_steps": 100, "max_steps": 2800, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 2800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.253602560365056e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }