{
  "best_metric": 0.9445157152020526,
  "best_model_checkpoint": "results/facebook/hubert-base-ls960/42/checkpoint-2800",
  "epoch": 3.872752420470263,
  "eval_steps": 400,
  "global_step": 2800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13831258644536654,
      "grad_norm": 13.194905281066895,
      "learning_rate": 0.00017857142857142857,
      "loss": 3.2443,
      "step": 100
    },
    {
      "epoch": 0.2766251728907331,
      "grad_norm": 30.376169204711914,
      "learning_rate": 0.00035714285714285714,
      "loss": 2.3348,
      "step": 200
    },
    {
      "epoch": 0.4149377593360996,
      "grad_norm": 27.163475036621094,
      "learning_rate": 0.000496031746031746,
      "loss": 1.7975,
      "step": 300
    },
    {
      "epoch": 0.5532503457814661,
      "grad_norm": 36.59109115600586,
      "learning_rate": 0.0004761904761904762,
      "loss": 2.0247,
      "step": 400
    },
    {
      "epoch": 0.5532503457814661,
      "eval_accuracy": 0.3563181526619628,
      "eval_f1_macro": 0.22129907013787337,
      "eval_loss": 1.99261474609375,
      "eval_runtime": 12.572,
      "eval_samples_per_second": 248.011,
      "eval_steps_per_second": 31.021,
      "step": 400
    },
    {
      "epoch": 0.6915629322268326,
      "grad_norm": 35.70507049560547,
      "learning_rate": 0.0004563492063492063,
      "loss": 1.7503,
      "step": 500
    },
    {
      "epoch": 0.8298755186721992,
      "grad_norm": 39.92527389526367,
      "learning_rate": 0.0004365079365079365,
      "loss": 1.5947,
      "step": 600
    },
    {
      "epoch": 0.9681881051175657,
      "grad_norm": 56.568458557128906,
      "learning_rate": 0.0004166666666666667,
      "loss": 1.5551,
      "step": 700
    },
    {
      "epoch": 1.1065006915629323,
      "grad_norm": 13.754646301269531,
      "learning_rate": 0.0003968253968253968,
      "loss": 1.4236,
      "step": 800
    },
    {
      "epoch": 1.1065006915629323,
      "eval_accuracy": 0.47241821680564466,
      "eval_f1_macro": 0.347385003863519,
      "eval_loss": 1.1670863628387451,
      "eval_runtime": 12.5271,
      "eval_samples_per_second": 248.899,
      "eval_steps_per_second": 31.132,
      "step": 800
    },
    {
      "epoch": 1.2448132780082988,
      "grad_norm": 71.1982421875,
      "learning_rate": 0.000376984126984127,
      "loss": 1.3711,
      "step": 900
    },
    {
      "epoch": 1.3831258644536653,
      "grad_norm": 9.379792213439941,
      "learning_rate": 0.00035714285714285714,
      "loss": 1.3099,
      "step": 1000
    },
    {
      "epoch": 1.5214384508990317,
      "grad_norm": 90.52821350097656,
      "learning_rate": 0.00033730158730158733,
      "loss": 1.2128,
      "step": 1100
    },
    {
      "epoch": 1.6597510373443982,
      "grad_norm": 42.78731155395508,
      "learning_rate": 0.00031746031746031746,
      "loss": 1.1781,
      "step": 1200
    },
    {
      "epoch": 1.6597510373443982,
      "eval_accuracy": 0.5016035920461834,
      "eval_f1_macro": 0.37959579544420224,
      "eval_loss": 1.1146905422210693,
      "eval_runtime": 12.5371,
      "eval_samples_per_second": 248.703,
      "eval_steps_per_second": 31.108,
      "step": 1200
    },
    {
      "epoch": 1.798063623789765,
      "grad_norm": 8.767428398132324,
      "learning_rate": 0.00029761904761904765,
      "loss": 1.0924,
      "step": 1300
    },
    {
      "epoch": 1.9363762102351314,
      "grad_norm": 35.30613708496094,
      "learning_rate": 0.0002777777777777778,
      "loss": 1.0372,
      "step": 1400
    },
    {
      "epoch": 2.074688796680498,
      "grad_norm": 13.835848808288574,
      "learning_rate": 0.00025793650793650796,
      "loss": 0.9892,
      "step": 1500
    },
    {
      "epoch": 2.2130013831258646,
      "grad_norm": 22.654861450195312,
      "learning_rate": 0.0002380952380952381,
      "loss": 0.9702,
      "step": 1600
    },
    {
      "epoch": 2.2130013831258646,
      "eval_accuracy": 0.5651058370750481,
      "eval_f1_macro": 0.4853545303034886,
      "eval_loss": 0.9121200442314148,
      "eval_runtime": 12.5089,
      "eval_samples_per_second": 249.263,
      "eval_steps_per_second": 31.178,
      "step": 1600
    },
    {
      "epoch": 2.351313969571231,
      "grad_norm": 14.196218490600586,
      "learning_rate": 0.00021825396825396825,
      "loss": 0.9265,
      "step": 1700
    },
    {
      "epoch": 2.4896265560165975,
      "grad_norm": 16.53809928894043,
      "learning_rate": 0.0001984126984126984,
      "loss": 0.8034,
      "step": 1800
    },
    {
      "epoch": 2.627939142461964,
      "grad_norm": 36.136844635009766,
      "learning_rate": 0.00017857142857142857,
      "loss": 0.6985,
      "step": 1900
    },
    {
      "epoch": 2.7662517289073305,
      "grad_norm": 17.420682907104492,
      "learning_rate": 0.00015873015873015873,
      "loss": 0.67,
      "step": 2000
    },
    {
      "epoch": 2.7662517289073305,
      "eval_accuracy": 0.7514432328415651,
      "eval_f1_macro": 0.763988729870444,
      "eval_loss": 0.8275340795516968,
      "eval_runtime": 12.5495,
      "eval_samples_per_second": 248.457,
      "eval_steps_per_second": 31.077,
      "step": 2000
    },
    {
      "epoch": 2.904564315352697,
      "grad_norm": 18.59613800048828,
      "learning_rate": 0.0001388888888888889,
      "loss": 0.6103,
      "step": 2100
    },
    {
      "epoch": 3.0428769017980635,
      "grad_norm": 65.93004608154297,
      "learning_rate": 0.00011904761904761905,
      "loss": 0.4783,
      "step": 2200
    },
    {
      "epoch": 3.18118948824343,
      "grad_norm": 36.451961517333984,
      "learning_rate": 9.92063492063492e-05,
      "loss": 0.3912,
      "step": 2300
    },
    {
      "epoch": 3.3195020746887964,
      "grad_norm": 27.90683364868164,
      "learning_rate": 7.936507936507937e-05,
      "loss": 0.3462,
      "step": 2400
    },
    {
      "epoch": 3.3195020746887964,
      "eval_accuracy": 0.9313662604233482,
      "eval_f1_macro": 0.9296400757951072,
      "eval_loss": 0.31274116039276123,
      "eval_runtime": 12.7469,
      "eval_samples_per_second": 244.608,
      "eval_steps_per_second": 30.596,
      "step": 2400
    },
    {
      "epoch": 3.4578146611341634,
      "grad_norm": 22.907320022583008,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 0.2888,
      "step": 2500
    },
    {
      "epoch": 3.59612724757953,
      "grad_norm": 40.974483489990234,
      "learning_rate": 3.968253968253968e-05,
      "loss": 0.2711,
      "step": 2600
    },
    {
      "epoch": 3.7344398340248963,
      "grad_norm": 1.7515863180160522,
      "learning_rate": 1.984126984126984e-05,
      "loss": 0.2292,
      "step": 2700
    },
    {
      "epoch": 3.872752420470263,
      "grad_norm": 26.21529197692871,
      "learning_rate": 0.0,
      "loss": 0.2277,
      "step": 2800
    },
    {
      "epoch": 3.872752420470263,
      "eval_accuracy": 0.9445157152020526,
      "eval_f1_macro": 0.9452855408900767,
      "eval_loss": 0.28722265362739563,
      "eval_runtime": 12.7658,
      "eval_samples_per_second": 244.245,
      "eval_steps_per_second": 30.55,
      "step": 2800
    }
  ],
  "logging_steps": 100,
  "max_steps": 2800,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 2800,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.253602560365056e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}