|
{ |
|
"best_metric": 0.9329698524695318, |
|
"best_model_checkpoint": "results/facebook/hubert-base-ls960/42/_retain/checkpoint-2800", |
|
"epoch": 4.017216642754663, |
|
"eval_steps": 400, |
|
"global_step": 2800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14347202295552366, |
|
"grad_norm": 7.395946025848389, |
|
"learning_rate": 0.00017857142857142857, |
|
"loss": 3.276, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28694404591104733, |
|
"grad_norm": 64.37727355957031, |
|
"learning_rate": 0.00035714285714285714, |
|
"loss": 2.5895, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.430416068866571, |
|
"grad_norm": 37.723262786865234, |
|
"learning_rate": 0.000496031746031746, |
|
"loss": 2.0702, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5738880918220947, |
|
"grad_norm": 34.24871826171875, |
|
"learning_rate": 0.0004761904761904762, |
|
"loss": 1.9753, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5738880918220947, |
|
"eval_accuracy": 0.39191789608723543, |
|
"eval_f1_macro": 0.2435662437168276, |
|
"eval_loss": 1.6742650270462036, |
|
"eval_runtime": 12.5972, |
|
"eval_samples_per_second": 247.516, |
|
"eval_steps_per_second": 30.959, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7173601147776184, |
|
"grad_norm": 54.40435791015625, |
|
"learning_rate": 0.0004563492063492063, |
|
"loss": 1.8347, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.860832137733142, |
|
"grad_norm": 95.11190795898438, |
|
"learning_rate": 0.0004365079365079365, |
|
"loss": 1.6797, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0043041606886658, |
|
"grad_norm": 57.426300048828125, |
|
"learning_rate": 0.0004166666666666667, |
|
"loss": 1.5546, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1477761836441893, |
|
"grad_norm": 26.83301544189453, |
|
"learning_rate": 0.0003968253968253968, |
|
"loss": 1.4852, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1477761836441893, |
|
"eval_accuracy": 0.43585631815266196, |
|
"eval_f1_macro": 0.35693788277668864, |
|
"eval_loss": 1.400166392326355, |
|
"eval_runtime": 12.5264, |
|
"eval_samples_per_second": 248.914, |
|
"eval_steps_per_second": 31.134, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.291248206599713, |
|
"grad_norm": 110.81356048583984, |
|
"learning_rate": 0.000376984126984127, |
|
"loss": 1.3436, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.4347202295552366, |
|
"grad_norm": 32.543121337890625, |
|
"learning_rate": 0.00035714285714285714, |
|
"loss": 1.2294, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.5781922525107603, |
|
"grad_norm": 56.1235237121582, |
|
"learning_rate": 0.00033730158730158733, |
|
"loss": 1.2193, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.721664275466284, |
|
"grad_norm": 45.8999137878418, |
|
"learning_rate": 0.00031746031746031746, |
|
"loss": 1.1012, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.721664275466284, |
|
"eval_accuracy": 0.5, |
|
"eval_f1_macro": 0.40820204518718034, |
|
"eval_loss": 1.1420663595199585, |
|
"eval_runtime": 12.5482, |
|
"eval_samples_per_second": 248.481, |
|
"eval_steps_per_second": 31.08, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.8651362984218078, |
|
"grad_norm": 61.26133728027344, |
|
"learning_rate": 0.00029761904761904765, |
|
"loss": 1.0592, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.0086083213773316, |
|
"grad_norm": 14.982177734375, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 0.9699, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.152080344332855, |
|
"grad_norm": 81.27915954589844, |
|
"learning_rate": 0.00025793650793650796, |
|
"loss": 0.8729, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.2955523672883786, |
|
"grad_norm": 20.86075210571289, |
|
"learning_rate": 0.0002380952380952381, |
|
"loss": 0.8175, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2955523672883786, |
|
"eval_accuracy": 0.7113534316869788, |
|
"eval_f1_macro": 0.7266787228808548, |
|
"eval_loss": 0.9044921398162842, |
|
"eval_runtime": 12.5282, |
|
"eval_samples_per_second": 248.878, |
|
"eval_steps_per_second": 31.13, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 25.487239837646484, |
|
"learning_rate": 0.00021825396825396825, |
|
"loss": 0.7922, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.582496413199426, |
|
"grad_norm": 63.39326477050781, |
|
"learning_rate": 0.0001984126984126984, |
|
"loss": 0.6825, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.72596843615495, |
|
"grad_norm": 28.30609893798828, |
|
"learning_rate": 0.00017857142857142857, |
|
"loss": 0.6263, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.869440459110473, |
|
"grad_norm": 20.287073135375977, |
|
"learning_rate": 0.00015873015873015873, |
|
"loss": 0.5372, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.869440459110473, |
|
"eval_accuracy": 0.8787684413085312, |
|
"eval_f1_macro": 0.8885767692084414, |
|
"eval_loss": 0.48355233669281006, |
|
"eval_runtime": 12.5225, |
|
"eval_samples_per_second": 248.991, |
|
"eval_steps_per_second": 31.144, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.012912482065997, |
|
"grad_norm": 23.214784622192383, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 0.4395, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.1563845050215207, |
|
"grad_norm": 60.17699432373047, |
|
"learning_rate": 0.00011904761904761905, |
|
"loss": 0.3497, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.2998565279770444, |
|
"grad_norm": 1.4547343254089355, |
|
"learning_rate": 9.92063492063492e-05, |
|
"loss": 0.3281, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.443328550932568, |
|
"grad_norm": 8.235297203063965, |
|
"learning_rate": 7.936507936507937e-05, |
|
"loss": 0.3182, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.443328550932568, |
|
"eval_accuracy": 0.9153303399615138, |
|
"eval_f1_macro": 0.916711729246869, |
|
"eval_loss": 0.41809144616127014, |
|
"eval_runtime": 12.7913, |
|
"eval_samples_per_second": 243.759, |
|
"eval_steps_per_second": 30.489, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.586800573888092, |
|
"grad_norm": 24.18486213684082, |
|
"learning_rate": 5.9523809523809524e-05, |
|
"loss": 0.2798, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.7302725968436157, |
|
"grad_norm": 12.47843074798584, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.2553, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.8737446197991394, |
|
"grad_norm": 24.14739990234375, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.231, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 4.017216642754663, |
|
"grad_norm": 4.063531398773193, |
|
"learning_rate": 0.0, |
|
"loss": 0.2035, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.017216642754663, |
|
"eval_accuracy": 0.9329698524695318, |
|
"eval_f1_macro": 0.9320770657825714, |
|
"eval_loss": 0.33222800493240356, |
|
"eval_runtime": 12.7416, |
|
"eval_samples_per_second": 244.711, |
|
"eval_steps_per_second": 30.608, |
|
"step": 2800 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 2800, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.2540383690752e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|