|
{ |
|
"best_metric": 2.303269863128662, |
|
"best_model_checkpoint": "autotrain-x03ts-qcf7e/checkpoint-435", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 435, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1724137931034483, |
|
"grad_norm": 6.1133551597595215, |
|
"learning_rate": 2.8409090909090912e-05, |
|
"loss": 3.4255, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3448275862068966, |
|
"grad_norm": 8.178117752075195, |
|
"learning_rate": 4.923273657289003e-05, |
|
"loss": 3.0089, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5172413793103449, |
|
"grad_norm": 6.662604808807373, |
|
"learning_rate": 4.603580562659847e-05, |
|
"loss": 2.825, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 9.514023780822754, |
|
"learning_rate": 4.283887468030691e-05, |
|
"loss": 2.8092, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8620689655172413, |
|
"grad_norm": 11.465914726257324, |
|
"learning_rate": 3.964194373401535e-05, |
|
"loss": 2.9111, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.28484848484848485, |
|
"eval_f1_macro": 0.013436249285305889, |
|
"eval_f1_micro": 0.28484848484848485, |
|
"eval_f1_weighted": 0.12630074328187538, |
|
"eval_loss": 2.676663398742676, |
|
"eval_precision_macro": 0.008631772268135905, |
|
"eval_precision_micro": 0.28484848484848485, |
|
"eval_precision_weighted": 0.08113865932047751, |
|
"eval_recall_macro": 0.030303030303030304, |
|
"eval_recall_micro": 0.28484848484848485, |
|
"eval_recall_weighted": 0.28484848484848485, |
|
"eval_runtime": 255.7518, |
|
"eval_samples_per_second": 4.516, |
|
"eval_steps_per_second": 0.285, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.0344827586206897, |
|
"grad_norm": 6.465944766998291, |
|
"learning_rate": 3.644501278772379e-05, |
|
"loss": 2.5956, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.206896551724138, |
|
"grad_norm": 8.603558540344238, |
|
"learning_rate": 3.324808184143223e-05, |
|
"loss": 2.6285, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.3793103448275863, |
|
"grad_norm": 8.11929702758789, |
|
"learning_rate": 3.0051150895140666e-05, |
|
"loss": 2.6964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.5517241379310345, |
|
"grad_norm": 7.409820556640625, |
|
"learning_rate": 2.6854219948849106e-05, |
|
"loss": 2.7531, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.7241379310344827, |
|
"grad_norm": 7.499230861663818, |
|
"learning_rate": 2.3657289002557546e-05, |
|
"loss": 2.5273, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.896551724137931, |
|
"grad_norm": 14.026644706726074, |
|
"learning_rate": 2.0460358056265986e-05, |
|
"loss": 2.5975, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3064935064935065, |
|
"eval_f1_macro": 0.028228053192069863, |
|
"eval_f1_micro": 0.3064935064935065, |
|
"eval_f1_weighted": 0.23352329691937518, |
|
"eval_loss": 2.477508544921875, |
|
"eval_precision_macro": 0.02386167652983027, |
|
"eval_precision_micro": 0.3064935064935065, |
|
"eval_precision_weighted": 0.2057449099744913, |
|
"eval_recall_macro": 0.04448079579994474, |
|
"eval_recall_micro": 0.3064935064935065, |
|
"eval_recall_weighted": 0.3064935064935065, |
|
"eval_runtime": 256.8636, |
|
"eval_samples_per_second": 4.497, |
|
"eval_steps_per_second": 0.284, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0689655172413794, |
|
"grad_norm": 8.43802547454834, |
|
"learning_rate": 1.7263427109974426e-05, |
|
"loss": 2.7131, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.2413793103448274, |
|
"grad_norm": 11.071104049682617, |
|
"learning_rate": 1.4066496163682865e-05, |
|
"loss": 2.4292, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.413793103448276, |
|
"grad_norm": 7.782072067260742, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"loss": 2.3514, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.586206896551724, |
|
"grad_norm": 12.599653244018555, |
|
"learning_rate": 7.672634271099745e-06, |
|
"loss": 2.4591, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.7586206896551726, |
|
"grad_norm": 8.950291633605957, |
|
"learning_rate": 4.475703324808185e-06, |
|
"loss": 2.3889, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9310344827586206, |
|
"grad_norm": 8.916004180908203, |
|
"learning_rate": 1.2787723785166241e-06, |
|
"loss": 2.3493, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.3367965367965368, |
|
"eval_f1_macro": 0.040508170738823165, |
|
"eval_f1_micro": 0.3367965367965368, |
|
"eval_f1_weighted": 0.2413478397672539, |
|
"eval_loss": 2.303269863128662, |
|
"eval_precision_macro": 0.0434917012251169, |
|
"eval_precision_micro": 0.3367965367965368, |
|
"eval_precision_weighted": 0.21644678791353103, |
|
"eval_recall_macro": 0.05803911531326899, |
|
"eval_recall_micro": 0.3367965367965368, |
|
"eval_recall_weighted": 0.3367965367965368, |
|
"eval_runtime": 261.9634, |
|
"eval_samples_per_second": 4.409, |
|
"eval_steps_per_second": 0.279, |
|
"step": 435 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 435, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 227987477625600.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|