|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 7721, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 1103, |
|
"train_eval_accuracy": 0.7887755102040817, |
|
"train_eval_f1": 0.7499765233661925, |
|
"train_eval_loss": 0.6592453122138977, |
|
"train_eval_precision": 0.7927385476759929, |
|
"train_eval_recall": 0.7853664471213178, |
|
"train_loss": 0.6592453718185425, |
|
"train_runtime": 296.3413, |
|
"train_samples_per_second": 29.763, |
|
"train_steps_per_second": 3.722 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7685185185185185, |
|
"eval_f1": 0.7339765316819828, |
|
"eval_loss": 0.7040889859199524, |
|
"eval_precision": 0.7863260957104617, |
|
"eval_recall": 0.7767501150267258, |
|
"eval_runtime": 126.8841, |
|
"eval_samples_per_second": 29.791, |
|
"eval_steps_per_second": 3.728, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 2206, |
|
"train_eval_accuracy": 0.8997732426303855, |
|
"train_eval_f1": 0.8934703863536795, |
|
"train_eval_loss": 0.3266890048980713, |
|
"train_eval_precision": 0.9101662801242012, |
|
"train_eval_recall": 0.8995948840878806, |
|
"train_loss": 0.3266890048980713, |
|
"train_runtime": 296.0976, |
|
"train_samples_per_second": 29.787, |
|
"train_steps_per_second": 3.725 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8748677248677249, |
|
"eval_f1": 0.8663052621269062, |
|
"eval_loss": 0.41356033086776733, |
|
"eval_precision": 0.8855161789508988, |
|
"eval_recall": 0.8760987779104309, |
|
"eval_runtime": 127.0363, |
|
"eval_samples_per_second": 29.755, |
|
"eval_steps_per_second": 3.723, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 3309, |
|
"train_eval_accuracy": 0.9326530612244898, |
|
"train_eval_f1": 0.9319607388705672, |
|
"train_eval_loss": 0.25093671679496765, |
|
"train_eval_precision": 0.934719996865382, |
|
"train_eval_recall": 0.9321842163347528, |
|
"train_loss": 0.25093671679496765, |
|
"train_runtime": 296.2824, |
|
"train_samples_per_second": 29.769, |
|
"train_steps_per_second": 3.723 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9042328042328043, |
|
"eval_f1": 0.9039529271468891, |
|
"eval_loss": 0.39229774475097656, |
|
"eval_precision": 0.9080753320256745, |
|
"eval_recall": 0.9062393448267169, |
|
"eval_runtime": 126.7759, |
|
"eval_samples_per_second": 29.816, |
|
"eval_steps_per_second": 3.731, |
|
"step": 3309 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 4412, |
|
"train_eval_accuracy": 0.9439909297052154, |
|
"train_eval_f1": 0.9436709080792405, |
|
"train_eval_loss": 0.2164522260427475, |
|
"train_eval_precision": 0.9456422075056565, |
|
"train_eval_recall": 0.9435377727151975, |
|
"train_loss": 0.2164521962404251, |
|
"train_runtime": 296.0068, |
|
"train_samples_per_second": 29.797, |
|
"train_steps_per_second": 3.726 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9050264550264551, |
|
"eval_f1": 0.9051255458970469, |
|
"eval_loss": 0.42613422870635986, |
|
"eval_precision": 0.9081998981335793, |
|
"eval_recall": 0.9074460110563182, |
|
"eval_runtime": 126.5384, |
|
"eval_samples_per_second": 29.872, |
|
"eval_steps_per_second": 3.738, |
|
"step": 4412 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 5515, |
|
"train_eval_accuracy": 0.9621315192743765, |
|
"train_eval_f1": 0.9621058175036092, |
|
"train_eval_loss": 0.1465868204832077, |
|
"train_eval_precision": 0.963177617453184, |
|
"train_eval_recall": 0.9619998226212546, |
|
"train_loss": 0.1465868204832077, |
|
"train_runtime": 295.9817, |
|
"train_samples_per_second": 29.799, |
|
"train_steps_per_second": 3.727 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.903968253968254, |
|
"eval_f1": 0.9038650845517326, |
|
"eval_loss": 0.4323442280292511, |
|
"eval_precision": 0.9057544963051973, |
|
"eval_recall": 0.9056543685348158, |
|
"eval_runtime": 126.9132, |
|
"eval_samples_per_second": 29.784, |
|
"eval_steps_per_second": 3.727, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 6618, |
|
"train_eval_accuracy": 0.9688208616780045, |
|
"train_eval_f1": 0.9686094121707401, |
|
"train_eval_loss": 0.12130556255578995, |
|
"train_eval_precision": 0.9692092193201128, |
|
"train_eval_recall": 0.968624092631756, |
|
"train_loss": 0.12130556255578995, |
|
"train_runtime": 295.4832, |
|
"train_samples_per_second": 29.849, |
|
"train_steps_per_second": 3.733 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9074074074074074, |
|
"eval_f1": 0.9074718452978288, |
|
"eval_loss": 0.4809330999851227, |
|
"eval_precision": 0.9099023812111475, |
|
"eval_recall": 0.9094968555241478, |
|
"eval_runtime": 126.5478, |
|
"eval_samples_per_second": 29.87, |
|
"eval_steps_per_second": 3.738, |
|
"step": 6618 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7721, |
|
"train_eval_accuracy": 0.9763038548752835, |
|
"train_eval_f1": 0.9762453458735343, |
|
"train_eval_loss": 0.09657437354326248, |
|
"train_eval_precision": 0.9765774895508033, |
|
"train_eval_recall": 0.9763401145817671, |
|
"train_loss": 0.09657437354326248, |
|
"train_runtime": 296.9603, |
|
"train_samples_per_second": 29.701, |
|
"train_steps_per_second": 3.714 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9082010582010582, |
|
"eval_f1": 0.9079087150920282, |
|
"eval_loss": 0.4926854968070984, |
|
"eval_precision": 0.9100340807600719, |
|
"eval_recall": 0.9087758232905191, |
|
"eval_runtime": 127.7522, |
|
"eval_samples_per_second": 29.589, |
|
"eval_steps_per_second": 3.702, |
|
"step": 7721 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7721, |
|
"total_flos": 1.62503106619392e+16, |
|
"train_loss": 0.5412681450144088, |
|
"train_runtime": 9360.1511, |
|
"train_samples_per_second": 14.134, |
|
"train_steps_per_second": 1.768 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9082010582010582, |
|
"eval_f1": 0.9079087150920282, |
|
"eval_loss": 0.4926854968070984, |
|
"eval_precision": 0.9100340807600719, |
|
"eval_recall": 0.9087758232905191, |
|
"eval_runtime": 127.3673, |
|
"eval_samples_per_second": 29.678, |
|
"eval_steps_per_second": 3.714, |
|
"step": 7721 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7721, |
|
"train_en_eval_accuracy": 0.9763038548752835, |
|
"train_en_eval_f1": 0.9762453458735343, |
|
"train_en_eval_loss": 0.09657437354326248, |
|
"train_en_eval_precision": 0.9765774895508033, |
|
"train_en_eval_recall": 0.9763401145817671, |
|
"train_en_loss": 0.09657437354326248, |
|
"train_en_runtime": 296.2956, |
|
"train_en_samples_per_second": 29.768, |
|
"train_en_steps_per_second": 3.723 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 7721, |
|
"test_en_eval_accuracy": 0.9082010582010582, |
|
"test_en_eval_f1": 0.9079087150920282, |
|
"test_en_eval_loss": 0.4926854372024536, |
|
"test_en_eval_precision": 0.9100340807600719, |
|
"test_en_eval_recall": 0.9087758232905191, |
|
"test_en_loss": 0.4926854968070984, |
|
"test_en_runtime": 126.7992, |
|
"test_en_samples_per_second": 29.811, |
|
"test_en_steps_per_second": 3.73 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16545, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.62503106619392e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|