|
{ |
|
"best_metric": 0.7413793103448276, |
|
"best_model_checkpoint": "/nesi/nobackup/uoa04081/wxy/model/bert_ft/sft/epoch35_6_1/warmup_ratio_lrtype/schedule-0.3-constant_with_warmup-epoch40-lr-2e-5/checkpoint-476", |
|
"epoch": 28.0, |
|
"eval_steps": 500, |
|
"global_step": 476, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.034482758620689655, |
|
"eval_loss": 1.9626049995422363, |
|
"eval_runtime": 0.3947, |
|
"eval_samples_per_second": 146.932, |
|
"eval_steps_per_second": 5.067, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.8860366344451904, |
|
"eval_runtime": 0.3952, |
|
"eval_samples_per_second": 146.746, |
|
"eval_steps_per_second": 5.06, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.559799313545227, |
|
"eval_runtime": 0.3943, |
|
"eval_samples_per_second": 147.102, |
|
"eval_steps_per_second": 5.072, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.460962176322937, |
|
"eval_runtime": 0.3951, |
|
"eval_samples_per_second": 146.813, |
|
"eval_steps_per_second": 5.063, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.4569206237792969, |
|
"eval_runtime": 0.3956, |
|
"eval_samples_per_second": 146.594, |
|
"eval_steps_per_second": 5.055, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.4397205114364624, |
|
"eval_runtime": 0.3971, |
|
"eval_samples_per_second": 146.051, |
|
"eval_steps_per_second": 5.036, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.3534083366394043, |
|
"eval_runtime": 0.3961, |
|
"eval_samples_per_second": 146.439, |
|
"eval_steps_per_second": 5.05, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5689655172413793, |
|
"eval_loss": 1.2757465839385986, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 147.045, |
|
"eval_steps_per_second": 5.071, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5862068965517241, |
|
"eval_loss": 1.1723886728286743, |
|
"eval_runtime": 0.3961, |
|
"eval_samples_per_second": 146.442, |
|
"eval_steps_per_second": 5.05, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6551724137931034, |
|
"eval_loss": 1.0931514501571655, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 147.197, |
|
"eval_steps_per_second": 5.076, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6206896551724138, |
|
"eval_loss": 1.0942814350128174, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 147.077, |
|
"eval_steps_per_second": 5.072, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6896551724137931, |
|
"eval_loss": 1.0222851037979126, |
|
"eval_runtime": 0.395, |
|
"eval_samples_per_second": 146.846, |
|
"eval_steps_per_second": 5.064, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6379310344827587, |
|
"eval_loss": 1.0476372241973877, |
|
"eval_runtime": 0.3945, |
|
"eval_samples_per_second": 147.003, |
|
"eval_steps_per_second": 5.069, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6724137931034483, |
|
"eval_loss": 1.054601788520813, |
|
"eval_runtime": 0.3939, |
|
"eval_samples_per_second": 147.227, |
|
"eval_steps_per_second": 5.077, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6896551724137931, |
|
"eval_loss": 1.0961247682571411, |
|
"eval_runtime": 0.3955, |
|
"eval_samples_per_second": 146.649, |
|
"eval_steps_per_second": 5.057, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6551724137931034, |
|
"eval_loss": 1.135161280632019, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 147.048, |
|
"eval_steps_per_second": 5.071, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6724137931034483, |
|
"eval_loss": 1.132529377937317, |
|
"eval_runtime": 0.3947, |
|
"eval_samples_per_second": 146.937, |
|
"eval_steps_per_second": 5.067, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6896551724137931, |
|
"eval_loss": 1.1694955825805664, |
|
"eval_runtime": 0.3942, |
|
"eval_samples_per_second": 147.136, |
|
"eval_steps_per_second": 5.074, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6206896551724138, |
|
"eval_loss": 1.2848495244979858, |
|
"eval_runtime": 0.3945, |
|
"eval_samples_per_second": 147.012, |
|
"eval_steps_per_second": 5.069, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5689655172413793, |
|
"eval_loss": 1.4167975187301636, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 147.046, |
|
"eval_steps_per_second": 5.071, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6379310344827587, |
|
"eval_loss": 1.3460503816604614, |
|
"eval_runtime": 0.3949, |
|
"eval_samples_per_second": 146.89, |
|
"eval_steps_per_second": 5.065, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6896551724137931, |
|
"eval_loss": 1.1948856115341187, |
|
"eval_runtime": 0.3941, |
|
"eval_samples_per_second": 147.152, |
|
"eval_steps_per_second": 5.074, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6896551724137931, |
|
"eval_loss": 1.2731900215148926, |
|
"eval_runtime": 0.4055, |
|
"eval_samples_per_second": 143.034, |
|
"eval_steps_per_second": 4.932, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6206896551724138, |
|
"eval_loss": 1.2199504375457764, |
|
"eval_runtime": 0.3945, |
|
"eval_samples_per_second": 147.003, |
|
"eval_steps_per_second": 5.069, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.5172413793103449, |
|
"eval_loss": 1.481724739074707, |
|
"eval_runtime": 0.3944, |
|
"eval_samples_per_second": 147.043, |
|
"eval_steps_per_second": 5.07, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7068965517241379, |
|
"eval_loss": 1.297101616859436, |
|
"eval_runtime": 0.3951, |
|
"eval_samples_per_second": 146.796, |
|
"eval_steps_per_second": 5.062, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6551724137931034, |
|
"eval_loss": 1.3901363611221313, |
|
"eval_runtime": 0.394, |
|
"eval_samples_per_second": 147.193, |
|
"eval_steps_per_second": 5.076, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7413793103448276, |
|
"eval_loss": 1.278436541557312, |
|
"eval_runtime": 0.3946, |
|
"eval_samples_per_second": 146.998, |
|
"eval_steps_per_second": 5.069, |
|
"step": 476 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 680, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 3816334065868800.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|