|
{
  "best_metric": 0.9522393282015396,
  "best_model_checkpoint": "../../checkpoint/ddi/pubmedbert-abstract/checkpoint-13447",
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 15820,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_accuracy": 0.938593421973408,
      "eval_loss": 0.23401664197444916,
      "eval_runtime": 9.3525,
      "eval_samples_per_second": 611.174,
      "eval_steps_per_second": 2.459,
      "step": 791
    },
    {
      "epoch": 1.26,
      "grad_norm": 22.48932647705078,
      "learning_rate": 4.683944374209861e-05,
      "loss": 0.1776,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9419174247725682,
      "eval_loss": 0.27159374952316284,
      "eval_runtime": 9.4015,
      "eval_samples_per_second": 607.985,
      "eval_steps_per_second": 2.446,
      "step": 1582
    },
    {
      "epoch": 2.53,
      "grad_norm": 0.30297374725341797,
      "learning_rate": 4.367888748419722e-05,
      "loss": 0.0855,
      "step": 2000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9431420573827851,
      "eval_loss": 0.27298399806022644,
      "eval_runtime": 9.4208,
      "eval_samples_per_second": 606.741,
      "eval_steps_per_second": 2.441,
      "step": 2373
    },
    {
      "epoch": 3.79,
      "grad_norm": 0.08143208175897598,
      "learning_rate": 4.051833122629583e-05,
      "loss": 0.0627,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9382435269419174,
      "eval_loss": 0.33226683735847473,
      "eval_runtime": 9.4056,
      "eval_samples_per_second": 607.725,
      "eval_steps_per_second": 2.445,
      "step": 3164
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9450664800559833,
      "eval_loss": 0.3307989239692688,
      "eval_runtime": 9.3945,
      "eval_samples_per_second": 608.444,
      "eval_steps_per_second": 2.448,
      "step": 3955
    },
    {
      "epoch": 5.06,
      "grad_norm": 0.012913365848362446,
      "learning_rate": 3.735777496839444e-05,
      "loss": 0.0463,
      "step": 4000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9412176347095871,
      "eval_loss": 0.39864641427993774,
      "eval_runtime": 9.3742,
      "eval_samples_per_second": 609.757,
      "eval_steps_per_second": 2.454,
      "step": 4746
    },
    {
      "epoch": 6.32,
      "grad_norm": 2.0644302368164062,
      "learning_rate": 3.419721871049305e-05,
      "loss": 0.0308,
      "step": 5000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9419174247725682,
      "eval_loss": 0.42111822962760925,
      "eval_runtime": 9.3868,
      "eval_samples_per_second": 608.939,
      "eval_steps_per_second": 2.45,
      "step": 5537
    },
    {
      "epoch": 7.59,
      "grad_norm": 30.287784576416016,
      "learning_rate": 3.1036662452591655e-05,
      "loss": 0.0312,
      "step": 6000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.943666899930021,
      "eval_loss": 0.36164581775665283,
      "eval_runtime": 9.3894,
      "eval_samples_per_second": 608.769,
      "eval_steps_per_second": 2.45,
      "step": 6328
    },
    {
      "epoch": 8.85,
      "grad_norm": 0.03705460578203201,
      "learning_rate": 2.7876106194690264e-05,
      "loss": 0.0221,
      "step": 7000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9396431070678797,
      "eval_loss": 0.4309725761413574,
      "eval_runtime": 9.4203,
      "eval_samples_per_second": 606.777,
      "eval_steps_per_second": 2.442,
      "step": 7119
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9438418474457663,
      "eval_loss": 0.4221705198287964,
      "eval_runtime": 9.4325,
      "eval_samples_per_second": 605.989,
      "eval_steps_per_second": 2.438,
      "step": 7910
    },
    {
      "epoch": 10.11,
      "grad_norm": 0.11050642281770706,
      "learning_rate": 2.4715549936788876e-05,
      "loss": 0.0181,
      "step": 8000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9445416375087474,
      "eval_loss": 0.4184616804122925,
      "eval_runtime": 9.4176,
      "eval_samples_per_second": 606.948,
      "eval_steps_per_second": 2.442,
      "step": 8701
    },
    {
      "epoch": 11.38,
      "grad_norm": 0.03253033012151718,
      "learning_rate": 2.1554993678887485e-05,
      "loss": 0.0141,
      "step": 9000
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.945591322603219,
      "eval_loss": 0.46782681345939636,
      "eval_runtime": 9.4133,
      "eval_samples_per_second": 607.226,
      "eval_steps_per_second": 2.443,
      "step": 9492
    },
    {
      "epoch": 12.64,
      "grad_norm": 0.005740019958466291,
      "learning_rate": 1.8394437420986094e-05,
      "loss": 0.0133,
      "step": 10000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9503149055283415,
      "eval_loss": 0.40269356966018677,
      "eval_runtime": 9.4053,
      "eval_samples_per_second": 607.74,
      "eval_steps_per_second": 2.445,
      "step": 10283
    },
    {
      "epoch": 13.91,
      "grad_norm": 0.0024239453487098217,
      "learning_rate": 1.5233881163084704e-05,
      "loss": 0.0082,
      "step": 11000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9473407977606718,
      "eval_loss": 0.45041143894195557,
      "eval_runtime": 9.639,
      "eval_samples_per_second": 593.006,
      "eval_steps_per_second": 2.386,
      "step": 11074
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9504898530440867,
      "eval_loss": 0.47598323225975037,
      "eval_runtime": 9.4326,
      "eval_samples_per_second": 605.982,
      "eval_steps_per_second": 2.438,
      "step": 11865
    },
    {
      "epoch": 15.17,
      "grad_norm": 0.0034840325824916363,
      "learning_rate": 1.2073324905183313e-05,
      "loss": 0.0052,
      "step": 12000
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9448915325402379,
      "eval_loss": 0.45725104212760925,
      "eval_runtime": 9.4366,
      "eval_samples_per_second": 605.728,
      "eval_steps_per_second": 2.437,
      "step": 12656
    },
    {
      "epoch": 16.43,
      "grad_norm": 0.4831530451774597,
      "learning_rate": 8.912768647281922e-06,
      "loss": 0.0042,
      "step": 13000
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.9522393282015396,
      "eval_loss": 0.43564239144325256,
      "eval_runtime": 9.4029,
      "eval_samples_per_second": 607.9,
      "eval_steps_per_second": 2.446,
      "step": 13447
    },
    {
      "epoch": 17.7,
      "grad_norm": 0.0005753316800110042,
      "learning_rate": 5.752212389380531e-06,
      "loss": 0.0037,
      "step": 14000
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.948740377886634,
      "eval_loss": 0.45767056941986084,
      "eval_runtime": 9.3872,
      "eval_samples_per_second": 608.913,
      "eval_steps_per_second": 2.45,
      "step": 14238
    },
    {
      "epoch": 18.96,
      "grad_norm": 0.0019946701359003782,
      "learning_rate": 2.59165613147914e-06,
      "loss": 0.0024,
      "step": 15000
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9492652204338698,
      "eval_loss": 0.4641525149345398,
      "eval_runtime": 9.4075,
      "eval_samples_per_second": 607.599,
      "eval_steps_per_second": 2.445,
      "step": 15029
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9501399580125962,
      "eval_loss": 0.46178367733955383,
      "eval_runtime": 9.3958,
      "eval_samples_per_second": 608.356,
      "eval_steps_per_second": 2.448,
      "step": 15820
    },
    {
      "epoch": 20.0,
      "step": 15820,
      "total_flos": 3.327918265884672e+16,
      "train_loss": 0.03330213655864846,
      "train_runtime": 3337.005,
      "train_samples_per_second": 151.609,
      "train_steps_per_second": 4.741
    }
  ],
  "logging_steps": 1000,
  "max_steps": 15820,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 3.327918265884672e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}
|
|