ddi-pubmedbert-abstract / trainer_state.json
jialicheng's picture
Upload folder using huggingface_hub
99d9323 verified
{
"best_metric": 0.9522393282015396,
"best_model_checkpoint": "../../checkpoint/ddi/pubmedbert-abstract/checkpoint-13447",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 15820,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.938593421973408,
"eval_loss": 0.23401664197444916,
"eval_runtime": 9.3525,
"eval_samples_per_second": 611.174,
"eval_steps_per_second": 2.459,
"step": 791
},
{
"epoch": 1.26,
"grad_norm": 22.48932647705078,
"learning_rate": 4.683944374209861e-05,
"loss": 0.1776,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.9419174247725682,
"eval_loss": 0.27159374952316284,
"eval_runtime": 9.4015,
"eval_samples_per_second": 607.985,
"eval_steps_per_second": 2.446,
"step": 1582
},
{
"epoch": 2.53,
"grad_norm": 0.30297374725341797,
"learning_rate": 4.367888748419722e-05,
"loss": 0.0855,
"step": 2000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9431420573827851,
"eval_loss": 0.27298399806022644,
"eval_runtime": 9.4208,
"eval_samples_per_second": 606.741,
"eval_steps_per_second": 2.441,
"step": 2373
},
{
"epoch": 3.79,
"grad_norm": 0.08143208175897598,
"learning_rate": 4.051833122629583e-05,
"loss": 0.0627,
"step": 3000
},
{
"epoch": 4.0,
"eval_accuracy": 0.9382435269419174,
"eval_loss": 0.33226683735847473,
"eval_runtime": 9.4056,
"eval_samples_per_second": 607.725,
"eval_steps_per_second": 2.445,
"step": 3164
},
{
"epoch": 5.0,
"eval_accuracy": 0.9450664800559833,
"eval_loss": 0.3307989239692688,
"eval_runtime": 9.3945,
"eval_samples_per_second": 608.444,
"eval_steps_per_second": 2.448,
"step": 3955
},
{
"epoch": 5.06,
"grad_norm": 0.012913365848362446,
"learning_rate": 3.735777496839444e-05,
"loss": 0.0463,
"step": 4000
},
{
"epoch": 6.0,
"eval_accuracy": 0.9412176347095871,
"eval_loss": 0.39864641427993774,
"eval_runtime": 9.3742,
"eval_samples_per_second": 609.757,
"eval_steps_per_second": 2.454,
"step": 4746
},
{
"epoch": 6.32,
"grad_norm": 2.0644302368164062,
"learning_rate": 3.419721871049305e-05,
"loss": 0.0308,
"step": 5000
},
{
"epoch": 7.0,
"eval_accuracy": 0.9419174247725682,
"eval_loss": 0.42111822962760925,
"eval_runtime": 9.3868,
"eval_samples_per_second": 608.939,
"eval_steps_per_second": 2.45,
"step": 5537
},
{
"epoch": 7.59,
"grad_norm": 30.287784576416016,
"learning_rate": 3.1036662452591655e-05,
"loss": 0.0312,
"step": 6000
},
{
"epoch": 8.0,
"eval_accuracy": 0.943666899930021,
"eval_loss": 0.36164581775665283,
"eval_runtime": 9.3894,
"eval_samples_per_second": 608.769,
"eval_steps_per_second": 2.45,
"step": 6328
},
{
"epoch": 8.85,
"grad_norm": 0.03705460578203201,
"learning_rate": 2.7876106194690264e-05,
"loss": 0.0221,
"step": 7000
},
{
"epoch": 9.0,
"eval_accuracy": 0.9396431070678797,
"eval_loss": 0.4309725761413574,
"eval_runtime": 9.4203,
"eval_samples_per_second": 606.777,
"eval_steps_per_second": 2.442,
"step": 7119
},
{
"epoch": 10.0,
"eval_accuracy": 0.9438418474457663,
"eval_loss": 0.4221705198287964,
"eval_runtime": 9.4325,
"eval_samples_per_second": 605.989,
"eval_steps_per_second": 2.438,
"step": 7910
},
{
"epoch": 10.11,
"grad_norm": 0.11050642281770706,
"learning_rate": 2.4715549936788876e-05,
"loss": 0.0181,
"step": 8000
},
{
"epoch": 11.0,
"eval_accuracy": 0.9445416375087474,
"eval_loss": 0.4184616804122925,
"eval_runtime": 9.4176,
"eval_samples_per_second": 606.948,
"eval_steps_per_second": 2.442,
"step": 8701
},
{
"epoch": 11.38,
"grad_norm": 0.03253033012151718,
"learning_rate": 2.1554993678887485e-05,
"loss": 0.0141,
"step": 9000
},
{
"epoch": 12.0,
"eval_accuracy": 0.945591322603219,
"eval_loss": 0.46782681345939636,
"eval_runtime": 9.4133,
"eval_samples_per_second": 607.226,
"eval_steps_per_second": 2.443,
"step": 9492
},
{
"epoch": 12.64,
"grad_norm": 0.005740019958466291,
"learning_rate": 1.8394437420986094e-05,
"loss": 0.0133,
"step": 10000
},
{
"epoch": 13.0,
"eval_accuracy": 0.9503149055283415,
"eval_loss": 0.40269356966018677,
"eval_runtime": 9.4053,
"eval_samples_per_second": 607.74,
"eval_steps_per_second": 2.445,
"step": 10283
},
{
"epoch": 13.91,
"grad_norm": 0.0024239453487098217,
"learning_rate": 1.5233881163084704e-05,
"loss": 0.0082,
"step": 11000
},
{
"epoch": 14.0,
"eval_accuracy": 0.9473407977606718,
"eval_loss": 0.45041143894195557,
"eval_runtime": 9.639,
"eval_samples_per_second": 593.006,
"eval_steps_per_second": 2.386,
"step": 11074
},
{
"epoch": 15.0,
"eval_accuracy": 0.9504898530440867,
"eval_loss": 0.47598323225975037,
"eval_runtime": 9.4326,
"eval_samples_per_second": 605.982,
"eval_steps_per_second": 2.438,
"step": 11865
},
{
"epoch": 15.17,
"grad_norm": 0.0034840325824916363,
"learning_rate": 1.2073324905183313e-05,
"loss": 0.0052,
"step": 12000
},
{
"epoch": 16.0,
"eval_accuracy": 0.9448915325402379,
"eval_loss": 0.45725104212760925,
"eval_runtime": 9.4366,
"eval_samples_per_second": 605.728,
"eval_steps_per_second": 2.437,
"step": 12656
},
{
"epoch": 16.43,
"grad_norm": 0.4831530451774597,
"learning_rate": 8.912768647281922e-06,
"loss": 0.0042,
"step": 13000
},
{
"epoch": 17.0,
"eval_accuracy": 0.9522393282015396,
"eval_loss": 0.43564239144325256,
"eval_runtime": 9.4029,
"eval_samples_per_second": 607.9,
"eval_steps_per_second": 2.446,
"step": 13447
},
{
"epoch": 17.7,
"grad_norm": 0.0005753316800110042,
"learning_rate": 5.752212389380531e-06,
"loss": 0.0037,
"step": 14000
},
{
"epoch": 18.0,
"eval_accuracy": 0.948740377886634,
"eval_loss": 0.45767056941986084,
"eval_runtime": 9.3872,
"eval_samples_per_second": 608.913,
"eval_steps_per_second": 2.45,
"step": 14238
},
{
"epoch": 18.96,
"grad_norm": 0.0019946701359003782,
"learning_rate": 2.59165613147914e-06,
"loss": 0.0024,
"step": 15000
},
{
"epoch": 19.0,
"eval_accuracy": 0.9492652204338698,
"eval_loss": 0.4641525149345398,
"eval_runtime": 9.4075,
"eval_samples_per_second": 607.599,
"eval_steps_per_second": 2.445,
"step": 15029
},
{
"epoch": 20.0,
"eval_accuracy": 0.9501399580125962,
"eval_loss": 0.46178367733955383,
"eval_runtime": 9.3958,
"eval_samples_per_second": 608.356,
"eval_steps_per_second": 2.448,
"step": 15820
},
{
"epoch": 20.0,
"step": 15820,
"total_flos": 3.327918265884672e+16,
"train_loss": 0.03330213655864846,
"train_runtime": 3337.005,
"train_samples_per_second": 151.609,
"train_steps_per_second": 4.741
}
],
"logging_steps": 1000,
"max_steps": 15820,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.327918265884672e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}