{ "best_metric": 0.9522393282015396, "best_model_checkpoint": "../../checkpoint/ddi/pubmedbert-abstract/checkpoint-13447", "epoch": 20.0, "eval_steps": 500, "global_step": 15820, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.938593421973408, "eval_loss": 0.23401664197444916, "eval_runtime": 9.3525, "eval_samples_per_second": 611.174, "eval_steps_per_second": 2.459, "step": 791 }, { "epoch": 1.26, "grad_norm": 22.48932647705078, "learning_rate": 4.683944374209861e-05, "loss": 0.1776, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.9419174247725682, "eval_loss": 0.27159374952316284, "eval_runtime": 9.4015, "eval_samples_per_second": 607.985, "eval_steps_per_second": 2.446, "step": 1582 }, { "epoch": 2.53, "grad_norm": 0.30297374725341797, "learning_rate": 4.367888748419722e-05, "loss": 0.0855, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.9431420573827851, "eval_loss": 0.27298399806022644, "eval_runtime": 9.4208, "eval_samples_per_second": 606.741, "eval_steps_per_second": 2.441, "step": 2373 }, { "epoch": 3.79, "grad_norm": 0.08143208175897598, "learning_rate": 4.051833122629583e-05, "loss": 0.0627, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.9382435269419174, "eval_loss": 0.33226683735847473, "eval_runtime": 9.4056, "eval_samples_per_second": 607.725, "eval_steps_per_second": 2.445, "step": 3164 }, { "epoch": 5.0, "eval_accuracy": 0.9450664800559833, "eval_loss": 0.3307989239692688, "eval_runtime": 9.3945, "eval_samples_per_second": 608.444, "eval_steps_per_second": 2.448, "step": 3955 }, { "epoch": 5.06, "grad_norm": 0.012913365848362446, "learning_rate": 3.735777496839444e-05, "loss": 0.0463, "step": 4000 }, { "epoch": 6.0, "eval_accuracy": 0.9412176347095871, "eval_loss": 0.39864641427993774, "eval_runtime": 9.3742, "eval_samples_per_second": 609.757, "eval_steps_per_second": 2.454, "step": 4746 }, { "epoch": 6.32, "grad_norm": 2.0644302368164062, "learning_rate": 3.419721871049305e-05, "loss": 0.0308, "step": 5000 }, { "epoch": 7.0, "eval_accuracy": 0.9419174247725682, "eval_loss": 0.42111822962760925, "eval_runtime": 9.3868, "eval_samples_per_second": 608.939, "eval_steps_per_second": 2.45, "step": 5537 }, { "epoch": 7.59, "grad_norm": 30.287784576416016, "learning_rate": 3.1036662452591655e-05, "loss": 0.0312, "step": 6000 }, { "epoch": 8.0, "eval_accuracy": 0.943666899930021, "eval_loss": 0.36164581775665283, "eval_runtime": 9.3894, "eval_samples_per_second": 608.769, "eval_steps_per_second": 2.45, "step": 6328 }, { "epoch": 8.85, "grad_norm": 0.03705460578203201, "learning_rate": 2.7876106194690264e-05, "loss": 0.0221, "step": 7000 }, { "epoch": 9.0, "eval_accuracy": 0.9396431070678797, "eval_loss": 0.4309725761413574, "eval_runtime": 9.4203, "eval_samples_per_second": 606.777, "eval_steps_per_second": 2.442, "step": 7119 }, { "epoch": 10.0, "eval_accuracy": 0.9438418474457663, "eval_loss": 0.4221705198287964, "eval_runtime": 9.4325, "eval_samples_per_second": 605.989, "eval_steps_per_second": 2.438, "step": 7910 }, { "epoch": 10.11, "grad_norm": 0.11050642281770706, "learning_rate": 2.4715549936788876e-05, "loss": 0.0181, "step": 8000 }, { "epoch": 11.0, "eval_accuracy": 0.9445416375087474, "eval_loss": 0.4184616804122925, "eval_runtime": 9.4176, "eval_samples_per_second": 606.948, "eval_steps_per_second": 2.442, "step": 8701 }, { "epoch": 11.38, "grad_norm": 0.03253033012151718, "learning_rate": 2.1554993678887485e-05, "loss": 0.0141, "step": 9000 }, { "epoch": 12.0, "eval_accuracy": 0.945591322603219, "eval_loss": 0.46782681345939636, "eval_runtime": 9.4133, "eval_samples_per_second": 607.226, "eval_steps_per_second": 2.443, "step": 9492 }, { "epoch": 12.64, "grad_norm": 0.005740019958466291, "learning_rate": 1.8394437420986094e-05, "loss": 0.0133, "step": 10000 }, { "epoch": 13.0, "eval_accuracy": 0.9503149055283415, "eval_loss": 0.40269356966018677, "eval_runtime": 9.4053, "eval_samples_per_second": 607.74, "eval_steps_per_second": 2.445, "step": 10283 }, { "epoch": 13.91, "grad_norm": 0.0024239453487098217, "learning_rate": 1.5233881163084704e-05, "loss": 0.0082, "step": 11000 }, { "epoch": 14.0, "eval_accuracy": 0.9473407977606718, "eval_loss": 0.45041143894195557, "eval_runtime": 9.639, "eval_samples_per_second": 593.006, "eval_steps_per_second": 2.386, "step": 11074 }, { "epoch": 15.0, "eval_accuracy": 0.9504898530440867, "eval_loss": 0.47598323225975037, "eval_runtime": 9.4326, "eval_samples_per_second": 605.982, "eval_steps_per_second": 2.438, "step": 11865 }, { "epoch": 15.17, "grad_norm": 0.0034840325824916363, "learning_rate": 1.2073324905183313e-05, "loss": 0.0052, "step": 12000 }, { "epoch": 16.0, "eval_accuracy": 0.9448915325402379, "eval_loss": 0.45725104212760925, "eval_runtime": 9.4366, "eval_samples_per_second": 605.728, "eval_steps_per_second": 2.437, "step": 12656 }, { "epoch": 16.43, "grad_norm": 0.4831530451774597, "learning_rate": 8.912768647281922e-06, "loss": 0.0042, "step": 13000 }, { "epoch": 17.0, "eval_accuracy": 0.9522393282015396, "eval_loss": 0.43564239144325256, "eval_runtime": 9.4029, "eval_samples_per_second": 607.9, "eval_steps_per_second": 2.446, "step": 13447 }, { "epoch": 17.7, "grad_norm": 0.0005753316800110042, "learning_rate": 5.752212389380531e-06, "loss": 0.0037, "step": 14000 }, { "epoch": 18.0, "eval_accuracy": 0.948740377886634, "eval_loss": 0.45767056941986084, "eval_runtime": 9.3872, "eval_samples_per_second": 608.913, "eval_steps_per_second": 2.45, "step": 14238 }, { "epoch": 18.96, "grad_norm": 0.0019946701359003782, "learning_rate": 2.59165613147914e-06, "loss": 0.0024, "step": 15000 }, { "epoch": 19.0, "eval_accuracy": 0.9492652204338698, "eval_loss": 0.4641525149345398, "eval_runtime": 9.4075, "eval_samples_per_second": 607.599, "eval_steps_per_second": 2.445, "step": 15029 }, { "epoch": 20.0, "eval_accuracy": 0.9501399580125962, "eval_loss": 0.46178367733955383, "eval_runtime": 9.3958, "eval_samples_per_second": 608.356, "eval_steps_per_second": 2.448, "step": 15820 }, { "epoch": 20.0, "step": 15820, "total_flos": 3.327918265884672e+16, "train_loss": 0.03330213655864846, "train_runtime": 3337.005, "train_samples_per_second": 151.609, "train_steps_per_second": 4.741 } ], "logging_steps": 1000, "max_steps": 15820, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.327918265884672e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }