gpt-medmentions / trainer_state.json
Ben10x's picture
End of training
78e15bb verified
raw
history blame
13 kB
{
"best_global_step": 11668,
"best_metric": 0.5086450576782227,
"best_model_checkpoint": "./output/gpt-medmentions/checkpoint-11668",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 29170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08570449091532396,
"grad_norm": 7.563724040985107,
"learning_rate": 4.914295509084677e-05,
"loss": 0.8639,
"step": 500
},
{
"epoch": 0.17140898183064793,
"grad_norm": 4.639763832092285,
"learning_rate": 4.828591018169352e-05,
"loss": 0.674,
"step": 1000
},
{
"epoch": 0.2571134727459719,
"grad_norm": 4.49621057510376,
"learning_rate": 4.7428865272540284e-05,
"loss": 0.6427,
"step": 1500
},
{
"epoch": 0.34281796366129585,
"grad_norm": 4.740731239318848,
"learning_rate": 4.657182036338704e-05,
"loss": 0.6284,
"step": 2000
},
{
"epoch": 0.4285224545766198,
"grad_norm": 3.477789878845215,
"learning_rate": 4.57147754542338e-05,
"loss": 0.5904,
"step": 2500
},
{
"epoch": 0.5142269454919438,
"grad_norm": 5.801304340362549,
"learning_rate": 4.485773054508056e-05,
"loss": 0.592,
"step": 3000
},
{
"epoch": 0.5999314364072678,
"grad_norm": 3.1393306255340576,
"learning_rate": 4.4000685635927325e-05,
"loss": 0.5751,
"step": 3500
},
{
"epoch": 0.6856359273225917,
"grad_norm": 2.137924909591675,
"learning_rate": 4.314364072677409e-05,
"loss": 0.5647,
"step": 4000
},
{
"epoch": 0.7713404182379157,
"grad_norm": 4.005846977233887,
"learning_rate": 4.228659581762084e-05,
"loss": 0.5525,
"step": 4500
},
{
"epoch": 0.8570449091532396,
"grad_norm": 2.6204190254211426,
"learning_rate": 4.142955090846761e-05,
"loss": 0.5499,
"step": 5000
},
{
"epoch": 0.9427494000685636,
"grad_norm": 2.9211952686309814,
"learning_rate": 4.0572505999314365e-05,
"loss": 0.5389,
"step": 5500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8351150895140665,
"eval_f1": 0.4352736750651607,
"eval_loss": 0.5593962669372559,
"eval_precision": 0.435084672166739,
"eval_recall": 0.43546284224250326,
"eval_runtime": 34.7107,
"eval_samples_per_second": 84.009,
"eval_steps_per_second": 10.515,
"step": 5834
},
{
"epoch": 1.0284538909838876,
"grad_norm": 2.6330394744873047,
"learning_rate": 3.9715461090161124e-05,
"loss": 0.4835,
"step": 6000
},
{
"epoch": 1.1141583818992116,
"grad_norm": 7.3211798667907715,
"learning_rate": 3.885841618100788e-05,
"loss": 0.3699,
"step": 6500
},
{
"epoch": 1.1998628728145355,
"grad_norm": 4.461019515991211,
"learning_rate": 3.800137127185465e-05,
"loss": 0.3823,
"step": 7000
},
{
"epoch": 1.2855673637298595,
"grad_norm": 3.2148802280426025,
"learning_rate": 3.7144326362701406e-05,
"loss": 0.3817,
"step": 7500
},
{
"epoch": 1.3712718546451834,
"grad_norm": 6.220280170440674,
"learning_rate": 3.6287281453548164e-05,
"loss": 0.3725,
"step": 8000
},
{
"epoch": 1.4569763455605074,
"grad_norm": 2.2955079078674316,
"learning_rate": 3.543023654439493e-05,
"loss": 0.38,
"step": 8500
},
{
"epoch": 1.5426808364758313,
"grad_norm": 4.852652072906494,
"learning_rate": 3.457319163524169e-05,
"loss": 0.3705,
"step": 9000
},
{
"epoch": 1.6283853273911553,
"grad_norm": 3.233278751373291,
"learning_rate": 3.3716146726088446e-05,
"loss": 0.3745,
"step": 9500
},
{
"epoch": 1.7140898183064792,
"grad_norm": 3.185788154602051,
"learning_rate": 3.285910181693521e-05,
"loss": 0.3738,
"step": 10000
},
{
"epoch": 1.7997943092218032,
"grad_norm": 2.9877729415893555,
"learning_rate": 3.200205690778197e-05,
"loss": 0.3733,
"step": 10500
},
{
"epoch": 1.8854988001371273,
"grad_norm": 2.5933837890625,
"learning_rate": 3.114501199862873e-05,
"loss": 0.3559,
"step": 11000
},
{
"epoch": 1.971203291052451,
"grad_norm": 2.8704030513763428,
"learning_rate": 3.0287967089475487e-05,
"loss": 0.3762,
"step": 11500
},
{
"epoch": 2.0,
"eval_accuracy": 0.846457800511509,
"eval_f1": 0.4924993145587718,
"eval_loss": 0.5086450576782227,
"eval_precision": 0.44823898474262086,
"eval_recall": 0.546458061712299,
"eval_runtime": 34.716,
"eval_samples_per_second": 83.996,
"eval_steps_per_second": 10.514,
"step": 11668
},
{
"epoch": 2.0569077819677752,
"grad_norm": 2.7803287506103516,
"learning_rate": 2.9430922180322252e-05,
"loss": 0.2413,
"step": 12000
},
{
"epoch": 2.142612272883099,
"grad_norm": 2.0437328815460205,
"learning_rate": 2.8573877271169007e-05,
"loss": 0.1788,
"step": 12500
},
{
"epoch": 2.228316763798423,
"grad_norm": 1.26553213596344,
"learning_rate": 2.7716832362015772e-05,
"loss": 0.1809,
"step": 13000
},
{
"epoch": 2.314021254713747,
"grad_norm": 2.584317922592163,
"learning_rate": 2.6859787452862534e-05,
"loss": 0.1759,
"step": 13500
},
{
"epoch": 2.399725745629071,
"grad_norm": 15.791213989257812,
"learning_rate": 2.600274254370929e-05,
"loss": 0.1818,
"step": 14000
},
{
"epoch": 2.4854302365443948,
"grad_norm": 1.3076903820037842,
"learning_rate": 2.5145697634556054e-05,
"loss": 0.1749,
"step": 14500
},
{
"epoch": 2.571134727459719,
"grad_norm": 1.6942801475524902,
"learning_rate": 2.4288652725402813e-05,
"loss": 0.1767,
"step": 15000
},
{
"epoch": 2.656839218375043,
"grad_norm": 3.2078938484191895,
"learning_rate": 2.3431607816249575e-05,
"loss": 0.1737,
"step": 15500
},
{
"epoch": 2.742543709290367,
"grad_norm": 2.383434295654297,
"learning_rate": 2.2574562907096333e-05,
"loss": 0.1771,
"step": 16000
},
{
"epoch": 2.8282482002056906,
"grad_norm": 1.1726810932159424,
"learning_rate": 2.171751799794309e-05,
"loss": 0.1783,
"step": 16500
},
{
"epoch": 2.9139526911210147,
"grad_norm": 8.384498596191406,
"learning_rate": 2.0860473088789853e-05,
"loss": 0.1775,
"step": 17000
},
{
"epoch": 2.999657182036339,
"grad_norm": 2.227762222290039,
"learning_rate": 2.0003428179636612e-05,
"loss": 0.1736,
"step": 17500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8486956521739131,
"eval_f1": 0.4947288251092316,
"eval_loss": 0.6310177445411682,
"eval_precision": 0.4590834697217676,
"eval_recall": 0.5363754889178618,
"eval_runtime": 34.7382,
"eval_samples_per_second": 83.942,
"eval_steps_per_second": 10.507,
"step": 17502
},
{
"epoch": 3.0853616729516626,
"grad_norm": 0.670129120349884,
"learning_rate": 1.9146383270483374e-05,
"loss": 0.0719,
"step": 18000
},
{
"epoch": 3.171066163866987,
"grad_norm": 1.9619492292404175,
"learning_rate": 1.8289338361330135e-05,
"loss": 0.0725,
"step": 18500
},
{
"epoch": 3.2567706547823105,
"grad_norm": 0.24141933023929596,
"learning_rate": 1.7432293452176894e-05,
"loss": 0.0724,
"step": 19000
},
{
"epoch": 3.3424751456976347,
"grad_norm": 2.3773982524871826,
"learning_rate": 1.6575248543023656e-05,
"loss": 0.0777,
"step": 19500
},
{
"epoch": 3.4281796366129584,
"grad_norm": 1.7786751985549927,
"learning_rate": 1.5718203633870414e-05,
"loss": 0.0774,
"step": 20000
},
{
"epoch": 3.5138841275282826,
"grad_norm": 3.449632167816162,
"learning_rate": 1.4861158724717176e-05,
"loss": 0.0786,
"step": 20500
},
{
"epoch": 3.5995886184436063,
"grad_norm": 4.504425048828125,
"learning_rate": 1.4004113815563934e-05,
"loss": 0.0763,
"step": 21000
},
{
"epoch": 3.6852931093589305,
"grad_norm": 2.4624173641204834,
"learning_rate": 1.3147068906410698e-05,
"loss": 0.0726,
"step": 21500
},
{
"epoch": 3.7709976002742542,
"grad_norm": 2.6595826148986816,
"learning_rate": 1.2290023997257456e-05,
"loss": 0.0766,
"step": 22000
},
{
"epoch": 3.8567020911895784,
"grad_norm": 0.6136459708213806,
"learning_rate": 1.1432979088104218e-05,
"loss": 0.072,
"step": 22500
},
{
"epoch": 3.942406582104902,
"grad_norm": 3.4279706478118896,
"learning_rate": 1.0575934178950978e-05,
"loss": 0.0777,
"step": 23000
},
{
"epoch": 4.0,
"eval_accuracy": 0.8520971867007673,
"eval_f1": 0.49943597242531856,
"eval_loss": 0.9267778992652893,
"eval_precision": 0.480852775543041,
"eval_recall": 0.5195132551064754,
"eval_runtime": 34.7136,
"eval_samples_per_second": 84.002,
"eval_steps_per_second": 10.515,
"step": 23336
},
{
"epoch": 4.028111073020226,
"grad_norm": 1.2734886407852173,
"learning_rate": 9.718889269797737e-06,
"loss": 0.0573,
"step": 23500
},
{
"epoch": 4.1138155639355505,
"grad_norm": 3.2875871658325195,
"learning_rate": 8.861844360644498e-06,
"loss": 0.039,
"step": 24000
},
{
"epoch": 4.199520054850874,
"grad_norm": 13.373528480529785,
"learning_rate": 8.004799451491259e-06,
"loss": 0.0401,
"step": 24500
},
{
"epoch": 4.285224545766198,
"grad_norm": 0.31780633330345154,
"learning_rate": 7.147754542338019e-06,
"loss": 0.0391,
"step": 25000
},
{
"epoch": 4.370929036681522,
"grad_norm": 1.2009053230285645,
"learning_rate": 6.29070963318478e-06,
"loss": 0.0416,
"step": 25500
},
{
"epoch": 4.456633527596846,
"grad_norm": 1.145856261253357,
"learning_rate": 5.43366472403154e-06,
"loss": 0.0386,
"step": 26000
},
{
"epoch": 4.5423380185121704,
"grad_norm": 1.447941780090332,
"learning_rate": 4.5766198148783e-06,
"loss": 0.0387,
"step": 26500
},
{
"epoch": 4.628042509427494,
"grad_norm": 2.3324902057647705,
"learning_rate": 3.71957490572506e-06,
"loss": 0.0434,
"step": 27000
},
{
"epoch": 4.713747000342818,
"grad_norm": 1.5352897644042969,
"learning_rate": 2.8625299965718206e-06,
"loss": 0.037,
"step": 27500
},
{
"epoch": 4.799451491258142,
"grad_norm": 0.3736858069896698,
"learning_rate": 2.0054850874185807e-06,
"loss": 0.0347,
"step": 28000
},
{
"epoch": 4.885155982173466,
"grad_norm": 0.8886938095092773,
"learning_rate": 1.148440178265341e-06,
"loss": 0.0379,
"step": 28500
},
{
"epoch": 4.9708604730887895,
"grad_norm": 1.5339548587799072,
"learning_rate": 2.913952691121015e-07,
"loss": 0.0394,
"step": 29000
},
{
"epoch": 5.0,
"eval_accuracy": 0.8531202046035805,
"eval_f1": 0.5048873426110007,
"eval_loss": 1.0743063688278198,
"eval_precision": 0.4822375187910436,
"eval_recall": 0.5297696653628857,
"eval_runtime": 34.8878,
"eval_samples_per_second": 83.582,
"eval_steps_per_second": 10.462,
"step": 29170
},
{
"epoch": 5.0,
"step": 29170,
"total_flos": 4.62662533103459e+16,
"train_loss": 0.2550947672543478,
"train_runtime": 8451.3725,
"train_samples_per_second": 13.805,
"train_steps_per_second": 3.452
}
],
"logging_steps": 500,
"max_steps": 29170,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.62662533103459e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}