|
{ |
|
"best_global_step": 11668, |
|
"best_metric": 0.5086450576782227, |
|
"best_model_checkpoint": "./output/gpt-medmentions/checkpoint-11668", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 29170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08570449091532396, |
|
"grad_norm": 7.563724040985107, |
|
"learning_rate": 4.914295509084677e-05, |
|
"loss": 0.8639, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17140898183064793, |
|
"grad_norm": 4.639763832092285, |
|
"learning_rate": 4.828591018169352e-05, |
|
"loss": 0.674, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2571134727459719, |
|
"grad_norm": 4.49621057510376, |
|
"learning_rate": 4.7428865272540284e-05, |
|
"loss": 0.6427, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.34281796366129585, |
|
"grad_norm": 4.740731239318848, |
|
"learning_rate": 4.657182036338704e-05, |
|
"loss": 0.6284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4285224545766198, |
|
"grad_norm": 3.477789878845215, |
|
"learning_rate": 4.57147754542338e-05, |
|
"loss": 0.5904, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5142269454919438, |
|
"grad_norm": 5.801304340362549, |
|
"learning_rate": 4.485773054508056e-05, |
|
"loss": 0.592, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5999314364072678, |
|
"grad_norm": 3.1393306255340576, |
|
"learning_rate": 4.4000685635927325e-05, |
|
"loss": 0.5751, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6856359273225917, |
|
"grad_norm": 2.137924909591675, |
|
"learning_rate": 4.314364072677409e-05, |
|
"loss": 0.5647, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7713404182379157, |
|
"grad_norm": 4.005846977233887, |
|
"learning_rate": 4.228659581762084e-05, |
|
"loss": 0.5525, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8570449091532396, |
|
"grad_norm": 2.6204190254211426, |
|
"learning_rate": 4.142955090846761e-05, |
|
"loss": 0.5499, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9427494000685636, |
|
"grad_norm": 2.9211952686309814, |
|
"learning_rate": 4.0572505999314365e-05, |
|
"loss": 0.5389, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8351150895140665, |
|
"eval_f1": 0.4352736750651607, |
|
"eval_loss": 0.5593962669372559, |
|
"eval_precision": 0.435084672166739, |
|
"eval_recall": 0.43546284224250326, |
|
"eval_runtime": 34.7107, |
|
"eval_samples_per_second": 84.009, |
|
"eval_steps_per_second": 10.515, |
|
"step": 5834 |
|
}, |
|
{ |
|
"epoch": 1.0284538909838876, |
|
"grad_norm": 2.6330394744873047, |
|
"learning_rate": 3.9715461090161124e-05, |
|
"loss": 0.4835, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1141583818992116, |
|
"grad_norm": 7.3211798667907715, |
|
"learning_rate": 3.885841618100788e-05, |
|
"loss": 0.3699, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.1998628728145355, |
|
"grad_norm": 4.461019515991211, |
|
"learning_rate": 3.800137127185465e-05, |
|
"loss": 0.3823, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2855673637298595, |
|
"grad_norm": 3.2148802280426025, |
|
"learning_rate": 3.7144326362701406e-05, |
|
"loss": 0.3817, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3712718546451834, |
|
"grad_norm": 6.220280170440674, |
|
"learning_rate": 3.6287281453548164e-05, |
|
"loss": 0.3725, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4569763455605074, |
|
"grad_norm": 2.2955079078674316, |
|
"learning_rate": 3.543023654439493e-05, |
|
"loss": 0.38, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.5426808364758313, |
|
"grad_norm": 4.852652072906494, |
|
"learning_rate": 3.457319163524169e-05, |
|
"loss": 0.3705, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.6283853273911553, |
|
"grad_norm": 3.233278751373291, |
|
"learning_rate": 3.3716146726088446e-05, |
|
"loss": 0.3745, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.7140898183064792, |
|
"grad_norm": 3.185788154602051, |
|
"learning_rate": 3.285910181693521e-05, |
|
"loss": 0.3738, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.7997943092218032, |
|
"grad_norm": 2.9877729415893555, |
|
"learning_rate": 3.200205690778197e-05, |
|
"loss": 0.3733, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.8854988001371273, |
|
"grad_norm": 2.5933837890625, |
|
"learning_rate": 3.114501199862873e-05, |
|
"loss": 0.3559, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.971203291052451, |
|
"grad_norm": 2.8704030513763428, |
|
"learning_rate": 3.0287967089475487e-05, |
|
"loss": 0.3762, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.846457800511509, |
|
"eval_f1": 0.4924993145587718, |
|
"eval_loss": 0.5086450576782227, |
|
"eval_precision": 0.44823898474262086, |
|
"eval_recall": 0.546458061712299, |
|
"eval_runtime": 34.716, |
|
"eval_samples_per_second": 83.996, |
|
"eval_steps_per_second": 10.514, |
|
"step": 11668 |
|
}, |
|
{ |
|
"epoch": 2.0569077819677752, |
|
"grad_norm": 2.7803287506103516, |
|
"learning_rate": 2.9430922180322252e-05, |
|
"loss": 0.2413, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.142612272883099, |
|
"grad_norm": 2.0437328815460205, |
|
"learning_rate": 2.8573877271169007e-05, |
|
"loss": 0.1788, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.228316763798423, |
|
"grad_norm": 1.26553213596344, |
|
"learning_rate": 2.7716832362015772e-05, |
|
"loss": 0.1809, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.314021254713747, |
|
"grad_norm": 2.584317922592163, |
|
"learning_rate": 2.6859787452862534e-05, |
|
"loss": 0.1759, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.399725745629071, |
|
"grad_norm": 15.791213989257812, |
|
"learning_rate": 2.600274254370929e-05, |
|
"loss": 0.1818, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.4854302365443948, |
|
"grad_norm": 1.3076903820037842, |
|
"learning_rate": 2.5145697634556054e-05, |
|
"loss": 0.1749, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.571134727459719, |
|
"grad_norm": 1.6942801475524902, |
|
"learning_rate": 2.4288652725402813e-05, |
|
"loss": 0.1767, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.656839218375043, |
|
"grad_norm": 3.2078938484191895, |
|
"learning_rate": 2.3431607816249575e-05, |
|
"loss": 0.1737, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.742543709290367, |
|
"grad_norm": 2.383434295654297, |
|
"learning_rate": 2.2574562907096333e-05, |
|
"loss": 0.1771, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.8282482002056906, |
|
"grad_norm": 1.1726810932159424, |
|
"learning_rate": 2.171751799794309e-05, |
|
"loss": 0.1783, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.9139526911210147, |
|
"grad_norm": 8.384498596191406, |
|
"learning_rate": 2.0860473088789853e-05, |
|
"loss": 0.1775, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.999657182036339, |
|
"grad_norm": 2.227762222290039, |
|
"learning_rate": 2.0003428179636612e-05, |
|
"loss": 0.1736, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8486956521739131, |
|
"eval_f1": 0.4947288251092316, |
|
"eval_loss": 0.6310177445411682, |
|
"eval_precision": 0.4590834697217676, |
|
"eval_recall": 0.5363754889178618, |
|
"eval_runtime": 34.7382, |
|
"eval_samples_per_second": 83.942, |
|
"eval_steps_per_second": 10.507, |
|
"step": 17502 |
|
}, |
|
{ |
|
"epoch": 3.0853616729516626, |
|
"grad_norm": 0.670129120349884, |
|
"learning_rate": 1.9146383270483374e-05, |
|
"loss": 0.0719, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.171066163866987, |
|
"grad_norm": 1.9619492292404175, |
|
"learning_rate": 1.8289338361330135e-05, |
|
"loss": 0.0725, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.2567706547823105, |
|
"grad_norm": 0.24141933023929596, |
|
"learning_rate": 1.7432293452176894e-05, |
|
"loss": 0.0724, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.3424751456976347, |
|
"grad_norm": 2.3773982524871826, |
|
"learning_rate": 1.6575248543023656e-05, |
|
"loss": 0.0777, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.4281796366129584, |
|
"grad_norm": 1.7786751985549927, |
|
"learning_rate": 1.5718203633870414e-05, |
|
"loss": 0.0774, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.5138841275282826, |
|
"grad_norm": 3.449632167816162, |
|
"learning_rate": 1.4861158724717176e-05, |
|
"loss": 0.0786, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.5995886184436063, |
|
"grad_norm": 4.504425048828125, |
|
"learning_rate": 1.4004113815563934e-05, |
|
"loss": 0.0763, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.6852931093589305, |
|
"grad_norm": 2.4624173641204834, |
|
"learning_rate": 1.3147068906410698e-05, |
|
"loss": 0.0726, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.7709976002742542, |
|
"grad_norm": 2.6595826148986816, |
|
"learning_rate": 1.2290023997257456e-05, |
|
"loss": 0.0766, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.8567020911895784, |
|
"grad_norm": 0.6136459708213806, |
|
"learning_rate": 1.1432979088104218e-05, |
|
"loss": 0.072, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.942406582104902, |
|
"grad_norm": 3.4279706478118896, |
|
"learning_rate": 1.0575934178950978e-05, |
|
"loss": 0.0777, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8520971867007673, |
|
"eval_f1": 0.49943597242531856, |
|
"eval_loss": 0.9267778992652893, |
|
"eval_precision": 0.480852775543041, |
|
"eval_recall": 0.5195132551064754, |
|
"eval_runtime": 34.7136, |
|
"eval_samples_per_second": 84.002, |
|
"eval_steps_per_second": 10.515, |
|
"step": 23336 |
|
}, |
|
{ |
|
"epoch": 4.028111073020226, |
|
"grad_norm": 1.2734886407852173, |
|
"learning_rate": 9.718889269797737e-06, |
|
"loss": 0.0573, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.1138155639355505, |
|
"grad_norm": 3.2875871658325195, |
|
"learning_rate": 8.861844360644498e-06, |
|
"loss": 0.039, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.199520054850874, |
|
"grad_norm": 13.373528480529785, |
|
"learning_rate": 8.004799451491259e-06, |
|
"loss": 0.0401, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.285224545766198, |
|
"grad_norm": 0.31780633330345154, |
|
"learning_rate": 7.147754542338019e-06, |
|
"loss": 0.0391, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.370929036681522, |
|
"grad_norm": 1.2009053230285645, |
|
"learning_rate": 6.29070963318478e-06, |
|
"loss": 0.0416, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.456633527596846, |
|
"grad_norm": 1.145856261253357, |
|
"learning_rate": 5.43366472403154e-06, |
|
"loss": 0.0386, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.5423380185121704, |
|
"grad_norm": 1.447941780090332, |
|
"learning_rate": 4.5766198148783e-06, |
|
"loss": 0.0387, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.628042509427494, |
|
"grad_norm": 2.3324902057647705, |
|
"learning_rate": 3.71957490572506e-06, |
|
"loss": 0.0434, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.713747000342818, |
|
"grad_norm": 1.5352897644042969, |
|
"learning_rate": 2.8625299965718206e-06, |
|
"loss": 0.037, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.799451491258142, |
|
"grad_norm": 0.3736858069896698, |
|
"learning_rate": 2.0054850874185807e-06, |
|
"loss": 0.0347, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.885155982173466, |
|
"grad_norm": 0.8886938095092773, |
|
"learning_rate": 1.148440178265341e-06, |
|
"loss": 0.0379, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.9708604730887895, |
|
"grad_norm": 1.5339548587799072, |
|
"learning_rate": 2.913952691121015e-07, |
|
"loss": 0.0394, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8531202046035805, |
|
"eval_f1": 0.5048873426110007, |
|
"eval_loss": 1.0743063688278198, |
|
"eval_precision": 0.4822375187910436, |
|
"eval_recall": 0.5297696653628857, |
|
"eval_runtime": 34.8878, |
|
"eval_samples_per_second": 83.582, |
|
"eval_steps_per_second": 10.462, |
|
"step": 29170 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 29170, |
|
"total_flos": 4.62662533103459e+16, |
|
"train_loss": 0.2550947672543478, |
|
"train_runtime": 8451.3725, |
|
"train_samples_per_second": 13.805, |
|
"train_steps_per_second": 3.452 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 29170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.62662533103459e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|