bert-base-medmentions / trainer_state.json
Ben10x's picture
End of training
b5efbb6 verified
{
"best_global_step": 5822,
"best_metric": 1.5156257152557373,
"best_model_checkpoint": "./output/bert-base-medmentions/checkpoint-5822",
"epoch": 15.0,
"eval_steps": 500,
"global_step": 43665,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1717622810030917,
"grad_norm": 1.9404675960540771,
"learning_rate": 4.9427459063323026e-05,
"loss": 1.7239,
"step": 500
},
{
"epoch": 0.3435245620061834,
"grad_norm": 1.6719824075698853,
"learning_rate": 4.885491812664606e-05,
"loss": 1.6163,
"step": 1000
},
{
"epoch": 0.5152868430092752,
"grad_norm": 1.837388515472412,
"learning_rate": 4.828237718996909e-05,
"loss": 1.5924,
"step": 1500
},
{
"epoch": 0.6870491240123668,
"grad_norm": 2.1180434226989746,
"learning_rate": 4.770983625329211e-05,
"loss": 1.5717,
"step": 2000
},
{
"epoch": 0.8588114050154586,
"grad_norm": 1.800995945930481,
"learning_rate": 4.7137295316615135e-05,
"loss": 1.5686,
"step": 2500
},
{
"epoch": 1.0,
"eval_accuracy": 0.8549719237162791,
"eval_f1": 0.5650458863307036,
"eval_loss": 1.5439889430999756,
"eval_precision": 0.5245889387144993,
"eval_recall": 0.6122644801116539,
"eval_runtime": 4.1137,
"eval_samples_per_second": 707.39,
"eval_steps_per_second": 88.485,
"step": 2911
},
{
"epoch": 1.0305736860185504,
"grad_norm": 1.0917384624481201,
"learning_rate": 4.6564754379938166e-05,
"loss": 1.5484,
"step": 3000
},
{
"epoch": 1.202335967021642,
"grad_norm": 1.0721220970153809,
"learning_rate": 4.59922134432612e-05,
"loss": 1.4824,
"step": 3500
},
{
"epoch": 1.3740982480247337,
"grad_norm": 1.5045437812805176,
"learning_rate": 4.541967250658422e-05,
"loss": 1.4894,
"step": 4000
},
{
"epoch": 1.5458605290278253,
"grad_norm": 3.3368799686431885,
"learning_rate": 4.484713156990725e-05,
"loss": 1.4863,
"step": 4500
},
{
"epoch": 1.7176228100309172,
"grad_norm": 2.191357374191284,
"learning_rate": 4.427459063323028e-05,
"loss": 1.4806,
"step": 5000
},
{
"epoch": 1.889385091034009,
"grad_norm": 2.575375556945801,
"learning_rate": 4.3702049696553306e-05,
"loss": 1.4792,
"step": 5500
},
{
"epoch": 2.0,
"eval_accuracy": 0.8688595400463357,
"eval_f1": 0.6071204975165909,
"eval_loss": 1.5156257152557373,
"eval_precision": 0.5820728291316527,
"eval_recall": 0.6344207955338451,
"eval_runtime": 4.3513,
"eval_samples_per_second": 668.77,
"eval_steps_per_second": 83.654,
"step": 5822
},
{
"epoch": 2.0611473720371007,
"grad_norm": 4.3115081787109375,
"learning_rate": 4.3129508759876336e-05,
"loss": 1.4566,
"step": 6000
},
{
"epoch": 2.2329096530401924,
"grad_norm": 1.5031124353408813,
"learning_rate": 4.255696782319936e-05,
"loss": 1.4053,
"step": 6500
},
{
"epoch": 2.404671934043284,
"grad_norm": 2.4502875804901123,
"learning_rate": 4.198442688652239e-05,
"loss": 1.4111,
"step": 7000
},
{
"epoch": 2.5764342150463757,
"grad_norm": 1.5572278499603271,
"learning_rate": 4.1411885949845415e-05,
"loss": 1.4096,
"step": 7500
},
{
"epoch": 2.7481964960494674,
"grad_norm": 1.3663930892944336,
"learning_rate": 4.0839345013168445e-05,
"loss": 1.4097,
"step": 8000
},
{
"epoch": 2.9199587770525595,
"grad_norm": 3.3840882778167725,
"learning_rate": 4.026680407649147e-05,
"loss": 1.4111,
"step": 8500
},
{
"epoch": 3.0,
"eval_accuracy": 0.8713595727692771,
"eval_f1": 0.6163334575106585,
"eval_loss": 1.519059419631958,
"eval_precision": 0.5864513588026782,
"eval_recall": 0.6494242847173761,
"eval_runtime": 4.33,
"eval_samples_per_second": 672.06,
"eval_steps_per_second": 84.065,
"step": 8733
},
{
"epoch": 3.091721058055651,
"grad_norm": 1.4997501373291016,
"learning_rate": 3.96942631398145e-05,
"loss": 1.3798,
"step": 9000
},
{
"epoch": 3.2634833390587428,
"grad_norm": 1.4717656373977661,
"learning_rate": 3.912172220313753e-05,
"loss": 1.3517,
"step": 9500
},
{
"epoch": 3.4352456200618344,
"grad_norm": 2.0151214599609375,
"learning_rate": 3.8549181266460554e-05,
"loss": 1.3532,
"step": 10000
},
{
"epoch": 3.607007901064926,
"grad_norm": 2.7060940265655518,
"learning_rate": 3.797664032978358e-05,
"loss": 1.3546,
"step": 10500
},
{
"epoch": 3.7787701820680177,
"grad_norm": 3.2001101970672607,
"learning_rate": 3.740409939310661e-05,
"loss": 1.355,
"step": 11000
},
{
"epoch": 3.9505324630711094,
"grad_norm": 2.374141216278076,
"learning_rate": 3.683155845642964e-05,
"loss": 1.356,
"step": 11500
},
{
"epoch": 4.0,
"eval_accuracy": 0.8777209125773897,
"eval_f1": 0.6318054658919734,
"eval_loss": 1.5293220281600952,
"eval_precision": 0.6235663919802905,
"eval_recall": 0.6402651779483601,
"eval_runtime": 4.3262,
"eval_samples_per_second": 672.648,
"eval_steps_per_second": 84.139,
"step": 11644
},
{
"epoch": 4.1222947440742015,
"grad_norm": 2.5122597217559814,
"learning_rate": 3.6259017519752663e-05,
"loss": 1.3298,
"step": 12000
},
{
"epoch": 4.294057025077293,
"grad_norm": 1.870731234550476,
"learning_rate": 3.568647658307569e-05,
"loss": 1.3148,
"step": 12500
},
{
"epoch": 4.465819306080385,
"grad_norm": 2.1130025386810303,
"learning_rate": 3.511393564639872e-05,
"loss": 1.3147,
"step": 13000
},
{
"epoch": 4.637581587083476,
"grad_norm": 1.9891668558120728,
"learning_rate": 3.454139470972175e-05,
"loss": 1.3176,
"step": 13500
},
{
"epoch": 4.809343868086568,
"grad_norm": 2.144550085067749,
"learning_rate": 3.396885377304477e-05,
"loss": 1.3207,
"step": 14000
},
{
"epoch": 4.98110614908966,
"grad_norm": 1.166013240814209,
"learning_rate": 3.33963128363678e-05,
"loss": 1.3182,
"step": 14500
},
{
"epoch": 5.0,
"eval_accuracy": 0.8789120276443408,
"eval_f1": 0.6353600689952565,
"eval_loss": 1.543265461921692,
"eval_precision": 0.6282619819205185,
"eval_recall": 0.6426203768318214,
"eval_runtime": 4.3312,
"eval_samples_per_second": 671.862,
"eval_steps_per_second": 84.041,
"step": 14555
},
{
"epoch": 5.152868430092751,
"grad_norm": 1.4789066314697266,
"learning_rate": 3.2823771899690834e-05,
"loss": 1.2897,
"step": 15000
},
{
"epoch": 5.3246307110958435,
"grad_norm": 2.067422866821289,
"learning_rate": 3.225123096301386e-05,
"loss": 1.2908,
"step": 15500
},
{
"epoch": 5.496392992098935,
"grad_norm": 1.4258556365966797,
"learning_rate": 3.167869002633688e-05,
"loss": 1.2946,
"step": 16000
},
{
"epoch": 5.668155273102027,
"grad_norm": 1.9385099411010742,
"learning_rate": 3.110614908965991e-05,
"loss": 1.2914,
"step": 16500
},
{
"epoch": 5.839917554105119,
"grad_norm": 2.5622832775115967,
"learning_rate": 3.053360815298294e-05,
"loss": 1.2919,
"step": 17000
},
{
"epoch": 6.0,
"eval_accuracy": 0.8793701488239375,
"eval_f1": 0.6428903837204383,
"eval_loss": 1.5671014785766602,
"eval_precision": 0.6241682411895177,
"eval_recall": 0.6627704117236567,
"eval_runtime": 4.523,
"eval_samples_per_second": 643.376,
"eval_steps_per_second": 80.477,
"step": 17466
},
{
"epoch": 6.01167983510821,
"grad_norm": 1.9045183658599854,
"learning_rate": 2.9961067216305967e-05,
"loss": 1.2899,
"step": 17500
},
{
"epoch": 6.183442116111302,
"grad_norm": 5.356103420257568,
"learning_rate": 2.9388526279628997e-05,
"loss": 1.2716,
"step": 18000
},
{
"epoch": 6.3552043971143934,
"grad_norm": 1.4505314826965332,
"learning_rate": 2.8815985342952025e-05,
"loss": 1.2736,
"step": 18500
},
{
"epoch": 6.5269666781174855,
"grad_norm": 2.0673441886901855,
"learning_rate": 2.824344440627505e-05,
"loss": 1.2755,
"step": 19000
},
{
"epoch": 6.698728959120577,
"grad_norm": 7.07130765914917,
"learning_rate": 2.7670903469598076e-05,
"loss": 1.2739,
"step": 19500
},
{
"epoch": 6.870491240123669,
"grad_norm": 2.692305326461792,
"learning_rate": 2.7098362532921106e-05,
"loss": 1.2743,
"step": 20000
},
{
"epoch": 7.0,
"eval_accuracy": 0.8809277608345659,
"eval_f1": 0.6462844646455984,
"eval_loss": 1.5696512460708618,
"eval_precision": 0.6355739225773804,
"eval_recall": 0.6573621772505234,
"eval_runtime": 4.3283,
"eval_samples_per_second": 672.312,
"eval_steps_per_second": 84.097,
"step": 20377
},
{
"epoch": 7.042253521126761,
"grad_norm": 0.9224966764450073,
"learning_rate": 2.6525821596244134e-05,
"loss": 1.2716,
"step": 20500
},
{
"epoch": 7.214015802129852,
"grad_norm": 2.72609543800354,
"learning_rate": 2.5953280659567158e-05,
"loss": 1.2578,
"step": 21000
},
{
"epoch": 7.385778083132944,
"grad_norm": 1.7019892930984497,
"learning_rate": 2.538073972289019e-05,
"loss": 1.2612,
"step": 21500
},
{
"epoch": 7.5575403641360355,
"grad_norm": 4.017130374908447,
"learning_rate": 2.4808198786213216e-05,
"loss": 1.2622,
"step": 22000
},
{
"epoch": 7.729302645139128,
"grad_norm": 6.522401332855225,
"learning_rate": 2.4235657849536243e-05,
"loss": 1.2611,
"step": 22500
},
{
"epoch": 7.901064926142219,
"grad_norm": 3.355700731277466,
"learning_rate": 2.366311691285927e-05,
"loss": 1.2633,
"step": 23000
},
{
"epoch": 8.0,
"eval_accuracy": 0.8812811686016833,
"eval_f1": 0.6527559389741191,
"eval_loss": 1.5806214809417725,
"eval_precision": 0.636446507002569,
"eval_recall": 0.6699232379623168,
"eval_runtime": 4.329,
"eval_samples_per_second": 672.206,
"eval_steps_per_second": 84.084,
"step": 23288
},
{
"epoch": 8.07282720714531,
"grad_norm": 0.5228517651557922,
"learning_rate": 2.30905759761823e-05,
"loss": 1.2579,
"step": 23500
},
{
"epoch": 8.244589488148403,
"grad_norm": 0.2985314726829529,
"learning_rate": 2.2518035039505325e-05,
"loss": 1.2527,
"step": 24000
},
{
"epoch": 8.416351769151495,
"grad_norm": 1.963086724281311,
"learning_rate": 2.1945494102828355e-05,
"loss": 1.2542,
"step": 24500
},
{
"epoch": 8.588114050154585,
"grad_norm": 0.8812742233276367,
"learning_rate": 2.137295316615138e-05,
"loss": 1.251,
"step": 25000
},
{
"epoch": 8.759876331157677,
"grad_norm": 0.7020455002784729,
"learning_rate": 2.080041222947441e-05,
"loss": 1.2516,
"step": 25500
},
{
"epoch": 8.93163861216077,
"grad_norm": 2.0791664123535156,
"learning_rate": 2.0227871292797437e-05,
"loss": 1.2542,
"step": 26000
},
{
"epoch": 9.0,
"eval_accuracy": 0.880783779892407,
"eval_f1": 0.6498043011657758,
"eval_loss": 1.594204068183899,
"eval_precision": 0.6277953972513621,
"eval_recall": 0.6734124214933705,
"eval_runtime": 4.3509,
"eval_samples_per_second": 668.824,
"eval_steps_per_second": 83.66,
"step": 26199
},
{
"epoch": 9.103400893163862,
"grad_norm": 2.5551717281341553,
"learning_rate": 1.9655330356120464e-05,
"loss": 1.2496,
"step": 26500
},
{
"epoch": 9.275163174166954,
"grad_norm": 3.963749647140503,
"learning_rate": 1.908278941944349e-05,
"loss": 1.2453,
"step": 27000
},
{
"epoch": 9.446925455170044,
"grad_norm": 0.5859785676002502,
"learning_rate": 1.851024848276652e-05,
"loss": 1.2463,
"step": 27500
},
{
"epoch": 9.618687736173136,
"grad_norm": 0.3447531759738922,
"learning_rate": 1.7937707546089546e-05,
"loss": 1.2477,
"step": 28000
},
{
"epoch": 9.790450017176228,
"grad_norm": 0.3794388175010681,
"learning_rate": 1.7365166609412573e-05,
"loss": 1.2468,
"step": 28500
},
{
"epoch": 9.96221229817932,
"grad_norm": 1.6076109409332275,
"learning_rate": 1.67926256727356e-05,
"loss": 1.2457,
"step": 29000
},
{
"epoch": 10.0,
"eval_accuracy": 0.8814251495438422,
"eval_f1": 0.6500277789649131,
"eval_loss": 1.607577919960022,
"eval_precision": 0.6372015081692501,
"eval_recall": 0.6633810188415911,
"eval_runtime": 4.3517,
"eval_samples_per_second": 668.699,
"eval_steps_per_second": 83.645,
"step": 29110
},
{
"epoch": 10.13397457918241,
"grad_norm": 1.843865990638733,
"learning_rate": 1.622008473605863e-05,
"loss": 1.2411,
"step": 29500
},
{
"epoch": 10.305736860185503,
"grad_norm": 2.924731731414795,
"learning_rate": 1.5647543799381655e-05,
"loss": 1.2402,
"step": 30000
},
{
"epoch": 10.477499141188595,
"grad_norm": 0.40096113085746765,
"learning_rate": 1.5075002862704684e-05,
"loss": 1.2416,
"step": 30500
},
{
"epoch": 10.649261422191687,
"grad_norm": 1.3067082166671753,
"learning_rate": 1.4502461926027711e-05,
"loss": 1.2422,
"step": 31000
},
{
"epoch": 10.82102370319478,
"grad_norm": 1.1540168523788452,
"learning_rate": 1.3929920989350739e-05,
"loss": 1.2406,
"step": 31500
},
{
"epoch": 10.99278598419787,
"grad_norm": 2.354355812072754,
"learning_rate": 1.3357380052673768e-05,
"loss": 1.2398,
"step": 32000
},
{
"epoch": 11.0,
"eval_accuracy": 0.8834932394403068,
"eval_f1": 0.6551724137931034,
"eval_loss": 1.6077239513397217,
"eval_precision": 0.6413770053475936,
"eval_recall": 0.6695743196092114,
"eval_runtime": 4.5309,
"eval_samples_per_second": 642.254,
"eval_steps_per_second": 80.337,
"step": 32021
},
{
"epoch": 11.164548265200962,
"grad_norm": 2.941948413848877,
"learning_rate": 1.2784839115996793e-05,
"loss": 1.2384,
"step": 32500
},
{
"epoch": 11.336310546204054,
"grad_norm": 0.06978488713502884,
"learning_rate": 1.2212298179319822e-05,
"loss": 1.238,
"step": 33000
},
{
"epoch": 11.508072827207146,
"grad_norm": 1.1202852725982666,
"learning_rate": 1.163975724264285e-05,
"loss": 1.2375,
"step": 33500
},
{
"epoch": 11.679835108210238,
"grad_norm": 3.7290749549865723,
"learning_rate": 1.1067216305965877e-05,
"loss": 1.2373,
"step": 34000
},
{
"epoch": 11.851597389213328,
"grad_norm": 0.5790780782699585,
"learning_rate": 1.0494675369288904e-05,
"loss": 1.2377,
"step": 34500
},
{
"epoch": 12.0,
"eval_accuracy": 0.8846974436838179,
"eval_f1": 0.6615437158469945,
"eval_loss": 1.6134886741638184,
"eval_precision": 0.6478260869565218,
"eval_recall": 0.6758548499651081,
"eval_runtime": 4.3276,
"eval_samples_per_second": 672.43,
"eval_steps_per_second": 84.112,
"step": 34932
},
{
"epoch": 12.02335967021642,
"grad_norm": 0.04744827747344971,
"learning_rate": 9.922134432611933e-06,
"loss": 1.2362,
"step": 35000
},
{
"epoch": 12.195121951219512,
"grad_norm": 0.07846707850694656,
"learning_rate": 9.34959349593496e-06,
"loss": 1.235,
"step": 35500
},
{
"epoch": 12.366884232222604,
"grad_norm": 3.9505062103271484,
"learning_rate": 8.777052559257987e-06,
"loss": 1.2345,
"step": 36000
},
{
"epoch": 12.538646513225697,
"grad_norm": 0.13419800996780396,
"learning_rate": 8.204511622581015e-06,
"loss": 1.2362,
"step": 36500
},
{
"epoch": 12.710408794228787,
"grad_norm": 0.205936998128891,
"learning_rate": 7.631970685904042e-06,
"loss": 1.2341,
"step": 37000
},
{
"epoch": 12.882171075231879,
"grad_norm": 1.917006254196167,
"learning_rate": 7.05942974922707e-06,
"loss": 1.2349,
"step": 37500
},
{
"epoch": 13.0,
"eval_accuracy": 0.883872825560544,
"eval_f1": 0.6590367597004765,
"eval_loss": 1.619519829750061,
"eval_precision": 0.6432724252491694,
"eval_recall": 0.6755931612002791,
"eval_runtime": 4.3511,
"eval_samples_per_second": 668.801,
"eval_steps_per_second": 83.658,
"step": 37843
},
{
"epoch": 13.053933356234971,
"grad_norm": 0.5815674662590027,
"learning_rate": 6.486888812550097e-06,
"loss": 1.2342,
"step": 38000
},
{
"epoch": 13.225695637238063,
"grad_norm": 0.48151713609695435,
"learning_rate": 5.914347875873125e-06,
"loss": 1.2335,
"step": 38500
},
{
"epoch": 13.397457918241154,
"grad_norm": 4.141974925994873,
"learning_rate": 5.341806939196153e-06,
"loss": 1.2335,
"step": 39000
},
{
"epoch": 13.569220199244246,
"grad_norm": 0.24046790599822998,
"learning_rate": 4.76926600251918e-06,
"loss": 1.2331,
"step": 39500
},
{
"epoch": 13.740982480247338,
"grad_norm": 0.08363146334886551,
"learning_rate": 4.196725065842208e-06,
"loss": 1.233,
"step": 40000
},
{
"epoch": 13.91274476125043,
"grad_norm": 0.5658828616142273,
"learning_rate": 3.6241841291652353e-06,
"loss": 1.2328,
"step": 40500
},
{
"epoch": 14.0,
"eval_accuracy": 0.8845272843885391,
"eval_f1": 0.6591725081210464,
"eval_loss": 1.6228290796279907,
"eval_precision": 0.6462453905464298,
"eval_recall": 0.6726273551988835,
"eval_runtime": 4.3411,
"eval_samples_per_second": 670.343,
"eval_steps_per_second": 83.85,
"step": 40754
},
{
"epoch": 14.084507042253522,
"grad_norm": 1.4916341304779053,
"learning_rate": 3.051643192488263e-06,
"loss": 1.2318,
"step": 41000
},
{
"epoch": 14.256269323256612,
"grad_norm": 0.1434667557477951,
"learning_rate": 2.4791022558112906e-06,
"loss": 1.2305,
"step": 41500
},
{
"epoch": 14.428031604259704,
"grad_norm": 0.10652283579111099,
"learning_rate": 1.906561319134318e-06,
"loss": 1.232,
"step": 42000
},
{
"epoch": 14.599793885262796,
"grad_norm": 0.8040905594825745,
"learning_rate": 1.3340203824573458e-06,
"loss": 1.2321,
"step": 42500
},
{
"epoch": 14.771556166265889,
"grad_norm": 0.040788378566503525,
"learning_rate": 7.614794457803733e-07,
"loss": 1.2319,
"step": 43000
},
{
"epoch": 14.943318447268979,
"grad_norm": 0.5179036259651184,
"learning_rate": 1.889385091034009e-07,
"loss": 1.231,
"step": 43500
},
{
"epoch": 15.0,
"eval_accuracy": 0.8847236220369377,
"eval_f1": 0.660140218878249,
"eval_loss": 1.6247130632400513,
"eval_precision": 0.6473004694835681,
"eval_recall": 0.6734996510816469,
"eval_runtime": 4.3445,
"eval_samples_per_second": 669.806,
"eval_steps_per_second": 83.783,
"step": 43665
},
{
"epoch": 15.0,
"step": 43665,
"total_flos": 1.1901430945516224e+16,
"train_loss": 1.311661433704009,
"train_runtime": 2299.911,
"train_samples_per_second": 151.865,
"train_steps_per_second": 18.986
}
],
"logging_steps": 500,
"max_steps": 43665,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1901430945516224e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}