lettuce_pos_nl_xlm / trainer_state.json
pranaydeeps's picture
Upload folder using huggingface_hub
1d0a9e7 verified
{
"best_metric": 0.978172514732208,
"best_model_checkpoint": "models/pos_final_xlm_nl/checkpoint-2415",
"epoch": 39.99638989169675,
"global_step": 2760,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.3200084762438131,
"eval_f1": 0.2160928249139116,
"eval_loss": 3.483713388442993,
"eval_precision": 0.2936494317356812,
"eval_recall": 0.17094424294584126,
"eval_runtime": 10.7415,
"eval_samples_per_second": 732.58,
"eval_steps_per_second": 2.886,
"step": 69
},
{
"epoch": 2.0,
"eval_accuracy": 0.8496904657393253,
"eval_f1": 0.8458663165617639,
"eval_loss": 0.8298526406288147,
"eval_precision": 0.8501404908642128,
"eval_recall": 0.8416349050224381,
"eval_runtime": 11.2158,
"eval_samples_per_second": 701.6,
"eval_steps_per_second": 2.764,
"step": 138
},
{
"epoch": 3.0,
"eval_accuracy": 0.9429140115337461,
"eval_f1": 0.9413686917810061,
"eval_loss": 0.27647557854652405,
"eval_precision": 0.941929974380871,
"eval_recall": 0.9408080777033258,
"eval_runtime": 10.9493,
"eval_samples_per_second": 718.679,
"eval_steps_per_second": 2.831,
"step": 207
},
{
"epoch": 4.0,
"eval_accuracy": 0.961107663432576,
"eval_f1": 0.9598816317903192,
"eval_loss": 0.17041535675525665,
"eval_precision": 0.9601288546848211,
"eval_recall": 0.9596345361775374,
"eval_runtime": 10.8629,
"eval_samples_per_second": 724.391,
"eval_steps_per_second": 2.854,
"step": 276
},
{
"epoch": 5.0,
"eval_accuracy": 0.969288752327183,
"eval_f1": 0.9685681024447033,
"eval_loss": 0.1259436309337616,
"eval_precision": 0.9685494963155347,
"eval_recall": 0.9685867092887441,
"eval_runtime": 11.2662,
"eval_samples_per_second": 698.46,
"eval_steps_per_second": 2.752,
"step": 345
},
{
"epoch": 6.0,
"eval_accuracy": 0.9719451465936095,
"eval_f1": 0.9712171621320507,
"eval_loss": 0.10845372825860977,
"eval_precision": 0.9711350819772891,
"eval_recall": 0.9712992561627836,
"eval_runtime": 10.8884,
"eval_samples_per_second": 722.693,
"eval_steps_per_second": 2.847,
"step": 414
},
{
"epoch": 7.0,
"eval_accuracy": 0.9737614845535593,
"eval_f1": 0.9729360106642491,
"eval_loss": 0.09838376194238663,
"eval_precision": 0.9727977383942906,
"eval_recall": 0.9730743222474949,
"eval_runtime": 11.171,
"eval_samples_per_second": 704.411,
"eval_steps_per_second": 2.775,
"step": 483
},
{
"epoch": 7.25,
"learning_rate": 4.99e-05,
"loss": 1.1448,
"step": 500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9751691464725203,
"eval_f1": 0.9743453807855432,
"eval_loss": 0.09059575200080872,
"eval_precision": 0.9742256161268514,
"eval_recall": 0.9744651748939571,
"eval_runtime": 10.9149,
"eval_samples_per_second": 720.942,
"eval_steps_per_second": 2.84,
"step": 552
},
{
"epoch": 9.0,
"eval_accuracy": 0.9758427051326684,
"eval_f1": 0.9750796169168182,
"eval_loss": 0.08883357048034668,
"eval_precision": 0.9749410400006145,
"eval_recall": 0.9752182332329256,
"eval_runtime": 10.9703,
"eval_samples_per_second": 717.298,
"eval_steps_per_second": 2.826,
"step": 621
},
{
"epoch": 10.0,
"eval_accuracy": 0.9765313999424826,
"eval_f1": 0.975770544327188,
"eval_loss": 0.08642476052045822,
"eval_precision": 0.9756543517174092,
"eval_recall": 0.9758867646154792,
"eval_runtime": 10.8323,
"eval_samples_per_second": 726.436,
"eval_steps_per_second": 2.862,
"step": 690
},
{
"epoch": 11.0,
"eval_accuracy": 0.9772427989767963,
"eval_f1": 0.9765426312513927,
"eval_loss": 0.08421829342842102,
"eval_precision": 0.9764300969531214,
"eval_recall": 0.9766551914919777,
"eval_runtime": 11.0199,
"eval_samples_per_second": 714.071,
"eval_steps_per_second": 2.813,
"step": 759
},
{
"epoch": 12.0,
"eval_accuracy": 0.9773260477999607,
"eval_f1": 0.9765903503380455,
"eval_loss": 0.08395781368017197,
"eval_precision": 0.9764103115590241,
"eval_recall": 0.9767704555234524,
"eval_runtime": 10.9053,
"eval_samples_per_second": 721.579,
"eval_steps_per_second": 2.843,
"step": 828
},
{
"epoch": 13.0,
"eval_accuracy": 0.9775076815959556,
"eval_f1": 0.976759194523621,
"eval_loss": 0.08459737151861191,
"eval_precision": 0.9765866248790155,
"eval_recall": 0.9769318251675171,
"eval_runtime": 11.552,
"eval_samples_per_second": 681.178,
"eval_steps_per_second": 2.684,
"step": 897
},
{
"epoch": 14.0,
"eval_accuracy": 0.9776363388681187,
"eval_f1": 0.9769464516897355,
"eval_loss": 0.0853676050901413,
"eval_precision": 0.9768151124290356,
"eval_recall": 0.9770778262740517,
"eval_runtime": 11.6095,
"eval_samples_per_second": 677.81,
"eval_steps_per_second": 2.67,
"step": 966
},
{
"epoch": 14.49,
"learning_rate": 3.896017699115044e-05,
"loss": 0.0668,
"step": 1000
},
{
"epoch": 15.0,
"eval_accuracy": 0.9775909304191199,
"eval_f1": 0.976843172808015,
"eval_loss": 0.08673886954784393,
"eval_precision": 0.9766930924287119,
"eval_recall": 0.9769932993176369,
"eval_runtime": 11.3435,
"eval_samples_per_second": 693.7,
"eval_steps_per_second": 2.733,
"step": 1035
},
{
"epoch": 16.0,
"eval_accuracy": 0.9778028365144474,
"eval_f1": 0.9770540169876339,
"eval_loss": 0.0859028622508049,
"eval_precision": 0.9769226632660116,
"eval_recall": 0.9771854060367615,
"eval_runtime": 11.0228,
"eval_samples_per_second": 713.886,
"eval_steps_per_second": 2.812,
"step": 1104
},
{
"epoch": 17.0,
"eval_accuracy": 0.9780979914329393,
"eval_f1": 0.9773386449285661,
"eval_loss": 0.08584524691104889,
"eval_precision": 0.9771922412137507,
"eval_recall": 0.9774850925185959,
"eval_runtime": 11.0063,
"eval_samples_per_second": 714.952,
"eval_steps_per_second": 2.817,
"step": 1173
},
{
"epoch": 18.0,
"eval_accuracy": 0.9784990993990949,
"eval_f1": 0.9777723141226096,
"eval_loss": 0.08779104799032211,
"eval_precision": 0.9776446185757087,
"eval_recall": 0.977900043031905,
"eval_runtime": 11.0526,
"eval_samples_per_second": 711.958,
"eval_steps_per_second": 2.805,
"step": 1242
},
{
"epoch": 19.0,
"eval_accuracy": 0.9784839632494287,
"eval_f1": 0.9777277546442126,
"eval_loss": 0.08868438750505447,
"eval_precision": 0.9775324914738686,
"eval_recall": 0.9779230958382,
"eval_runtime": 10.952,
"eval_samples_per_second": 718.498,
"eval_steps_per_second": 2.831,
"step": 1311
},
{
"epoch": 20.0,
"eval_accuracy": 0.9782796252289343,
"eval_f1": 0.977526622308957,
"eval_loss": 0.09024880826473236,
"eval_precision": 0.9773914513105737,
"eval_recall": 0.9776618307001905,
"eval_runtime": 10.9428,
"eval_samples_per_second": 719.1,
"eval_steps_per_second": 2.833,
"step": 1380
},
{
"epoch": 21.0,
"eval_accuracy": 0.9782115125554361,
"eval_f1": 0.9773772343294419,
"eval_loss": 0.09100791066884995,
"eval_precision": 0.9772233190194889,
"eval_recall": 0.9775311981311858,
"eval_runtime": 10.9089,
"eval_samples_per_second": 721.337,
"eval_steps_per_second": 2.842,
"step": 1449
},
{
"epoch": 21.74,
"learning_rate": 2.7898230088495575e-05,
"loss": 0.0375,
"step": 1500
},
{
"epoch": 22.0,
"eval_accuracy": 0.9783098975282668,
"eval_f1": 0.9775235578160474,
"eval_loss": 0.09260567277669907,
"eval_precision": 0.9773546062789501,
"eval_recall": 0.9776925677752505,
"eval_runtime": 10.9627,
"eval_samples_per_second": 717.797,
"eval_steps_per_second": 2.828,
"step": 1518
},
{
"epoch": 23.0,
"eval_accuracy": 0.9786731651202567,
"eval_f1": 0.9778607567218708,
"eval_loss": 0.09297080338001251,
"eval_precision": 0.9777292945433315,
"eval_recall": 0.9779922542570849,
"eval_runtime": 11.0584,
"eval_samples_per_second": 711.584,
"eval_steps_per_second": 2.803,
"step": 1587
},
{
"epoch": 24.0,
"eval_accuracy": 0.9787034374195892,
"eval_f1": 0.9779114614545398,
"eval_loss": 0.09545727074146271,
"eval_precision": 0.9777461975725918,
"eval_recall": 0.9780767812134997,
"eval_runtime": 12.1178,
"eval_samples_per_second": 649.374,
"eval_steps_per_second": 2.558,
"step": 1656
},
{
"epoch": 25.0,
"eval_accuracy": 0.9787488458685879,
"eval_f1": 0.9779918790071952,
"eval_loss": 0.09549739956855774,
"eval_precision": 0.9778378669042919,
"eval_recall": 0.9781459396323846,
"eval_runtime": 11.1672,
"eval_samples_per_second": 704.655,
"eval_steps_per_second": 2.776,
"step": 1725
},
{
"epoch": 26.0,
"eval_accuracy": 0.9785445078480935,
"eval_f1": 0.977742949116863,
"eval_loss": 0.09780567139387131,
"eval_precision": 0.9775551902662345,
"eval_recall": 0.977930780106965,
"eval_runtime": 10.9619,
"eval_samples_per_second": 717.851,
"eval_steps_per_second": 2.828,
"step": 1794
},
{
"epoch": 27.0,
"eval_accuracy": 0.9782115125554361,
"eval_f1": 0.9773690296457643,
"eval_loss": 0.09968989342451096,
"eval_precision": 0.9772376335742984,
"eval_recall": 0.9775004610561259,
"eval_runtime": 10.8805,
"eval_samples_per_second": 723.22,
"eval_steps_per_second": 2.849,
"step": 1863
},
{
"epoch": 28.0,
"eval_accuracy": 0.9785596439977599,
"eval_f1": 0.9777683870843819,
"eval_loss": 0.10001282393932343,
"eval_precision": 0.9776444468344998,
"eval_recall": 0.9778923587631401,
"eval_runtime": 11.0278,
"eval_samples_per_second": 713.561,
"eval_steps_per_second": 2.811,
"step": 1932
},
{
"epoch": 28.98,
"learning_rate": 1.683628318584071e-05,
"loss": 0.0238,
"step": 2000
},
{
"epoch": 29.0,
"eval_accuracy": 0.9784612590249292,
"eval_f1": 0.9776150651725449,
"eval_loss": 0.10220629721879959,
"eval_precision": 0.977476127922073,
"eval_recall": 0.9777540419253704,
"eval_runtime": 11.5178,
"eval_samples_per_second": 683.205,
"eval_steps_per_second": 2.691,
"step": 2001
},
{
"epoch": 30.0,
"eval_accuracy": 0.9787034374195892,
"eval_f1": 0.9778532436450527,
"eval_loss": 0.10299359261989594,
"eval_precision": 0.9777142725449978,
"eval_recall": 0.9779922542570849,
"eval_runtime": 11.5247,
"eval_samples_per_second": 682.796,
"eval_steps_per_second": 2.69,
"step": 2070
},
{
"epoch": 31.0,
"eval_accuracy": 0.9786504608957574,
"eval_f1": 0.9778916595277151,
"eval_loss": 0.10408657044172287,
"eval_precision": 0.9777526829680502,
"eval_recall": 0.9780306756009098,
"eval_runtime": 11.971,
"eval_samples_per_second": 657.341,
"eval_steps_per_second": 2.59,
"step": 2139
},
{
"epoch": 32.0,
"eval_accuracy": 0.9787185735692554,
"eval_f1": 0.9779299058419483,
"eval_loss": 0.10540538281202316,
"eval_precision": 0.9777984343671018,
"eval_recall": 0.9780614126759698,
"eval_runtime": 11.5819,
"eval_samples_per_second": 679.422,
"eval_steps_per_second": 2.677,
"step": 2208
},
{
"epoch": 33.0,
"eval_accuracy": 0.978635324746091,
"eval_f1": 0.9777916076017933,
"eval_loss": 0.10549841076135635,
"eval_precision": 0.9776601547195612,
"eval_recall": 0.9779230958382,
"eval_runtime": 12.6843,
"eval_samples_per_second": 620.372,
"eval_steps_per_second": 2.444,
"step": 2277
},
{
"epoch": 34.0,
"eval_accuracy": 0.9787488458685879,
"eval_f1": 0.9778990030925261,
"eval_loss": 0.10634943097829819,
"eval_precision": 0.9777750462859821,
"eval_recall": 0.9780229913321449,
"eval_runtime": 11.6157,
"eval_samples_per_second": 677.447,
"eval_steps_per_second": 2.669,
"step": 2346
},
{
"epoch": 35.0,
"eval_accuracy": 0.9789456158142492,
"eval_f1": 0.978172514732208,
"eval_loss": 0.10656328499317169,
"eval_precision": 0.9780147183087772,
"eval_recall": 0.9783303620827442,
"eval_runtime": 11.2324,
"eval_samples_per_second": 700.56,
"eval_steps_per_second": 2.76,
"step": 2415
},
{
"epoch": 36.0,
"eval_accuracy": 0.978756413943421,
"eval_f1": 0.9780024740493733,
"eval_loss": 0.10749900341033936,
"eval_precision": 0.9778897715225174,
"eval_recall": 0.9781152025573246,
"eval_runtime": 11.1336,
"eval_samples_per_second": 706.779,
"eval_steps_per_second": 2.784,
"step": 2484
},
{
"epoch": 36.23,
"learning_rate": 5.774336283185841e-06,
"loss": 0.0167,
"step": 2500
},
{
"epoch": 37.0,
"eval_accuracy": 0.9789153435149167,
"eval_f1": 0.9781257443163047,
"eval_loss": 0.10826310515403748,
"eval_precision": 0.977998002611969,
"eval_recall": 0.9782535193950944,
"eval_runtime": 11.1448,
"eval_samples_per_second": 706.071,
"eval_steps_per_second": 2.782,
"step": 2553
},
{
"epoch": 38.0,
"eval_accuracy": 0.9789002073652504,
"eval_f1": 0.9781295019304278,
"eval_loss": 0.1082502156496048,
"eval_precision": 0.978005515906245,
"eval_recall": 0.9782535193950944,
"eval_runtime": 11.2628,
"eval_samples_per_second": 698.669,
"eval_steps_per_second": 2.752,
"step": 2622
},
{
"epoch": 39.0,
"eval_accuracy": 0.9788547989162517,
"eval_f1": 0.9780682040821029,
"eval_loss": 0.10867351293563843,
"eval_precision": 0.9779367140146423,
"eval_recall": 0.9781997295137395,
"eval_runtime": 11.7323,
"eval_samples_per_second": 670.712,
"eval_steps_per_second": 2.642,
"step": 2691
},
{
"epoch": 40.0,
"eval_accuracy": 0.9788926392904173,
"eval_f1": 0.978110377786144,
"eval_loss": 0.1087782010436058,
"eval_precision": 0.9779826380886533,
"eval_recall": 0.9782381508575644,
"eval_runtime": 11.2634,
"eval_samples_per_second": 698.633,
"eval_steps_per_second": 2.752,
"step": 2760
},
{
"epoch": 40.0,
"step": 2760,
"total_flos": 1.673403126150724e+17,
"train_loss": 0.23496506378270576,
"train_runtime": 2350.6684,
"train_samples_per_second": 1204.968,
"train_steps_per_second": 1.174
}
],
"max_steps": 2760,
"num_train_epochs": 40,
"total_flos": 1.673403126150724e+17,
"trial_name": null,
"trial_params": null
}