|
{ |
|
"best_metric": 0.978172514732208, |
|
"best_model_checkpoint": "models/pos_final_xlm_nl/checkpoint-2415", |
|
"epoch": 39.99638989169675, |
|
"global_step": 2760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.3200084762438131, |
|
"eval_f1": 0.2160928249139116, |
|
"eval_loss": 3.483713388442993, |
|
"eval_precision": 0.2936494317356812, |
|
"eval_recall": 0.17094424294584126, |
|
"eval_runtime": 10.7415, |
|
"eval_samples_per_second": 732.58, |
|
"eval_steps_per_second": 2.886, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8496904657393253, |
|
"eval_f1": 0.8458663165617639, |
|
"eval_loss": 0.8298526406288147, |
|
"eval_precision": 0.8501404908642128, |
|
"eval_recall": 0.8416349050224381, |
|
"eval_runtime": 11.2158, |
|
"eval_samples_per_second": 701.6, |
|
"eval_steps_per_second": 2.764, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9429140115337461, |
|
"eval_f1": 0.9413686917810061, |
|
"eval_loss": 0.27647557854652405, |
|
"eval_precision": 0.941929974380871, |
|
"eval_recall": 0.9408080777033258, |
|
"eval_runtime": 10.9493, |
|
"eval_samples_per_second": 718.679, |
|
"eval_steps_per_second": 2.831, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.961107663432576, |
|
"eval_f1": 0.9598816317903192, |
|
"eval_loss": 0.17041535675525665, |
|
"eval_precision": 0.9601288546848211, |
|
"eval_recall": 0.9596345361775374, |
|
"eval_runtime": 10.8629, |
|
"eval_samples_per_second": 724.391, |
|
"eval_steps_per_second": 2.854, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.969288752327183, |
|
"eval_f1": 0.9685681024447033, |
|
"eval_loss": 0.1259436309337616, |
|
"eval_precision": 0.9685494963155347, |
|
"eval_recall": 0.9685867092887441, |
|
"eval_runtime": 11.2662, |
|
"eval_samples_per_second": 698.46, |
|
"eval_steps_per_second": 2.752, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9719451465936095, |
|
"eval_f1": 0.9712171621320507, |
|
"eval_loss": 0.10845372825860977, |
|
"eval_precision": 0.9711350819772891, |
|
"eval_recall": 0.9712992561627836, |
|
"eval_runtime": 10.8884, |
|
"eval_samples_per_second": 722.693, |
|
"eval_steps_per_second": 2.847, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9737614845535593, |
|
"eval_f1": 0.9729360106642491, |
|
"eval_loss": 0.09838376194238663, |
|
"eval_precision": 0.9727977383942906, |
|
"eval_recall": 0.9730743222474949, |
|
"eval_runtime": 11.171, |
|
"eval_samples_per_second": 704.411, |
|
"eval_steps_per_second": 2.775, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 4.99e-05, |
|
"loss": 1.1448, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9751691464725203, |
|
"eval_f1": 0.9743453807855432, |
|
"eval_loss": 0.09059575200080872, |
|
"eval_precision": 0.9742256161268514, |
|
"eval_recall": 0.9744651748939571, |
|
"eval_runtime": 10.9149, |
|
"eval_samples_per_second": 720.942, |
|
"eval_steps_per_second": 2.84, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9758427051326684, |
|
"eval_f1": 0.9750796169168182, |
|
"eval_loss": 0.08883357048034668, |
|
"eval_precision": 0.9749410400006145, |
|
"eval_recall": 0.9752182332329256, |
|
"eval_runtime": 10.9703, |
|
"eval_samples_per_second": 717.298, |
|
"eval_steps_per_second": 2.826, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9765313999424826, |
|
"eval_f1": 0.975770544327188, |
|
"eval_loss": 0.08642476052045822, |
|
"eval_precision": 0.9756543517174092, |
|
"eval_recall": 0.9758867646154792, |
|
"eval_runtime": 10.8323, |
|
"eval_samples_per_second": 726.436, |
|
"eval_steps_per_second": 2.862, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9772427989767963, |
|
"eval_f1": 0.9765426312513927, |
|
"eval_loss": 0.08421829342842102, |
|
"eval_precision": 0.9764300969531214, |
|
"eval_recall": 0.9766551914919777, |
|
"eval_runtime": 11.0199, |
|
"eval_samples_per_second": 714.071, |
|
"eval_steps_per_second": 2.813, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9773260477999607, |
|
"eval_f1": 0.9765903503380455, |
|
"eval_loss": 0.08395781368017197, |
|
"eval_precision": 0.9764103115590241, |
|
"eval_recall": 0.9767704555234524, |
|
"eval_runtime": 10.9053, |
|
"eval_samples_per_second": 721.579, |
|
"eval_steps_per_second": 2.843, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9775076815959556, |
|
"eval_f1": 0.976759194523621, |
|
"eval_loss": 0.08459737151861191, |
|
"eval_precision": 0.9765866248790155, |
|
"eval_recall": 0.9769318251675171, |
|
"eval_runtime": 11.552, |
|
"eval_samples_per_second": 681.178, |
|
"eval_steps_per_second": 2.684, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9776363388681187, |
|
"eval_f1": 0.9769464516897355, |
|
"eval_loss": 0.0853676050901413, |
|
"eval_precision": 0.9768151124290356, |
|
"eval_recall": 0.9770778262740517, |
|
"eval_runtime": 11.6095, |
|
"eval_samples_per_second": 677.81, |
|
"eval_steps_per_second": 2.67, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.896017699115044e-05, |
|
"loss": 0.0668, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9775909304191199, |
|
"eval_f1": 0.976843172808015, |
|
"eval_loss": 0.08673886954784393, |
|
"eval_precision": 0.9766930924287119, |
|
"eval_recall": 0.9769932993176369, |
|
"eval_runtime": 11.3435, |
|
"eval_samples_per_second": 693.7, |
|
"eval_steps_per_second": 2.733, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9778028365144474, |
|
"eval_f1": 0.9770540169876339, |
|
"eval_loss": 0.0859028622508049, |
|
"eval_precision": 0.9769226632660116, |
|
"eval_recall": 0.9771854060367615, |
|
"eval_runtime": 11.0228, |
|
"eval_samples_per_second": 713.886, |
|
"eval_steps_per_second": 2.812, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9780979914329393, |
|
"eval_f1": 0.9773386449285661, |
|
"eval_loss": 0.08584524691104889, |
|
"eval_precision": 0.9771922412137507, |
|
"eval_recall": 0.9774850925185959, |
|
"eval_runtime": 11.0063, |
|
"eval_samples_per_second": 714.952, |
|
"eval_steps_per_second": 2.817, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9784990993990949, |
|
"eval_f1": 0.9777723141226096, |
|
"eval_loss": 0.08779104799032211, |
|
"eval_precision": 0.9776446185757087, |
|
"eval_recall": 0.977900043031905, |
|
"eval_runtime": 11.0526, |
|
"eval_samples_per_second": 711.958, |
|
"eval_steps_per_second": 2.805, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9784839632494287, |
|
"eval_f1": 0.9777277546442126, |
|
"eval_loss": 0.08868438750505447, |
|
"eval_precision": 0.9775324914738686, |
|
"eval_recall": 0.9779230958382, |
|
"eval_runtime": 10.952, |
|
"eval_samples_per_second": 718.498, |
|
"eval_steps_per_second": 2.831, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9782796252289343, |
|
"eval_f1": 0.977526622308957, |
|
"eval_loss": 0.09024880826473236, |
|
"eval_precision": 0.9773914513105737, |
|
"eval_recall": 0.9776618307001905, |
|
"eval_runtime": 10.9428, |
|
"eval_samples_per_second": 719.1, |
|
"eval_steps_per_second": 2.833, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.9782115125554361, |
|
"eval_f1": 0.9773772343294419, |
|
"eval_loss": 0.09100791066884995, |
|
"eval_precision": 0.9772233190194889, |
|
"eval_recall": 0.9775311981311858, |
|
"eval_runtime": 10.9089, |
|
"eval_samples_per_second": 721.337, |
|
"eval_steps_per_second": 2.842, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 2.7898230088495575e-05, |
|
"loss": 0.0375, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9783098975282668, |
|
"eval_f1": 0.9775235578160474, |
|
"eval_loss": 0.09260567277669907, |
|
"eval_precision": 0.9773546062789501, |
|
"eval_recall": 0.9776925677752505, |
|
"eval_runtime": 10.9627, |
|
"eval_samples_per_second": 717.797, |
|
"eval_steps_per_second": 2.828, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9786731651202567, |
|
"eval_f1": 0.9778607567218708, |
|
"eval_loss": 0.09297080338001251, |
|
"eval_precision": 0.9777292945433315, |
|
"eval_recall": 0.9779922542570849, |
|
"eval_runtime": 11.0584, |
|
"eval_samples_per_second": 711.584, |
|
"eval_steps_per_second": 2.803, |
|
"step": 1587 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9787034374195892, |
|
"eval_f1": 0.9779114614545398, |
|
"eval_loss": 0.09545727074146271, |
|
"eval_precision": 0.9777461975725918, |
|
"eval_recall": 0.9780767812134997, |
|
"eval_runtime": 12.1178, |
|
"eval_samples_per_second": 649.374, |
|
"eval_steps_per_second": 2.558, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9787488458685879, |
|
"eval_f1": 0.9779918790071952, |
|
"eval_loss": 0.09549739956855774, |
|
"eval_precision": 0.9778378669042919, |
|
"eval_recall": 0.9781459396323846, |
|
"eval_runtime": 11.1672, |
|
"eval_samples_per_second": 704.655, |
|
"eval_steps_per_second": 2.776, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9785445078480935, |
|
"eval_f1": 0.977742949116863, |
|
"eval_loss": 0.09780567139387131, |
|
"eval_precision": 0.9775551902662345, |
|
"eval_recall": 0.977930780106965, |
|
"eval_runtime": 10.9619, |
|
"eval_samples_per_second": 717.851, |
|
"eval_steps_per_second": 2.828, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9782115125554361, |
|
"eval_f1": 0.9773690296457643, |
|
"eval_loss": 0.09968989342451096, |
|
"eval_precision": 0.9772376335742984, |
|
"eval_recall": 0.9775004610561259, |
|
"eval_runtime": 10.8805, |
|
"eval_samples_per_second": 723.22, |
|
"eval_steps_per_second": 2.849, |
|
"step": 1863 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9785596439977599, |
|
"eval_f1": 0.9777683870843819, |
|
"eval_loss": 0.10001282393932343, |
|
"eval_precision": 0.9776444468344998, |
|
"eval_recall": 0.9778923587631401, |
|
"eval_runtime": 11.0278, |
|
"eval_samples_per_second": 713.561, |
|
"eval_steps_per_second": 2.811, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 28.98, |
|
"learning_rate": 1.683628318584071e-05, |
|
"loss": 0.0238, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.9784612590249292, |
|
"eval_f1": 0.9776150651725449, |
|
"eval_loss": 0.10220629721879959, |
|
"eval_precision": 0.977476127922073, |
|
"eval_recall": 0.9777540419253704, |
|
"eval_runtime": 11.5178, |
|
"eval_samples_per_second": 683.205, |
|
"eval_steps_per_second": 2.691, |
|
"step": 2001 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9787034374195892, |
|
"eval_f1": 0.9778532436450527, |
|
"eval_loss": 0.10299359261989594, |
|
"eval_precision": 0.9777142725449978, |
|
"eval_recall": 0.9779922542570849, |
|
"eval_runtime": 11.5247, |
|
"eval_samples_per_second": 682.796, |
|
"eval_steps_per_second": 2.69, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9786504608957574, |
|
"eval_f1": 0.9778916595277151, |
|
"eval_loss": 0.10408657044172287, |
|
"eval_precision": 0.9777526829680502, |
|
"eval_recall": 0.9780306756009098, |
|
"eval_runtime": 11.971, |
|
"eval_samples_per_second": 657.341, |
|
"eval_steps_per_second": 2.59, |
|
"step": 2139 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9787185735692554, |
|
"eval_f1": 0.9779299058419483, |
|
"eval_loss": 0.10540538281202316, |
|
"eval_precision": 0.9777984343671018, |
|
"eval_recall": 0.9780614126759698, |
|
"eval_runtime": 11.5819, |
|
"eval_samples_per_second": 679.422, |
|
"eval_steps_per_second": 2.677, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.978635324746091, |
|
"eval_f1": 0.9777916076017933, |
|
"eval_loss": 0.10549841076135635, |
|
"eval_precision": 0.9776601547195612, |
|
"eval_recall": 0.9779230958382, |
|
"eval_runtime": 12.6843, |
|
"eval_samples_per_second": 620.372, |
|
"eval_steps_per_second": 2.444, |
|
"step": 2277 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9787488458685879, |
|
"eval_f1": 0.9778990030925261, |
|
"eval_loss": 0.10634943097829819, |
|
"eval_precision": 0.9777750462859821, |
|
"eval_recall": 0.9780229913321449, |
|
"eval_runtime": 11.6157, |
|
"eval_samples_per_second": 677.447, |
|
"eval_steps_per_second": 2.669, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9789456158142492, |
|
"eval_f1": 0.978172514732208, |
|
"eval_loss": 0.10656328499317169, |
|
"eval_precision": 0.9780147183087772, |
|
"eval_recall": 0.9783303620827442, |
|
"eval_runtime": 11.2324, |
|
"eval_samples_per_second": 700.56, |
|
"eval_steps_per_second": 2.76, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.978756413943421, |
|
"eval_f1": 0.9780024740493733, |
|
"eval_loss": 0.10749900341033936, |
|
"eval_precision": 0.9778897715225174, |
|
"eval_recall": 0.9781152025573246, |
|
"eval_runtime": 11.1336, |
|
"eval_samples_per_second": 706.779, |
|
"eval_steps_per_second": 2.784, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 36.23, |
|
"learning_rate": 5.774336283185841e-06, |
|
"loss": 0.0167, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9789153435149167, |
|
"eval_f1": 0.9781257443163047, |
|
"eval_loss": 0.10826310515403748, |
|
"eval_precision": 0.977998002611969, |
|
"eval_recall": 0.9782535193950944, |
|
"eval_runtime": 11.1448, |
|
"eval_samples_per_second": 706.071, |
|
"eval_steps_per_second": 2.782, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9789002073652504, |
|
"eval_f1": 0.9781295019304278, |
|
"eval_loss": 0.1082502156496048, |
|
"eval_precision": 0.978005515906245, |
|
"eval_recall": 0.9782535193950944, |
|
"eval_runtime": 11.2628, |
|
"eval_samples_per_second": 698.669, |
|
"eval_steps_per_second": 2.752, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9788547989162517, |
|
"eval_f1": 0.9780682040821029, |
|
"eval_loss": 0.10867351293563843, |
|
"eval_precision": 0.9779367140146423, |
|
"eval_recall": 0.9781997295137395, |
|
"eval_runtime": 11.7323, |
|
"eval_samples_per_second": 670.712, |
|
"eval_steps_per_second": 2.642, |
|
"step": 2691 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9788926392904173, |
|
"eval_f1": 0.978110377786144, |
|
"eval_loss": 0.1087782010436058, |
|
"eval_precision": 0.9779826380886533, |
|
"eval_recall": 0.9782381508575644, |
|
"eval_runtime": 11.2634, |
|
"eval_samples_per_second": 698.633, |
|
"eval_steps_per_second": 2.752, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 2760, |
|
"total_flos": 1.673403126150724e+17, |
|
"train_loss": 0.23496506378270576, |
|
"train_runtime": 2350.6684, |
|
"train_samples_per_second": 1204.968, |
|
"train_steps_per_second": 1.174 |
|
} |
|
], |
|
"max_steps": 2760, |
|
"num_train_epochs": 40, |
|
"total_flos": 1.673403126150724e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|