xlm-roberta-large-lemma-es / trainer_state.json
oliat's picture
uploading model files
6b46ae3 verified
{
"best_metric": 0.9590698631546456,
"best_model_checkpoint": ".//debugged_es_gsd_ses_udpipe_8_0.1_0.00002_20_04-22-24_22-38/checkpoint-32020",
"epoch": 19.99968779269435,
"global_step": 32020,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"learning_rate": 1.6659729448491154e-05,
"loss": 0.9658,
"step": 1601
},
{
"epoch": 1.0,
"eval_accuracy": 0.9632624280173934,
"eval_f1": 0.9191161314259801,
"eval_loss": 0.153659850358963,
"eval_precision": 0.9176029962546817,
"eval_recall": 0.9206342651856134,
"eval_runtime": 12.5305,
"eval_samples_per_second": 127.848,
"eval_steps_per_second": 16.041,
"step": 1601
},
{
"epoch": 2.0,
"learning_rate": 1.9149445145856868e-05,
"loss": 0.118,
"step": 3202
},
{
"epoch": 2.0,
"eval_accuracy": 0.9749441767540251,
"eval_f1": 0.9426544669476715,
"eval_loss": 0.09417378902435303,
"eval_precision": 0.9387746744205094,
"eval_recall": 0.9465664615765784,
"eval_runtime": 12.5537,
"eval_samples_per_second": 127.612,
"eval_steps_per_second": 16.011,
"step": 3202
},
{
"epoch": 3.0,
"learning_rate": 1.8085587082198154e-05,
"loss": 0.0727,
"step": 4803
},
{
"epoch": 3.0,
"eval_accuracy": 0.9784228463979316,
"eval_f1": 0.9501459044017919,
"eval_loss": 0.07907041162252426,
"eval_precision": 0.9457148701165883,
"eval_recall": 0.9546186563158112,
"eval_runtime": 12.5508,
"eval_samples_per_second": 127.642,
"eval_steps_per_second": 16.015,
"step": 4803
},
{
"epoch": 4.0,
"learning_rate": 1.702172901853944e-05,
"loss": 0.0479,
"step": 6404
},
{
"epoch": 4.0,
"eval_accuracy": 0.9803267128922317,
"eval_f1": 0.9543441998393706,
"eval_loss": 0.07787933200597763,
"eval_precision": 0.9518938460274423,
"eval_recall": 0.9568072015526283,
"eval_runtime": 12.5524,
"eval_samples_per_second": 127.625,
"eval_steps_per_second": 16.013,
"step": 6404
},
{
"epoch": 5.0,
"learning_rate": 1.5957870954880726e-05,
"loss": 0.0342,
"step": 8005
},
{
"epoch": 5.0,
"eval_accuracy": 0.9791984957104243,
"eval_f1": 0.951995205720072,
"eval_loss": 0.08329462260007858,
"eval_precision": 0.9528418962521717,
"eval_recall": 0.9511500185819879,
"eval_runtime": 12.5712,
"eval_samples_per_second": 127.435,
"eval_steps_per_second": 15.989,
"step": 8005
},
{
"epoch": 6.0,
"learning_rate": 1.489401289122201e-05,
"loss": 0.0233,
"step": 9606
},
{
"epoch": 6.0,
"eval_accuracy": 0.9811023622047244,
"eval_f1": 0.956315778633658,
"eval_loss": 0.08429370075464249,
"eval_precision": 0.9539776462853385,
"eval_recall": 0.9586654003386051,
"eval_runtime": 12.5589,
"eval_samples_per_second": 127.559,
"eval_steps_per_second": 16.005,
"step": 9606
},
{
"epoch": 7.0,
"learning_rate": 1.3830154827563293e-05,
"loss": 0.0168,
"step": 11207
},
{
"epoch": 7.0,
"eval_accuracy": 0.9807262898107886,
"eval_f1": 0.9542881840821619,
"eval_loss": 0.09856697916984558,
"eval_precision": 0.9512925728354534,
"eval_recall": 0.9573027212288888,
"eval_runtime": 12.7018,
"eval_samples_per_second": 126.124,
"eval_steps_per_second": 15.825,
"step": 11207
},
{
"epoch": 8.0,
"learning_rate": 1.276629676390458e-05,
"loss": 0.0121,
"step": 12808
},
{
"epoch": 8.0,
"eval_accuracy": 0.9815254436479022,
"eval_f1": 0.9566986580077682,
"eval_loss": 0.10770849883556366,
"eval_precision": 0.9522952295229523,
"eval_recall": 0.9611429987199075,
"eval_runtime": 12.5218,
"eval_samples_per_second": 127.937,
"eval_steps_per_second": 16.052,
"step": 12808
},
{
"epoch": 9.0,
"learning_rate": 1.1702438700245863e-05,
"loss": 0.0102,
"step": 14409
},
{
"epoch": 9.0,
"eval_accuracy": 0.9800916676460218,
"eval_f1": 0.9545660129106535,
"eval_loss": 0.12106840312480927,
"eval_precision": 0.9505015353121802,
"eval_recall": 0.9586654003386051,
"eval_runtime": 12.5371,
"eval_samples_per_second": 127.781,
"eval_steps_per_second": 16.032,
"step": 14409
},
{
"epoch": 10.0,
"learning_rate": 1.063858063658715e-05,
"loss": 0.0073,
"step": 16010
},
{
"epoch": 10.0,
"eval_accuracy": 0.9807732988600305,
"eval_f1": 0.9554837514663813,
"eval_loss": 0.12773701548576355,
"eval_precision": 0.9524454291810274,
"eval_recall": 0.95854152041954,
"eval_runtime": 12.7247,
"eval_samples_per_second": 125.897,
"eval_steps_per_second": 15.796,
"step": 16010
},
{
"epoch": 11.0,
"learning_rate": 9.574722572928434e-06,
"loss": 0.0068,
"step": 17611
},
{
"epoch": 11.0,
"eval_accuracy": 0.9810083441062405,
"eval_f1": 0.9555354647817867,
"eval_loss": 0.13228633999824524,
"eval_precision": 0.9509631507913787,
"eval_recall": 0.9601519593673865,
"eval_runtime": 12.7411,
"eval_samples_per_second": 125.735,
"eval_steps_per_second": 15.776,
"step": 17611
},
{
"epoch": 12.0,
"learning_rate": 8.51086450926972e-06,
"loss": 0.0051,
"step": 19212
},
{
"epoch": 12.0,
"eval_accuracy": 0.9815724526971442,
"eval_f1": 0.95762694407251,
"eval_loss": 0.12687553465366364,
"eval_precision": 0.9554422887208155,
"eval_recall": 0.9598216129165462,
"eval_runtime": 12.7033,
"eval_samples_per_second": 126.109,
"eval_steps_per_second": 15.823,
"step": 19212
},
{
"epoch": 13.0,
"learning_rate": 7.447006445611005e-06,
"loss": 0.0035,
"step": 20813
},
{
"epoch": 13.0,
"eval_accuracy": 0.982089552238806,
"eval_f1": 0.9584635630860339,
"eval_loss": 0.13741779327392578,
"eval_precision": 0.9545771042391972,
"eval_recall": 0.9623817979105587,
"eval_runtime": 12.6726,
"eval_samples_per_second": 126.414,
"eval_steps_per_second": 15.861,
"step": 20813
},
{
"epoch": 14.0,
"learning_rate": 6.38314838195229e-06,
"loss": 0.0027,
"step": 22414
},
{
"epoch": 14.0,
"eval_accuracy": 0.9823481020096368,
"eval_f1": 0.9589226973684211,
"eval_loss": 0.14472806453704834,
"eval_precision": 0.9548785980428285,
"eval_recall": 0.9630011975058843,
"eval_runtime": 12.6834,
"eval_samples_per_second": 126.306,
"eval_steps_per_second": 15.847,
"step": 22414
},
{
"epoch": 15.0,
"learning_rate": 5.319290318293575e-06,
"loss": 0.003,
"step": 24015
},
{
"epoch": 15.0,
"eval_accuracy": 0.9819015160418381,
"eval_f1": 0.9580811200329422,
"eval_loss": 0.1390347182750702,
"eval_precision": 0.955405904816655,
"eval_recall": 0.9607713589627122,
"eval_runtime": 13.4492,
"eval_samples_per_second": 119.115,
"eval_steps_per_second": 14.945,
"step": 24015
},
{
"epoch": 16.0,
"learning_rate": 4.25543225463486e-06,
"loss": 0.0025,
"step": 25616
},
{
"epoch": 16.0,
"eval_accuracy": 0.9812433893524504,
"eval_f1": 0.9562028758600799,
"eval_loss": 0.1537328064441681,
"eval_precision": 0.9540801644398766,
"eval_recall": 0.9583350538877647,
"eval_runtime": 12.9219,
"eval_samples_per_second": 123.976,
"eval_steps_per_second": 15.555,
"step": 25616
},
{
"epoch": 17.0,
"learning_rate": 3.191574190976145e-06,
"loss": 0.0029,
"step": 27217
},
{
"epoch": 17.0,
"eval_accuracy": 0.982207074861911,
"eval_f1": 0.9580219463487947,
"eval_loss": 0.15050023794174194,
"eval_precision": 0.9552882246674331,
"eval_recall": 0.9607713589627122,
"eval_runtime": 12.7344,
"eval_samples_per_second": 125.801,
"eval_steps_per_second": 15.784,
"step": 27217
},
{
"epoch": 18.0,
"learning_rate": 2.12771612731743e-06,
"loss": 0.0022,
"step": 28818
},
{
"epoch": 18.0,
"eval_accuracy": 0.9823245974850159,
"eval_f1": 0.9589283509609449,
"eval_loss": 0.14898641407489777,
"eval_precision": 0.955703211517165,
"eval_recall": 0.9621753313787835,
"eval_runtime": 12.8609,
"eval_samples_per_second": 124.563,
"eval_steps_per_second": 15.629,
"step": 28818
},
{
"epoch": 19.0,
"learning_rate": 1.063858063658715e-06,
"loss": 0.0021,
"step": 30419
},
{
"epoch": 19.0,
"eval_accuracy": 0.982019038664943,
"eval_f1": 0.9582288369033425,
"eval_loss": 0.15266422927379608,
"eval_precision": 0.954070981210856,
"eval_recall": 0.9624230912169137,
"eval_runtime": 12.7956,
"eval_samples_per_second": 125.199,
"eval_steps_per_second": 15.709,
"step": 30419
},
{
"epoch": 20.0,
"learning_rate": 0.0,
"loss": 0.0018,
"step": 32020
},
{
"epoch": 20.0,
"eval_accuracy": 0.9824421201081208,
"eval_f1": 0.9590698631546456,
"eval_loss": 0.15378834307193756,
"eval_precision": 0.9559028632373452,
"eval_recall": 0.9622579179914936,
"eval_runtime": 12.9229,
"eval_samples_per_second": 123.966,
"eval_steps_per_second": 15.554,
"step": 32020
},
{
"epoch": 20.0,
"step": 32020,
"total_flos": 2.8281562629741216e+16,
"train_loss": 0.06704426395528247,
"train_runtime": 9262.2202,
"train_samples_per_second": 27.663,
"train_steps_per_second": 3.457
}
],
"max_steps": 32020,
"num_train_epochs": 20,
"total_flos": 2.8281562629741216e+16,
"trial_name": null,
"trial_params": null
}