|
{ |
|
"best_metric": 0.9590698631546456, |
|
"best_model_checkpoint": ".//debugged_es_gsd_ses_udpipe_8_0.1_0.00002_20_04-22-24_22-38/checkpoint-32020", |
|
"epoch": 19.99968779269435, |
|
"global_step": 32020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6659729448491154e-05, |
|
"loss": 0.9658, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9632624280173934, |
|
"eval_f1": 0.9191161314259801, |
|
"eval_loss": 0.153659850358963, |
|
"eval_precision": 0.9176029962546817, |
|
"eval_recall": 0.9206342651856134, |
|
"eval_runtime": 12.5305, |
|
"eval_samples_per_second": 127.848, |
|
"eval_steps_per_second": 16.041, |
|
"step": 1601 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.9149445145856868e-05, |
|
"loss": 0.118, |
|
"step": 3202 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9749441767540251, |
|
"eval_f1": 0.9426544669476715, |
|
"eval_loss": 0.09417378902435303, |
|
"eval_precision": 0.9387746744205094, |
|
"eval_recall": 0.9465664615765784, |
|
"eval_runtime": 12.5537, |
|
"eval_samples_per_second": 127.612, |
|
"eval_steps_per_second": 16.011, |
|
"step": 3202 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.8085587082198154e-05, |
|
"loss": 0.0727, |
|
"step": 4803 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9784228463979316, |
|
"eval_f1": 0.9501459044017919, |
|
"eval_loss": 0.07907041162252426, |
|
"eval_precision": 0.9457148701165883, |
|
"eval_recall": 0.9546186563158112, |
|
"eval_runtime": 12.5508, |
|
"eval_samples_per_second": 127.642, |
|
"eval_steps_per_second": 16.015, |
|
"step": 4803 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.702172901853944e-05, |
|
"loss": 0.0479, |
|
"step": 6404 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9803267128922317, |
|
"eval_f1": 0.9543441998393706, |
|
"eval_loss": 0.07787933200597763, |
|
"eval_precision": 0.9518938460274423, |
|
"eval_recall": 0.9568072015526283, |
|
"eval_runtime": 12.5524, |
|
"eval_samples_per_second": 127.625, |
|
"eval_steps_per_second": 16.013, |
|
"step": 6404 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.5957870954880726e-05, |
|
"loss": 0.0342, |
|
"step": 8005 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9791984957104243, |
|
"eval_f1": 0.951995205720072, |
|
"eval_loss": 0.08329462260007858, |
|
"eval_precision": 0.9528418962521717, |
|
"eval_recall": 0.9511500185819879, |
|
"eval_runtime": 12.5712, |
|
"eval_samples_per_second": 127.435, |
|
"eval_steps_per_second": 15.989, |
|
"step": 8005 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.489401289122201e-05, |
|
"loss": 0.0233, |
|
"step": 9606 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9811023622047244, |
|
"eval_f1": 0.956315778633658, |
|
"eval_loss": 0.08429370075464249, |
|
"eval_precision": 0.9539776462853385, |
|
"eval_recall": 0.9586654003386051, |
|
"eval_runtime": 12.5589, |
|
"eval_samples_per_second": 127.559, |
|
"eval_steps_per_second": 16.005, |
|
"step": 9606 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.3830154827563293e-05, |
|
"loss": 0.0168, |
|
"step": 11207 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9807262898107886, |
|
"eval_f1": 0.9542881840821619, |
|
"eval_loss": 0.09856697916984558, |
|
"eval_precision": 0.9512925728354534, |
|
"eval_recall": 0.9573027212288888, |
|
"eval_runtime": 12.7018, |
|
"eval_samples_per_second": 126.124, |
|
"eval_steps_per_second": 15.825, |
|
"step": 11207 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 1.276629676390458e-05, |
|
"loss": 0.0121, |
|
"step": 12808 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9815254436479022, |
|
"eval_f1": 0.9566986580077682, |
|
"eval_loss": 0.10770849883556366, |
|
"eval_precision": 0.9522952295229523, |
|
"eval_recall": 0.9611429987199075, |
|
"eval_runtime": 12.5218, |
|
"eval_samples_per_second": 127.937, |
|
"eval_steps_per_second": 16.052, |
|
"step": 12808 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.1702438700245863e-05, |
|
"loss": 0.0102, |
|
"step": 14409 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9800916676460218, |
|
"eval_f1": 0.9545660129106535, |
|
"eval_loss": 0.12106840312480927, |
|
"eval_precision": 0.9505015353121802, |
|
"eval_recall": 0.9586654003386051, |
|
"eval_runtime": 12.5371, |
|
"eval_samples_per_second": 127.781, |
|
"eval_steps_per_second": 16.032, |
|
"step": 14409 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.063858063658715e-05, |
|
"loss": 0.0073, |
|
"step": 16010 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9807732988600305, |
|
"eval_f1": 0.9554837514663813, |
|
"eval_loss": 0.12773701548576355, |
|
"eval_precision": 0.9524454291810274, |
|
"eval_recall": 0.95854152041954, |
|
"eval_runtime": 12.7247, |
|
"eval_samples_per_second": 125.897, |
|
"eval_steps_per_second": 15.796, |
|
"step": 16010 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.574722572928434e-06, |
|
"loss": 0.0068, |
|
"step": 17611 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9810083441062405, |
|
"eval_f1": 0.9555354647817867, |
|
"eval_loss": 0.13228633999824524, |
|
"eval_precision": 0.9509631507913787, |
|
"eval_recall": 0.9601519593673865, |
|
"eval_runtime": 12.7411, |
|
"eval_samples_per_second": 125.735, |
|
"eval_steps_per_second": 15.776, |
|
"step": 17611 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 8.51086450926972e-06, |
|
"loss": 0.0051, |
|
"step": 19212 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9815724526971442, |
|
"eval_f1": 0.95762694407251, |
|
"eval_loss": 0.12687553465366364, |
|
"eval_precision": 0.9554422887208155, |
|
"eval_recall": 0.9598216129165462, |
|
"eval_runtime": 12.7033, |
|
"eval_samples_per_second": 126.109, |
|
"eval_steps_per_second": 15.823, |
|
"step": 19212 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.447006445611005e-06, |
|
"loss": 0.0035, |
|
"step": 20813 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.982089552238806, |
|
"eval_f1": 0.9584635630860339, |
|
"eval_loss": 0.13741779327392578, |
|
"eval_precision": 0.9545771042391972, |
|
"eval_recall": 0.9623817979105587, |
|
"eval_runtime": 12.6726, |
|
"eval_samples_per_second": 126.414, |
|
"eval_steps_per_second": 15.861, |
|
"step": 20813 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 6.38314838195229e-06, |
|
"loss": 0.0027, |
|
"step": 22414 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9823481020096368, |
|
"eval_f1": 0.9589226973684211, |
|
"eval_loss": 0.14472806453704834, |
|
"eval_precision": 0.9548785980428285, |
|
"eval_recall": 0.9630011975058843, |
|
"eval_runtime": 12.6834, |
|
"eval_samples_per_second": 126.306, |
|
"eval_steps_per_second": 15.847, |
|
"step": 22414 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 5.319290318293575e-06, |
|
"loss": 0.003, |
|
"step": 24015 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9819015160418381, |
|
"eval_f1": 0.9580811200329422, |
|
"eval_loss": 0.1390347182750702, |
|
"eval_precision": 0.955405904816655, |
|
"eval_recall": 0.9607713589627122, |
|
"eval_runtime": 13.4492, |
|
"eval_samples_per_second": 119.115, |
|
"eval_steps_per_second": 14.945, |
|
"step": 24015 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.25543225463486e-06, |
|
"loss": 0.0025, |
|
"step": 25616 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9812433893524504, |
|
"eval_f1": 0.9562028758600799, |
|
"eval_loss": 0.1537328064441681, |
|
"eval_precision": 0.9540801644398766, |
|
"eval_recall": 0.9583350538877647, |
|
"eval_runtime": 12.9219, |
|
"eval_samples_per_second": 123.976, |
|
"eval_steps_per_second": 15.555, |
|
"step": 25616 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 3.191574190976145e-06, |
|
"loss": 0.0029, |
|
"step": 27217 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.982207074861911, |
|
"eval_f1": 0.9580219463487947, |
|
"eval_loss": 0.15050023794174194, |
|
"eval_precision": 0.9552882246674331, |
|
"eval_recall": 0.9607713589627122, |
|
"eval_runtime": 12.7344, |
|
"eval_samples_per_second": 125.801, |
|
"eval_steps_per_second": 15.784, |
|
"step": 27217 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 2.12771612731743e-06, |
|
"loss": 0.0022, |
|
"step": 28818 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9823245974850159, |
|
"eval_f1": 0.9589283509609449, |
|
"eval_loss": 0.14898641407489777, |
|
"eval_precision": 0.955703211517165, |
|
"eval_recall": 0.9621753313787835, |
|
"eval_runtime": 12.8609, |
|
"eval_samples_per_second": 124.563, |
|
"eval_steps_per_second": 15.629, |
|
"step": 28818 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 1.063858063658715e-06, |
|
"loss": 0.0021, |
|
"step": 30419 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.982019038664943, |
|
"eval_f1": 0.9582288369033425, |
|
"eval_loss": 0.15266422927379608, |
|
"eval_precision": 0.954070981210856, |
|
"eval_recall": 0.9624230912169137, |
|
"eval_runtime": 12.7956, |
|
"eval_samples_per_second": 125.199, |
|
"eval_steps_per_second": 15.709, |
|
"step": 30419 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0018, |
|
"step": 32020 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9824421201081208, |
|
"eval_f1": 0.9590698631546456, |
|
"eval_loss": 0.15378834307193756, |
|
"eval_precision": 0.9559028632373452, |
|
"eval_recall": 0.9622579179914936, |
|
"eval_runtime": 12.9229, |
|
"eval_samples_per_second": 123.966, |
|
"eval_steps_per_second": 15.554, |
|
"step": 32020 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 32020, |
|
"total_flos": 2.8281562629741216e+16, |
|
"train_loss": 0.06704426395528247, |
|
"train_runtime": 9262.2202, |
|
"train_samples_per_second": 27.663, |
|
"train_steps_per_second": 3.457 |
|
} |
|
], |
|
"max_steps": 32020, |
|
"num_train_epochs": 20, |
|
"total_flos": 2.8281562629741216e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|