diff --git "a/checkpoint-1176/trainer_state.json" "b/checkpoint-1176/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-1176/trainer_state.json" @@ -0,0 +1,11214 @@ +{ + "best_metric": 0.8602466236054023, + "best_model_checkpoint": "/tmp/logs/binary_classification_model_v3.1.5_Junction_batch1/checkpoint-1176", + "epoch": 196.0, + "eval_steps": 500, + "global_step": 1176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.16666666666666666, + "grad_norm": 301205.78125, + "learning_rate": 8.333333333333334e-06, + "loss": 0.701, + "step": 1 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 336849.3125, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.7097, + "step": 2 + }, + { + "epoch": 0.5, + "grad_norm": 127303.109375, + "learning_rate": 2.5e-05, + "loss": 0.6876, + "step": 3 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 234841.890625, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.6984, + "step": 4 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 540732.1875, + "learning_rate": 4.1666666666666665e-05, + "loss": 0.7388, + "step": 5 + }, + { + "epoch": 1.0, + "grad_norm": 565904.6875, + "learning_rate": 5e-05, + "loss": 0.7434, + "step": 6 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.6405995438253502, + "eval_best_threshold": 0.4179820716381073, + "eval_f1": 0.7283920216695395, + "eval_loss": 0.7047091722488403, + "eval_pr_auc": 0.6283352362755803, + "eval_precision": 0.6258992805755396, + "eval_recall": 0.8710247349823321, + "eval_roc_auc": 0.6278743001377173, + "eval_runtime": 2.4479, + "eval_samples_per_second": 1253.729, + "eval_steps_per_second": 2.451, + "step": 6 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 371967.21875, + "learning_rate": 5.833333333333333e-05, + "loss": 0.7082, + "step": 7 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 43612.97265625, + "learning_rate": 6.666666666666667e-05, + "loss": 0.6722, + "step": 8 + }, + { + "epoch": 1.5, + "grad_norm": 397443.65625, + "learning_rate": 7.5e-05, + "loss": 0.7188, + "step": 9 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 525898.0, + "learning_rate": 8.333333333333333e-05, + "loss": 0.7528, + "step": 10 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 535364.5, + "learning_rate": 9.166666666666667e-05, + "loss": 0.7645, + "step": 11 + }, + { + "epoch": 2.0, + "grad_norm": 310039.46875, + "learning_rate": 0.0001, + "loss": 0.7025, + "step": 12 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.6298468556533072, + "eval_best_threshold": 0.4796014130115509, + "eval_f1": 0.7262650602409638, + "eval_loss": 0.6673709154129028, + "eval_pr_auc": 0.6426689105421621, + "eval_precision": 0.6146003262642741, + "eval_recall": 0.8875147232037691, + "eval_roc_auc": 0.6411069271868306, + "eval_runtime": 2.5096, + "eval_samples_per_second": 1222.919, + "eval_steps_per_second": 2.391, + "step": 12 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 79163.5078125, + "learning_rate": 0.00010833333333333334, + "loss": 0.666, + "step": 13 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 238443.5625, + "learning_rate": 0.00011666666666666667, + "loss": 0.6792, + "step": 14 + }, + { + "epoch": 2.5, + "grad_norm": 89458.0234375, + "learning_rate": 0.000125, + "loss": 0.6711, + "step": 15 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 56144.84765625, + "learning_rate": 0.00013333333333333334, + "loss": 0.6595, + "step": 16 + }, + { + "epoch": 2.8333333333333335, + "grad_norm": 152777.34375, + "learning_rate": 0.00014166666666666668, + "loss": 0.6647, + "step": 17 + }, + { + "epoch": 3.0, + "grad_norm": 20701.99609375, + "learning_rate": 0.00015, + "loss": 0.6524, + "step": 18 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.6888237210817856, + "eval_best_threshold": 0.4486621618270874, + "eval_f1": 0.7586555471316654, + "eval_loss": 0.6511502265930176, + "eval_pr_auc": 0.6680483472768871, + "eval_precision": 0.6644532979194334, + "eval_recall": 0.8839811542991755, + "eval_roc_auc": 0.683494934186957, + "eval_runtime": 2.5522, + "eval_samples_per_second": 1202.471, + "eval_steps_per_second": 2.351, + "step": 18 + }, + { + "epoch": 3.1666666666666665, + "grad_norm": 139181.78125, + "learning_rate": 0.00015833333333333332, + "loss": 0.6525, + "step": 19 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 117104.03125, + "learning_rate": 0.00016666666666666666, + "loss": 0.6531, + "step": 20 + }, + { + "epoch": 3.5, + "grad_norm": 93101.1171875, + "learning_rate": 0.000175, + "loss": 0.6425, + "step": 21 + }, + { + "epoch": 3.6666666666666665, + "grad_norm": 467410.875, + "learning_rate": 0.00018333333333333334, + "loss": 0.68, + "step": 22 + }, + { + "epoch": 3.8333333333333335, + "grad_norm": 552058.625, + "learning_rate": 0.00019166666666666667, + "loss": 0.6909, + "step": 23 + }, + { + "epoch": 4.0, + "grad_norm": 75542.5625, + "learning_rate": 0.0002, + "loss": 0.644, + "step": 24 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.6940371456500489, + "eval_best_threshold": 0.6427821516990662, + "eval_f1": 0.7617356001014971, + "eval_loss": 0.730407178401947, + "eval_pr_auc": 0.6693287247885799, + "eval_precision": 0.669193045028979, + "eval_recall": 0.8839811542991755, + "eval_roc_auc": 0.6848871414346822, + "eval_runtime": 2.8443, + "eval_samples_per_second": 1079.0, + "eval_steps_per_second": 2.109, + "step": 24 + }, + { + "epoch": 4.166666666666667, + "grad_norm": 683070.9375, + "learning_rate": 0.00020833333333333335, + "loss": 0.7293, + "step": 25 + }, + { + "epoch": 4.333333333333333, + "grad_norm": 651965.375, + "learning_rate": 0.00021666666666666668, + "loss": 0.7538, + "step": 26 + }, + { + "epoch": 4.5, + "grad_norm": 410760.625, + "learning_rate": 0.00022500000000000002, + "loss": 0.6788, + "step": 27 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 15163.0390625, + "learning_rate": 0.00023333333333333333, + "loss": 0.6325, + "step": 28 + }, + { + "epoch": 4.833333333333333, + "grad_norm": 611296.9375, + "learning_rate": 0.00024166666666666667, + "loss": 0.7411, + "step": 29 + }, + { + "epoch": 5.0, + "grad_norm": 758565.25, + "learning_rate": 0.00025, + "loss": 0.825, + "step": 30 + }, + { + "epoch": 5.0, + "eval_accuracy": 0.6979472140762464, + "eval_best_threshold": 0.23702159523963928, + "eval_f1": 0.7587822014051522, + "eval_loss": 0.7560154795646667, + "eval_pr_auc": 0.668429405093208, + "eval_precision": 0.6797202797202797, + "eval_recall": 0.8586572438162544, + "eval_roc_auc": 0.6841386313670608, + "eval_runtime": 2.6473, + "eval_samples_per_second": 1159.289, + "eval_steps_per_second": 2.266, + "step": 30 + }, + { + "epoch": 5.166666666666667, + "grad_norm": 616902.5625, + "learning_rate": 0.00025833333333333334, + "loss": 0.7771, + "step": 31 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 274811.5625, + "learning_rate": 0.0002666666666666667, + "loss": 0.665, + "step": 32 + }, + { + "epoch": 5.5, + "grad_norm": 98389.8046875, + "learning_rate": 0.000275, + "loss": 0.6466, + "step": 33 + }, + { + "epoch": 5.666666666666667, + "grad_norm": 208001.3125, + "learning_rate": 0.00028333333333333335, + "loss": 0.668, + "step": 34 + }, + { + "epoch": 5.833333333333333, + "grad_norm": 151922.03125, + "learning_rate": 0.0002916666666666667, + "loss": 0.65, + "step": 35 + }, + { + "epoch": 6.0, + "grad_norm": 16863.15234375, + "learning_rate": 0.0003, + "loss": 0.6409, + "step": 36 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.6976213750407299, + "eval_best_threshold": 0.4489557445049286, + "eval_f1": 0.759958613554061, + "eval_loss": 0.634763240814209, + "eval_pr_auc": 0.6705264493262868, + "eval_precision": 0.6775830258302583, + "eval_recall": 0.8651354534746761, + "eval_roc_auc": 0.6867211521857353, + "eval_runtime": 2.6393, + "eval_samples_per_second": 1162.804, + "eval_steps_per_second": 2.273, + "step": 36 + }, + { + "epoch": 6.166666666666667, + "grad_norm": 97510.015625, + "learning_rate": 0.00030833333333333337, + "loss": 0.6443, + "step": 37 + }, + { + "epoch": 6.333333333333333, + "grad_norm": 27098.69140625, + "learning_rate": 0.00031666666666666665, + "loss": 0.6299, + "step": 38 + }, + { + "epoch": 6.5, + "grad_norm": 123789.1640625, + "learning_rate": 0.00032500000000000004, + "loss": 0.6373, + "step": 39 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 149780.671875, + "learning_rate": 0.0003333333333333333, + "loss": 0.629, + "step": 40 + }, + { + "epoch": 6.833333333333333, + "grad_norm": 91179.4140625, + "learning_rate": 0.00034166666666666666, + "loss": 0.6251, + "step": 41 + }, + { + "epoch": 7.0, + "grad_norm": 95404.40625, + "learning_rate": 0.00035, + "loss": 0.6469, + "step": 42 + }, + { + "epoch": 7.0, + "eval_accuracy": 0.696969696969697, + "eval_best_threshold": 0.36696115136146545, + "eval_f1": 0.7593167701863354, + "eval_loss": 0.6393815279006958, + "eval_pr_auc": 0.6808847453038372, + "eval_precision": 0.6772853185595568, + "eval_recall": 0.8639575971731449, + "eval_roc_auc": 0.6971218982473051, + "eval_runtime": 2.6949, + "eval_samples_per_second": 1138.828, + "eval_steps_per_second": 2.226, + "step": 42 + }, + { + "epoch": 7.166666666666667, + "grad_norm": 155450.640625, + "learning_rate": 0.00035833333333333333, + "loss": 0.6309, + "step": 43 + }, + { + "epoch": 7.333333333333333, + "grad_norm": 117154.0625, + "learning_rate": 0.00036666666666666667, + "loss": 0.6391, + "step": 44 + }, + { + "epoch": 7.5, + "grad_norm": 18065.970703125, + "learning_rate": 0.000375, + "loss": 0.617, + "step": 45 + }, + { + "epoch": 7.666666666666667, + "grad_norm": 33959.37890625, + "learning_rate": 0.00038333333333333334, + "loss": 0.6206, + "step": 46 + }, + { + "epoch": 7.833333333333333, + "grad_norm": 19164.263671875, + "learning_rate": 0.0003916666666666667, + "loss": 0.6177, + "step": 47 + }, + { + "epoch": 8.0, + "grad_norm": 100220.8515625, + "learning_rate": 0.0004, + "loss": 0.6098, + "step": 48 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.68198110133594, + "eval_best_threshold": 0.328480988740921, + "eval_f1": 0.7661715380929564, + "eval_loss": 0.6046918034553528, + "eval_pr_auc": 0.6817425061021674, + "eval_precision": 0.6457996768982229, + "eval_recall": 0.941696113074205, + "eval_roc_auc": 0.701265658572878, + "eval_runtime": 2.6753, + "eval_samples_per_second": 1147.147, + "eval_steps_per_second": 2.243, + "step": 48 + }, + { + "epoch": 8.166666666666666, + "grad_norm": 16806.453125, + "learning_rate": 0.00040833333333333336, + "loss": 0.6062, + "step": 49 + }, + { + "epoch": 8.333333333333334, + "grad_norm": 124528.859375, + "learning_rate": 0.0004166666666666667, + "loss": 0.6257, + "step": 50 + }, + { + "epoch": 8.5, + "grad_norm": 153063.265625, + "learning_rate": 0.000425, + "loss": 0.61, + "step": 51 + }, + { + "epoch": 8.666666666666666, + "grad_norm": 78803.90625, + "learning_rate": 0.00043333333333333337, + "loss": 0.621, + "step": 52 + }, + { + "epoch": 8.833333333333334, + "grad_norm": 91850.375, + "learning_rate": 0.00044166666666666665, + "loss": 0.6079, + "step": 53 + }, + { + "epoch": 9.0, + "grad_norm": 169883.046875, + "learning_rate": 0.00045000000000000004, + "loss": 0.6332, + "step": 54 + }, + { + "epoch": 9.0, + "eval_accuracy": 0.6862170087976539, + "eval_best_threshold": 0.37674590945243835, + "eval_f1": 0.7696723271944511, + "eval_loss": 0.6066368818283081, + "eval_pr_auc": 0.674455454194375, + "eval_precision": 0.6480064438179621, + "eval_recall": 0.947585394581861, + "eval_roc_auc": 0.701175880320865, + "eval_runtime": 2.755, + "eval_samples_per_second": 1113.956, + "eval_steps_per_second": 2.178, + "step": 54 + }, + { + "epoch": 9.166666666666666, + "grad_norm": 117117.21875, + "learning_rate": 0.0004583333333333333, + "loss": 0.6193, + "step": 55 + }, + { + "epoch": 9.333333333333334, + "grad_norm": 38909.0, + "learning_rate": 0.00046666666666666666, + "loss": 0.6097, + "step": 56 + }, + { + "epoch": 9.5, + "grad_norm": 69082.0234375, + "learning_rate": 0.000475, + "loss": 0.5995, + "step": 57 + }, + { + "epoch": 9.666666666666666, + "grad_norm": 11782.515625, + "learning_rate": 0.00048333333333333334, + "loss": 0.6056, + "step": 58 + }, + { + "epoch": 9.833333333333334, + "grad_norm": 299948.0625, + "learning_rate": 0.0004916666666666666, + "loss": 0.6576, + "step": 59 + }, + { + "epoch": 10.0, + "grad_norm": 461681.4375, + "learning_rate": 0.0005, + "loss": 0.7297, + "step": 60 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.671228413163897, + "eval_best_threshold": 0.13465377688407898, + "eval_f1": 0.7627557018575123, + "eval_loss": 0.6625670790672302, + "eval_pr_auc": 0.6769399620865038, + "eval_precision": 0.6348336594911937, + "eval_recall": 0.9552414605418139, + "eval_roc_auc": 0.7029843321915601, + "eval_runtime": 3.0432, + "eval_samples_per_second": 1008.479, + "eval_steps_per_second": 1.972, + "step": 60 + }, + { + "epoch": 10.166666666666666, + "grad_norm": 323015.5, + "learning_rate": 0.0005083333333333333, + "loss": 0.6792, + "step": 61 + }, + { + "epoch": 10.333333333333334, + "grad_norm": 41346.72265625, + "learning_rate": 0.0005166666666666667, + "loss": 0.5971, + "step": 62 + }, + { + "epoch": 10.5, + "grad_norm": 165735.25, + "learning_rate": 0.0005250000000000001, + "loss": 0.624, + "step": 63 + }, + { + "epoch": 10.666666666666666, + "grad_norm": 185497.203125, + "learning_rate": 0.0005333333333333334, + "loss": 0.6514, + "step": 64 + }, + { + "epoch": 10.833333333333334, + "grad_norm": 153241.40625, + "learning_rate": 0.0005416666666666666, + "loss": 0.6562, + "step": 65 + }, + { + "epoch": 11.0, + "grad_norm": 57239.83203125, + "learning_rate": 0.00055, + "loss": 0.6358, + "step": 66 + }, + { + "epoch": 11.0, + "eval_accuracy": 0.6992505702183122, + "eval_best_threshold": 0.5167019963264465, + "eval_f1": 0.7631511419040288, + "eval_loss": 0.6300772428512573, + "eval_pr_auc": 0.6678537551436834, + "eval_precision": 0.6762164620281946, + "eval_recall": 0.875736160188457, + "eval_roc_auc": 0.6964240763793849, + "eval_runtime": 2.8763, + "eval_samples_per_second": 1067.013, + "eval_steps_per_second": 2.086, + "step": 66 + }, + { + "epoch": 11.166666666666666, + "grad_norm": 76762.9453125, + "learning_rate": 0.0005583333333333333, + "loss": 0.6305, + "step": 67 + }, + { + "epoch": 11.333333333333334, + "grad_norm": 20210.671875, + "learning_rate": 0.0005666666666666667, + "loss": 0.6153, + "step": 68 + }, + { + "epoch": 11.5, + "grad_norm": 59593.27734375, + "learning_rate": 0.000575, + "loss": 0.6065, + "step": 69 + }, + { + "epoch": 11.666666666666666, + "grad_norm": 11676.9892578125, + "learning_rate": 0.0005833333333333334, + "loss": 0.6214, + "step": 70 + }, + { + "epoch": 11.833333333333334, + "grad_norm": 198962.015625, + "learning_rate": 0.0005916666666666667, + "loss": 0.634, + "step": 71 + }, + { + "epoch": 12.0, + "grad_norm": 289025.59375, + "learning_rate": 0.0006, + "loss": 0.6703, + "step": 72 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.6682958618442489, + "eval_best_threshold": 0.1743956208229065, + "eval_f1": 0.7651130595293032, + "eval_loss": 0.6442840695381165, + "eval_pr_auc": 0.6679981832738308, + "eval_precision": 0.6289833080424886, + "eval_recall": 0.9764428739693758, + "eval_roc_auc": 0.6914338660749034, + "eval_runtime": 3.0962, + "eval_samples_per_second": 991.223, + "eval_steps_per_second": 1.938, + "step": 72 + }, + { + "epoch": 12.166666666666666, + "grad_norm": 264987.90625, + "learning_rate": 0.0006083333333333333, + "loss": 0.6585, + "step": 73 + }, + { + "epoch": 12.333333333333334, + "grad_norm": 23771.486328125, + "learning_rate": 0.0006166666666666667, + "loss": 0.5975, + "step": 74 + }, + { + "epoch": 12.5, + "grad_norm": 299908.5, + "learning_rate": 0.000625, + "loss": 0.6949, + "step": 75 + }, + { + "epoch": 12.666666666666666, + "grad_norm": 361077.90625, + "learning_rate": 0.0006333333333333333, + "loss": 0.7435, + "step": 76 + }, + { + "epoch": 12.833333333333334, + "grad_norm": 336040.4375, + "learning_rate": 0.0006416666666666667, + "loss": 0.7378, + "step": 77 + }, + { + "epoch": 13.0, + "grad_norm": 272416.1875, + "learning_rate": 0.0006500000000000001, + "loss": 0.6772, + "step": 78 + }, + { + "epoch": 13.0, + "eval_accuracy": 0.6855653307266211, + "eval_best_threshold": 0.5437505841255188, + "eval_f1": 0.7697446910045336, + "eval_loss": 0.6109954714775085, + "eval_pr_auc": 0.6833836331698967, + "eval_precision": 0.6470116325711993, + "eval_recall": 0.9499411071849234, + "eval_roc_auc": 0.7098822229610673, + "eval_runtime": 3.0708, + "eval_samples_per_second": 999.429, + "eval_steps_per_second": 1.954, + "step": 78 + }, + { + "epoch": 13.166666666666666, + "grad_norm": 138968.046875, + "learning_rate": 0.0006583333333333334, + "loss": 0.6236, + "step": 79 + }, + { + "epoch": 13.333333333333334, + "grad_norm": 133513.9375, + "learning_rate": 0.0006666666666666666, + "loss": 0.6264, + "step": 80 + }, + { + "epoch": 13.5, + "grad_norm": 265727.65625, + "learning_rate": 0.000675, + "loss": 0.6646, + "step": 81 + }, + { + "epoch": 13.666666666666666, + "grad_norm": 326814.34375, + "learning_rate": 0.0006833333333333333, + "loss": 0.688, + "step": 82 + }, + { + "epoch": 13.833333333333334, + "grad_norm": 237995.25, + "learning_rate": 0.0006916666666666667, + "loss": 0.6591, + "step": 83 + }, + { + "epoch": 14.0, + "grad_norm": 130573.796875, + "learning_rate": 0.0007, + "loss": 0.6188, + "step": 84 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.696969696969697, + "eval_best_threshold": 0.5145177245140076, + "eval_f1": 0.7700296735905044, + "eval_loss": 0.6013315916061401, + "eval_pr_auc": 0.6843879067612049, + "eval_precision": 0.6636828644501279, + "eval_recall": 0.9169611307420494, + "eval_roc_auc": 0.7103317585626545, + "eval_runtime": 3.0866, + "eval_samples_per_second": 994.308, + "eval_steps_per_second": 1.944, + "step": 84 + }, + { + "epoch": 14.166666666666666, + "grad_norm": 38961.95703125, + "learning_rate": 0.0007083333333333334, + "loss": 0.6077, + "step": 85 + }, + { + "epoch": 14.333333333333334, + "grad_norm": 114087.984375, + "learning_rate": 0.0007166666666666667, + "loss": 0.6092, + "step": 86 + }, + { + "epoch": 14.5, + "grad_norm": 85208.2265625, + "learning_rate": 0.000725, + "loss": 0.6075, + "step": 87 + }, + { + "epoch": 14.666666666666666, + "grad_norm": 6441.580078125, + "learning_rate": 0.0007333333333333333, + "loss": 0.5931, + "step": 88 + }, + { + "epoch": 14.833333333333334, + "grad_norm": 107248.140625, + "learning_rate": 0.0007416666666666667, + "loss": 0.6044, + "step": 89 + }, + { + "epoch": 15.0, + "grad_norm": 164448.296875, + "learning_rate": 0.00075, + "loss": 0.6059, + "step": 90 + }, + { + "epoch": 15.0, + "eval_accuracy": 0.6881720430107527, + "eval_best_threshold": 0.324313223361969, + "eval_f1": 0.7685610640870617, + "eval_loss": 0.5854882001876831, + "eval_pr_auc": 0.6890732725571252, + "eval_precision": 0.652031185884284, + "eval_recall": 0.9358068315665489, + "eval_roc_auc": 0.7133827156675507, + "eval_runtime": 3.0825, + "eval_samples_per_second": 995.637, + "eval_steps_per_second": 1.947, + "step": 90 + }, + { + "epoch": 15.166666666666666, + "grad_norm": 34315.7265625, + "learning_rate": 0.0007583333333333333, + "loss": 0.5898, + "step": 91 + }, + { + "epoch": 15.333333333333334, + "grad_norm": 98205.3828125, + "learning_rate": 0.0007666666666666667, + "loss": 0.5922, + "step": 92 + }, + { + "epoch": 15.5, + "grad_norm": 131727.921875, + "learning_rate": 0.0007750000000000001, + "loss": 0.6055, + "step": 93 + }, + { + "epoch": 15.666666666666666, + "grad_norm": 64623.3515625, + "learning_rate": 0.0007833333333333334, + "loss": 0.5943, + "step": 94 + }, + { + "epoch": 15.833333333333334, + "grad_norm": 412366.34375, + "learning_rate": 0.0007916666666666666, + "loss": 0.6883, + "step": 95 + }, + { + "epoch": 16.0, + "grad_norm": 377649.59375, + "learning_rate": 0.0008, + "loss": 0.6986, + "step": 96 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.7015314434669273, + "eval_best_threshold": 0.3653980791568756, + "eval_f1": 0.7690368129097327, + "eval_loss": 0.6090668439865112, + "eval_pr_auc": 0.6800774346982651, + "eval_precision": 0.6723985890652557, + "eval_recall": 0.8981154299175501, + "eval_roc_auc": 0.7118375846986931, + "eval_runtime": 2.9665, + "eval_samples_per_second": 1034.567, + "eval_steps_per_second": 2.023, + "step": 96 + }, + { + "epoch": 16.166666666666668, + "grad_norm": 162461.421875, + "learning_rate": 0.0008083333333333333, + "loss": 0.6176, + "step": 97 + }, + { + "epoch": 16.333333333333332, + "grad_norm": 40319.34765625, + "learning_rate": 0.0008166666666666667, + "loss": 0.6023, + "step": 98 + }, + { + "epoch": 16.5, + "grad_norm": 56993.296875, + "learning_rate": 0.000825, + "loss": 0.6056, + "step": 99 + }, + { + "epoch": 16.666666666666668, + "grad_norm": 33296.796875, + "learning_rate": 0.0008333333333333334, + "loss": 0.6037, + "step": 100 + }, + { + "epoch": 16.833333333333332, + "grad_norm": 137979.578125, + "learning_rate": 0.0008416666666666667, + "loss": 0.598, + "step": 101 + }, + { + "epoch": 17.0, + "grad_norm": 180135.84375, + "learning_rate": 0.00085, + "loss": 0.6112, + "step": 102 + }, + { + "epoch": 17.0, + "eval_accuracy": 0.6950146627565983, + "eval_best_threshold": 0.2957038879394531, + "eval_f1": 0.7717073170731708, + "eval_loss": 0.582582950592041, + "eval_pr_auc": 0.6880525636282292, + "eval_precision": 0.6586178184845962, + "eval_recall": 0.9316843345111896, + "eval_roc_auc": 0.7159605113150667, + "eval_runtime": 3.1459, + "eval_samples_per_second": 975.563, + "eval_steps_per_second": 1.907, + "step": 102 + }, + { + "epoch": 17.166666666666668, + "grad_norm": 58858.67578125, + "learning_rate": 0.0008583333333333333, + "loss": 0.5902, + "step": 103 + }, + { + "epoch": 17.333333333333332, + "grad_norm": 116587.671875, + "learning_rate": 0.0008666666666666667, + "loss": 0.605, + "step": 104 + }, + { + "epoch": 17.5, + "grad_norm": 176699.15625, + "learning_rate": 0.000875, + "loss": 0.6269, + "step": 105 + }, + { + "epoch": 17.666666666666668, + "grad_norm": 132956.703125, + "learning_rate": 0.0008833333333333333, + "loss": 0.6156, + "step": 106 + }, + { + "epoch": 17.833333333333332, + "grad_norm": 26539.21875, + "learning_rate": 0.0008916666666666667, + "loss": 0.584, + "step": 107 + }, + { + "epoch": 18.0, + "grad_norm": 279153.15625, + "learning_rate": 0.0009000000000000001, + "loss": 0.6648, + "step": 108 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.6702508960573477, + "eval_best_threshold": 0.08056089282035828, + "eval_f1": 0.7634408602150538, + "eval_loss": 0.753210186958313, + "eval_pr_auc": 0.6648919074173523, + "eval_precision": 0.6329457364341086, + "eval_recall": 0.9617196702002355, + "eval_roc_auc": 0.6946051002638364, + "eval_runtime": 3.1237, + "eval_samples_per_second": 982.501, + "eval_steps_per_second": 1.921, + "step": 108 + }, + { + "epoch": 18.166666666666668, + "grad_norm": 493320.5625, + "learning_rate": 0.0009083333333333334, + "loss": 0.776, + "step": 109 + }, + { + "epoch": 18.333333333333332, + "grad_norm": 441418.5625, + "learning_rate": 0.0009166666666666666, + "loss": 0.7381, + "step": 110 + }, + { + "epoch": 18.5, + "grad_norm": 206490.640625, + "learning_rate": 0.000925, + "loss": 0.6299, + "step": 111 + }, + { + "epoch": 18.666666666666668, + "grad_norm": 41816.65625, + "learning_rate": 0.0009333333333333333, + "loss": 0.6002, + "step": 112 + }, + { + "epoch": 18.833333333333332, + "grad_norm": 163746.375, + "learning_rate": 0.0009416666666666667, + "loss": 0.6387, + "step": 113 + }, + { + "epoch": 19.0, + "grad_norm": 222226.390625, + "learning_rate": 0.00095, + "loss": 0.6646, + "step": 114 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.703486477680026, + "eval_best_threshold": 0.7201119661331177, + "eval_f1": 0.7642487046632125, + "eval_loss": 0.657902717590332, + "eval_pr_auc": 0.6878116517095747, + "eval_precision": 0.6822386679000925, + "eval_recall": 0.8686690223792697, + "eval_roc_auc": 0.7094689852651981, + "eval_runtime": 3.1363, + "eval_samples_per_second": 978.534, + "eval_steps_per_second": 1.913, + "step": 114 + }, + { + "epoch": 19.166666666666668, + "grad_norm": 224572.46875, + "learning_rate": 0.0009583333333333334, + "loss": 0.6642, + "step": 115 + }, + { + "epoch": 19.333333333333332, + "grad_norm": 196608.109375, + "learning_rate": 0.0009666666666666667, + "loss": 0.6549, + "step": 116 + }, + { + "epoch": 19.5, + "grad_norm": 117076.5234375, + "learning_rate": 0.000975, + "loss": 0.6154, + "step": 117 + }, + { + "epoch": 19.666666666666668, + "grad_norm": 34586.9453125, + "learning_rate": 0.0009833333333333332, + "loss": 0.5969, + "step": 118 + }, + { + "epoch": 19.833333333333332, + "grad_norm": 125128.671875, + "learning_rate": 0.0009916666666666667, + "loss": 0.6043, + "step": 119 + }, + { + "epoch": 20.0, + "grad_norm": 230583.84375, + "learning_rate": 0.001, + "loss": 0.639, + "step": 120 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.6901270772238515, + "eval_best_threshold": 0.285312294960022, + "eval_f1": 0.7669688801764274, + "eval_loss": 0.6092373132705688, + "eval_pr_auc": 0.6895824261804095, + "eval_precision": 0.6567352077213596, + "eval_recall": 0.9216725559481743, + "eval_roc_auc": 0.7143187291179651, + "eval_runtime": 3.0536, + "eval_samples_per_second": 1005.043, + "eval_steps_per_second": 1.965, + "step": 120 + }, + { + "epoch": 20.166666666666668, + "grad_norm": 177096.625, + "learning_rate": 0.0009999978846027431, + "loss": 0.6194, + "step": 121 + }, + { + "epoch": 20.333333333333332, + "grad_norm": 22809.671875, + "learning_rate": 0.0009999915384288722, + "loss": 0.5856, + "step": 122 + }, + { + "epoch": 20.5, + "grad_norm": 84596.2734375, + "learning_rate": 0.0009999809615320856, + "loss": 0.5983, + "step": 123 + }, + { + "epoch": 20.666666666666668, + "grad_norm": 132173.640625, + "learning_rate": 0.0009999661540018813, + "loss": 0.6117, + "step": 124 + }, + { + "epoch": 20.833333333333332, + "grad_norm": 133689.75, + "learning_rate": 0.0009999471159635538, + "loss": 0.6065, + "step": 125 + }, + { + "epoch": 21.0, + "grad_norm": 14212.5078125, + "learning_rate": 0.0009999238475781956, + "loss": 0.5791, + "step": 126 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.6699250570218312, + "eval_best_threshold": 0.131568044424057, + "eval_f1": 0.7661048256753636, + "eval_loss": 0.6092211604118347, + "eval_pr_auc": 0.671715494700898, + "eval_precision": 0.6300797569312572, + "eval_recall": 0.9770318021201413, + "eval_roc_auc": 0.7061214162798469, + "eval_runtime": 2.9753, + "eval_samples_per_second": 1031.483, + "eval_steps_per_second": 2.017, + "step": 126 + }, + { + "epoch": 21.166666666666668, + "grad_norm": 180716.0, + "learning_rate": 0.0009998963490426942, + "loss": 0.6252, + "step": 127 + }, + { + "epoch": 21.333333333333332, + "grad_norm": 180470.5625, + "learning_rate": 0.000999864620589731, + "loss": 0.6253, + "step": 128 + }, + { + "epoch": 21.5, + "grad_norm": 25203.005859375, + "learning_rate": 0.0009998286624877785, + "loss": 0.583, + "step": 129 + }, + { + "epoch": 21.666666666666668, + "grad_norm": 142969.25, + "learning_rate": 0.0009997884750411004, + "loss": 0.6237, + "step": 130 + }, + { + "epoch": 21.833333333333332, + "grad_norm": 161729.53125, + "learning_rate": 0.0009997440585897456, + "loss": 0.6415, + "step": 131 + }, + { + "epoch": 22.0, + "grad_norm": 105764.96875, + "learning_rate": 0.0009996954135095479, + "loss": 0.6369, + "step": 132 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.7070707070707071, + "eval_best_threshold": 0.6199614405632019, + "eval_f1": 0.7642276422764228, + "eval_loss": 0.6236046552658081, + "eval_pr_auc": 0.6916097943168671, + "eval_precision": 0.6888888888888889, + "eval_recall": 0.8580683156654888, + "eval_roc_auc": 0.707266196383268, + "eval_runtime": 3.0077, + "eval_samples_per_second": 1020.391, + "eval_steps_per_second": 1.995, + "step": 132 + }, + { + "epoch": 22.166666666666668, + "grad_norm": 35118.234375, + "learning_rate": 0.0009996425402121222, + "loss": 0.6214, + "step": 133 + }, + { + "epoch": 22.333333333333332, + "grad_norm": 71758.453125, + "learning_rate": 0.0009995854391448608, + "loss": 0.6363, + "step": 134 + }, + { + "epoch": 22.5, + "grad_norm": 171227.109375, + "learning_rate": 0.000999524110790929, + "loss": 0.6473, + "step": 135 + }, + { + "epoch": 22.666666666666668, + "grad_norm": 192111.375, + "learning_rate": 0.0009994585556692623, + "loss": 0.645, + "step": 136 + }, + { + "epoch": 22.833333333333332, + "grad_norm": 133878.125, + "learning_rate": 0.0009993887743345612, + "loss": 0.6269, + "step": 137 + }, + { + "epoch": 23.0, + "grad_norm": 106632.1953125, + "learning_rate": 0.0009993147673772868, + "loss": 0.6224, + "step": 138 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.7044639947865754, + "eval_best_threshold": 0.5462677478790283, + "eval_f1": 0.7639864689045017, + "eval_loss": 0.5997253060340881, + "eval_pr_auc": 0.6833916293267107, + "eval_precision": 0.6843822843822844, + "eval_recall": 0.8645465253239105, + "eval_roc_auc": 0.709266017685886, + "eval_runtime": 3.0311, + "eval_samples_per_second": 1012.516, + "eval_steps_per_second": 1.98, + "step": 138 + }, + { + "epoch": 23.166666666666668, + "grad_norm": 15841.970703125, + "learning_rate": 0.0009992365354236556, + "loss": 0.6118, + "step": 139 + }, + { + "epoch": 23.333333333333332, + "grad_norm": 19277.970703125, + "learning_rate": 0.0009991540791356342, + "loss": 0.5759, + "step": 140 + }, + { + "epoch": 23.5, + "grad_norm": 255906.0, + "learning_rate": 0.0009990673992109334, + "loss": 0.6571, + "step": 141 + }, + { + "epoch": 23.666666666666668, + "grad_norm": 456702.65625, + "learning_rate": 0.0009989764963830037, + "loss": 0.7798, + "step": 142 + }, + { + "epoch": 23.833333333333332, + "grad_norm": 116625.515625, + "learning_rate": 0.0009988813714210273, + "loss": 0.6315, + "step": 143 + }, + { + "epoch": 24.0, + "grad_norm": 246554.75, + "learning_rate": 0.0009987820251299122, + "loss": 0.6794, + "step": 144 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6979472140762464, + "eval_best_threshold": 0.7202028036117554, + "eval_f1": 0.7644218551461245, + "eval_loss": 0.7107158899307251, + "eval_pr_auc": 0.6772575213237118, + "eval_precision": 0.6723290120697363, + "eval_recall": 0.8857479387514723, + "eval_roc_auc": 0.7083542744327861, + "eval_runtime": 3.2437, + "eval_samples_per_second": 946.132, + "eval_steps_per_second": 1.85, + "step": 144 + }, + { + "epoch": 24.166666666666668, + "grad_norm": 318765.8125, + "learning_rate": 0.0009986784583502861, + "loss": 0.7435, + "step": 145 + }, + { + "epoch": 24.333333333333332, + "grad_norm": 248666.8125, + "learning_rate": 0.0009985706719584887, + "loss": 0.673, + "step": 146 + }, + { + "epoch": 24.5, + "grad_norm": 124225.7421875, + "learning_rate": 0.000998458666866564, + "loss": 0.605, + "step": 147 + }, + { + "epoch": 24.666666666666668, + "grad_norm": 93734.8203125, + "learning_rate": 0.0009983424440222531, + "loss": 0.6139, + "step": 148 + }, + { + "epoch": 24.833333333333332, + "grad_norm": 167052.6875, + "learning_rate": 0.0009982220044089858, + "loss": 0.6204, + "step": 149 + }, + { + "epoch": 25.0, + "grad_norm": 135724.3125, + "learning_rate": 0.0009980973490458728, + "loss": 0.6234, + "step": 150 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.7021831215379603, + "eval_best_threshold": 0.5155451893806458, + "eval_f1": 0.7701207243460765, + "eval_loss": 0.6027764678001404, + "eval_pr_auc": 0.6704919528444424, + "eval_precision": 0.6720807726075505, + "eval_recall": 0.9016489988221437, + "eval_roc_auc": 0.7086511010937482, + "eval_runtime": 3.2106, + "eval_samples_per_second": 955.91, + "eval_steps_per_second": 1.869, + "step": 150 + }, + { + "epoch": 25.166666666666668, + "grad_norm": 20833.037109375, + "learning_rate": 0.0009979684789876964, + "loss": 0.6018, + "step": 151 + }, + { + "epoch": 25.333333333333332, + "grad_norm": 67574.53125, + "learning_rate": 0.0009978353953249022, + "loss": 0.6104, + "step": 152 + }, + { + "epoch": 25.5, + "grad_norm": 76152.453125, + "learning_rate": 0.0009976980991835895, + "loss": 0.6053, + "step": 153 + }, + { + "epoch": 25.666666666666668, + "grad_norm": 51570.14453125, + "learning_rate": 0.0009975565917255016, + "loss": 0.606, + "step": 154 + }, + { + "epoch": 25.833333333333332, + "grad_norm": 66497.03125, + "learning_rate": 0.0009974108741480166, + "loss": 0.5844, + "step": 155 + }, + { + "epoch": 26.0, + "grad_norm": 30464.404296875, + "learning_rate": 0.0009972609476841367, + "loss": 0.5894, + "step": 156 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6950146627565983, + "eval_best_threshold": 0.5118791460990906, + "eval_f1": 0.7697983275946877, + "eval_loss": 0.5994439125061035, + "eval_pr_auc": 0.6800822574078459, + "eval_precision": 0.6608952702702703, + "eval_recall": 0.9216725559481743, + "eval_roc_auc": 0.7118126701598568, + "eval_runtime": 3.261, + "eval_samples_per_second": 941.128, + "eval_steps_per_second": 1.84, + "step": 156 + }, + { + "epoch": 26.166666666666668, + "grad_norm": 126097.4765625, + "learning_rate": 0.0009971068136024781, + "loss": 0.6058, + "step": 157 + }, + { + "epoch": 26.333333333333332, + "grad_norm": 124055.9609375, + "learning_rate": 0.00099694847320726, + "loss": 0.5917, + "step": 158 + }, + { + "epoch": 26.5, + "grad_norm": 77981.765625, + "learning_rate": 0.0009967859278382939, + "loss": 0.6046, + "step": 159 + }, + { + "epoch": 26.666666666666668, + "grad_norm": 100627.078125, + "learning_rate": 0.0009966191788709714, + "loss": 0.6002, + "step": 160 + }, + { + "epoch": 26.833333333333332, + "grad_norm": 156980.953125, + "learning_rate": 0.0009964482277162545, + "loss": 0.6138, + "step": 161 + }, + { + "epoch": 27.0, + "grad_norm": 35392.578125, + "learning_rate": 0.0009962730758206612, + "loss": 0.5852, + "step": 162 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.6972955360052134, + "eval_best_threshold": 0.5760672092437744, + "eval_f1": 0.769421692727724, + "eval_loss": 0.6142106652259827, + "eval_pr_auc": 0.6808733029108327, + "eval_precision": 0.6649506649506649, + "eval_recall": 0.9128386336866903, + "eval_roc_auc": 0.7136039395899754, + "eval_runtime": 3.2326, + "eval_samples_per_second": 949.392, + "eval_steps_per_second": 1.856, + "step": 162 + }, + { + "epoch": 27.166666666666668, + "grad_norm": 168890.375, + "learning_rate": 0.0009960937246662545, + "loss": 0.6207, + "step": 163 + }, + { + "epoch": 27.333333333333332, + "grad_norm": 213891.765625, + "learning_rate": 0.0009959101757706307, + "loss": 0.6425, + "step": 164 + }, + { + "epoch": 27.5, + "grad_norm": 175997.421875, + "learning_rate": 0.0009957224306869053, + "loss": 0.6338, + "step": 165 + }, + { + "epoch": 27.666666666666668, + "grad_norm": 158954.6875, + "learning_rate": 0.0009955304910036994, + "loss": 0.6388, + "step": 166 + }, + { + "epoch": 27.833333333333332, + "grad_norm": 18399.896484375, + "learning_rate": 0.000995334358345128, + "loss": 0.5959, + "step": 167 + }, + { + "epoch": 28.0, + "grad_norm": 158990.375, + "learning_rate": 0.0009951340343707852, + "loss": 0.6161, + "step": 168 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.7012056044314109, + "eval_best_threshold": 0.2593577802181244, + "eval_f1": 0.7741935483870968, + "eval_loss": 0.6534614562988281, + "eval_pr_auc": 0.6770361224581476, + "eval_precision": 0.6652560304697418, + "eval_recall": 0.9257950530035336, + "eval_roc_auc": 0.7161636936748859, + "eval_runtime": 3.2488, + "eval_samples_per_second": 944.661, + "eval_steps_per_second": 1.847, + "step": 168 + }, + { + "epoch": 28.166666666666668, + "grad_norm": 273902.53125, + "learning_rate": 0.00099492952077573, + "loss": 0.6574, + "step": 169 + }, + { + "epoch": 28.333333333333332, + "grad_norm": 311438.375, + "learning_rate": 0.0009947208192904722, + "loss": 0.6913, + "step": 170 + }, + { + "epoch": 28.5, + "grad_norm": 273603.5625, + "learning_rate": 0.0009945079316809585, + "loss": 0.6621, + "step": 171 + }, + { + "epoch": 28.666666666666668, + "grad_norm": 171142.90625, + "learning_rate": 0.0009942908597485559, + "loss": 0.6109, + "step": 172 + }, + { + "epoch": 28.833333333333332, + "grad_norm": 21905.29296875, + "learning_rate": 0.000994069605330038, + "loss": 0.5994, + "step": 173 + }, + { + "epoch": 29.0, + "grad_norm": 28439.1640625, + "learning_rate": 0.0009938441702975688, + "loss": 0.577, + "step": 174 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.6982730531117628, + "eval_best_threshold": 0.4422946870326996, + "eval_f1": 0.7714708785784797, + "eval_loss": 0.5790307521820068, + "eval_pr_auc": 0.6856352502299288, + "eval_precision": 0.6639762107051826, + "eval_recall": 0.9204946996466431, + "eval_roc_auc": 0.7194105306023562, + "eval_runtime": 3.1704, + "eval_samples_per_second": 968.024, + "eval_steps_per_second": 1.893, + "step": 174 + }, + { + "epoch": 29.166666666666668, + "grad_norm": 64802.87890625, + "learning_rate": 0.000993614556558687, + "loss": 0.5879, + "step": 175 + }, + { + "epoch": 29.333333333333332, + "grad_norm": 96375.390625, + "learning_rate": 0.0009933807660562897, + "loss": 0.6068, + "step": 176 + }, + { + "epoch": 29.5, + "grad_norm": 114612.1171875, + "learning_rate": 0.0009931428007686157, + "loss": 0.605, + "step": 177 + }, + { + "epoch": 29.666666666666668, + "grad_norm": 76551.109375, + "learning_rate": 0.0009929006627092298, + "loss": 0.6021, + "step": 178 + }, + { + "epoch": 29.833333333333332, + "grad_norm": 81290.6875, + "learning_rate": 0.0009926543539270048, + "loss": 0.5932, + "step": 179 + }, + { + "epoch": 30.0, + "grad_norm": 42667.765625, + "learning_rate": 0.000992403876506104, + "loss": 0.5767, + "step": 180 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.6898012381883349, + "eval_best_threshold": 0.3204585909843445, + "eval_f1": 0.7729007633587787, + "eval_loss": 0.5811671018600464, + "eval_pr_auc": 0.6880019352858036, + "eval_precision": 0.6495589414595028, + "eval_recall": 0.9540636042402827, + "eval_roc_auc": 0.7201586111089633, + "eval_runtime": 3.1797, + "eval_samples_per_second": 965.188, + "eval_steps_per_second": 1.887, + "step": 180 + }, + { + "epoch": 30.166666666666668, + "grad_norm": 75468.046875, + "learning_rate": 0.0009921492325659649, + "loss": 0.592, + "step": 181 + }, + { + "epoch": 30.333333333333332, + "grad_norm": 130032.859375, + "learning_rate": 0.0009918904242612794, + "loss": 0.6024, + "step": 182 + }, + { + "epoch": 30.5, + "grad_norm": 42846.8671875, + "learning_rate": 0.0009916274537819774, + "loss": 0.5912, + "step": 183 + }, + { + "epoch": 30.666666666666668, + "grad_norm": 27902.64453125, + "learning_rate": 0.0009913603233532068, + "loss": 0.5837, + "step": 184 + }, + { + "epoch": 30.833333333333332, + "grad_norm": 64019.296875, + "learning_rate": 0.0009910890352353154, + "loss": 0.5779, + "step": 185 + }, + { + "epoch": 31.0, + "grad_norm": 30909.447265625, + "learning_rate": 0.000990813591723832, + "loss": 0.5862, + "step": 186 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6966438579341805, + "eval_best_threshold": 0.35055145621299744, + "eval_f1": 0.7737545565006075, + "eval_loss": 0.5751112699508667, + "eval_pr_auc": 0.6866912253137908, + "eval_precision": 0.6586677699627638, + "eval_recall": 0.9375736160188457, + "eval_roc_auc": 0.7222192152951212, + "eval_runtime": 3.1318, + "eval_samples_per_second": 979.942, + "eval_steps_per_second": 1.916, + "step": 186 + }, + { + "epoch": 31.166666666666668, + "grad_norm": 64181.921875, + "learning_rate": 0.0009905339951494464, + "loss": 0.57, + "step": 187 + }, + { + "epoch": 31.333333333333332, + "grad_norm": 125966.375, + "learning_rate": 0.0009902502478779896, + "loss": 0.5932, + "step": 188 + }, + { + "epoch": 31.5, + "grad_norm": 17534.990234375, + "learning_rate": 0.0009899623523104148, + "loss": 0.5786, + "step": 189 + }, + { + "epoch": 31.666666666666668, + "grad_norm": 118685.2109375, + "learning_rate": 0.000989670310882776, + "loss": 0.6049, + "step": 190 + }, + { + "epoch": 31.833333333333332, + "grad_norm": 118189.421875, + "learning_rate": 0.000989374126066207, + "loss": 0.617, + "step": 191 + }, + { + "epoch": 32.0, + "grad_norm": 65363.68359375, + "learning_rate": 0.0009890738003669028, + "loss": 0.6002, + "step": 192 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.7057673509286413, + "eval_best_threshold": 0.5547782182693481, + "eval_f1": 0.7664856477889838, + "eval_loss": 0.5928676724433899, + "eval_pr_auc": 0.6915146830214178, + "eval_precision": 0.6832641770401107, + "eval_recall": 0.872791519434629, + "eval_roc_auc": 0.7202294886763422, + "eval_runtime": 3.1162, + "eval_samples_per_second": 984.839, + "eval_steps_per_second": 1.925, + "step": 192 + }, + { + "epoch": 32.166666666666664, + "grad_norm": 27484.95703125, + "learning_rate": 0.0009887693363260957, + "loss": 0.5969, + "step": 193 + }, + { + "epoch": 32.333333333333336, + "grad_norm": 122337.7109375, + "learning_rate": 0.0009884607365200355, + "loss": 0.5997, + "step": 194 + }, + { + "epoch": 32.5, + "grad_norm": 214062.09375, + "learning_rate": 0.0009881480035599667, + "loss": 0.6365, + "step": 195 + }, + { + "epoch": 32.666666666666664, + "grad_norm": 273055.5625, + "learning_rate": 0.0009878311400921072, + "loss": 0.6572, + "step": 196 + }, + { + "epoch": 32.833333333333336, + "grad_norm": 230877.578125, + "learning_rate": 0.0009875101487976253, + "loss": 0.6328, + "step": 197 + }, + { + "epoch": 33.0, + "grad_norm": 112179.0390625, + "learning_rate": 0.0009871850323926177, + "loss": 0.6035, + "step": 198 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.7106549364613881, + "eval_best_threshold": 0.47324299812316895, + "eval_f1": 0.7764350453172205, + "eval_loss": 0.5827791094779968, + "eval_pr_auc": 0.6930905376712406, + "eval_precision": 0.6781002638522428, + "eval_recall": 0.9081272084805654, + "eval_roc_auc": 0.725550890522939, + "eval_runtime": 3.3007, + "eval_samples_per_second": 929.815, + "eval_steps_per_second": 1.818, + "step": 198 + }, + { + "epoch": 33.166666666666664, + "grad_norm": 21957.048828125, + "learning_rate": 0.0009868557936280856, + "loss": 0.5878, + "step": 199 + }, + { + "epoch": 33.333333333333336, + "grad_norm": 58557.421875, + "learning_rate": 0.0009865224352899118, + "loss": 0.578, + "step": 200 + }, + { + "epoch": 33.5, + "grad_norm": 114058.5078125, + "learning_rate": 0.0009861849601988384, + "loss": 0.5892, + "step": 201 + }, + { + "epoch": 33.666666666666664, + "grad_norm": 107046.25, + "learning_rate": 0.0009858433712104401, + "loss": 0.5904, + "step": 202 + }, + { + "epoch": 33.833333333333336, + "grad_norm": 23158.916015625, + "learning_rate": 0.0009854976712151031, + "loss": 0.5933, + "step": 203 + }, + { + "epoch": 34.0, + "grad_norm": 66935.171875, + "learning_rate": 0.0009851478631379982, + "loss": 0.5734, + "step": 204 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.7103290974258716, + "eval_best_threshold": 0.4419545829296112, + "eval_f1": 0.7754483455418035, + "eval_loss": 0.5724775791168213, + "eval_pr_auc": 0.6953394030992055, + "eval_precision": 0.6789031402034498, + "eval_recall": 0.9040047114252061, + "eval_roc_auc": 0.7296596416258367, + "eval_runtime": 3.4261, + "eval_samples_per_second": 895.774, + "eval_steps_per_second": 1.751, + "step": 204 + }, + { + "epoch": 34.166666666666664, + "grad_norm": 45089.19921875, + "learning_rate": 0.000984793949939058, + "loss": 0.584, + "step": 205 + }, + { + "epoch": 34.333333333333336, + "grad_norm": 26124.52734375, + "learning_rate": 0.0009844359346129503, + "loss": 0.5852, + "step": 206 + }, + { + "epoch": 34.5, + "grad_norm": 71812.40625, + "learning_rate": 0.0009840738201890538, + "loss": 0.5733, + "step": 207 + }, + { + "epoch": 34.666666666666664, + "grad_norm": 78225.8125, + "learning_rate": 0.000983707609731432, + "loss": 0.5812, + "step": 208 + }, + { + "epoch": 34.833333333333336, + "grad_norm": 52104.34765625, + "learning_rate": 0.0009833373063388071, + "loss": 0.5843, + "step": 209 + }, + { + "epoch": 35.0, + "grad_norm": 59323.8984375, + "learning_rate": 0.0009829629131445341, + "loss": 0.5729, + "step": 210 + }, + { + "epoch": 35.0, + "eval_accuracy": 0.7103290974258716, + "eval_best_threshold": 0.5349152684211731, + "eval_f1": 0.7753348496335608, + "eval_loss": 0.5791059732437134, + "eval_pr_auc": 0.697594188551865, + "eval_precision": 0.6790615316511731, + "eval_recall": 0.9034157832744405, + "eval_roc_auc": 0.7347727063804417, + "eval_runtime": 3.3061, + "eval_samples_per_second": 928.288, + "eval_steps_per_second": 1.815, + "step": 210 + }, + { + "epoch": 35.166666666666664, + "grad_norm": 68259.4609375, + "learning_rate": 0.0009825844333165748, + "loss": 0.5711, + "step": 211 + }, + { + "epoch": 35.333333333333336, + "grad_norm": 9691.2099609375, + "learning_rate": 0.0009822018700574695, + "loss": 0.5765, + "step": 212 + }, + { + "epoch": 35.5, + "grad_norm": 84118.2265625, + "learning_rate": 0.0009818152266043115, + "loss": 0.5978, + "step": 213 + }, + { + "epoch": 35.666666666666664, + "grad_norm": 39651.7265625, + "learning_rate": 0.0009814245062287187, + "loss": 0.5697, + "step": 214 + }, + { + "epoch": 35.833333333333336, + "grad_norm": 131176.8125, + "learning_rate": 0.0009810297122368067, + "loss": 0.5914, + "step": 215 + }, + { + "epoch": 36.0, + "grad_norm": 187397.1875, + "learning_rate": 0.0009806308479691594, + "loss": 0.6202, + "step": 216 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.704138155751059, + "eval_best_threshold": 0.6704061031341553, + "eval_f1": 0.7657378740970072, + "eval_loss": 0.612671434879303, + "eval_pr_auc": 0.7066488157065188, + "eval_precision": 0.6813590449954087, + "eval_recall": 0.8739693757361602, + "eval_roc_auc": 0.7290279721541368, + "eval_runtime": 3.3102, + "eval_samples_per_second": 927.121, + "eval_steps_per_second": 1.813, + "step": 216 + }, + { + "epoch": 36.166666666666664, + "grad_norm": 144801.84375, + "learning_rate": 0.0009802279168008028, + "loss": 0.6077, + "step": 217 + }, + { + "epoch": 36.333333333333336, + "grad_norm": 70672.8828125, + "learning_rate": 0.0009798209221411748, + "loss": 0.5993, + "step": 218 + }, + { + "epoch": 36.5, + "grad_norm": 117014.921875, + "learning_rate": 0.0009794098674340967, + "loss": 0.5838, + "step": 219 + }, + { + "epoch": 36.666666666666664, + "grad_norm": 179098.9375, + "learning_rate": 0.0009789947561577445, + "loss": 0.5913, + "step": 220 + }, + { + "epoch": 36.833333333333336, + "grad_norm": 133181.796875, + "learning_rate": 0.0009785755918246195, + "loss": 0.5817, + "step": 221 + }, + { + "epoch": 37.0, + "grad_norm": 16482.361328125, + "learning_rate": 0.0009781523779815178, + "loss": 0.5722, + "step": 222 + }, + { + "epoch": 37.0, + "eval_accuracy": 0.7044639947865754, + "eval_best_threshold": 0.47125041484832764, + "eval_f1": 0.7745463584389759, + "eval_loss": 0.5698202252388, + "eval_pr_auc": 0.7141467386682018, + "eval_precision": 0.6701075268817205, + "eval_recall": 0.917550058892815, + "eval_roc_auc": 0.7431517235276582, + "eval_runtime": 3.3671, + "eval_samples_per_second": 911.473, + "eval_steps_per_second": 1.782, + "step": 222 + }, + { + "epoch": 37.166666666666664, + "grad_norm": 28902.521484375, + "learning_rate": 0.000977725118209501, + "loss": 0.5777, + "step": 223 + }, + { + "epoch": 37.333333333333336, + "grad_norm": 24806.671875, + "learning_rate": 0.000977293816123866, + "loss": 0.5778, + "step": 224 + }, + { + "epoch": 37.5, + "grad_norm": 65626.3515625, + "learning_rate": 0.0009768584753741135, + "loss": 0.5654, + "step": 225 + }, + { + "epoch": 37.666666666666664, + "grad_norm": 87890.7265625, + "learning_rate": 0.0009764190996439181, + "loss": 0.5841, + "step": 226 + }, + { + "epoch": 37.833333333333336, + "grad_norm": 78071.140625, + "learning_rate": 0.0009759756926510965, + "loss": 0.5676, + "step": 227 + }, + { + "epoch": 38.0, + "grad_norm": 43882.6015625, + "learning_rate": 0.0009755282581475768, + "loss": 0.5653, + "step": 228 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.6937113066145324, + "eval_best_threshold": 0.318327933549881, + "eval_f1": 0.7702834799608993, + "eval_loss": 0.579859733581543, + "eval_pr_auc": 0.7055837739628394, + "eval_precision": 0.658312447786132, + "eval_recall": 0.928150765606596, + "eval_roc_auc": 0.7329030420651919, + "eval_runtime": 3.3237, + "eval_samples_per_second": 923.359, + "eval_steps_per_second": 1.805, + "step": 228 + }, + { + "epoch": 38.166666666666664, + "grad_norm": 60592.203125, + "learning_rate": 0.0009750767999193656, + "loss": 0.5774, + "step": 229 + }, + { + "epoch": 38.333333333333336, + "grad_norm": 68110.4375, + "learning_rate": 0.000974621321786517, + "loss": 0.5741, + "step": 230 + }, + { + "epoch": 38.5, + "grad_norm": 45131.796875, + "learning_rate": 0.0009741618276030996, + "loss": 0.5557, + "step": 231 + }, + { + "epoch": 38.666666666666664, + "grad_norm": 37979.95703125, + "learning_rate": 0.0009736983212571646, + "loss": 0.5668, + "step": 232 + }, + { + "epoch": 38.833333333333336, + "grad_norm": 72676.8046875, + "learning_rate": 0.0009732308066707122, + "loss": 0.5715, + "step": 233 + }, + { + "epoch": 39.0, + "grad_norm": 110384.1328125, + "learning_rate": 0.0009727592877996585, + "loss": 0.5794, + "step": 234 + }, + { + "epoch": 39.0, + "eval_accuracy": 0.7194525904203324, + "eval_best_threshold": 0.49165481328964233, + "eval_f1": 0.7757228444907528, + "eval_loss": 0.5648224949836731, + "eval_pr_auc": 0.7175625146397417, + "eval_precision": 0.6954694068192433, + "eval_recall": 0.8769140164899882, + "eval_roc_auc": 0.7463987752356358, + "eval_runtime": 3.3315, + "eval_samples_per_second": 921.213, + "eval_steps_per_second": 1.801, + "step": 234 + }, + { + "epoch": 39.166666666666664, + "grad_norm": 33855.453125, + "learning_rate": 0.0009722837686338024, + "loss": 0.5784, + "step": 235 + }, + { + "epoch": 39.333333333333336, + "grad_norm": 82675.9375, + "learning_rate": 0.0009718042531967918, + "loss": 0.5696, + "step": 236 + }, + { + "epoch": 39.5, + "grad_norm": 103150.46875, + "learning_rate": 0.0009713207455460893, + "loss": 0.5773, + "step": 237 + }, + { + "epoch": 39.666666666666664, + "grad_norm": 67929.5625, + "learning_rate": 0.0009708332497729377, + "loss": 0.5606, + "step": 238 + }, + { + "epoch": 39.833333333333336, + "grad_norm": 111831.5390625, + "learning_rate": 0.000970341770002326, + "loss": 0.5724, + "step": 239 + }, + { + "epoch": 40.0, + "grad_norm": 159352.671875, + "learning_rate": 0.0009698463103929542, + "loss": 0.5975, + "step": 240 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.7080482241772564, + "eval_best_threshold": 0.3221179246902466, + "eval_f1": 0.7775571002979146, + "eval_loss": 0.5620133876800537, + "eval_pr_auc": 0.7292111688740706, + "eval_precision": 0.6721030042918454, + "eval_recall": 0.9222614840989399, + "eval_roc_auc": 0.7542481436520763, + "eval_runtime": 3.3798, + "eval_samples_per_second": 908.043, + "eval_steps_per_second": 1.775, + "step": 240 + }, + { + "epoch": 40.166666666666664, + "grad_norm": 64492.37890625, + "learning_rate": 0.0009693468751371977, + "loss": 0.5548, + "step": 241 + }, + { + "epoch": 40.333333333333336, + "grad_norm": 53474.79296875, + "learning_rate": 0.0009688434684610725, + "loss": 0.5691, + "step": 242 + }, + { + "epoch": 40.5, + "grad_norm": 55020.44921875, + "learning_rate": 0.0009683360946241988, + "loss": 0.5637, + "step": 243 + }, + { + "epoch": 40.666666666666664, + "grad_norm": 32489.10546875, + "learning_rate": 0.0009678247579197657, + "loss": 0.5637, + "step": 244 + }, + { + "epoch": 40.833333333333336, + "grad_norm": 170300.09375, + "learning_rate": 0.0009673094626744943, + "loss": 0.5844, + "step": 245 + }, + { + "epoch": 41.0, + "grad_norm": 191009.390625, + "learning_rate": 0.0009667902132486009, + "loss": 0.6063, + "step": 246 + }, + { + "epoch": 41.0, + "eval_accuracy": 0.7204301075268817, + "eval_best_threshold": 0.370286226272583, + "eval_f1": 0.7804503582395087, + "eval_loss": 0.5586404204368591, + "eval_pr_auc": 0.7353634379200227, + "eval_precision": 0.6900452488687783, + "eval_recall": 0.8981154299175501, + "eval_roc_auc": 0.760805822098165, + "eval_runtime": 3.1116, + "eval_samples_per_second": 986.318, + "eval_steps_per_second": 1.928, + "step": 246 + }, + { + "epoch": 41.166666666666664, + "grad_norm": 94776.4140625, + "learning_rate": 0.000966267014035761, + "loss": 0.5513, + "step": 247 + }, + { + "epoch": 41.333333333333336, + "grad_norm": 90176.515625, + "learning_rate": 0.0009657398694630713, + "loss": 0.5744, + "step": 248 + }, + { + "epoch": 41.5, + "grad_norm": 115559.1484375, + "learning_rate": 0.0009652087839910124, + "loss": 0.5936, + "step": 249 + }, + { + "epoch": 41.666666666666664, + "grad_norm": 89386.3125, + "learning_rate": 0.0009646737621134112, + "loss": 0.5794, + "step": 250 + }, + { + "epoch": 41.833333333333336, + "grad_norm": 16934.8046875, + "learning_rate": 0.0009641348083574033, + "loss": 0.5606, + "step": 251 + }, + { + "epoch": 42.0, + "grad_norm": 160944.21875, + "learning_rate": 0.0009635919272833937, + "loss": 0.5878, + "step": 252 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.6917562724014337, + "eval_best_threshold": 0.2776191234588623, + "eval_f1": 0.7723772858517806, + "eval_loss": 0.5870246291160583, + "eval_pr_auc": 0.7297108243263718, + "eval_precision": 0.6529698942229455, + "eval_recall": 0.9452296819787986, + "eval_roc_auc": 0.7465766134956043, + "eval_runtime": 3.1963, + "eval_samples_per_second": 960.178, + "eval_steps_per_second": 1.877, + "step": 252 + }, + { + "epoch": 42.166666666666664, + "grad_norm": 165402.4375, + "learning_rate": 0.0009630451234850189, + "loss": 0.5937, + "step": 253 + }, + { + "epoch": 42.333333333333336, + "grad_norm": 66799.2421875, + "learning_rate": 0.000962494401589108, + "loss": 0.5639, + "step": 254 + }, + { + "epoch": 42.5, + "grad_norm": 130646.4921875, + "learning_rate": 0.0009619397662556434, + "loss": 0.5808, + "step": 255 + }, + { + "epoch": 42.666666666666664, + "grad_norm": 183843.8125, + "learning_rate": 0.0009613812221777212, + "loss": 0.6015, + "step": 256 + }, + { + "epoch": 42.833333333333336, + "grad_norm": 191116.703125, + "learning_rate": 0.000960818774081512, + "loss": 0.6168, + "step": 257 + }, + { + "epoch": 43.0, + "grad_norm": 149275.234375, + "learning_rate": 0.0009602524267262203, + "loss": 0.5984, + "step": 258 + }, + { + "epoch": 43.0, + "eval_accuracy": 0.7129358097100033, + "eval_best_threshold": 0.5024541616439819, + "eval_f1": 0.7796949237309327, + "eval_loss": 0.5609505772590637, + "eval_pr_auc": 0.7453360679888218, + "eval_precision": 0.6775315080399826, + "eval_recall": 0.9181389870435807, + "eval_roc_auc": 0.7664687249512233, + "eval_runtime": 3.0926, + "eval_samples_per_second": 992.366, + "eval_steps_per_second": 1.94, + "step": 258 + }, + { + "epoch": 43.166666666666664, + "grad_norm": 43646.16015625, + "learning_rate": 0.0009596821849040446, + "loss": 0.5642, + "step": 259 + }, + { + "epoch": 43.333333333333336, + "grad_norm": 160944.84375, + "learning_rate": 0.000959108053440137, + "loss": 0.571, + "step": 260 + }, + { + "epoch": 43.5, + "grad_norm": 258851.640625, + "learning_rate": 0.000958530037192562, + "loss": 0.6145, + "step": 261 + }, + { + "epoch": 43.666666666666664, + "grad_norm": 305888.8125, + "learning_rate": 0.0009579481410522556, + "loss": 0.6399, + "step": 262 + }, + { + "epoch": 43.833333333333336, + "grad_norm": 204104.984375, + "learning_rate": 0.0009573623699429836, + "loss": 0.5947, + "step": 263 + }, + { + "epoch": 44.0, + "grad_norm": 62314.6171875, + "learning_rate": 0.0009567727288213005, + "loss": 0.5938, + "step": 264 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.7197784294558488, + "eval_best_threshold": 0.6008195877075195, + "eval_f1": 0.7718832891246684, + "eval_loss": 0.5699706077575684, + "eval_pr_auc": 0.7407351609366284, + "eval_precision": 0.7022200772200772, + "eval_recall": 0.8568904593639576, + "eval_roc_auc": 0.760154607600309, + "eval_runtime": 3.2186, + "eval_samples_per_second": 953.521, + "eval_steps_per_second": 1.864, + "step": 264 + }, + { + "epoch": 44.166666666666664, + "grad_norm": 50153.59765625, + "learning_rate": 0.0009561792226765072, + "loss": 0.5787, + "step": 265 + }, + { + "epoch": 44.333333333333336, + "grad_norm": 68213.96875, + "learning_rate": 0.0009555818565306084, + "loss": 0.5667, + "step": 266 + }, + { + "epoch": 44.5, + "grad_norm": 25824.265625, + "learning_rate": 0.0009549806354382716, + "loss": 0.5499, + "step": 267 + }, + { + "epoch": 44.666666666666664, + "grad_norm": 157802.890625, + "learning_rate": 0.0009543755644867822, + "loss": 0.5828, + "step": 268 + }, + { + "epoch": 44.833333333333336, + "grad_norm": 143645.234375, + "learning_rate": 0.0009537666487960018, + "loss": 0.5936, + "step": 269 + }, + { + "epoch": 45.0, + "grad_norm": 43802.171875, + "learning_rate": 0.0009531538935183251, + "loss": 0.5778, + "step": 270 + }, + { + "epoch": 45.0, + "eval_accuracy": 0.7217334636689475, + "eval_best_threshold": 0.587894856929779, + "eval_f1": 0.7795560144553433, + "eval_loss": 0.5726057887077332, + "eval_pr_auc": 0.7474146922079, + "eval_precision": 0.6939338235294118, + "eval_recall": 0.889281507656066, + "eval_roc_auc": 0.7676195189088462, + "eval_runtime": 3.1802, + "eval_samples_per_second": 965.029, + "eval_steps_per_second": 1.887, + "step": 270 + }, + { + "epoch": 45.166666666666664, + "grad_norm": 97220.7578125, + "learning_rate": 0.0009525373038386351, + "loss": 0.5589, + "step": 271 + }, + { + "epoch": 45.333333333333336, + "grad_norm": 141003.765625, + "learning_rate": 0.0009519168849742603, + "loss": 0.5977, + "step": 272 + }, + { + "epoch": 45.5, + "grad_norm": 139000.796875, + "learning_rate": 0.0009512926421749304, + "loss": 0.6014, + "step": 273 + }, + { + "epoch": 45.666666666666664, + "grad_norm": 118896.6171875, + "learning_rate": 0.0009506645807227311, + "loss": 0.5868, + "step": 274 + }, + { + "epoch": 45.833333333333336, + "grad_norm": 49140.4765625, + "learning_rate": 0.0009500327059320606, + "loss": 0.5658, + "step": 275 + }, + { + "epoch": 46.0, + "grad_norm": 99207.765625, + "learning_rate": 0.0009493970231495835, + "loss": 0.5769, + "step": 276 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.7064190289996741, + "eval_best_threshold": 0.2906385064125061, + "eval_f1": 0.7784607819031227, + "eval_loss": 0.5690336227416992, + "eval_pr_auc": 0.7417411845176535, + "eval_precision": 0.6682144364710848, + "eval_recall": 0.9322732626619552, + "eval_roc_auc": 0.7648114785576027, + "eval_runtime": 3.1389, + "eval_samples_per_second": 977.733, + "eval_steps_per_second": 1.912, + "step": 276 + }, + { + "epoch": 46.166666666666664, + "grad_norm": 127385.4296875, + "learning_rate": 0.0009487575377541864, + "loss": 0.5773, + "step": 277 + }, + { + "epoch": 46.333333333333336, + "grad_norm": 54752.7265625, + "learning_rate": 0.0009481142551569317, + "loss": 0.5695, + "step": 278 + }, + { + "epoch": 46.5, + "grad_norm": 55140.91015625, + "learning_rate": 0.0009474671808010125, + "loss": 0.5552, + "step": 279 + }, + { + "epoch": 46.666666666666664, + "grad_norm": 34584.78515625, + "learning_rate": 0.0009468163201617061, + "loss": 0.5581, + "step": 280 + }, + { + "epoch": 46.833333333333336, + "grad_norm": 62221.57421875, + "learning_rate": 0.0009461616787463277, + "loss": 0.555, + "step": 281 + }, + { + "epoch": 47.0, + "grad_norm": 38826.92578125, + "learning_rate": 0.0009455032620941839, + "loss": 0.5488, + "step": 282 + }, + { + "epoch": 47.0, + "eval_accuracy": 0.7073965461062235, + "eval_best_threshold": 0.489240437746048, + "eval_f1": 0.7765057242409159, + "eval_loss": 0.5562921166419983, + "eval_pr_auc": 0.758716026312086, + "eval_precision": 0.6724137931034483, + "eval_recall": 0.9187279151943463, + "eval_roc_auc": 0.7723453344089541, + "eval_runtime": 3.1347, + "eval_samples_per_second": 979.053, + "eval_steps_per_second": 1.914, + "step": 282 + }, + { + "epoch": 47.166666666666664, + "grad_norm": 73640.1484375, + "learning_rate": 0.0009448410757765259, + "loss": 0.5463, + "step": 283 + }, + { + "epoch": 47.333333333333336, + "grad_norm": 91469.265625, + "learning_rate": 0.0009441751253965021, + "loss": 0.5645, + "step": 284 + }, + { + "epoch": 47.5, + "grad_norm": 42532.26171875, + "learning_rate": 0.0009435054165891108, + "loss": 0.5533, + "step": 285 + }, + { + "epoch": 47.666666666666664, + "grad_norm": 124230.6328125, + "learning_rate": 0.0009428319550211531, + "loss": 0.5645, + "step": 286 + }, + { + "epoch": 47.833333333333336, + "grad_norm": 112197.359375, + "learning_rate": 0.0009421547463911835, + "loss": 0.5687, + "step": 287 + }, + { + "epoch": 48.0, + "grad_norm": 22061.705078125, + "learning_rate": 0.0009414737964294635, + "loss": 0.5414, + "step": 288 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.7174975562072337, + "eval_best_threshold": 0.4483521580696106, + "eval_f1": 0.782543265613243, + "eval_loss": 0.5482359528541565, + "eval_pr_auc": 0.7655134822312841, + "eval_precision": 0.6815203145478375, + "eval_recall": 0.9187279151943463, + "eval_roc_auc": 0.7805329821242479, + "eval_runtime": 3.1627, + "eval_samples_per_second": 970.359, + "eval_steps_per_second": 1.897, + "step": 288 + }, + { + "epoch": 48.166666666666664, + "grad_norm": 65311.2890625, + "learning_rate": 0.0009407891108979117, + "loss": 0.5508, + "step": 289 + }, + { + "epoch": 48.333333333333336, + "grad_norm": 15003.859375, + "learning_rate": 0.0009401006955900555, + "loss": 0.5266, + "step": 290 + }, + { + "epoch": 48.5, + "grad_norm": 134944.453125, + "learning_rate": 0.0009394085563309827, + "loss": 0.5614, + "step": 291 + }, + { + "epoch": 48.666666666666664, + "grad_norm": 112159.2421875, + "learning_rate": 0.0009387126989772909, + "loss": 0.5566, + "step": 292 + }, + { + "epoch": 48.833333333333336, + "grad_norm": 104069.140625, + "learning_rate": 0.0009380131294170393, + "loss": 0.5535, + "step": 293 + }, + { + "epoch": 49.0, + "grad_norm": 139390.40625, + "learning_rate": 0.0009373098535696979, + "loss": 0.5785, + "step": 294 + }, + { + "epoch": 49.0, + "eval_accuracy": 0.7165200391006843, + "eval_best_threshold": 0.40806540846824646, + "eval_f1": 0.7819548872180451, + "eval_loss": 0.5490941405296326, + "eval_pr_auc": 0.7576025952229788, + "eval_precision": 0.680628272251309, + "eval_recall": 0.9187279151943463, + "eval_roc_auc": 0.7739138764530975, + "eval_runtime": 3.1794, + "eval_samples_per_second": 965.263, + "eval_steps_per_second": 1.887, + "step": 294 + }, + { + "epoch": 49.166666666666664, + "grad_norm": 26307.974609375, + "learning_rate": 0.0009366028773860979, + "loss": 0.5608, + "step": 295 + }, + { + "epoch": 49.333333333333336, + "grad_norm": 60580.23828125, + "learning_rate": 0.0009358922068483812, + "loss": 0.5353, + "step": 296 + }, + { + "epoch": 49.5, + "grad_norm": 55138.63671875, + "learning_rate": 0.0009351778479699498, + "loss": 0.543, + "step": 297 + }, + { + "epoch": 49.666666666666664, + "grad_norm": 41980.36328125, + "learning_rate": 0.0009344598067954151, + "loss": 0.5526, + "step": 298 + }, + { + "epoch": 49.833333333333336, + "grad_norm": 28825.138671875, + "learning_rate": 0.0009337380894005462, + "loss": 0.5639, + "step": 299 + }, + { + "epoch": 50.0, + "grad_norm": 36231.70703125, + "learning_rate": 0.0009330127018922195, + "loss": 0.5266, + "step": 300 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.7347670250896058, + "eval_best_threshold": 0.47259435057640076, + "eval_f1": 0.7824692677712454, + "eval_loss": 0.5478315949440002, + "eval_pr_auc": 0.7759952567944263, + "eval_precision": 0.7162426614481409, + "eval_recall": 0.8621908127208481, + "eval_roc_auc": 0.7818392771690897, + "eval_runtime": 3.0541, + "eval_samples_per_second": 1004.894, + "eval_steps_per_second": 1.965, + "step": 300 + }, + { + "epoch": 50.166666666666664, + "grad_norm": 90094.9375, + "learning_rate": 0.0009322836504083654, + "loss": 0.5464, + "step": 301 + }, + { + "epoch": 50.333333333333336, + "grad_norm": 46082.94140625, + "learning_rate": 0.0009315509411179181, + "loss": 0.5416, + "step": 302 + }, + { + "epoch": 50.5, + "grad_norm": 34948.2890625, + "learning_rate": 0.0009308145802207629, + "loss": 0.5505, + "step": 303 + }, + { + "epoch": 50.666666666666664, + "grad_norm": 35781.1640625, + "learning_rate": 0.0009300745739476828, + "loss": 0.5344, + "step": 304 + }, + { + "epoch": 50.833333333333336, + "grad_norm": 35878.0703125, + "learning_rate": 0.0009293309285603067, + "loss": 0.5303, + "step": 305 + }, + { + "epoch": 51.0, + "grad_norm": 172840.984375, + "learning_rate": 0.0009285836503510562, + "loss": 0.5748, + "step": 306 + }, + { + "epoch": 51.0, + "eval_accuracy": 0.718475073313783, + "eval_best_threshold": 0.49718165397644043, + "eval_f1": 0.7808219178082192, + "eval_loss": 0.537997841835022, + "eval_pr_auc": 0.7953585398041019, + "eval_precision": 0.6858288770053476, + "eval_recall": 0.9063604240282686, + "eval_roc_auc": 0.7944567728455583, + "eval_runtime": 3.0929, + "eval_samples_per_second": 992.283, + "eval_steps_per_second": 1.94, + "step": 306 + }, + { + "epoch": 51.166666666666664, + "grad_norm": 57544.5859375, + "learning_rate": 0.0009278327456430926, + "loss": 0.5362, + "step": 307 + }, + { + "epoch": 51.333333333333336, + "grad_norm": 315130.65625, + "learning_rate": 0.0009270782207902628, + "loss": 0.5983, + "step": 308 + }, + { + "epoch": 51.5, + "grad_norm": 210392.46875, + "learning_rate": 0.0009263200821770461, + "loss": 0.5589, + "step": 309 + }, + { + "epoch": 51.666666666666664, + "grad_norm": 60350.46484375, + "learning_rate": 0.0009255583362184998, + "loss": 0.5427, + "step": 310 + }, + { + "epoch": 51.833333333333336, + "grad_norm": 47631.484375, + "learning_rate": 0.0009247929893602054, + "loss": 0.5454, + "step": 311 + }, + { + "epoch": 52.0, + "grad_norm": 87656.984375, + "learning_rate": 0.0009240240480782129, + "loss": 0.543, + "step": 312 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.7344411860540893, + "eval_best_threshold": 0.44728729128837585, + "eval_f1": 0.7871506920867067, + "eval_loss": 0.5385949015617371, + "eval_pr_auc": 0.7933372180577872, + "eval_precision": 0.7071797278273111, + "eval_recall": 0.8875147232037691, + "eval_roc_auc": 0.7921294112694474, + "eval_runtime": 3.3335, + "eval_samples_per_second": 920.652, + "eval_steps_per_second": 1.8, + "step": 312 + }, + { + "epoch": 52.166666666666664, + "grad_norm": 114437.171875, + "learning_rate": 0.0009232515188789881, + "loss": 0.5307, + "step": 313 + }, + { + "epoch": 52.333333333333336, + "grad_norm": 104620.28125, + "learning_rate": 0.0009224754082993551, + "loss": 0.5373, + "step": 314 + }, + { + "epoch": 52.5, + "grad_norm": 112161.15625, + "learning_rate": 0.0009216957229064429, + "loss": 0.5456, + "step": 315 + }, + { + "epoch": 52.666666666666664, + "grad_norm": 19849.81640625, + "learning_rate": 0.0009209124692976287, + "loss": 0.5372, + "step": 316 + }, + { + "epoch": 52.833333333333336, + "grad_norm": 77253.65625, + "learning_rate": 0.0009201256541004829, + "loss": 0.533, + "step": 317 + }, + { + "epoch": 53.0, + "grad_norm": 85743.8828125, + "learning_rate": 0.0009193352839727121, + "loss": 0.552, + "step": 318 + }, + { + "epoch": 53.0, + "eval_accuracy": 0.7344411860540893, + "eval_best_threshold": 0.3785835802555084, + "eval_f1": 0.788586251621271, + "eval_loss": 0.523644208908081, + "eval_pr_auc": 0.8115773421762836, + "eval_precision": 0.7046824292999536, + "eval_recall": 0.8951707891637221, + "eval_roc_auc": 0.8043590133498972, + "eval_runtime": 3.1206, + "eval_samples_per_second": 983.45, + "eval_steps_per_second": 1.923, + "step": 318 + }, + { + "epoch": 53.166666666666664, + "grad_norm": 61792.125, + "learning_rate": 0.0009185413656021035, + "loss": 0.5262, + "step": 319 + }, + { + "epoch": 53.333333333333336, + "grad_norm": 78983.8359375, + "learning_rate": 0.0009177439057064682, + "loss": 0.5294, + "step": 320 + }, + { + "epoch": 53.5, + "grad_norm": 161168.796875, + "learning_rate": 0.0009169429110335841, + "loss": 0.5393, + "step": 321 + }, + { + "epoch": 53.666666666666664, + "grad_norm": 30353.265625, + "learning_rate": 0.000916138388361139, + "loss": 0.5081, + "step": 322 + }, + { + "epoch": 53.833333333333336, + "grad_norm": 146950.171875, + "learning_rate": 0.0009153303444966727, + "loss": 0.5313, + "step": 323 + }, + { + "epoch": 54.0, + "grad_norm": 53159.03515625, + "learning_rate": 0.0009145187862775209, + "loss": 0.5135, + "step": 324 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.7416096448354513, + "eval_best_threshold": 0.44953474402427673, + "eval_f1": 0.7889273356401384, + "eval_loss": 0.5188906192779541, + "eval_pr_auc": 0.8162425693807065, + "eval_precision": 0.7197668771248179, + "eval_recall": 0.872791519434629, + "eval_roc_auc": 0.8099746644913697, + "eval_runtime": 3.3884, + "eval_samples_per_second": 905.737, + "eval_steps_per_second": 1.771, + "step": 324 + }, + { + "epoch": 54.166666666666664, + "grad_norm": 39070.1484375, + "learning_rate": 0.0009137037205707553, + "loss": 0.5175, + "step": 325 + }, + { + "epoch": 54.333333333333336, + "grad_norm": 245568.0, + "learning_rate": 0.0009128851542731271, + "loss": 0.5452, + "step": 326 + }, + { + "epoch": 54.5, + "grad_norm": 52586.328125, + "learning_rate": 0.0009120630943110077, + "loss": 0.5086, + "step": 327 + }, + { + "epoch": 54.666666666666664, + "grad_norm": 335065.6875, + "learning_rate": 0.0009112375476403312, + "loss": 0.544, + "step": 328 + }, + { + "epoch": 54.833333333333336, + "grad_norm": 30743.716796875, + "learning_rate": 0.0009104085212465336, + "loss": 0.5215, + "step": 329 + }, + { + "epoch": 55.0, + "grad_norm": 85467.7734375, + "learning_rate": 0.0009095760221444959, + "loss": 0.5252, + "step": 330 + }, + { + "epoch": 55.0, + "eval_accuracy": 0.7403062886933854, + "eval_best_threshold": 0.5788118839263916, + "eval_f1": 0.7887622581500132, + "eval_loss": 0.5344216823577881, + "eval_pr_auc": 0.8178882682425423, + "eval_precision": 0.7171084337349397, + "eval_recall": 0.8763250883392226, + "eval_roc_auc": 0.8111254584489925, + "eval_runtime": 3.4301, + "eval_samples_per_second": 894.717, + "eval_steps_per_second": 1.749, + "step": 330 + }, + { + "epoch": 55.166666666666664, + "grad_norm": 142337.96875, + "learning_rate": 0.0009087400573784833, + "loss": 0.5131, + "step": 331 + }, + { + "epoch": 55.333333333333336, + "grad_norm": 36073.97265625, + "learning_rate": 0.0009079006340220861, + "loss": 0.5315, + "step": 332 + }, + { + "epoch": 55.5, + "grad_norm": 161002.625, + "learning_rate": 0.0009070577591781596, + "loss": 0.5176, + "step": 333 + }, + { + "epoch": 55.666666666666664, + "grad_norm": 71573.75, + "learning_rate": 0.0009062114399787647, + "loss": 0.5202, + "step": 334 + }, + { + "epoch": 55.833333333333336, + "grad_norm": 163313.109375, + "learning_rate": 0.0009053616835851062, + "loss": 0.5409, + "step": 335 + }, + { + "epoch": 56.0, + "grad_norm": 71877.1328125, + "learning_rate": 0.0009045084971874737, + "loss": 0.524, + "step": 336 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.7396546106223526, + "eval_best_threshold": 0.34169289469718933, + "eval_f1": 0.7896814951302974, + "eval_loss": 0.5214141607284546, + "eval_pr_auc": 0.8224051034771116, + "eval_precision": 0.7139457401237506, + "eval_recall": 0.8833922261484098, + "eval_roc_auc": 0.8150621703656167, + "eval_runtime": 3.4488, + "eval_samples_per_second": 889.878, + "eval_steps_per_second": 1.74, + "step": 336 + }, + { + "epoch": 56.166666666666664, + "grad_norm": 132228.5, + "learning_rate": 0.00090365188800518, + "loss": 0.5285, + "step": 337 + }, + { + "epoch": 56.333333333333336, + "grad_norm": 79436.7265625, + "learning_rate": 0.0009027918632864998, + "loss": 0.5134, + "step": 338 + }, + { + "epoch": 56.5, + "grad_norm": 140446.203125, + "learning_rate": 0.0009019284303086086, + "loss": 0.5139, + "step": 339 + }, + { + "epoch": 56.666666666666664, + "grad_norm": 65026.02734375, + "learning_rate": 0.0009010615963775219, + "loss": 0.5218, + "step": 340 + }, + { + "epoch": 56.833333333333336, + "grad_norm": 141111.953125, + "learning_rate": 0.0009001913688280319, + "loss": 0.5012, + "step": 341 + }, + { + "epoch": 57.0, + "grad_norm": 92607.140625, + "learning_rate": 0.0008993177550236464, + "loss": 0.5093, + "step": 342 + }, + { + "epoch": 57.0, + "eval_accuracy": 0.7337895079830564, + "eval_best_threshold": 0.38702327013015747, + "eval_f1": 0.790566521404768, + "eval_loss": 0.5087202787399292, + "eval_pr_auc": 0.8277830168523511, + "eval_precision": 0.6999546073536087, + "eval_recall": 0.9081272084805654, + "eval_roc_auc": 0.820387223480836, + "eval_runtime": 3.3861, + "eval_samples_per_second": 906.364, + "eval_steps_per_second": 1.772, + "step": 342 + }, + { + "epoch": 57.166666666666664, + "grad_norm": 94696.0234375, + "learning_rate": 0.0008984407623565266, + "loss": 0.5158, + "step": 343 + }, + { + "epoch": 57.333333333333336, + "grad_norm": 248622.8125, + "learning_rate": 0.0008975603982474239, + "loss": 0.5549, + "step": 344 + }, + { + "epoch": 57.5, + "grad_norm": 45057.03515625, + "learning_rate": 0.0008966766701456176, + "loss": 0.4958, + "step": 345 + }, + { + "epoch": 57.666666666666664, + "grad_norm": 71083.1796875, + "learning_rate": 0.0008957895855288517, + "loss": 0.497, + "step": 346 + }, + { + "epoch": 57.833333333333336, + "grad_norm": 191695.15625, + "learning_rate": 0.0008948991519032716, + "loss": 0.5265, + "step": 347 + }, + { + "epoch": 58.0, + "grad_norm": 41673.71484375, + "learning_rate": 0.0008940053768033609, + "loss": 0.491, + "step": 348 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.762137504072988, + "eval_best_threshold": 0.42669785022735596, + "eval_f1": 0.7985651214128036, + "eval_loss": 0.5082907676696777, + "eval_pr_auc": 0.8291562365284104, + "eval_precision": 0.7512980269989615, + "eval_recall": 0.8521790341578327, + "eval_roc_auc": 0.8227326266195524, + "eval_runtime": 3.5884, + "eval_samples_per_second": 855.265, + "eval_steps_per_second": 1.672, + "step": 348 + }, + { + "epoch": 58.166666666666664, + "grad_norm": 123645.5390625, + "learning_rate": 0.0008931082677918772, + "loss": 0.5067, + "step": 349 + }, + { + "epoch": 58.333333333333336, + "grad_norm": 155054.109375, + "learning_rate": 0.0008922078324597878, + "loss": 0.5377, + "step": 350 + }, + { + "epoch": 58.5, + "grad_norm": 90397.453125, + "learning_rate": 0.0008913040784262069, + "loss": 0.5253, + "step": 351 + }, + { + "epoch": 58.666666666666664, + "grad_norm": 75122.953125, + "learning_rate": 0.0008903970133383297, + "loss": 0.4941, + "step": 352 + }, + { + "epoch": 58.833333333333336, + "grad_norm": 174997.9375, + "learning_rate": 0.0008894866448713679, + "loss": 0.4985, + "step": 353 + }, + { + "epoch": 59.0, + "grad_norm": 73979.5, + "learning_rate": 0.0008885729807284854, + "loss": 0.5067, + "step": 354 + }, + { + "epoch": 59.0, + "eval_accuracy": 0.7442163571195829, + "eval_best_threshold": 0.3636360764503479, + "eval_f1": 0.7960509223174851, + "eval_loss": 0.5114080309867859, + "eval_pr_auc": 0.828087073890647, + "eval_precision": 0.7122268712226871, + "eval_recall": 0.9022379269729093, + "eval_roc_auc": 0.8213150752719767, + "eval_runtime": 3.5187, + "eval_samples_per_second": 872.201, + "eval_steps_per_second": 1.705, + "step": 354 + }, + { + "epoch": 59.166666666666664, + "grad_norm": 72822.265625, + "learning_rate": 0.0008876560286407329, + "loss": 0.5072, + "step": 355 + }, + { + "epoch": 59.333333333333336, + "grad_norm": 122237.328125, + "learning_rate": 0.000886735796366982, + "loss": 0.5217, + "step": 356 + }, + { + "epoch": 59.5, + "grad_norm": 88669.0234375, + "learning_rate": 0.00088581229169386, + "loss": 0.5115, + "step": 357 + }, + { + "epoch": 59.666666666666664, + "grad_norm": 44189.984375, + "learning_rate": 0.0008848855224356839, + "loss": 0.4921, + "step": 358 + }, + { + "epoch": 59.833333333333336, + "grad_norm": 222048.953125, + "learning_rate": 0.0008839554964343943, + "loss": 0.4887, + "step": 359 + }, + { + "epoch": 60.0, + "grad_norm": 40720.640625, + "learning_rate": 0.000883022221559489, + "loss": 0.5042, + "step": 360 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.7386770935158032, + "eval_best_threshold": 0.32580840587615967, + "eval_f1": 0.7916883116883117, + "eval_loss": 0.516082227230072, + "eval_pr_auc": 0.8290447998055172, + "eval_precision": 0.70817843866171, + "eval_recall": 0.8975265017667845, + "eval_roc_auc": 0.8196621244885001, + "eval_runtime": 3.559, + "eval_samples_per_second": 862.32, + "eval_steps_per_second": 1.686, + "step": 360 + }, + { + "epoch": 60.166666666666664, + "grad_norm": 236312.984375, + "learning_rate": 0.0008820857057079565, + "loss": 0.5054, + "step": 361 + }, + { + "epoch": 60.333333333333336, + "grad_norm": 268437.3125, + "learning_rate": 0.0008811459568042091, + "loss": 0.519, + "step": 362 + }, + { + "epoch": 60.5, + "grad_norm": 90306.2734375, + "learning_rate": 0.0008802029828000156, + "loss": 0.5026, + "step": 363 + }, + { + "epoch": 60.666666666666664, + "grad_norm": 734178.375, + "learning_rate": 0.0008792567916744345, + "loss": 0.6645, + "step": 364 + }, + { + "epoch": 60.833333333333336, + "grad_norm": 626324.9375, + "learning_rate": 0.0008783073914337465, + "loss": 0.6744, + "step": 365 + }, + { + "epoch": 61.0, + "grad_norm": 299050.0, + "learning_rate": 0.000877354790111386, + "loss": 0.557, + "step": 366 + }, + { + "epoch": 61.0, + "eval_accuracy": 0.7419354838709677, + "eval_best_threshold": 0.6245605945587158, + "eval_f1": 0.7858301784748513, + "eval_loss": 0.542291522026062, + "eval_pr_auc": 0.8285012823959621, + "eval_precision": 0.7265, + "eval_recall": 0.8557126030624264, + "eval_roc_auc": 0.815912486393655, + "eval_runtime": 3.3978, + "eval_samples_per_second": 903.228, + "eval_steps_per_second": 1.766, + "step": 366 + }, + { + "epoch": 61.166666666666664, + "grad_norm": 184496.296875, + "learning_rate": 0.0008763989957678742, + "loss": 0.5311, + "step": 367 + }, + { + "epoch": 61.333333333333336, + "grad_norm": 247417.09375, + "learning_rate": 0.0008754400164907496, + "loss": 0.5739, + "step": 368 + }, + { + "epoch": 61.5, + "grad_norm": 127733.125, + "learning_rate": 0.0008744778603945012, + "loss": 0.5525, + "step": 369 + }, + { + "epoch": 61.666666666666664, + "grad_norm": 96245.8984375, + "learning_rate": 0.000873512535620498, + "loss": 0.5395, + "step": 370 + }, + { + "epoch": 61.833333333333336, + "grad_norm": 138518.046875, + "learning_rate": 0.000872544050336922, + "loss": 0.5446, + "step": 371 + }, + { + "epoch": 62.0, + "grad_norm": 48077.8515625, + "learning_rate": 0.0008715724127386971, + "loss": 0.5098, + "step": 372 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.7347670250896058, + "eval_best_threshold": 0.5379018783569336, + "eval_f1": 0.7909604519774012, + "eval_loss": 0.5349071025848389, + "eval_pr_auc": 0.8189569305770087, + "eval_precision": 0.7012750455373407, + "eval_recall": 0.9069493521790342, + "eval_roc_auc": 0.8158420383872905, + "eval_runtime": 3.4752, + "eval_samples_per_second": 883.106, + "eval_steps_per_second": 1.727, + "step": 372 + }, + { + "epoch": 62.166666666666664, + "grad_norm": 115830.6328125, + "learning_rate": 0.0008705976310474219, + "loss": 0.5403, + "step": 373 + }, + { + "epoch": 62.333333333333336, + "grad_norm": 141982.296875, + "learning_rate": 0.000869619713511298, + "loss": 0.52, + "step": 374 + }, + { + "epoch": 62.5, + "grad_norm": 49035.359375, + "learning_rate": 0.000868638668405062, + "loss": 0.518, + "step": 375 + }, + { + "epoch": 62.666666666666664, + "grad_norm": 322461.53125, + "learning_rate": 0.0008676545040299144, + "loss": 0.5501, + "step": 376 + }, + { + "epoch": 62.833333333333336, + "grad_norm": 462798.125, + "learning_rate": 0.0008666672287134493, + "loss": 0.6246, + "step": 377 + }, + { + "epoch": 63.0, + "grad_norm": 299769.09375, + "learning_rate": 0.0008656768508095852, + "loss": 0.582, + "step": 378 + }, + { + "epoch": 63.0, + "eval_accuracy": 0.7240143369175627, + "eval_best_threshold": 0.30305054783821106, + "eval_f1": 0.7872393870886711, + "eval_loss": 0.5427862405776978, + "eval_pr_auc": 0.8257574777514395, + "eval_precision": 0.6863775733683749, + "eval_recall": 0.9228504122497055, + "eval_roc_auc": 0.8145997479335967, + "eval_runtime": 3.5458, + "eval_samples_per_second": 865.527, + "eval_steps_per_second": 1.692, + "step": 378 + }, + { + "epoch": 63.166666666666664, + "grad_norm": 148813.078125, + "learning_rate": 0.0008646833786984927, + "loss": 0.5409, + "step": 379 + }, + { + "epoch": 63.333333333333336, + "grad_norm": 225467.84375, + "learning_rate": 0.0008636868207865244, + "loss": 0.5438, + "step": 380 + }, + { + "epoch": 63.5, + "grad_norm": 68444.2109375, + "learning_rate": 0.0008626871855061438, + "loss": 0.5249, + "step": 381 + }, + { + "epoch": 63.666666666666664, + "grad_norm": 412893.6875, + "learning_rate": 0.000861684481315854, + "loss": 0.6003, + "step": 382 + }, + { + "epoch": 63.833333333333336, + "grad_norm": 368489.28125, + "learning_rate": 0.0008606787167001256, + "loss": 0.6114, + "step": 383 + }, + { + "epoch": 64.0, + "grad_norm": 166563.171875, + "learning_rate": 0.0008596699001693256, + "loss": 0.5418, + "step": 384 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.7432388400130335, + "eval_best_threshold": 0.4921227991580963, + "eval_f1": 0.7971163748712667, + "eval_loss": 0.5194808840751648, + "eval_pr_auc": 0.8246771013143861, + "eval_precision": 0.7081427264409881, + "eval_recall": 0.911660777385159, + "eval_roc_auc": 0.8198530643594086, + "eval_runtime": 3.5419, + "eval_samples_per_second": 866.482, + "eval_steps_per_second": 1.694, + "step": 384 + }, + { + "epoch": 64.16666666666667, + "grad_norm": 87660.3984375, + "learning_rate": 0.0008586580402596447, + "loss": 0.5204, + "step": 385 + }, + { + "epoch": 64.33333333333333, + "grad_norm": 101163.578125, + "learning_rate": 0.0008576431455330258, + "loss": 0.5452, + "step": 386 + }, + { + "epoch": 64.5, + "grad_norm": 25221.27734375, + "learning_rate": 0.0008566252245770908, + "loss": 0.4953, + "step": 387 + }, + { + "epoch": 64.66666666666667, + "grad_norm": 135303.609375, + "learning_rate": 0.0008556042860050685, + "loss": 0.5405, + "step": 388 + }, + { + "epoch": 64.83333333333333, + "grad_norm": 210229.96875, + "learning_rate": 0.0008545803384557219, + "loss": 0.5689, + "step": 389 + }, + { + "epoch": 65.0, + "grad_norm": 44948.34375, + "learning_rate": 0.0008535533905932737, + "loss": 0.5223, + "step": 390 + }, + { + "epoch": 65.0, + "eval_accuracy": 0.7409579667644184, + "eval_best_threshold": 0.556387722492218, + "eval_f1": 0.7936672722553854, + "eval_loss": 0.5385985374450684, + "eval_pr_auc": 0.822251513124422, + "eval_precision": 0.7095127610208817, + "eval_recall": 0.9004711425206124, + "eval_roc_auc": 0.8151740710098722, + "eval_runtime": 3.5537, + "eval_samples_per_second": 863.598, + "eval_steps_per_second": 1.688, + "step": 390 + }, + { + "epoch": 65.16666666666667, + "grad_norm": 142769.75, + "learning_rate": 0.0008525234511073352, + "loss": 0.5446, + "step": 391 + }, + { + "epoch": 65.33333333333333, + "grad_norm": 170959.328125, + "learning_rate": 0.000851490528712831, + "loss": 0.5357, + "step": 392 + }, + { + "epoch": 65.5, + "grad_norm": 125007.1171875, + "learning_rate": 0.0008504546321499254, + "loss": 0.535, + "step": 393 + }, + { + "epoch": 65.66666666666667, + "grad_norm": 106462.4296875, + "learning_rate": 0.00084941577018395, + "loss": 0.5239, + "step": 394 + }, + { + "epoch": 65.83333333333333, + "grad_norm": 168138.53125, + "learning_rate": 0.0008483739516053274, + "loss": 0.5587, + "step": 395 + }, + { + "epoch": 66.0, + "grad_norm": 133317.84375, + "learning_rate": 0.0008473291852294987, + "loss": 0.5255, + "step": 396 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.7386770935158032, + "eval_best_threshold": 0.4510115087032318, + "eval_f1": 0.7912545549193128, + "eval_loss": 0.5226884484291077, + "eval_pr_auc": 0.8253034319203902, + "eval_precision": 0.7089552238805971, + "eval_recall": 0.8951707891637221, + "eval_roc_auc": 0.8134717207097379, + "eval_runtime": 3.9669, + "eval_samples_per_second": 773.652, + "eval_steps_per_second": 1.513, + "step": 396 + }, + { + "epoch": 66.16666666666667, + "grad_norm": 80206.7265625, + "learning_rate": 0.0008462814798968471, + "loss": 0.527, + "step": 397 + }, + { + "epoch": 66.33333333333333, + "grad_norm": 80147.8046875, + "learning_rate": 0.0008452308444726248, + "loss": 0.5161, + "step": 398 + }, + { + "epoch": 66.5, + "grad_norm": 39128.91796875, + "learning_rate": 0.000844177287846877, + "loss": 0.5153, + "step": 399 + }, + { + "epoch": 66.66666666666667, + "grad_norm": 37543.1328125, + "learning_rate": 0.0008431208189343669, + "loss": 0.4919, + "step": 400 + }, + { + "epoch": 66.83333333333333, + "grad_norm": 75717.34375, + "learning_rate": 0.0008420614466745, + "loss": 0.5031, + "step": 401 + }, + { + "epoch": 67.0, + "grad_norm": 33691.29296875, + "learning_rate": 0.0008409991800312493, + "loss": 0.4967, + "step": 402 + }, + { + "epoch": 67.0, + "eval_accuracy": 0.7461713913326816, + "eval_best_threshold": 0.33050093054771423, + "eval_f1": 0.7949460384311661, + "eval_loss": 0.513331949710846, + "eval_pr_auc": 0.8367566848805189, + "eval_precision": 0.7187053783912423, + "eval_recall": 0.889281507656066, + "eval_roc_auc": 0.8243490647168032, + "eval_runtime": 3.7958, + "eval_samples_per_second": 808.525, + "eval_steps_per_second": 1.581, + "step": 402 + }, + { + "epoch": 67.16666666666667, + "grad_norm": 148904.53125, + "learning_rate": 0.0008399340279930785, + "loss": 0.5028, + "step": 403 + }, + { + "epoch": 67.33333333333333, + "grad_norm": 65357.32421875, + "learning_rate": 0.0008388659995728663, + "loss": 0.5002, + "step": 404 + }, + { + "epoch": 67.5, + "grad_norm": 106315.421875, + "learning_rate": 0.0008377951038078302, + "loss": 0.4969, + "step": 405 + }, + { + "epoch": 67.66666666666667, + "grad_norm": 45195.2890625, + "learning_rate": 0.0008367213497594501, + "loss": 0.5188, + "step": 406 + }, + { + "epoch": 67.83333333333333, + "grad_norm": 90925.59375, + "learning_rate": 0.0008356447465133919, + "loss": 0.5067, + "step": 407 + }, + { + "epoch": 68.0, + "grad_norm": 217360.078125, + "learning_rate": 0.0008345653031794292, + "loss": 0.512, + "step": 408 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.7435646790485501, + "eval_best_threshold": 0.4942082166671753, + "eval_f1": 0.7937090432503277, + "eval_loss": 0.5158348679542542, + "eval_pr_auc": 0.8390055356862849, + "eval_precision": 0.7151629664619745, + "eval_recall": 0.8916372202591284, + "eval_roc_auc": 0.8256519232735298, + "eval_runtime": 3.5256, + "eval_samples_per_second": 870.495, + "eval_steps_per_second": 1.702, + "step": 408 + }, + { + "epoch": 68.16666666666667, + "grad_norm": 104175.828125, + "learning_rate": 0.000833483028891368, + "loss": 0.4955, + "step": 409 + }, + { + "epoch": 68.33333333333333, + "grad_norm": 124932.0390625, + "learning_rate": 0.0008323979328069688, + "loss": 0.5162, + "step": 410 + }, + { + "epoch": 68.5, + "grad_norm": 109886.3125, + "learning_rate": 0.0008313100241078688, + "loss": 0.5087, + "step": 411 + }, + { + "epoch": 68.66666666666667, + "grad_norm": 156512.953125, + "learning_rate": 0.0008302193119995038, + "loss": 0.5231, + "step": 412 + }, + { + "epoch": 68.83333333333333, + "grad_norm": 134718.625, + "learning_rate": 0.000829125805711032, + "loss": 0.5115, + "step": 413 + }, + { + "epoch": 69.0, + "grad_norm": 152846.578125, + "learning_rate": 0.0008280295144952537, + "loss": 0.5167, + "step": 414 + }, + { + "epoch": 69.0, + "eval_accuracy": 0.7481264255457803, + "eval_best_threshold": 0.3464486598968506, + "eval_f1": 0.7973787680209699, + "eval_loss": 0.5154203772544861, + "eval_pr_auc": 0.8414014550914709, + "eval_precision": 0.7184695323571091, + "eval_recall": 0.8957597173144877, + "eval_roc_auc": 0.8285329889972242, + "eval_runtime": 3.5935, + "eval_samples_per_second": 854.049, + "eval_steps_per_second": 1.67, + "step": 414 + }, + { + "epoch": 69.16666666666667, + "grad_norm": 170052.90625, + "learning_rate": 0.0008269304476285349, + "loss": 0.5021, + "step": 415 + }, + { + "epoch": 69.33333333333333, + "grad_norm": 86306.34375, + "learning_rate": 0.0008258286144107276, + "loss": 0.522, + "step": 416 + }, + { + "epoch": 69.5, + "grad_norm": 55669.80859375, + "learning_rate": 0.0008247240241650918, + "loss": 0.483, + "step": 417 + }, + { + "epoch": 69.66666666666667, + "grad_norm": 203016.5625, + "learning_rate": 0.0008236166862382162, + "loss": 0.5094, + "step": 418 + }, + { + "epoch": 69.83333333333333, + "grad_norm": 104809.640625, + "learning_rate": 0.0008225066099999392, + "loss": 0.5041, + "step": 419 + }, + { + "epoch": 70.0, + "grad_norm": 225442.765625, + "learning_rate": 0.0008213938048432696, + "loss": 0.5173, + "step": 420 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.7543173672205931, + "eval_best_threshold": 0.5091540217399597, + "eval_f1": 0.7990405117270789, + "eval_loss": 0.5147111415863037, + "eval_pr_auc": 0.845698763041466, + "eval_precision": 0.7297955209347614, + "eval_recall": 0.8828032979976443, + "eval_roc_auc": 0.8304823368806482, + "eval_runtime": 3.5604, + "eval_samples_per_second": 861.975, + "eval_steps_per_second": 1.685, + "step": 420 + }, + { + "epoch": 70.16666666666667, + "grad_norm": 152348.796875, + "learning_rate": 0.0008202782801843076, + "loss": 0.4905, + "step": 421 + }, + { + "epoch": 70.33333333333333, + "grad_norm": 145317.3125, + "learning_rate": 0.0008191600454621642, + "loss": 0.4952, + "step": 422 + }, + { + "epoch": 70.5, + "grad_norm": 123521.515625, + "learning_rate": 0.000818039110138882, + "loss": 0.5022, + "step": 423 + }, + { + "epoch": 70.66666666666667, + "grad_norm": 187403.15625, + "learning_rate": 0.0008169154836993551, + "loss": 0.5047, + "step": 424 + }, + { + "epoch": 70.83333333333333, + "grad_norm": 131991.8125, + "learning_rate": 0.0008157891756512488, + "loss": 0.4971, + "step": 425 + }, + { + "epoch": 71.0, + "grad_norm": 182392.90625, + "learning_rate": 0.0008146601955249188, + "loss": 0.5092, + "step": 426 + }, + { + "epoch": 71.0, + "eval_accuracy": 0.7491039426523297, + "eval_best_threshold": 0.3482086956501007, + "eval_f1": 0.7979002624671916, + "eval_loss": 0.5007196068763733, + "eval_pr_auc": 0.8512863437213596, + "eval_precision": 0.7196969696969697, + "eval_recall": 0.8951707891637221, + "eval_roc_auc": 0.8366776806110762, + "eval_runtime": 3.5517, + "eval_samples_per_second": 864.102, + "eval_steps_per_second": 1.689, + "step": 426 + }, + { + "epoch": 71.16666666666667, + "grad_norm": 201647.609375, + "learning_rate": 0.0008135285528733309, + "loss": 0.5154, + "step": 427 + }, + { + "epoch": 71.33333333333333, + "grad_norm": 121528.09375, + "learning_rate": 0.00081239425727198, + "loss": 0.5022, + "step": 428 + }, + { + "epoch": 71.5, + "grad_norm": 85086.0703125, + "learning_rate": 0.0008112573183188099, + "loss": 0.4703, + "step": 429 + }, + { + "epoch": 71.66666666666667, + "grad_norm": 160130.96875, + "learning_rate": 0.00081011774563413, + "loss": 0.483, + "step": 430 + }, + { + "epoch": 71.83333333333333, + "grad_norm": 174087.8125, + "learning_rate": 0.0008089755488605366, + "loss": 0.4995, + "step": 431 + }, + { + "epoch": 72.0, + "grad_norm": 93436.890625, + "learning_rate": 0.0008078307376628291, + "loss": 0.4824, + "step": 432 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.7647442163571195, + "eval_best_threshold": 0.5070761442184448, + "eval_f1": 0.8005524861878454, + "eval_loss": 0.48814845085144043, + "eval_pr_auc": 0.8545800658620809, + "eval_precision": 0.7539021852237253, + "eval_recall": 0.8533568904593639, + "eval_roc_auc": 0.8396495984893199, + "eval_runtime": 3.6564, + "eval_samples_per_second": 839.359, + "eval_steps_per_second": 1.641, + "step": 432 + }, + { + "epoch": 72.16666666666667, + "grad_norm": 51381.62890625, + "learning_rate": 0.0008066833217279297, + "loss": 0.4834, + "step": 433 + }, + { + "epoch": 72.33333333333333, + "grad_norm": 209116.90625, + "learning_rate": 0.0008055333107648, + "loss": 0.5018, + "step": 434 + }, + { + "epoch": 72.5, + "grad_norm": 237707.515625, + "learning_rate": 0.0008043807145043603, + "loss": 0.4991, + "step": 435 + }, + { + "epoch": 72.66666666666667, + "grad_norm": 95548.7109375, + "learning_rate": 0.0008032255426994069, + "loss": 0.4799, + "step": 436 + }, + { + "epoch": 72.83333333333333, + "grad_norm": 117247.875, + "learning_rate": 0.0008020678051245282, + "loss": 0.4845, + "step": 437 + }, + { + "epoch": 73.0, + "grad_norm": 128688.5703125, + "learning_rate": 0.0008009075115760243, + "loss": 0.4959, + "step": 438 + }, + { + "epoch": 73.0, + "eval_accuracy": 0.761485826001955, + "eval_best_threshold": 0.37738415598869324, + "eval_f1": 0.8025889967637541, + "eval_loss": 0.4879034757614136, + "eval_pr_auc": 0.8562801951827024, + "eval_precision": 0.7402985074626866, + "eval_recall": 0.8763250883392226, + "eval_roc_auc": 0.8414073621603138, + "eval_runtime": 3.7092, + "eval_samples_per_second": 827.405, + "eval_steps_per_second": 1.618, + "step": 438 + }, + { + "epoch": 73.16666666666667, + "grad_norm": 154160.890625, + "learning_rate": 0.000799744671871822, + "loss": 0.5086, + "step": 439 + }, + { + "epoch": 73.33333333333333, + "grad_norm": 119829.8125, + "learning_rate": 0.0007985792958513931, + "loss": 0.4827, + "step": 440 + }, + { + "epoch": 73.5, + "grad_norm": 152021.484375, + "learning_rate": 0.0007974113933756707, + "loss": 0.4863, + "step": 441 + }, + { + "epoch": 73.66666666666667, + "grad_norm": 209087.796875, + "learning_rate": 0.0007962409743269654, + "loss": 0.4906, + "step": 442 + }, + { + "epoch": 73.83333333333333, + "grad_norm": 216866.65625, + "learning_rate": 0.0007950680486088822, + "loss": 0.51, + "step": 443 + }, + { + "epoch": 74.0, + "grad_norm": 57889.4296875, + "learning_rate": 0.0007938926261462366, + "loss": 0.4732, + "step": 444 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.7552948843271424, + "eval_best_threshold": 0.4220488965511322, + "eval_f1": 0.8007429026266915, + "eval_loss": 0.4835204780101776, + "eval_pr_auc": 0.8598558329755022, + "eval_precision": 0.7286335103814582, + "eval_recall": 0.8886925795053003, + "eval_roc_auc": 0.8418996390828357, + "eval_runtime": 3.7001, + "eval_samples_per_second": 829.428, + "eval_steps_per_second": 1.622, + "step": 444 + }, + { + "epoch": 74.16666666666667, + "grad_norm": 53130.546875, + "learning_rate": 0.0007927147168849704, + "loss": 0.4673, + "step": 445 + }, + { + "epoch": 74.33333333333333, + "grad_norm": 162182.0, + "learning_rate": 0.0007915343307920673, + "loss": 0.497, + "step": 446 + }, + { + "epoch": 74.5, + "grad_norm": 89758.8203125, + "learning_rate": 0.0007903514778554699, + "loss": 0.4919, + "step": 447 + }, + { + "epoch": 74.66666666666667, + "grad_norm": 248663.484375, + "learning_rate": 0.0007891661680839932, + "loss": 0.5119, + "step": 448 + }, + { + "epoch": 74.83333333333333, + "grad_norm": 246315.734375, + "learning_rate": 0.0007879784115072417, + "loss": 0.5121, + "step": 449 + }, + { + "epoch": 75.0, + "grad_norm": 38542.20703125, + "learning_rate": 0.0007867882181755231, + "loss": 0.4878, + "step": 450 + }, + { + "epoch": 75.0, + "eval_accuracy": 0.7673509286412512, + "eval_best_threshold": 0.31659069657325745, + "eval_f1": 0.8055555555555556, + "eval_loss": 0.529030442237854, + "eval_pr_auc": 0.856213564735403, + "eval_precision": 0.7492401215805471, + "eval_recall": 0.8710247349823321, + "eval_roc_auc": 0.8407879351775246, + "eval_runtime": 4.2878, + "eval_samples_per_second": 715.756, + "eval_steps_per_second": 1.399, + "step": 450 + }, + { + "epoch": 75.16666666666667, + "grad_norm": 333584.46875, + "learning_rate": 0.0007855955981597644, + "loss": 0.5461, + "step": 451 + }, + { + "epoch": 75.33333333333333, + "grad_norm": 322359.9375, + "learning_rate": 0.0007844005615514259, + "loss": 0.5358, + "step": 452 + }, + { + "epoch": 75.5, + "grad_norm": 84971.671875, + "learning_rate": 0.0007832031184624164, + "loss": 0.4801, + "step": 453 + }, + { + "epoch": 75.66666666666667, + "grad_norm": 296875.28125, + "learning_rate": 0.0007820032790250074, + "loss": 0.5571, + "step": 454 + }, + { + "epoch": 75.83333333333333, + "grad_norm": 333367.90625, + "learning_rate": 0.0007808010533917464, + "loss": 0.5737, + "step": 455 + }, + { + "epoch": 76.0, + "grad_norm": 226147.171875, + "learning_rate": 0.0007795964517353734, + "loss": 0.5162, + "step": 456 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.7422613229064842, + "eval_best_threshold": 0.37861621379852295, + "eval_f1": 0.792769190463715, + "eval_loss": 0.509261965751648, + "eval_pr_auc": 0.8386148647298124, + "eval_precision": 0.7140160453043889, + "eval_recall": 0.8910482921083628, + "eval_roc_auc": 0.8225797028984201, + "eval_runtime": 3.7448, + "eval_samples_per_second": 819.535, + "eval_steps_per_second": 1.602, + "step": 456 + }, + { + "epoch": 76.16666666666667, + "grad_norm": 73060.7265625, + "learning_rate": 0.0007783894842487322, + "loss": 0.5189, + "step": 457 + }, + { + "epoch": 76.33333333333333, + "grad_norm": 191211.65625, + "learning_rate": 0.0007771801611446858, + "loss": 0.527, + "step": 458 + }, + { + "epoch": 76.5, + "grad_norm": 145793.390625, + "learning_rate": 0.0007759684926560291, + "loss": 0.5262, + "step": 459 + }, + { + "epoch": 76.66666666666667, + "grad_norm": 39650.92578125, + "learning_rate": 0.000774754489035403, + "loss": 0.4805, + "step": 460 + }, + { + "epoch": 76.83333333333333, + "grad_norm": 93844.328125, + "learning_rate": 0.0007735381605552072, + "loss": 0.4957, + "step": 461 + }, + { + "epoch": 77.0, + "grad_norm": 129532.5234375, + "learning_rate": 0.0007723195175075137, + "loss": 0.5023, + "step": 462 + }, + { + "epoch": 77.0, + "eval_accuracy": 0.747148908439231, + "eval_best_threshold": 0.3997952342033386, + "eval_f1": 0.7988595127008813, + "eval_loss": 0.49405384063720703, + "eval_pr_auc": 0.8504424681690863, + "eval_precision": 0.7134259259259259, + "eval_recall": 0.9075382803297998, + "eval_roc_auc": 0.8348483950311818, + "eval_runtime": 3.7383, + "eval_samples_per_second": 820.962, + "eval_steps_per_second": 1.605, + "step": 462 + }, + { + "epoch": 77.16666666666667, + "grad_norm": 30520.681640625, + "learning_rate": 0.0007710985702039786, + "loss": 0.5047, + "step": 463 + }, + { + "epoch": 77.33333333333333, + "grad_norm": 122456.078125, + "learning_rate": 0.0007698753289757565, + "loss": 0.4829, + "step": 464 + }, + { + "epoch": 77.5, + "grad_norm": 161532.8125, + "learning_rate": 0.000768649804173412, + "loss": 0.4899, + "step": 465 + }, + { + "epoch": 77.66666666666667, + "grad_norm": 46629.90234375, + "learning_rate": 0.0007674220061668323, + "loss": 0.4808, + "step": 466 + }, + { + "epoch": 77.83333333333333, + "grad_norm": 169030.65625, + "learning_rate": 0.0007661919453451394, + "loss": 0.491, + "step": 467 + }, + { + "epoch": 78.0, + "grad_norm": 80774.3828125, + "learning_rate": 0.0007649596321166025, + "loss": 0.5117, + "step": 468 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.7432388400130335, + "eval_best_threshold": 0.40479913353919983, + "eval_f1": 0.7895299145299145, + "eval_loss": 0.512270450592041, + "eval_pr_auc": 0.8395600491799751, + "eval_precision": 0.7223851417399805, + "eval_recall": 0.8704358068315665, + "eval_roc_auc": 0.8215491860248338, + "eval_runtime": 3.7435, + "eval_samples_per_second": 819.822, + "eval_steps_per_second": 1.603, + "step": 468 + }, + { + "epoch": 78.16666666666667, + "grad_norm": 150559.34375, + "learning_rate": 0.00076372507690855, + "loss": 0.516, + "step": 469 + }, + { + "epoch": 78.33333333333333, + "grad_norm": 95044.8203125, + "learning_rate": 0.00076248829016728, + "loss": 0.4937, + "step": 470 + }, + { + "epoch": 78.5, + "grad_norm": 100775.890625, + "learning_rate": 0.0007612492823579744, + "loss": 0.4884, + "step": 471 + }, + { + "epoch": 78.66666666666667, + "grad_norm": 106388.2109375, + "learning_rate": 0.0007600080639646077, + "loss": 0.4644, + "step": 472 + }, + { + "epoch": 78.83333333333333, + "grad_norm": 151662.96875, + "learning_rate": 0.00075876464548986, + "loss": 0.4927, + "step": 473 + }, + { + "epoch": 79.0, + "grad_norm": 163484.265625, + "learning_rate": 0.0007575190374550272, + "loss": 0.5001, + "step": 474 + }, + { + "epoch": 79.0, + "eval_accuracy": 0.7536656891495601, + "eval_best_threshold": 0.38404473662376404, + "eval_f1": 0.8009478672985783, + "eval_loss": 0.4811509847640991, + "eval_pr_auc": 0.861499823224796, + "eval_precision": 0.7242857142857143, + "eval_recall": 0.8957597173144877, + "eval_roc_auc": 0.8440641970344825, + "eval_runtime": 3.7485, + "eval_samples_per_second": 818.72, + "eval_steps_per_second": 1.601, + "step": 474 + }, + { + "epoch": 79.16666666666667, + "grad_norm": 38699.5625, + "learning_rate": 0.0007562712503999327, + "loss": 0.4759, + "step": 475 + }, + { + "epoch": 79.33333333333333, + "grad_norm": 138076.015625, + "learning_rate": 0.0007550212948828377, + "loss": 0.4878, + "step": 476 + }, + { + "epoch": 79.5, + "grad_norm": 175757.359375, + "learning_rate": 0.0007537691814803521, + "loss": 0.5021, + "step": 477 + }, + { + "epoch": 79.66666666666667, + "grad_norm": 64115.30859375, + "learning_rate": 0.000752514920787345, + "loss": 0.4995, + "step": 478 + }, + { + "epoch": 79.83333333333333, + "grad_norm": 122277.4765625, + "learning_rate": 0.000751258523416855, + "loss": 0.4964, + "step": 479 + }, + { + "epoch": 80.0, + "grad_norm": 67262.625, + "learning_rate": 0.00075, + "loss": 0.4762, + "step": 480 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.7530140110785272, + "eval_best_threshold": 0.48448118567466736, + "eval_f1": 0.7952458130740141, + "eval_loss": 0.5035498738288879, + "eval_pr_auc": 0.8543361186664507, + "eval_precision": 0.7345309381237525, + "eval_recall": 0.866902237926973, + "eval_roc_auc": 0.8367859299867093, + "eval_runtime": 3.7951, + "eval_samples_per_second": 808.683, + "eval_steps_per_second": 1.581, + "step": 480 + }, + { + "epoch": 80.16666666666667, + "grad_norm": 128038.4921875, + "learning_rate": 0.0007487393611858883, + "loss": 0.5011, + "step": 481 + }, + { + "epoch": 80.33333333333333, + "grad_norm": 115982.5390625, + "learning_rate": 0.0007474766176415271, + "loss": 0.5074, + "step": 482 + }, + { + "epoch": 80.5, + "grad_norm": 346951.09375, + "learning_rate": 0.0007462117800517336, + "loss": 0.5196, + "step": 483 + }, + { + "epoch": 80.66666666666667, + "grad_norm": 445729.40625, + "learning_rate": 0.0007449448591190436, + "loss": 0.5751, + "step": 484 + }, + { + "epoch": 80.83333333333333, + "grad_norm": 270755.5, + "learning_rate": 0.0007436758655636212, + "loss": 0.5385, + "step": 485 + }, + { + "epoch": 81.0, + "grad_norm": 146638.015625, + "learning_rate": 0.0007424048101231686, + "loss": 0.4997, + "step": 486 + }, + { + "epoch": 81.0, + "eval_accuracy": 0.757901596611274, + "eval_best_threshold": 0.5885984897613525, + "eval_f1": 0.8016021361815754, + "eval_loss": 0.5125724673271179, + "eval_pr_auc": 0.8595454646304805, + "eval_precision": 0.7332681973619931, + "eval_recall": 0.8839811542991755, + "eval_roc_auc": 0.841192152092091, + "eval_runtime": 3.8042, + "eval_samples_per_second": 806.738, + "eval_steps_per_second": 1.577, + "step": 486 + }, + { + "epoch": 81.16666666666667, + "grad_norm": 233428.828125, + "learning_rate": 0.0007411317035528344, + "loss": 0.5256, + "step": 487 + }, + { + "epoch": 81.33333333333333, + "grad_norm": 86020.21875, + "learning_rate": 0.0007398565566251232, + "loss": 0.4879, + "step": 488 + }, + { + "epoch": 81.5, + "grad_norm": 195027.78125, + "learning_rate": 0.0007385793801298042, + "loss": 0.5046, + "step": 489 + }, + { + "epoch": 81.66666666666667, + "grad_norm": 217321.640625, + "learning_rate": 0.0007373001848738202, + "loss": 0.5145, + "step": 490 + }, + { + "epoch": 81.83333333333333, + "grad_norm": 123365.1796875, + "learning_rate": 0.0007360189816811956, + "loss": 0.4879, + "step": 491 + }, + { + "epoch": 82.0, + "grad_norm": 115342.3515625, + "learning_rate": 0.0007347357813929454, + "loss": 0.4934, + "step": 492 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.7468230694037146, + "eval_best_threshold": 0.4948383569717407, + "eval_f1": 0.7964369924024103, + "eval_loss": 0.5086027979850769, + "eval_pr_auc": 0.8558498971534145, + "eval_precision": 0.7173194903256253, + "eval_recall": 0.8951707891637221, + "eval_roc_auc": 0.8383057168557166, + "eval_runtime": 3.7413, + "eval_samples_per_second": 820.293, + "eval_steps_per_second": 1.604, + "step": 492 + }, + { + "epoch": 82.16666666666667, + "grad_norm": 158514.203125, + "learning_rate": 0.000733450594866983, + "loss": 0.4833, + "step": 493 + }, + { + "epoch": 82.33333333333333, + "grad_norm": 76403.3828125, + "learning_rate": 0.0007321634329780285, + "loss": 0.4802, + "step": 494 + }, + { + "epoch": 82.5, + "grad_norm": 213661.546875, + "learning_rate": 0.000730874306617517, + "loss": 0.5218, + "step": 495 + }, + { + "epoch": 82.66666666666667, + "grad_norm": 266354.0625, + "learning_rate": 0.0007295832266935059, + "loss": 0.5181, + "step": 496 + }, + { + "epoch": 82.83333333333333, + "grad_norm": 134915.609375, + "learning_rate": 0.0007282902041305828, + "loss": 0.4869, + "step": 497 + }, + { + "epoch": 83.0, + "grad_norm": 109487.90625, + "learning_rate": 0.0007269952498697733, + "loss": 0.4924, + "step": 498 + }, + { + "epoch": 83.0, + "eval_accuracy": 0.7556207233626588, + "eval_best_threshold": 0.4894412159919739, + "eval_f1": 0.8012718600953895, + "eval_loss": 0.49321600794792175, + "eval_pr_auc": 0.8601618845148933, + "eval_precision": 0.7283236994219653, + "eval_recall": 0.8904593639575972, + "eval_roc_auc": 0.8421629599846733, + "eval_runtime": 3.7702, + "eval_samples_per_second": 814.019, + "eval_steps_per_second": 1.591, + "step": 498 + }, + { + "epoch": 83.16666666666667, + "grad_norm": 106140.2109375, + "learning_rate": 0.0007256983748684484, + "loss": 0.4818, + "step": 499 + }, + { + "epoch": 83.33333333333333, + "grad_norm": 32685.55078125, + "learning_rate": 0.0007243995901002312, + "loss": 0.4807, + "step": 500 + }, + { + "epoch": 83.5, + "grad_norm": 118984.3671875, + "learning_rate": 0.0007230989065549044, + "loss": 0.4849, + "step": 501 + }, + { + "epoch": 83.66666666666667, + "grad_norm": 121551.421875, + "learning_rate": 0.0007217963352383181, + "loss": 0.494, + "step": 502 + }, + { + "epoch": 83.83333333333333, + "grad_norm": 90743.734375, + "learning_rate": 0.000720491887172295, + "loss": 0.4627, + "step": 503 + }, + { + "epoch": 84.0, + "grad_norm": 113933.1171875, + "learning_rate": 0.0007191855733945387, + "loss": 0.4873, + "step": 504 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.7706093189964157, + "eval_best_threshold": 0.4577232599258423, + "eval_f1": 0.8032420346562326, + "eval_loss": 0.4764363467693329, + "eval_pr_auc": 0.8592588506486144, + "eval_precision": 0.7643617021276595, + "eval_recall": 0.8462897526501767, + "eval_roc_auc": 0.8475118537361928, + "eval_runtime": 3.5755, + "eval_samples_per_second": 858.343, + "eval_steps_per_second": 1.678, + "step": 504 + }, + { + "epoch": 84.16666666666667, + "grad_norm": 47223.42578125, + "learning_rate": 0.0007178774049585397, + "loss": 0.4676, + "step": 505 + }, + { + "epoch": 84.33333333333333, + "grad_norm": 329249.5, + "learning_rate": 0.0007165673929334815, + "loss": 0.5294, + "step": 506 + }, + { + "epoch": 84.5, + "grad_norm": 365006.0625, + "learning_rate": 0.0007152555484041476, + "loss": 0.5173, + "step": 507 + }, + { + "epoch": 84.66666666666667, + "grad_norm": 209159.03125, + "learning_rate": 0.0007139418824708272, + "loss": 0.4985, + "step": 508 + }, + { + "epoch": 84.83333333333333, + "grad_norm": 80609.34375, + "learning_rate": 0.0007126264062492216, + "loss": 0.47, + "step": 509 + }, + { + "epoch": 85.0, + "grad_norm": 154371.296875, + "learning_rate": 0.0007113091308703497, + "loss": 0.4881, + "step": 510 + }, + { + "epoch": 85.0, + "eval_accuracy": 0.7735418703160639, + "eval_best_threshold": 0.5390059351921082, + "eval_f1": 0.8057031031590719, + "eval_loss": 0.4798082411289215, + "eval_pr_auc": 0.8604935935331044, + "eval_precision": 0.766897285790314, + "eval_recall": 0.8486454652532391, + "eval_roc_auc": 0.847249391956384, + "eval_runtime": 3.4838, + "eval_samples_per_second": 880.946, + "eval_steps_per_second": 1.722, + "step": 510 + }, + { + "epoch": 85.16666666666667, + "grad_norm": 72014.5390625, + "learning_rate": 0.0007099900674804548, + "loss": 0.4731, + "step": 511 + }, + { + "epoch": 85.33333333333333, + "grad_norm": 187983.53125, + "learning_rate": 0.000708669227240909, + "loss": 0.4994, + "step": 512 + }, + { + "epoch": 85.5, + "grad_norm": 226755.40625, + "learning_rate": 0.0007073466213281196, + "loss": 0.4852, + "step": 513 + }, + { + "epoch": 85.66666666666667, + "grad_norm": 62913.12890625, + "learning_rate": 0.0007060222609334342, + "loss": 0.4722, + "step": 514 + }, + { + "epoch": 85.83333333333333, + "grad_norm": 155044.375, + "learning_rate": 0.0007046961572630462, + "loss": 0.4641, + "step": 515 + }, + { + "epoch": 86.0, + "grad_norm": 197888.65625, + "learning_rate": 0.0007033683215379002, + "loss": 0.5081, + "step": 516 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.7725643532095146, + "eval_best_threshold": 0.45184916257858276, + "eval_f1": 0.8087671232876712, + "eval_loss": 0.46945297718048096, + "eval_pr_auc": 0.8683643115135102, + "eval_precision": 0.7561475409836066, + "eval_recall": 0.8692579505300353, + "eval_roc_auc": 0.853145761220778, + "eval_runtime": 3.3665, + "eval_samples_per_second": 911.627, + "eval_steps_per_second": 1.782, + "step": 516 + }, + { + "epoch": 86.16666666666667, + "grad_norm": 43307.9453125, + "learning_rate": 0.0007020387649935959, + "loss": 0.4768, + "step": 517 + }, + { + "epoch": 86.33333333333333, + "grad_norm": 224318.390625, + "learning_rate": 0.0007007074988802946, + "loss": 0.4983, + "step": 518 + }, + { + "epoch": 86.5, + "grad_norm": 232254.71875, + "learning_rate": 0.0006993745344626231, + "loss": 0.5024, + "step": 519 + }, + { + "epoch": 86.66666666666667, + "grad_norm": 95864.921875, + "learning_rate": 0.0006980398830195785, + "loss": 0.4668, + "step": 520 + }, + { + "epoch": 86.83333333333333, + "grad_norm": 156923.25, + "learning_rate": 0.0006967035558444326, + "loss": 0.4887, + "step": 521 + }, + { + "epoch": 87.0, + "grad_norm": 167827.109375, + "learning_rate": 0.0006953655642446368, + "loss": 0.4985, + "step": 522 + }, + { + "epoch": 87.0, + "eval_accuracy": 0.7481264255457803, + "eval_best_threshold": 0.4086613953113556, + "eval_f1": 0.7982250065257113, + "eval_loss": 0.4912816286087036, + "eval_pr_auc": 0.8606898747895984, + "eval_precision": 0.7168307548054383, + "eval_recall": 0.9004711425206124, + "eval_roc_auc": 0.841153062039779, + "eval_runtime": 3.5763, + "eval_samples_per_second": 858.159, + "eval_steps_per_second": 1.678, + "step": 522 + }, + { + "epoch": 87.16666666666667, + "grad_norm": 99152.1796875, + "learning_rate": 0.0006940259195417264, + "loss": 0.4867, + "step": 523 + }, + { + "epoch": 87.33333333333333, + "grad_norm": 174363.046875, + "learning_rate": 0.0006926846330712242, + "loss": 0.4909, + "step": 524 + }, + { + "epoch": 87.5, + "grad_norm": 181446.703125, + "learning_rate": 0.000691341716182545, + "loss": 0.4938, + "step": 525 + }, + { + "epoch": 87.66666666666667, + "grad_norm": 92542.921875, + "learning_rate": 0.0006899971802388996, + "loss": 0.4791, + "step": 526 + }, + { + "epoch": 87.83333333333333, + "grad_norm": 89334.8046875, + "learning_rate": 0.0006886510366171985, + "loss": 0.4749, + "step": 527 + }, + { + "epoch": 88.0, + "grad_norm": 93162.484375, + "learning_rate": 0.0006873032967079561, + "loss": 0.488, + "step": 528 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.7637666992505702, + "eval_best_threshold": 0.4100832939147949, + "eval_f1": 0.8053691275167785, + "eval_loss": 0.4828181266784668, + "eval_pr_auc": 0.8556550349485466, + "eval_precision": 0.740009866798224, + "eval_recall": 0.8833922261484098, + "eval_roc_auc": 0.844597282253374, + "eval_runtime": 3.5794, + "eval_samples_per_second": 857.413, + "eval_steps_per_second": 1.676, + "step": 528 + }, + { + "epoch": 88.16666666666667, + "grad_norm": 81783.1015625, + "learning_rate": 0.0006859539719151933, + "loss": 0.478, + "step": 529 + }, + { + "epoch": 88.33333333333333, + "grad_norm": 85834.1875, + "learning_rate": 0.0006846030736563422, + "loss": 0.4646, + "step": 530 + }, + { + "epoch": 88.5, + "grad_norm": 109085.2890625, + "learning_rate": 0.0006832506133621487, + "loss": 0.4656, + "step": 531 + }, + { + "epoch": 88.66666666666667, + "grad_norm": 94811.3515625, + "learning_rate": 0.0006818966024765758, + "loss": 0.4834, + "step": 532 + }, + { + "epoch": 88.83333333333333, + "grad_norm": 119964.9375, + "learning_rate": 0.0006805410524567073, + "loss": 0.4802, + "step": 533 + }, + { + "epoch": 89.0, + "grad_norm": 168818.828125, + "learning_rate": 0.0006791839747726501, + "loss": 0.4848, + "step": 534 + }, + { + "epoch": 89.0, + "eval_accuracy": 0.768654284783317, + "eval_best_threshold": 0.41445741057395935, + "eval_f1": 0.807796426637791, + "eval_loss": 0.4731602966785431, + "eval_pr_auc": 0.8652850197288536, + "eval_precision": 0.7474949899799599, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.8494418713739681, + "eval_runtime": 3.5921, + "eval_samples_per_second": 854.381, + "eval_steps_per_second": 1.67, + "step": 534 + }, + { + "epoch": 89.16666666666667, + "grad_norm": 48302.47265625, + "learning_rate": 0.0006778253809074384, + "loss": 0.4608, + "step": 535 + }, + { + "epoch": 89.33333333333333, + "grad_norm": 186283.75, + "learning_rate": 0.0006764652823569344, + "loss": 0.4846, + "step": 536 + }, + { + "epoch": 89.5, + "grad_norm": 206481.390625, + "learning_rate": 0.0006751036906297338, + "loss": 0.5059, + "step": 537 + }, + { + "epoch": 89.66666666666667, + "grad_norm": 76336.390625, + "learning_rate": 0.0006737406172470657, + "loss": 0.4759, + "step": 538 + }, + { + "epoch": 89.83333333333333, + "grad_norm": 65805.59375, + "learning_rate": 0.0006723760737426972, + "loss": 0.4615, + "step": 539 + }, + { + "epoch": 90.0, + "grad_norm": 52209.296875, + "learning_rate": 0.0006710100716628344, + "loss": 0.4717, + "step": 540 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.7774519387422614, + "eval_best_threshold": 0.4345960021018982, + "eval_f1": 0.8100139082058414, + "eval_loss": 0.4671099781990051, + "eval_pr_auc": 0.8687265581892443, + "eval_precision": 0.7675276752767528, + "eval_recall": 0.8574793875147232, + "eval_roc_auc": 0.8548354394709871, + "eval_runtime": 3.4237, + "eval_samples_per_second": 896.39, + "eval_steps_per_second": 1.752, + "step": 540 + }, + { + "epoch": 90.16666666666667, + "grad_norm": 53076.24609375, + "learning_rate": 0.0006696426225660256, + "loss": 0.4575, + "step": 541 + }, + { + "epoch": 90.33333333333333, + "grad_norm": 52598.28515625, + "learning_rate": 0.0006682737380230633, + "loss": 0.4618, + "step": 542 + }, + { + "epoch": 90.5, + "grad_norm": 82825.6015625, + "learning_rate": 0.0006669034296168854, + "loss": 0.4489, + "step": 543 + }, + { + "epoch": 90.66666666666667, + "grad_norm": 62264.2578125, + "learning_rate": 0.0006655317089424791, + "loss": 0.4657, + "step": 544 + }, + { + "epoch": 90.83333333333333, + "grad_norm": 47665.33984375, + "learning_rate": 0.0006641585876067806, + "loss": 0.4737, + "step": 545 + }, + { + "epoch": 91.0, + "grad_norm": 48329.640625, + "learning_rate": 0.0006627840772285784, + "loss": 0.4799, + "step": 546 + }, + { + "epoch": 91.0, + "eval_accuracy": 0.7790811339198436, + "eval_best_threshold": 0.4940151870250702, + "eval_f1": 0.8072768618533257, + "eval_loss": 0.4737241268157959, + "eval_pr_auc": 0.8681764998032273, + "eval_precision": 0.7802197802197802, + "eval_recall": 0.8362779740871613, + "eval_roc_auc": 0.8499154623923628, + "eval_runtime": 3.3936, + "eval_samples_per_second": 904.341, + "eval_steps_per_second": 1.768, + "step": 546 + }, + { + "epoch": 91.16666666666667, + "grad_norm": 84555.46875, + "learning_rate": 0.0006614081894384145, + "loss": 0.4674, + "step": 547 + }, + { + "epoch": 91.33333333333333, + "grad_norm": 92255.9375, + "learning_rate": 0.0006600309358784857, + "loss": 0.4891, + "step": 548 + }, + { + "epoch": 91.5, + "grad_norm": 66551.3359375, + "learning_rate": 0.0006586523282025462, + "loss": 0.4754, + "step": 549 + }, + { + "epoch": 91.66666666666667, + "grad_norm": 157776.9375, + "learning_rate": 0.0006572723780758069, + "loss": 0.4624, + "step": 550 + }, + { + "epoch": 91.83333333333333, + "grad_norm": 97448.4453125, + "learning_rate": 0.000655891097174839, + "loss": 0.4661, + "step": 551 + }, + { + "epoch": 92.0, + "grad_norm": 73972.796875, + "learning_rate": 0.0006545084971874737, + "loss": 0.4549, + "step": 552 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.7673509286412512, + "eval_best_threshold": 0.46922558546066284, + "eval_f1": 0.8092948717948718, + "eval_loss": 0.475037544965744, + "eval_pr_auc": 0.8710701531358553, + "eval_precision": 0.7404692082111437, + "eval_recall": 0.892226148409894, + "eval_roc_auc": 0.8553062383427881, + "eval_runtime": 3.6512, + "eval_samples_per_second": 840.538, + "eval_steps_per_second": 1.643, + "step": 552 + }, + { + "epoch": 92.16666666666667, + "grad_norm": 121098.90625, + "learning_rate": 0.000653124589812704, + "loss": 0.4572, + "step": 553 + }, + { + "epoch": 92.33333333333333, + "grad_norm": 56371.390625, + "learning_rate": 0.0006517393867605854, + "loss": 0.4593, + "step": 554 + }, + { + "epoch": 92.5, + "grad_norm": 163690.765625, + "learning_rate": 0.0006503528997521365, + "loss": 0.4792, + "step": 555 + }, + { + "epoch": 92.66666666666667, + "grad_norm": 53005.6796875, + "learning_rate": 0.0006489651405192409, + "loss": 0.463, + "step": 556 + }, + { + "epoch": 92.83333333333333, + "grad_norm": 174509.890625, + "learning_rate": 0.000647576120804547, + "loss": 0.4603, + "step": 557 + }, + { + "epoch": 93.0, + "grad_norm": 189806.515625, + "learning_rate": 0.0006461858523613684, + "loss": 0.4642, + "step": 558 + }, + { + "epoch": 93.0, + "eval_accuracy": 0.7575757575757576, + "eval_best_threshold": 0.3204557001590729, + "eval_f1": 0.8059467918622848, + "eval_loss": 0.4740186333656311, + "eval_pr_auc": 0.864116644561191, + "eval_precision": 0.723314606741573, + "eval_recall": 0.9098939929328622, + "eval_roc_auc": 0.8504285730240838, + "eval_runtime": 3.6601, + "eval_samples_per_second": 838.5, + "eval_steps_per_second": 1.639, + "step": 558 + }, + { + "epoch": 93.16666666666667, + "grad_norm": 74287.015625, + "learning_rate": 0.0006447943469535855, + "loss": 0.449, + "step": 559 + }, + { + "epoch": 93.33333333333333, + "grad_norm": 51398.22265625, + "learning_rate": 0.0006434016163555452, + "loss": 0.4631, + "step": 560 + }, + { + "epoch": 93.5, + "grad_norm": 69963.7265625, + "learning_rate": 0.0006420076723519614, + "loss": 0.4422, + "step": 561 + }, + { + "epoch": 93.66666666666667, + "grad_norm": 106419.03125, + "learning_rate": 0.0006406125267378153, + "loss": 0.474, + "step": 562 + }, + { + "epoch": 93.83333333333333, + "grad_norm": 123178.09375, + "learning_rate": 0.0006392161913182559, + "loss": 0.4536, + "step": 563 + }, + { + "epoch": 94.0, + "grad_norm": 218872.578125, + "learning_rate": 0.0006378186779084996, + "loss": 0.489, + "step": 564 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.7774519387422614, + "eval_best_threshold": 0.4742838740348816, + "eval_f1": 0.8142507478923036, + "eval_loss": 0.4738493859767914, + "eval_pr_auc": 0.8734095284617952, + "eval_precision": 0.7564426478019202, + "eval_recall": 0.8816254416961131, + "eval_roc_auc": 0.8591533867879061, + "eval_runtime": 3.6414, + "eval_samples_per_second": 842.802, + "eval_steps_per_second": 1.648, + "step": 564 + }, + { + "epoch": 94.16666666666667, + "grad_norm": 217597.546875, + "learning_rate": 0.0006364199983337305, + "loss": 0.4792, + "step": 565 + }, + { + "epoch": 94.33333333333333, + "grad_norm": 116264.5703125, + "learning_rate": 0.0006350201644290005, + "loss": 0.4711, + "step": 566 + }, + { + "epoch": 94.5, + "grad_norm": 124475.5703125, + "learning_rate": 0.0006336191880391284, + "loss": 0.4516, + "step": 567 + }, + { + "epoch": 94.66666666666667, + "grad_norm": 26623.99609375, + "learning_rate": 0.0006322170810186012, + "loss": 0.4416, + "step": 568 + }, + { + "epoch": 94.83333333333333, + "grad_norm": 85539.7421875, + "learning_rate": 0.0006308138552314717, + "loss": 0.4391, + "step": 569 + }, + { + "epoch": 95.0, + "grad_norm": 116239.9375, + "learning_rate": 0.0006294095225512603, + "loss": 0.4649, + "step": 570 + }, + { + "epoch": 95.0, + "eval_accuracy": 0.7673509286412512, + "eval_best_threshold": 0.4234827756881714, + "eval_f1": 0.80878414568827, + "eval_loss": 0.4616597294807434, + "eval_pr_auc": 0.8767909548555617, + "eval_precision": 0.7416502946954814, + "eval_recall": 0.889281507656066, + "eval_roc_auc": 0.8578664219887129, + "eval_runtime": 3.6402, + "eval_samples_per_second": 843.075, + "eval_steps_per_second": 1.648, + "step": 570 + }, + { + "epoch": 95.16666666666667, + "grad_norm": 53956.39453125, + "learning_rate": 0.0006280040948608529, + "loss": 0.4726, + "step": 571 + }, + { + "epoch": 95.33333333333333, + "grad_norm": 132776.96875, + "learning_rate": 0.000626597584052401, + "loss": 0.4812, + "step": 572 + }, + { + "epoch": 95.5, + "grad_norm": 110310.21875, + "learning_rate": 0.0006251900020272208, + "loss": 0.4419, + "step": 573 + }, + { + "epoch": 95.66666666666667, + "grad_norm": 213141.046875, + "learning_rate": 0.000623781360695693, + "loss": 0.479, + "step": 574 + }, + { + "epoch": 95.83333333333333, + "grad_norm": 265038.9375, + "learning_rate": 0.0006223716719771619, + "loss": 0.5025, + "step": 575 + }, + { + "epoch": 96.0, + "grad_norm": 100577.78125, + "learning_rate": 0.0006209609477998338, + "loss": 0.445, + "step": 576 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.7653958944281525, + "eval_best_threshold": 0.4986065626144409, + "eval_f1": 0.8094229751191107, + "eval_loss": 0.48587265610694885, + "eval_pr_auc": 0.8728447339217204, + "eval_precision": 0.7350961538461539, + "eval_recall": 0.9004711425206124, + "eval_roc_auc": 0.8566103855825578, + "eval_runtime": 3.6655, + "eval_samples_per_second": 837.276, + "eval_steps_per_second": 1.637, + "step": 576 + }, + { + "epoch": 96.16666666666667, + "grad_norm": 218718.828125, + "learning_rate": 0.0006195492001006772, + "loss": 0.4864, + "step": 577 + }, + { + "epoch": 96.33333333333333, + "grad_norm": 238896.375, + "learning_rate": 0.0006181364408253208, + "loss": 0.4732, + "step": 578 + }, + { + "epoch": 96.5, + "grad_norm": 144929.984375, + "learning_rate": 0.0006167226819279528, + "loss": 0.4766, + "step": 579 + }, + { + "epoch": 96.66666666666667, + "grad_norm": 345105.125, + "learning_rate": 0.0006153079353712201, + "loss": 0.5188, + "step": 580 + }, + { + "epoch": 96.83333333333333, + "grad_norm": 405794.65625, + "learning_rate": 0.0006138922131261268, + "loss": 0.5247, + "step": 581 + }, + { + "epoch": 97.0, + "grad_norm": 258567.859375, + "learning_rate": 0.0006124755271719325, + "loss": 0.4781, + "step": 582 + }, + { + "epoch": 97.0, + "eval_accuracy": 0.7758227435646791, + "eval_best_threshold": 0.4811469614505768, + "eval_f1": 0.8101545253863135, + "eval_loss": 0.4663538932800293, + "eval_pr_auc": 0.8747418466683401, + "eval_precision": 0.7622014537902388, + "eval_recall": 0.8645465253239105, + "eval_roc_auc": 0.8566623624653023, + "eval_runtime": 3.6983, + "eval_samples_per_second": 829.85, + "eval_steps_per_second": 1.622, + "step": 582 + }, + { + "epoch": 97.16666666666667, + "grad_norm": 42506.12109375, + "learning_rate": 0.0006110578894960517, + "loss": 0.453, + "step": 583 + }, + { + "epoch": 97.33333333333333, + "grad_norm": 252258.21875, + "learning_rate": 0.0006096393120939516, + "loss": 0.4965, + "step": 584 + }, + { + "epoch": 97.5, + "grad_norm": 216667.84375, + "learning_rate": 0.0006082198069690515, + "loss": 0.4718, + "step": 585 + }, + { + "epoch": 97.66666666666667, + "grad_norm": 63770.66015625, + "learning_rate": 0.0006067993861326201, + "loss": 0.439, + "step": 586 + }, + { + "epoch": 97.83333333333333, + "grad_norm": 117137.3984375, + "learning_rate": 0.0006053780616036748, + "loss": 0.4545, + "step": 587 + }, + { + "epoch": 98.0, + "grad_norm": 81328.671875, + "learning_rate": 0.0006039558454088796, + "loss": 0.4616, + "step": 588 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.7820136852394917, + "eval_best_threshold": 0.5103572607040405, + "eval_f1": 0.8164609053497942, + "eval_loss": 0.46580880880355835, + "eval_pr_auc": 0.8780657638908369, + "eval_precision": 0.7642526964560863, + "eval_recall": 0.8763250883392226, + "eval_roc_auc": 0.8624738934293489, + "eval_runtime": 3.7401, + "eval_samples_per_second": 820.564, + "eval_steps_per_second": 1.604, + "step": 588 + }, + { + "epoch": 98.16666666666667, + "grad_norm": 96831.2265625, + "learning_rate": 0.0006025327495824439, + "loss": 0.4453, + "step": 589 + }, + { + "epoch": 98.33333333333333, + "grad_norm": 140212.84375, + "learning_rate": 0.000601108786166019, + "loss": 0.4752, + "step": 590 + }, + { + "epoch": 98.5, + "grad_norm": 32939.4609375, + "learning_rate": 0.0005996839672085986, + "loss": 0.4524, + "step": 591 + }, + { + "epoch": 98.66666666666667, + "grad_norm": 217863.0625, + "learning_rate": 0.0005982583047664151, + "loss": 0.4733, + "step": 592 + }, + { + "epoch": 98.83333333333333, + "grad_norm": 245090.515625, + "learning_rate": 0.0005968318109028381, + "loss": 0.4867, + "step": 593 + }, + { + "epoch": 99.0, + "grad_norm": 75009.2265625, + "learning_rate": 0.0005954044976882724, + "loss": 0.4464, + "step": 594 + }, + { + "epoch": 99.0, + "eval_accuracy": 0.7813620071684588, + "eval_best_threshold": 0.5514122247695923, + "eval_f1": 0.8162147356888524, + "eval_loss": 0.4710569381713867, + "eval_pr_auc": 0.8805845064525083, + "eval_precision": 0.7629288274449565, + "eval_recall": 0.8775029446407538, + "eval_roc_auc": 0.8641551952397766, + "eval_runtime": 3.6598, + "eval_samples_per_second": 838.564, + "eval_steps_per_second": 1.639, + "step": 594 + }, + { + "epoch": 99.16666666666667, + "grad_norm": 174301.109375, + "learning_rate": 0.0005939763772000559, + "loss": 0.464, + "step": 595 + }, + { + "epoch": 99.33333333333333, + "grad_norm": 214547.453125, + "learning_rate": 0.0005925474615223572, + "loss": 0.4796, + "step": 596 + }, + { + "epoch": 99.5, + "grad_norm": 73995.5703125, + "learning_rate": 0.0005911177627460738, + "loss": 0.4753, + "step": 597 + }, + { + "epoch": 99.66666666666667, + "grad_norm": 254424.515625, + "learning_rate": 0.0005896872929687287, + "loss": 0.4835, + "step": 598 + }, + { + "epoch": 99.83333333333333, + "grad_norm": 196407.71875, + "learning_rate": 0.0005882560642943696, + "loss": 0.4804, + "step": 599 + }, + { + "epoch": 100.0, + "grad_norm": 59902.87890625, + "learning_rate": 0.0005868240888334653, + "loss": 0.4595, + "step": 600 + }, + { + "epoch": 100.0, + "eval_accuracy": 0.7758227435646791, + "eval_best_threshold": 0.5762914419174194, + "eval_f1": 0.8124318429661941, + "eval_loss": 0.48100268840789795, + "eval_pr_auc": 0.8751349295776516, + "eval_precision": 0.7563451776649747, + "eval_recall": 0.8775029446407538, + "eval_roc_auc": 0.8608797925048475, + "eval_runtime": 3.7446, + "eval_samples_per_second": 819.576, + "eval_steps_per_second": 1.602, + "step": 600 + }, + { + "epoch": 100.16666666666667, + "grad_norm": 185637.796875, + "learning_rate": 0.0005853913787028035, + "loss": 0.4682, + "step": 601 + }, + { + "epoch": 100.33333333333333, + "grad_norm": 145423.65625, + "learning_rate": 0.0005839579460253886, + "loss": 0.4791, + "step": 602 + }, + { + "epoch": 100.5, + "grad_norm": 82947.40625, + "learning_rate": 0.0005825238029303388, + "loss": 0.4639, + "step": 603 + }, + { + "epoch": 100.66666666666667, + "grad_norm": 125656.6875, + "learning_rate": 0.0005810889615527838, + "loss": 0.462, + "step": 604 + }, + { + "epoch": 100.83333333333333, + "grad_norm": 42078.859375, + "learning_rate": 0.0005796534340337614, + "loss": 0.4258, + "step": 605 + }, + { + "epoch": 101.0, + "grad_norm": 35350.0625, + "learning_rate": 0.0005782172325201155, + "loss": 0.4494, + "step": 606 + }, + { + "epoch": 101.0, + "eval_accuracy": 0.7741935483870968, + "eval_best_threshold": 0.3599238991737366, + "eval_f1": 0.8139597315436241, + "eval_loss": 0.46240806579589844, + "eval_pr_auc": 0.8759762303440317, + "eval_precision": 0.747903305377405, + "eval_recall": 0.8928150765606596, + "eval_roc_auc": 0.8611521341879879, + "eval_runtime": 3.7767, + "eval_samples_per_second": 812.608, + "eval_steps_per_second": 1.589, + "step": 606 + }, + { + "epoch": 101.16666666666667, + "grad_norm": 99813.265625, + "learning_rate": 0.000576780369164393, + "loss": 0.426, + "step": 607 + }, + { + "epoch": 101.33333333333333, + "grad_norm": 104900.5703125, + "learning_rate": 0.0005753428561247416, + "loss": 0.4602, + "step": 608 + }, + { + "epoch": 101.5, + "grad_norm": 99518.796875, + "learning_rate": 0.0005739047055648054, + "loss": 0.4712, + "step": 609 + }, + { + "epoch": 101.66666666666667, + "grad_norm": 86323.15625, + "learning_rate": 0.0005724659296536234, + "loss": 0.4648, + "step": 610 + }, + { + "epoch": 101.83333333333333, + "grad_norm": 62814.87890625, + "learning_rate": 0.0005710265405655262, + "loss": 0.4333, + "step": 611 + }, + { + "epoch": 102.0, + "grad_norm": 71986.578125, + "learning_rate": 0.0005695865504800327, + "loss": 0.4499, + "step": 612 + }, + { + "epoch": 102.0, + "eval_accuracy": 0.7803844900619095, + "eval_best_threshold": 0.3741658627986908, + "eval_f1": 0.8187197417966648, + "eval_loss": 0.44404909014701843, + "eval_pr_auc": 0.8861759100281312, + "eval_precision": 0.7534653465346535, + "eval_recall": 0.8963486454652533, + "eval_roc_auc": 0.8697824445286384, + "eval_runtime": 3.7218, + "eval_samples_per_second": 824.608, + "eval_steps_per_second": 1.612, + "step": 612 + }, + { + "epoch": 102.16666666666667, + "grad_norm": 43484.6484375, + "learning_rate": 0.0005681459715817473, + "loss": 0.452, + "step": 613 + }, + { + "epoch": 102.33333333333333, + "grad_norm": 138327.375, + "learning_rate": 0.0005667048160602564, + "loss": 0.4387, + "step": 614 + }, + { + "epoch": 102.5, + "grad_norm": 70967.2734375, + "learning_rate": 0.000565263096110026, + "loss": 0.4281, + "step": 615 + }, + { + "epoch": 102.66666666666667, + "grad_norm": 100818.53125, + "learning_rate": 0.0005638208239302974, + "loss": 0.4618, + "step": 616 + }, + { + "epoch": 102.83333333333333, + "grad_norm": 50325.85546875, + "learning_rate": 0.0005623780117249852, + "loss": 0.4533, + "step": 617 + }, + { + "epoch": 103.0, + "grad_norm": 149581.5625, + "learning_rate": 0.0005609346717025737, + "loss": 0.4503, + "step": 618 + }, + { + "epoch": 103.0, + "eval_accuracy": 0.7908113391984359, + "eval_best_threshold": 0.48392462730407715, + "eval_f1": 0.8210702341137124, + "eval_loss": 0.44357526302337646, + "eval_pr_auc": 0.8885960306219365, + "eval_precision": 0.7793650793650794, + "eval_recall": 0.8674911660777385, + "eval_roc_auc": 0.8710221576162457, + "eval_runtime": 3.7582, + "eval_samples_per_second": 816.614, + "eval_steps_per_second": 1.597, + "step": 618 + }, + { + "epoch": 103.16666666666667, + "grad_norm": 82695.859375, + "learning_rate": 0.0005594908160760127, + "loss": 0.4568, + "step": 619 + }, + { + "epoch": 103.33333333333333, + "grad_norm": 107545.03125, + "learning_rate": 0.0005580464570626152, + "loss": 0.4497, + "step": 620 + }, + { + "epoch": 103.5, + "grad_norm": 125753.75, + "learning_rate": 0.0005566016068839535, + "loss": 0.4361, + "step": 621 + }, + { + "epoch": 103.66666666666667, + "grad_norm": 56653.78125, + "learning_rate": 0.0005551562777657559, + "loss": 0.4209, + "step": 622 + }, + { + "epoch": 103.83333333333333, + "grad_norm": 41988.1796875, + "learning_rate": 0.0005537104819378037, + "loss": 0.4091, + "step": 623 + }, + { + "epoch": 104.0, + "grad_norm": 80381.1328125, + "learning_rate": 0.0005522642316338268, + "loss": 0.4407, + "step": 624 + }, + { + "epoch": 104.0, + "eval_accuracy": 0.7914630172694689, + "eval_best_threshold": 0.462395578622818, + "eval_f1": 0.819718309859155, + "eval_loss": 0.44202810525894165, + "eval_pr_auc": 0.8876987764595818, + "eval_precision": 0.7856371490280778, + "eval_recall": 0.8568904593639576, + "eval_roc_auc": 0.871470619315297, + "eval_runtime": 3.7572, + "eval_samples_per_second": 816.841, + "eval_steps_per_second": 1.597, + "step": 624 + }, + { + "epoch": 104.16666666666667, + "grad_norm": 78504.8828125, + "learning_rate": 0.000550817539091401, + "loss": 0.4259, + "step": 625 + }, + { + "epoch": 104.33333333333333, + "grad_norm": 67419.703125, + "learning_rate": 0.000549370416551844, + "loss": 0.4346, + "step": 626 + }, + { + "epoch": 104.5, + "grad_norm": 166526.515625, + "learning_rate": 0.000547922876260112, + "loss": 0.4574, + "step": 627 + }, + { + "epoch": 104.66666666666667, + "grad_norm": 87268.5703125, + "learning_rate": 0.0005464749304646962, + "loss": 0.429, + "step": 628 + }, + { + "epoch": 104.83333333333333, + "grad_norm": 197419.03125, + "learning_rate": 0.0005450265914175187, + "loss": 0.438, + "step": 629 + }, + { + "epoch": 105.0, + "grad_norm": 163018.390625, + "learning_rate": 0.0005435778713738292, + "loss": 0.4512, + "step": 630 + }, + { + "epoch": 105.0, + "eval_accuracy": 0.7875529488432714, + "eval_best_threshold": 0.4379901885986328, + "eval_f1": 0.8206820682068207, + "eval_loss": 0.4458414316177368, + "eval_pr_auc": 0.8821672284197818, + "eval_precision": 0.7698658410732714, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.8695427494825937, + "eval_runtime": 3.7782, + "eval_samples_per_second": 812.298, + "eval_steps_per_second": 1.588, + "step": 630 + }, + { + "epoch": 105.16666666666667, + "grad_norm": 89169.0546875, + "learning_rate": 0.0005421287825921014, + "loss": 0.4349, + "step": 631 + }, + { + "epoch": 105.33333333333333, + "grad_norm": 61004.5703125, + "learning_rate": 0.0005406793373339292, + "loss": 0.4277, + "step": 632 + }, + { + "epoch": 105.5, + "grad_norm": 52870.4921875, + "learning_rate": 0.0005392295478639225, + "loss": 0.4479, + "step": 633 + }, + { + "epoch": 105.66666666666667, + "grad_norm": 127358.6796875, + "learning_rate": 0.0005377794264496041, + "loss": 0.4339, + "step": 634 + }, + { + "epoch": 105.83333333333333, + "grad_norm": 81547.8203125, + "learning_rate": 0.0005363289853613053, + "loss": 0.4399, + "step": 635 + }, + { + "epoch": 106.0, + "grad_norm": 82232.0390625, + "learning_rate": 0.0005348782368720626, + "loss": 0.4348, + "step": 636 + }, + { + "epoch": 106.0, + "eval_accuracy": 0.7882046269143044, + "eval_best_threshold": 0.39316704869270325, + "eval_f1": 0.823177366702938, + "eval_loss": 0.4380728304386139, + "eval_pr_auc": 0.887453308801846, + "eval_precision": 0.7649140546006067, + "eval_recall": 0.8910482921083628, + "eval_roc_auc": 0.8738138746489412, + "eval_runtime": 4.2587, + "eval_samples_per_second": 720.643, + "eval_steps_per_second": 1.409, + "step": 636 + }, + { + "epoch": 106.16666666666667, + "grad_norm": 57085.0390625, + "learning_rate": 0.0005334271932575137, + "loss": 0.4276, + "step": 637 + }, + { + "epoch": 106.33333333333333, + "grad_norm": 73160.03125, + "learning_rate": 0.0005319758667957928, + "loss": 0.4374, + "step": 638 + }, + { + "epoch": 106.5, + "grad_norm": 94396.4921875, + "learning_rate": 0.0005305242697674285, + "loss": 0.4285, + "step": 639 + }, + { + "epoch": 106.66666666666667, + "grad_norm": 44217.97265625, + "learning_rate": 0.0005290724144552379, + "loss": 0.4334, + "step": 640 + }, + { + "epoch": 106.83333333333333, + "grad_norm": 47426.1015625, + "learning_rate": 0.0005276203131442243, + "loss": 0.424, + "step": 641 + }, + { + "epoch": 107.0, + "grad_norm": 45280.8984375, + "learning_rate": 0.000526167978121472, + "loss": 0.4096, + "step": 642 + }, + { + "epoch": 107.0, + "eval_accuracy": 0.7963506028022157, + "eval_best_threshold": 0.4691428244113922, + "eval_f1": 0.826533444351929, + "eval_loss": 0.43517568707466125, + "eval_pr_auc": 0.8899689908598397, + "eval_precision": 0.7816272965879265, + "eval_recall": 0.8769140164899882, + "eval_roc_auc": 0.8777233094411498, + "eval_runtime": 3.8895, + "eval_samples_per_second": 789.042, + "eval_steps_per_second": 1.543, + "step": 642 + }, + { + "epoch": 107.16666666666667, + "grad_norm": 79869.1796875, + "learning_rate": 0.0005247154216760431, + "loss": 0.4257, + "step": 643 + }, + { + "epoch": 107.33333333333333, + "grad_norm": 120783.3203125, + "learning_rate": 0.0005232626560988735, + "loss": 0.4233, + "step": 644 + }, + { + "epoch": 107.5, + "grad_norm": 64844.3515625, + "learning_rate": 0.0005218096936826681, + "loss": 0.4262, + "step": 645 + }, + { + "epoch": 107.66666666666667, + "grad_norm": 144929.3125, + "learning_rate": 0.000520356546721798, + "loss": 0.4377, + "step": 646 + }, + { + "epoch": 107.83333333333333, + "grad_norm": 81265.203125, + "learning_rate": 0.0005189032275121955, + "loss": 0.4215, + "step": 647 + }, + { + "epoch": 108.0, + "grad_norm": 280190.40625, + "learning_rate": 0.0005174497483512506, + "loss": 0.4443, + "step": 648 + }, + { + "epoch": 108.0, + "eval_accuracy": 0.7956989247311828, + "eval_best_threshold": 0.5434492230415344, + "eval_f1": 0.8280778722237455, + "eval_loss": 0.46423980593681335, + "eval_pr_auc": 0.891939864139048, + "eval_precision": 0.7747562852744997, + "eval_recall": 0.889281507656066, + "eval_roc_auc": 0.8753121834672275, + "eval_runtime": 3.7395, + "eval_samples_per_second": 820.687, + "eval_steps_per_second": 1.604, + "step": 648 + }, + { + "epoch": 108.16666666666667, + "grad_norm": 245879.5, + "learning_rate": 0.0005159961215377065, + "loss": 0.4513, + "step": 649 + }, + { + "epoch": 108.33333333333333, + "grad_norm": 51109.69921875, + "learning_rate": 0.0005145423593715557, + "loss": 0.4371, + "step": 650 + }, + { + "epoch": 108.5, + "grad_norm": 309565.03125, + "learning_rate": 0.0005130884741539367, + "loss": 0.4463, + "step": 651 + }, + { + "epoch": 108.66666666666667, + "grad_norm": 229982.4375, + "learning_rate": 0.0005116344781870281, + "loss": 0.4492, + "step": 652 + }, + { + "epoch": 108.83333333333333, + "grad_norm": 80197.65625, + "learning_rate": 0.0005101803837739468, + "loss": 0.4464, + "step": 653 + }, + { + "epoch": 109.0, + "grad_norm": 138026.71875, + "learning_rate": 0.0005087262032186418, + "loss": 0.449, + "step": 654 + }, + { + "epoch": 109.0, + "eval_accuracy": 0.7927663734115347, + "eval_best_threshold": 0.4488154947757721, + "eval_f1": 0.8223463687150838, + "eval_loss": 0.43212890625, + "eval_pr_auc": 0.8937612583840494, + "eval_precision": 0.7821466524973433, + "eval_recall": 0.866902237926973, + "eval_roc_auc": 0.8779436742415456, + "eval_runtime": 3.8059, + "eval_samples_per_second": 806.371, + "eval_steps_per_second": 1.576, + "step": 654 + }, + { + "epoch": 109.16666666666667, + "grad_norm": 61663.25390625, + "learning_rate": 0.0005072719488257914, + "loss": 0.4213, + "step": 655 + }, + { + "epoch": 109.33333333333333, + "grad_norm": 72922.8671875, + "learning_rate": 0.0005058176329006986, + "loss": 0.4282, + "step": 656 + }, + { + "epoch": 109.5, + "grad_norm": 150465.5625, + "learning_rate": 0.0005043632677491869, + "loss": 0.4205, + "step": 657 + }, + { + "epoch": 109.66666666666667, + "grad_norm": 131988.890625, + "learning_rate": 0.000502908865677497, + "loss": 0.435, + "step": 658 + }, + { + "epoch": 109.83333333333333, + "grad_norm": 119979.9375, + "learning_rate": 0.0005014544389921811, + "loss": 0.4199, + "step": 659 + }, + { + "epoch": 110.0, + "grad_norm": 69464.1171875, + "learning_rate": 0.0005, + "loss": 0.4125, + "step": 660 + }, + { + "epoch": 110.0, + "eval_accuracy": 0.7859237536656891, + "eval_best_threshold": 0.3834899067878723, + "eval_f1": 0.8240963855421687, + "eval_loss": 0.4334057867527008, + "eval_pr_auc": 0.8916199472812893, + "eval_precision": 0.7555228276877761, + "eval_recall": 0.9063604240282686, + "eval_roc_auc": 0.8792104496730611, + "eval_runtime": 3.7717, + "eval_samples_per_second": 813.687, + "eval_steps_per_second": 1.591, + "step": 660 + }, + { + "epoch": 110.16666666666667, + "grad_norm": 87378.875, + "learning_rate": 0.0004985455610078191, + "loss": 0.4207, + "step": 661 + }, + { + "epoch": 110.33333333333333, + "grad_norm": 71451.0546875, + "learning_rate": 0.0004970911343225031, + "loss": 0.4227, + "step": 662 + }, + { + "epoch": 110.5, + "grad_norm": 115093.7578125, + "learning_rate": 0.0004956367322508131, + "loss": 0.4317, + "step": 663 + }, + { + "epoch": 110.66666666666667, + "grad_norm": 77691.953125, + "learning_rate": 0.0004941823670993016, + "loss": 0.4072, + "step": 664 + }, + { + "epoch": 110.83333333333333, + "grad_norm": 123772.8359375, + "learning_rate": 0.0004927280511742086, + "loss": 0.4193, + "step": 665 + }, + { + "epoch": 111.0, + "grad_norm": 71867.2109375, + "learning_rate": 0.0004912737967813582, + "loss": 0.4164, + "step": 666 + }, + { + "epoch": 111.0, + "eval_accuracy": 0.7924405343760182, + "eval_best_threshold": 0.3640596866607666, + "eval_f1": 0.8279773156899811, + "eval_loss": 0.42901697754859924, + "eval_pr_auc": 0.8968072773162334, + "eval_precision": 0.7645885286783043, + "eval_recall": 0.9028268551236749, + "eval_roc_auc": 0.8803616731916986, + "eval_runtime": 3.7708, + "eval_samples_per_second": 813.877, + "eval_steps_per_second": 1.591, + "step": 666 + }, + { + "epoch": 111.16666666666667, + "grad_norm": 63211.99609375, + "learning_rate": 0.0004898196162260531, + "loss": 0.422, + "step": 667 + }, + { + "epoch": 111.33333333333333, + "grad_norm": 112762.171875, + "learning_rate": 0.0004883655218129719, + "loss": 0.4188, + "step": 668 + }, + { + "epoch": 111.5, + "grad_norm": 165300.15625, + "learning_rate": 0.0004869115258460635, + "loss": 0.4226, + "step": 669 + }, + { + "epoch": 111.66666666666667, + "grad_norm": 96502.1953125, + "learning_rate": 0.0004854576406284443, + "loss": 0.4226, + "step": 670 + }, + { + "epoch": 111.83333333333333, + "grad_norm": 103810.5546875, + "learning_rate": 0.0004840038784622937, + "loss": 0.4239, + "step": 671 + }, + { + "epoch": 112.0, + "grad_norm": 110581.7421875, + "learning_rate": 0.0004825502516487497, + "loss": 0.4106, + "step": 672 + }, + { + "epoch": 112.0, + "eval_accuracy": 0.7917888563049853, + "eval_best_threshold": 0.3643791079521179, + "eval_f1": 0.8244023083264633, + "eval_loss": 0.43213963508605957, + "eval_pr_auc": 0.8929182333546763, + "eval_precision": 0.7727975270479135, + "eval_recall": 0.8833922261484098, + "eval_roc_auc": 0.8802027356163642, + "eval_runtime": 3.8148, + "eval_samples_per_second": 804.496, + "eval_steps_per_second": 1.573, + "step": 672 + }, + { + "epoch": 112.16666666666667, + "grad_norm": 70169.6171875, + "learning_rate": 0.0004810967724878046, + "loss": 0.4186, + "step": 673 + }, + { + "epoch": 112.33333333333333, + "grad_norm": 100069.8359375, + "learning_rate": 0.0004796434532782021, + "loss": 0.4334, + "step": 674 + }, + { + "epoch": 112.5, + "grad_norm": 54957.6328125, + "learning_rate": 0.0004781903063173321, + "loss": 0.3984, + "step": 675 + }, + { + "epoch": 112.66666666666667, + "grad_norm": 144818.28125, + "learning_rate": 0.0004767373439011267, + "loss": 0.4173, + "step": 676 + }, + { + "epoch": 112.83333333333333, + "grad_norm": 128438.8046875, + "learning_rate": 0.00047528457832395705, + "loss": 0.4267, + "step": 677 + }, + { + "epoch": 113.0, + "grad_norm": 191549.34375, + "learning_rate": 0.0004738320218785281, + "loss": 0.434, + "step": 678 + }, + { + "epoch": 113.0, + "eval_accuracy": 0.7898338220918866, + "eval_best_threshold": 0.5195454955101013, + "eval_f1": 0.8216754216201272, + "eval_loss": 0.44215479493141174, + "eval_pr_auc": 0.895288529089388, + "eval_precision": 0.7743616466909848, + "eval_recall": 0.8751472320376914, + "eval_roc_auc": 0.8801997286892632, + "eval_runtime": 3.8745, + "eval_samples_per_second": 792.093, + "eval_steps_per_second": 1.549, + "step": 678 + }, + { + "epoch": 113.16666666666667, + "grad_norm": 180644.59375, + "learning_rate": 0.0004723796868557758, + "loss": 0.4344, + "step": 679 + }, + { + "epoch": 113.33333333333333, + "grad_norm": 89088.8515625, + "learning_rate": 0.0004709275855447621, + "loss": 0.4423, + "step": 680 + }, + { + "epoch": 113.5, + "grad_norm": 154950.390625, + "learning_rate": 0.00046947573023257153, + "loss": 0.4279, + "step": 681 + }, + { + "epoch": 113.66666666666667, + "grad_norm": 75882.7578125, + "learning_rate": 0.0004680241332042072, + "loss": 0.4217, + "step": 682 + }, + { + "epoch": 113.83333333333333, + "grad_norm": 178440.0, + "learning_rate": 0.00046657280674248646, + "loss": 0.4251, + "step": 683 + }, + { + "epoch": 114.0, + "grad_norm": 185205.1875, + "learning_rate": 0.00046512176312793734, + "loss": 0.4287, + "step": 684 + }, + { + "epoch": 114.0, + "eval_accuracy": 0.7973281199087651, + "eval_best_threshold": 0.43903276324272156, + "eval_f1": 0.827318156579678, + "eval_loss": 0.429295152425766, + "eval_pr_auc": 0.8894820330082692, + "eval_precision": 0.782563025210084, + "eval_recall": 0.8775029446407538, + "eval_roc_auc": 0.8793955904702748, + "eval_runtime": 3.9972, + "eval_samples_per_second": 767.793, + "eval_steps_per_second": 1.501, + "step": 684 + }, + { + "epoch": 114.16666666666667, + "grad_norm": 57033.11328125, + "learning_rate": 0.0004636710146386947, + "loss": 0.4343, + "step": 685 + }, + { + "epoch": 114.33333333333333, + "grad_norm": 190587.890625, + "learning_rate": 0.0004622205735503961, + "loss": 0.4285, + "step": 686 + }, + { + "epoch": 114.5, + "grad_norm": 188324.6875, + "learning_rate": 0.0004607704521360776, + "loss": 0.427, + "step": 687 + }, + { + "epoch": 114.66666666666667, + "grad_norm": 77043.28125, + "learning_rate": 0.00045932066266607093, + "loss": 0.4032, + "step": 688 + }, + { + "epoch": 114.83333333333333, + "grad_norm": 104234.28125, + "learning_rate": 0.0004578712174078986, + "loss": 0.4, + "step": 689 + }, + { + "epoch": 115.0, + "grad_norm": 109458.25, + "learning_rate": 0.00045642212862617086, + "loss": 0.4238, + "step": 690 + }, + { + "epoch": 115.0, + "eval_accuracy": 0.7983056370153144, + "eval_best_threshold": 0.35520094633102417, + "eval_f1": 0.8281987232861504, + "eval_loss": 0.43845149874687195, + "eval_pr_auc": 0.8946518775878574, + "eval_precision": 0.7832020997375329, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.8802770496718584, + "eval_runtime": 3.8083, + "eval_samples_per_second": 805.875, + "eval_steps_per_second": 1.576, + "step": 690 + }, + { + "epoch": 115.16666666666667, + "grad_norm": 164871.96875, + "learning_rate": 0.0004549734085824816, + "loss": 0.4166, + "step": 691 + }, + { + "epoch": 115.33333333333333, + "grad_norm": 112055.2265625, + "learning_rate": 0.0004535250695353039, + "loss": 0.4082, + "step": 692 + }, + { + "epoch": 115.5, + "grad_norm": 155252.046875, + "learning_rate": 0.000452077123739888, + "loss": 0.4193, + "step": 693 + }, + { + "epoch": 115.66666666666667, + "grad_norm": 116719.890625, + "learning_rate": 0.0004506295834481561, + "loss": 0.4435, + "step": 694 + }, + { + "epoch": 115.83333333333333, + "grad_norm": 128234.1640625, + "learning_rate": 0.00044918246090859906, + "loss": 0.4177, + "step": 695 + }, + { + "epoch": 116.0, + "grad_norm": 184487.40625, + "learning_rate": 0.00044773576836617336, + "loss": 0.4256, + "step": 696 + }, + { + "epoch": 116.0, + "eval_accuracy": 0.797979797979798, + "eval_best_threshold": 0.4464789927005768, + "eval_f1": 0.8266219239373602, + "eval_loss": 0.42530736327171326, + "eval_pr_auc": 0.8979053279883333, + "eval_precision": 0.7870074547390842, + "eval_recall": 0.8704358068315665, + "eval_roc_auc": 0.882127168960952, + "eval_runtime": 3.8355, + "eval_samples_per_second": 800.146, + "eval_steps_per_second": 1.564, + "step": 696 + }, + { + "epoch": 116.16666666666667, + "grad_norm": 107760.5078125, + "learning_rate": 0.0004462895180621965, + "loss": 0.405, + "step": 697 + }, + { + "epoch": 116.33333333333333, + "grad_norm": 200850.09375, + "learning_rate": 0.00044484372223424414, + "loss": 0.4161, + "step": 698 + }, + { + "epoch": 116.5, + "grad_norm": 245987.421875, + "learning_rate": 0.00044339839311604667, + "loss": 0.4531, + "step": 699 + }, + { + "epoch": 116.66666666666667, + "grad_norm": 103315.1171875, + "learning_rate": 0.0004419535429373848, + "loss": 0.4008, + "step": 700 + }, + { + "epoch": 116.83333333333333, + "grad_norm": 121910.796875, + "learning_rate": 0.0004405091839239874, + "loss": 0.4229, + "step": 701 + }, + { + "epoch": 117.0, + "grad_norm": 148540.953125, + "learning_rate": 0.00043906532829742634, + "loss": 0.4003, + "step": 702 + }, + { + "epoch": 117.0, + "eval_accuracy": 0.7940697295536006, + "eval_best_threshold": 0.35628262162208557, + "eval_f1": 0.8281674823273518, + "eval_loss": 0.41951417922973633, + "eval_pr_auc": 0.9025554076344122, + "eval_precision": 0.7691919191919192, + "eval_recall": 0.8969375736160189, + "eval_roc_auc": 0.8862505251383402, + "eval_runtime": 3.8482, + "eval_samples_per_second": 797.513, + "eval_steps_per_second": 1.559, + "step": 702 + }, + { + "epoch": 117.16666666666667, + "grad_norm": 61846.95703125, + "learning_rate": 0.00043762198827501463, + "loss": 0.418, + "step": 703 + }, + { + "epoch": 117.33333333333333, + "grad_norm": 62604.359375, + "learning_rate": 0.0004361791760697027, + "loss": 0.4135, + "step": 704 + }, + { + "epoch": 117.5, + "grad_norm": 64781.78125, + "learning_rate": 0.00043473690388997434, + "loss": 0.4109, + "step": 705 + }, + { + "epoch": 117.66666666666667, + "grad_norm": 78247.5859375, + "learning_rate": 0.00043329518393974364, + "loss": 0.4041, + "step": 706 + }, + { + "epoch": 117.83333333333333, + "grad_norm": 120075.2890625, + "learning_rate": 0.00043185402841825285, + "loss": 0.3946, + "step": 707 + }, + { + "epoch": 118.0, + "grad_norm": 133093.421875, + "learning_rate": 0.0004304134495199674, + "loss": 0.3965, + "step": 708 + }, + { + "epoch": 118.0, + "eval_accuracy": 0.8041707396546106, + "eval_best_threshold": 0.3818773925304413, + "eval_f1": 0.830847171404447, + "eval_loss": 0.4185047745704651, + "eval_pr_auc": 0.9035732506643357, + "eval_precision": 0.7956873315363882, + "eval_recall": 0.8692579505300353, + "eval_roc_auc": 0.888738542533843, + "eval_runtime": 3.9051, + "eval_samples_per_second": 785.905, + "eval_steps_per_second": 1.536, + "step": 708 + }, + { + "epoch": 118.16666666666667, + "grad_norm": 106172.3125, + "learning_rate": 0.00042897345943447373, + "loss": 0.4074, + "step": 709 + }, + { + "epoch": 118.33333333333333, + "grad_norm": 66229.796875, + "learning_rate": 0.0004275340703463767, + "loss": 0.3781, + "step": 710 + }, + { + "epoch": 118.5, + "grad_norm": 49387.58984375, + "learning_rate": 0.0004260952944351947, + "loss": 0.3981, + "step": 711 + }, + { + "epoch": 118.66666666666667, + "grad_norm": 282733.625, + "learning_rate": 0.00042465714387525844, + "loss": 0.4425, + "step": 712 + }, + { + "epoch": 118.83333333333333, + "grad_norm": 188722.796875, + "learning_rate": 0.000423219630835607, + "loss": 0.4469, + "step": 713 + }, + { + "epoch": 119.0, + "grad_norm": 162376.21875, + "learning_rate": 0.0004217827674798845, + "loss": 0.4318, + "step": 714 + }, + { + "epoch": 119.0, + "eval_accuracy": 0.8106875203649397, + "eval_best_threshold": 0.5838644504547119, + "eval_f1": 0.8339525578736782, + "eval_loss": 0.4320233464241028, + "eval_pr_auc": 0.9027477013201601, + "eval_precision": 0.810105496946141, + "eval_recall": 0.85924617196702, + "eval_roc_auc": 0.8869300906631478, + "eval_runtime": 3.9503, + "eval_samples_per_second": 776.896, + "eval_steps_per_second": 1.519, + "step": 714 + }, + { + "epoch": 119.16666666666667, + "grad_norm": 147409.125, + "learning_rate": 0.00042034656596623876, + "loss": 0.4207, + "step": 715 + }, + { + "epoch": 119.33333333333333, + "grad_norm": 55680.04296875, + "learning_rate": 0.00041891103844721633, + "loss": 0.3979, + "step": 716 + }, + { + "epoch": 119.5, + "grad_norm": 169613.6875, + "learning_rate": 0.00041747619706966117, + "loss": 0.4194, + "step": 717 + }, + { + "epoch": 119.66666666666667, + "grad_norm": 159846.125, + "learning_rate": 0.0004160420539746115, + "loss": 0.4207, + "step": 718 + }, + { + "epoch": 119.83333333333333, + "grad_norm": 62629.86328125, + "learning_rate": 0.00041460862129719675, + "loss": 0.4122, + "step": 719 + }, + { + "epoch": 120.0, + "grad_norm": 70529.6953125, + "learning_rate": 0.00041317591116653486, + "loss": 0.3997, + "step": 720 + }, + { + "epoch": 120.0, + "eval_accuracy": 0.8071032909742587, + "eval_best_threshold": 0.43030133843421936, + "eval_f1": 0.8332394366197183, + "eval_loss": 0.41646894812583923, + "eval_pr_auc": 0.9015496978574711, + "eval_precision": 0.7985961123110151, + "eval_recall": 0.8710247349823321, + "eval_roc_auc": 0.8878643858695044, + "eval_runtime": 3.9469, + "eval_samples_per_second": 777.573, + "eval_steps_per_second": 1.52, + "step": 720 + }, + { + "epoch": 120.16666666666667, + "grad_norm": 65910.9921875, + "learning_rate": 0.0004117439357056305, + "loss": 0.3941, + "step": 721 + }, + { + "epoch": 120.33333333333333, + "grad_norm": 76004.28125, + "learning_rate": 0.0004103127070312713, + "loss": 0.4048, + "step": 722 + }, + { + "epoch": 120.5, + "grad_norm": 169147.484375, + "learning_rate": 0.00040888223725392626, + "loss": 0.4036, + "step": 723 + }, + { + "epoch": 120.66666666666667, + "grad_norm": 118840.078125, + "learning_rate": 0.0004074525384776428, + "loss": 0.4056, + "step": 724 + }, + { + "epoch": 120.83333333333333, + "grad_norm": 267710.28125, + "learning_rate": 0.0004060236227999441, + "loss": 0.4361, + "step": 725 + }, + { + "epoch": 121.0, + "grad_norm": 303997.96875, + "learning_rate": 0.0004045955023117276, + "loss": 0.432, + "step": 726 + }, + { + "epoch": 121.0, + "eval_accuracy": 0.7908113391984359, + "eval_best_threshold": 0.33801013231277466, + "eval_f1": 0.8248772504091653, + "eval_loss": 0.4277116358280182, + "eval_pr_auc": 0.9012340785625325, + "eval_precision": 0.7682926829268293, + "eval_recall": 0.8904593639575972, + "eval_roc_auc": 0.8837801197444284, + "eval_runtime": 3.8194, + "eval_samples_per_second": 803.521, + "eval_steps_per_second": 1.571, + "step": 726 + }, + { + "epoch": 121.16666666666667, + "grad_norm": 138695.34375, + "learning_rate": 0.00040316818909716206, + "loss": 0.4303, + "step": 727 + }, + { + "epoch": 121.33333333333333, + "grad_norm": 116743.2265625, + "learning_rate": 0.0004017416952335849, + "loss": 0.3981, + "step": 728 + }, + { + "epoch": 121.5, + "grad_norm": 90517.8828125, + "learning_rate": 0.0004003160327914015, + "loss": 0.392, + "step": 729 + }, + { + "epoch": 121.66666666666667, + "grad_norm": 116381.640625, + "learning_rate": 0.00039889121383398113, + "loss": 0.4085, + "step": 730 + }, + { + "epoch": 121.83333333333333, + "grad_norm": 98319.7421875, + "learning_rate": 0.00039746725041755625, + "loss": 0.4026, + "step": 731 + }, + { + "epoch": 122.0, + "grad_norm": 110849.65625, + "learning_rate": 0.0003960441545911204, + "loss": 0.4035, + "step": 732 + }, + { + "epoch": 122.0, + "eval_accuracy": 0.8002606712284132, + "eval_best_threshold": 0.4917587339878082, + "eval_f1": 0.8283394007280873, + "eval_loss": 0.4172866940498352, + "eval_pr_auc": 0.9087252692436502, + "eval_precision": 0.789642285104111, + "eval_recall": 0.8710247349823321, + "eval_roc_auc": 0.8915839546933406, + "eval_runtime": 3.8054, + "eval_samples_per_second": 806.475, + "eval_steps_per_second": 1.577, + "step": 732 + }, + { + "epoch": 122.16666666666667, + "grad_norm": 174083.859375, + "learning_rate": 0.0003946219383963254, + "loss": 0.4315, + "step": 733 + }, + { + "epoch": 122.33333333333333, + "grad_norm": 55083.375, + "learning_rate": 0.00039320061386738007, + "loss": 0.3932, + "step": 734 + }, + { + "epoch": 122.5, + "grad_norm": 136627.65625, + "learning_rate": 0.0003917801930309486, + "loss": 0.3989, + "step": 735 + }, + { + "epoch": 122.66666666666667, + "grad_norm": 64387.43359375, + "learning_rate": 0.0003903606879060483, + "loss": 0.4018, + "step": 736 + }, + { + "epoch": 122.83333333333333, + "grad_norm": 127286.59375, + "learning_rate": 0.00038894211050394847, + "loss": 0.4137, + "step": 737 + }, + { + "epoch": 123.0, + "grad_norm": 135965.734375, + "learning_rate": 0.0003875244728280676, + "loss": 0.3932, + "step": 738 + }, + { + "epoch": 123.0, + "eval_accuracy": 0.8084066471163245, + "eval_best_threshold": 0.43941736221313477, + "eval_f1": 0.8357541899441341, + "eval_loss": 0.41049131751060486, + "eval_pr_auc": 0.9031017937895613, + "eval_precision": 0.7948990435706695, + "eval_recall": 0.8810365135453475, + "eval_roc_auc": 0.8904202739052852, + "eval_runtime": 3.958, + "eval_samples_per_second": 775.396, + "eval_steps_per_second": 1.516, + "step": 738 + }, + { + "epoch": 123.16666666666667, + "grad_norm": 83984.4375, + "learning_rate": 0.00038610778687387325, + "loss": 0.4004, + "step": 739 + }, + { + "epoch": 123.33333333333333, + "grad_norm": 118075.1640625, + "learning_rate": 0.00038469206462877993, + "loss": 0.4059, + "step": 740 + }, + { + "epoch": 123.5, + "grad_norm": 69527.3828125, + "learning_rate": 0.00038327731807204744, + "loss": 0.3804, + "step": 741 + }, + { + "epoch": 123.66666666666667, + "grad_norm": 115279.078125, + "learning_rate": 0.0003818635591746793, + "loss": 0.3858, + "step": 742 + }, + { + "epoch": 123.83333333333333, + "grad_norm": 90124.2890625, + "learning_rate": 0.0003804507998993229, + "loss": 0.418, + "step": 743 + }, + { + "epoch": 124.0, + "grad_norm": 197003.6875, + "learning_rate": 0.0003790390522001662, + "loss": 0.4173, + "step": 744 + }, + { + "epoch": 124.0, + "eval_accuracy": 0.8012381883349625, + "eval_best_threshold": 0.2784575819969177, + "eval_f1": 0.8348673524634542, + "eval_loss": 0.43013083934783936, + "eval_pr_auc": 0.9060761496011254, + "eval_precision": 0.7725450901803608, + "eval_recall": 0.9081272084805654, + "eval_roc_auc": 0.8903390868735606, + "eval_runtime": 3.8243, + "eval_samples_per_second": 802.5, + "eval_steps_per_second": 1.569, + "step": 744 + }, + { + "epoch": 124.16666666666667, + "grad_norm": 315667.375, + "learning_rate": 0.0003776283280228381, + "loss": 0.4234, + "step": 745 + }, + { + "epoch": 124.33333333333333, + "grad_norm": 58933.265625, + "learning_rate": 0.00037621863930430713, + "loss": 0.3804, + "step": 746 + }, + { + "epoch": 124.5, + "grad_norm": 193578.453125, + "learning_rate": 0.0003748099979727792, + "loss": 0.3968, + "step": 747 + }, + { + "epoch": 124.66666666666667, + "grad_norm": 213663.515625, + "learning_rate": 0.00037340241594759917, + "loss": 0.4235, + "step": 748 + }, + { + "epoch": 124.83333333333333, + "grad_norm": 92057.046875, + "learning_rate": 0.00037199590513914714, + "loss": 0.3882, + "step": 749 + }, + { + "epoch": 125.0, + "grad_norm": 58977.01171875, + "learning_rate": 0.0003705904774487396, + "loss": 0.3975, + "step": 750 + }, + { + "epoch": 125.0, + "eval_accuracy": 0.8113391984359726, + "eval_best_threshold": 0.42779427766799927, + "eval_f1": 0.8374052232518955, + "eval_loss": 0.39996033906936646, + "eval_pr_auc": 0.9114790385715704, + "eval_precision": 0.8003220611916264, + "eval_recall": 0.8780918727915195, + "eval_roc_auc": 0.8961544838867368, + "eval_runtime": 3.8438, + "eval_samples_per_second": 798.422, + "eval_steps_per_second": 1.561, + "step": 750 + }, + { + "epoch": 125.16666666666667, + "grad_norm": 56553.06640625, + "learning_rate": 0.0003691861447685283, + "loss": 0.4024, + "step": 751 + }, + { + "epoch": 125.33333333333333, + "grad_norm": 85294.8671875, + "learning_rate": 0.00036778291898139905, + "loss": 0.4007, + "step": 752 + }, + { + "epoch": 125.5, + "grad_norm": 55662.74609375, + "learning_rate": 0.0003663808119608716, + "loss": 0.3915, + "step": 753 + }, + { + "epoch": 125.66666666666667, + "grad_norm": 59557.08984375, + "learning_rate": 0.0003649798355709997, + "loss": 0.3726, + "step": 754 + }, + { + "epoch": 125.83333333333333, + "grad_norm": 185125.28125, + "learning_rate": 0.0003635800016662696, + "loss": 0.4103, + "step": 755 + }, + { + "epoch": 126.0, + "grad_norm": 95126.6640625, + "learning_rate": 0.00036218132209150044, + "loss": 0.3871, + "step": 756 + }, + { + "epoch": 126.0, + "eval_accuracy": 0.8074291300097751, + "eval_best_threshold": 0.39693698287010193, + "eval_f1": 0.8332863187588152, + "eval_loss": 0.4095918834209442, + "eval_pr_auc": 0.9093813330693225, + "eval_precision": 0.7996751488900921, + "eval_recall": 0.8698468786808009, + "eval_roc_auc": 0.8925401575114328, + "eval_runtime": 3.6274, + "eval_samples_per_second": 846.051, + "eval_steps_per_second": 1.654, + "step": 756 + }, + { + "epoch": 126.16666666666667, + "grad_norm": 111067.421875, + "learning_rate": 0.00036078380868174423, + "loss": 0.4079, + "step": 757 + }, + { + "epoch": 126.33333333333333, + "grad_norm": 161514.3125, + "learning_rate": 0.00035938747326218467, + "loss": 0.4236, + "step": 758 + }, + { + "epoch": 126.5, + "grad_norm": 105326.0703125, + "learning_rate": 0.0003579923276480387, + "loss": 0.3951, + "step": 759 + }, + { + "epoch": 126.66666666666667, + "grad_norm": 153800.453125, + "learning_rate": 0.00035659838364445503, + "loss": 0.3889, + "step": 760 + }, + { + "epoch": 126.83333333333333, + "grad_norm": 164338.90625, + "learning_rate": 0.0003552056530464146, + "loss": 0.4136, + "step": 761 + }, + { + "epoch": 127.0, + "grad_norm": 68888.59375, + "learning_rate": 0.00035381414763863166, + "loss": 0.4016, + "step": 762 + }, + { + "epoch": 127.0, + "eval_accuracy": 0.8080808080808081, + "eval_best_threshold": 0.3385215401649475, + "eval_f1": 0.8349677780891006, + "eval_loss": 0.4206610918045044, + "eval_pr_auc": 0.9069561308305243, + "eval_precision": 0.7963655799037948, + "eval_recall": 0.8775029446407538, + "eval_roc_auc": 0.8931857877161014, + "eval_runtime": 3.6927, + "eval_samples_per_second": 831.105, + "eval_steps_per_second": 1.625, + "step": 762 + }, + { + "epoch": 127.16666666666667, + "grad_norm": 190998.125, + "learning_rate": 0.00035242387919545313, + "loss": 0.3974, + "step": 763 + }, + { + "epoch": 127.33333333333333, + "grad_norm": 243973.671875, + "learning_rate": 0.000351034859480759, + "loss": 0.3905, + "step": 764 + }, + { + "epoch": 127.5, + "grad_norm": 100218.6484375, + "learning_rate": 0.0003496471002478635, + "loss": 0.4081, + "step": 765 + }, + { + "epoch": 127.66666666666667, + "grad_norm": 171532.484375, + "learning_rate": 0.00034826061323941484, + "loss": 0.3994, + "step": 766 + }, + { + "epoch": 127.83333333333333, + "grad_norm": 133002.640625, + "learning_rate": 0.00034687541018729604, + "loss": 0.4055, + "step": 767 + }, + { + "epoch": 128.0, + "grad_norm": 74934.3359375, + "learning_rate": 0.00034549150281252633, + "loss": 0.4062, + "step": 768 + }, + { + "epoch": 128.0, + "eval_accuracy": 0.8028673835125448, + "eval_best_threshold": 0.3128413259983063, + "eval_f1": 0.8354636932281752, + "eval_loss": 0.4116682708263397, + "eval_pr_auc": 0.9072100448142456, + "eval_precision": 0.7761495704901465, + "eval_recall": 0.9045936395759717, + "eval_roc_auc": 0.8940595148194255, + "eval_runtime": 3.6699, + "eval_samples_per_second": 836.257, + "eval_steps_per_second": 1.635, + "step": 768 + }, + { + "epoch": 128.16666666666666, + "grad_norm": 187347.5625, + "learning_rate": 0.000344108902825161, + "loss": 0.416, + "step": 769 + }, + { + "epoch": 128.33333333333334, + "grad_norm": 81035.7890625, + "learning_rate": 0.00034272762192419327, + "loss": 0.3729, + "step": 770 + }, + { + "epoch": 128.5, + "grad_norm": 162363.390625, + "learning_rate": 0.000341347671797454, + "loss": 0.4092, + "step": 771 + }, + { + "epoch": 128.66666666666666, + "grad_norm": 112333.1328125, + "learning_rate": 0.00033996906412151417, + "loss": 0.398, + "step": 772 + }, + { + "epoch": 128.83333333333334, + "grad_norm": 106592.90625, + "learning_rate": 0.00033859181056158564, + "loss": 0.4205, + "step": 773 + }, + { + "epoch": 129.0, + "grad_norm": 107416.6640625, + "learning_rate": 0.00033721592277142175, + "loss": 0.4021, + "step": 774 + }, + { + "epoch": 129.0, + "eval_accuracy": 0.8123167155425219, + "eval_best_threshold": 0.40762990713119507, + "eval_f1": 0.8406198118428334, + "eval_loss": 0.39988085627555847, + "eval_pr_auc": 0.9105294646161908, + "eval_precision": 0.7927974947807933, + "eval_recall": 0.8945818610129564, + "eval_roc_auc": 0.8966293635881746, + "eval_runtime": 3.5481, + "eval_samples_per_second": 864.965, + "eval_steps_per_second": 1.691, + "step": 774 + }, + { + "epoch": 129.16666666666666, + "grad_norm": 41239.49609375, + "learning_rate": 0.00033584141239321953, + "loss": 0.3802, + "step": 775 + }, + { + "epoch": 129.33333333333334, + "grad_norm": 85460.1328125, + "learning_rate": 0.000334468291057521, + "loss": 0.401, + "step": 776 + }, + { + "epoch": 129.5, + "grad_norm": 89357.3984375, + "learning_rate": 0.00033309657038311456, + "loss": 0.4056, + "step": 777 + }, + { + "epoch": 129.66666666666666, + "grad_norm": 111458.8046875, + "learning_rate": 0.0003317262619769368, + "loss": 0.3956, + "step": 778 + }, + { + "epoch": 129.83333333333334, + "grad_norm": 109909.875, + "learning_rate": 0.00033035737743397446, + "loss": 0.4093, + "step": 779 + }, + { + "epoch": 130.0, + "grad_norm": 50348.46484375, + "learning_rate": 0.0003289899283371657, + "loss": 0.3804, + "step": 780 + }, + { + "epoch": 130.0, + "eval_accuracy": 0.8113391984359726, + "eval_best_threshold": 0.48834142088890076, + "eval_f1": 0.8335728657660247, + "eval_loss": 0.4086468517780304, + "eval_pr_auc": 0.9062419900377866, + "eval_precision": 0.8141493542953397, + "eval_recall": 0.8539458186101295, + "eval_roc_auc": 0.8933133673373831, + "eval_runtime": 3.6984, + "eval_samples_per_second": 829.81, + "eval_steps_per_second": 1.622, + "step": 780 + }, + { + "epoch": 130.16666666666666, + "grad_norm": 61747.39453125, + "learning_rate": 0.00032762392625730293, + "loss": 0.3776, + "step": 781 + }, + { + "epoch": 130.33333333333334, + "grad_norm": 81545.625, + "learning_rate": 0.0003262593827529343, + "loss": 0.3911, + "step": 782 + }, + { + "epoch": 130.5, + "grad_norm": 50101.1015625, + "learning_rate": 0.00032489630937026625, + "loss": 0.3805, + "step": 783 + }, + { + "epoch": 130.66666666666666, + "grad_norm": 47948.2265625, + "learning_rate": 0.00032353471764306563, + "loss": 0.3778, + "step": 784 + }, + { + "epoch": 130.83333333333334, + "grad_norm": 70858.6015625, + "learning_rate": 0.0003221746190925618, + "loss": 0.4055, + "step": 785 + }, + { + "epoch": 131.0, + "grad_norm": 65349.390625, + "learning_rate": 0.00032081602522734986, + "loss": 0.3874, + "step": 786 + }, + { + "epoch": 131.0, + "eval_accuracy": 0.8165526230042359, + "eval_best_threshold": 0.3926054537296295, + "eval_f1": 0.8408255583828103, + "eval_loss": 0.40172040462493896, + "eval_pr_auc": 0.9120142027521659, + "eval_precision": 0.8085916258836324, + "eval_recall": 0.875736160188457, + "eval_roc_auc": 0.8970475412357095, + "eval_runtime": 3.7845, + "eval_samples_per_second": 810.935, + "eval_steps_per_second": 1.585, + "step": 786 + }, + { + "epoch": 131.16666666666666, + "grad_norm": 118622.21875, + "learning_rate": 0.00031945894754329286, + "loss": 0.3902, + "step": 787 + }, + { + "epoch": 131.33333333333334, + "grad_norm": 109528.3515625, + "learning_rate": 0.0003181033975234244, + "loss": 0.4079, + "step": 788 + }, + { + "epoch": 131.5, + "grad_norm": 78493.7265625, + "learning_rate": 0.0003167493866378514, + "loss": 0.3847, + "step": 789 + }, + { + "epoch": 131.66666666666666, + "grad_norm": 198052.96875, + "learning_rate": 0.00031539692634365783, + "loss": 0.3853, + "step": 790 + }, + { + "epoch": 131.83333333333334, + "grad_norm": 139994.390625, + "learning_rate": 0.0003140460280848066, + "loss": 0.379, + "step": 791 + }, + { + "epoch": 132.0, + "grad_norm": 99828.71875, + "learning_rate": 0.00031269670329204396, + "loss": 0.3816, + "step": 792 + }, + { + "epoch": 132.0, + "eval_accuracy": 0.8211143695014663, + "eval_best_threshold": 0.5147361159324646, + "eval_f1": 0.8431876606683805, + "eval_loss": 0.39976704120635986, + "eval_pr_auc": 0.9126112501936106, + "eval_precision": 0.8186356073211315, + "eval_recall": 0.8692579505300353, + "eval_roc_auc": 0.8986343396229659, + "eval_runtime": 3.6082, + "eval_samples_per_second": 850.569, + "eval_steps_per_second": 1.663, + "step": 792 + }, + { + "epoch": 132.16666666666666, + "grad_norm": 137741.453125, + "learning_rate": 0.0003113489633828016, + "loss": 0.3943, + "step": 793 + }, + { + "epoch": 132.33333333333334, + "grad_norm": 64978.36328125, + "learning_rate": 0.00031000281976110056, + "loss": 0.3808, + "step": 794 + }, + { + "epoch": 132.5, + "grad_norm": 253367.953125, + "learning_rate": 0.0003086582838174551, + "loss": 0.3871, + "step": 795 + }, + { + "epoch": 132.66666666666666, + "grad_norm": 175149.796875, + "learning_rate": 0.00030731536692877595, + "loss": 0.3883, + "step": 796 + }, + { + "epoch": 132.83333333333334, + "grad_norm": 62742.25, + "learning_rate": 0.0003059740804582736, + "loss": 0.3913, + "step": 797 + }, + { + "epoch": 133.0, + "grad_norm": 69216.578125, + "learning_rate": 0.0003046344357553632, + "loss": 0.3498, + "step": 798 + }, + { + "epoch": 133.0, + "eval_accuracy": 0.8145975887911372, + "eval_best_threshold": 0.3728158175945282, + "eval_f1": 0.8408391608391609, + "eval_loss": 0.3914491534233093, + "eval_pr_auc": 0.915878808081855, + "eval_precision": 0.8007458710708577, + "eval_recall": 0.8851590106007067, + "eval_roc_auc": 0.9024999591917036, + "eval_runtime": 3.6398, + "eval_samples_per_second": 843.171, + "eval_steps_per_second": 1.648, + "step": 798 + }, + { + "epoch": 133.16666666666666, + "grad_norm": 78250.9921875, + "learning_rate": 0.00030329644415556757, + "loss": 0.3738, + "step": 799 + }, + { + "epoch": 133.33333333333334, + "grad_norm": 51423.38671875, + "learning_rate": 0.0003019601169804216, + "loss": 0.3597, + "step": 800 + }, + { + "epoch": 133.5, + "grad_norm": 128579.515625, + "learning_rate": 0.00030062546553737693, + "loss": 0.3987, + "step": 801 + }, + { + "epoch": 133.66666666666666, + "grad_norm": 49651.3046875, + "learning_rate": 0.0002992925011197053, + "loss": 0.377, + "step": 802 + }, + { + "epoch": 133.83333333333334, + "grad_norm": 95432.3671875, + "learning_rate": 0.0002979612350064042, + "loss": 0.3948, + "step": 803 + }, + { + "epoch": 134.0, + "grad_norm": 179643.421875, + "learning_rate": 0.0002966316784621, + "loss": 0.3748, + "step": 804 + }, + { + "epoch": 134.0, + "eval_accuracy": 0.820136852394917, + "eval_best_threshold": 0.6017637848854065, + "eval_f1": 0.8398142774230992, + "eval_loss": 0.40153124928474426, + "eval_pr_auc": 0.9159341444123951, + "eval_precision": 0.8278032036613272, + "eval_recall": 0.8521790341578327, + "eval_roc_auc": 0.9012782876667019, + "eval_runtime": 3.6233, + "eval_samples_per_second": 847.024, + "eval_steps_per_second": 1.656, + "step": 804 + }, + { + "epoch": 134.16666666666666, + "grad_norm": 152972.578125, + "learning_rate": 0.0002953038427369537, + "loss": 0.3877, + "step": 805 + }, + { + "epoch": 134.33333333333334, + "grad_norm": 75853.0078125, + "learning_rate": 0.0002939777390665658, + "loss": 0.3917, + "step": 806 + }, + { + "epoch": 134.5, + "grad_norm": 87845.7734375, + "learning_rate": 0.00029265337867188056, + "loss": 0.3859, + "step": 807 + }, + { + "epoch": 134.66666666666666, + "grad_norm": 91740.2421875, + "learning_rate": 0.0002913307727590911, + "loss": 0.366, + "step": 808 + }, + { + "epoch": 134.83333333333334, + "grad_norm": 178002.421875, + "learning_rate": 0.0002900099325195453, + "loss": 0.3757, + "step": 809 + }, + { + "epoch": 135.0, + "grad_norm": 134209.953125, + "learning_rate": 0.0002886908691296504, + "loss": 0.3992, + "step": 810 + }, + { + "epoch": 135.0, + "eval_accuracy": 0.8243727598566308, + "eval_best_threshold": 0.3863644599914551, + "eval_f1": 0.8455144740613356, + "eval_loss": 0.39555782079696655, + "eval_pr_auc": 0.9174050733417477, + "eval_precision": 0.8235622557230597, + "eval_recall": 0.8686690223792697, + "eval_roc_auc": 0.902185090968136, + "eval_runtime": 3.6528, + "eval_samples_per_second": 840.174, + "eval_steps_per_second": 1.643, + "step": 810 + }, + { + "epoch": 135.16666666666666, + "grad_norm": 109344.46875, + "learning_rate": 0.0002873735937507786, + "loss": 0.3929, + "step": 811 + }, + { + "epoch": 135.33333333333334, + "grad_norm": 193133.703125, + "learning_rate": 0.000286058117529173, + "loss": 0.3721, + "step": 812 + }, + { + "epoch": 135.5, + "grad_norm": 97225.890625, + "learning_rate": 0.0002847444515958523, + "loss": 0.3845, + "step": 813 + }, + { + "epoch": 135.66666666666666, + "grad_norm": 65620.78125, + "learning_rate": 0.00028343260706651866, + "loss": 0.3739, + "step": 814 + }, + { + "epoch": 135.83333333333334, + "grad_norm": 121686.9609375, + "learning_rate": 0.00028212259504146043, + "loss": 0.3798, + "step": 815 + }, + { + "epoch": 136.0, + "grad_norm": 145372.125, + "learning_rate": 0.00028081442660546124, + "loss": 0.4034, + "step": 816 + }, + { + "epoch": 136.0, + "eval_accuracy": 0.8139459107201042, + "eval_best_threshold": 0.434948205947876, + "eval_f1": 0.8400112076211824, + "eval_loss": 0.3963012099266052, + "eval_pr_auc": 0.9114395336778187, + "eval_precision": 0.8011758417958311, + "eval_recall": 0.8828032979976443, + "eval_roc_auc": 0.8988340854946696, + "eval_runtime": 3.8337, + "eval_samples_per_second": 800.528, + "eval_steps_per_second": 1.565, + "step": 816 + }, + { + "epoch": 136.16666666666666, + "grad_norm": 69139.9921875, + "learning_rate": 0.00027950811282770506, + "loss": 0.363, + "step": 817 + }, + { + "epoch": 136.33333333333334, + "grad_norm": 87535.4453125, + "learning_rate": 0.00027820366476168226, + "loss": 0.3627, + "step": 818 + }, + { + "epoch": 136.5, + "grad_norm": 64158.21484375, + "learning_rate": 0.0002769010934450956, + "loss": 0.3796, + "step": 819 + }, + { + "epoch": 136.66666666666666, + "grad_norm": 134014.46875, + "learning_rate": 0.00027560040989976894, + "loss": 0.4012, + "step": 820 + }, + { + "epoch": 136.83333333333334, + "grad_norm": 122497.1953125, + "learning_rate": 0.0002743016251315517, + "loss": 0.3701, + "step": 821 + }, + { + "epoch": 137.0, + "grad_norm": 41029.91796875, + "learning_rate": 0.00027300475013022663, + "loss": 0.3771, + "step": 822 + }, + { + "epoch": 137.0, + "eval_accuracy": 0.8224177256435321, + "eval_best_threshold": 0.4527401924133301, + "eval_f1": 0.8470390120684816, + "eval_loss": 0.3848971724510193, + "eval_pr_auc": 0.918773658242021, + "eval_precision": 0.8091152815013405, + "eval_recall": 0.8886925795053003, + "eval_roc_auc": 0.9052160734858619, + "eval_runtime": 3.7139, + "eval_samples_per_second": 826.362, + "eval_steps_per_second": 1.616, + "step": 822 + }, + { + "epoch": 137.16666666666666, + "grad_norm": 78552.4296875, + "learning_rate": 0.00027170979586941727, + "loss": 0.3637, + "step": 823 + }, + { + "epoch": 137.33333333333334, + "grad_norm": 83702.828125, + "learning_rate": 0.0002704167733064941, + "loss": 0.3808, + "step": 824 + }, + { + "epoch": 137.5, + "grad_norm": 101689.9453125, + "learning_rate": 0.00026912569338248315, + "loss": 0.3703, + "step": 825 + }, + { + "epoch": 137.66666666666666, + "grad_norm": 108093.1015625, + "learning_rate": 0.00026783656702197156, + "loss": 0.3886, + "step": 826 + }, + { + "epoch": 137.83333333333334, + "grad_norm": 85036.0546875, + "learning_rate": 0.00026654940513301696, + "loss": 0.3702, + "step": 827 + }, + { + "epoch": 138.0, + "grad_norm": 92898.703125, + "learning_rate": 0.00026526421860705474, + "loss": 0.3831, + "step": 828 + }, + { + "epoch": 138.0, + "eval_accuracy": 0.8233952427500815, + "eval_best_threshold": 0.36562278866767883, + "eval_f1": 0.848434004474273, + "eval_loss": 0.38462093472480774, + "eval_pr_auc": 0.9204799802032648, + "eval_precision": 0.8077742279020235, + "eval_recall": 0.8934040047114252, + "eval_roc_auc": 0.9060363202428912, + "eval_runtime": 3.6435, + "eval_samples_per_second": 842.317, + "eval_steps_per_second": 1.647, + "step": 828 + }, + { + "epoch": 138.16666666666666, + "grad_norm": 105777.234375, + "learning_rate": 0.0002639810183188045, + "loss": 0.3695, + "step": 829 + }, + { + "epoch": 138.33333333333334, + "grad_norm": 72163.3515625, + "learning_rate": 0.0002626998151261798, + "loss": 0.3792, + "step": 830 + }, + { + "epoch": 138.5, + "grad_norm": 78065.9921875, + "learning_rate": 0.00026142061987019576, + "loss": 0.3676, + "step": 831 + }, + { + "epoch": 138.66666666666666, + "grad_norm": 149816.9375, + "learning_rate": 0.0002601434433748771, + "loss": 0.3829, + "step": 832 + }, + { + "epoch": 138.83333333333334, + "grad_norm": 99635.90625, + "learning_rate": 0.0002588682964471657, + "loss": 0.361, + "step": 833 + }, + { + "epoch": 139.0, + "grad_norm": 153759.828125, + "learning_rate": 0.0002575951898768315, + "loss": 0.3732, + "step": 834 + }, + { + "epoch": 139.0, + "eval_accuracy": 0.826653633105246, + "eval_best_threshold": 0.5249126553535461, + "eval_f1": 0.8463316002310803, + "eval_loss": 0.3846147060394287, + "eval_pr_auc": 0.920402157746111, + "eval_precision": 0.8304988662131519, + "eval_recall": 0.8627797408716137, + "eval_roc_auc": 0.9073454074343266, + "eval_runtime": 3.6743, + "eval_samples_per_second": 835.272, + "eval_steps_per_second": 1.633, + "step": 834 + }, + { + "epoch": 139.16666666666666, + "grad_norm": 115450.0859375, + "learning_rate": 0.00025632413443637885, + "loss": 0.3688, + "step": 835 + }, + { + "epoch": 139.33333333333334, + "grad_norm": 108277.234375, + "learning_rate": 0.00025505514088095655, + "loss": 0.3803, + "step": 836 + }, + { + "epoch": 139.5, + "grad_norm": 167415.609375, + "learning_rate": 0.0002537882199482665, + "loss": 0.3723, + "step": 837 + }, + { + "epoch": 139.66666666666666, + "grad_norm": 69005.0859375, + "learning_rate": 0.000252523382358473, + "loss": 0.3635, + "step": 838 + }, + { + "epoch": 139.83333333333334, + "grad_norm": 161154.0, + "learning_rate": 0.00025126063881411185, + "loss": 0.4026, + "step": 839 + }, + { + "epoch": 140.0, + "grad_norm": 224129.625, + "learning_rate": 0.0002500000000000001, + "loss": 0.3813, + "step": 840 + }, + { + "epoch": 140.0, + "eval_accuracy": 0.8207885304659498, + "eval_best_threshold": 0.43965643644332886, + "eval_f1": 0.8441043083900227, + "eval_loss": 0.3839028477668762, + "eval_pr_auc": 0.9200467798517515, + "eval_precision": 0.8136612021857923, + "eval_recall": 0.8769140164899882, + "eval_roc_auc": 0.9065846548777942, + "eval_runtime": 3.8677, + "eval_samples_per_second": 793.494, + "eval_steps_per_second": 1.551, + "step": 840 + }, + { + "epoch": 140.16666666666666, + "grad_norm": 138304.515625, + "learning_rate": 0.000248741476583145, + "loss": 0.3825, + "step": 841 + }, + { + "epoch": 140.33333333333334, + "grad_norm": 193517.5625, + "learning_rate": 0.00024748507921265513, + "loss": 0.3782, + "step": 842 + }, + { + "epoch": 140.5, + "grad_norm": 210903.09375, + "learning_rate": 0.00024623081851964806, + "loss": 0.4004, + "step": 843 + }, + { + "epoch": 140.66666666666666, + "grad_norm": 137417.265625, + "learning_rate": 0.0002449787051171624, + "loss": 0.3664, + "step": 844 + }, + { + "epoch": 140.83333333333334, + "grad_norm": 112819.9765625, + "learning_rate": 0.00024372874960006742, + "loss": 0.3757, + "step": 845 + }, + { + "epoch": 141.0, + "grad_norm": 204521.53125, + "learning_rate": 0.00024248096254497287, + "loss": 0.3845, + "step": 846 + }, + { + "epoch": 141.0, + "eval_accuracy": 0.8204626914304334, + "eval_best_threshold": 0.39028987288475037, + "eval_f1": 0.8484181568088033, + "eval_loss": 0.37648096680641174, + "eval_pr_auc": 0.9232867403907821, + "eval_precision": 0.7960764068146619, + "eval_recall": 0.9081272084805654, + "eval_roc_auc": 0.909695106183187, + "eval_runtime": 3.9144, + "eval_samples_per_second": 784.022, + "eval_steps_per_second": 1.533, + "step": 846 + }, + { + "epoch": 141.16666666666666, + "grad_norm": 53121.89453125, + "learning_rate": 0.00024123535451014012, + "loss": 0.3609, + "step": 847 + }, + { + "epoch": 141.33333333333334, + "grad_norm": 133370.984375, + "learning_rate": 0.00023999193603539232, + "loss": 0.3849, + "step": 848 + }, + { + "epoch": 141.5, + "grad_norm": 164482.40625, + "learning_rate": 0.00023875071764202561, + "loss": 0.3461, + "step": 849 + }, + { + "epoch": 141.66666666666666, + "grad_norm": 76757.4609375, + "learning_rate": 0.00023751170983271996, + "loss": 0.3632, + "step": 850 + }, + { + "epoch": 141.83333333333334, + "grad_norm": 128235.109375, + "learning_rate": 0.00023627492309145028, + "loss": 0.3693, + "step": 851 + }, + { + "epoch": 142.0, + "grad_norm": 63393.671875, + "learning_rate": 0.0002350403678833976, + "loss": 0.3608, + "step": 852 + }, + { + "epoch": 142.0, + "eval_accuracy": 0.8269794721407625, + "eval_best_threshold": 0.37692126631736755, + "eval_f1": 0.8489331436699857, + "eval_loss": 0.38506025075912476, + "eval_pr_auc": 0.9212251720339615, + "eval_precision": 0.8211337369290038, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.9076310655089139, + "eval_runtime": 3.9427, + "eval_samples_per_second": 778.401, + "eval_steps_per_second": 1.522, + "step": 852 + }, + { + "epoch": 142.16666666666666, + "grad_norm": 165122.5, + "learning_rate": 0.00023380805465486083, + "loss": 0.3744, + "step": 853 + }, + { + "epoch": 142.33333333333334, + "grad_norm": 152976.96875, + "learning_rate": 0.00023257799383316798, + "loss": 0.3712, + "step": 854 + }, + { + "epoch": 142.5, + "grad_norm": 85063.9921875, + "learning_rate": 0.000231350195826588, + "loss": 0.355, + "step": 855 + }, + { + "epoch": 142.66666666666666, + "grad_norm": 120685.4609375, + "learning_rate": 0.00023012467102424372, + "loss": 0.3713, + "step": 856 + }, + { + "epoch": 142.83333333333334, + "grad_norm": 102459.109375, + "learning_rate": 0.00022890142979602147, + "loss": 0.3722, + "step": 857 + }, + { + "epoch": 143.0, + "grad_norm": 72757.78125, + "learning_rate": 0.00022768048249248646, + "loss": 0.3411, + "step": 858 + }, + { + "epoch": 143.0, + "eval_accuracy": 0.83088954056696, + "eval_best_threshold": 0.5101819634437561, + "eval_f1": 0.8498698293317906, + "eval_loss": 0.3803495764732361, + "eval_pr_auc": 0.9233289689301184, + "eval_precision": 0.8351335986355884, + "eval_recall": 0.8651354534746761, + "eval_roc_auc": 0.9097028382814465, + "eval_runtime": 3.9443, + "eval_samples_per_second": 778.087, + "eval_steps_per_second": 1.521, + "step": 858 + }, + { + "epoch": 143.16666666666666, + "grad_norm": 126750.984375, + "learning_rate": 0.00022646183944479272, + "loss": 0.3623, + "step": 859 + }, + { + "epoch": 143.33333333333334, + "grad_norm": 146971.515625, + "learning_rate": 0.000225245510964597, + "loss": 0.3923, + "step": 860 + }, + { + "epoch": 143.5, + "grad_norm": 92141.0234375, + "learning_rate": 0.00022403150734397092, + "loss": 0.3584, + "step": 861 + }, + { + "epoch": 143.66666666666666, + "grad_norm": 158932.25, + "learning_rate": 0.00022281983885531425, + "loss": 0.3741, + "step": 862 + }, + { + "epoch": 143.83333333333334, + "grad_norm": 72797.109375, + "learning_rate": 0.00022161051575126783, + "loss": 0.3628, + "step": 863 + }, + { + "epoch": 144.0, + "grad_norm": 189629.625, + "learning_rate": 0.00022040354826462666, + "loss": 0.3652, + "step": 864 + }, + { + "epoch": 144.0, + "eval_accuracy": 0.8305637015314434, + "eval_best_threshold": 0.5188665390014648, + "eval_f1": 0.8505747126436781, + "eval_loss": 0.3749193251132965, + "eval_pr_auc": 0.9253198082355598, + "eval_precision": 0.8305274971941639, + "eval_recall": 0.8716136631330977, + "eval_roc_auc": 0.912020319954226, + "eval_runtime": 3.8321, + "eval_samples_per_second": 800.873, + "eval_steps_per_second": 1.566, + "step": 864 + }, + { + "epoch": 144.16666666666666, + "grad_norm": 105332.4609375, + "learning_rate": 0.00021919894660825363, + "loss": 0.3674, + "step": 865 + }, + { + "epoch": 144.33333333333334, + "grad_norm": 91438.1640625, + "learning_rate": 0.0002179967209749929, + "loss": 0.3586, + "step": 866 + }, + { + "epoch": 144.5, + "grad_norm": 93499.1796875, + "learning_rate": 0.0002167968815375837, + "loss": 0.3675, + "step": 867 + }, + { + "epoch": 144.66666666666666, + "grad_norm": 66955.734375, + "learning_rate": 0.0002155994384485742, + "loss": 0.3729, + "step": 868 + }, + { + "epoch": 144.83333333333334, + "grad_norm": 67269.2578125, + "learning_rate": 0.00021440440184023564, + "loss": 0.3571, + "step": 869 + }, + { + "epoch": 145.0, + "grad_norm": 103688.8515625, + "learning_rate": 0.00021321178182447708, + "loss": 0.3786, + "step": 870 + }, + { + "epoch": 145.0, + "eval_accuracy": 0.8240469208211144, + "eval_best_threshold": 0.3671317994594574, + "eval_f1": 0.8512396694214877, + "eval_loss": 0.37529799342155457, + "eval_pr_auc": 0.9232630000576302, + "eval_precision": 0.7996894409937888, + "eval_recall": 0.9098939929328622, + "eval_roc_auc": 0.9101865239836802, + "eval_runtime": 3.8101, + "eval_samples_per_second": 805.489, + "eval_steps_per_second": 1.575, + "step": 870 + }, + { + "epoch": 145.16666666666666, + "grad_norm": 91547.3515625, + "learning_rate": 0.00021202158849275833, + "loss": 0.3695, + "step": 871 + }, + { + "epoch": 145.33333333333334, + "grad_norm": 89502.46875, + "learning_rate": 0.00021083383191600674, + "loss": 0.3704, + "step": 872 + }, + { + "epoch": 145.5, + "grad_norm": 96104.1875, + "learning_rate": 0.0002096485221445301, + "loss": 0.358, + "step": 873 + }, + { + "epoch": 145.66666666666666, + "grad_norm": 108982.1796875, + "learning_rate": 0.00020846566920793263, + "loss": 0.3532, + "step": 874 + }, + { + "epoch": 145.83333333333334, + "grad_norm": 114974.890625, + "learning_rate": 0.00020728528311502974, + "loss": 0.3778, + "step": 875 + }, + { + "epoch": 146.0, + "grad_norm": 75530.546875, + "learning_rate": 0.00020610737385376348, + "loss": 0.3627, + "step": 876 + }, + { + "epoch": 146.0, + "eval_accuracy": 0.8312153796024764, + "eval_best_threshold": 0.4211173951625824, + "eval_f1": 0.853257790368272, + "eval_loss": 0.37683817744255066, + "eval_pr_auc": 0.9232412956565166, + "eval_precision": 0.8220524017467249, + "eval_recall": 0.8869257950530035, + "eval_roc_auc": 0.9097780114589695, + "eval_runtime": 3.9666, + "eval_samples_per_second": 773.701, + "eval_steps_per_second": 1.513, + "step": 876 + }, + { + "epoch": 146.16666666666666, + "grad_norm": 65020.58984375, + "learning_rate": 0.00020493195139111787, + "loss": 0.3548, + "step": 877 + }, + { + "epoch": 146.33333333333334, + "grad_norm": 67554.9609375, + "learning_rate": 0.00020375902567303471, + "loss": 0.3666, + "step": 878 + }, + { + "epoch": 146.5, + "grad_norm": 80817.4921875, + "learning_rate": 0.00020258860662432943, + "loss": 0.3667, + "step": 879 + }, + { + "epoch": 146.66666666666666, + "grad_norm": 129111.09375, + "learning_rate": 0.00020142070414860702, + "loss": 0.364, + "step": 880 + }, + { + "epoch": 146.83333333333334, + "grad_norm": 68261.125, + "learning_rate": 0.0002002553281281782, + "loss": 0.3613, + "step": 881 + }, + { + "epoch": 147.0, + "grad_norm": 85333.3671875, + "learning_rate": 0.00019909248842397582, + "loss": 0.3684, + "step": 882 + }, + { + "epoch": 147.0, + "eval_accuracy": 0.8305637015314434, + "eval_best_threshold": 0.45273062586784363, + "eval_f1": 0.8525241066364152, + "eval_loss": 0.3769237995147705, + "eval_pr_auc": 0.9225581489341609, + "eval_precision": 0.8222100656455142, + "eval_recall": 0.8851590106007067, + "eval_roc_auc": 0.9094597067472867, + "eval_runtime": 4.0322, + "eval_samples_per_second": 761.121, + "eval_steps_per_second": 1.488, + "step": 882 + }, + { + "epoch": 147.16666666666666, + "grad_norm": 111886.7578125, + "learning_rate": 0.00019793219487547183, + "loss": 0.3575, + "step": 883 + }, + { + "epoch": 147.33333333333334, + "grad_norm": 163052.109375, + "learning_rate": 0.00019677445730059345, + "loss": 0.3625, + "step": 884 + }, + { + "epoch": 147.5, + "grad_norm": 66131.125, + "learning_rate": 0.00019561928549563967, + "loss": 0.3651, + "step": 885 + }, + { + "epoch": 147.66666666666666, + "grad_norm": 199836.4375, + "learning_rate": 0.0001944666892352001, + "loss": 0.3858, + "step": 886 + }, + { + "epoch": 147.83333333333334, + "grad_norm": 204624.03125, + "learning_rate": 0.00019331667827207044, + "loss": 0.3718, + "step": 887 + }, + { + "epoch": 148.0, + "grad_norm": 118003.953125, + "learning_rate": 0.00019216926233717085, + "loss": 0.3635, + "step": 888 + }, + { + "epoch": 148.0, + "eval_accuracy": 0.83088954056696, + "eval_best_threshold": 0.5705100893974304, + "eval_f1": 0.8493468795355588, + "eval_loss": 0.3833242356777191, + "eval_pr_auc": 0.9233202281747017, + "eval_precision": 0.8374356038923869, + "eval_recall": 0.8616018845700825, + "eval_roc_auc": 0.9097142216483287, + "eval_runtime": 3.9143, + "eval_samples_per_second": 784.048, + "eval_steps_per_second": 1.533, + "step": 888 + }, + { + "epoch": 148.16666666666666, + "grad_norm": 222922.71875, + "learning_rate": 0.00019102445113946344, + "loss": 0.3759, + "step": 889 + }, + { + "epoch": 148.33333333333334, + "grad_norm": 234529.6875, + "learning_rate": 0.00018988225436587002, + "loss": 0.3723, + "step": 890 + }, + { + "epoch": 148.5, + "grad_norm": 210516.421875, + "learning_rate": 0.0001887426816811903, + "loss": 0.3804, + "step": 891 + }, + { + "epoch": 148.66666666666666, + "grad_norm": 59804.4765625, + "learning_rate": 0.00018760574272802001, + "loss": 0.3669, + "step": 892 + }, + { + "epoch": 148.83333333333334, + "grad_norm": 221783.171875, + "learning_rate": 0.0001864714471266691, + "loss": 0.3675, + "step": 893 + }, + { + "epoch": 149.0, + "grad_norm": 220364.734375, + "learning_rate": 0.00018533980447508135, + "loss": 0.3643, + "step": 894 + }, + { + "epoch": 149.0, + "eval_accuracy": 0.8227435646790485, + "eval_best_threshold": 0.4187193512916565, + "eval_f1": 0.8465011286681715, + "eval_loss": 0.37492498755455017, + "eval_pr_auc": 0.924585944838121, + "eval_precision": 0.8125677139761647, + "eval_recall": 0.8833922261484098, + "eval_roc_auc": 0.9101375540280366, + "eval_runtime": 3.9895, + "eval_samples_per_second": 769.264, + "eval_steps_per_second": 1.504, + "step": 894 + }, + { + "epoch": 149.16666666666666, + "grad_norm": 81329.15625, + "learning_rate": 0.00018421082434875132, + "loss": 0.3556, + "step": 895 + }, + { + "epoch": 149.33333333333334, + "grad_norm": 156673.375, + "learning_rate": 0.0001830845163006448, + "loss": 0.3886, + "step": 896 + }, + { + "epoch": 149.5, + "grad_norm": 65551.578125, + "learning_rate": 0.00018196088986111798, + "loss": 0.3523, + "step": 897 + }, + { + "epoch": 149.66666666666666, + "grad_norm": 121681.234375, + "learning_rate": 0.00018083995453783603, + "loss": 0.3617, + "step": 898 + }, + { + "epoch": 149.83333333333334, + "grad_norm": 119770.8984375, + "learning_rate": 0.0001797217198156924, + "loss": 0.3687, + "step": 899 + }, + { + "epoch": 150.0, + "grad_norm": 62330.62890625, + "learning_rate": 0.0001786061951567303, + "loss": 0.3559, + "step": 900 + }, + { + "epoch": 150.0, + "eval_accuracy": 0.826653633105246, + "eval_best_threshold": 0.47909417748451233, + "eval_f1": 0.8487777146105742, + "eval_loss": 0.3842035233974457, + "eval_pr_auc": 0.9212527914064633, + "eval_precision": 0.8203296703296703, + "eval_recall": 0.8792697290930507, + "eval_roc_auc": 0.9077642294233831, + "eval_runtime": 4.006, + "eval_samples_per_second": 766.103, + "eval_steps_per_second": 1.498, + "step": 900 + }, + { + "epoch": 150.16666666666666, + "grad_norm": 181080.671875, + "learning_rate": 0.00017749339000006098, + "loss": 0.3687, + "step": 901 + }, + { + "epoch": 150.33333333333334, + "grad_norm": 173832.859375, + "learning_rate": 0.00017638331376178384, + "loss": 0.3865, + "step": 902 + }, + { + "epoch": 150.5, + "grad_norm": 120683.6015625, + "learning_rate": 0.00017527597583490823, + "loss": 0.3556, + "step": 903 + }, + { + "epoch": 150.66666666666666, + "grad_norm": 215743.546875, + "learning_rate": 0.00017417138558927244, + "loss": 0.3401, + "step": 904 + }, + { + "epoch": 150.83333333333334, + "grad_norm": 303968.78125, + "learning_rate": 0.00017306955237146522, + "loss": 0.3778, + "step": 905 + }, + { + "epoch": 151.0, + "grad_norm": 278908.25, + "learning_rate": 0.00017197048550474643, + "loss": 0.3826, + "step": 906 + }, + { + "epoch": 151.0, + "eval_accuracy": 0.83088954056696, + "eval_best_threshold": 0.4328542947769165, + "eval_f1": 0.8517566409597258, + "eval_loss": 0.37284591794013977, + "eval_pr_auc": 0.9236686774663367, + "eval_precision": 0.826955074875208, + "eval_recall": 0.8780918727915195, + "eval_roc_auc": 0.9110409208413552, + "eval_runtime": 4.0186, + "eval_samples_per_second": 763.692, + "eval_steps_per_second": 1.493, + "step": 906 + }, + { + "epoch": 151.16666666666666, + "grad_norm": 151452.84375, + "learning_rate": 0.00017087419428896806, + "loss": 0.3363, + "step": 907 + }, + { + "epoch": 151.33333333333334, + "grad_norm": 205682.671875, + "learning_rate": 0.00016978068800049622, + "loss": 0.3828, + "step": 908 + }, + { + "epoch": 151.5, + "grad_norm": 306064.1875, + "learning_rate": 0.00016868997589213137, + "loss": 0.3857, + "step": 909 + }, + { + "epoch": 151.66666666666666, + "grad_norm": 246629.0, + "learning_rate": 0.00016760206719303107, + "loss": 0.3897, + "step": 910 + }, + { + "epoch": 151.83333333333334, + "grad_norm": 102579.609375, + "learning_rate": 0.0001665169711086319, + "loss": 0.3532, + "step": 911 + }, + { + "epoch": 152.0, + "grad_norm": 155984.015625, + "learning_rate": 0.00016543469682057105, + "loss": 0.3756, + "step": 912 + }, + { + "epoch": 152.0, + "eval_accuracy": 0.8204626914304334, + "eval_best_threshold": 0.2976920008659363, + "eval_f1": 0.8457878533445284, + "eval_loss": 0.40224388241767883, + "eval_pr_auc": 0.9200705150322268, + "eval_precision": 0.8058666666666666, + "eval_recall": 0.8898704358068316, + "eval_roc_auc": 0.906433019839705, + "eval_runtime": 3.9788, + "eval_samples_per_second": 771.348, + "eval_steps_per_second": 1.508, + "step": 912 + }, + { + "epoch": 152.16666666666666, + "grad_norm": 299503.15625, + "learning_rate": 0.00016435525348660823, + "loss": 0.3917, + "step": 913 + }, + { + "epoch": 152.33333333333334, + "grad_norm": 268243.5625, + "learning_rate": 0.00016327865024054982, + "loss": 0.3717, + "step": 914 + }, + { + "epoch": 152.5, + "grad_norm": 88446.296875, + "learning_rate": 0.0001622048961921699, + "loss": 0.3505, + "step": 915 + }, + { + "epoch": 152.66666666666666, + "grad_norm": 175196.296875, + "learning_rate": 0.0001611340004271339, + "loss": 0.3534, + "step": 916 + }, + { + "epoch": 152.83333333333334, + "grad_norm": 200618.140625, + "learning_rate": 0.00016006597200692168, + "loss": 0.3837, + "step": 917 + }, + { + "epoch": 153.0, + "grad_norm": 138927.59375, + "learning_rate": 0.00015900081996875082, + "loss": 0.3716, + "step": 918 + }, + { + "epoch": 153.0, + "eval_accuracy": 0.8334962528510915, + "eval_best_threshold": 0.49080690741539, + "eval_f1": 0.8498383779018513, + "eval_loss": 0.37779301404953003, + "eval_pr_auc": 0.9236738139526288, + "eval_precision": 0.8480938416422288, + "eval_recall": 0.8515901060070671, + "eval_roc_auc": 0.909127226522128, + "eval_runtime": 4.0135, + "eval_samples_per_second": 764.662, + "eval_steps_per_second": 1.495, + "step": 918 + }, + { + "epoch": 153.16666666666666, + "grad_norm": 81768.3828125, + "learning_rate": 0.0001579385533255001, + "loss": 0.3556, + "step": 919 + }, + { + "epoch": 153.33333333333334, + "grad_norm": 165935.765625, + "learning_rate": 0.00015687918106563326, + "loss": 0.352, + "step": 920 + }, + { + "epoch": 153.5, + "grad_norm": 228723.890625, + "learning_rate": 0.00015582271215312294, + "loss": 0.3631, + "step": 921 + }, + { + "epoch": 153.66666666666666, + "grad_norm": 213650.1875, + "learning_rate": 0.00015476915552737532, + "loss": 0.3663, + "step": 922 + }, + { + "epoch": 153.83333333333334, + "grad_norm": 126728.4453125, + "learning_rate": 0.00015371852010315302, + "loss": 0.3653, + "step": 923 + }, + { + "epoch": 154.0, + "grad_norm": 166727.078125, + "learning_rate": 0.00015267081477050133, + "loss": 0.3684, + "step": 924 + }, + { + "epoch": 154.0, + "eval_accuracy": 0.8305637015314434, + "eval_best_threshold": 0.570549488067627, + "eval_f1": 0.8510028653295129, + "eval_loss": 0.3899708092212677, + "eval_pr_auc": 0.9218689165850973, + "eval_precision": 0.8286830357142857, + "eval_recall": 0.8745583038869258, + "eval_roc_auc": 0.9100284455303747, + "eval_runtime": 3.9666, + "eval_samples_per_second": 773.719, + "eval_steps_per_second": 1.513, + "step": 924 + }, + { + "epoch": 154.16666666666666, + "grad_norm": 238350.765625, + "learning_rate": 0.00015162604839467266, + "loss": 0.3786, + "step": 925 + }, + { + "epoch": 154.33333333333334, + "grad_norm": 208803.765625, + "learning_rate": 0.00015058422981604997, + "loss": 0.3754, + "step": 926 + }, + { + "epoch": 154.5, + "grad_norm": 146063.75, + "learning_rate": 0.00014954536785007454, + "loss": 0.3698, + "step": 927 + }, + { + "epoch": 154.66666666666666, + "grad_norm": 141785.609375, + "learning_rate": 0.00014850947128716913, + "loss": 0.3579, + "step": 928 + }, + { + "epoch": 154.83333333333334, + "grad_norm": 191827.84375, + "learning_rate": 0.0001474765488926648, + "loss": 0.3634, + "step": 929 + }, + { + "epoch": 155.0, + "grad_norm": 185356.5, + "learning_rate": 0.00014644660940672628, + "loss": 0.3723, + "step": 930 + }, + { + "epoch": 155.0, + "eval_accuracy": 0.8367546432062561, + "eval_best_threshold": 0.40736904740333557, + "eval_f1": 0.8571428571428571, + "eval_loss": 0.37203943729400635, + "eval_pr_auc": 0.9237786897972513, + "eval_precision": 0.8308457711442786, + "eval_recall": 0.8851590106007067, + "eval_roc_auc": 0.9116079413803857, + "eval_runtime": 3.9799, + "eval_samples_per_second": 771.132, + "eval_steps_per_second": 1.508, + "step": 930 + }, + { + "epoch": 155.16666666666666, + "grad_norm": 92037.6640625, + "learning_rate": 0.00014541966154427822, + "loss": 0.3771, + "step": 931 + }, + { + "epoch": 155.33333333333334, + "grad_norm": 62586.59375, + "learning_rate": 0.00014439571399493145, + "loss": 0.3654, + "step": 932 + }, + { + "epoch": 155.5, + "grad_norm": 77046.0390625, + "learning_rate": 0.00014337477542290927, + "loss": 0.3472, + "step": 933 + }, + { + "epoch": 155.66666666666666, + "grad_norm": 54610.234375, + "learning_rate": 0.00014235685446697434, + "loss": 0.3664, + "step": 934 + }, + { + "epoch": 155.83333333333334, + "grad_norm": 109650.8125, + "learning_rate": 0.00014134195974035524, + "loss": 0.3338, + "step": 935 + }, + { + "epoch": 156.0, + "grad_norm": 68304.0625, + "learning_rate": 0.00014033009983067452, + "loss": 0.3468, + "step": 936 + }, + { + "epoch": 156.0, + "eval_accuracy": 0.8328445747800587, + "eval_best_threshold": 0.46987348794937134, + "eval_f1": 0.8525438344351826, + "eval_loss": 0.37313708662986755, + "eval_pr_auc": 0.9235315191777744, + "eval_precision": 0.832678270634475, + "eval_recall": 0.8733804475853946, + "eval_roc_auc": 0.9111019185054027, + "eval_runtime": 4.1093, + "eval_samples_per_second": 746.848, + "eval_steps_per_second": 1.46, + "step": 936 + }, + { + "epoch": 156.16666666666666, + "grad_norm": 97583.125, + "learning_rate": 0.0001393212832998743, + "loss": 0.3536, + "step": 937 + }, + { + "epoch": 156.33333333333334, + "grad_norm": 62628.94921875, + "learning_rate": 0.000138315518684146, + "loss": 0.3457, + "step": 938 + }, + { + "epoch": 156.5, + "grad_norm": 79858.0078125, + "learning_rate": 0.0001373128144938563, + "loss": 0.3513, + "step": 939 + }, + { + "epoch": 156.66666666666666, + "grad_norm": 87592.1015625, + "learning_rate": 0.00013631317921347562, + "loss": 0.3656, + "step": 940 + }, + { + "epoch": 156.83333333333334, + "grad_norm": 72757.2578125, + "learning_rate": 0.00013531662130150736, + "loss": 0.3523, + "step": 941 + }, + { + "epoch": 157.0, + "grad_norm": 77319.5859375, + "learning_rate": 0.00013432314919041476, + "loss": 0.3549, + "step": 942 + }, + { + "epoch": 157.0, + "eval_accuracy": 0.83088954056696, + "eval_best_threshold": 0.424466073513031, + "eval_f1": 0.85099052540913, + "eval_loss": 0.3759530484676361, + "eval_pr_auc": 0.9233009083235354, + "eval_precision": 0.8302521008403362, + "eval_recall": 0.872791519434629, + "eval_roc_auc": 0.9103029350185871, + "eval_runtime": 4.1782, + "eval_samples_per_second": 734.535, + "eval_steps_per_second": 1.436, + "step": 942 + }, + { + "epoch": 157.16666666666666, + "grad_norm": 95107.390625, + "learning_rate": 0.00013333277128655064, + "loss": 0.3339, + "step": 943 + }, + { + "epoch": 157.33333333333334, + "grad_norm": 86475.1171875, + "learning_rate": 0.00013234549597008571, + "loss": 0.3561, + "step": 944 + }, + { + "epoch": 157.5, + "grad_norm": 80413.9765625, + "learning_rate": 0.000131361331594938, + "loss": 0.3521, + "step": 945 + }, + { + "epoch": 157.66666666666666, + "grad_norm": 131372.859375, + "learning_rate": 0.00013038028648870205, + "loss": 0.3606, + "step": 946 + }, + { + "epoch": 157.83333333333334, + "grad_norm": 89998.09375, + "learning_rate": 0.00012940236895257824, + "loss": 0.3661, + "step": 947 + }, + { + "epoch": 158.0, + "grad_norm": 110346.8359375, + "learning_rate": 0.00012842758726130281, + "loss": 0.3606, + "step": 948 + }, + { + "epoch": 158.0, + "eval_accuracy": 0.8253502769631802, + "eval_best_threshold": 0.3530597686767578, + "eval_f1": 0.8529088913282108, + "eval_loss": 0.37229081988334656, + "eval_pr_auc": 0.9241729081786076, + "eval_precision": 0.7985611510791367, + "eval_recall": 0.9151943462897526, + "eval_roc_auc": 0.9118356087180268, + "eval_runtime": 4.152, + "eval_samples_per_second": 739.162, + "eval_steps_per_second": 1.445, + "step": 948 + }, + { + "epoch": 158.16666666666666, + "grad_norm": 80206.3359375, + "learning_rate": 0.00012745594966307822, + "loss": 0.3499, + "step": 949 + }, + { + "epoch": 158.33333333333334, + "grad_norm": 65314.18359375, + "learning_rate": 0.00012648746437950208, + "loss": 0.3384, + "step": 950 + }, + { + "epoch": 158.5, + "grad_norm": 89174.2578125, + "learning_rate": 0.0001255221396054989, + "loss": 0.3593, + "step": 951 + }, + { + "epoch": 158.66666666666666, + "grad_norm": 67226.4375, + "learning_rate": 0.0001245599835092504, + "loss": 0.364, + "step": 952 + }, + { + "epoch": 158.83333333333334, + "grad_norm": 83786.7734375, + "learning_rate": 0.00012360100423212605, + "loss": 0.3524, + "step": 953 + }, + { + "epoch": 159.0, + "grad_norm": 67511.8359375, + "learning_rate": 0.000122645209888614, + "loss": 0.3537, + "step": 954 + }, + { + "epoch": 159.0, + "eval_accuracy": 0.8413163897034864, + "eval_best_threshold": 0.5131648182868958, + "eval_f1": 0.8578102189781022, + "eval_loss": 0.3645898997783661, + "eval_pr_auc": 0.9277610171354891, + "eval_precision": 0.8506079907353793, + "eval_recall": 0.8651354534746761, + "eval_roc_auc": 0.9155506671512115, + "eval_runtime": 4.1736, + "eval_samples_per_second": 735.334, + "eval_steps_per_second": 1.438, + "step": 954 + }, + { + "epoch": 159.16666666666666, + "grad_norm": 53345.765625, + "learning_rate": 0.00012169260856625358, + "loss": 0.351, + "step": 955 + }, + { + "epoch": 159.33333333333334, + "grad_norm": 168120.953125, + "learning_rate": 0.00012074320832556557, + "loss": 0.3663, + "step": 956 + }, + { + "epoch": 159.5, + "grad_norm": 154755.359375, + "learning_rate": 0.00011979701719998454, + "loss": 0.3637, + "step": 957 + }, + { + "epoch": 159.66666666666666, + "grad_norm": 81563.53125, + "learning_rate": 0.00011885404319579107, + "loss": 0.3413, + "step": 958 + }, + { + "epoch": 159.83333333333334, + "grad_norm": 119309.7578125, + "learning_rate": 0.00011791429429204342, + "loss": 0.3577, + "step": 959 + }, + { + "epoch": 160.0, + "grad_norm": 72382.3359375, + "learning_rate": 0.00011697777844051105, + "loss": 0.3399, + "step": 960 + }, + { + "epoch": 160.0, + "eval_accuracy": 0.8292603453893777, + "eval_best_threshold": 0.44661715626716614, + "eval_f1": 0.8506271379703535, + "eval_loss": 0.3717084228992462, + "eval_pr_auc": 0.9246020036155943, + "eval_precision": 0.8243093922651934, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.9123667609123532, + "eval_runtime": 4.1104, + "eval_samples_per_second": 746.647, + "eval_steps_per_second": 1.46, + "step": 960 + }, + { + "epoch": 160.16666666666666, + "grad_norm": 67661.6640625, + "learning_rate": 0.00011604450356560575, + "loss": 0.3437, + "step": 961 + }, + { + "epoch": 160.33333333333334, + "grad_norm": 119658.078125, + "learning_rate": 0.00011511447756431604, + "loss": 0.3525, + "step": 962 + }, + { + "epoch": 160.5, + "grad_norm": 106822.890625, + "learning_rate": 0.00011418770830614011, + "loss": 0.3454, + "step": 963 + }, + { + "epoch": 160.66666666666666, + "grad_norm": 57645.9296875, + "learning_rate": 0.00011326420363301809, + "loss": 0.3424, + "step": 964 + }, + { + "epoch": 160.83333333333334, + "grad_norm": 77775.6796875, + "learning_rate": 0.00011234397135926705, + "loss": 0.3647, + "step": 965 + }, + { + "epoch": 161.0, + "grad_norm": 90241.984375, + "learning_rate": 0.00011142701927151455, + "loss": 0.3537, + "step": 966 + }, + { + "epoch": 161.0, + "eval_accuracy": 0.8400130335614207, + "eval_best_threshold": 0.4722670316696167, + "eval_f1": 0.858948577994829, + "eval_loss": 0.36644211411476135, + "eval_pr_auc": 0.9255285397987627, + "eval_precision": 0.8384744812114414, + "eval_recall": 0.8804475853945819, + "eval_roc_auc": 0.9146513811675296, + "eval_runtime": 4.2747, + "eval_samples_per_second": 717.945, + "eval_steps_per_second": 1.404, + "step": 966 + }, + { + "epoch": 161.16666666666666, + "grad_norm": 81642.9296875, + "learning_rate": 0.00011051335512863232, + "loss": 0.3504, + "step": 967 + }, + { + "epoch": 161.33333333333334, + "grad_norm": 160026.0, + "learning_rate": 0.0001096029866616704, + "loss": 0.3406, + "step": 968 + }, + { + "epoch": 161.5, + "grad_norm": 120099.6171875, + "learning_rate": 0.00010869592157379304, + "loss": 0.3677, + "step": 969 + }, + { + "epoch": 161.66666666666666, + "grad_norm": 75987.7421875, + "learning_rate": 0.00010779216754021215, + "loss": 0.3498, + "step": 970 + }, + { + "epoch": 161.83333333333334, + "grad_norm": 116359.1484375, + "learning_rate": 0.00010689173220812293, + "loss": 0.3335, + "step": 971 + }, + { + "epoch": 162.0, + "grad_norm": 79084.625, + "learning_rate": 0.00010599462319663906, + "loss": 0.3546, + "step": 972 + }, + { + "epoch": 162.0, + "eval_accuracy": 0.8289345063538612, + "eval_best_threshold": 0.4081013798713684, + "eval_f1": 0.8509792790235595, + "eval_loss": 0.3715569078922272, + "eval_pr_auc": 0.9236155693773005, + "eval_precision": 0.8213698630136986, + "eval_recall": 0.8828032979976443, + "eval_roc_auc": 0.9119734978036544, + "eval_runtime": 4.2416, + "eval_samples_per_second": 723.546, + "eval_steps_per_second": 1.415, + "step": 972 + }, + { + "epoch": 162.16666666666666, + "grad_norm": 94386.2265625, + "learning_rate": 0.00010510084809672838, + "loss": 0.3692, + "step": 973 + }, + { + "epoch": 162.33333333333334, + "grad_norm": 74503.6953125, + "learning_rate": 0.00010421041447114837, + "loss": 0.3599, + "step": 974 + }, + { + "epoch": 162.5, + "grad_norm": 100313.7578125, + "learning_rate": 0.00010332332985438247, + "loss": 0.3539, + "step": 975 + }, + { + "epoch": 162.66666666666666, + "grad_norm": 101121.765625, + "learning_rate": 0.00010243960175257604, + "loss": 0.3283, + "step": 976 + }, + { + "epoch": 162.83333333333334, + "grad_norm": 100584.296875, + "learning_rate": 0.00010155923764347352, + "loss": 0.3514, + "step": 977 + }, + { + "epoch": 163.0, + "grad_norm": 88137.6328125, + "learning_rate": 0.00010068224497635369, + "loss": 0.3506, + "step": 978 + }, + { + "epoch": 163.0, + "eval_accuracy": 0.830237862495927, + "eval_best_threshold": 0.4215361773967743, + "eval_f1": 0.8511853756069694, + "eval_loss": 0.3692292869091034, + "eval_pr_auc": 0.9259921792261762, + "eval_precision": 0.8264004437049363, + "eval_recall": 0.8775029446407538, + "eval_roc_auc": 0.9133734371496394, + "eval_runtime": 4.2429, + "eval_samples_per_second": 723.323, + "eval_steps_per_second": 1.414, + "step": 978 + }, + { + "epoch": 163.16666666666666, + "grad_norm": 71053.8828125, + "learning_rate": 9.980863117196814e-05, + "loss": 0.3631, + "step": 979 + }, + { + "epoch": 163.33333333333334, + "grad_norm": 158759.703125, + "learning_rate": 9.893840362247809e-05, + "loss": 0.3573, + "step": 980 + }, + { + "epoch": 163.5, + "grad_norm": 124987.6015625, + "learning_rate": 9.807156969139135e-05, + "loss": 0.3565, + "step": 981 + }, + { + "epoch": 163.66666666666666, + "grad_norm": 74836.234375, + "learning_rate": 9.720813671350031e-05, + "loss": 0.3333, + "step": 982 + }, + { + "epoch": 163.83333333333334, + "grad_norm": 70798.34375, + "learning_rate": 9.634811199482008e-05, + "loss": 0.3462, + "step": 983 + }, + { + "epoch": 164.0, + "grad_norm": 82296.0234375, + "learning_rate": 9.549150281252633e-05, + "loss": 0.3453, + "step": 984 + }, + { + "epoch": 164.0, + "eval_accuracy": 0.8328445747800587, + "eval_best_threshold": 0.4308677613735199, + "eval_f1": 0.8538877812589006, + "eval_loss": 0.36614447832107544, + "eval_pr_auc": 0.9269770160926805, + "eval_precision": 0.8268063982349697, + "eval_recall": 0.8828032979976443, + "eval_roc_auc": 0.9147209700518653, + "eval_runtime": 4.2745, + "eval_samples_per_second": 717.975, + "eval_steps_per_second": 1.404, + "step": 984 + }, + { + "epoch": 164.16666666666666, + "grad_norm": 41647.91015625, + "learning_rate": 9.46383164148939e-05, + "loss": 0.3571, + "step": 985 + }, + { + "epoch": 164.33333333333334, + "grad_norm": 49898.0625, + "learning_rate": 9.378856002123548e-05, + "loss": 0.3608, + "step": 986 + }, + { + "epoch": 164.5, + "grad_norm": 142842.78125, + "learning_rate": 9.294224082184043e-05, + "loss": 0.3591, + "step": 987 + }, + { + "epoch": 164.66666666666666, + "grad_norm": 97403.40625, + "learning_rate": 9.209936597791407e-05, + "loss": 0.3492, + "step": 988 + }, + { + "epoch": 164.83333333333334, + "grad_norm": 72412.1328125, + "learning_rate": 9.125994262151683e-05, + "loss": 0.3452, + "step": 989 + }, + { + "epoch": 165.0, + "grad_norm": 80854.3046875, + "learning_rate": 9.042397785550405e-05, + "loss": 0.3233, + "step": 990 + }, + { + "epoch": 165.0, + "eval_accuracy": 0.8351254480286738, + "eval_best_threshold": 0.4565604031085968, + "eval_f1": 0.8544303797468354, + "eval_loss": 0.36564016342163086, + "eval_pr_auc": 0.9278390368820122, + "eval_precision": 0.8352080989876266, + "eval_recall": 0.8745583038869258, + "eval_roc_auc": 0.9147793903498259, + "eval_runtime": 4.2545, + "eval_samples_per_second": 721.352, + "eval_steps_per_second": 1.41, + "step": 990 + }, + { + "epoch": 165.16666666666666, + "grad_norm": 49396.58984375, + "learning_rate": 8.959147875346652e-05, + "loss": 0.3176, + "step": 991 + }, + { + "epoch": 165.33333333333334, + "grad_norm": 112044.234375, + "learning_rate": 8.876245235966885e-05, + "loss": 0.3341, + "step": 992 + }, + { + "epoch": 165.5, + "grad_norm": 80428.34375, + "learning_rate": 8.793690568899215e-05, + "loss": 0.3543, + "step": 993 + }, + { + "epoch": 165.66666666666666, + "grad_norm": 66167.703125, + "learning_rate": 8.711484572687295e-05, + "loss": 0.3357, + "step": 994 + }, + { + "epoch": 165.83333333333334, + "grad_norm": 87920.015625, + "learning_rate": 8.629627942924473e-05, + "loss": 0.3829, + "step": 995 + }, + { + "epoch": 166.0, + "grad_norm": 150330.1875, + "learning_rate": 8.548121372247918e-05, + "loss": 0.3576, + "step": 996 + }, + { + "epoch": 166.0, + "eval_accuracy": 0.8341479309221245, + "eval_best_threshold": 0.41728565096855164, + "eval_f1": 0.8544466685730626, + "eval_loss": 0.36713624000549316, + "eval_pr_auc": 0.92653588836402, + "eval_precision": 0.830461367426348, + "eval_recall": 0.8798586572438163, + "eval_roc_auc": 0.9145203650581325, + "eval_runtime": 4.2872, + "eval_samples_per_second": 715.845, + "eval_steps_per_second": 1.4, + "step": 996 + }, + { + "epoch": 166.16666666666666, + "grad_norm": 73645.6640625, + "learning_rate": 8.466965550332728e-05, + "loss": 0.3528, + "step": 997 + }, + { + "epoch": 166.33333333333334, + "grad_norm": 83714.1015625, + "learning_rate": 8.386161163886118e-05, + "loss": 0.3432, + "step": 998 + }, + { + "epoch": 166.5, + "grad_norm": 146188.8125, + "learning_rate": 8.305708896641595e-05, + "loss": 0.341, + "step": 999 + }, + { + "epoch": 166.66666666666666, + "grad_norm": 69561.3671875, + "learning_rate": 8.225609429353187e-05, + "loss": 0.35, + "step": 1000 + }, + { + "epoch": 166.83333333333334, + "grad_norm": 170722.265625, + "learning_rate": 8.145863439789658e-05, + "loss": 0.3656, + "step": 1001 + }, + { + "epoch": 167.0, + "grad_norm": 155031.265625, + "learning_rate": 8.066471602728804e-05, + "loss": 0.3356, + "step": 1002 + }, + { + "epoch": 167.0, + "eval_accuracy": 0.8344737699576409, + "eval_best_threshold": 0.4809587299823761, + "eval_f1": 0.8542742398164085, + "eval_loss": 0.3662647604942322, + "eval_pr_auc": 0.926724531939227, + "eval_precision": 0.8327740492170023, + "eval_recall": 0.8769140164899882, + "eval_roc_auc": 0.915205085315113, + "eval_runtime": 4.3238, + "eval_samples_per_second": 709.793, + "eval_steps_per_second": 1.388, + "step": 1002 + }, + { + "epoch": 167.16666666666666, + "grad_norm": 101407.078125, + "learning_rate": 7.987434589951726e-05, + "loss": 0.3447, + "step": 1003 + }, + { + "epoch": 167.33333333333334, + "grad_norm": 103118.65625, + "learning_rate": 7.908753070237123e-05, + "loss": 0.3635, + "step": 1004 + }, + { + "epoch": 167.5, + "grad_norm": 123300.7265625, + "learning_rate": 7.830427709355725e-05, + "loss": 0.3502, + "step": 1005 + }, + { + "epoch": 167.66666666666666, + "grad_norm": 127009.6328125, + "learning_rate": 7.752459170064491e-05, + "loss": 0.3332, + "step": 1006 + }, + { + "epoch": 167.83333333333334, + "grad_norm": 69865.4765625, + "learning_rate": 7.674848112101196e-05, + "loss": 0.3415, + "step": 1007 + }, + { + "epoch": 168.0, + "grad_norm": 90881.4140625, + "learning_rate": 7.597595192178702e-05, + "loss": 0.3486, + "step": 1008 + }, + { + "epoch": 168.0, + "eval_accuracy": 0.8393613554903877, + "eval_best_threshold": 0.5172176361083984, + "eval_f1": 0.8541851523217983, + "eval_loss": 0.3668026328086853, + "eval_pr_auc": 0.927415132113945, + "eval_precision": 0.8579916815210933, + "eval_recall": 0.850412249705536, + "eval_roc_auc": 0.9148575704544499, + "eval_runtime": 3.9261, + "eval_samples_per_second": 781.695, + "eval_steps_per_second": 1.528, + "step": 1008 + }, + { + "epoch": 168.16666666666666, + "grad_norm": 106595.0625, + "learning_rate": 7.520701063979474e-05, + "loss": 0.3614, + "step": 1009 + }, + { + "epoch": 168.33333333333334, + "grad_norm": 93386.671875, + "learning_rate": 7.444166378150013e-05, + "loss": 0.3379, + "step": 1010 + }, + { + "epoch": 168.5, + "grad_norm": 45564.28515625, + "learning_rate": 7.367991782295391e-05, + "loss": 0.3381, + "step": 1011 + }, + { + "epoch": 168.66666666666666, + "grad_norm": 68190.890625, + "learning_rate": 7.292177920973725e-05, + "loss": 0.3379, + "step": 1012 + }, + { + "epoch": 168.83333333333334, + "grad_norm": 114296.609375, + "learning_rate": 7.216725435690751e-05, + "loss": 0.3523, + "step": 1013 + }, + { + "epoch": 169.0, + "grad_norm": 118999.3515625, + "learning_rate": 7.14163496489439e-05, + "loss": 0.3421, + "step": 1014 + }, + { + "epoch": 169.0, + "eval_accuracy": 0.8396871945259042, + "eval_best_threshold": 0.4463706612586975, + "eval_f1": 0.858701895462378, + "eval_loss": 0.3626120686531067, + "eval_pr_auc": 0.9278995895335498, + "eval_precision": 0.8380044843049327, + "eval_recall": 0.8804475853945819, + "eval_roc_auc": 0.916600084709432, + "eval_runtime": 3.9409, + "eval_samples_per_second": 778.747, + "eval_steps_per_second": 1.522, + "step": 1014 + }, + { + "epoch": 169.16666666666666, + "grad_norm": 55655.57421875, + "learning_rate": 7.066907143969353e-05, + "loss": 0.3158, + "step": 1015 + }, + { + "epoch": 169.33333333333334, + "grad_norm": 67258.390625, + "learning_rate": 6.992542605231739e-05, + "loss": 0.3561, + "step": 1016 + }, + { + "epoch": 169.5, + "grad_norm": 63820.6328125, + "learning_rate": 6.91854197792371e-05, + "loss": 0.3378, + "step": 1017 + }, + { + "epoch": 169.66666666666666, + "grad_norm": 63111.68359375, + "learning_rate": 6.844905888208181e-05, + "loss": 0.3529, + "step": 1018 + }, + { + "epoch": 169.83333333333334, + "grad_norm": 98256.8203125, + "learning_rate": 6.77163495916348e-05, + "loss": 0.3369, + "step": 1019 + }, + { + "epoch": 170.0, + "grad_norm": 59946.46484375, + "learning_rate": 6.698729810778065e-05, + "loss": 0.3437, + "step": 1020 + }, + { + "epoch": 170.0, + "eval_accuracy": 0.8380579993483219, + "eval_best_threshold": 0.45613446831703186, + "eval_f1": 0.8571428571428571, + "eval_loss": 0.3628476858139038, + "eval_pr_auc": 0.928300039207456, + "eval_precision": 0.837170129140932, + "eval_recall": 0.8780918727915195, + "eval_roc_auc": 0.9168992739559735, + "eval_runtime": 3.9447, + "eval_samples_per_second": 778.01, + "eval_steps_per_second": 1.521, + "step": 1020 + }, + { + "epoch": 170.16666666666666, + "grad_norm": 93339.4765625, + "learning_rate": 6.626191059945375e-05, + "loss": 0.3552, + "step": 1021 + }, + { + "epoch": 170.33333333333334, + "grad_norm": 83849.5625, + "learning_rate": 6.554019320458493e-05, + "loss": 0.3205, + "step": 1022 + }, + { + "epoch": 170.5, + "grad_norm": 48642.6015625, + "learning_rate": 6.482215203005015e-05, + "loss": 0.3427, + "step": 1023 + }, + { + "epoch": 170.66666666666666, + "grad_norm": 155210.078125, + "learning_rate": 6.410779315161885e-05, + "loss": 0.3271, + "step": 1024 + }, + { + "epoch": 170.83333333333334, + "grad_norm": 86855.953125, + "learning_rate": 6.339712261390212e-05, + "loss": 0.3423, + "step": 1025 + }, + { + "epoch": 171.0, + "grad_norm": 107046.40625, + "learning_rate": 6.269014643030213e-05, + "loss": 0.3363, + "step": 1026 + }, + { + "epoch": 171.0, + "eval_accuracy": 0.8400130335614207, + "eval_best_threshold": 0.5443341135978699, + "eval_f1": 0.8557155451072583, + "eval_loss": 0.3667296767234802, + "eval_pr_auc": 0.9274746367227966, + "eval_precision": 0.853958944281525, + "eval_recall": 0.8574793875147232, + "eval_roc_auc": 0.9158794961077475, + "eval_runtime": 3.9867, + "eval_samples_per_second": 769.81, + "eval_steps_per_second": 1.505, + "step": 1026 + }, + { + "epoch": 171.16666666666666, + "grad_norm": 100376.0625, + "learning_rate": 6.198687058296071e-05, + "loss": 0.3336, + "step": 1027 + }, + { + "epoch": 171.33333333333334, + "grad_norm": 150857.78125, + "learning_rate": 6.128730102270897e-05, + "loss": 0.3689, + "step": 1028 + }, + { + "epoch": 171.5, + "grad_norm": 62914.38671875, + "learning_rate": 6.059144366901737e-05, + "loss": 0.3193, + "step": 1029 + }, + { + "epoch": 171.66666666666666, + "grad_norm": 72126.6484375, + "learning_rate": 5.989930440994451e-05, + "loss": 0.3407, + "step": 1030 + }, + { + "epoch": 171.83333333333334, + "grad_norm": 59858.13671875, + "learning_rate": 5.921088910208833e-05, + "loss": 0.3384, + "step": 1031 + }, + { + "epoch": 172.0, + "grad_norm": 107665.28125, + "learning_rate": 5.852620357053651e-05, + "loss": 0.3605, + "step": 1032 + }, + { + "epoch": 172.0, + "eval_accuracy": 0.837406321277289, + "eval_best_threshold": 0.42939823865890503, + "eval_f1": 0.8546460821438975, + "eval_loss": 0.36934569478034973, + "eval_pr_auc": 0.9267793551349948, + "eval_precision": 0.8455331412103746, + "eval_recall": 0.8639575971731449, + "eval_roc_auc": 0.9149121247032808, + "eval_runtime": 4.0058, + "eval_samples_per_second": 766.141, + "eval_steps_per_second": 1.498, + "step": 1032 + }, + { + "epoch": 172.16666666666666, + "grad_norm": 190367.765625, + "learning_rate": 5.78452536088166e-05, + "loss": 0.3248, + "step": 1033 + }, + { + "epoch": 172.33333333333334, + "grad_norm": 134129.859375, + "learning_rate": 5.716804497884698e-05, + "loss": 0.3496, + "step": 1034 + }, + { + "epoch": 172.5, + "grad_norm": 129405.9609375, + "learning_rate": 5.649458341088914e-05, + "loss": 0.3654, + "step": 1035 + }, + { + "epoch": 172.66666666666666, + "grad_norm": 85490.953125, + "learning_rate": 5.5824874603498056e-05, + "loss": 0.3546, + "step": 1036 + }, + { + "epoch": 172.83333333333334, + "grad_norm": 101800.09375, + "learning_rate": 5.515892422347413e-05, + "loss": 0.3349, + "step": 1037 + }, + { + "epoch": 173.0, + "grad_norm": 102238.9296875, + "learning_rate": 5.449673790581611e-05, + "loss": 0.3378, + "step": 1038 + }, + { + "epoch": 173.0, + "eval_accuracy": 0.8387096774193549, + "eval_best_threshold": 0.4776381254196167, + "eval_f1": 0.8567293777134588, + "eval_loss": 0.36330658197402954, + "eval_pr_auc": 0.9276768573910711, + "eval_precision": 0.842344906089926, + "eval_recall": 0.8716136631330977, + "eval_roc_auc": 0.9164031309843219, + "eval_runtime": 4.006, + "eval_samples_per_second": 766.097, + "eval_steps_per_second": 1.498, + "step": 1038 + }, + { + "epoch": 173.16666666666666, + "grad_norm": 73306.3046875, + "learning_rate": 5.383832125367233e-05, + "loss": 0.3338, + "step": 1039 + }, + { + "epoch": 173.33333333333334, + "grad_norm": 55874.16015625, + "learning_rate": 5.318367983829392e-05, + "loss": 0.3408, + "step": 1040 + }, + { + "epoch": 173.5, + "grad_norm": 164934.734375, + "learning_rate": 5.2532819198987506e-05, + "loss": 0.334, + "step": 1041 + }, + { + "epoch": 173.66666666666666, + "grad_norm": 159728.6875, + "learning_rate": 5.1885744843068295e-05, + "loss": 0.3337, + "step": 1042 + }, + { + "epoch": 173.83333333333334, + "grad_norm": 125720.4140625, + "learning_rate": 5.124246224581375e-05, + "loss": 0.3456, + "step": 1043 + }, + { + "epoch": 174.0, + "grad_norm": 61294.265625, + "learning_rate": 5.060297685041659e-05, + "loss": 0.3518, + "step": 1044 + }, + { + "epoch": 174.0, + "eval_accuracy": 0.8426197458455523, + "eval_best_threshold": 0.4912884533405304, + "eval_f1": 0.8591426071741033, + "eval_loss": 0.3630110025405884, + "eval_pr_auc": 0.9279353248854936, + "eval_precision": 0.8509532062391681, + "eval_recall": 0.8674911660777385, + "eval_roc_auc": 0.9164450131832275, + "eval_runtime": 4.055, + "eval_samples_per_second": 756.837, + "eval_steps_per_second": 1.48, + "step": 1044 + }, + { + "epoch": 174.16666666666666, + "grad_norm": 46890.20703125, + "learning_rate": 4.996729406793943e-05, + "loss": 0.3363, + "step": 1045 + }, + { + "epoch": 174.33333333333334, + "grad_norm": 112337.2421875, + "learning_rate": 4.933541927726887e-05, + "loss": 0.3398, + "step": 1046 + }, + { + "epoch": 174.5, + "grad_norm": 91068.9609375, + "learning_rate": 4.87073578250698e-05, + "loss": 0.3423, + "step": 1047 + }, + { + "epoch": 174.66666666666666, + "grad_norm": 97913.828125, + "learning_rate": 4.8083115025739754e-05, + "loss": 0.3471, + "step": 1048 + }, + { + "epoch": 174.83333333333334, + "grad_norm": 63174.71875, + "learning_rate": 4.7462696161365006e-05, + "loss": 0.329, + "step": 1049 + }, + { + "epoch": 175.0, + "grad_norm": 103652.0859375, + "learning_rate": 4.684610648167503e-05, + "loss": 0.3401, + "step": 1050 + }, + { + "epoch": 175.0, + "eval_accuracy": 0.8387096774193549, + "eval_best_threshold": 0.43684035539627075, + "eval_f1": 0.8576358930112166, + "eval_loss": 0.362859845161438, + "eval_pr_auc": 0.9280278073336945, + "eval_precision": 0.8381112984822934, + "eval_recall": 0.8780918727915195, + "eval_roc_auc": 0.9166754726674623, + "eval_runtime": 4.0499, + "eval_samples_per_second": 757.803, + "eval_steps_per_second": 1.482, + "step": 1050 + }, + { + "epoch": 175.16666666666666, + "grad_norm": 74191.2109375, + "learning_rate": 4.6233351203998185e-05, + "loss": 0.3401, + "step": 1051 + }, + { + "epoch": 175.33333333333334, + "grad_norm": 56000.7421875, + "learning_rate": 4.562443551321788e-05, + "loss": 0.3453, + "step": 1052 + }, + { + "epoch": 175.5, + "grad_norm": 90864.1640625, + "learning_rate": 4.501936456172845e-05, + "loss": 0.3524, + "step": 1053 + }, + { + "epoch": 175.66666666666666, + "grad_norm": 77782.4453125, + "learning_rate": 4.441814346939149e-05, + "loss": 0.3323, + "step": 1054 + }, + { + "epoch": 175.83333333333334, + "grad_norm": 67637.40625, + "learning_rate": 4.382077732349299e-05, + "loss": 0.3194, + "step": 1055 + }, + { + "epoch": 176.0, + "grad_norm": 122488.0390625, + "learning_rate": 4.322727117869951e-05, + "loss": 0.3407, + "step": 1056 + }, + { + "epoch": 176.0, + "eval_accuracy": 0.8422939068100358, + "eval_best_threshold": 0.4837534725666046, + "eval_f1": 0.8576470588235294, + "eval_loss": 0.3607856035232544, + "eval_pr_auc": 0.9296632050709258, + "eval_precision": 0.8566392479435958, + "eval_recall": 0.8586572438162544, + "eval_roc_auc": 0.9178060772574076, + "eval_runtime": 4.0408, + "eval_samples_per_second": 759.5, + "eval_steps_per_second": 1.485, + "step": 1056 + }, + { + "epoch": 176.16666666666666, + "grad_norm": 102023.53125, + "learning_rate": 4.2637630057016487e-05, + "loss": 0.3196, + "step": 1057 + }, + { + "epoch": 176.33333333333334, + "grad_norm": 90810.1171875, + "learning_rate": 4.205185894774455e-05, + "loss": 0.3339, + "step": 1058 + }, + { + "epoch": 176.5, + "grad_norm": 137295.046875, + "learning_rate": 4.1469962807437976e-05, + "loss": 0.335, + "step": 1059 + }, + { + "epoch": 176.66666666666666, + "grad_norm": 57208.4453125, + "learning_rate": 4.0891946559863055e-05, + "loss": 0.3573, + "step": 1060 + }, + { + "epoch": 176.83333333333334, + "grad_norm": 89965.6640625, + "learning_rate": 4.0317815095955446e-05, + "loss": 0.3539, + "step": 1061 + }, + { + "epoch": 177.0, + "grad_norm": 91107.03125, + "learning_rate": 3.974757327377981e-05, + "loss": 0.3411, + "step": 1062 + }, + { + "epoch": 177.0, + "eval_accuracy": 0.8419680677745194, + "eval_best_threshold": 0.46736517548561096, + "eval_f1": 0.8590526009880849, + "eval_loss": 0.359243780374527, + "eval_pr_auc": 0.9296493537780411, + "eval_precision": 0.8479632816982214, + "eval_recall": 0.8704358068315665, + "eval_roc_auc": 0.918315536620506, + "eval_runtime": 4.0551, + "eval_samples_per_second": 756.821, + "eval_steps_per_second": 1.48, + "step": 1062 + }, + { + "epoch": 177.16666666666666, + "grad_norm": 60148.11328125, + "learning_rate": 3.91812259184881e-05, + "loss": 0.3518, + "step": 1063 + }, + { + "epoch": 177.33333333333334, + "grad_norm": 69379.609375, + "learning_rate": 3.861877782227885e-05, + "loss": 0.3265, + "step": 1064 + }, + { + "epoch": 177.5, + "grad_norm": 101317.9375, + "learning_rate": 3.806023374435663e-05, + "loss": 0.3383, + "step": 1065 + }, + { + "epoch": 177.66666666666666, + "grad_norm": 55583.95703125, + "learning_rate": 3.750559841089196e-05, + "loss": 0.3113, + "step": 1066 + }, + { + "epoch": 177.83333333333334, + "grad_norm": 91411.53125, + "learning_rate": 3.6954876514981085e-05, + "loss": 0.3464, + "step": 1067 + }, + { + "epoch": 178.0, + "grad_norm": 61728.08984375, + "learning_rate": 3.6408072716606344e-05, + "loss": 0.3551, + "step": 1068 + }, + { + "epoch": 178.0, + "eval_accuracy": 0.8442489410231345, + "eval_best_threshold": 0.4724968671798706, + "eval_f1": 0.8594117647058823, + "eval_loss": 0.3610427677631378, + "eval_pr_auc": 0.9291781268367707, + "eval_precision": 0.8584018801410106, + "eval_recall": 0.8604240282685512, + "eval_roc_auc": 0.9179811233707824, + "eval_runtime": 4.0883, + "eval_samples_per_second": 750.681, + "eval_steps_per_second": 1.468, + "step": 1068 + }, + { + "epoch": 178.16666666666666, + "grad_norm": 54876.40234375, + "learning_rate": 3.586519164259672e-05, + "loss": 0.3271, + "step": 1069 + }, + { + "epoch": 178.33333333333334, + "grad_norm": 151829.515625, + "learning_rate": 3.532623788658873e-05, + "loss": 0.3351, + "step": 1070 + }, + { + "epoch": 178.5, + "grad_norm": 183532.984375, + "learning_rate": 3.479121600898777e-05, + "loss": 0.3301, + "step": 1071 + }, + { + "epoch": 178.66666666666666, + "grad_norm": 71134.21875, + "learning_rate": 3.426013053692878e-05, + "loss": 0.3312, + "step": 1072 + }, + { + "epoch": 178.83333333333334, + "grad_norm": 65962.1484375, + "learning_rate": 3.373298596423901e-05, + "loss": 0.3306, + "step": 1073 + }, + { + "epoch": 179.0, + "grad_norm": 73190.765625, + "learning_rate": 3.3209786751399184e-05, + "loss": 0.3645, + "step": 1074 + }, + { + "epoch": 179.0, + "eval_accuracy": 0.844574780058651, + "eval_best_threshold": 0.5492097735404968, + "eval_f1": 0.8584989617324236, + "eval_loss": 0.35855212807655334, + "eval_pr_auc": 0.9293997822677108, + "eval_precision": 0.8649133293484758, + "eval_recall": 0.8521790341578327, + "eval_roc_auc": 0.9187532592941969, + "eval_runtime": 4.0681, + "eval_samples_per_second": 754.413, + "eval_steps_per_second": 1.475, + "step": 1074 + }, + { + "epoch": 179.16666666666666, + "grad_norm": 118103.890625, + "learning_rate": 3.2690537325505806e-05, + "loss": 0.3578, + "step": 1075 + }, + { + "epoch": 179.33333333333334, + "grad_norm": 89901.9375, + "learning_rate": 3.217524208023431e-05, + "loss": 0.3531, + "step": 1076 + }, + { + "epoch": 179.5, + "grad_norm": 116530.1328125, + "learning_rate": 3.166390537580122e-05, + "loss": 0.3251, + "step": 1077 + }, + { + "epoch": 179.66666666666666, + "grad_norm": 64210.16015625, + "learning_rate": 3.115653153892761e-05, + "loss": 0.3309, + "step": 1078 + }, + { + "epoch": 179.83333333333334, + "grad_norm": 115194.0625, + "learning_rate": 3.065312486280231e-05, + "loss": 0.325, + "step": 1079 + }, + { + "epoch": 180.0, + "grad_norm": 74724.2890625, + "learning_rate": 3.0153689607045842e-05, + "loss": 0.3515, + "step": 1080 + }, + { + "epoch": 180.0, + "eval_accuracy": 0.8406647116324536, + "eval_best_threshold": 0.44527578353881836, + "eval_f1": 0.8587929540860526, + "eval_loss": 0.35788804292678833, + "eval_pr_auc": 0.929943212647981, + "eval_precision": 0.8424929178470255, + "eval_recall": 0.875736160188457, + "eval_roc_auc": 0.9188731068172191, + "eval_runtime": 4.1403, + "eval_samples_per_second": 741.258, + "eval_steps_per_second": 1.449, + "step": 1080 + }, + { + "epoch": 180.16666666666666, + "grad_norm": 91110.6875, + "learning_rate": 2.965822999767398e-05, + "loss": 0.3468, + "step": 1081 + }, + { + "epoch": 180.33333333333334, + "grad_norm": 86685.6640625, + "learning_rate": 2.9166750227062387e-05, + "loss": 0.3374, + "step": 1082 + }, + { + "epoch": 180.5, + "grad_norm": 121932.2734375, + "learning_rate": 2.8679254453910786e-05, + "loss": 0.346, + "step": 1083 + }, + { + "epoch": 180.66666666666666, + "grad_norm": 108420.78125, + "learning_rate": 2.8195746803208243e-05, + "loss": 0.3369, + "step": 1084 + }, + { + "epoch": 180.83333333333334, + "grad_norm": 85976.6953125, + "learning_rate": 2.7716231366197665e-05, + "loss": 0.3223, + "step": 1085 + }, + { + "epoch": 181.0, + "grad_norm": 77798.3046875, + "learning_rate": 2.724071220034158e-05, + "loss": 0.3413, + "step": 1086 + }, + { + "epoch": 181.0, + "eval_accuracy": 0.8426197458455523, + "eval_best_threshold": 0.46771353483200073, + "eval_f1": 0.8588132125109617, + "eval_loss": 0.36123743653297424, + "eval_pr_auc": 0.9284586856058963, + "eval_precision": 0.8525827045850262, + "eval_recall": 0.8651354534746761, + "eval_roc_auc": 0.9173434400448806, + "eval_runtime": 4.1205, + "eval_samples_per_second": 744.82, + "eval_steps_per_second": 1.456, + "step": 1086 + }, + { + "epoch": 181.16666666666666, + "grad_norm": 69239.671875, + "learning_rate": 2.6769193329287845e-05, + "loss": 0.34, + "step": 1087 + }, + { + "epoch": 181.33333333333334, + "grad_norm": 67383.75, + "learning_rate": 2.63016787428354e-05, + "loss": 0.3458, + "step": 1088 + }, + { + "epoch": 181.5, + "grad_norm": 49835.70703125, + "learning_rate": 2.583817239690034e-05, + "loss": 0.3425, + "step": 1089 + }, + { + "epoch": 181.66666666666666, + "grad_norm": 92561.109375, + "learning_rate": 2.537867821348305e-05, + "loss": 0.342, + "step": 1090 + }, + { + "epoch": 181.83333333333334, + "grad_norm": 65791.2734375, + "learning_rate": 2.4923200080634422e-05, + "loss": 0.3283, + "step": 1091 + }, + { + "epoch": 182.0, + "grad_norm": 90121.1484375, + "learning_rate": 2.4471741852423235e-05, + "loss": 0.3357, + "step": 1092 + }, + { + "epoch": 182.0, + "eval_accuracy": 0.8419680677745194, + "eval_best_threshold": 0.451703816652298, + "eval_f1": 0.8601902565580859, + "eval_loss": 0.3588697612285614, + "eval_pr_auc": 0.9288007303907959, + "eval_precision": 0.8424618859401468, + "eval_recall": 0.8786808009422851, + "eval_roc_auc": 0.9184053148725191, + "eval_runtime": 4.1365, + "eval_samples_per_second": 741.932, + "eval_steps_per_second": 1.451, + "step": 1092 + }, + { + "epoch": 182.16666666666666, + "grad_norm": 58866.40234375, + "learning_rate": 2.402430734890343e-05, + "loss": 0.3147, + "step": 1093 + }, + { + "epoch": 182.33333333333334, + "grad_norm": 93942.328125, + "learning_rate": 2.3580900356081904e-05, + "loss": 0.3327, + "step": 1094 + }, + { + "epoch": 182.5, + "grad_norm": 83109.65625, + "learning_rate": 2.314152462588659e-05, + "loss": 0.3356, + "step": 1095 + }, + { + "epoch": 182.66666666666666, + "grad_norm": 105172.3359375, + "learning_rate": 2.2706183876134045e-05, + "loss": 0.3514, + "step": 1096 + }, + { + "epoch": 182.83333333333334, + "grad_norm": 65252.1484375, + "learning_rate": 2.2274881790498913e-05, + "loss": 0.3422, + "step": 1097 + }, + { + "epoch": 183.0, + "grad_norm": 58000.1953125, + "learning_rate": 2.1847622018482283e-05, + "loss": 0.3336, + "step": 1098 + }, + { + "epoch": 183.0, + "eval_accuracy": 0.8422939068100358, + "eval_best_threshold": 0.4615512788295746, + "eval_f1": 0.8593023255813953, + "eval_loss": 0.35933783650398254, + "eval_pr_auc": 0.928577987507547, + "eval_precision": 0.8484500574052812, + "eval_recall": 0.8704358068315665, + "eval_roc_auc": 0.9181961186584982, + "eval_runtime": 4.1192, + "eval_samples_per_second": 745.048, + "eval_steps_per_second": 1.457, + "step": 1098 + }, + { + "epoch": 183.16666666666666, + "grad_norm": 75021.8828125, + "learning_rate": 2.142440817538066e-05, + "loss": 0.3462, + "step": 1099 + }, + { + "epoch": 183.33333333333334, + "grad_norm": 108155.3671875, + "learning_rate": 2.100524384225555e-05, + "loss": 0.3532, + "step": 1100 + }, + { + "epoch": 183.5, + "grad_norm": 60136.75, + "learning_rate": 2.0590132565903473e-05, + "loss": 0.3307, + "step": 1101 + }, + { + "epoch": 183.66666666666666, + "grad_norm": 64039.2109375, + "learning_rate": 2.0179077858825447e-05, + "loss": 0.326, + "step": 1102 + }, + { + "epoch": 183.83333333333334, + "grad_norm": 85661.9375, + "learning_rate": 1.977208319919721e-05, + "loss": 0.3576, + "step": 1103 + }, + { + "epoch": 184.0, + "grad_norm": 97985.703125, + "learning_rate": 1.9369152030840554e-05, + "loss": 0.3204, + "step": 1104 + }, + { + "epoch": 184.0, + "eval_accuracy": 0.8419680677745194, + "eval_best_threshold": 0.46132898330688477, + "eval_f1": 0.8590526009880849, + "eval_loss": 0.35893210768699646, + "eval_pr_auc": 0.9290151696778128, + "eval_precision": 0.8479632816982214, + "eval_recall": 0.8704358068315665, + "eval_roc_auc": 0.9183816890167262, + "eval_runtime": 4.2619, + "eval_samples_per_second": 720.096, + "eval_steps_per_second": 1.408, + "step": 1104 + }, + { + "epoch": 184.16666666666666, + "grad_norm": 63854.22265625, + "learning_rate": 1.8970287763193428e-05, + "loss": 0.3424, + "step": 1105 + }, + { + "epoch": 184.33333333333334, + "grad_norm": 61403.140625, + "learning_rate": 1.8575493771281205e-05, + "loss": 0.3312, + "step": 1106 + }, + { + "epoch": 184.5, + "grad_norm": 86111.2578125, + "learning_rate": 1.8184773395688526e-05, + "loss": 0.343, + "step": 1107 + }, + { + "epoch": 184.66666666666666, + "grad_norm": 122857.234375, + "learning_rate": 1.779812994253055e-05, + "loss": 0.346, + "step": 1108 + }, + { + "epoch": 184.83333333333334, + "grad_norm": 77238.8671875, + "learning_rate": 1.741556668342531e-05, + "loss": 0.3356, + "step": 1109 + }, + { + "epoch": 185.0, + "grad_norm": 85184.375, + "learning_rate": 1.70370868554659e-05, + "loss": 0.3307, + "step": 1110 + }, + { + "epoch": 185.0, + "eval_accuracy": 0.8439231019876181, + "eval_best_threshold": 0.5097879767417908, + "eval_f1": 0.858242083456644, + "eval_loss": 0.35859715938568115, + "eval_pr_auc": 0.9290756714382906, + "eval_precision": 0.8625817965496728, + "eval_recall": 0.8539458186101295, + "eval_roc_auc": 0.918589596547704, + "eval_runtime": 4.1985, + "eval_samples_per_second": 730.979, + "eval_steps_per_second": 1.429, + "step": 1110 + }, + { + "epoch": 185.16666666666666, + "grad_norm": 75691.6640625, + "learning_rate": 1.6662693661192997e-05, + "loss": 0.3324, + "step": 1111 + }, + { + "epoch": 185.33333333333334, + "grad_norm": 50374.91796875, + "learning_rate": 1.6292390268568102e-05, + "loss": 0.3305, + "step": 1112 + }, + { + "epoch": 185.5, + "grad_norm": 61129.52734375, + "learning_rate": 1.5926179810946185e-05, + "loss": 0.3259, + "step": 1113 + }, + { + "epoch": 185.66666666666666, + "grad_norm": 71613.9453125, + "learning_rate": 1.556406538704963e-05, + "loss": 0.3314, + "step": 1114 + }, + { + "epoch": 185.83333333333334, + "grad_norm": 62662.45703125, + "learning_rate": 1.5206050060942001e-05, + "loss": 0.3318, + "step": 1115 + }, + { + "epoch": 186.0, + "grad_norm": 68081.0078125, + "learning_rate": 1.4852136862001764e-05, + "loss": 0.347, + "step": 1116 + }, + { + "epoch": 186.0, + "eval_accuracy": 0.844574780058651, + "eval_best_threshold": 0.5024138689041138, + "eval_f1": 0.858833974548683, + "eval_loss": 0.35903245210647583, + "eval_pr_auc": 0.9289612048718296, + "eval_precision": 0.8631766805472932, + "eval_recall": 0.8545347467608951, + "eval_roc_auc": 0.9184444049248311, + "eval_runtime": 4.2396, + "eval_samples_per_second": 723.888, + "eval_steps_per_second": 1.415, + "step": 1116 + }, + { + "epoch": 186.16666666666666, + "grad_norm": 70388.890625, + "learning_rate": 1.450232878489699e-05, + "loss": 0.3416, + "step": 1117 + }, + { + "epoch": 186.33333333333334, + "grad_norm": 61398.140625, + "learning_rate": 1.4156628789559923e-05, + "loss": 0.3336, + "step": 1118 + }, + { + "epoch": 186.5, + "grad_norm": 92423.7109375, + "learning_rate": 1.3815039801161721e-05, + "loss": 0.34, + "step": 1119 + }, + { + "epoch": 186.66666666666666, + "grad_norm": 96490.03125, + "learning_rate": 1.3477564710088097e-05, + "loss": 0.3446, + "step": 1120 + }, + { + "epoch": 186.83333333333334, + "grad_norm": 83160.28125, + "learning_rate": 1.3144206371914601e-05, + "loss": 0.3277, + "step": 1121 + }, + { + "epoch": 187.0, + "grad_norm": 100917.78125, + "learning_rate": 1.2814967607382432e-05, + "loss": 0.3291, + "step": 1122 + }, + { + "epoch": 187.0, + "eval_accuracy": 0.8429455848810687, + "eval_best_threshold": 0.45496270060539246, + "eval_f1": 0.8600464576074333, + "eval_loss": 0.35840901732444763, + "eval_pr_auc": 0.9294939960565185, + "eval_precision": 0.8482245131729668, + "eval_recall": 0.8722025912838633, + "eval_roc_auc": 0.9187990075422323, + "eval_runtime": 4.8699, + "eval_samples_per_second": 630.198, + "eval_steps_per_second": 1.232, + "step": 1122 + }, + { + "epoch": 187.16666666666666, + "grad_norm": 71137.640625, + "learning_rate": 1.2489851202374724e-05, + "loss": 0.3169, + "step": 1123 + }, + { + "epoch": 187.33333333333334, + "grad_norm": 117276.4453125, + "learning_rate": 1.2168859907892904e-05, + "loss": 0.3514, + "step": 1124 + }, + { + "epoch": 187.5, + "grad_norm": 114218.09375, + "learning_rate": 1.185199644003332e-05, + "loss": 0.3436, + "step": 1125 + }, + { + "epoch": 187.66666666666666, + "grad_norm": 97303.96875, + "learning_rate": 1.1539263479964535e-05, + "loss": 0.3552, + "step": 1126 + }, + { + "epoch": 187.83333333333334, + "grad_norm": 80702.0546875, + "learning_rate": 1.123066367390424e-05, + "loss": 0.323, + "step": 1127 + }, + { + "epoch": 188.0, + "grad_norm": 59036.5390625, + "learning_rate": 1.0926199633097156e-05, + "loss": 0.3381, + "step": 1128 + }, + { + "epoch": 188.0, + "eval_accuracy": 0.8442489410231345, + "eval_best_threshold": 0.49203333258628845, + "eval_f1": 0.8590801886792453, + "eval_loss": 0.3574254512786865, + "eval_pr_auc": 0.9295933422197652, + "eval_precision": 0.8600944510035419, + "eval_recall": 0.8580683156654888, + "eval_roc_auc": 0.9192786124148288, + "eval_runtime": 4.3706, + "eval_samples_per_second": 702.193, + "eval_steps_per_second": 1.373, + "step": 1128 + }, + { + "epoch": 188.16666666666666, + "grad_norm": 58683.0, + "learning_rate": 1.0625873933792996e-05, + "loss": 0.3391, + "step": 1129 + }, + { + "epoch": 188.33333333333334, + "grad_norm": 51025.08203125, + "learning_rate": 1.0329689117224261e-05, + "loss": 0.3307, + "step": 1130 + }, + { + "epoch": 188.5, + "grad_norm": 95173.8984375, + "learning_rate": 1.0037647689585205e-05, + "loss": 0.3309, + "step": 1131 + }, + { + "epoch": 188.66666666666666, + "grad_norm": 123481.3359375, + "learning_rate": 9.749752122010347e-06, + "loss": 0.337, + "step": 1132 + }, + { + "epoch": 188.83333333333334, + "grad_norm": 106579.6953125, + "learning_rate": 9.46600485055371e-06, + "loss": 0.3498, + "step": 1133 + }, + { + "epoch": 189.0, + "grad_norm": 87096.609375, + "learning_rate": 9.186408276168012e-06, + "loss": 0.3346, + "step": 1134 + }, + { + "epoch": 189.0, + "eval_accuracy": 0.8419680677745194, + "eval_best_threshold": 0.4620799422264099, + "eval_f1": 0.859297940237888, + "eval_loss": 0.35710543394088745, + "eval_pr_auc": 0.9296763020932254, + "eval_precision": 0.8467695826186392, + "eval_recall": 0.8722025912838633, + "eval_roc_auc": 0.9193872913514762, + "eval_runtime": 4.154, + "eval_samples_per_second": 738.803, + "eval_steps_per_second": 1.444, + "step": 1134 + }, + { + "epoch": 189.16666666666666, + "grad_norm": 67883.25, + "learning_rate": 8.91096476468467e-06, + "loss": 0.3172, + "step": 1135 + }, + { + "epoch": 189.33333333333334, + "grad_norm": 60549.61328125, + "learning_rate": 8.639676646793382e-06, + "loss": 0.3289, + "step": 1136 + }, + { + "epoch": 189.5, + "grad_norm": 60352.7890625, + "learning_rate": 8.372546218022748e-06, + "loss": 0.3512, + "step": 1137 + }, + { + "epoch": 189.66666666666666, + "grad_norm": 86886.8359375, + "learning_rate": 8.109575738720621e-06, + "loss": 0.3293, + "step": 1138 + }, + { + "epoch": 189.83333333333334, + "grad_norm": 56512.19140625, + "learning_rate": 7.850767434035177e-06, + "loss": 0.3434, + "step": 1139 + }, + { + "epoch": 190.0, + "grad_norm": 88472.5625, + "learning_rate": 7.59612349389599e-06, + "loss": 0.3397, + "step": 1140 + }, + { + "epoch": 190.0, + "eval_accuracy": 0.8413163897034864, + "eval_best_threshold": 0.47650080919265747, + "eval_f1": 0.8582241630276565, + "eval_loss": 0.3567776381969452, + "eval_pr_auc": 0.9300562636053985, + "eval_precision": 0.8485895221646517, + "eval_recall": 0.8680800942285041, + "eval_roc_auc": 0.9194770696034894, + "eval_runtime": 4.4139, + "eval_samples_per_second": 695.303, + "eval_steps_per_second": 1.359, + "step": 1140 + }, + { + "epoch": 190.16666666666666, + "grad_norm": 80563.6796875, + "learning_rate": 7.345646072995315e-06, + "loss": 0.3177, + "step": 1141 + }, + { + "epoch": 190.33333333333334, + "grad_norm": 52460.4921875, + "learning_rate": 7.099337290770169e-06, + "loss": 0.3426, + "step": 1142 + }, + { + "epoch": 190.5, + "grad_norm": 60113.8359375, + "learning_rate": 6.8571992313842815e-06, + "loss": 0.3162, + "step": 1143 + }, + { + "epoch": 190.66666666666666, + "grad_norm": 86581.2109375, + "learning_rate": 6.61923394371039e-06, + "loss": 0.3342, + "step": 1144 + }, + { + "epoch": 190.83333333333334, + "grad_norm": 90135.9375, + "learning_rate": 6.385443441312977e-06, + "loss": 0.3307, + "step": 1145 + }, + { + "epoch": 191.0, + "grad_norm": 75371.625, + "learning_rate": 6.15582970243117e-06, + "loss": 0.3468, + "step": 1146 + }, + { + "epoch": 191.0, + "eval_accuracy": 0.8426197458455523, + "eval_best_threshold": 0.46230238676071167, + "eval_f1": 0.8587306229891781, + "eval_loss": 0.3580075204372406, + "eval_pr_auc": 0.9295473297541057, + "eval_precision": 0.852992446252179, + "eval_recall": 0.8645465253239105, + "eval_roc_auc": 0.9190067002927028, + "eval_runtime": 4.3965, + "eval_samples_per_second": 698.062, + "eval_steps_per_second": 1.365, + "step": 1146 + }, + { + "epoch": 191.16666666666666, + "grad_norm": 66957.3984375, + "learning_rate": 5.9303946699620355e-06, + "loss": 0.3281, + "step": 1147 + }, + { + "epoch": 191.33333333333334, + "grad_norm": 106900.6796875, + "learning_rate": 5.709140251444201e-06, + "loss": 0.3299, + "step": 1148 + }, + { + "epoch": 191.5, + "grad_norm": 69170.3203125, + "learning_rate": 5.492068319041588e-06, + "loss": 0.3284, + "step": 1149 + }, + { + "epoch": 191.66666666666666, + "grad_norm": 85578.3359375, + "learning_rate": 5.279180709527765e-06, + "loss": 0.3441, + "step": 1150 + }, + { + "epoch": 191.83333333333334, + "grad_norm": 90991.1484375, + "learning_rate": 5.070479224270119e-06, + "loss": 0.3408, + "step": 1151 + }, + { + "epoch": 192.0, + "grad_norm": 80094.2109375, + "learning_rate": 4.865965629214819e-06, + "loss": 0.3497, + "step": 1152 + }, + { + "epoch": 192.0, + "eval_accuracy": 0.8432714239165852, + "eval_best_threshold": 0.486149400472641, + "eval_f1": 0.8591508052708638, + "eval_loss": 0.3572799861431122, + "eval_pr_auc": 0.9297294809940666, + "eval_precision": 0.8543972044263249, + "eval_recall": 0.8639575971731449, + "eval_roc_auc": 0.9192713098775837, + "eval_runtime": 4.4784, + "eval_samples_per_second": 685.294, + "eval_steps_per_second": 1.34, + "step": 1152 + }, + { + "epoch": 192.16666666666666, + "grad_norm": 50093.2265625, + "learning_rate": 4.665641654871989e-06, + "loss": 0.323, + "step": 1153 + }, + { + "epoch": 192.33333333333334, + "grad_norm": 116718.796875, + "learning_rate": 4.469508996300664e-06, + "loss": 0.3508, + "step": 1154 + }, + { + "epoch": 192.5, + "grad_norm": 53607.64453125, + "learning_rate": 4.277569313094809e-06, + "loss": 0.3174, + "step": 1155 + }, + { + "epoch": 192.66666666666666, + "grad_norm": 54493.2421875, + "learning_rate": 4.089824229369155e-06, + "loss": 0.3451, + "step": 1156 + }, + { + "epoch": 192.83333333333334, + "grad_norm": 82322.3359375, + "learning_rate": 3.906275333745435e-06, + "loss": 0.3356, + "step": 1157 + }, + { + "epoch": 193.0, + "grad_norm": 93312.859375, + "learning_rate": 3.7269241793390084e-06, + "loss": 0.3567, + "step": 1158 + }, + { + "epoch": 193.0, + "eval_accuracy": 0.8432714239165852, + "eval_best_threshold": 0.4834613502025604, + "eval_f1": 0.8591508052708638, + "eval_loss": 0.35751256346702576, + "eval_pr_auc": 0.9295732860994348, + "eval_precision": 0.8543972044263249, + "eval_recall": 0.8639575971731449, + "eval_roc_auc": 0.9191643491849939, + "eval_runtime": 4.2553, + "eval_samples_per_second": 721.211, + "eval_steps_per_second": 1.41, + "step": 1158 + }, + { + "epoch": 193.16666666666666, + "grad_norm": 53519.34765625, + "learning_rate": 3.551772283745536e-06, + "loss": 0.3178, + "step": 1159 + }, + { + "epoch": 193.33333333333334, + "grad_norm": 48680.87109375, + "learning_rate": 3.3808211290284885e-06, + "loss": 0.327, + "step": 1160 + }, + { + "epoch": 193.5, + "grad_norm": 65628.5859375, + "learning_rate": 3.2140721617062717e-06, + "loss": 0.348, + "step": 1161 + }, + { + "epoch": 193.66666666666666, + "grad_norm": 87672.9296875, + "learning_rate": 3.0515267927400113e-06, + "loss": 0.3446, + "step": 1162 + }, + { + "epoch": 193.83333333333334, + "grad_norm": 95234.8671875, + "learning_rate": 2.8931863975218966e-06, + "loss": 0.3429, + "step": 1163 + }, + { + "epoch": 194.0, + "grad_norm": 61562.625, + "learning_rate": 2.739052315863355e-06, + "loss": 0.3158, + "step": 1164 + }, + { + "epoch": 194.0, + "eval_accuracy": 0.8432714239165852, + "eval_best_threshold": 0.4742875397205353, + "eval_f1": 0.8593155893536122, + "eval_loss": 0.3577769100666046, + "eval_pr_auc": 0.9295315138035642, + "eval_precision": 0.8535735037768739, + "eval_recall": 0.8651354534746761, + "eval_roc_auc": 0.919044931222986, + "eval_runtime": 4.2171, + "eval_samples_per_second": 727.752, + "eval_steps_per_second": 1.423, + "step": 1164 + }, + { + "epoch": 194.16666666666666, + "grad_norm": 65050.80859375, + "learning_rate": 2.589125851983509e-06, + "loss": 0.3344, + "step": 1165 + }, + { + "epoch": 194.33333333333334, + "grad_norm": 55117.078125, + "learning_rate": 2.4434082744984598e-06, + "loss": 0.3308, + "step": 1166 + }, + { + "epoch": 194.5, + "grad_norm": 97287.5, + "learning_rate": 2.3019008164105737e-06, + "loss": 0.3409, + "step": 1167 + }, + { + "epoch": 194.66666666666666, + "grad_norm": 59091.03125, + "learning_rate": 2.1646046750978256e-06, + "loss": 0.3445, + "step": 1168 + }, + { + "epoch": 194.83333333333334, + "grad_norm": 52100.3828125, + "learning_rate": 2.0315210123035833e-06, + "loss": 0.3412, + "step": 1169 + }, + { + "epoch": 195.0, + "grad_norm": 89186.1171875, + "learning_rate": 1.9026509541272275e-06, + "loss": 0.3369, + "step": 1170 + }, + { + "epoch": 195.0, + "eval_accuracy": 0.8442489410231345, + "eval_best_threshold": 0.48313555121421814, + "eval_f1": 0.8595769682726204, + "eval_loss": 0.35788410902023315, + "eval_pr_auc": 0.9295530985016898, + "eval_precision": 0.8575615474794842, + "eval_recall": 0.8616018845700825, + "eval_roc_auc": 0.919023453172265, + "eval_runtime": 4.2448, + "eval_samples_per_second": 723.009, + "eval_steps_per_second": 1.414, + "step": 1170 + }, + { + "epoch": 195.16666666666666, + "grad_norm": 74249.125, + "learning_rate": 1.7779955910142142e-06, + "loss": 0.3233, + "step": 1171 + }, + { + "epoch": 195.33333333333334, + "grad_norm": 83374.2265625, + "learning_rate": 1.6575559777469718e-06, + "loss": 0.3331, + "step": 1172 + }, + { + "epoch": 195.5, + "grad_norm": 59120.72265625, + "learning_rate": 1.541333133436018e-06, + "loss": 0.3472, + "step": 1173 + }, + { + "epoch": 195.66666666666666, + "grad_norm": 64595.88671875, + "learning_rate": 1.429328041511302e-06, + "loss": 0.3523, + "step": 1174 + }, + { + "epoch": 195.83333333333334, + "grad_norm": 77457.6640625, + "learning_rate": 1.3215416497138755e-06, + "loss": 0.3406, + "step": 1175 + }, + { + "epoch": 196.0, + "grad_norm": 60436.64453125, + "learning_rate": 1.2179748700879012e-06, + "loss": 0.3301, + "step": 1176 + }, + { + "epoch": 196.0, + "eval_accuracy": 0.8449006190941675, + "eval_best_threshold": 0.4860289394855499, + "eval_f1": 0.8602466236054023, + "eval_loss": 0.35769322514533997, + "eval_pr_auc": 0.9295719105862945, + "eval_precision": 0.857728337236534, + "eval_recall": 0.8627797408716137, + "eval_roc_auc": 0.9190900351294997, + "eval_runtime": 4.5203, + "eval_samples_per_second": 678.935, + "eval_steps_per_second": 1.327, + "step": 1176 + } + ], + "logging_steps": 1, + "max_steps": 1200, + "num_input_tokens_seen": 0, + "num_train_epochs": 200, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 20, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.880234183022346e+17, + "train_batch_size": 1024, + "trial_name": null, + "trial_params": null +}