{ "best_metric": 0.32019519805908203, "best_model_checkpoint": "./deberta_multilabel_safetyattribution_run1/checkpoint-2172", "epoch": 8.0, "eval_steps": 500, "global_step": 5792, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06906077348066299, "grad_norm": 17.054950714111328, "learning_rate": 5.753739930955121e-07, "loss": 2.7202, "step": 50 }, { "epoch": 0.13812154696132597, "grad_norm": 15.804380416870117, "learning_rate": 1.1507479861910242e-06, "loss": 2.4784, "step": 100 }, { "epoch": 0.20718232044198895, "grad_norm": 4.005581378936768, "learning_rate": 1.7261219792865363e-06, "loss": 1.9331, "step": 150 }, { "epoch": 0.27624309392265195, "grad_norm": 3.5140247344970703, "learning_rate": 2.3014959723820484e-06, "loss": 1.3647, "step": 200 }, { "epoch": 0.3453038674033149, "grad_norm": 10.396773338317871, "learning_rate": 2.8768699654775607e-06, "loss": 1.1814, "step": 250 }, { "epoch": 0.4143646408839779, "grad_norm": 4.15683126449585, "learning_rate": 3.4522439585730726e-06, "loss": 1.1349, "step": 300 }, { "epoch": 0.48342541436464087, "grad_norm": 13.827107429504395, "learning_rate": 4.027617951668585e-06, "loss": 1.0992, "step": 350 }, { "epoch": 0.5524861878453039, "grad_norm": 5.466891765594482, "learning_rate": 4.602991944764097e-06, "loss": 1.0756, "step": 400 }, { "epoch": 0.6215469613259669, "grad_norm": 5.3988118171691895, "learning_rate": 5.1783659378596095e-06, "loss": 1.0385, "step": 450 }, { "epoch": 0.6906077348066298, "grad_norm": 6.582638740539551, "learning_rate": 5.753739930955121e-06, "loss": 0.9732, "step": 500 }, { "epoch": 0.7596685082872928, "grad_norm": 4.769979953765869, "learning_rate": 6.329113924050634e-06, "loss": 0.9272, "step": 550 }, { "epoch": 0.8287292817679558, "grad_norm": 7.325048923492432, "learning_rate": 6.904487917146145e-06, "loss": 0.9391, "step": 600 }, { "epoch": 0.8977900552486188, "grad_norm": 5.211851596832275, "learning_rate": 7.479861910241658e-06, "loss": 0.9031, "step": 650 }, { "epoch": 0.9668508287292817, "grad_norm": 8.523693084716797, "learning_rate": 8.05523590333717e-06, "loss": 0.8428, "step": 700 }, { "epoch": 1.0, "eval_class_accuracy": { "label_0": 0.7745931503521982, "label_1": 0.9664804469273743, "label_2": 0.8299732815156667 }, "eval_class_f1": { "label_0": 0.5691736304549675, "label_1": 0.7523923444976076, "label_2": 0.6285815351963212 }, "eval_class_false_positive_rate": { "label_0": 0.06384009691095531, "label_1": 0.018147086914993647, "label_2": 0.0660200595172417 }, "eval_class_precision": { "label_0": 0.7772612003381234, "label_1": 0.7505966587112172, "label_2": 0.747895622895623 }, "eval_class_recall": { "label_0": 0.448974609375, "label_1": 0.7541966426858513, "label_2": 0.5420988407565589 }, "eval_loss": 0.4103058874607086, "eval_macro_f1": 0.6500491700496321, "eval_macro_precision": 0.7585844939816545, "eval_macro_recall": 0.5817566976058034, "eval_micro_f1": 0.6157528285465622, "eval_micro_precision": 0.760752688172043, "eval_micro_recall": 0.5171783625730995, "eval_runtime": 132.9806, "eval_samples_per_second": 92.878, "eval_steps_per_second": 1.451, "eval_subset_accuracy": 0.6513642620030766, "step": 724 }, { "epoch": 1.0359116022099448, "grad_norm": 5.507997989654541, "learning_rate": 8.630609896432683e-06, "loss": 0.8297, "step": 750 }, { "epoch": 1.1049723756906078, "grad_norm": 6.73417329788208, "learning_rate": 9.205983889528194e-06, "loss": 0.7722, "step": 800 }, { "epoch": 1.1740331491712708, "grad_norm": 5.604831218719482, "learning_rate": 9.781357882623706e-06, "loss": 0.7892, "step": 850 }, { "epoch": 1.2430939226519337, "grad_norm": 6.269744873046875, "learning_rate": 9.93703026609791e-06, "loss": 0.7489, "step": 900 }, { "epoch": 1.3121546961325967, "grad_norm": 5.223318576812744, "learning_rate": 9.83546617915905e-06, "loss": 0.756, "step": 950 }, { "epoch": 1.3812154696132597, "grad_norm": 6.4933180809021, "learning_rate": 9.733902092220193e-06, "loss": 0.706, "step": 1000 }, { "epoch": 1.4502762430939227, "grad_norm": 4.81841516494751, "learning_rate": 9.632338005281333e-06, "loss": 0.7007, "step": 1050 }, { "epoch": 1.5193370165745856, "grad_norm": 4.895750999450684, "learning_rate": 9.530773918342476e-06, "loss": 0.7067, "step": 1100 }, { "epoch": 1.5883977900552486, "grad_norm": 15.617860794067383, "learning_rate": 9.429209831403616e-06, "loss": 0.6879, "step": 1150 }, { "epoch": 1.6574585635359116, "grad_norm": 5.868403911590576, "learning_rate": 9.327645744464759e-06, "loss": 0.6599, "step": 1200 }, { "epoch": 1.7265193370165746, "grad_norm": 5.1376423835754395, "learning_rate": 9.2260816575259e-06, "loss": 0.6524, "step": 1250 }, { "epoch": 1.7955801104972375, "grad_norm": 8.1726713180542, "learning_rate": 9.124517570587042e-06, "loss": 0.6301, "step": 1300 }, { "epoch": 1.8646408839779005, "grad_norm": 5.737952709197998, "learning_rate": 9.022953483648182e-06, "loss": 0.6554, "step": 1350 }, { "epoch": 1.9337016574585635, "grad_norm": 8.225056648254395, "learning_rate": 8.921389396709325e-06, "loss": 0.6457, "step": 1400 }, { "epoch": 2.0, "eval_class_accuracy": { "label_0": 0.8472998137802608, "label_1": 0.9697190510889806, "label_2": 0.8784713788357218 }, "eval_class_f1": { "label_0": 0.7341415280518748, "label_1": 0.7713936430317848, "label_2": 0.7481966113068277 }, "eval_class_false_positive_rate": { "label_0": 0.04772864930344667, "label_1": 0.014847616566812985, "label_2": 0.049928358866962425 }, "eval_class_precision": { "label_0": 0.8685790527018012, "label_1": 0.786783042394015, "label_2": 0.8311591502049944 }, "eval_class_recall": { "label_0": 0.6357421875, "label_1": 0.7565947242206235, "label_2": 0.6802928615009152 }, "eval_loss": 0.3337920904159546, "eval_macro_f1": 0.7512439274634959, "eval_macro_precision": 0.8288404151002702, "eval_macro_recall": 0.6908765910738461, "eval_micro_f1": 0.7439929208358859, "eval_micro_precision": 0.8429739318216874, "eval_micro_recall": 0.6658138401559455, "eval_runtime": 132.9461, "eval_samples_per_second": 92.902, "eval_steps_per_second": 1.452, "eval_subset_accuracy": 0.7538660837179175, "step": 1448 }, { "epoch": 2.0027624309392267, "grad_norm": 4.911665439605713, "learning_rate": 8.819825309770465e-06, "loss": 0.6339, "step": 1450 }, { "epoch": 2.0718232044198897, "grad_norm": 5.494454383850098, "learning_rate": 8.718261222831608e-06, "loss": 0.6068, "step": 1500 }, { "epoch": 2.1408839779005526, "grad_norm": 7.909505844116211, "learning_rate": 8.616697135892748e-06, "loss": 0.6289, "step": 1550 }, { "epoch": 2.2099447513812156, "grad_norm": 6.768340587615967, "learning_rate": 8.515133048953891e-06, "loss": 0.6021, "step": 1600 }, { "epoch": 2.2790055248618786, "grad_norm": 5.231354236602783, "learning_rate": 8.413568962015032e-06, "loss": 0.585, "step": 1650 }, { "epoch": 2.3480662983425415, "grad_norm": 3.701436758041382, "learning_rate": 8.312004875076174e-06, "loss": 0.5728, "step": 1700 }, { "epoch": 2.4171270718232045, "grad_norm": 3.403343915939331, "learning_rate": 8.210440788137315e-06, "loss": 0.5746, "step": 1750 }, { "epoch": 2.4861878453038675, "grad_norm": 4.147812843322754, "learning_rate": 8.108876701198457e-06, "loss": 0.6028, "step": 1800 }, { "epoch": 2.5552486187845305, "grad_norm": 8.234676361083984, "learning_rate": 8.007312614259598e-06, "loss": 0.5451, "step": 1850 }, { "epoch": 2.6243093922651934, "grad_norm": 3.46964955329895, "learning_rate": 7.90574852732074e-06, "loss": 0.591, "step": 1900 }, { "epoch": 2.6933701657458564, "grad_norm": 5.345615386962891, "learning_rate": 7.80418444038188e-06, "loss": 0.5685, "step": 1950 }, { "epoch": 2.7624309392265194, "grad_norm": 5.062536239624023, "learning_rate": 7.702620353443023e-06, "loss": 0.5786, "step": 2000 }, { "epoch": 2.8314917127071824, "grad_norm": 5.448482036590576, "learning_rate": 7.6010562665041645e-06, "loss": 0.5773, "step": 2050 }, { "epoch": 2.9005524861878453, "grad_norm": 7.135024547576904, "learning_rate": 7.499492179565306e-06, "loss": 0.5849, "step": 2100 }, { "epoch": 2.9696132596685083, "grad_norm": 4.267426490783691, "learning_rate": 7.3979280926264475e-06, "loss": 0.5663, "step": 2150 }, { "epoch": 3.0, "eval_class_accuracy": { "label_0": 0.8591207189701239, "label_1": 0.9690713302566594, "label_2": 0.8903732491296251 }, "eval_class_f1": { "label_0": 0.7654986522911051, "label_1": 0.7768691588785047, "label_2": 0.7841198979591837 }, "eval_class_false_positive_rate": { "label_0": 0.05863113264687357, "label_1": 0.01849439958322319, "label_2": 0.05896616334177681 }, "eval_class_precision": { "label_0": 0.8543922984356197, "label_1": 0.7574031890660592, "label_2": 0.821309285237141 }, "eval_class_recall": { "label_0": 0.693359375, "label_1": 0.7973621103117506, "label_2": 0.7501525320317267 }, "eval_loss": 0.32019519805908203, "eval_macro_f1": 0.775495903042931, "eval_macro_precision": 0.8110349242462732, "eval_macro_recall": 0.746958005781159, "eval_micro_f1": 0.7743443261490522, "eval_micro_precision": 0.8287937743190662, "eval_micro_recall": 0.7266081871345029, "eval_runtime": 132.8667, "eval_samples_per_second": 92.958, "eval_steps_per_second": 1.453, "eval_subset_accuracy": 0.7761314873289612, "step": 2172 }, { "epoch": 3.0386740331491713, "grad_norm": 3.6615378856658936, "learning_rate": 7.29636400568759e-06, "loss": 0.5629, "step": 2200 }, { "epoch": 3.1077348066298343, "grad_norm": 7.720168590545654, "learning_rate": 7.194799918748731e-06, "loss": 0.5422, "step": 2250 }, { "epoch": 3.1767955801104972, "grad_norm": 5.942800521850586, "learning_rate": 7.093235831809873e-06, "loss": 0.5198, "step": 2300 }, { "epoch": 3.24585635359116, "grad_norm": 4.374157428741455, "learning_rate": 6.991671744871014e-06, "loss": 0.528, "step": 2350 }, { "epoch": 3.314917127071823, "grad_norm": 4.527683734893799, "learning_rate": 6.890107657932156e-06, "loss": 0.5481, "step": 2400 }, { "epoch": 3.383977900552486, "grad_norm": 3.410897731781006, "learning_rate": 6.7885435709932975e-06, "loss": 0.531, "step": 2450 }, { "epoch": 3.453038674033149, "grad_norm": 3.105001449584961, "learning_rate": 6.686979484054439e-06, "loss": 0.5193, "step": 2500 }, { "epoch": 3.522099447513812, "grad_norm": 4.019840717315674, "learning_rate": 6.585415397115581e-06, "loss": 0.5435, "step": 2550 }, { "epoch": 3.591160220994475, "grad_norm": 2.8091235160827637, "learning_rate": 6.483851310176722e-06, "loss": 0.5028, "step": 2600 }, { "epoch": 3.660220994475138, "grad_norm": 6.829853057861328, "learning_rate": 6.382287223237864e-06, "loss": 0.5308, "step": 2650 }, { "epoch": 3.729281767955801, "grad_norm": 6.366962432861328, "learning_rate": 6.280723136299005e-06, "loss": 0.5431, "step": 2700 }, { "epoch": 3.798342541436464, "grad_norm": 7.169396877288818, "learning_rate": 6.1791590493601475e-06, "loss": 0.5297, "step": 2750 }, { "epoch": 3.867403314917127, "grad_norm": 4.60045051574707, "learning_rate": 6.077594962421288e-06, "loss": 0.5226, "step": 2800 }, { "epoch": 3.93646408839779, "grad_norm": 3.7710654735565186, "learning_rate": 5.9760308754824305e-06, "loss": 0.528, "step": 2850 }, { "epoch": 4.0, "eval_class_accuracy": { "label_0": 0.8668933689579791, "label_1": 0.9708525625455429, "label_2": 0.8956359808922354 }, "eval_class_f1": { "label_0": 0.7924242424242425, "label_1": 0.7904540162980209, "label_2": 0.7929982334992773 }, "eval_class_false_positive_rate": { "label_0": 0.08310115081767618, "label_1": 0.017799774246764103, "label_2": 0.05290422131598667 }, "eval_class_precision": { "label_0": 0.8206066945606695, "label_1": 0.7680995475113123, "label_2": 0.8372329603255341 }, "eval_class_recall": { "label_0": 0.76611328125, "label_1": 0.8141486810551559, "label_2": 0.7532031726662599 }, "eval_loss": 0.3253972828388214, "eval_macro_f1": 0.7919588307405135, "eval_macro_precision": 0.8086464007991719, "eval_macro_recall": 0.7778217116571385, "eval_micro_f1": 0.7924361802710369, "eval_micro_precision": 0.8209481520177615, "eval_micro_recall": 0.7658382066276803, "eval_runtime": 132.8554, "eval_samples_per_second": 92.966, "eval_steps_per_second": 1.453, "eval_subset_accuracy": 0.7909481013683103, "step": 2896 }, { "epoch": 4.005524861878453, "grad_norm": 6.861307621002197, "learning_rate": 5.874466788543571e-06, "loss": 0.5269, "step": 2900 }, { "epoch": 4.074585635359116, "grad_norm": 5.186521530151367, "learning_rate": 5.7729027016047135e-06, "loss": 0.5032, "step": 2950 }, { "epoch": 4.143646408839779, "grad_norm": 2.319528579711914, "learning_rate": 5.671338614665854e-06, "loss": 0.4947, "step": 3000 }, { "epoch": 4.212707182320442, "grad_norm": 7.0490403175354, "learning_rate": 5.5697745277269966e-06, "loss": 0.475, "step": 3050 }, { "epoch": 4.281767955801105, "grad_norm": 6.733311653137207, "learning_rate": 5.468210440788137e-06, "loss": 0.4758, "step": 3100 }, { "epoch": 4.350828729281768, "grad_norm": 5.118820667266846, "learning_rate": 5.36664635384928e-06, "loss": 0.5216, "step": 3150 }, { "epoch": 4.419889502762431, "grad_norm": 4.245698928833008, "learning_rate": 5.26508226691042e-06, "loss": 0.4965, "step": 3200 }, { "epoch": 4.488950276243094, "grad_norm": 6.310864448547363, "learning_rate": 5.163518179971563e-06, "loss": 0.4979, "step": 3250 }, { "epoch": 4.558011049723757, "grad_norm": 10.312039375305176, "learning_rate": 5.061954093032704e-06, "loss": 0.4812, "step": 3300 }, { "epoch": 4.62707182320442, "grad_norm": 5.925340175628662, "learning_rate": 4.960390006093846e-06, "loss": 0.4946, "step": 3350 }, { "epoch": 4.696132596685083, "grad_norm": 2.886112689971924, "learning_rate": 4.858825919154987e-06, "loss": 0.488, "step": 3400 }, { "epoch": 4.765193370165746, "grad_norm": 5.802464008331299, "learning_rate": 4.757261832216129e-06, "loss": 0.4914, "step": 3450 }, { "epoch": 4.834254143646409, "grad_norm": 3.5704684257507324, "learning_rate": 4.65569774527727e-06, "loss": 0.4581, "step": 3500 }, { "epoch": 4.903314917127072, "grad_norm": 4.317540168762207, "learning_rate": 4.554133658338412e-06, "loss": 0.4881, "step": 3550 }, { "epoch": 4.972375690607735, "grad_norm": 2.523470401763916, "learning_rate": 4.452569571399554e-06, "loss": 0.4949, "step": 3600 }, { "epoch": 5.0, "eval_class_accuracy": { "label_0": 0.8691603918711035, "label_1": 0.9713383531697838, "label_2": 0.8945024694356732 }, "eval_class_f1": { "label_0": 0.7941925624044829, "label_1": 0.787515006002401, "label_2": 0.7994458981068185 }, "eval_class_false_positive_rate": { "label_0": 0.07728649303451517, "label_1": 0.015281757402099914, "label_2": 0.0685550534552994 }, "eval_class_precision": { "label_0": 0.8301384451544196, "label_1": 0.7884615384615384, "label_2": 0.8067722895309102 }, "eval_class_recall": { "label_0": 0.76123046875, "label_1": 0.7865707434052758, "label_2": 0.7922513727882855 }, "eval_loss": 0.3214770257472992, "eval_macro_f1": 0.7937178221712342, "eval_macro_precision": 0.8084574243822894, "eval_macro_recall": 0.7800175283145204, "eval_micro_f1": 0.7956290977208866, "eval_micro_precision": 0.816062508005636, "eval_micro_recall": 0.7761939571150097, "eval_runtime": 133.0557, "eval_samples_per_second": 92.826, "eval_steps_per_second": 1.451, "eval_subset_accuracy": 0.791838717512752, "step": 3620 }, { "epoch": 5.041436464088398, "grad_norm": 5.695593357086182, "learning_rate": 4.351005484460696e-06, "loss": 0.4785, "step": 3650 }, { "epoch": 5.110497237569061, "grad_norm": 6.8877692222595215, "learning_rate": 4.249441397521837e-06, "loss": 0.4648, "step": 3700 }, { "epoch": 5.179558011049724, "grad_norm": 4.254743576049805, "learning_rate": 4.147877310582979e-06, "loss": 0.4543, "step": 3750 }, { "epoch": 5.248618784530387, "grad_norm": 4.990494251251221, "learning_rate": 4.04631322364412e-06, "loss": 0.4654, "step": 3800 }, { "epoch": 5.31767955801105, "grad_norm": 20.026336669921875, "learning_rate": 3.944749136705262e-06, "loss": 0.4667, "step": 3850 }, { "epoch": 5.386740331491713, "grad_norm": 5.121794700622559, "learning_rate": 3.843185049766403e-06, "loss": 0.4763, "step": 3900 }, { "epoch": 5.455801104972376, "grad_norm": 7.3256516456604, "learning_rate": 3.7416209628275447e-06, "loss": 0.4418, "step": 3950 }, { "epoch": 5.524861878453039, "grad_norm": 7.178898811340332, "learning_rate": 3.6400568758886863e-06, "loss": 0.467, "step": 4000 }, { "epoch": 5.593922651933702, "grad_norm": 7.906302452087402, "learning_rate": 3.5384927889498278e-06, "loss": 0.426, "step": 4050 }, { "epoch": 5.662983425414365, "grad_norm": 4.6435546875, "learning_rate": 3.4369287020109693e-06, "loss": 0.4585, "step": 4100 }, { "epoch": 5.732044198895028, "grad_norm": 6.0284104347229, "learning_rate": 3.335364615072111e-06, "loss": 0.441, "step": 4150 }, { "epoch": 5.801104972375691, "grad_norm": 4.433581829071045, "learning_rate": 3.2338005281332523e-06, "loss": 0.4645, "step": 4200 }, { "epoch": 5.870165745856354, "grad_norm": 4.757821083068848, "learning_rate": 3.132236441194394e-06, "loss": 0.4652, "step": 4250 }, { "epoch": 5.939226519337017, "grad_norm": 6.152480602264404, "learning_rate": 3.0306723542555354e-06, "loss": 0.4484, "step": 4300 }, { "epoch": 6.0, "eval_class_accuracy": { "label_0": 0.8723989960327099, "label_1": 0.9698000161930208, "label_2": 0.8949882600599142 }, "eval_class_f1": { "label_0": 0.7942021415513189, "label_1": 0.7852619458837076, "label_2": 0.7991949218145223 }, "eval_class_false_positive_rate": { "label_0": 0.0631132646880602, "label_1": 0.019189024919682277, "label_2": 0.06613027664498335 }, "eval_class_precision": { "label_0": 0.8537338573834924, "label_1": 0.7552602436323367, "label_2": 0.8113800691606413 }, "eval_class_recall": { "label_0": 0.742431640625, "label_1": 0.8177458033573142, "label_2": 0.7873703477730324 }, "eval_loss": 0.3344503939151764, "eval_macro_f1": 0.7928863364165163, "eval_macro_precision": 0.8067913900588235, "eval_macro_recall": 0.7825159305851156, "eval_micro_f1": 0.7952567175476221, "eval_micro_precision": 0.8244833900078472, "eval_micro_recall": 0.7680311890838206, "eval_runtime": 132.822, "eval_samples_per_second": 92.989, "eval_steps_per_second": 1.453, "eval_subset_accuracy": 0.792324508136993, "step": 4344 }, { "epoch": 6.00828729281768, "grad_norm": 7.724102973937988, "learning_rate": 2.929108267316677e-06, "loss": 0.4457, "step": 4350 }, { "epoch": 6.077348066298343, "grad_norm": 7.960653305053711, "learning_rate": 2.827544180377819e-06, "loss": 0.4507, "step": 4400 }, { "epoch": 6.1464088397790055, "grad_norm": 7.553245544433594, "learning_rate": 2.7259800934389603e-06, "loss": 0.4214, "step": 4450 }, { "epoch": 6.2154696132596685, "grad_norm": 13.122285842895508, "learning_rate": 2.624416006500102e-06, "loss": 0.4561, "step": 4500 }, { "epoch": 6.2845303867403315, "grad_norm": 5.380446910858154, "learning_rate": 2.5228519195612434e-06, "loss": 0.4195, "step": 4550 }, { "epoch": 6.3535911602209945, "grad_norm": 8.281150817871094, "learning_rate": 2.421287832622385e-06, "loss": 0.4411, "step": 4600 }, { "epoch": 6.422651933701657, "grad_norm": 5.860757350921631, "learning_rate": 2.3197237456835264e-06, "loss": 0.461, "step": 4650 }, { "epoch": 6.49171270718232, "grad_norm": 5.274202346801758, "learning_rate": 2.218159658744668e-06, "loss": 0.4469, "step": 4700 }, { "epoch": 6.560773480662983, "grad_norm": 3.1023948192596436, "learning_rate": 2.1165955718058095e-06, "loss": 0.4353, "step": 4750 }, { "epoch": 6.629834254143646, "grad_norm": 3.906625270843506, "learning_rate": 2.015031484866951e-06, "loss": 0.413, "step": 4800 }, { "epoch": 6.698895027624309, "grad_norm": 4.5051422119140625, "learning_rate": 1.9134673979280925e-06, "loss": 0.4255, "step": 4850 }, { "epoch": 6.767955801104972, "grad_norm": 6.132110118865967, "learning_rate": 1.8119033109892342e-06, "loss": 0.4412, "step": 4900 }, { "epoch": 6.837016574585635, "grad_norm": 3.9629147052764893, "learning_rate": 1.710339224050376e-06, "loss": 0.4081, "step": 4950 }, { "epoch": 6.906077348066298, "grad_norm": 8.018949508666992, "learning_rate": 1.6087751371115177e-06, "loss": 0.4497, "step": 5000 }, { "epoch": 6.975138121546961, "grad_norm": 4.3800177574157715, "learning_rate": 1.5072110501726592e-06, "loss": 0.4381, "step": 5050 }, { "epoch": 7.0, "eval_class_accuracy": { "label_0": 0.8718322403044287, "label_1": 0.9686665047364585, "label_2": 0.896445631932637 }, "eval_class_f1": { "label_0": 0.7970772977823356, "label_1": 0.7792355961209355, "label_2": 0.8016746782446891 }, "eval_class_false_positive_rate": { "label_0": 0.07219866747424927, "label_1": 0.020491447425543066, "label_2": 0.0645872368566004 }, "eval_class_precision": { "label_0": 0.839136302294197, "label_1": 0.7431991294885746, "label_2": 0.815200252286345 }, "eval_class_recall": { "label_0": 0.759033203125, "label_1": 0.8189448441247003, "label_2": 0.7885906040268457 }, "eval_loss": 0.34001022577285767, "eval_macro_f1": 0.79266252404932, "eval_macro_precision": 0.7991785613563721, "eval_macro_recall": 0.788856217092182, "eval_micro_f1": 0.7969755670811722, "eval_micro_precision": 0.8180885182809493, "eval_micro_recall": 0.7769249512670565, "eval_runtime": 133.0263, "eval_samples_per_second": 92.846, "eval_steps_per_second": 1.451, "eval_subset_accuracy": 0.7941867055299167, "step": 5068 }, { "epoch": 7.044198895027624, "grad_norm": 5.756816387176514, "learning_rate": 1.4056469632338007e-06, "loss": 0.4057, "step": 5100 }, { "epoch": 7.113259668508287, "grad_norm": 10.789379119873047, "learning_rate": 1.3040828762949422e-06, "loss": 0.4082, "step": 5150 }, { "epoch": 7.18232044198895, "grad_norm": 6.213741779327393, "learning_rate": 1.2025187893560838e-06, "loss": 0.4371, "step": 5200 }, { "epoch": 7.251381215469613, "grad_norm": 10.314269065856934, "learning_rate": 1.1009547024172255e-06, "loss": 0.414, "step": 5250 }, { "epoch": 7.320441988950276, "grad_norm": 4.766229152679443, "learning_rate": 9.99390615478367e-07, "loss": 0.4262, "step": 5300 }, { "epoch": 7.389502762430939, "grad_norm": 10.421786308288574, "learning_rate": 8.978265285395085e-07, "loss": 0.4089, "step": 5350 }, { "epoch": 7.458563535911602, "grad_norm": 7.079195976257324, "learning_rate": 7.9626244160065e-07, "loss": 0.4176, "step": 5400 }, { "epoch": 7.527624309392265, "grad_norm": 11.755300521850586, "learning_rate": 6.946983546617917e-07, "loss": 0.4175, "step": 5450 }, { "epoch": 7.596685082872928, "grad_norm": 5.82271146774292, "learning_rate": 5.931342677229332e-07, "loss": 0.4028, "step": 5500 }, { "epoch": 7.665745856353591, "grad_norm": 5.967957019805908, "learning_rate": 4.915701807840748e-07, "loss": 0.4248, "step": 5550 }, { "epoch": 7.734806629834254, "grad_norm": 8.050609588623047, "learning_rate": 3.900060938452164e-07, "loss": 0.4203, "step": 5600 }, { "epoch": 7.803867403314917, "grad_norm": 5.390321254730225, "learning_rate": 2.8844200690635795e-07, "loss": 0.4054, "step": 5650 }, { "epoch": 7.87292817679558, "grad_norm": 4.956524848937988, "learning_rate": 1.868779199674995e-07, "loss": 0.4327, "step": 5700 }, { "epoch": 7.941988950276243, "grad_norm": 3.822096347808838, "learning_rate": 8.531383302864108e-08, "loss": 0.4183, "step": 5750 }, { "epoch": 8.0, "eval_class_accuracy": { "label_0": 0.8722370658246296, "label_1": 0.9690713302566594, "label_2": 0.8970123876609182 }, "eval_class_f1": { "label_0": 0.797640420620672, "label_1": 0.7791907514450868, "label_2": 0.8029129222187791 }, "eval_class_false_positive_rate": { "label_0": 0.07171411265898586, "label_1": 0.019275853086739663, "label_2": 0.06447701972885876 }, "eval_class_precision": { "label_0": 0.8400864397622907, "label_1": 0.7522321428571429, "label_2": 0.8158060453400504 }, "eval_class_recall": { "label_0": 0.75927734375, "label_1": 0.8081534772182254, "label_2": 0.7904209884075656 }, "eval_loss": 0.34776991605758667, "eval_macro_f1": 0.7932480314281793, "eval_macro_precision": 0.8027082093198281, "eval_macro_recall": 0.7859506031252637, "eval_micro_f1": 0.7977724940558127, "eval_micro_precision": 0.8200411628505274, "eval_micro_recall": 0.7766812865497076, "eval_runtime": 132.8386, "eval_samples_per_second": 92.977, "eval_steps_per_second": 1.453, "eval_subset_accuracy": 0.7951582867783985, "step": 5792 } ], "logging_steps": 50, "max_steps": 5792, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.9496632912293888e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }