{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999926226484692, "eval_steps": 20, "global_step": 6777, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_accuracy": 0.8306801736613604, "eval_f1": 0.6163934426229508, "eval_loss": 0.4624278247356415, "eval_precision": 0.9494949494949495, "eval_recall": 0.4563106796116505, "eval_runtime": 50.0967, "eval_samples_per_second": 5.809, "eval_steps_per_second": 0.2, "step": 0 }, { "epoch": 0.00014754703061600885, "grad_norm": 1.9701803922653198, "learning_rate": 2.9498525073746314e-08, "loss": 0.6704, "step": 1 }, { "epoch": 0.0002950940612320177, "grad_norm": 1.8603360652923584, "learning_rate": 5.899705014749263e-08, "loss": 0.5838, "step": 2 }, { "epoch": 0.00044264109184802655, "grad_norm": 2.2240378856658936, "learning_rate": 8.849557522123894e-08, "loss": 0.5271, "step": 3 }, { "epoch": 0.0005901881224640354, "grad_norm": 1.9955577850341797, "learning_rate": 1.1799410029498526e-07, "loss": 0.6257, "step": 4 }, { "epoch": 0.0007377351530800443, "grad_norm": 2.2215893268585205, "learning_rate": 1.4749262536873157e-07, "loss": 0.5975, "step": 5 }, { "epoch": 0.0008852821836960531, "grad_norm": 1.9294912815093994, "learning_rate": 1.7699115044247788e-07, "loss": 0.5903, "step": 6 }, { "epoch": 0.0010328292143120619, "grad_norm": 2.0847418308258057, "learning_rate": 2.064896755162242e-07, "loss": 0.5851, "step": 7 }, { "epoch": 0.0011803762449280708, "grad_norm": 2.115176200866699, "learning_rate": 2.359882005899705e-07, "loss": 0.5952, "step": 8 }, { "epoch": 0.0013279232755440797, "grad_norm": 2.261711835861206, "learning_rate": 2.654867256637168e-07, "loss": 0.589, "step": 9 }, { "epoch": 0.0014754703061600886, "grad_norm": 1.9945313930511475, "learning_rate": 2.9498525073746315e-07, "loss": 0.6214, "step": 10 }, { "epoch": 0.0016230173367760973, "grad_norm": 1.9860559701919556, "learning_rate": 3.244837758112095e-07, "loss": 0.6032, "step": 11 }, { "epoch": 0.0017705643673921062, "grad_norm": 2.2528653144836426, "learning_rate": 3.5398230088495575e-07, "loss": 0.6562, "step": 12 }, { "epoch": 0.001918111398008115, "grad_norm": 2.50659441947937, "learning_rate": 3.834808259587021e-07, "loss": 0.6093, "step": 13 }, { "epoch": 0.0020656584286241238, "grad_norm": 1.8829281330108643, "learning_rate": 4.129793510324484e-07, "loss": 0.6456, "step": 14 }, { "epoch": 0.0022132054592401327, "grad_norm": 2.3590948581695557, "learning_rate": 4.4247787610619474e-07, "loss": 0.722, "step": 15 }, { "epoch": 0.0023607524898561416, "grad_norm": 1.8200560808181763, "learning_rate": 4.71976401179941e-07, "loss": 0.5189, "step": 16 }, { "epoch": 0.0025082995204721505, "grad_norm": 2.125967502593994, "learning_rate": 5.014749262536873e-07, "loss": 0.5547, "step": 17 }, { "epoch": 0.0026558465510881594, "grad_norm": 2.1043388843536377, "learning_rate": 5.309734513274336e-07, "loss": 0.5487, "step": 18 }, { "epoch": 0.0028033935817041683, "grad_norm": 2.0919413566589355, "learning_rate": 5.6047197640118e-07, "loss": 0.5783, "step": 19 }, { "epoch": 0.002950940612320177, "grad_norm": 2.051356554031372, "learning_rate": 5.899705014749263e-07, "loss": 0.6147, "step": 20 }, { "epoch": 0.002950940612320177, "eval_accuracy": 0.829232995658466, "eval_f1": 0.6168831168831169, "eval_loss": 0.4627985954284668, "eval_precision": 0.9313725490196079, "eval_recall": 0.46116504854368934, "eval_runtime": 49.6124, "eval_samples_per_second": 5.865, "eval_steps_per_second": 0.202, "step": 20 }, { "epoch": 0.003098487642936186, "grad_norm": 2.2795355319976807, "learning_rate": 6.194690265486726e-07, "loss": 0.5637, "step": 21 }, { "epoch": 0.0032460346735521946, "grad_norm": 2.2324700355529785, "learning_rate": 6.48967551622419e-07, "loss": 0.5646, "step": 22 }, { "epoch": 0.0033935817041682035, "grad_norm": 3.0950839519500732, "learning_rate": 6.784660766961653e-07, "loss": 0.6036, "step": 23 }, { "epoch": 0.0035411287347842124, "grad_norm": 1.8922393321990967, "learning_rate": 7.079646017699115e-07, "loss": 0.5911, "step": 24 }, { "epoch": 0.0036886757654002213, "grad_norm": 2.513780117034912, "learning_rate": 7.374631268436578e-07, "loss": 0.6239, "step": 25 }, { "epoch": 0.00383622279601623, "grad_norm": 2.138617753982544, "learning_rate": 7.669616519174042e-07, "loss": 0.5282, "step": 26 }, { "epoch": 0.003983769826632239, "grad_norm": 1.7860370874404907, "learning_rate": 7.964601769911505e-07, "loss": 0.5166, "step": 27 }, { "epoch": 0.0041313168572482475, "grad_norm": 1.7825922966003418, "learning_rate": 8.259587020648968e-07, "loss": 0.5581, "step": 28 }, { "epoch": 0.004278863887864257, "grad_norm": 1.8826736211776733, "learning_rate": 8.554572271386432e-07, "loss": 0.5276, "step": 29 }, { "epoch": 0.004426410918480265, "grad_norm": 2.067537307739258, "learning_rate": 8.849557522123895e-07, "loss": 0.5944, "step": 30 }, { "epoch": 0.004573957949096275, "grad_norm": 2.011340379714966, "learning_rate": 9.144542772861357e-07, "loss": 0.5603, "step": 31 }, { "epoch": 0.004721504979712283, "grad_norm": 1.8796640634536743, "learning_rate": 9.43952802359882e-07, "loss": 0.5679, "step": 32 }, { "epoch": 0.0048690520103282925, "grad_norm": 2.359736442565918, "learning_rate": 9.734513274336284e-07, "loss": 0.5206, "step": 33 }, { "epoch": 0.005016599040944301, "grad_norm": 2.2447509765625, "learning_rate": 1.0029498525073746e-06, "loss": 0.5807, "step": 34 }, { "epoch": 0.005164146071560309, "grad_norm": 1.9554908275604248, "learning_rate": 1.032448377581121e-06, "loss": 0.5323, "step": 35 }, { "epoch": 0.005311693102176319, "grad_norm": 2.3275227546691895, "learning_rate": 1.0619469026548673e-06, "loss": 0.5667, "step": 36 }, { "epoch": 0.005459240132792327, "grad_norm": 2.8186075687408447, "learning_rate": 1.0914454277286137e-06, "loss": 0.655, "step": 37 }, { "epoch": 0.005606787163408337, "grad_norm": 1.6935522556304932, "learning_rate": 1.12094395280236e-06, "loss": 0.5529, "step": 38 }, { "epoch": 0.005754334194024345, "grad_norm": 2.2262094020843506, "learning_rate": 1.1504424778761064e-06, "loss": 0.6316, "step": 39 }, { "epoch": 0.005901881224640354, "grad_norm": 1.9788655042648315, "learning_rate": 1.1799410029498526e-06, "loss": 0.5341, "step": 40 }, { "epoch": 0.005901881224640354, "eval_accuracy": 0.8277858176555717, "eval_f1": 0.6098360655737705, "eval_loss": 0.4612308144569397, "eval_precision": 0.9393939393939394, "eval_recall": 0.45145631067961167, "eval_runtime": 50.5301, "eval_samples_per_second": 5.759, "eval_steps_per_second": 0.198, "step": 40 }, { "epoch": 0.006049428255256363, "grad_norm": 1.910378098487854, "learning_rate": 1.2094395280235988e-06, "loss": 0.6305, "step": 41 }, { "epoch": 0.006196975285872372, "grad_norm": 1.9001059532165527, "learning_rate": 1.2389380530973452e-06, "loss": 0.5019, "step": 42 }, { "epoch": 0.006344522316488381, "grad_norm": 2.275596857070923, "learning_rate": 1.2684365781710917e-06, "loss": 0.6744, "step": 43 }, { "epoch": 0.006492069347104389, "grad_norm": 2.0762808322906494, "learning_rate": 1.297935103244838e-06, "loss": 0.596, "step": 44 }, { "epoch": 0.0066396163777203985, "grad_norm": 1.8717623949050903, "learning_rate": 1.3274336283185843e-06, "loss": 0.6371, "step": 45 }, { "epoch": 0.006787163408336407, "grad_norm": 2.0242576599121094, "learning_rate": 1.3569321533923306e-06, "loss": 0.6408, "step": 46 }, { "epoch": 0.006934710438952416, "grad_norm": 3.683346748352051, "learning_rate": 1.386430678466077e-06, "loss": 0.5967, "step": 47 }, { "epoch": 0.007082257469568425, "grad_norm": 2.0110268592834473, "learning_rate": 1.415929203539823e-06, "loss": 0.6027, "step": 48 }, { "epoch": 0.007229804500184434, "grad_norm": 2.288217782974243, "learning_rate": 1.4454277286135697e-06, "loss": 0.5854, "step": 49 }, { "epoch": 0.0073773515308004425, "grad_norm": 2.1070024967193604, "learning_rate": 1.4749262536873157e-06, "loss": 0.565, "step": 50 }, { "epoch": 0.007524898561416452, "grad_norm": 1.879689335823059, "learning_rate": 1.5044247787610621e-06, "loss": 0.601, "step": 51 }, { "epoch": 0.00767244559203246, "grad_norm": 1.9958750009536743, "learning_rate": 1.5339233038348083e-06, "loss": 0.5243, "step": 52 }, { "epoch": 0.007819992622648469, "grad_norm": 2.1826648712158203, "learning_rate": 1.5634218289085548e-06, "loss": 0.6103, "step": 53 }, { "epoch": 0.007967539653264478, "grad_norm": 1.838269591331482, "learning_rate": 1.592920353982301e-06, "loss": 0.5783, "step": 54 }, { "epoch": 0.008115086683880487, "grad_norm": 1.9237309694290161, "learning_rate": 1.6224188790560474e-06, "loss": 0.4933, "step": 55 }, { "epoch": 0.008262633714496495, "grad_norm": 1.7426055669784546, "learning_rate": 1.6519174041297937e-06, "loss": 0.5435, "step": 56 }, { "epoch": 0.008410180745112504, "grad_norm": 1.5711760520935059, "learning_rate": 1.68141592920354e-06, "loss": 0.5268, "step": 57 }, { "epoch": 0.008557727775728514, "grad_norm": 1.984619140625, "learning_rate": 1.7109144542772863e-06, "loss": 0.6161, "step": 58 }, { "epoch": 0.008705274806344523, "grad_norm": 2.2215609550476074, "learning_rate": 1.7404129793510328e-06, "loss": 0.575, "step": 59 }, { "epoch": 0.00885282183696053, "grad_norm": 2.1121788024902344, "learning_rate": 1.769911504424779e-06, "loss": 0.5857, "step": 60 }, { "epoch": 0.00885282183696053, "eval_accuracy": 0.8277858176555717, "eval_f1": 0.6072607260726073, "eval_loss": 0.4569220244884491, "eval_precision": 0.9484536082474226, "eval_recall": 0.44660194174757284, "eval_runtime": 50.1142, "eval_samples_per_second": 5.807, "eval_steps_per_second": 0.2, "step": 60 }, { "epoch": 0.00900036886757654, "grad_norm": 1.9798246622085571, "learning_rate": 1.7994100294985254e-06, "loss": 0.4932, "step": 61 }, { "epoch": 0.00914791589819255, "grad_norm": 1.7873258590698242, "learning_rate": 1.8289085545722714e-06, "loss": 0.4711, "step": 62 }, { "epoch": 0.009295462928808557, "grad_norm": 2.109706163406372, "learning_rate": 1.8584070796460179e-06, "loss": 0.5424, "step": 63 }, { "epoch": 0.009443009959424566, "grad_norm": 1.7430013418197632, "learning_rate": 1.887905604719764e-06, "loss": 0.5439, "step": 64 }, { "epoch": 0.009590556990040576, "grad_norm": 1.8797430992126465, "learning_rate": 1.9174041297935107e-06, "loss": 0.5548, "step": 65 }, { "epoch": 0.009738104020656585, "grad_norm": 2.084313154220581, "learning_rate": 1.9469026548672567e-06, "loss": 0.5142, "step": 66 }, { "epoch": 0.009885651051272593, "grad_norm": 2.2595505714416504, "learning_rate": 1.976401179941003e-06, "loss": 0.5789, "step": 67 }, { "epoch": 0.010033198081888602, "grad_norm": 2.101414203643799, "learning_rate": 2.005899705014749e-06, "loss": 0.5557, "step": 68 }, { "epoch": 0.010180745112504611, "grad_norm": 1.9330633878707886, "learning_rate": 2.035398230088496e-06, "loss": 0.5573, "step": 69 }, { "epoch": 0.010328292143120619, "grad_norm": 2.061075210571289, "learning_rate": 2.064896755162242e-06, "loss": 0.6316, "step": 70 }, { "epoch": 0.010475839173736628, "grad_norm": 2.1672215461730957, "learning_rate": 2.0943952802359885e-06, "loss": 0.5352, "step": 71 }, { "epoch": 0.010623386204352638, "grad_norm": 1.6692755222320557, "learning_rate": 2.1238938053097345e-06, "loss": 0.5023, "step": 72 }, { "epoch": 0.010770933234968647, "grad_norm": 1.9689394235610962, "learning_rate": 2.153392330383481e-06, "loss": 0.5226, "step": 73 }, { "epoch": 0.010918480265584654, "grad_norm": 2.438157320022583, "learning_rate": 2.1828908554572274e-06, "loss": 0.5675, "step": 74 }, { "epoch": 0.011066027296200664, "grad_norm": 2.170057773590088, "learning_rate": 2.212389380530974e-06, "loss": 0.4975, "step": 75 }, { "epoch": 0.011213574326816673, "grad_norm": 1.7632702589035034, "learning_rate": 2.24188790560472e-06, "loss": 0.5138, "step": 76 }, { "epoch": 0.011361121357432682, "grad_norm": 2.0990798473358154, "learning_rate": 2.2713864306784663e-06, "loss": 0.4686, "step": 77 }, { "epoch": 0.01150866838804869, "grad_norm": 1.6329792737960815, "learning_rate": 2.3008849557522127e-06, "loss": 0.4681, "step": 78 }, { "epoch": 0.0116562154186647, "grad_norm": 2.017184019088745, "learning_rate": 2.330383480825959e-06, "loss": 0.5001, "step": 79 }, { "epoch": 0.011803762449280709, "grad_norm": 1.9848320484161377, "learning_rate": 2.359882005899705e-06, "loss": 0.5562, "step": 80 }, { "epoch": 0.011803762449280709, "eval_accuracy": 0.8364688856729378, "eval_f1": 0.6366559485530546, "eval_loss": 0.4472511410713196, "eval_precision": 0.9428571428571428, "eval_recall": 0.48058252427184467, "eval_runtime": 49.4533, "eval_samples_per_second": 5.884, "eval_steps_per_second": 0.202, "step": 80 }, { "epoch": 0.011951309479896716, "grad_norm": 2.2966082096099854, "learning_rate": 2.3893805309734516e-06, "loss": 0.6017, "step": 81 }, { "epoch": 0.012098856510512726, "grad_norm": 2.56752610206604, "learning_rate": 2.4188790560471976e-06, "loss": 0.5368, "step": 82 }, { "epoch": 0.012246403541128735, "grad_norm": 1.9481098651885986, "learning_rate": 2.448377581120944e-06, "loss": 0.588, "step": 83 }, { "epoch": 0.012393950571744744, "grad_norm": 1.8865103721618652, "learning_rate": 2.4778761061946905e-06, "loss": 0.5751, "step": 84 }, { "epoch": 0.012541497602360752, "grad_norm": 2.2721524238586426, "learning_rate": 2.5073746312684365e-06, "loss": 0.5319, "step": 85 }, { "epoch": 0.012689044632976761, "grad_norm": 2.1798763275146484, "learning_rate": 2.5368731563421834e-06, "loss": 0.519, "step": 86 }, { "epoch": 0.01283659166359277, "grad_norm": 1.6984542608261108, "learning_rate": 2.5663716814159294e-06, "loss": 0.4963, "step": 87 }, { "epoch": 0.012984138694208778, "grad_norm": 1.9920787811279297, "learning_rate": 2.595870206489676e-06, "loss": 0.457, "step": 88 }, { "epoch": 0.013131685724824788, "grad_norm": 1.8378043174743652, "learning_rate": 2.625368731563422e-06, "loss": 0.5445, "step": 89 }, { "epoch": 0.013279232755440797, "grad_norm": 1.9275016784667969, "learning_rate": 2.6548672566371687e-06, "loss": 0.5571, "step": 90 }, { "epoch": 0.013426779786056806, "grad_norm": 1.9556200504302979, "learning_rate": 2.6843657817109147e-06, "loss": 0.5127, "step": 91 }, { "epoch": 0.013574326816672814, "grad_norm": 2.2737321853637695, "learning_rate": 2.713864306784661e-06, "loss": 0.4905, "step": 92 }, { "epoch": 0.013721873847288823, "grad_norm": 2.186802864074707, "learning_rate": 2.743362831858407e-06, "loss": 0.5501, "step": 93 }, { "epoch": 0.013869420877904833, "grad_norm": 1.774180293083191, "learning_rate": 2.772861356932154e-06, "loss": 0.4829, "step": 94 }, { "epoch": 0.014016967908520842, "grad_norm": 2.271841526031494, "learning_rate": 2.8023598820059e-06, "loss": 0.4667, "step": 95 }, { "epoch": 0.01416451493913685, "grad_norm": 1.8774105310440063, "learning_rate": 2.831858407079646e-06, "loss": 0.4369, "step": 96 }, { "epoch": 0.014312061969752859, "grad_norm": 3.2019522190093994, "learning_rate": 2.8613569321533925e-06, "loss": 0.6799, "step": 97 }, { "epoch": 0.014459609000368868, "grad_norm": 1.7792394161224365, "learning_rate": 2.8908554572271393e-06, "loss": 0.5064, "step": 98 }, { "epoch": 0.014607156030984876, "grad_norm": 1.9200505018234253, "learning_rate": 2.9203539823008853e-06, "loss": 0.4738, "step": 99 }, { "epoch": 0.014754703061600885, "grad_norm": 1.9603195190429688, "learning_rate": 2.9498525073746313e-06, "loss": 0.5169, "step": 100 }, { "epoch": 0.014754703061600885, "eval_accuracy": 0.8596237337192475, "eval_f1": 0.6996904024767802, "eval_loss": 0.4301896095275879, "eval_precision": 0.9658119658119658, "eval_recall": 0.5485436893203883, "eval_runtime": 50.2851, "eval_samples_per_second": 5.787, "eval_steps_per_second": 0.199, "step": 100 }, { "epoch": 0.014902250092216894, "grad_norm": 1.7537790536880493, "learning_rate": 2.9793510324483778e-06, "loss": 0.4808, "step": 101 }, { "epoch": 0.015049797122832904, "grad_norm": 1.7961171865463257, "learning_rate": 3.0088495575221242e-06, "loss": 0.5196, "step": 102 }, { "epoch": 0.015197344153448911, "grad_norm": 1.7709578275680542, "learning_rate": 3.0383480825958707e-06, "loss": 0.4835, "step": 103 }, { "epoch": 0.01534489118406492, "grad_norm": 1.9066882133483887, "learning_rate": 3.0678466076696167e-06, "loss": 0.4616, "step": 104 }, { "epoch": 0.01549243821468093, "grad_norm": 1.9344055652618408, "learning_rate": 3.097345132743363e-06, "loss": 0.467, "step": 105 }, { "epoch": 0.015639985245296938, "grad_norm": 1.7963544130325317, "learning_rate": 3.1268436578171095e-06, "loss": 0.531, "step": 106 }, { "epoch": 0.01578753227591295, "grad_norm": 2.0553719997406006, "learning_rate": 3.156342182890856e-06, "loss": 0.5395, "step": 107 }, { "epoch": 0.015935079306528956, "grad_norm": 1.851574182510376, "learning_rate": 3.185840707964602e-06, "loss": 0.5077, "step": 108 }, { "epoch": 0.016082626337144964, "grad_norm": 1.8345019817352295, "learning_rate": 3.215339233038348e-06, "loss": 0.4585, "step": 109 }, { "epoch": 0.016230173367760975, "grad_norm": 2.1635265350341797, "learning_rate": 3.244837758112095e-06, "loss": 0.5037, "step": 110 }, { "epoch": 0.016377720398376983, "grad_norm": 1.8476346731185913, "learning_rate": 3.274336283185841e-06, "loss": 0.4381, "step": 111 }, { "epoch": 0.01652526742899299, "grad_norm": 1.7085602283477783, "learning_rate": 3.3038348082595873e-06, "loss": 0.4408, "step": 112 }, { "epoch": 0.016672814459609, "grad_norm": 2.8017685413360596, "learning_rate": 3.3333333333333333e-06, "loss": 0.556, "step": 113 }, { "epoch": 0.01682036149022501, "grad_norm": 1.8998867273330688, "learning_rate": 3.36283185840708e-06, "loss": 0.4543, "step": 114 }, { "epoch": 0.016967908520841016, "grad_norm": 1.9203765392303467, "learning_rate": 3.392330383480826e-06, "loss": 0.4329, "step": 115 }, { "epoch": 0.017115455551457028, "grad_norm": 1.8684941530227661, "learning_rate": 3.4218289085545726e-06, "loss": 0.4145, "step": 116 }, { "epoch": 0.017263002582073035, "grad_norm": 3.40446400642395, "learning_rate": 3.4513274336283186e-06, "loss": 0.5905, "step": 117 }, { "epoch": 0.017410549612689046, "grad_norm": 1.9037803411483765, "learning_rate": 3.4808259587020655e-06, "loss": 0.4872, "step": 118 }, { "epoch": 0.017558096643305054, "grad_norm": 2.2757906913757324, "learning_rate": 3.5103244837758115e-06, "loss": 0.5239, "step": 119 }, { "epoch": 0.01770564367392106, "grad_norm": 1.7001346349716187, "learning_rate": 3.539823008849558e-06, "loss": 0.4474, "step": 120 }, { "epoch": 0.01770564367392106, "eval_accuracy": 0.8712011577424024, "eval_f1": 0.7420289855072464, "eval_loss": 0.3984713852405548, "eval_precision": 0.920863309352518, "eval_recall": 0.6213592233009708, "eval_runtime": 49.4224, "eval_samples_per_second": 5.888, "eval_steps_per_second": 0.202, "step": 120 }, { "epoch": 0.017853190704537072, "grad_norm": 2.8817026615142822, "learning_rate": 3.569321533923304e-06, "loss": 0.5248, "step": 121 }, { "epoch": 0.01800073773515308, "grad_norm": 3.701470136642456, "learning_rate": 3.598820058997051e-06, "loss": 0.5858, "step": 122 }, { "epoch": 0.018148284765769088, "grad_norm": 1.855739712715149, "learning_rate": 3.628318584070797e-06, "loss": 0.4128, "step": 123 }, { "epoch": 0.0182958317963851, "grad_norm": 2.1126368045806885, "learning_rate": 3.657817109144543e-06, "loss": 0.4497, "step": 124 }, { "epoch": 0.018443378827001106, "grad_norm": 1.6906609535217285, "learning_rate": 3.6873156342182893e-06, "loss": 0.4394, "step": 125 }, { "epoch": 0.018590925857617114, "grad_norm": 4.466763973236084, "learning_rate": 3.7168141592920357e-06, "loss": 0.6138, "step": 126 }, { "epoch": 0.018738472888233125, "grad_norm": 2.4154622554779053, "learning_rate": 3.746312684365782e-06, "loss": 0.4837, "step": 127 }, { "epoch": 0.018886019918849133, "grad_norm": 2.22926664352417, "learning_rate": 3.775811209439528e-06, "loss": 0.457, "step": 128 }, { "epoch": 0.019033566949465144, "grad_norm": 2.7719802856445312, "learning_rate": 3.8053097345132746e-06, "loss": 0.3897, "step": 129 }, { "epoch": 0.01918111398008115, "grad_norm": 1.9416868686676025, "learning_rate": 3.8348082595870215e-06, "loss": 0.4163, "step": 130 }, { "epoch": 0.01932866101069716, "grad_norm": 2.7867188453674316, "learning_rate": 3.8643067846607675e-06, "loss": 0.4211, "step": 131 }, { "epoch": 0.01947620804131317, "grad_norm": 1.907348394393921, "learning_rate": 3.8938053097345135e-06, "loss": 0.4752, "step": 132 }, { "epoch": 0.019623755071929178, "grad_norm": 2.0061187744140625, "learning_rate": 3.9233038348082595e-06, "loss": 0.4196, "step": 133 }, { "epoch": 0.019771302102545185, "grad_norm": 2.150468349456787, "learning_rate": 3.952802359882006e-06, "loss": 0.4593, "step": 134 }, { "epoch": 0.019918849133161196, "grad_norm": 2.467625617980957, "learning_rate": 3.982300884955752e-06, "loss": 0.3896, "step": 135 }, { "epoch": 0.020066396163777204, "grad_norm": 2.2157087326049805, "learning_rate": 4.011799410029498e-06, "loss": 0.5088, "step": 136 }, { "epoch": 0.02021394319439321, "grad_norm": 2.0862808227539062, "learning_rate": 4.041297935103245e-06, "loss": 0.429, "step": 137 }, { "epoch": 0.020361490225009223, "grad_norm": 2.965019464492798, "learning_rate": 4.070796460176992e-06, "loss": 0.4501, "step": 138 }, { "epoch": 0.02050903725562523, "grad_norm": 1.881576657295227, "learning_rate": 4.100294985250738e-06, "loss": 0.4071, "step": 139 }, { "epoch": 0.020656584286241238, "grad_norm": 2.6010119915008545, "learning_rate": 4.129793510324484e-06, "loss": 0.4423, "step": 140 }, { "epoch": 0.020656584286241238, "eval_accuracy": 0.8929088277858177, "eval_f1": 0.8, "eval_loss": 0.3566611111164093, "eval_precision": 0.9024390243902439, "eval_recall": 0.7184466019417476, "eval_runtime": 49.7229, "eval_samples_per_second": 5.852, "eval_steps_per_second": 0.201, "step": 140 }, { "epoch": 0.02080413131685725, "grad_norm": 2.6115405559539795, "learning_rate": 4.15929203539823e-06, "loss": 0.4543, "step": 141 }, { "epoch": 0.020951678347473256, "grad_norm": 1.9557864665985107, "learning_rate": 4.188790560471977e-06, "loss": 0.3753, "step": 142 }, { "epoch": 0.021099225378089267, "grad_norm": 3.3719418048858643, "learning_rate": 4.218289085545723e-06, "loss": 0.366, "step": 143 }, { "epoch": 0.021246772408705275, "grad_norm": 2.972867250442505, "learning_rate": 4.247787610619469e-06, "loss": 0.4154, "step": 144 }, { "epoch": 0.021394319439321283, "grad_norm": 2.8596489429473877, "learning_rate": 4.277286135693216e-06, "loss": 0.3593, "step": 145 }, { "epoch": 0.021541866469937294, "grad_norm": 2.5790815353393555, "learning_rate": 4.306784660766962e-06, "loss": 0.442, "step": 146 }, { "epoch": 0.0216894135005533, "grad_norm": 2.1344902515411377, "learning_rate": 4.336283185840709e-06, "loss": 0.4607, "step": 147 }, { "epoch": 0.02183696053116931, "grad_norm": 2.003572940826416, "learning_rate": 4.365781710914455e-06, "loss": 0.3847, "step": 148 }, { "epoch": 0.02198450756178532, "grad_norm": 2.83815598487854, "learning_rate": 4.395280235988201e-06, "loss": 0.436, "step": 149 }, { "epoch": 0.022132054592401328, "grad_norm": 2.2443125247955322, "learning_rate": 4.424778761061948e-06, "loss": 0.4092, "step": 150 }, { "epoch": 0.022279601623017335, "grad_norm": 2.969388008117676, "learning_rate": 4.454277286135694e-06, "loss": 0.4425, "step": 151 }, { "epoch": 0.022427148653633346, "grad_norm": 3.049867630004883, "learning_rate": 4.48377581120944e-06, "loss": 0.457, "step": 152 }, { "epoch": 0.022574695684249354, "grad_norm": 2.269061803817749, "learning_rate": 4.513274336283186e-06, "loss": 0.3761, "step": 153 }, { "epoch": 0.022722242714865365, "grad_norm": 2.2566583156585693, "learning_rate": 4.5427728613569326e-06, "loss": 0.351, "step": 154 }, { "epoch": 0.022869789745481373, "grad_norm": 2.557331085205078, "learning_rate": 4.5722713864306786e-06, "loss": 0.4462, "step": 155 }, { "epoch": 0.02301733677609738, "grad_norm": 2.1571779251098633, "learning_rate": 4.6017699115044254e-06, "loss": 0.3258, "step": 156 }, { "epoch": 0.02316488380671339, "grad_norm": 2.3921117782592773, "learning_rate": 4.6312684365781714e-06, "loss": 0.3909, "step": 157 }, { "epoch": 0.0233124308373294, "grad_norm": 2.303868532180786, "learning_rate": 4.660766961651918e-06, "loss": 0.3693, "step": 158 }, { "epoch": 0.023459977867945406, "grad_norm": 2.4850494861602783, "learning_rate": 4.690265486725664e-06, "loss": 0.3341, "step": 159 }, { "epoch": 0.023607524898561418, "grad_norm": 3.2140703201293945, "learning_rate": 4.71976401179941e-06, "loss": 0.3734, "step": 160 }, { "epoch": 0.023607524898561418, "eval_accuracy": 0.9117221418234442, "eval_f1": 0.8398950131233596, "eval_loss": 0.2947663366794586, "eval_precision": 0.9142857142857143, "eval_recall": 0.7766990291262136, "eval_runtime": 49.4353, "eval_samples_per_second": 5.886, "eval_steps_per_second": 0.202, "step": 160 }, { "epoch": 0.023755071929177425, "grad_norm": 2.6800179481506348, "learning_rate": 4.749262536873156e-06, "loss": 0.3835, "step": 161 }, { "epoch": 0.023902618959793433, "grad_norm": 3.807173728942871, "learning_rate": 4.778761061946903e-06, "loss": 0.4336, "step": 162 }, { "epoch": 0.024050165990409444, "grad_norm": 2.529468059539795, "learning_rate": 4.808259587020649e-06, "loss": 0.3558, "step": 163 }, { "epoch": 0.02419771302102545, "grad_norm": 2.251593828201294, "learning_rate": 4.837758112094395e-06, "loss": 0.335, "step": 164 }, { "epoch": 0.02434526005164146, "grad_norm": 2.378906011581421, "learning_rate": 4.867256637168142e-06, "loss": 0.3557, "step": 165 }, { "epoch": 0.02449280708225747, "grad_norm": 2.23449969291687, "learning_rate": 4.896755162241888e-06, "loss": 0.3134, "step": 166 }, { "epoch": 0.024640354112873478, "grad_norm": 2.712001085281372, "learning_rate": 4.926253687315635e-06, "loss": 0.3809, "step": 167 }, { "epoch": 0.02478790114348949, "grad_norm": 2.1402900218963623, "learning_rate": 4.955752212389381e-06, "loss": 0.3083, "step": 168 }, { "epoch": 0.024935448174105496, "grad_norm": 2.3690595626831055, "learning_rate": 4.985250737463127e-06, "loss": 0.341, "step": 169 }, { "epoch": 0.025082995204721504, "grad_norm": 2.9476425647735596, "learning_rate": 5.014749262536873e-06, "loss": 0.3466, "step": 170 }, { "epoch": 0.025230542235337515, "grad_norm": 2.676905870437622, "learning_rate": 5.04424778761062e-06, "loss": 0.3215, "step": 171 }, { "epoch": 0.025378089265953523, "grad_norm": 2.3869245052337646, "learning_rate": 5.073746312684367e-06, "loss": 0.2622, "step": 172 }, { "epoch": 0.02552563629656953, "grad_norm": 3.2571730613708496, "learning_rate": 5.103244837758113e-06, "loss": 0.3361, "step": 173 }, { "epoch": 0.02567318332718554, "grad_norm": 2.323625326156616, "learning_rate": 5.132743362831859e-06, "loss": 0.2968, "step": 174 }, { "epoch": 0.02582073035780155, "grad_norm": 2.768444299697876, "learning_rate": 5.162241887905605e-06, "loss": 0.2997, "step": 175 }, { "epoch": 0.025968277388417556, "grad_norm": 2.646876335144043, "learning_rate": 5.191740412979352e-06, "loss": 0.2869, "step": 176 }, { "epoch": 0.026115824419033568, "grad_norm": 2.6804604530334473, "learning_rate": 5.2212389380530985e-06, "loss": 0.2935, "step": 177 }, { "epoch": 0.026263371449649575, "grad_norm": 3.361034393310547, "learning_rate": 5.250737463126844e-06, "loss": 0.296, "step": 178 }, { "epoch": 0.026410918480265586, "grad_norm": 4.176695823669434, "learning_rate": 5.2802359882005905e-06, "loss": 0.2339, "step": 179 }, { "epoch": 0.026558465510881594, "grad_norm": 3.175715923309326, "learning_rate": 5.309734513274337e-06, "loss": 0.3324, "step": 180 }, { "epoch": 0.026558465510881594, "eval_accuracy": 0.9363241678726484, "eval_f1": 0.8894472361809045, "eval_loss": 0.21964313089847565, "eval_precision": 0.921875, "eval_recall": 0.8592233009708737, "eval_runtime": 50.2368, "eval_samples_per_second": 5.793, "eval_steps_per_second": 0.199, "step": 180 }, { "epoch": 0.0267060125414976, "grad_norm": 3.8445475101470947, "learning_rate": 5.3392330383480825e-06, "loss": 0.3102, "step": 181 }, { "epoch": 0.026853559572113612, "grad_norm": 4.203682899475098, "learning_rate": 5.368731563421829e-06, "loss": 0.2443, "step": 182 }, { "epoch": 0.02700110660272962, "grad_norm": 3.9179210662841797, "learning_rate": 5.398230088495575e-06, "loss": 0.2552, "step": 183 }, { "epoch": 0.027148653633345628, "grad_norm": 4.1730265617370605, "learning_rate": 5.427728613569322e-06, "loss": 0.3203, "step": 184 }, { "epoch": 0.02729620066396164, "grad_norm": 7.326657295227051, "learning_rate": 5.457227138643068e-06, "loss": 0.3337, "step": 185 }, { "epoch": 0.027443747694577646, "grad_norm": 3.349335193634033, "learning_rate": 5.486725663716814e-06, "loss": 0.2344, "step": 186 }, { "epoch": 0.027591294725193654, "grad_norm": 2.689974546432495, "learning_rate": 5.516224188790561e-06, "loss": 0.2158, "step": 187 }, { "epoch": 0.027738841755809665, "grad_norm": 3.069680690765381, "learning_rate": 5.545722713864308e-06, "loss": 0.2646, "step": 188 }, { "epoch": 0.027886388786425673, "grad_norm": 5.341132164001465, "learning_rate": 5.575221238938053e-06, "loss": 0.2934, "step": 189 }, { "epoch": 0.028033935817041684, "grad_norm": 3.09668231010437, "learning_rate": 5.6047197640118e-06, "loss": 0.2593, "step": 190 }, { "epoch": 0.02818148284765769, "grad_norm": 3.718409538269043, "learning_rate": 5.634218289085546e-06, "loss": 0.3082, "step": 191 }, { "epoch": 0.0283290298782737, "grad_norm": 3.678961992263794, "learning_rate": 5.663716814159292e-06, "loss": 0.2643, "step": 192 }, { "epoch": 0.02847657690888971, "grad_norm": 2.770150899887085, "learning_rate": 5.693215339233039e-06, "loss": 0.2, "step": 193 }, { "epoch": 0.028624123939505718, "grad_norm": 6.1580586433410645, "learning_rate": 5.722713864306785e-06, "loss": 0.3747, "step": 194 }, { "epoch": 0.028771670970121725, "grad_norm": 2.918246030807495, "learning_rate": 5.752212389380532e-06, "loss": 0.2375, "step": 195 }, { "epoch": 0.028919218000737736, "grad_norm": 3.8346288204193115, "learning_rate": 5.781710914454279e-06, "loss": 0.2471, "step": 196 }, { "epoch": 0.029066765031353744, "grad_norm": 9.396050453186035, "learning_rate": 5.811209439528024e-06, "loss": 0.3999, "step": 197 }, { "epoch": 0.02921431206196975, "grad_norm": 3.803187847137451, "learning_rate": 5.840707964601771e-06, "loss": 0.2601, "step": 198 }, { "epoch": 0.029361859092585763, "grad_norm": 3.8733484745025635, "learning_rate": 5.870206489675516e-06, "loss": 0.1761, "step": 199 }, { "epoch": 0.02950940612320177, "grad_norm": 2.7189722061157227, "learning_rate": 5.899705014749263e-06, "loss": 0.2337, "step": 200 }, { "epoch": 0.02950940612320177, "eval_accuracy": 0.9522431259044862, "eval_f1": 0.9181141439205955, "eval_loss": 0.17437304556369781, "eval_precision": 0.9390862944162437, "eval_recall": 0.8980582524271845, "eval_runtime": 49.6009, "eval_samples_per_second": 5.867, "eval_steps_per_second": 0.202, "step": 200 }, { "epoch": 0.029656953153817778, "grad_norm": 4.142955780029297, "learning_rate": 5.9292035398230096e-06, "loss": 0.2332, "step": 201 }, { "epoch": 0.02980450018443379, "grad_norm": 5.616168022155762, "learning_rate": 5.9587020648967556e-06, "loss": 0.1915, "step": 202 }, { "epoch": 0.029952047215049796, "grad_norm": 4.543870449066162, "learning_rate": 5.9882005899705024e-06, "loss": 0.2201, "step": 203 }, { "epoch": 0.030099594245665807, "grad_norm": 3.1968324184417725, "learning_rate": 6.0176991150442484e-06, "loss": 0.2028, "step": 204 }, { "epoch": 0.030247141276281815, "grad_norm": 3.9593422412872314, "learning_rate": 6.0471976401179945e-06, "loss": 0.2231, "step": 205 }, { "epoch": 0.030394688306897823, "grad_norm": 2.4024624824523926, "learning_rate": 6.076696165191741e-06, "loss": 0.1978, "step": 206 }, { "epoch": 0.030542235337513834, "grad_norm": 4.949997901916504, "learning_rate": 6.1061946902654865e-06, "loss": 0.2554, "step": 207 }, { "epoch": 0.03068978236812984, "grad_norm": 6.300983905792236, "learning_rate": 6.135693215339233e-06, "loss": 0.3101, "step": 208 }, { "epoch": 0.03083732939874585, "grad_norm": 5.079474925994873, "learning_rate": 6.16519174041298e-06, "loss": 0.2361, "step": 209 }, { "epoch": 0.03098487642936186, "grad_norm": 3.581029176712036, "learning_rate": 6.194690265486726e-06, "loss": 0.1632, "step": 210 }, { "epoch": 0.031132423459977868, "grad_norm": 2.801851749420166, "learning_rate": 6.224188790560472e-06, "loss": 0.1578, "step": 211 }, { "epoch": 0.031279970490593875, "grad_norm": 4.79841423034668, "learning_rate": 6.253687315634219e-06, "loss": 0.234, "step": 212 }, { "epoch": 0.03142751752120988, "grad_norm": 3.7327382564544678, "learning_rate": 6.283185840707965e-06, "loss": 0.1999, "step": 213 }, { "epoch": 0.0315750645518259, "grad_norm": 6.591651439666748, "learning_rate": 6.312684365781712e-06, "loss": 0.234, "step": 214 }, { "epoch": 0.031722611582441905, "grad_norm": 4.933311462402344, "learning_rate": 6.342182890855457e-06, "loss": 0.235, "step": 215 }, { "epoch": 0.03187015861305791, "grad_norm": 3.313809394836426, "learning_rate": 6.371681415929204e-06, "loss": 0.1692, "step": 216 }, { "epoch": 0.03201770564367392, "grad_norm": 3.9487600326538086, "learning_rate": 6.401179941002951e-06, "loss": 0.2114, "step": 217 }, { "epoch": 0.03216525267428993, "grad_norm": 7.258141994476318, "learning_rate": 6.430678466076696e-06, "loss": 0.2815, "step": 218 }, { "epoch": 0.032312799704905935, "grad_norm": 4.9907450675964355, "learning_rate": 6.460176991150443e-06, "loss": 0.1802, "step": 219 }, { "epoch": 0.03246034673552195, "grad_norm": 4.339593887329102, "learning_rate": 6.48967551622419e-06, "loss": 0.1528, "step": 220 }, { "epoch": 0.03246034673552195, "eval_accuracy": 0.9522431259044862, "eval_f1": 0.918918918918919, "eval_loss": 0.15728141367435455, "eval_precision": 0.9303482587064676, "eval_recall": 0.9077669902912622, "eval_runtime": 49.7582, "eval_samples_per_second": 5.848, "eval_steps_per_second": 0.201, "step": 220 }, { "epoch": 0.03260789376613796, "grad_norm": 4.5391998291015625, "learning_rate": 6.519174041297936e-06, "loss": 0.1974, "step": 221 }, { "epoch": 0.032755440796753965, "grad_norm": 5.6919426918029785, "learning_rate": 6.548672566371682e-06, "loss": 0.1847, "step": 222 }, { "epoch": 0.03290298782736997, "grad_norm": 5.235836029052734, "learning_rate": 6.578171091445428e-06, "loss": 0.2223, "step": 223 }, { "epoch": 0.03305053485798598, "grad_norm": 6.076484680175781, "learning_rate": 6.607669616519175e-06, "loss": 0.1467, "step": 224 }, { "epoch": 0.033198081888601995, "grad_norm": 3.324214458465576, "learning_rate": 6.6371681415929215e-06, "loss": 0.0906, "step": 225 }, { "epoch": 0.033345628919218, "grad_norm": 5.136935710906982, "learning_rate": 6.666666666666667e-06, "loss": 0.1331, "step": 226 }, { "epoch": 0.03349317594983401, "grad_norm": 5.696311950683594, "learning_rate": 6.6961651917404135e-06, "loss": 0.1474, "step": 227 }, { "epoch": 0.03364072298045002, "grad_norm": 3.3585317134857178, "learning_rate": 6.72566371681416e-06, "loss": 0.1372, "step": 228 }, { "epoch": 0.033788270011066025, "grad_norm": 5.843612194061279, "learning_rate": 6.7551622418879055e-06, "loss": 0.1755, "step": 229 }, { "epoch": 0.03393581704168203, "grad_norm": 4.208086967468262, "learning_rate": 6.784660766961652e-06, "loss": 0.1269, "step": 230 }, { "epoch": 0.03408336407229805, "grad_norm": 7.074487686157227, "learning_rate": 6.814159292035398e-06, "loss": 0.228, "step": 231 }, { "epoch": 0.034230911102914055, "grad_norm": 4.874953269958496, "learning_rate": 6.843657817109145e-06, "loss": 0.1739, "step": 232 }, { "epoch": 0.03437845813353006, "grad_norm": 3.7812411785125732, "learning_rate": 6.873156342182892e-06, "loss": 0.1015, "step": 233 }, { "epoch": 0.03452600516414607, "grad_norm": 8.147891998291016, "learning_rate": 6.902654867256637e-06, "loss": 0.1904, "step": 234 }, { "epoch": 0.03467355219476208, "grad_norm": 4.257922649383545, "learning_rate": 6.932153392330384e-06, "loss": 0.1433, "step": 235 }, { "epoch": 0.03482109922537809, "grad_norm": 4.28916597366333, "learning_rate": 6.961651917404131e-06, "loss": 0.1641, "step": 236 }, { "epoch": 0.0349686462559941, "grad_norm": 4.205521106719971, "learning_rate": 6.991150442477876e-06, "loss": 0.1761, "step": 237 }, { "epoch": 0.03511619328661011, "grad_norm": 3.871521472930908, "learning_rate": 7.020648967551623e-06, "loss": 0.2325, "step": 238 }, { "epoch": 0.035263740317226115, "grad_norm": 4.736901760101318, "learning_rate": 7.050147492625369e-06, "loss": 0.1844, "step": 239 }, { "epoch": 0.03541128734784212, "grad_norm": 6.687438011169434, "learning_rate": 7.079646017699116e-06, "loss": 0.3151, "step": 240 }, { "epoch": 0.03541128734784212, "eval_accuracy": 0.9551374819102749, "eval_f1": 0.9242053789731052, "eval_loss": 0.14977814257144928, "eval_precision": 0.9310344827586207, "eval_recall": 0.9174757281553398, "eval_runtime": 49.8696, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.201, "step": 240 }, { "epoch": 0.03555883437845813, "grad_norm": 3.2434473037719727, "learning_rate": 7.109144542772862e-06, "loss": 0.1006, "step": 241 }, { "epoch": 0.035706381409074145, "grad_norm": 3.5741753578186035, "learning_rate": 7.138643067846608e-06, "loss": 0.1196, "step": 242 }, { "epoch": 0.03585392843969015, "grad_norm": 4.110579013824463, "learning_rate": 7.168141592920355e-06, "loss": 0.1645, "step": 243 }, { "epoch": 0.03600147547030616, "grad_norm": 5.108462333679199, "learning_rate": 7.197640117994102e-06, "loss": 0.1756, "step": 244 }, { "epoch": 0.03614902250092217, "grad_norm": 9.589862823486328, "learning_rate": 7.227138643067847e-06, "loss": 0.2677, "step": 245 }, { "epoch": 0.036296569531538175, "grad_norm": 6.1967573165893555, "learning_rate": 7.256637168141594e-06, "loss": 0.1498, "step": 246 }, { "epoch": 0.03644411656215419, "grad_norm": 5.692258834838867, "learning_rate": 7.28613569321534e-06, "loss": 0.1772, "step": 247 }, { "epoch": 0.0365916635927702, "grad_norm": 7.874504089355469, "learning_rate": 7.315634218289086e-06, "loss": 0.2262, "step": 248 }, { "epoch": 0.036739210623386205, "grad_norm": 4.6364521980285645, "learning_rate": 7.3451327433628326e-06, "loss": 0.2221, "step": 249 }, { "epoch": 0.03688675765400221, "grad_norm": 8.91701602935791, "learning_rate": 7.374631268436579e-06, "loss": 0.1184, "step": 250 }, { "epoch": 0.03703430468461822, "grad_norm": 5.354017734527588, "learning_rate": 7.4041297935103254e-06, "loss": 0.1549, "step": 251 }, { "epoch": 0.03718185171523423, "grad_norm": 3.8493027687072754, "learning_rate": 7.4336283185840714e-06, "loss": 0.127, "step": 252 }, { "epoch": 0.03732939874585024, "grad_norm": 4.797015190124512, "learning_rate": 7.4631268436578175e-06, "loss": 0.116, "step": 253 }, { "epoch": 0.03747694577646625, "grad_norm": 8.248945236206055, "learning_rate": 7.492625368731564e-06, "loss": 0.2484, "step": 254 }, { "epoch": 0.03762449280708226, "grad_norm": 4.4472737312316895, "learning_rate": 7.5221238938053095e-06, "loss": 0.1616, "step": 255 }, { "epoch": 0.037772039837698265, "grad_norm": 6.164108753204346, "learning_rate": 7.551622418879056e-06, "loss": 0.246, "step": 256 }, { "epoch": 0.03791958686831427, "grad_norm": 7.092438220977783, "learning_rate": 7.581120943952803e-06, "loss": 0.283, "step": 257 }, { "epoch": 0.03806713389893029, "grad_norm": 5.094725131988525, "learning_rate": 7.610619469026549e-06, "loss": 0.2443, "step": 258 }, { "epoch": 0.038214680929546295, "grad_norm": 6.3736491203308105, "learning_rate": 7.640117994100296e-06, "loss": 0.2867, "step": 259 }, { "epoch": 0.0383622279601623, "grad_norm": 4.658124923706055, "learning_rate": 7.669616519174043e-06, "loss": 0.1388, "step": 260 }, { "epoch": 0.0383622279601623, "eval_accuracy": 0.9565846599131693, "eval_f1": 0.926829268292683, "eval_loss": 0.14077338576316833, "eval_precision": 0.9313725490196079, "eval_recall": 0.9223300970873787, "eval_runtime": 50.1449, "eval_samples_per_second": 5.803, "eval_steps_per_second": 0.199, "step": 260 }, { "epoch": 0.03850977499077831, "grad_norm": 4.16099214553833, "learning_rate": 7.699115044247788e-06, "loss": 0.2096, "step": 261 }, { "epoch": 0.03865732202139432, "grad_norm": 4.406971454620361, "learning_rate": 7.728613569321535e-06, "loss": 0.1376, "step": 262 }, { "epoch": 0.038804869052010325, "grad_norm": 4.102197170257568, "learning_rate": 7.75811209439528e-06, "loss": 0.1656, "step": 263 }, { "epoch": 0.03895241608262634, "grad_norm": 5.563126087188721, "learning_rate": 7.787610619469027e-06, "loss": 0.183, "step": 264 }, { "epoch": 0.03909996311324235, "grad_norm": 6.817785739898682, "learning_rate": 7.817109144542774e-06, "loss": 0.1237, "step": 265 }, { "epoch": 0.039247510143858355, "grad_norm": 3.2304129600524902, "learning_rate": 7.846607669616519e-06, "loss": 0.1092, "step": 266 }, { "epoch": 0.03939505717447436, "grad_norm": 3.9030115604400635, "learning_rate": 7.876106194690266e-06, "loss": 0.1629, "step": 267 }, { "epoch": 0.03954260420509037, "grad_norm": 3.8718602657318115, "learning_rate": 7.905604719764013e-06, "loss": 0.1782, "step": 268 }, { "epoch": 0.039690151235706385, "grad_norm": 3.4735336303710938, "learning_rate": 7.935103244837758e-06, "loss": 0.1082, "step": 269 }, { "epoch": 0.03983769826632239, "grad_norm": 4.98665189743042, "learning_rate": 7.964601769911505e-06, "loss": 0.2036, "step": 270 }, { "epoch": 0.0399852452969384, "grad_norm": 2.5346317291259766, "learning_rate": 7.994100294985252e-06, "loss": 0.0879, "step": 271 }, { "epoch": 0.04013279232755441, "grad_norm": 3.8122799396514893, "learning_rate": 8.023598820058997e-06, "loss": 0.1472, "step": 272 }, { "epoch": 0.040280339358170415, "grad_norm": 4.062076091766357, "learning_rate": 8.053097345132744e-06, "loss": 0.1439, "step": 273 }, { "epoch": 0.04042788638878642, "grad_norm": 3.1517844200134277, "learning_rate": 8.08259587020649e-06, "loss": 0.1163, "step": 274 }, { "epoch": 0.04057543341940244, "grad_norm": 2.914015531539917, "learning_rate": 8.112094395280237e-06, "loss": 0.1349, "step": 275 }, { "epoch": 0.040722980450018445, "grad_norm": 4.870702743530273, "learning_rate": 8.141592920353984e-06, "loss": 0.1212, "step": 276 }, { "epoch": 0.04087052748063445, "grad_norm": 3.2663400173187256, "learning_rate": 8.17109144542773e-06, "loss": 0.0901, "step": 277 }, { "epoch": 0.04101807451125046, "grad_norm": 2.884490728378296, "learning_rate": 8.200589970501476e-06, "loss": 0.1269, "step": 278 }, { "epoch": 0.04116562154186647, "grad_norm": 2.778538227081299, "learning_rate": 8.230088495575221e-06, "loss": 0.1557, "step": 279 }, { "epoch": 0.041313168572482475, "grad_norm": 3.76567006111145, "learning_rate": 8.259587020648968e-06, "loss": 0.1405, "step": 280 }, { "epoch": 0.041313168572482475, "eval_accuracy": 0.9565846599131693, "eval_f1": 0.9257425742574258, "eval_loss": 0.13419242203235626, "eval_precision": 0.9444444444444444, "eval_recall": 0.9077669902912622, "eval_runtime": 50.1481, "eval_samples_per_second": 5.803, "eval_steps_per_second": 0.199, "step": 280 }, { "epoch": 0.04146071560309849, "grad_norm": 6.657413482666016, "learning_rate": 8.289085545722715e-06, "loss": 0.1807, "step": 281 }, { "epoch": 0.0416082626337145, "grad_norm": 2.655567169189453, "learning_rate": 8.31858407079646e-06, "loss": 0.1088, "step": 282 }, { "epoch": 0.041755809664330505, "grad_norm": 1.7318779230117798, "learning_rate": 8.348082595870207e-06, "loss": 0.0599, "step": 283 }, { "epoch": 0.04190335669494651, "grad_norm": 3.6388893127441406, "learning_rate": 8.377581120943954e-06, "loss": 0.1029, "step": 284 }, { "epoch": 0.04205090372556252, "grad_norm": 5.381409168243408, "learning_rate": 8.4070796460177e-06, "loss": 0.1483, "step": 285 }, { "epoch": 0.042198450756178535, "grad_norm": 5.147741794586182, "learning_rate": 8.436578171091446e-06, "loss": 0.1347, "step": 286 }, { "epoch": 0.04234599778679454, "grad_norm": 3.469966411590576, "learning_rate": 8.466076696165191e-06, "loss": 0.0578, "step": 287 }, { "epoch": 0.04249354481741055, "grad_norm": 6.2962327003479, "learning_rate": 8.495575221238938e-06, "loss": 0.1671, "step": 288 }, { "epoch": 0.04264109184802656, "grad_norm": 2.943178176879883, "learning_rate": 8.525073746312685e-06, "loss": 0.0939, "step": 289 }, { "epoch": 0.042788638878642565, "grad_norm": 5.672203063964844, "learning_rate": 8.554572271386432e-06, "loss": 0.1009, "step": 290 }, { "epoch": 0.04293618590925857, "grad_norm": 5.851071357727051, "learning_rate": 8.584070796460177e-06, "loss": 0.095, "step": 291 }, { "epoch": 0.04308373293987459, "grad_norm": 4.560580730438232, "learning_rate": 8.613569321533924e-06, "loss": 0.0989, "step": 292 }, { "epoch": 0.043231279970490595, "grad_norm": 5.899099826812744, "learning_rate": 8.64306784660767e-06, "loss": 0.1301, "step": 293 }, { "epoch": 0.0433788270011066, "grad_norm": 4.162158489227295, "learning_rate": 8.672566371681418e-06, "loss": 0.2077, "step": 294 }, { "epoch": 0.04352637403172261, "grad_norm": 3.7502331733703613, "learning_rate": 8.702064896755163e-06, "loss": 0.0738, "step": 295 }, { "epoch": 0.04367392106233862, "grad_norm": 5.365631580352783, "learning_rate": 8.73156342182891e-06, "loss": 0.1579, "step": 296 }, { "epoch": 0.04382146809295463, "grad_norm": 6.791158676147461, "learning_rate": 8.761061946902656e-06, "loss": 0.1194, "step": 297 }, { "epoch": 0.04396901512357064, "grad_norm": 2.964080572128296, "learning_rate": 8.790560471976402e-06, "loss": 0.0757, "step": 298 }, { "epoch": 0.04411656215418665, "grad_norm": 4.458961486816406, "learning_rate": 8.820058997050148e-06, "loss": 0.0634, "step": 299 }, { "epoch": 0.044264109184802655, "grad_norm": 7.169512748718262, "learning_rate": 8.849557522123895e-06, "loss": 0.093, "step": 300 }, { "epoch": 0.044264109184802655, "eval_accuracy": 0.9580318379160637, "eval_f1": 0.9280397022332506, "eval_loss": 0.13798266649246216, "eval_precision": 0.949238578680203, "eval_recall": 0.9077669902912622, "eval_runtime": 50.1387, "eval_samples_per_second": 5.804, "eval_steps_per_second": 0.199, "step": 300 }, { "epoch": 0.04441165621541866, "grad_norm": 3.8345205783843994, "learning_rate": 8.87905604719764e-06, "loss": 0.1461, "step": 301 }, { "epoch": 0.04455920324603467, "grad_norm": 4.717754364013672, "learning_rate": 8.908554572271387e-06, "loss": 0.1382, "step": 302 }, { "epoch": 0.044706750276650685, "grad_norm": 5.306753635406494, "learning_rate": 8.938053097345133e-06, "loss": 0.1469, "step": 303 }, { "epoch": 0.04485429730726669, "grad_norm": 4.950700283050537, "learning_rate": 8.96755162241888e-06, "loss": 0.1422, "step": 304 }, { "epoch": 0.0450018443378827, "grad_norm": 5.903288841247559, "learning_rate": 8.997050147492626e-06, "loss": 0.1082, "step": 305 }, { "epoch": 0.04514939136849871, "grad_norm": 4.594814300537109, "learning_rate": 9.026548672566371e-06, "loss": 0.0367, "step": 306 }, { "epoch": 0.045296938399114715, "grad_norm": 8.450814247131348, "learning_rate": 9.056047197640118e-06, "loss": 0.1565, "step": 307 }, { "epoch": 0.04544448542973073, "grad_norm": 7.773022174835205, "learning_rate": 9.085545722713865e-06, "loss": 0.1062, "step": 308 }, { "epoch": 0.04559203246034674, "grad_norm": 3.9900150299072266, "learning_rate": 9.11504424778761e-06, "loss": 0.0318, "step": 309 }, { "epoch": 0.045739579490962745, "grad_norm": 7.797605991363525, "learning_rate": 9.144542772861357e-06, "loss": 0.1619, "step": 310 }, { "epoch": 0.04588712652157875, "grad_norm": 2.1928110122680664, "learning_rate": 9.174041297935104e-06, "loss": 0.0346, "step": 311 }, { "epoch": 0.04603467355219476, "grad_norm": 5.369995594024658, "learning_rate": 9.203539823008851e-06, "loss": 0.1351, "step": 312 }, { "epoch": 0.04618222058281077, "grad_norm": 3.846357583999634, "learning_rate": 9.233038348082598e-06, "loss": 0.1037, "step": 313 }, { "epoch": 0.04632976761342678, "grad_norm": 2.992255926132202, "learning_rate": 9.262536873156343e-06, "loss": 0.1279, "step": 314 }, { "epoch": 0.04647731464404279, "grad_norm": 4.43184232711792, "learning_rate": 9.29203539823009e-06, "loss": 0.1632, "step": 315 }, { "epoch": 0.0466248616746588, "grad_norm": 3.940495252609253, "learning_rate": 9.321533923303837e-06, "loss": 0.1387, "step": 316 }, { "epoch": 0.046772408705274805, "grad_norm": 4.806314468383789, "learning_rate": 9.351032448377582e-06, "loss": 0.1298, "step": 317 }, { "epoch": 0.04691995573589081, "grad_norm": 3.9163870811462402, "learning_rate": 9.380530973451329e-06, "loss": 0.1383, "step": 318 }, { "epoch": 0.04706750276650683, "grad_norm": 4.239090919494629, "learning_rate": 9.410029498525074e-06, "loss": 0.1131, "step": 319 }, { "epoch": 0.047215049797122835, "grad_norm": 3.099626302719116, "learning_rate": 9.43952802359882e-06, "loss": 0.1395, "step": 320 }, { "epoch": 0.047215049797122835, "eval_accuracy": 0.9580318379160637, "eval_f1": 0.9280397022332506, "eval_loss": 0.13586266338825226, "eval_precision": 0.949238578680203, "eval_recall": 0.9077669902912622, "eval_runtime": 49.7834, "eval_samples_per_second": 5.845, "eval_steps_per_second": 0.201, "step": 320 }, { "epoch": 0.04736259682773884, "grad_norm": 4.810850620269775, "learning_rate": 9.469026548672568e-06, "loss": 0.1791, "step": 321 }, { "epoch": 0.04751014385835485, "grad_norm": 4.507347106933594, "learning_rate": 9.498525073746313e-06, "loss": 0.1373, "step": 322 }, { "epoch": 0.04765769088897086, "grad_norm": 3.730177402496338, "learning_rate": 9.52802359882006e-06, "loss": 0.0539, "step": 323 }, { "epoch": 0.047805237919586865, "grad_norm": 8.237765312194824, "learning_rate": 9.557522123893806e-06, "loss": 0.1194, "step": 324 }, { "epoch": 0.04795278495020288, "grad_norm": 3.203836441040039, "learning_rate": 9.587020648967552e-06, "loss": 0.1024, "step": 325 }, { "epoch": 0.04810033198081889, "grad_norm": 6.77910852432251, "learning_rate": 9.616519174041298e-06, "loss": 0.1225, "step": 326 }, { "epoch": 0.048247879011434895, "grad_norm": 7.12105655670166, "learning_rate": 9.646017699115045e-06, "loss": 0.2247, "step": 327 }, { "epoch": 0.0483954260420509, "grad_norm": 3.20892596244812, "learning_rate": 9.67551622418879e-06, "loss": 0.0878, "step": 328 }, { "epoch": 0.04854297307266691, "grad_norm": 6.047598361968994, "learning_rate": 9.705014749262537e-06, "loss": 0.0686, "step": 329 }, { "epoch": 0.04869052010328292, "grad_norm": 5.353490352630615, "learning_rate": 9.734513274336284e-06, "loss": 0.124, "step": 330 }, { "epoch": 0.04883806713389893, "grad_norm": 7.897495746612549, "learning_rate": 9.764011799410031e-06, "loss": 0.1125, "step": 331 }, { "epoch": 0.04898561416451494, "grad_norm": 2.574463129043579, "learning_rate": 9.793510324483776e-06, "loss": 0.1019, "step": 332 }, { "epoch": 0.04913316119513095, "grad_norm": 3.688844919204712, "learning_rate": 9.823008849557523e-06, "loss": 0.0702, "step": 333 }, { "epoch": 0.049280708225746955, "grad_norm": 3.7450144290924072, "learning_rate": 9.85250737463127e-06, "loss": 0.1006, "step": 334 }, { "epoch": 0.04942825525636296, "grad_norm": 1.9378650188446045, "learning_rate": 9.882005899705015e-06, "loss": 0.0951, "step": 335 }, { "epoch": 0.04957580228697898, "grad_norm": 2.5890004634857178, "learning_rate": 9.911504424778762e-06, "loss": 0.0399, "step": 336 }, { "epoch": 0.049723349317594985, "grad_norm": 7.074208736419678, "learning_rate": 9.941002949852509e-06, "loss": 0.1363, "step": 337 }, { "epoch": 0.04987089634821099, "grad_norm": 2.120678663253784, "learning_rate": 9.970501474926254e-06, "loss": 0.119, "step": 338 }, { "epoch": 0.050018443378827, "grad_norm": 1.8679440021514893, "learning_rate": 1e-05, "loss": 0.0776, "step": 339 }, { "epoch": 0.05016599040944301, "grad_norm": 4.954777717590332, "learning_rate": 1.0029498525073746e-05, "loss": 0.1686, "step": 340 }, { "epoch": 0.05016599040944301, "eval_accuracy": 0.9638205499276411, "eval_f1": 0.9373433583959899, "eval_loss": 0.13156850636005402, "eval_precision": 0.9689119170984456, "eval_recall": 0.9077669902912622, "eval_runtime": 49.3505, "eval_samples_per_second": 5.897, "eval_steps_per_second": 0.203, "step": 340 }, { "epoch": 0.050313537440059015, "grad_norm": 5.471776008605957, "learning_rate": 1.0058997050147495e-05, "loss": 0.1288, "step": 341 }, { "epoch": 0.05046108447067503, "grad_norm": 4.600009918212891, "learning_rate": 1.008849557522124e-05, "loss": 0.1852, "step": 342 }, { "epoch": 0.05060863150129104, "grad_norm": 5.682035446166992, "learning_rate": 1.0117994100294985e-05, "loss": 0.1538, "step": 343 }, { "epoch": 0.050756178531907045, "grad_norm": 3.672041654586792, "learning_rate": 1.0147492625368733e-05, "loss": 0.0822, "step": 344 }, { "epoch": 0.05090372556252305, "grad_norm": 4.3361430168151855, "learning_rate": 1.0176991150442479e-05, "loss": 0.0658, "step": 345 }, { "epoch": 0.05105127259313906, "grad_norm": 3.2561590671539307, "learning_rate": 1.0206489675516225e-05, "loss": 0.0845, "step": 346 }, { "epoch": 0.051198819623755075, "grad_norm": 3.666931629180908, "learning_rate": 1.0235988200589972e-05, "loss": 0.0628, "step": 347 }, { "epoch": 0.05134636665437108, "grad_norm": 2.1404881477355957, "learning_rate": 1.0265486725663717e-05, "loss": 0.1091, "step": 348 }, { "epoch": 0.05149391368498709, "grad_norm": 3.48825740814209, "learning_rate": 1.0294985250737464e-05, "loss": 0.1666, "step": 349 }, { "epoch": 0.0516414607156031, "grad_norm": 2.4481754302978516, "learning_rate": 1.032448377581121e-05, "loss": 0.0485, "step": 350 }, { "epoch": 0.051789007746219105, "grad_norm": 6.18185567855835, "learning_rate": 1.0353982300884956e-05, "loss": 0.1059, "step": 351 }, { "epoch": 0.05193655477683511, "grad_norm": 4.555138111114502, "learning_rate": 1.0383480825958703e-05, "loss": 0.0839, "step": 352 }, { "epoch": 0.05208410180745113, "grad_norm": 3.5837013721466064, "learning_rate": 1.0412979351032448e-05, "loss": 0.0768, "step": 353 }, { "epoch": 0.052231648838067135, "grad_norm": 2.315218925476074, "learning_rate": 1.0442477876106197e-05, "loss": 0.114, "step": 354 }, { "epoch": 0.05237919586868314, "grad_norm": 2.773099422454834, "learning_rate": 1.0471976401179942e-05, "loss": 0.0753, "step": 355 }, { "epoch": 0.05252674289929915, "grad_norm": 7.356622219085693, "learning_rate": 1.0501474926253687e-05, "loss": 0.145, "step": 356 }, { "epoch": 0.05267428992991516, "grad_norm": 2.9815852642059326, "learning_rate": 1.0530973451327436e-05, "loss": 0.0914, "step": 357 }, { "epoch": 0.05282183696053117, "grad_norm": 3.948357343673706, "learning_rate": 1.0560471976401181e-05, "loss": 0.1008, "step": 358 }, { "epoch": 0.05296938399114718, "grad_norm": 4.183931827545166, "learning_rate": 1.0589970501474926e-05, "loss": 0.0847, "step": 359 }, { "epoch": 0.05311693102176319, "grad_norm": 1.4670817852020264, "learning_rate": 1.0619469026548675e-05, "loss": 0.0542, "step": 360 }, { "epoch": 0.05311693102176319, "eval_accuracy": 0.959479015918958, "eval_f1": 0.9303482587064676, "eval_loss": 0.12699969112873077, "eval_precision": 0.9540816326530612, "eval_recall": 0.9077669902912622, "eval_runtime": 49.9574, "eval_samples_per_second": 5.825, "eval_steps_per_second": 0.2, "step": 360 }, { "epoch": 0.053264478052379195, "grad_norm": 4.895223140716553, "learning_rate": 1.064896755162242e-05, "loss": 0.1715, "step": 361 }, { "epoch": 0.0534120250829952, "grad_norm": 4.754990577697754, "learning_rate": 1.0678466076696165e-05, "loss": 0.1289, "step": 362 }, { "epoch": 0.05355957211361121, "grad_norm": 4.448680400848389, "learning_rate": 1.0707964601769914e-05, "loss": 0.124, "step": 363 }, { "epoch": 0.053707119144227225, "grad_norm": 4.0850653648376465, "learning_rate": 1.0737463126843659e-05, "loss": 0.0856, "step": 364 }, { "epoch": 0.05385466617484323, "grad_norm": 13.095627784729004, "learning_rate": 1.0766961651917404e-05, "loss": 0.1709, "step": 365 }, { "epoch": 0.05400221320545924, "grad_norm": 3.209557294845581, "learning_rate": 1.079646017699115e-05, "loss": 0.1472, "step": 366 }, { "epoch": 0.05414976023607525, "grad_norm": 4.351004123687744, "learning_rate": 1.0825958702064898e-05, "loss": 0.0747, "step": 367 }, { "epoch": 0.054297307266691255, "grad_norm": 4.11442756652832, "learning_rate": 1.0855457227138645e-05, "loss": 0.1024, "step": 368 }, { "epoch": 0.05444485429730727, "grad_norm": 7.142582893371582, "learning_rate": 1.088495575221239e-05, "loss": 0.1597, "step": 369 }, { "epoch": 0.05459240132792328, "grad_norm": 3.3445897102355957, "learning_rate": 1.0914454277286137e-05, "loss": 0.0689, "step": 370 }, { "epoch": 0.054739948358539285, "grad_norm": 2.5734426975250244, "learning_rate": 1.0943952802359883e-05, "loss": 0.12, "step": 371 }, { "epoch": 0.05488749538915529, "grad_norm": 3.8147268295288086, "learning_rate": 1.0973451327433629e-05, "loss": 0.0699, "step": 372 }, { "epoch": 0.0550350424197713, "grad_norm": 4.332840919494629, "learning_rate": 1.1002949852507377e-05, "loss": 0.1048, "step": 373 }, { "epoch": 0.05518258945038731, "grad_norm": 2.9753212928771973, "learning_rate": 1.1032448377581122e-05, "loss": 0.0661, "step": 374 }, { "epoch": 0.05533013648100332, "grad_norm": 8.134544372558594, "learning_rate": 1.1061946902654867e-05, "loss": 0.0905, "step": 375 }, { "epoch": 0.05547768351161933, "grad_norm": 8.020297050476074, "learning_rate": 1.1091445427728616e-05, "loss": 0.1906, "step": 376 }, { "epoch": 0.05562523054223534, "grad_norm": 4.0883612632751465, "learning_rate": 1.1120943952802361e-05, "loss": 0.0776, "step": 377 }, { "epoch": 0.055772777572851345, "grad_norm": 5.765987873077393, "learning_rate": 1.1150442477876106e-05, "loss": 0.0504, "step": 378 }, { "epoch": 0.05592032460346735, "grad_norm": 3.230111598968506, "learning_rate": 1.1179941002949855e-05, "loss": 0.109, "step": 379 }, { "epoch": 0.05606787163408337, "grad_norm": 4.81991720199585, "learning_rate": 1.12094395280236e-05, "loss": 0.131, "step": 380 }, { "epoch": 0.05606787163408337, "eval_accuracy": 0.9638205499276411, "eval_f1": 0.9373433583959899, "eval_loss": 0.11704747378826141, "eval_precision": 0.9689119170984456, "eval_recall": 0.9077669902912622, "eval_runtime": 49.4142, "eval_samples_per_second": 5.889, "eval_steps_per_second": 0.202, "step": 380 }, { "epoch": 0.056215418664699375, "grad_norm": 6.845113277435303, "learning_rate": 1.1238938053097345e-05, "loss": 0.1723, "step": 381 }, { "epoch": 0.05636296569531538, "grad_norm": 3.922692060470581, "learning_rate": 1.1268436578171092e-05, "loss": 0.1095, "step": 382 }, { "epoch": 0.05651051272593139, "grad_norm": 2.0658581256866455, "learning_rate": 1.1297935103244839e-05, "loss": 0.084, "step": 383 }, { "epoch": 0.0566580597565474, "grad_norm": 2.986639976501465, "learning_rate": 1.1327433628318584e-05, "loss": 0.0771, "step": 384 }, { "epoch": 0.056805606787163405, "grad_norm": 4.78214693069458, "learning_rate": 1.1356932153392331e-05, "loss": 0.1344, "step": 385 }, { "epoch": 0.05695315381777942, "grad_norm": 1.850988745689392, "learning_rate": 1.1386430678466078e-05, "loss": 0.0803, "step": 386 }, { "epoch": 0.05710070084839543, "grad_norm": 3.717816114425659, "learning_rate": 1.1415929203539825e-05, "loss": 0.0611, "step": 387 }, { "epoch": 0.057248247879011435, "grad_norm": 2.820502758026123, "learning_rate": 1.144542772861357e-05, "loss": 0.0773, "step": 388 }, { "epoch": 0.05739579490962744, "grad_norm": 5.5672478675842285, "learning_rate": 1.1474926253687317e-05, "loss": 0.1118, "step": 389 }, { "epoch": 0.05754334194024345, "grad_norm": 2.3514554500579834, "learning_rate": 1.1504424778761064e-05, "loss": 0.0665, "step": 390 }, { "epoch": 0.05769088897085946, "grad_norm": 3.319936990737915, "learning_rate": 1.1533923303834809e-05, "loss": 0.0881, "step": 391 }, { "epoch": 0.05783843600147547, "grad_norm": 5.209056854248047, "learning_rate": 1.1563421828908557e-05, "loss": 0.075, "step": 392 }, { "epoch": 0.05798598303209148, "grad_norm": 4.986311912536621, "learning_rate": 1.1592920353982302e-05, "loss": 0.121, "step": 393 }, { "epoch": 0.05813353006270749, "grad_norm": 4.4049506187438965, "learning_rate": 1.1622418879056048e-05, "loss": 0.1284, "step": 394 }, { "epoch": 0.058281077093323495, "grad_norm": 5.336215496063232, "learning_rate": 1.1651917404129796e-05, "loss": 0.0497, "step": 395 }, { "epoch": 0.0584286241239395, "grad_norm": 4.9060378074646, "learning_rate": 1.1681415929203541e-05, "loss": 0.1373, "step": 396 }, { "epoch": 0.05857617115455552, "grad_norm": 3.8828747272491455, "learning_rate": 1.1710914454277286e-05, "loss": 0.0996, "step": 397 }, { "epoch": 0.058723718185171525, "grad_norm": 7.409179210662842, "learning_rate": 1.1740412979351032e-05, "loss": 0.125, "step": 398 }, { "epoch": 0.05887126521578753, "grad_norm": 4.420334815979004, "learning_rate": 1.176991150442478e-05, "loss": 0.0965, "step": 399 }, { "epoch": 0.05901881224640354, "grad_norm": 4.1392436027526855, "learning_rate": 1.1799410029498525e-05, "loss": 0.1276, "step": 400 }, { "epoch": 0.05901881224640354, "eval_accuracy": 0.9623733719247467, "eval_f1": 0.9359605911330049, "eval_loss": 0.11985436826944351, "eval_precision": 0.95, "eval_recall": 0.9223300970873787, "eval_runtime": 49.1559, "eval_samples_per_second": 5.92, "eval_steps_per_second": 0.203, "step": 400 }, { "epoch": 0.05916635927701955, "grad_norm": 3.6427252292633057, "learning_rate": 1.1828908554572272e-05, "loss": 0.1603, "step": 401 }, { "epoch": 0.059313906307635555, "grad_norm": 3.2943665981292725, "learning_rate": 1.1858407079646019e-05, "loss": 0.103, "step": 402 }, { "epoch": 0.05946145333825157, "grad_norm": 7.148464679718018, "learning_rate": 1.1887905604719764e-05, "loss": 0.1466, "step": 403 }, { "epoch": 0.05960900036886758, "grad_norm": 4.643838405609131, "learning_rate": 1.1917404129793511e-05, "loss": 0.1211, "step": 404 }, { "epoch": 0.059756547399483585, "grad_norm": 2.579813003540039, "learning_rate": 1.1946902654867258e-05, "loss": 0.0922, "step": 405 }, { "epoch": 0.05990409443009959, "grad_norm": 4.275233745574951, "learning_rate": 1.1976401179941005e-05, "loss": 0.0942, "step": 406 }, { "epoch": 0.0600516414607156, "grad_norm": 5.5644121170043945, "learning_rate": 1.200589970501475e-05, "loss": 0.1498, "step": 407 }, { "epoch": 0.060199188491331615, "grad_norm": 3.4050004482269287, "learning_rate": 1.2035398230088497e-05, "loss": 0.0795, "step": 408 }, { "epoch": 0.06034673552194762, "grad_norm": 3.207798719406128, "learning_rate": 1.2064896755162244e-05, "loss": 0.0804, "step": 409 }, { "epoch": 0.06049428255256363, "grad_norm": 3.1236562728881836, "learning_rate": 1.2094395280235989e-05, "loss": 0.0844, "step": 410 }, { "epoch": 0.06064182958317964, "grad_norm": 2.5428359508514404, "learning_rate": 1.2123893805309736e-05, "loss": 0.0792, "step": 411 }, { "epoch": 0.060789376613795645, "grad_norm": 1.2924425601959229, "learning_rate": 1.2153392330383483e-05, "loss": 0.0623, "step": 412 }, { "epoch": 0.06093692364441165, "grad_norm": 3.5470991134643555, "learning_rate": 1.2182890855457228e-05, "loss": 0.1177, "step": 413 }, { "epoch": 0.06108447067502767, "grad_norm": 1.9067574739456177, "learning_rate": 1.2212389380530973e-05, "loss": 0.0832, "step": 414 }, { "epoch": 0.061232017705643675, "grad_norm": 6.316958427429199, "learning_rate": 1.2241887905604722e-05, "loss": 0.1274, "step": 415 }, { "epoch": 0.06137956473625968, "grad_norm": 3.9669036865234375, "learning_rate": 1.2271386430678467e-05, "loss": 0.1066, "step": 416 }, { "epoch": 0.06152711176687569, "grad_norm": 3.681623935699463, "learning_rate": 1.2300884955752212e-05, "loss": 0.1041, "step": 417 }, { "epoch": 0.0616746587974917, "grad_norm": 4.660053253173828, "learning_rate": 1.233038348082596e-05, "loss": 0.1159, "step": 418 }, { "epoch": 0.06182220582810771, "grad_norm": 3.8386647701263428, "learning_rate": 1.2359882005899706e-05, "loss": 0.1269, "step": 419 }, { "epoch": 0.06196975285872372, "grad_norm": 2.965636730194092, "learning_rate": 1.2389380530973452e-05, "loss": 0.0377, "step": 420 }, { "epoch": 0.06196975285872372, "eval_accuracy": 0.9609261939218524, "eval_f1": 0.9336609336609336, "eval_loss": 0.1210731714963913, "eval_precision": 0.945273631840796, "eval_recall": 0.9223300970873787, "eval_runtime": 50.057, "eval_samples_per_second": 5.813, "eval_steps_per_second": 0.2, "step": 420 }, { "epoch": 0.06211729988933973, "grad_norm": 8.297836303710938, "learning_rate": 1.24188790560472e-05, "loss": 0.0876, "step": 421 }, { "epoch": 0.062264846919955735, "grad_norm": 9.802227020263672, "learning_rate": 1.2448377581120944e-05, "loss": 0.1187, "step": 422 }, { "epoch": 0.06241239395057174, "grad_norm": 2.1138927936553955, "learning_rate": 1.2477876106194691e-05, "loss": 0.0471, "step": 423 }, { "epoch": 0.06255994098118775, "grad_norm": 3.281634569168091, "learning_rate": 1.2507374631268438e-05, "loss": 0.0735, "step": 424 }, { "epoch": 0.06270748801180376, "grad_norm": 6.793506622314453, "learning_rate": 1.2536873156342183e-05, "loss": 0.1125, "step": 425 }, { "epoch": 0.06285503504241977, "grad_norm": 7.528573036193848, "learning_rate": 1.256637168141593e-05, "loss": 0.1149, "step": 426 }, { "epoch": 0.06300258207303577, "grad_norm": 3.2881975173950195, "learning_rate": 1.2595870206489677e-05, "loss": 0.0944, "step": 427 }, { "epoch": 0.0631501291036518, "grad_norm": 2.839482307434082, "learning_rate": 1.2625368731563424e-05, "loss": 0.0622, "step": 428 }, { "epoch": 0.0632976761342678, "grad_norm": 3.3470849990844727, "learning_rate": 1.2654867256637169e-05, "loss": 0.1367, "step": 429 }, { "epoch": 0.06344522316488381, "grad_norm": 1.8543541431427002, "learning_rate": 1.2684365781710914e-05, "loss": 0.0185, "step": 430 }, { "epoch": 0.06359277019549982, "grad_norm": 3.678795337677002, "learning_rate": 1.2713864306784663e-05, "loss": 0.0705, "step": 431 }, { "epoch": 0.06374031722611583, "grad_norm": 4.085397243499756, "learning_rate": 1.2743362831858408e-05, "loss": 0.086, "step": 432 }, { "epoch": 0.06388786425673183, "grad_norm": 4.523938179016113, "learning_rate": 1.2772861356932153e-05, "loss": 0.0967, "step": 433 }, { "epoch": 0.06403541128734784, "grad_norm": 4.135919094085693, "learning_rate": 1.2802359882005902e-05, "loss": 0.0524, "step": 434 }, { "epoch": 0.06418295831796385, "grad_norm": 6.929118633270264, "learning_rate": 1.2831858407079647e-05, "loss": 0.0596, "step": 435 }, { "epoch": 0.06433050534857986, "grad_norm": 3.2810797691345215, "learning_rate": 1.2861356932153392e-05, "loss": 0.0316, "step": 436 }, { "epoch": 0.06447805237919586, "grad_norm": 7.82334566116333, "learning_rate": 1.289085545722714e-05, "loss": 0.1666, "step": 437 }, { "epoch": 0.06462559940981187, "grad_norm": 4.659994602203369, "learning_rate": 1.2920353982300886e-05, "loss": 0.0993, "step": 438 }, { "epoch": 0.06477314644042789, "grad_norm": 5.322907447814941, "learning_rate": 1.2949852507374631e-05, "loss": 0.1437, "step": 439 }, { "epoch": 0.0649206934710439, "grad_norm": 5.643435955047607, "learning_rate": 1.297935103244838e-05, "loss": 0.1356, "step": 440 }, { "epoch": 0.0649206934710439, "eval_accuracy": 0.9609261939218524, "eval_f1": 0.9333333333333333, "eval_loss": 0.1265305131673813, "eval_precision": 0.949748743718593, "eval_recall": 0.9174757281553398, "eval_runtime": 50.4533, "eval_samples_per_second": 5.768, "eval_steps_per_second": 0.198, "step": 440 }, { "epoch": 0.06506824050165991, "grad_norm": 5.105215072631836, "learning_rate": 1.3008849557522125e-05, "loss": 0.1254, "step": 441 }, { "epoch": 0.06521578753227592, "grad_norm": 4.925067901611328, "learning_rate": 1.3038348082595871e-05, "loss": 0.1325, "step": 442 }, { "epoch": 0.06536333456289192, "grad_norm": 5.5667724609375, "learning_rate": 1.3067846607669618e-05, "loss": 0.1009, "step": 443 }, { "epoch": 0.06551088159350793, "grad_norm": 1.5036132335662842, "learning_rate": 1.3097345132743363e-05, "loss": 0.0375, "step": 444 }, { "epoch": 0.06565842862412394, "grad_norm": 9.027290344238281, "learning_rate": 1.312684365781711e-05, "loss": 0.0811, "step": 445 }, { "epoch": 0.06580597565473995, "grad_norm": 9.74825382232666, "learning_rate": 1.3156342182890856e-05, "loss": 0.1306, "step": 446 }, { "epoch": 0.06595352268535595, "grad_norm": 4.21965217590332, "learning_rate": 1.3185840707964604e-05, "loss": 0.1089, "step": 447 }, { "epoch": 0.06610106971597196, "grad_norm": 3.56333065032959, "learning_rate": 1.321533923303835e-05, "loss": 0.0679, "step": 448 }, { "epoch": 0.06624861674658797, "grad_norm": 5.2955451011657715, "learning_rate": 1.3244837758112094e-05, "loss": 0.0603, "step": 449 }, { "epoch": 0.06639616377720399, "grad_norm": 5.984939098358154, "learning_rate": 1.3274336283185843e-05, "loss": 0.1095, "step": 450 }, { "epoch": 0.06654371080782, "grad_norm": 4.408089637756348, "learning_rate": 1.3303834808259588e-05, "loss": 0.0868, "step": 451 }, { "epoch": 0.066691257838436, "grad_norm": 2.5826268196105957, "learning_rate": 1.3333333333333333e-05, "loss": 0.0836, "step": 452 }, { "epoch": 0.06683880486905201, "grad_norm": 3.6972694396972656, "learning_rate": 1.3362831858407082e-05, "loss": 0.0823, "step": 453 }, { "epoch": 0.06698635189966802, "grad_norm": 6.000738143920898, "learning_rate": 1.3392330383480827e-05, "loss": 0.096, "step": 454 }, { "epoch": 0.06713389893028403, "grad_norm": 2.657524585723877, "learning_rate": 1.3421828908554572e-05, "loss": 0.0922, "step": 455 }, { "epoch": 0.06728144596090004, "grad_norm": 2.0444602966308594, "learning_rate": 1.345132743362832e-05, "loss": 0.0321, "step": 456 }, { "epoch": 0.06742899299151604, "grad_norm": 7.014885425567627, "learning_rate": 1.3480825958702066e-05, "loss": 0.1841, "step": 457 }, { "epoch": 0.06757654002213205, "grad_norm": 6.464117527008057, "learning_rate": 1.3510324483775811e-05, "loss": 0.1565, "step": 458 }, { "epoch": 0.06772408705274806, "grad_norm": 4.069104194641113, "learning_rate": 1.353982300884956e-05, "loss": 0.0777, "step": 459 }, { "epoch": 0.06787163408336407, "grad_norm": 4.14389181137085, "learning_rate": 1.3569321533923305e-05, "loss": 0.1409, "step": 460 }, { "epoch": 0.06787163408336407, "eval_accuracy": 0.9638205499276411, "eval_f1": 0.9373433583959899, "eval_loss": 0.1059475913643837, "eval_precision": 0.9689119170984456, "eval_recall": 0.9077669902912622, "eval_runtime": 50.2358, "eval_samples_per_second": 5.793, "eval_steps_per_second": 0.199, "step": 460 }, { "epoch": 0.06801918111398009, "grad_norm": 3.105227470397949, "learning_rate": 1.3598820058997052e-05, "loss": 0.0584, "step": 461 }, { "epoch": 0.0681667281445961, "grad_norm": 4.1766204833984375, "learning_rate": 1.3628318584070797e-05, "loss": 0.1195, "step": 462 }, { "epoch": 0.0683142751752121, "grad_norm": 2.792880058288574, "learning_rate": 1.3657817109144544e-05, "loss": 0.0501, "step": 463 }, { "epoch": 0.06846182220582811, "grad_norm": 2.900728225708008, "learning_rate": 1.368731563421829e-05, "loss": 0.0681, "step": 464 }, { "epoch": 0.06860936923644412, "grad_norm": 5.650767803192139, "learning_rate": 1.3716814159292036e-05, "loss": 0.0974, "step": 465 }, { "epoch": 0.06875691626706013, "grad_norm": 2.3405301570892334, "learning_rate": 1.3746312684365784e-05, "loss": 0.1292, "step": 466 }, { "epoch": 0.06890446329767613, "grad_norm": 4.0819830894470215, "learning_rate": 1.377581120943953e-05, "loss": 0.0978, "step": 467 }, { "epoch": 0.06905201032829214, "grad_norm": 2.0271244049072266, "learning_rate": 1.3805309734513275e-05, "loss": 0.0916, "step": 468 }, { "epoch": 0.06919955735890815, "grad_norm": 2.627279281616211, "learning_rate": 1.3834808259587023e-05, "loss": 0.1063, "step": 469 }, { "epoch": 0.06934710438952416, "grad_norm": 2.0692949295043945, "learning_rate": 1.3864306784660768e-05, "loss": 0.0724, "step": 470 }, { "epoch": 0.06949465142014016, "grad_norm": 2.728027582168579, "learning_rate": 1.3893805309734513e-05, "loss": 0.0825, "step": 471 }, { "epoch": 0.06964219845075618, "grad_norm": 3.2455193996429443, "learning_rate": 1.3923303834808262e-05, "loss": 0.0992, "step": 472 }, { "epoch": 0.06978974548137219, "grad_norm": 8.183334350585938, "learning_rate": 1.3952802359882007e-05, "loss": 0.1477, "step": 473 }, { "epoch": 0.0699372925119882, "grad_norm": 1.508196234703064, "learning_rate": 1.3982300884955752e-05, "loss": 0.0627, "step": 474 }, { "epoch": 0.07008483954260421, "grad_norm": 5.840567588806152, "learning_rate": 1.4011799410029501e-05, "loss": 0.1271, "step": 475 }, { "epoch": 0.07023238657322022, "grad_norm": 2.8929684162139893, "learning_rate": 1.4041297935103246e-05, "loss": 0.0366, "step": 476 }, { "epoch": 0.07037993360383622, "grad_norm": 2.839616298675537, "learning_rate": 1.4070796460176991e-05, "loss": 0.0349, "step": 477 }, { "epoch": 0.07052748063445223, "grad_norm": 7.35011100769043, "learning_rate": 1.4100294985250738e-05, "loss": 0.0576, "step": 478 }, { "epoch": 0.07067502766506824, "grad_norm": 1.8464149236679077, "learning_rate": 1.4129793510324485e-05, "loss": 0.0364, "step": 479 }, { "epoch": 0.07082257469568425, "grad_norm": 5.065067768096924, "learning_rate": 1.4159292035398232e-05, "loss": 0.0815, "step": 480 }, { "epoch": 0.07082257469568425, "eval_accuracy": 0.9623733719247467, "eval_f1": 0.9362745098039216, "eval_loss": 0.12116201967000961, "eval_precision": 0.9455445544554455, "eval_recall": 0.9271844660194175, "eval_runtime": 52.8954, "eval_samples_per_second": 5.501, "eval_steps_per_second": 0.189, "step": 480 }, { "epoch": 0.07097012172630025, "grad_norm": 3.8353214263916016, "learning_rate": 1.4188790560471977e-05, "loss": 0.0765, "step": 481 }, { "epoch": 0.07111766875691626, "grad_norm": 3.316110372543335, "learning_rate": 1.4218289085545724e-05, "loss": 0.0809, "step": 482 }, { "epoch": 0.07126521578753228, "grad_norm": 3.284728765487671, "learning_rate": 1.424778761061947e-05, "loss": 0.0429, "step": 483 }, { "epoch": 0.07141276281814829, "grad_norm": 12.038677215576172, "learning_rate": 1.4277286135693216e-05, "loss": 0.0711, "step": 484 }, { "epoch": 0.0715603098487643, "grad_norm": 3.558060646057129, "learning_rate": 1.4306784660766964e-05, "loss": 0.095, "step": 485 }, { "epoch": 0.0717078568793803, "grad_norm": 1.764790415763855, "learning_rate": 1.433628318584071e-05, "loss": 0.0343, "step": 486 }, { "epoch": 0.07185540390999631, "grad_norm": 3.7629714012145996, "learning_rate": 1.4365781710914455e-05, "loss": 0.0857, "step": 487 }, { "epoch": 0.07200295094061232, "grad_norm": 4.762734889984131, "learning_rate": 1.4395280235988203e-05, "loss": 0.0628, "step": 488 }, { "epoch": 0.07215049797122833, "grad_norm": 2.31052303314209, "learning_rate": 1.4424778761061948e-05, "loss": 0.0427, "step": 489 }, { "epoch": 0.07229804500184434, "grad_norm": 5.971585750579834, "learning_rate": 1.4454277286135694e-05, "loss": 0.1547, "step": 490 }, { "epoch": 0.07244559203246034, "grad_norm": 5.803487777709961, "learning_rate": 1.4483775811209442e-05, "loss": 0.1235, "step": 491 }, { "epoch": 0.07259313906307635, "grad_norm": 3.256314277648926, "learning_rate": 1.4513274336283187e-05, "loss": 0.1097, "step": 492 }, { "epoch": 0.07274068609369236, "grad_norm": 3.708127021789551, "learning_rate": 1.4542772861356933e-05, "loss": 0.0716, "step": 493 }, { "epoch": 0.07288823312430838, "grad_norm": 6.63476037979126, "learning_rate": 1.457227138643068e-05, "loss": 0.1058, "step": 494 }, { "epoch": 0.07303578015492439, "grad_norm": 5.938539505004883, "learning_rate": 1.4601769911504426e-05, "loss": 0.1514, "step": 495 }, { "epoch": 0.0731833271855404, "grad_norm": 2.7483792304992676, "learning_rate": 1.4631268436578171e-05, "loss": 0.0693, "step": 496 }, { "epoch": 0.0733308742161564, "grad_norm": 6.032779693603516, "learning_rate": 1.4660766961651918e-05, "loss": 0.1217, "step": 497 }, { "epoch": 0.07347842124677241, "grad_norm": 4.033049583435059, "learning_rate": 1.4690265486725665e-05, "loss": 0.0597, "step": 498 }, { "epoch": 0.07362596827738842, "grad_norm": 9.760767936706543, "learning_rate": 1.4719764011799412e-05, "loss": 0.1895, "step": 499 }, { "epoch": 0.07377351530800443, "grad_norm": 1.2081996202468872, "learning_rate": 1.4749262536873157e-05, "loss": 0.0143, "step": 500 }, { "epoch": 0.07377351530800443, "eval_accuracy": 0.9638205499276411, "eval_f1": 0.9376558603491272, "eval_loss": 0.10416842252016068, "eval_precision": 0.9641025641025641, "eval_recall": 0.912621359223301, "eval_runtime": 48.6811, "eval_samples_per_second": 5.978, "eval_steps_per_second": 0.205, "step": 500 }, { "epoch": 0.07392106233862043, "grad_norm": 3.1498217582702637, "learning_rate": 1.4778761061946904e-05, "loss": 0.0972, "step": 501 }, { "epoch": 0.07406860936923644, "grad_norm": 4.898123741149902, "learning_rate": 1.4808259587020651e-05, "loss": 0.1232, "step": 502 }, { "epoch": 0.07421615639985245, "grad_norm": 2.9039347171783447, "learning_rate": 1.4837758112094396e-05, "loss": 0.0848, "step": 503 }, { "epoch": 0.07436370343046846, "grad_norm": 4.021357536315918, "learning_rate": 1.4867256637168143e-05, "loss": 0.0757, "step": 504 }, { "epoch": 0.07451125046108448, "grad_norm": 3.845618724822998, "learning_rate": 1.489675516224189e-05, "loss": 0.0916, "step": 505 }, { "epoch": 0.07465879749170048, "grad_norm": 3.994896173477173, "learning_rate": 1.4926253687315635e-05, "loss": 0.1367, "step": 506 }, { "epoch": 0.07480634452231649, "grad_norm": 8.842558860778809, "learning_rate": 1.4955752212389383e-05, "loss": 0.1086, "step": 507 }, { "epoch": 0.0749538915529325, "grad_norm": 3.748542308807373, "learning_rate": 1.4985250737463129e-05, "loss": 0.1582, "step": 508 }, { "epoch": 0.07510143858354851, "grad_norm": 3.2000162601470947, "learning_rate": 1.5014749262536874e-05, "loss": 0.0644, "step": 509 }, { "epoch": 0.07524898561416452, "grad_norm": 3.0972352027893066, "learning_rate": 1.5044247787610619e-05, "loss": 0.082, "step": 510 }, { "epoch": 0.07539653264478052, "grad_norm": 4.598974227905273, "learning_rate": 1.5073746312684368e-05, "loss": 0.061, "step": 511 }, { "epoch": 0.07554407967539653, "grad_norm": 1.9152635335922241, "learning_rate": 1.5103244837758113e-05, "loss": 0.0521, "step": 512 }, { "epoch": 0.07569162670601254, "grad_norm": 4.356319427490234, "learning_rate": 1.513274336283186e-05, "loss": 0.1535, "step": 513 }, { "epoch": 0.07583917373662855, "grad_norm": 2.212160348892212, "learning_rate": 1.5162241887905606e-05, "loss": 0.1243, "step": 514 }, { "epoch": 0.07598672076724455, "grad_norm": 4.025852680206299, "learning_rate": 1.5191740412979352e-05, "loss": 0.1415, "step": 515 }, { "epoch": 0.07613426779786057, "grad_norm": 4.3787102699279785, "learning_rate": 1.5221238938053098e-05, "loss": 0.0935, "step": 516 }, { "epoch": 0.07628181482847658, "grad_norm": 5.65179967880249, "learning_rate": 1.5250737463126845e-05, "loss": 0.0878, "step": 517 }, { "epoch": 0.07642936185909259, "grad_norm": 2.5024056434631348, "learning_rate": 1.5280235988200592e-05, "loss": 0.0564, "step": 518 }, { "epoch": 0.0765769088897086, "grad_norm": 2.851257562637329, "learning_rate": 1.5309734513274336e-05, "loss": 0.1219, "step": 519 }, { "epoch": 0.0767244559203246, "grad_norm": 4.302542686462402, "learning_rate": 1.5339233038348086e-05, "loss": 0.0877, "step": 520 }, { "epoch": 0.0767244559203246, "eval_accuracy": 0.9667149059334298, "eval_f1": 0.9429280397022333, "eval_loss": 0.09762920439243317, "eval_precision": 0.9644670050761421, "eval_recall": 0.9223300970873787, "eval_runtime": 49.0105, "eval_samples_per_second": 5.938, "eval_steps_per_second": 0.204, "step": 520 }, { "epoch": 0.07687200295094061, "grad_norm": 5.179481506347656, "learning_rate": 1.536873156342183e-05, "loss": 0.1335, "step": 521 }, { "epoch": 0.07701954998155662, "grad_norm": 5.739594459533691, "learning_rate": 1.5398230088495576e-05, "loss": 0.092, "step": 522 }, { "epoch": 0.07716709701217263, "grad_norm": 3.930610179901123, "learning_rate": 1.5427728613569323e-05, "loss": 0.1161, "step": 523 }, { "epoch": 0.07731464404278864, "grad_norm": 3.050614356994629, "learning_rate": 1.545722713864307e-05, "loss": 0.109, "step": 524 }, { "epoch": 0.07746219107340464, "grad_norm": 3.716261863708496, "learning_rate": 1.5486725663716813e-05, "loss": 0.113, "step": 525 }, { "epoch": 0.07760973810402065, "grad_norm": 3.4055542945861816, "learning_rate": 1.551622418879056e-05, "loss": 0.1004, "step": 526 }, { "epoch": 0.07775728513463667, "grad_norm": 8.779967308044434, "learning_rate": 1.5545722713864307e-05, "loss": 0.1352, "step": 527 }, { "epoch": 0.07790483216525268, "grad_norm": 5.3793044090271, "learning_rate": 1.5575221238938054e-05, "loss": 0.0578, "step": 528 }, { "epoch": 0.07805237919586869, "grad_norm": 5.3324480056762695, "learning_rate": 1.56047197640118e-05, "loss": 0.1084, "step": 529 }, { "epoch": 0.0781999262264847, "grad_norm": 2.5956153869628906, "learning_rate": 1.5634218289085548e-05, "loss": 0.0641, "step": 530 }, { "epoch": 0.0783474732571007, "grad_norm": 2.589036703109741, "learning_rate": 1.5663716814159295e-05, "loss": 0.0385, "step": 531 }, { "epoch": 0.07849502028771671, "grad_norm": 2.88999605178833, "learning_rate": 1.5693215339233038e-05, "loss": 0.0604, "step": 532 }, { "epoch": 0.07864256731833272, "grad_norm": 2.993828058242798, "learning_rate": 1.5722713864306788e-05, "loss": 0.0436, "step": 533 }, { "epoch": 0.07879011434894873, "grad_norm": 3.797997236251831, "learning_rate": 1.5752212389380532e-05, "loss": 0.1089, "step": 534 }, { "epoch": 0.07893766137956473, "grad_norm": 3.172595977783203, "learning_rate": 1.578171091445428e-05, "loss": 0.1064, "step": 535 }, { "epoch": 0.07908520841018074, "grad_norm": 1.970426321029663, "learning_rate": 1.5811209439528025e-05, "loss": 0.0919, "step": 536 }, { "epoch": 0.07923275544079675, "grad_norm": 6.031107425689697, "learning_rate": 1.5840707964601772e-05, "loss": 0.1091, "step": 537 }, { "epoch": 0.07938030247141277, "grad_norm": 4.010724067687988, "learning_rate": 1.5870206489675516e-05, "loss": 0.1337, "step": 538 }, { "epoch": 0.07952784950202878, "grad_norm": 3.708670139312744, "learning_rate": 1.5899705014749266e-05, "loss": 0.081, "step": 539 }, { "epoch": 0.07967539653264478, "grad_norm": 9.564098358154297, "learning_rate": 1.592920353982301e-05, "loss": 0.1308, "step": 540 }, { "epoch": 0.07967539653264478, "eval_accuracy": 0.9623733719247467, "eval_f1": 0.9356435643564357, "eval_loss": 0.10419736802577972, "eval_precision": 0.9545454545454546, "eval_recall": 0.9174757281553398, "eval_runtime": 48.5629, "eval_samples_per_second": 5.992, "eval_steps_per_second": 0.206, "step": 540 }, { "epoch": 0.07982294356326079, "grad_norm": 3.2805235385894775, "learning_rate": 1.5958702064896756e-05, "loss": 0.0472, "step": 541 }, { "epoch": 0.0799704905938768, "grad_norm": 1.583587884902954, "learning_rate": 1.5988200589970503e-05, "loss": 0.0431, "step": 542 }, { "epoch": 0.08011803762449281, "grad_norm": 3.0135581493377686, "learning_rate": 1.601769911504425e-05, "loss": 0.0319, "step": 543 }, { "epoch": 0.08026558465510882, "grad_norm": 5.587074279785156, "learning_rate": 1.6047197640117994e-05, "loss": 0.1806, "step": 544 }, { "epoch": 0.08041313168572482, "grad_norm": 6.172431468963623, "learning_rate": 1.607669616519174e-05, "loss": 0.0532, "step": 545 }, { "epoch": 0.08056067871634083, "grad_norm": 3.7355899810791016, "learning_rate": 1.6106194690265487e-05, "loss": 0.0921, "step": 546 }, { "epoch": 0.08070822574695684, "grad_norm": 11.108098983764648, "learning_rate": 1.6135693215339234e-05, "loss": 0.0675, "step": 547 }, { "epoch": 0.08085577277757285, "grad_norm": 4.097662448883057, "learning_rate": 1.616519174041298e-05, "loss": 0.0839, "step": 548 }, { "epoch": 0.08100331980818885, "grad_norm": 6.03087854385376, "learning_rate": 1.6194690265486728e-05, "loss": 0.0483, "step": 549 }, { "epoch": 0.08115086683880487, "grad_norm": 4.9661712646484375, "learning_rate": 1.6224188790560475e-05, "loss": 0.1373, "step": 550 }, { "epoch": 0.08129841386942088, "grad_norm": 3.4052000045776367, "learning_rate": 1.6253687315634218e-05, "loss": 0.114, "step": 551 }, { "epoch": 0.08144596090003689, "grad_norm": 2.755847692489624, "learning_rate": 1.628318584070797e-05, "loss": 0.0881, "step": 552 }, { "epoch": 0.0815935079306529, "grad_norm": 3.9869935512542725, "learning_rate": 1.6312684365781712e-05, "loss": 0.1105, "step": 553 }, { "epoch": 0.0817410549612689, "grad_norm": 3.345513343811035, "learning_rate": 1.634218289085546e-05, "loss": 0.08, "step": 554 }, { "epoch": 0.08188860199188491, "grad_norm": 7.086312294006348, "learning_rate": 1.6371681415929206e-05, "loss": 0.1277, "step": 555 }, { "epoch": 0.08203614902250092, "grad_norm": 8.25257396697998, "learning_rate": 1.6401179941002953e-05, "loss": 0.1081, "step": 556 }, { "epoch": 0.08218369605311693, "grad_norm": 2.2816054821014404, "learning_rate": 1.6430678466076696e-05, "loss": 0.0928, "step": 557 }, { "epoch": 0.08233124308373294, "grad_norm": 4.287100315093994, "learning_rate": 1.6460176991150443e-05, "loss": 0.0853, "step": 558 }, { "epoch": 0.08247879011434894, "grad_norm": 1.3134773969650269, "learning_rate": 1.648967551622419e-05, "loss": 0.0659, "step": 559 }, { "epoch": 0.08262633714496495, "grad_norm": 1.967331051826477, "learning_rate": 1.6519174041297937e-05, "loss": 0.0673, "step": 560 }, { "epoch": 0.08262633714496495, "eval_accuracy": 0.9652677279305355, "eval_f1": 0.9396984924623115, "eval_loss": 0.10207635164260864, "eval_precision": 0.9739583333333334, "eval_recall": 0.9077669902912622, "eval_runtime": 48.7336, "eval_samples_per_second": 5.971, "eval_steps_per_second": 0.205, "step": 560 }, { "epoch": 0.08277388417558097, "grad_norm": 2.9538745880126953, "learning_rate": 1.6548672566371683e-05, "loss": 0.1231, "step": 561 }, { "epoch": 0.08292143120619698, "grad_norm": 2.7219223976135254, "learning_rate": 1.657817109144543e-05, "loss": 0.0658, "step": 562 }, { "epoch": 0.08306897823681299, "grad_norm": 2.7699062824249268, "learning_rate": 1.6607669616519174e-05, "loss": 0.1197, "step": 563 }, { "epoch": 0.083216525267429, "grad_norm": 2.5261945724487305, "learning_rate": 1.663716814159292e-05, "loss": 0.096, "step": 564 }, { "epoch": 0.083364072298045, "grad_norm": 2.6237528324127197, "learning_rate": 1.6666666666666667e-05, "loss": 0.088, "step": 565 }, { "epoch": 0.08351161932866101, "grad_norm": 4.606682300567627, "learning_rate": 1.6696165191740414e-05, "loss": 0.1062, "step": 566 }, { "epoch": 0.08365916635927702, "grad_norm": 2.354897975921631, "learning_rate": 1.672566371681416e-05, "loss": 0.0471, "step": 567 }, { "epoch": 0.08380671338989303, "grad_norm": 2.7975962162017822, "learning_rate": 1.6755162241887908e-05, "loss": 0.0903, "step": 568 }, { "epoch": 0.08395426042050903, "grad_norm": 2.96342396736145, "learning_rate": 1.6784660766961655e-05, "loss": 0.1415, "step": 569 }, { "epoch": 0.08410180745112504, "grad_norm": 4.285039901733398, "learning_rate": 1.68141592920354e-05, "loss": 0.0978, "step": 570 }, { "epoch": 0.08424935448174105, "grad_norm": 7.682709693908691, "learning_rate": 1.684365781710915e-05, "loss": 0.1367, "step": 571 }, { "epoch": 0.08439690151235707, "grad_norm": 2.368025779724121, "learning_rate": 1.6873156342182892e-05, "loss": 0.0497, "step": 572 }, { "epoch": 0.08454444854297308, "grad_norm": 2.1186583042144775, "learning_rate": 1.690265486725664e-05, "loss": 0.0733, "step": 573 }, { "epoch": 0.08469199557358909, "grad_norm": 3.0512938499450684, "learning_rate": 1.6932153392330382e-05, "loss": 0.0446, "step": 574 }, { "epoch": 0.08483954260420509, "grad_norm": 1.2943977117538452, "learning_rate": 1.6961651917404133e-05, "loss": 0.0701, "step": 575 }, { "epoch": 0.0849870896348211, "grad_norm": 3.4783735275268555, "learning_rate": 1.6991150442477876e-05, "loss": 0.0758, "step": 576 }, { "epoch": 0.08513463666543711, "grad_norm": 2.3816847801208496, "learning_rate": 1.7020648967551623e-05, "loss": 0.0785, "step": 577 }, { "epoch": 0.08528218369605312, "grad_norm": 4.383396148681641, "learning_rate": 1.705014749262537e-05, "loss": 0.0887, "step": 578 }, { "epoch": 0.08542973072666912, "grad_norm": 4.086099624633789, "learning_rate": 1.7079646017699117e-05, "loss": 0.0802, "step": 579 }, { "epoch": 0.08557727775728513, "grad_norm": 2.8814361095428467, "learning_rate": 1.7109144542772864e-05, "loss": 0.0763, "step": 580 }, { "epoch": 0.08557727775728513, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9444444444444444, "eval_loss": 0.09670104086399078, "eval_precision": 0.9842105263157894, "eval_recall": 0.9077669902912622, "eval_runtime": 50.7176, "eval_samples_per_second": 5.738, "eval_steps_per_second": 0.197, "step": 580 }, { "epoch": 0.08572482478790114, "grad_norm": 5.520656585693359, "learning_rate": 1.713864306784661e-05, "loss": 0.177, "step": 581 }, { "epoch": 0.08587237181851715, "grad_norm": 2.481374740600586, "learning_rate": 1.7168141592920354e-05, "loss": 0.1203, "step": 582 }, { "epoch": 0.08601991884913317, "grad_norm": 2.3149218559265137, "learning_rate": 1.71976401179941e-05, "loss": 0.0736, "step": 583 }, { "epoch": 0.08616746587974917, "grad_norm": 1.5254720449447632, "learning_rate": 1.7227138643067848e-05, "loss": 0.0595, "step": 584 }, { "epoch": 0.08631501291036518, "grad_norm": 2.036500930786133, "learning_rate": 1.7256637168141594e-05, "loss": 0.0534, "step": 585 }, { "epoch": 0.08646255994098119, "grad_norm": 1.740831732749939, "learning_rate": 1.728613569321534e-05, "loss": 0.0711, "step": 586 }, { "epoch": 0.0866101069715972, "grad_norm": 4.460459232330322, "learning_rate": 1.7315634218289088e-05, "loss": 0.0815, "step": 587 }, { "epoch": 0.0867576540022132, "grad_norm": 3.808619976043701, "learning_rate": 1.7345132743362835e-05, "loss": 0.0587, "step": 588 }, { "epoch": 0.08690520103282921, "grad_norm": 3.6966910362243652, "learning_rate": 1.737463126843658e-05, "loss": 0.1416, "step": 589 }, { "epoch": 0.08705274806344522, "grad_norm": 2.9356465339660645, "learning_rate": 1.7404129793510325e-05, "loss": 0.0665, "step": 590 }, { "epoch": 0.08720029509406123, "grad_norm": 8.07757568359375, "learning_rate": 1.7433628318584072e-05, "loss": 0.1019, "step": 591 }, { "epoch": 0.08734784212467724, "grad_norm": 2.0315401554107666, "learning_rate": 1.746312684365782e-05, "loss": 0.0421, "step": 592 }, { "epoch": 0.08749538915529324, "grad_norm": 3.900761604309082, "learning_rate": 1.7492625368731563e-05, "loss": 0.0404, "step": 593 }, { "epoch": 0.08764293618590926, "grad_norm": 2.7304024696350098, "learning_rate": 1.7522123893805313e-05, "loss": 0.048, "step": 594 }, { "epoch": 0.08779048321652527, "grad_norm": 3.4262402057647705, "learning_rate": 1.7551622418879056e-05, "loss": 0.084, "step": 595 }, { "epoch": 0.08793803024714128, "grad_norm": 7.073084354400635, "learning_rate": 1.7581120943952803e-05, "loss": 0.1463, "step": 596 }, { "epoch": 0.08808557727775729, "grad_norm": 5.898651599884033, "learning_rate": 1.761061946902655e-05, "loss": 0.117, "step": 597 }, { "epoch": 0.0882331243083733, "grad_norm": 1.0400193929672241, "learning_rate": 1.7640117994100297e-05, "loss": 0.0359, "step": 598 }, { "epoch": 0.0883806713389893, "grad_norm": 4.073667526245117, "learning_rate": 1.7669616519174044e-05, "loss": 0.1165, "step": 599 }, { "epoch": 0.08852821836960531, "grad_norm": 2.249925374984741, "learning_rate": 1.769911504424779e-05, "loss": 0.0827, "step": 600 }, { "epoch": 0.08852821836960531, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.949748743718593, "eval_loss": 0.09525217860937119, "eval_precision": 0.984375, "eval_recall": 0.9174757281553398, "eval_runtime": 50.8867, "eval_samples_per_second": 5.719, "eval_steps_per_second": 0.197, "step": 600 }, { "epoch": 0.08867576540022132, "grad_norm": 2.5262110233306885, "learning_rate": 1.7728613569321534e-05, "loss": 0.0708, "step": 601 }, { "epoch": 0.08882331243083733, "grad_norm": 3.8243699073791504, "learning_rate": 1.775811209439528e-05, "loss": 0.081, "step": 602 }, { "epoch": 0.08897085946145333, "grad_norm": 2.803745746612549, "learning_rate": 1.7787610619469028e-05, "loss": 0.0943, "step": 603 }, { "epoch": 0.08911840649206934, "grad_norm": 1.551865816116333, "learning_rate": 1.7817109144542775e-05, "loss": 0.0968, "step": 604 }, { "epoch": 0.08926595352268536, "grad_norm": 3.358435869216919, "learning_rate": 1.784660766961652e-05, "loss": 0.1182, "step": 605 }, { "epoch": 0.08941350055330137, "grad_norm": 4.9781928062438965, "learning_rate": 1.7876106194690265e-05, "loss": 0.0915, "step": 606 }, { "epoch": 0.08956104758391738, "grad_norm": 2.3012397289276123, "learning_rate": 1.7905604719764015e-05, "loss": 0.1128, "step": 607 }, { "epoch": 0.08970859461453339, "grad_norm": 2.0409114360809326, "learning_rate": 1.793510324483776e-05, "loss": 0.0483, "step": 608 }, { "epoch": 0.08985614164514939, "grad_norm": 6.084571838378906, "learning_rate": 1.7964601769911506e-05, "loss": 0.0599, "step": 609 }, { "epoch": 0.0900036886757654, "grad_norm": 2.198209762573242, "learning_rate": 1.7994100294985252e-05, "loss": 0.0958, "step": 610 }, { "epoch": 0.09015123570638141, "grad_norm": 1.6424261331558228, "learning_rate": 1.8023598820059e-05, "loss": 0.0717, "step": 611 }, { "epoch": 0.09029878273699742, "grad_norm": 1.502238154411316, "learning_rate": 1.8053097345132743e-05, "loss": 0.07, "step": 612 }, { "epoch": 0.09044632976761342, "grad_norm": 3.18418288230896, "learning_rate": 1.8082595870206493e-05, "loss": 0.1099, "step": 613 }, { "epoch": 0.09059387679822943, "grad_norm": 2.833146572113037, "learning_rate": 1.8112094395280236e-05, "loss": 0.0683, "step": 614 }, { "epoch": 0.09074142382884544, "grad_norm": 2.204911708831787, "learning_rate": 1.8141592920353983e-05, "loss": 0.0951, "step": 615 }, { "epoch": 0.09088897085946146, "grad_norm": 1.8501574993133545, "learning_rate": 1.817109144542773e-05, "loss": 0.0847, "step": 616 }, { "epoch": 0.09103651789007747, "grad_norm": 2.518203020095825, "learning_rate": 1.8200589970501477e-05, "loss": 0.0802, "step": 617 }, { "epoch": 0.09118406492069348, "grad_norm": 3.3474576473236084, "learning_rate": 1.823008849557522e-05, "loss": 0.0838, "step": 618 }, { "epoch": 0.09133161195130948, "grad_norm": 1.90496826171875, "learning_rate": 1.825958702064897e-05, "loss": 0.0305, "step": 619 }, { "epoch": 0.09147915898192549, "grad_norm": 7.2841081619262695, "learning_rate": 1.8289085545722714e-05, "loss": 0.0838, "step": 620 }, { "epoch": 0.09147915898192549, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.95, "eval_loss": 0.09230223298072815, "eval_precision": 0.979381443298969, "eval_recall": 0.9223300970873787, "eval_runtime": 50.1931, "eval_samples_per_second": 5.798, "eval_steps_per_second": 0.199, "step": 620 }, { "epoch": 0.0916267060125415, "grad_norm": 3.469712257385254, "learning_rate": 1.831858407079646e-05, "loss": 0.0618, "step": 621 }, { "epoch": 0.0917742530431575, "grad_norm": 4.419825077056885, "learning_rate": 1.8348082595870208e-05, "loss": 0.0701, "step": 622 }, { "epoch": 0.09192180007377351, "grad_norm": 5.594338893890381, "learning_rate": 1.8377581120943955e-05, "loss": 0.1196, "step": 623 }, { "epoch": 0.09206934710438952, "grad_norm": 3.1859700679779053, "learning_rate": 1.8407079646017702e-05, "loss": 0.1119, "step": 624 }, { "epoch": 0.09221689413500553, "grad_norm": 2.0639700889587402, "learning_rate": 1.8436578171091445e-05, "loss": 0.0463, "step": 625 }, { "epoch": 0.09236444116562154, "grad_norm": 3.067298650741577, "learning_rate": 1.8466076696165195e-05, "loss": 0.078, "step": 626 }, { "epoch": 0.09251198819623756, "grad_norm": 3.8201942443847656, "learning_rate": 1.849557522123894e-05, "loss": 0.0517, "step": 627 }, { "epoch": 0.09265953522685356, "grad_norm": 4.949003219604492, "learning_rate": 1.8525073746312686e-05, "loss": 0.1366, "step": 628 }, { "epoch": 0.09280708225746957, "grad_norm": 1.8381308317184448, "learning_rate": 1.8554572271386433e-05, "loss": 0.0447, "step": 629 }, { "epoch": 0.09295462928808558, "grad_norm": 4.433422088623047, "learning_rate": 1.858407079646018e-05, "loss": 0.1301, "step": 630 }, { "epoch": 0.09310217631870159, "grad_norm": 3.13482928276062, "learning_rate": 1.8613569321533923e-05, "loss": 0.0637, "step": 631 }, { "epoch": 0.0932497233493176, "grad_norm": 3.0127792358398438, "learning_rate": 1.8643067846607673e-05, "loss": 0.0902, "step": 632 }, { "epoch": 0.0933972703799336, "grad_norm": 2.9162039756774902, "learning_rate": 1.8672566371681417e-05, "loss": 0.0724, "step": 633 }, { "epoch": 0.09354481741054961, "grad_norm": 4.342963218688965, "learning_rate": 1.8702064896755164e-05, "loss": 0.1428, "step": 634 }, { "epoch": 0.09369236444116562, "grad_norm": 3.3434808254241943, "learning_rate": 1.873156342182891e-05, "loss": 0.0776, "step": 635 }, { "epoch": 0.09383991147178163, "grad_norm": 5.598691940307617, "learning_rate": 1.8761061946902657e-05, "loss": 0.0649, "step": 636 }, { "epoch": 0.09398745850239763, "grad_norm": 2.4252452850341797, "learning_rate": 1.87905604719764e-05, "loss": 0.0394, "step": 637 }, { "epoch": 0.09413500553301365, "grad_norm": 3.141429901123047, "learning_rate": 1.8820058997050148e-05, "loss": 0.0765, "step": 638 }, { "epoch": 0.09428255256362966, "grad_norm": 7.3823652267456055, "learning_rate": 1.8849557522123894e-05, "loss": 0.0836, "step": 639 }, { "epoch": 0.09443009959424567, "grad_norm": 5.535402297973633, "learning_rate": 1.887905604719764e-05, "loss": 0.0797, "step": 640 }, { "epoch": 0.09443009959424567, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9463414634146341, "eval_loss": 0.09862089902162552, "eval_precision": 0.9509803921568627, "eval_recall": 0.941747572815534, "eval_runtime": 49.4004, "eval_samples_per_second": 5.891, "eval_steps_per_second": 0.202, "step": 640 }, { "epoch": 0.09457764662486168, "grad_norm": 5.349865436553955, "learning_rate": 1.8908554572271388e-05, "loss": 0.1747, "step": 641 }, { "epoch": 0.09472519365547769, "grad_norm": 5.868638515472412, "learning_rate": 1.8938053097345135e-05, "loss": 0.0556, "step": 642 }, { "epoch": 0.09487274068609369, "grad_norm": 4.076895236968994, "learning_rate": 1.8967551622418882e-05, "loss": 0.0755, "step": 643 }, { "epoch": 0.0950202877167097, "grad_norm": 6.668321132659912, "learning_rate": 1.8997050147492625e-05, "loss": 0.1107, "step": 644 }, { "epoch": 0.09516783474732571, "grad_norm": 5.733664035797119, "learning_rate": 1.9026548672566376e-05, "loss": 0.1145, "step": 645 }, { "epoch": 0.09531538177794172, "grad_norm": 1.9074493646621704, "learning_rate": 1.905604719764012e-05, "loss": 0.083, "step": 646 }, { "epoch": 0.09546292880855772, "grad_norm": 3.8475232124328613, "learning_rate": 1.9085545722713866e-05, "loss": 0.1025, "step": 647 }, { "epoch": 0.09561047583917373, "grad_norm": 3.377678632736206, "learning_rate": 1.9115044247787613e-05, "loss": 0.0872, "step": 648 }, { "epoch": 0.09575802286978975, "grad_norm": 4.6475701332092285, "learning_rate": 1.914454277286136e-05, "loss": 0.144, "step": 649 }, { "epoch": 0.09590556990040576, "grad_norm": 4.309667110443115, "learning_rate": 1.9174041297935103e-05, "loss": 0.0907, "step": 650 }, { "epoch": 0.09605311693102177, "grad_norm": 2.2157323360443115, "learning_rate": 1.9203539823008853e-05, "loss": 0.0933, "step": 651 }, { "epoch": 0.09620066396163778, "grad_norm": 3.317105770111084, "learning_rate": 1.9233038348082597e-05, "loss": 0.0824, "step": 652 }, { "epoch": 0.09634821099225378, "grad_norm": 3.8446860313415527, "learning_rate": 1.9262536873156344e-05, "loss": 0.127, "step": 653 }, { "epoch": 0.09649575802286979, "grad_norm": 1.6291133165359497, "learning_rate": 1.929203539823009e-05, "loss": 0.0757, "step": 654 }, { "epoch": 0.0966433050534858, "grad_norm": 2.944308280944824, "learning_rate": 1.9321533923303837e-05, "loss": 0.0865, "step": 655 }, { "epoch": 0.0967908520841018, "grad_norm": 4.815762519836426, "learning_rate": 1.935103244837758e-05, "loss": 0.0998, "step": 656 }, { "epoch": 0.09693839911471781, "grad_norm": 6.745084762573242, "learning_rate": 1.9380530973451328e-05, "loss": 0.1003, "step": 657 }, { "epoch": 0.09708594614533382, "grad_norm": 1.692372441291809, "learning_rate": 1.9410029498525075e-05, "loss": 0.0556, "step": 658 }, { "epoch": 0.09723349317594983, "grad_norm": 2.388808488845825, "learning_rate": 1.943952802359882e-05, "loss": 0.081, "step": 659 }, { "epoch": 0.09738104020656584, "grad_norm": 3.1843411922454834, "learning_rate": 1.946902654867257e-05, "loss": 0.084, "step": 660 }, { "epoch": 0.09738104020656584, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9447236180904522, "eval_loss": 0.08765505254268646, "eval_precision": 0.9791666666666666, "eval_recall": 0.912621359223301, "eval_runtime": 49.2751, "eval_samples_per_second": 5.906, "eval_steps_per_second": 0.203, "step": 660 }, { "epoch": 0.09752858723718186, "grad_norm": 4.054859638214111, "learning_rate": 1.9498525073746315e-05, "loss": 0.0875, "step": 661 }, { "epoch": 0.09767613426779787, "grad_norm": 1.8305726051330566, "learning_rate": 1.9528023598820062e-05, "loss": 0.0682, "step": 662 }, { "epoch": 0.09782368129841387, "grad_norm": 2.8815717697143555, "learning_rate": 1.9557522123893806e-05, "loss": 0.0915, "step": 663 }, { "epoch": 0.09797122832902988, "grad_norm": 3.009331464767456, "learning_rate": 1.9587020648967552e-05, "loss": 0.0857, "step": 664 }, { "epoch": 0.09811877535964589, "grad_norm": 1.912514090538025, "learning_rate": 1.96165191740413e-05, "loss": 0.0383, "step": 665 }, { "epoch": 0.0982663223902619, "grad_norm": 4.684423446655273, "learning_rate": 1.9646017699115046e-05, "loss": 0.0964, "step": 666 }, { "epoch": 0.0984138694208779, "grad_norm": 2.0638372898101807, "learning_rate": 1.9675516224188793e-05, "loss": 0.0459, "step": 667 }, { "epoch": 0.09856141645149391, "grad_norm": 3.457811117172241, "learning_rate": 1.970501474926254e-05, "loss": 0.0707, "step": 668 }, { "epoch": 0.09870896348210992, "grad_norm": 3.9127421379089355, "learning_rate": 1.9734513274336283e-05, "loss": 0.1294, "step": 669 }, { "epoch": 0.09885651051272593, "grad_norm": 1.452929139137268, "learning_rate": 1.976401179941003e-05, "loss": 0.0456, "step": 670 }, { "epoch": 0.09900405754334193, "grad_norm": 4.86648416519165, "learning_rate": 1.9793510324483777e-05, "loss": 0.0691, "step": 671 }, { "epoch": 0.09915160457395795, "grad_norm": 5.493929862976074, "learning_rate": 1.9823008849557524e-05, "loss": 0.156, "step": 672 }, { "epoch": 0.09929915160457396, "grad_norm": 5.912530422210693, "learning_rate": 1.985250737463127e-05, "loss": 0.1857, "step": 673 }, { "epoch": 0.09944669863518997, "grad_norm": 4.091241836547852, "learning_rate": 1.9882005899705018e-05, "loss": 0.1033, "step": 674 }, { "epoch": 0.09959424566580598, "grad_norm": 3.152560234069824, "learning_rate": 1.991150442477876e-05, "loss": 0.0758, "step": 675 }, { "epoch": 0.09974179269642199, "grad_norm": 1.319982647895813, "learning_rate": 1.9941002949852508e-05, "loss": 0.0162, "step": 676 }, { "epoch": 0.09988933972703799, "grad_norm": 1.771578073501587, "learning_rate": 1.9970501474926255e-05, "loss": 0.0536, "step": 677 }, { "epoch": 0.100036886757654, "grad_norm": 5.368497371673584, "learning_rate": 2e-05, "loss": 0.0858, "step": 678 }, { "epoch": 0.10018443378827001, "grad_norm": 3.3279361724853516, "learning_rate": 1.999999867336188e-05, "loss": 0.1218, "step": 679 }, { "epoch": 0.10033198081888602, "grad_norm": 0.8537330031394958, "learning_rate": 1.999999469344786e-05, "loss": 0.0091, "step": 680 }, { "epoch": 0.10033198081888602, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.945, "eval_loss": 0.08562874794006348, "eval_precision": 0.9742268041237113, "eval_recall": 0.9174757281553398, "eval_runtime": 49.2072, "eval_samples_per_second": 5.914, "eval_steps_per_second": 0.203, "step": 680 }, { "epoch": 0.10047952784950202, "grad_norm": 4.688568592071533, "learning_rate": 1.9999988060259e-05, "loss": 0.0933, "step": 681 }, { "epoch": 0.10062707488011803, "grad_norm": 3.9175586700439453, "learning_rate": 1.9999978773797067e-05, "loss": 0.0775, "step": 682 }, { "epoch": 0.10077462191073405, "grad_norm": 2.943915367126465, "learning_rate": 1.9999966834064516e-05, "loss": 0.0792, "step": 683 }, { "epoch": 0.10092216894135006, "grad_norm": 3.7157487869262695, "learning_rate": 1.9999952241064517e-05, "loss": 0.1115, "step": 684 }, { "epoch": 0.10106971597196607, "grad_norm": 3.160947322845459, "learning_rate": 1.9999934994800946e-05, "loss": 0.0807, "step": 685 }, { "epoch": 0.10121726300258208, "grad_norm": 6.8842926025390625, "learning_rate": 1.999991509527837e-05, "loss": 0.0592, "step": 686 }, { "epoch": 0.10136481003319808, "grad_norm": 2.0656898021698, "learning_rate": 1.9999892542502078e-05, "loss": 0.0963, "step": 687 }, { "epoch": 0.10151235706381409, "grad_norm": 3.1259071826934814, "learning_rate": 1.9999867336478053e-05, "loss": 0.1478, "step": 688 }, { "epoch": 0.1016599040944301, "grad_norm": 3.2559399604797363, "learning_rate": 1.999983947721298e-05, "loss": 0.109, "step": 689 }, { "epoch": 0.1018074511250461, "grad_norm": 2.0052266120910645, "learning_rate": 1.9999808964714246e-05, "loss": 0.0875, "step": 690 }, { "epoch": 0.10195499815566211, "grad_norm": 3.558954954147339, "learning_rate": 1.9999775798989956e-05, "loss": 0.1152, "step": 691 }, { "epoch": 0.10210254518627812, "grad_norm": 2.6088907718658447, "learning_rate": 1.9999739980048905e-05, "loss": 0.0815, "step": 692 }, { "epoch": 0.10225009221689413, "grad_norm": 1.9101494550704956, "learning_rate": 1.99997015079006e-05, "loss": 0.0959, "step": 693 }, { "epoch": 0.10239763924751015, "grad_norm": 3.8119421005249023, "learning_rate": 1.9999660382555246e-05, "loss": 0.0954, "step": 694 }, { "epoch": 0.10254518627812616, "grad_norm": 2.8122315406799316, "learning_rate": 1.9999616604023753e-05, "loss": 0.0377, "step": 695 }, { "epoch": 0.10269273330874217, "grad_norm": 2.076998710632324, "learning_rate": 1.999957017231774e-05, "loss": 0.074, "step": 696 }, { "epoch": 0.10284028033935817, "grad_norm": 5.563538074493408, "learning_rate": 1.9999521087449523e-05, "loss": 0.06, "step": 697 }, { "epoch": 0.10298782736997418, "grad_norm": 1.9383025169372559, "learning_rate": 1.999946934943213e-05, "loss": 0.0667, "step": 698 }, { "epoch": 0.10313537440059019, "grad_norm": 1.9197624921798706, "learning_rate": 1.9999414958279285e-05, "loss": 0.089, "step": 699 }, { "epoch": 0.1032829214312062, "grad_norm": 1.6479225158691406, "learning_rate": 1.9999357914005423e-05, "loss": 0.0832, "step": 700 }, { "epoch": 0.1032829214312062, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9447236180904522, "eval_loss": 0.09132521599531174, "eval_precision": 0.9791666666666666, "eval_recall": 0.912621359223301, "eval_runtime": 48.6296, "eval_samples_per_second": 5.984, "eval_steps_per_second": 0.206, "step": 700 }, { "epoch": 0.1034304684618222, "grad_norm": 2.672760009765625, "learning_rate": 1.9999298216625676e-05, "loss": 0.0708, "step": 701 }, { "epoch": 0.10357801549243821, "grad_norm": 2.098130464553833, "learning_rate": 1.9999235866155887e-05, "loss": 0.0353, "step": 702 }, { "epoch": 0.10372556252305422, "grad_norm": 1.2144372463226318, "learning_rate": 1.999917086261259e-05, "loss": 0.0459, "step": 703 }, { "epoch": 0.10387310955367023, "grad_norm": 1.934159517288208, "learning_rate": 1.9999103206013047e-05, "loss": 0.0981, "step": 704 }, { "epoch": 0.10402065658428625, "grad_norm": 3.57991886138916, "learning_rate": 1.9999032896375195e-05, "loss": 0.0824, "step": 705 }, { "epoch": 0.10416820361490225, "grad_norm": 3.510735511779785, "learning_rate": 1.99989599337177e-05, "loss": 0.1353, "step": 706 }, { "epoch": 0.10431575064551826, "grad_norm": 3.6645357608795166, "learning_rate": 1.9998884318059915e-05, "loss": 0.0921, "step": 707 }, { "epoch": 0.10446329767613427, "grad_norm": 2.6927130222320557, "learning_rate": 1.9998806049421905e-05, "loss": 0.0507, "step": 708 }, { "epoch": 0.10461084470675028, "grad_norm": 2.7297332286834717, "learning_rate": 1.9998725127824438e-05, "loss": 0.0533, "step": 709 }, { "epoch": 0.10475839173736629, "grad_norm": 2.2919421195983887, "learning_rate": 1.999864155328898e-05, "loss": 0.0907, "step": 710 }, { "epoch": 0.10490593876798229, "grad_norm": 4.291741371154785, "learning_rate": 1.9998555325837707e-05, "loss": 0.0761, "step": 711 }, { "epoch": 0.1050534857985983, "grad_norm": 3.835400104522705, "learning_rate": 1.9998466445493504e-05, "loss": 0.0291, "step": 712 }, { "epoch": 0.10520103282921431, "grad_norm": 5.807161331176758, "learning_rate": 1.9998374912279943e-05, "loss": 0.1182, "step": 713 }, { "epoch": 0.10534857985983032, "grad_norm": 2.125586986541748, "learning_rate": 1.999828072622132e-05, "loss": 0.0846, "step": 714 }, { "epoch": 0.10549612689044632, "grad_norm": 2.8485658168792725, "learning_rate": 1.999818388734262e-05, "loss": 0.0782, "step": 715 }, { "epoch": 0.10564367392106234, "grad_norm": 1.4917887449264526, "learning_rate": 1.9998084395669537e-05, "loss": 0.0772, "step": 716 }, { "epoch": 0.10579122095167835, "grad_norm": 2.8257558345794678, "learning_rate": 1.999798225122847e-05, "loss": 0.0585, "step": 717 }, { "epoch": 0.10593876798229436, "grad_norm": 4.155523300170898, "learning_rate": 1.999787745404652e-05, "loss": 0.045, "step": 718 }, { "epoch": 0.10608631501291037, "grad_norm": 2.4763712882995605, "learning_rate": 1.9997770004151496e-05, "loss": 0.0453, "step": 719 }, { "epoch": 0.10623386204352638, "grad_norm": 2.8938565254211426, "learning_rate": 1.9997659901571902e-05, "loss": 0.0599, "step": 720 }, { "epoch": 0.10623386204352638, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9473684210526315, "eval_loss": 0.08718672394752502, "eval_precision": 0.9792746113989638, "eval_recall": 0.9174757281553398, "eval_runtime": 50.0399, "eval_samples_per_second": 5.815, "eval_steps_per_second": 0.2, "step": 720 }, { "epoch": 0.10638140907414238, "grad_norm": 3.191500425338745, "learning_rate": 1.9997547146336954e-05, "loss": 0.082, "step": 721 }, { "epoch": 0.10652895610475839, "grad_norm": 4.135310649871826, "learning_rate": 1.9997431738476572e-05, "loss": 0.1645, "step": 722 }, { "epoch": 0.1066765031353744, "grad_norm": 3.382112741470337, "learning_rate": 1.999731367802137e-05, "loss": 0.0838, "step": 723 }, { "epoch": 0.1068240501659904, "grad_norm": 3.5811784267425537, "learning_rate": 1.999719296500268e-05, "loss": 0.0967, "step": 724 }, { "epoch": 0.10697159719660641, "grad_norm": 7.132405757904053, "learning_rate": 1.9997069599452526e-05, "loss": 0.0934, "step": 725 }, { "epoch": 0.10711914422722242, "grad_norm": 3.218842029571533, "learning_rate": 1.999694358140364e-05, "loss": 0.1176, "step": 726 }, { "epoch": 0.10726669125783844, "grad_norm": 2.789419412612915, "learning_rate": 1.999681491088946e-05, "loss": 0.0409, "step": 727 }, { "epoch": 0.10741423828845445, "grad_norm": 11.626459121704102, "learning_rate": 1.9996683587944127e-05, "loss": 0.0974, "step": 728 }, { "epoch": 0.10756178531907046, "grad_norm": 4.882386207580566, "learning_rate": 1.999654961260248e-05, "loss": 0.0989, "step": 729 }, { "epoch": 0.10770933234968647, "grad_norm": 9.895661354064941, "learning_rate": 1.9996412984900072e-05, "loss": 0.1698, "step": 730 }, { "epoch": 0.10785687938030247, "grad_norm": 4.445443153381348, "learning_rate": 1.9996273704873148e-05, "loss": 0.1612, "step": 731 }, { "epoch": 0.10800442641091848, "grad_norm": 4.3391313552856445, "learning_rate": 1.9996131772558668e-05, "loss": 0.0788, "step": 732 }, { "epoch": 0.10815197344153449, "grad_norm": 3.659205436706543, "learning_rate": 1.9995987187994288e-05, "loss": 0.0886, "step": 733 }, { "epoch": 0.1082995204721505, "grad_norm": 2.3957369327545166, "learning_rate": 1.9995839951218375e-05, "loss": 0.0403, "step": 734 }, { "epoch": 0.1084470675027665, "grad_norm": 3.446357250213623, "learning_rate": 1.9995690062269985e-05, "loss": 0.0459, "step": 735 }, { "epoch": 0.10859461453338251, "grad_norm": 2.980558395385742, "learning_rate": 1.9995537521188896e-05, "loss": 0.0563, "step": 736 }, { "epoch": 0.10874216156399852, "grad_norm": 4.328250885009766, "learning_rate": 1.999538232801558e-05, "loss": 0.0875, "step": 737 }, { "epoch": 0.10888970859461454, "grad_norm": 1.6247608661651611, "learning_rate": 1.9995224482791213e-05, "loss": 0.043, "step": 738 }, { "epoch": 0.10903725562523055, "grad_norm": 2.380084753036499, "learning_rate": 1.9995063985557674e-05, "loss": 0.0789, "step": 739 }, { "epoch": 0.10918480265584656, "grad_norm": 8.056583404541016, "learning_rate": 1.999490083635755e-05, "loss": 0.1185, "step": 740 }, { "epoch": 0.10918480265584656, "eval_accuracy": 0.9638205499276411, "eval_f1": 0.9367088607594937, "eval_loss": 0.08706936985254288, "eval_precision": 0.9788359788359788, "eval_recall": 0.8980582524271845, "eval_runtime": 49.6705, "eval_samples_per_second": 5.859, "eval_steps_per_second": 0.201, "step": 740 }, { "epoch": 0.10933234968646256, "grad_norm": 3.812094211578369, "learning_rate": 1.999473503523413e-05, "loss": 0.1123, "step": 741 }, { "epoch": 0.10947989671707857, "grad_norm": 6.163642883300781, "learning_rate": 1.9994566582231404e-05, "loss": 0.0785, "step": 742 }, { "epoch": 0.10962744374769458, "grad_norm": 2.7690041065216064, "learning_rate": 1.9994395477394067e-05, "loss": 0.0959, "step": 743 }, { "epoch": 0.10977499077831059, "grad_norm": 2.126983165740967, "learning_rate": 1.9994221720767514e-05, "loss": 0.0632, "step": 744 }, { "epoch": 0.1099225378089266, "grad_norm": 2.455386161804199, "learning_rate": 1.9994045312397855e-05, "loss": 0.0634, "step": 745 }, { "epoch": 0.1100700848395426, "grad_norm": 5.767280578613281, "learning_rate": 1.999386625233189e-05, "loss": 0.1937, "step": 746 }, { "epoch": 0.11021763187015861, "grad_norm": 3.2494020462036133, "learning_rate": 1.999368454061713e-05, "loss": 0.0592, "step": 747 }, { "epoch": 0.11036517890077462, "grad_norm": 2.4792871475219727, "learning_rate": 1.999350017730179e-05, "loss": 0.0612, "step": 748 }, { "epoch": 0.11051272593139064, "grad_norm": 4.196202754974365, "learning_rate": 1.999331316243479e-05, "loss": 0.0706, "step": 749 }, { "epoch": 0.11066027296200664, "grad_norm": 6.937958240509033, "learning_rate": 1.999312349606574e-05, "loss": 0.1197, "step": 750 }, { "epoch": 0.11080781999262265, "grad_norm": 6.105783462524414, "learning_rate": 1.9992931178244973e-05, "loss": 0.0836, "step": 751 }, { "epoch": 0.11095536702323866, "grad_norm": 5.956887722015381, "learning_rate": 1.9992736209023507e-05, "loss": 0.1308, "step": 752 }, { "epoch": 0.11110291405385467, "grad_norm": 5.724761009216309, "learning_rate": 1.9992538588453087e-05, "loss": 0.0551, "step": 753 }, { "epoch": 0.11125046108447068, "grad_norm": 6.329967021942139, "learning_rate": 1.9992338316586132e-05, "loss": 0.2474, "step": 754 }, { "epoch": 0.11139800811508668, "grad_norm": 3.177274227142334, "learning_rate": 1.999213539347579e-05, "loss": 0.1034, "step": 755 }, { "epoch": 0.11154555514570269, "grad_norm": 1.8797543048858643, "learning_rate": 1.99919298191759e-05, "loss": 0.0532, "step": 756 }, { "epoch": 0.1116931021763187, "grad_norm": 2.0092849731445312, "learning_rate": 1.9991721593741e-05, "loss": 0.0426, "step": 757 }, { "epoch": 0.1118406492069347, "grad_norm": 2.335038900375366, "learning_rate": 1.9991510717226345e-05, "loss": 0.0674, "step": 758 }, { "epoch": 0.11198819623755071, "grad_norm": 1.80488121509552, "learning_rate": 1.9991297189687887e-05, "loss": 0.0619, "step": 759 }, { "epoch": 0.11213574326816673, "grad_norm": 2.4070348739624023, "learning_rate": 1.9991081011182275e-05, "loss": 0.059, "step": 760 }, { "epoch": 0.11213574326816673, "eval_accuracy": 0.9652677279305355, "eval_f1": 0.9396984924623115, "eval_loss": 0.08778787404298782, "eval_precision": 0.9739583333333334, "eval_recall": 0.9077669902912622, "eval_runtime": 49.477, "eval_samples_per_second": 5.882, "eval_steps_per_second": 0.202, "step": 760 }, { "epoch": 0.11228329029878274, "grad_norm": 3.128009080886841, "learning_rate": 1.999086218176687e-05, "loss": 0.0678, "step": 761 }, { "epoch": 0.11243083732939875, "grad_norm": 0.973224937915802, "learning_rate": 1.9990640701499738e-05, "loss": 0.0308, "step": 762 }, { "epoch": 0.11257838436001476, "grad_norm": 2.0165839195251465, "learning_rate": 1.9990416570439635e-05, "loss": 0.0611, "step": 763 }, { "epoch": 0.11272593139063077, "grad_norm": 3.5563056468963623, "learning_rate": 1.9990189788646038e-05, "loss": 0.1019, "step": 764 }, { "epoch": 0.11287347842124677, "grad_norm": 3.9265475273132324, "learning_rate": 1.998996035617911e-05, "loss": 0.0685, "step": 765 }, { "epoch": 0.11302102545186278, "grad_norm": 3.04717755317688, "learning_rate": 1.9989728273099732e-05, "loss": 0.0699, "step": 766 }, { "epoch": 0.11316857248247879, "grad_norm": 1.6231255531311035, "learning_rate": 1.998949353946948e-05, "loss": 0.0426, "step": 767 }, { "epoch": 0.1133161195130948, "grad_norm": 4.998641014099121, "learning_rate": 1.9989256155350635e-05, "loss": 0.082, "step": 768 }, { "epoch": 0.1134636665437108, "grad_norm": 1.401096224784851, "learning_rate": 1.9989016120806182e-05, "loss": 0.0202, "step": 769 }, { "epoch": 0.11361121357432681, "grad_norm": 2.487551689147949, "learning_rate": 1.9988773435899808e-05, "loss": 0.0804, "step": 770 }, { "epoch": 0.11375876060494283, "grad_norm": 1.8232187032699585, "learning_rate": 1.9988528100695904e-05, "loss": 0.052, "step": 771 }, { "epoch": 0.11390630763555884, "grad_norm": 2.227843761444092, "learning_rate": 1.998828011525957e-05, "loss": 0.0799, "step": 772 }, { "epoch": 0.11405385466617485, "grad_norm": 3.416242837905884, "learning_rate": 1.9988029479656596e-05, "loss": 0.1184, "step": 773 }, { "epoch": 0.11420140169679086, "grad_norm": 6.412944793701172, "learning_rate": 1.9987776193953482e-05, "loss": 0.1074, "step": 774 }, { "epoch": 0.11434894872740686, "grad_norm": 3.670717716217041, "learning_rate": 1.9987520258217438e-05, "loss": 0.1582, "step": 775 }, { "epoch": 0.11449649575802287, "grad_norm": 4.662622928619385, "learning_rate": 1.998726167251636e-05, "loss": 0.0606, "step": 776 }, { "epoch": 0.11464404278863888, "grad_norm": 3.990325450897217, "learning_rate": 1.9987000436918876e-05, "loss": 0.0629, "step": 777 }, { "epoch": 0.11479158981925489, "grad_norm": 8.231770515441895, "learning_rate": 1.998673655149428e-05, "loss": 0.0993, "step": 778 }, { "epoch": 0.1149391368498709, "grad_norm": 3.067122459411621, "learning_rate": 1.99864700163126e-05, "loss": 0.0962, "step": 779 }, { "epoch": 0.1150866838804869, "grad_norm": 11.92526912689209, "learning_rate": 1.9986200831444555e-05, "loss": 0.1429, "step": 780 }, { "epoch": 0.1150866838804869, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9481481481481482, "eval_loss": 0.08677008748054504, "eval_precision": 0.964824120603015, "eval_recall": 0.9320388349514563, "eval_runtime": 50.1399, "eval_samples_per_second": 5.804, "eval_steps_per_second": 0.199, "step": 780 }, { "epoch": 0.11523423091110291, "grad_norm": 2.1681830883026123, "learning_rate": 1.9985928996961558e-05, "loss": 0.0651, "step": 781 }, { "epoch": 0.11538177794171892, "grad_norm": 2.2294504642486572, "learning_rate": 1.998565451293574e-05, "loss": 0.1235, "step": 782 }, { "epoch": 0.11552932497233494, "grad_norm": 2.6438939571380615, "learning_rate": 1.9985377379439936e-05, "loss": 0.0526, "step": 783 }, { "epoch": 0.11567687200295095, "grad_norm": 2.4441215991973877, "learning_rate": 1.9985097596547664e-05, "loss": 0.1249, "step": 784 }, { "epoch": 0.11582441903356695, "grad_norm": 5.697798728942871, "learning_rate": 1.9984815164333163e-05, "loss": 0.0783, "step": 785 }, { "epoch": 0.11597196606418296, "grad_norm": 3.611173391342163, "learning_rate": 1.9984530082871373e-05, "loss": 0.1161, "step": 786 }, { "epoch": 0.11611951309479897, "grad_norm": 2.9603285789489746, "learning_rate": 1.9984242352237935e-05, "loss": 0.0476, "step": 787 }, { "epoch": 0.11626706012541498, "grad_norm": 1.3843454122543335, "learning_rate": 1.998395197250919e-05, "loss": 0.0372, "step": 788 }, { "epoch": 0.11641460715603098, "grad_norm": 3.1214351654052734, "learning_rate": 1.998365894376218e-05, "loss": 0.0595, "step": 789 }, { "epoch": 0.11656215418664699, "grad_norm": 2.170861005783081, "learning_rate": 1.998336326607466e-05, "loss": 0.043, "step": 790 }, { "epoch": 0.116709701217263, "grad_norm": 3.470710515975952, "learning_rate": 1.9983064939525076e-05, "loss": 0.0629, "step": 791 }, { "epoch": 0.116857248247879, "grad_norm": 3.264995574951172, "learning_rate": 1.9982763964192586e-05, "loss": 0.0553, "step": 792 }, { "epoch": 0.11700479527849501, "grad_norm": 6.381844997406006, "learning_rate": 1.9982460340157045e-05, "loss": 0.1032, "step": 793 }, { "epoch": 0.11715234230911103, "grad_norm": 4.215766906738281, "learning_rate": 1.9982154067499013e-05, "loss": 0.0903, "step": 794 }, { "epoch": 0.11729988933972704, "grad_norm": 5.5455803871154785, "learning_rate": 1.998184514629975e-05, "loss": 0.1454, "step": 795 }, { "epoch": 0.11744743637034305, "grad_norm": 1.9435747861862183, "learning_rate": 1.998153357664123e-05, "loss": 0.0459, "step": 796 }, { "epoch": 0.11759498340095906, "grad_norm": 7.67086935043335, "learning_rate": 1.9981219358606113e-05, "loss": 0.1583, "step": 797 }, { "epoch": 0.11774253043157507, "grad_norm": 3.172956943511963, "learning_rate": 1.998090249227777e-05, "loss": 0.0419, "step": 798 }, { "epoch": 0.11789007746219107, "grad_norm": 5.856876850128174, "learning_rate": 1.998058297774028e-05, "loss": 0.081, "step": 799 }, { "epoch": 0.11803762449280708, "grad_norm": 3.2290866374969482, "learning_rate": 1.9980260815078416e-05, "loss": 0.0583, "step": 800 }, { "epoch": 0.11803762449280708, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9481481481481482, "eval_loss": 0.08584646880626678, "eval_precision": 0.964824120603015, "eval_recall": 0.9320388349514563, "eval_runtime": 53.2233, "eval_samples_per_second": 5.468, "eval_steps_per_second": 0.188, "step": 800 }, { "epoch": 0.11818517152342309, "grad_norm": 2.8346219062805176, "learning_rate": 1.9979936004377653e-05, "loss": 0.0487, "step": 801 }, { "epoch": 0.1183327185540391, "grad_norm": 5.30798864364624, "learning_rate": 1.9979608545724174e-05, "loss": 0.1392, "step": 802 }, { "epoch": 0.1184802655846551, "grad_norm": 2.963656187057495, "learning_rate": 1.997927843920487e-05, "loss": 0.0545, "step": 803 }, { "epoch": 0.11862781261527111, "grad_norm": 5.108734130859375, "learning_rate": 1.9978945684907317e-05, "loss": 0.0961, "step": 804 }, { "epoch": 0.11877535964588713, "grad_norm": 3.170442581176758, "learning_rate": 1.997861028291981e-05, "loss": 0.0343, "step": 805 }, { "epoch": 0.11892290667650314, "grad_norm": 2.6766741275787354, "learning_rate": 1.9978272233331337e-05, "loss": 0.0831, "step": 806 }, { "epoch": 0.11907045370711915, "grad_norm": 7.192470073699951, "learning_rate": 1.9977931536231597e-05, "loss": 0.1179, "step": 807 }, { "epoch": 0.11921800073773516, "grad_norm": 2.3859522342681885, "learning_rate": 1.997758819171098e-05, "loss": 0.1153, "step": 808 }, { "epoch": 0.11936554776835116, "grad_norm": 4.983349323272705, "learning_rate": 1.9977242199860592e-05, "loss": 0.1453, "step": 809 }, { "epoch": 0.11951309479896717, "grad_norm": 5.1341118812561035, "learning_rate": 1.997689356077223e-05, "loss": 0.0794, "step": 810 }, { "epoch": 0.11966064182958318, "grad_norm": 2.9639084339141846, "learning_rate": 1.9976542274538394e-05, "loss": 0.0717, "step": 811 }, { "epoch": 0.11980818886019919, "grad_norm": 5.447583198547363, "learning_rate": 1.9976188341252296e-05, "loss": 0.0483, "step": 812 }, { "epoch": 0.1199557358908152, "grad_norm": 2.2201592922210693, "learning_rate": 1.997583176100784e-05, "loss": 0.0613, "step": 813 }, { "epoch": 0.1201032829214312, "grad_norm": 1.8389087915420532, "learning_rate": 1.997547253389964e-05, "loss": 0.067, "step": 814 }, { "epoch": 0.12025082995204721, "grad_norm": 4.130992889404297, "learning_rate": 1.997511066002301e-05, "loss": 0.1207, "step": 815 }, { "epoch": 0.12039837698266323, "grad_norm": 3.213615655899048, "learning_rate": 1.9974746139473963e-05, "loss": 0.0571, "step": 816 }, { "epoch": 0.12054592401327924, "grad_norm": 1.6278706789016724, "learning_rate": 1.997437897234921e-05, "loss": 0.0511, "step": 817 }, { "epoch": 0.12069347104389525, "grad_norm": 3.3251805305480957, "learning_rate": 1.997400915874618e-05, "loss": 0.0929, "step": 818 }, { "epoch": 0.12084101807451125, "grad_norm": 4.89307975769043, "learning_rate": 1.997363669876299e-05, "loss": 0.0973, "step": 819 }, { "epoch": 0.12098856510512726, "grad_norm": 3.0118377208709717, "learning_rate": 1.997326159249847e-05, "loss": 0.0792, "step": 820 }, { "epoch": 0.12098856510512726, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.945273631840796, "eval_loss": 0.08328968286514282, "eval_precision": 0.9693877551020408, "eval_recall": 0.9223300970873787, "eval_runtime": 49.8053, "eval_samples_per_second": 5.843, "eval_steps_per_second": 0.201, "step": 820 }, { "epoch": 0.12113611213574327, "grad_norm": 5.648924350738525, "learning_rate": 1.997288384005214e-05, "loss": 0.0593, "step": 821 }, { "epoch": 0.12128365916635928, "grad_norm": 1.969643235206604, "learning_rate": 1.9972503441524225e-05, "loss": 0.0762, "step": 822 }, { "epoch": 0.12143120619697528, "grad_norm": 1.7293329238891602, "learning_rate": 1.9972120397015658e-05, "loss": 0.081, "step": 823 }, { "epoch": 0.12157875322759129, "grad_norm": 4.257837772369385, "learning_rate": 1.997173470662808e-05, "loss": 0.0623, "step": 824 }, { "epoch": 0.1217263002582073, "grad_norm": 2.357661724090576, "learning_rate": 1.9971346370463814e-05, "loss": 0.104, "step": 825 }, { "epoch": 0.1218738472888233, "grad_norm": 3.600338935852051, "learning_rate": 1.9970955388625902e-05, "loss": 0.1478, "step": 826 }, { "epoch": 0.12202139431943933, "grad_norm": 1.9968920946121216, "learning_rate": 1.9970561761218083e-05, "loss": 0.0791, "step": 827 }, { "epoch": 0.12216894135005534, "grad_norm": 5.789556503295898, "learning_rate": 1.997016548834479e-05, "loss": 0.0987, "step": 828 }, { "epoch": 0.12231648838067134, "grad_norm": 2.2478652000427246, "learning_rate": 1.996976657011117e-05, "loss": 0.0675, "step": 829 }, { "epoch": 0.12246403541128735, "grad_norm": 1.9542851448059082, "learning_rate": 1.9969365006623072e-05, "loss": 0.0742, "step": 830 }, { "epoch": 0.12261158244190336, "grad_norm": 3.967731237411499, "learning_rate": 1.9968960797987038e-05, "loss": 0.0643, "step": 831 }, { "epoch": 0.12275912947251937, "grad_norm": 2.1692540645599365, "learning_rate": 1.9968553944310313e-05, "loss": 0.0864, "step": 832 }, { "epoch": 0.12290667650313537, "grad_norm": 2.8444504737854004, "learning_rate": 1.9968144445700846e-05, "loss": 0.0275, "step": 833 }, { "epoch": 0.12305422353375138, "grad_norm": 2.9833950996398926, "learning_rate": 1.9967732302267295e-05, "loss": 0.1367, "step": 834 }, { "epoch": 0.12320177056436739, "grad_norm": 2.1316490173339844, "learning_rate": 1.9967317514119005e-05, "loss": 0.0502, "step": 835 }, { "epoch": 0.1233493175949834, "grad_norm": 1.8471524715423584, "learning_rate": 1.9966900081366036e-05, "loss": 0.0264, "step": 836 }, { "epoch": 0.1234968646255994, "grad_norm": 4.945133686065674, "learning_rate": 1.9966480004119143e-05, "loss": 0.0517, "step": 837 }, { "epoch": 0.12364441165621542, "grad_norm": 4.8993635177612305, "learning_rate": 1.9966057282489785e-05, "loss": 0.0678, "step": 838 }, { "epoch": 0.12379195868683143, "grad_norm": 2.9735472202301025, "learning_rate": 1.9965631916590116e-05, "loss": 0.125, "step": 839 }, { "epoch": 0.12393950571744744, "grad_norm": 3.2485132217407227, "learning_rate": 1.9965203906533005e-05, "loss": 0.0746, "step": 840 }, { "epoch": 0.12393950571744744, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9518987341772152, "eval_loss": 0.07715827971696854, "eval_precision": 0.9947089947089947, "eval_recall": 0.912621359223301, "eval_runtime": 50.6908, "eval_samples_per_second": 5.741, "eval_steps_per_second": 0.197, "step": 840 }, { "epoch": 0.12408705274806345, "grad_norm": 1.560117483139038, "learning_rate": 1.9964773252432015e-05, "loss": 0.0675, "step": 841 }, { "epoch": 0.12423459977867946, "grad_norm": 2.316660165786743, "learning_rate": 1.9964339954401405e-05, "loss": 0.0462, "step": 842 }, { "epoch": 0.12438214680929546, "grad_norm": 2.360581398010254, "learning_rate": 1.996390401255614e-05, "loss": 0.0441, "step": 843 }, { "epoch": 0.12452969383991147, "grad_norm": 3.1658785343170166, "learning_rate": 1.9963465427011894e-05, "loss": 0.0254, "step": 844 }, { "epoch": 0.12467724087052748, "grad_norm": 2.2196929454803467, "learning_rate": 1.9963024197885034e-05, "loss": 0.0427, "step": 845 }, { "epoch": 0.12482478790114349, "grad_norm": 4.199805736541748, "learning_rate": 1.9962580325292625e-05, "loss": 0.1002, "step": 846 }, { "epoch": 0.1249723349317595, "grad_norm": 3.8902390003204346, "learning_rate": 1.996213380935245e-05, "loss": 0.0432, "step": 847 }, { "epoch": 0.1251198819623755, "grad_norm": 2.673084020614624, "learning_rate": 1.9961684650182967e-05, "loss": 0.0522, "step": 848 }, { "epoch": 0.1252674289929915, "grad_norm": 1.7067304849624634, "learning_rate": 1.996123284790336e-05, "loss": 0.076, "step": 849 }, { "epoch": 0.12541497602360752, "grad_norm": 3.398080825805664, "learning_rate": 1.9960778402633503e-05, "loss": 0.101, "step": 850 }, { "epoch": 0.12556252305422352, "grad_norm": 1.6756361722946167, "learning_rate": 1.9960321314493972e-05, "loss": 0.049, "step": 851 }, { "epoch": 0.12571007008483953, "grad_norm": 2.6315760612487793, "learning_rate": 1.9959861583606045e-05, "loss": 0.0673, "step": 852 }, { "epoch": 0.12585761711545554, "grad_norm": 7.502798080444336, "learning_rate": 1.9959399210091704e-05, "loss": 0.1247, "step": 853 }, { "epoch": 0.12600516414607155, "grad_norm": 2.0811808109283447, "learning_rate": 1.9958934194073625e-05, "loss": 0.0564, "step": 854 }, { "epoch": 0.12615271117668758, "grad_norm": 2.500744104385376, "learning_rate": 1.9958466535675195e-05, "loss": 0.0636, "step": 855 }, { "epoch": 0.1263002582073036, "grad_norm": 3.5321614742279053, "learning_rate": 1.9957996235020492e-05, "loss": 0.0877, "step": 856 }, { "epoch": 0.1264478052379196, "grad_norm": 3.2365834712982178, "learning_rate": 1.99575232922343e-05, "loss": 0.0691, "step": 857 }, { "epoch": 0.1265953522685356, "grad_norm": 3.4131531715393066, "learning_rate": 1.9957047707442105e-05, "loss": 0.0772, "step": 858 }, { "epoch": 0.1267428992991516, "grad_norm": 3.1513803005218506, "learning_rate": 1.9956569480770093e-05, "loss": 0.0497, "step": 859 }, { "epoch": 0.12689044632976762, "grad_norm": 2.6419363021850586, "learning_rate": 1.9956088612345153e-05, "loss": 0.0463, "step": 860 }, { "epoch": 0.12689044632976762, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9526184538653366, "eval_loss": 0.07326771318912506, "eval_precision": 0.9794871794871794, "eval_recall": 0.9271844660194175, "eval_runtime": 49.592, "eval_samples_per_second": 5.868, "eval_steps_per_second": 0.202, "step": 860 }, { "epoch": 0.12703799336038363, "grad_norm": 5.903964996337891, "learning_rate": 1.9955605102294867e-05, "loss": 0.1221, "step": 861 }, { "epoch": 0.12718554039099964, "grad_norm": 2.3677377700805664, "learning_rate": 1.995511895074753e-05, "loss": 0.0141, "step": 862 }, { "epoch": 0.12733308742161564, "grad_norm": 3.337918996810913, "learning_rate": 1.9954630157832127e-05, "loss": 0.1087, "step": 863 }, { "epoch": 0.12748063445223165, "grad_norm": 1.951736330986023, "learning_rate": 1.995413872367835e-05, "loss": 0.0712, "step": 864 }, { "epoch": 0.12762818148284766, "grad_norm": 2.7879199981689453, "learning_rate": 1.995364464841659e-05, "loss": 0.109, "step": 865 }, { "epoch": 0.12777572851346367, "grad_norm": 2.1337168216705322, "learning_rate": 1.9953147932177935e-05, "loss": 0.097, "step": 866 }, { "epoch": 0.12792327554407967, "grad_norm": 3.6122055053710938, "learning_rate": 1.9952648575094186e-05, "loss": 0.1125, "step": 867 }, { "epoch": 0.12807082257469568, "grad_norm": 1.942949891090393, "learning_rate": 1.9952146577297827e-05, "loss": 0.0479, "step": 868 }, { "epoch": 0.1282183696053117, "grad_norm": 2.1994197368621826, "learning_rate": 1.995164193892206e-05, "loss": 0.03, "step": 869 }, { "epoch": 0.1283659166359277, "grad_norm": 2.757167100906372, "learning_rate": 1.9951134660100774e-05, "loss": 0.0879, "step": 870 }, { "epoch": 0.1285134636665437, "grad_norm": 4.015503406524658, "learning_rate": 1.9950624740968567e-05, "loss": 0.0405, "step": 871 }, { "epoch": 0.1286610106971597, "grad_norm": 2.4473226070404053, "learning_rate": 1.995011218166073e-05, "loss": 0.0819, "step": 872 }, { "epoch": 0.12880855772777572, "grad_norm": 3.6910784244537354, "learning_rate": 1.9949596982313266e-05, "loss": 0.0738, "step": 873 }, { "epoch": 0.12895610475839173, "grad_norm": 4.270644664764404, "learning_rate": 1.9949079143062863e-05, "loss": 0.0802, "step": 874 }, { "epoch": 0.12910365178900773, "grad_norm": 2.5281600952148438, "learning_rate": 1.9948558664046928e-05, "loss": 0.0448, "step": 875 }, { "epoch": 0.12925119881962374, "grad_norm": 4.027614593505859, "learning_rate": 1.994803554540355e-05, "loss": 0.0985, "step": 876 }, { "epoch": 0.12939874585023978, "grad_norm": 2.1794724464416504, "learning_rate": 1.9947509787271533e-05, "loss": 0.0462, "step": 877 }, { "epoch": 0.12954629288085578, "grad_norm": 1.6822476387023926, "learning_rate": 1.9946981389790375e-05, "loss": 0.0613, "step": 878 }, { "epoch": 0.1296938399114718, "grad_norm": 5.981480121612549, "learning_rate": 1.994645035310027e-05, "loss": 0.0901, "step": 879 }, { "epoch": 0.1298413869420878, "grad_norm": 4.830456256866455, "learning_rate": 1.9945916677342116e-05, "loss": 0.1658, "step": 880 }, { "epoch": 0.1298413869420878, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9468354430379747, "eval_loss": 0.08612699061632156, "eval_precision": 0.9894179894179894, "eval_recall": 0.9077669902912622, "eval_runtime": 49.8971, "eval_samples_per_second": 5.832, "eval_steps_per_second": 0.2, "step": 880 }, { "epoch": 0.1299889339727038, "grad_norm": 3.4190924167633057, "learning_rate": 1.9945380362657518e-05, "loss": 0.0817, "step": 881 }, { "epoch": 0.13013648100331981, "grad_norm": 3.3585731983184814, "learning_rate": 1.994484140918877e-05, "loss": 0.042, "step": 882 }, { "epoch": 0.13028402803393582, "grad_norm": 4.539821147918701, "learning_rate": 1.9944299817078873e-05, "loss": 0.1481, "step": 883 }, { "epoch": 0.13043157506455183, "grad_norm": 2.5136327743530273, "learning_rate": 1.9943755586471525e-05, "loss": 0.0776, "step": 884 }, { "epoch": 0.13057912209516784, "grad_norm": 7.7520527839660645, "learning_rate": 1.9943208717511128e-05, "loss": 0.1189, "step": 885 }, { "epoch": 0.13072666912578385, "grad_norm": 2.056431770324707, "learning_rate": 1.9942659210342783e-05, "loss": 0.0301, "step": 886 }, { "epoch": 0.13087421615639985, "grad_norm": 2.568932056427002, "learning_rate": 1.9942107065112286e-05, "loss": 0.0347, "step": 887 }, { "epoch": 0.13102176318701586, "grad_norm": 2.0954034328460693, "learning_rate": 1.9941552281966136e-05, "loss": 0.0621, "step": 888 }, { "epoch": 0.13116931021763187, "grad_norm": 5.280118942260742, "learning_rate": 1.994099486105153e-05, "loss": 0.167, "step": 889 }, { "epoch": 0.13131685724824788, "grad_norm": 1.46014404296875, "learning_rate": 1.9940434802516375e-05, "loss": 0.0614, "step": 890 }, { "epoch": 0.13146440427886388, "grad_norm": 2.457958221435547, "learning_rate": 1.9939872106509262e-05, "loss": 0.0862, "step": 891 }, { "epoch": 0.1316119513094799, "grad_norm": 3.1738052368164062, "learning_rate": 1.9939306773179498e-05, "loss": 0.0386, "step": 892 }, { "epoch": 0.1317594983400959, "grad_norm": 1.4225889444351196, "learning_rate": 1.993873880267707e-05, "loss": 0.0478, "step": 893 }, { "epoch": 0.1319070453707119, "grad_norm": 4.195611953735352, "learning_rate": 1.993816819515269e-05, "loss": 0.1391, "step": 894 }, { "epoch": 0.1320545924013279, "grad_norm": 3.1356394290924072, "learning_rate": 1.9937594950757746e-05, "loss": 0.0822, "step": 895 }, { "epoch": 0.13220213943194392, "grad_norm": 3.420642852783203, "learning_rate": 1.9937019069644337e-05, "loss": 0.101, "step": 896 }, { "epoch": 0.13234968646255993, "grad_norm": 3.3701255321502686, "learning_rate": 1.9936440551965263e-05, "loss": 0.065, "step": 897 }, { "epoch": 0.13249723349317594, "grad_norm": 5.420750617980957, "learning_rate": 1.9935859397874023e-05, "loss": 0.0837, "step": 898 }, { "epoch": 0.13264478052379197, "grad_norm": 3.1765167713165283, "learning_rate": 1.9935275607524807e-05, "loss": 0.1244, "step": 899 }, { "epoch": 0.13279232755440798, "grad_norm": 2.7102837562561035, "learning_rate": 1.9934689181072514e-05, "loss": 0.1177, "step": 900 }, { "epoch": 0.13279232755440798, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9509803921568627, "eval_loss": 0.0814955085515976, "eval_precision": 0.9603960396039604, "eval_recall": 0.941747572815534, "eval_runtime": 49.8535, "eval_samples_per_second": 5.837, "eval_steps_per_second": 0.201, "step": 900 }, { "epoch": 0.132939874585024, "grad_norm": 3.6961820125579834, "learning_rate": 1.9934100118672737e-05, "loss": 0.0685, "step": 901 }, { "epoch": 0.13308742161564, "grad_norm": 5.280569076538086, "learning_rate": 1.993350842048177e-05, "loss": 0.0892, "step": 902 }, { "epoch": 0.133234968646256, "grad_norm": 3.3943850994110107, "learning_rate": 1.9932914086656616e-05, "loss": 0.1113, "step": 903 }, { "epoch": 0.133382515676872, "grad_norm": 4.079817295074463, "learning_rate": 1.9932317117354957e-05, "loss": 0.07, "step": 904 }, { "epoch": 0.13353006270748802, "grad_norm": 3.6208643913269043, "learning_rate": 1.993171751273519e-05, "loss": 0.1253, "step": 905 }, { "epoch": 0.13367760973810403, "grad_norm": 3.062695026397705, "learning_rate": 1.9931115272956405e-05, "loss": 0.0908, "step": 906 }, { "epoch": 0.13382515676872003, "grad_norm": 4.252796649932861, "learning_rate": 1.9930510398178397e-05, "loss": 0.1128, "step": 907 }, { "epoch": 0.13397270379933604, "grad_norm": 2.8278918266296387, "learning_rate": 1.992990288856165e-05, "loss": 0.1082, "step": 908 }, { "epoch": 0.13412025082995205, "grad_norm": 2.5324268341064453, "learning_rate": 1.992929274426736e-05, "loss": 0.0952, "step": 909 }, { "epoch": 0.13426779786056806, "grad_norm": 3.5670738220214844, "learning_rate": 1.9928679965457407e-05, "loss": 0.0984, "step": 910 }, { "epoch": 0.13441534489118406, "grad_norm": 2.1843373775482178, "learning_rate": 1.9928064552294384e-05, "loss": 0.0965, "step": 911 }, { "epoch": 0.13456289192180007, "grad_norm": 3.1432881355285645, "learning_rate": 1.9927446504941576e-05, "loss": 0.095, "step": 912 }, { "epoch": 0.13471043895241608, "grad_norm": 2.3614120483398438, "learning_rate": 1.992682582356297e-05, "loss": 0.0564, "step": 913 }, { "epoch": 0.13485798598303209, "grad_norm": 1.8553917407989502, "learning_rate": 1.9926202508323243e-05, "loss": 0.0883, "step": 914 }, { "epoch": 0.1350055330136481, "grad_norm": 2.375873327255249, "learning_rate": 1.9925576559387784e-05, "loss": 0.088, "step": 915 }, { "epoch": 0.1351530800442641, "grad_norm": 3.9025936126708984, "learning_rate": 1.9924947976922676e-05, "loss": 0.1033, "step": 916 }, { "epoch": 0.1353006270748801, "grad_norm": 2.559783458709717, "learning_rate": 1.9924316761094694e-05, "loss": 0.0622, "step": 917 }, { "epoch": 0.13544817410549612, "grad_norm": 4.394411563873291, "learning_rate": 1.992368291207132e-05, "loss": 0.1635, "step": 918 }, { "epoch": 0.13559572113611212, "grad_norm": 2.0193920135498047, "learning_rate": 1.9923046430020726e-05, "loss": 0.0377, "step": 919 }, { "epoch": 0.13574326816672813, "grad_norm": 2.8139891624450684, "learning_rate": 1.9922407315111794e-05, "loss": 0.0709, "step": 920 }, { "epoch": 0.13574326816672813, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9473684210526315, "eval_loss": 0.07725755870342255, "eval_precision": 0.9792746113989638, "eval_recall": 0.9174757281553398, "eval_runtime": 49.3364, "eval_samples_per_second": 5.898, "eval_steps_per_second": 0.203, "step": 920 }, { "epoch": 0.13589081519734417, "grad_norm": 1.9640271663665771, "learning_rate": 1.9921765567514105e-05, "loss": 0.0593, "step": 921 }, { "epoch": 0.13603836222796017, "grad_norm": 6.5333943367004395, "learning_rate": 1.992112118739792e-05, "loss": 0.1231, "step": 922 }, { "epoch": 0.13618590925857618, "grad_norm": 3.07806134223938, "learning_rate": 1.9920474174934217e-05, "loss": 0.0607, "step": 923 }, { "epoch": 0.1363334562891922, "grad_norm": 3.3435006141662598, "learning_rate": 1.9919824530294668e-05, "loss": 0.055, "step": 924 }, { "epoch": 0.1364810033198082, "grad_norm": 4.966794967651367, "learning_rate": 1.9919172253651637e-05, "loss": 0.023, "step": 925 }, { "epoch": 0.1366285503504242, "grad_norm": 5.490158557891846, "learning_rate": 1.991851734517819e-05, "loss": 0.1584, "step": 926 }, { "epoch": 0.1367760973810402, "grad_norm": 3.0673775672912598, "learning_rate": 1.9917859805048098e-05, "loss": 0.1055, "step": 927 }, { "epoch": 0.13692364441165622, "grad_norm": 1.0951420068740845, "learning_rate": 1.9917199633435822e-05, "loss": 0.0119, "step": 928 }, { "epoch": 0.13707119144227223, "grad_norm": 2.024261951446533, "learning_rate": 1.9916536830516523e-05, "loss": 0.0225, "step": 929 }, { "epoch": 0.13721873847288824, "grad_norm": 4.463064193725586, "learning_rate": 1.991587139646606e-05, "loss": 0.0473, "step": 930 }, { "epoch": 0.13736628550350424, "grad_norm": 4.4939284324646, "learning_rate": 1.9915203331460998e-05, "loss": 0.0629, "step": 931 }, { "epoch": 0.13751383253412025, "grad_norm": 5.388205051422119, "learning_rate": 1.9914532635678584e-05, "loss": 0.1111, "step": 932 }, { "epoch": 0.13766137956473626, "grad_norm": 6.879351615905762, "learning_rate": 1.9913859309296778e-05, "loss": 0.1813, "step": 933 }, { "epoch": 0.13780892659535227, "grad_norm": 4.1047797203063965, "learning_rate": 1.9913183352494226e-05, "loss": 0.0855, "step": 934 }, { "epoch": 0.13795647362596827, "grad_norm": 1.6623461246490479, "learning_rate": 1.991250476545029e-05, "loss": 0.0466, "step": 935 }, { "epoch": 0.13810402065658428, "grad_norm": 3.6807048320770264, "learning_rate": 1.9911823548345e-05, "loss": 0.107, "step": 936 }, { "epoch": 0.1382515676872003, "grad_norm": 3.127305269241333, "learning_rate": 1.9911139701359116e-05, "loss": 0.1208, "step": 937 }, { "epoch": 0.1383991147178163, "grad_norm": 2.098724842071533, "learning_rate": 1.9910453224674076e-05, "loss": 0.054, "step": 938 }, { "epoch": 0.1385466617484323, "grad_norm": 1.4199751615524292, "learning_rate": 1.9909764118472027e-05, "loss": 0.0695, "step": 939 }, { "epoch": 0.1386942087790483, "grad_norm": 3.0499134063720703, "learning_rate": 1.9909072382935798e-05, "loss": 0.0801, "step": 940 }, { "epoch": 0.1386942087790483, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.07257014513015747, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 48.9541, "eval_samples_per_second": 5.944, "eval_steps_per_second": 0.204, "step": 940 }, { "epoch": 0.13884175580966432, "grad_norm": 1.8707119226455688, "learning_rate": 1.9908378018248932e-05, "loss": 0.0861, "step": 941 }, { "epoch": 0.13898930284028033, "grad_norm": 2.2044320106506348, "learning_rate": 1.9907681024595666e-05, "loss": 0.0936, "step": 942 }, { "epoch": 0.13913684987089636, "grad_norm": 3.051396608352661, "learning_rate": 1.9906981402160922e-05, "loss": 0.0904, "step": 943 }, { "epoch": 0.13928439690151237, "grad_norm": 4.13213586807251, "learning_rate": 1.9906279151130338e-05, "loss": 0.039, "step": 944 }, { "epoch": 0.13943194393212838, "grad_norm": 2.7796614170074463, "learning_rate": 1.9905574271690236e-05, "loss": 0.1052, "step": 945 }, { "epoch": 0.13957949096274438, "grad_norm": 1.8954288959503174, "learning_rate": 1.9904866764027642e-05, "loss": 0.0697, "step": 946 }, { "epoch": 0.1397270379933604, "grad_norm": 3.3926310539245605, "learning_rate": 1.9904156628330278e-05, "loss": 0.0649, "step": 947 }, { "epoch": 0.1398745850239764, "grad_norm": 3.9519522190093994, "learning_rate": 1.990344386478656e-05, "loss": 0.0652, "step": 948 }, { "epoch": 0.1400221320545924, "grad_norm": 3.145176649093628, "learning_rate": 1.9902728473585602e-05, "loss": 0.0725, "step": 949 }, { "epoch": 0.14016967908520842, "grad_norm": 2.8811659812927246, "learning_rate": 1.9902010454917226e-05, "loss": 0.0385, "step": 950 }, { "epoch": 0.14031722611582442, "grad_norm": 2.492112636566162, "learning_rate": 1.990128980897193e-05, "loss": 0.0502, "step": 951 }, { "epoch": 0.14046477314644043, "grad_norm": 2.0640201568603516, "learning_rate": 1.990056653594093e-05, "loss": 0.0808, "step": 952 }, { "epoch": 0.14061232017705644, "grad_norm": 4.045786380767822, "learning_rate": 1.9899840636016133e-05, "loss": 0.1058, "step": 953 }, { "epoch": 0.14075986720767245, "grad_norm": 3.1658120155334473, "learning_rate": 1.9899112109390128e-05, "loss": 0.0849, "step": 954 }, { "epoch": 0.14090741423828845, "grad_norm": 3.7166903018951416, "learning_rate": 1.9898380956256224e-05, "loss": 0.0797, "step": 955 }, { "epoch": 0.14105496126890446, "grad_norm": 2.8205759525299072, "learning_rate": 1.9897647176808413e-05, "loss": 0.0659, "step": 956 }, { "epoch": 0.14120250829952047, "grad_norm": 1.9550689458847046, "learning_rate": 1.9896910771241388e-05, "loss": 0.0148, "step": 957 }, { "epoch": 0.14135005533013648, "grad_norm": 4.906272888183594, "learning_rate": 1.989617173975053e-05, "loss": 0.1381, "step": 958 }, { "epoch": 0.14149760236075248, "grad_norm": 2.077265501022339, "learning_rate": 1.9895430082531937e-05, "loss": 0.045, "step": 959 }, { "epoch": 0.1416451493913685, "grad_norm": 2.105579376220703, "learning_rate": 1.9894685799782387e-05, "loss": 0.0609, "step": 960 }, { "epoch": 0.1416451493913685, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9578163771712159, "eval_loss": 0.07093372195959091, "eval_precision": 0.9796954314720813, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1594, "eval_samples_per_second": 5.92, "eval_steps_per_second": 0.203, "step": 960 }, { "epoch": 0.1417926964219845, "grad_norm": 3.1120989322662354, "learning_rate": 1.989393889169935e-05, "loss": 0.0867, "step": 961 }, { "epoch": 0.1419402434526005, "grad_norm": 2.378723621368408, "learning_rate": 1.989318935848101e-05, "loss": 0.074, "step": 962 }, { "epoch": 0.14208779048321651, "grad_norm": 1.7381685972213745, "learning_rate": 1.989243720032624e-05, "loss": 0.0368, "step": 963 }, { "epoch": 0.14223533751383252, "grad_norm": 4.492753982543945, "learning_rate": 1.9891682417434603e-05, "loss": 0.0693, "step": 964 }, { "epoch": 0.14238288454444853, "grad_norm": 1.787280559539795, "learning_rate": 1.9890925010006364e-05, "loss": 0.0564, "step": 965 }, { "epoch": 0.14253043157506456, "grad_norm": 6.4030632972717285, "learning_rate": 1.989016497824249e-05, "loss": 0.2828, "step": 966 }, { "epoch": 0.14267797860568057, "grad_norm": 3.454893112182617, "learning_rate": 1.9889402322344634e-05, "loss": 0.0451, "step": 967 }, { "epoch": 0.14282552563629658, "grad_norm": 4.159964561462402, "learning_rate": 1.9888637042515148e-05, "loss": 0.0472, "step": 968 }, { "epoch": 0.1429730726669126, "grad_norm": 1.6718432903289795, "learning_rate": 1.9887869138957085e-05, "loss": 0.0499, "step": 969 }, { "epoch": 0.1431206196975286, "grad_norm": 1.4350783824920654, "learning_rate": 1.988709861187419e-05, "loss": 0.0359, "step": 970 }, { "epoch": 0.1432681667281446, "grad_norm": 2.464378833770752, "learning_rate": 1.9886325461470907e-05, "loss": 0.0846, "step": 971 }, { "epoch": 0.1434157137587606, "grad_norm": 4.067535400390625, "learning_rate": 1.9885549687952372e-05, "loss": 0.1218, "step": 972 }, { "epoch": 0.14356326078937662, "grad_norm": 3.0504636764526367, "learning_rate": 1.9884771291524417e-05, "loss": 0.0308, "step": 973 }, { "epoch": 0.14371080781999263, "grad_norm": 6.834758758544922, "learning_rate": 1.988399027239358e-05, "loss": 0.1127, "step": 974 }, { "epoch": 0.14385835485060863, "grad_norm": 1.1059668064117432, "learning_rate": 1.9883206630767075e-05, "loss": 0.0372, "step": 975 }, { "epoch": 0.14400590188122464, "grad_norm": 1.9253910779953003, "learning_rate": 1.9882420366852835e-05, "loss": 0.0717, "step": 976 }, { "epoch": 0.14415344891184065, "grad_norm": 4.201210021972656, "learning_rate": 1.988163148085947e-05, "loss": 0.0773, "step": 977 }, { "epoch": 0.14430099594245666, "grad_norm": 3.2971372604370117, "learning_rate": 1.98808399729963e-05, "loss": 0.0836, "step": 978 }, { "epoch": 0.14444854297307266, "grad_norm": 3.800781488418579, "learning_rate": 1.9880045843473324e-05, "loss": 0.0995, "step": 979 }, { "epoch": 0.14459609000368867, "grad_norm": 3.795149087905884, "learning_rate": 1.9879249092501255e-05, "loss": 0.0865, "step": 980 }, { "epoch": 0.14459609000368867, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06955776363611221, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 50.2736, "eval_samples_per_second": 5.788, "eval_steps_per_second": 0.199, "step": 980 }, { "epoch": 0.14474363703430468, "grad_norm": 2.377592086791992, "learning_rate": 1.9878449720291492e-05, "loss": 0.0357, "step": 981 }, { "epoch": 0.1448911840649207, "grad_norm": 1.2770681381225586, "learning_rate": 1.987764772705613e-05, "loss": 0.0294, "step": 982 }, { "epoch": 0.1450387310955367, "grad_norm": 2.552394151687622, "learning_rate": 1.9876843113007958e-05, "loss": 0.0536, "step": 983 }, { "epoch": 0.1451862781261527, "grad_norm": 3.871574640274048, "learning_rate": 1.9876035878360464e-05, "loss": 0.1185, "step": 984 }, { "epoch": 0.1453338251567687, "grad_norm": 1.2006237506866455, "learning_rate": 1.987522602332783e-05, "loss": 0.0671, "step": 985 }, { "epoch": 0.14548137218738472, "grad_norm": 1.76056969165802, "learning_rate": 1.987441354812493e-05, "loss": 0.051, "step": 986 }, { "epoch": 0.14562891921800072, "grad_norm": 2.635849714279175, "learning_rate": 1.987359845296734e-05, "loss": 0.096, "step": 987 }, { "epoch": 0.14577646624861676, "grad_norm": 3.905381202697754, "learning_rate": 1.9872780738071323e-05, "loss": 0.0562, "step": 988 }, { "epoch": 0.14592401327923277, "grad_norm": 1.2348036766052246, "learning_rate": 1.9871960403653845e-05, "loss": 0.0141, "step": 989 }, { "epoch": 0.14607156030984877, "grad_norm": 2.393639326095581, "learning_rate": 1.987113744993256e-05, "loss": 0.0501, "step": 990 }, { "epoch": 0.14621910734046478, "grad_norm": 3.7687454223632812, "learning_rate": 1.9870311877125824e-05, "loss": 0.1119, "step": 991 }, { "epoch": 0.1463666543710808, "grad_norm": 0.8920186758041382, "learning_rate": 1.9869483685452685e-05, "loss": 0.0225, "step": 992 }, { "epoch": 0.1465142014016968, "grad_norm": 6.160821437835693, "learning_rate": 1.986865287513288e-05, "loss": 0.1284, "step": 993 }, { "epoch": 0.1466617484323128, "grad_norm": 4.173859119415283, "learning_rate": 1.986781944638685e-05, "loss": 0.0788, "step": 994 }, { "epoch": 0.1468092954629288, "grad_norm": 1.856460690498352, "learning_rate": 1.9866983399435725e-05, "loss": 0.0962, "step": 995 }, { "epoch": 0.14695684249354482, "grad_norm": 10.115558624267578, "learning_rate": 1.986614473450133e-05, "loss": 0.1783, "step": 996 }, { "epoch": 0.14710438952416083, "grad_norm": 3.740000009536743, "learning_rate": 1.986530345180619e-05, "loss": 0.0576, "step": 997 }, { "epoch": 0.14725193655477684, "grad_norm": 5.102725982666016, "learning_rate": 1.9864459551573517e-05, "loss": 0.0715, "step": 998 }, { "epoch": 0.14739948358539284, "grad_norm": 4.572036266326904, "learning_rate": 1.9863613034027224e-05, "loss": 0.091, "step": 999 }, { "epoch": 0.14754703061600885, "grad_norm": 3.223302125930786, "learning_rate": 1.9862763899391914e-05, "loss": 0.0347, "step": 1000 }, { "epoch": 0.14754703061600885, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9530864197530864, "eval_loss": 0.0721602737903595, "eval_precision": 0.9698492462311558, "eval_recall": 0.9368932038834952, "eval_runtime": 50.2406, "eval_samples_per_second": 5.792, "eval_steps_per_second": 0.199, "step": 1000 }, { "epoch": 0.14769457764662486, "grad_norm": 7.848106861114502, "learning_rate": 1.9861912147892884e-05, "loss": 0.1343, "step": 1001 }, { "epoch": 0.14784212467724087, "grad_norm": 2.193047285079956, "learning_rate": 1.9861057779756133e-05, "loss": 0.0525, "step": 1002 }, { "epoch": 0.14798967170785687, "grad_norm": 5.619309425354004, "learning_rate": 1.986020079520834e-05, "loss": 0.1208, "step": 1003 }, { "epoch": 0.14813721873847288, "grad_norm": 1.1347784996032715, "learning_rate": 1.985934119447689e-05, "loss": 0.0119, "step": 1004 }, { "epoch": 0.1482847657690889, "grad_norm": 2.448190689086914, "learning_rate": 1.9858478977789867e-05, "loss": 0.0211, "step": 1005 }, { "epoch": 0.1484323127997049, "grad_norm": 1.6592494249343872, "learning_rate": 1.9857614145376027e-05, "loss": 0.0316, "step": 1006 }, { "epoch": 0.1485798598303209, "grad_norm": 4.254730224609375, "learning_rate": 1.985674669746484e-05, "loss": 0.073, "step": 1007 }, { "epoch": 0.1487274068609369, "grad_norm": 6.407226085662842, "learning_rate": 1.9855876634286472e-05, "loss": 0.1129, "step": 1008 }, { "epoch": 0.14887495389155292, "grad_norm": 3.275400400161743, "learning_rate": 1.985500395607176e-05, "loss": 0.1148, "step": 1009 }, { "epoch": 0.14902250092216895, "grad_norm": 1.9416885375976562, "learning_rate": 1.985412866305226e-05, "loss": 0.0697, "step": 1010 }, { "epoch": 0.14917004795278496, "grad_norm": 4.446692943572998, "learning_rate": 1.985325075546021e-05, "loss": 0.1356, "step": 1011 }, { "epoch": 0.14931759498340097, "grad_norm": 1.4866023063659668, "learning_rate": 1.985237023352854e-05, "loss": 0.0259, "step": 1012 }, { "epoch": 0.14946514201401698, "grad_norm": 5.761842727661133, "learning_rate": 1.985148709749088e-05, "loss": 0.1019, "step": 1013 }, { "epoch": 0.14961268904463298, "grad_norm": 2.746413230895996, "learning_rate": 1.9850601347581547e-05, "loss": 0.0929, "step": 1014 }, { "epoch": 0.149760236075249, "grad_norm": 5.954599380493164, "learning_rate": 1.9849712984035556e-05, "loss": 0.0682, "step": 1015 }, { "epoch": 0.149907783105865, "grad_norm": 5.145752429962158, "learning_rate": 1.9848822007088614e-05, "loss": 0.0694, "step": 1016 }, { "epoch": 0.150055330136481, "grad_norm": 5.2828545570373535, "learning_rate": 1.9847928416977125e-05, "loss": 0.0971, "step": 1017 }, { "epoch": 0.15020287716709702, "grad_norm": 7.458224296569824, "learning_rate": 1.9847032213938184e-05, "loss": 0.117, "step": 1018 }, { "epoch": 0.15035042419771302, "grad_norm": 2.191798448562622, "learning_rate": 1.9846133398209572e-05, "loss": 0.0622, "step": 1019 }, { "epoch": 0.15049797122832903, "grad_norm": 4.914392948150635, "learning_rate": 1.9845231970029774e-05, "loss": 0.1003, "step": 1020 }, { "epoch": 0.15049797122832903, "eval_accuracy": 0.9652677279305355, "eval_f1": 0.9411764705882353, "eval_loss": 0.081731878221035, "eval_precision": 0.9504950495049505, "eval_recall": 0.9320388349514563, "eval_runtime": 49.5219, "eval_samples_per_second": 5.876, "eval_steps_per_second": 0.202, "step": 1020 }, { "epoch": 0.15064551825894504, "grad_norm": 1.8010629415512085, "learning_rate": 1.984432792963796e-05, "loss": 0.0486, "step": 1021 }, { "epoch": 0.15079306528956105, "grad_norm": 1.649573802947998, "learning_rate": 1.9843421277274004e-05, "loss": 0.0359, "step": 1022 }, { "epoch": 0.15094061232017705, "grad_norm": 6.072673797607422, "learning_rate": 1.9842512013178462e-05, "loss": 0.0983, "step": 1023 }, { "epoch": 0.15108815935079306, "grad_norm": 3.31156063079834, "learning_rate": 1.9841600137592584e-05, "loss": 0.1127, "step": 1024 }, { "epoch": 0.15123570638140907, "grad_norm": 3.5180397033691406, "learning_rate": 1.9840685650758317e-05, "loss": 0.1, "step": 1025 }, { "epoch": 0.15138325341202508, "grad_norm": 3.0031371116638184, "learning_rate": 1.9839768552918305e-05, "loss": 0.0536, "step": 1026 }, { "epoch": 0.15153080044264108, "grad_norm": 2.7900681495666504, "learning_rate": 1.9838848844315874e-05, "loss": 0.1017, "step": 1027 }, { "epoch": 0.1516783474732571, "grad_norm": 2.2977097034454346, "learning_rate": 1.9837926525195047e-05, "loss": 0.089, "step": 1028 }, { "epoch": 0.1518258945038731, "grad_norm": 3.2741096019744873, "learning_rate": 1.9837001595800545e-05, "loss": 0.0342, "step": 1029 }, { "epoch": 0.1519734415344891, "grad_norm": 3.3672845363616943, "learning_rate": 1.9836074056377778e-05, "loss": 0.0874, "step": 1030 }, { "epoch": 0.15212098856510511, "grad_norm": 4.149081707000732, "learning_rate": 1.9835143907172842e-05, "loss": 0.1237, "step": 1031 }, { "epoch": 0.15226853559572115, "grad_norm": 8.915290832519531, "learning_rate": 1.9834211148432537e-05, "loss": 0.1247, "step": 1032 }, { "epoch": 0.15241608262633716, "grad_norm": 4.5622358322143555, "learning_rate": 1.9833275780404343e-05, "loss": 0.0946, "step": 1033 }, { "epoch": 0.15256362965695316, "grad_norm": 2.594510793685913, "learning_rate": 1.9832337803336446e-05, "loss": 0.1028, "step": 1034 }, { "epoch": 0.15271117668756917, "grad_norm": 1.1598225831985474, "learning_rate": 1.9831397217477718e-05, "loss": 0.0168, "step": 1035 }, { "epoch": 0.15285872371818518, "grad_norm": 1.8105746507644653, "learning_rate": 1.9830454023077714e-05, "loss": 0.0734, "step": 1036 }, { "epoch": 0.1530062707488012, "grad_norm": 1.144171953201294, "learning_rate": 1.98295082203867e-05, "loss": 0.0447, "step": 1037 }, { "epoch": 0.1531538177794172, "grad_norm": 5.465419769287109, "learning_rate": 1.9828559809655612e-05, "loss": 0.1366, "step": 1038 }, { "epoch": 0.1533013648100332, "grad_norm": 1.4797170162200928, "learning_rate": 1.98276087911361e-05, "loss": 0.0631, "step": 1039 }, { "epoch": 0.1534489118406492, "grad_norm": 1.1740902662277222, "learning_rate": 1.982665516508049e-05, "loss": 0.0152, "step": 1040 }, { "epoch": 0.1534489118406492, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9518987341772152, "eval_loss": 0.0774322971701622, "eval_precision": 0.9947089947089947, "eval_recall": 0.912621359223301, "eval_runtime": 49.6893, "eval_samples_per_second": 5.856, "eval_steps_per_second": 0.201, "step": 1040 }, { "epoch": 0.15359645887126522, "grad_norm": 3.8290774822235107, "learning_rate": 1.9825698931741802e-05, "loss": 0.1057, "step": 1041 }, { "epoch": 0.15374400590188123, "grad_norm": 2.3218913078308105, "learning_rate": 1.9824740091373763e-05, "loss": 0.0889, "step": 1042 }, { "epoch": 0.15389155293249723, "grad_norm": 2.505398988723755, "learning_rate": 1.9823778644230767e-05, "loss": 0.0539, "step": 1043 }, { "epoch": 0.15403909996311324, "grad_norm": 1.3350603580474854, "learning_rate": 1.982281459056792e-05, "loss": 0.0346, "step": 1044 }, { "epoch": 0.15418664699372925, "grad_norm": 1.586264729499817, "learning_rate": 1.982184793064101e-05, "loss": 0.0395, "step": 1045 }, { "epoch": 0.15433419402434526, "grad_norm": 2.379105567932129, "learning_rate": 1.982087866470652e-05, "loss": 0.0629, "step": 1046 }, { "epoch": 0.15448174105496126, "grad_norm": 1.8235529661178589, "learning_rate": 1.9819906793021616e-05, "loss": 0.052, "step": 1047 }, { "epoch": 0.15462928808557727, "grad_norm": 1.0692788362503052, "learning_rate": 1.981893231584417e-05, "loss": 0.0234, "step": 1048 }, { "epoch": 0.15477683511619328, "grad_norm": 3.23006534576416, "learning_rate": 1.981795523343274e-05, "loss": 0.0739, "step": 1049 }, { "epoch": 0.1549243821468093, "grad_norm": 2.807236433029175, "learning_rate": 1.9816975546046568e-05, "loss": 0.0626, "step": 1050 }, { "epoch": 0.1550719291774253, "grad_norm": 4.488359451293945, "learning_rate": 1.9815993253945586e-05, "loss": 0.0677, "step": 1051 }, { "epoch": 0.1552194762080413, "grad_norm": 2.8741297721862793, "learning_rate": 1.9815008357390435e-05, "loss": 0.0818, "step": 1052 }, { "epoch": 0.1553670232386573, "grad_norm": 2.770172119140625, "learning_rate": 1.981402085664243e-05, "loss": 0.0478, "step": 1053 }, { "epoch": 0.15551457026927334, "grad_norm": 4.521235466003418, "learning_rate": 1.9813030751963582e-05, "loss": 0.0477, "step": 1054 }, { "epoch": 0.15566211729988935, "grad_norm": 1.932637095451355, "learning_rate": 1.9812038043616596e-05, "loss": 0.0782, "step": 1055 }, { "epoch": 0.15580966433050536, "grad_norm": 3.522562026977539, "learning_rate": 1.981104273186486e-05, "loss": 0.1339, "step": 1056 }, { "epoch": 0.15595721136112137, "grad_norm": 2.1084277629852295, "learning_rate": 1.981004481697246e-05, "loss": 0.0446, "step": 1057 }, { "epoch": 0.15610475839173737, "grad_norm": 2.635068416595459, "learning_rate": 1.9809044299204173e-05, "loss": 0.0499, "step": 1058 }, { "epoch": 0.15625230542235338, "grad_norm": 1.5453013181686401, "learning_rate": 1.980804117882546e-05, "loss": 0.0333, "step": 1059 }, { "epoch": 0.1563998524529694, "grad_norm": 2.7596287727355957, "learning_rate": 1.980703545610248e-05, "loss": 0.0965, "step": 1060 }, { "epoch": 0.1563998524529694, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.075201116502285, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 50.1783, "eval_samples_per_second": 5.799, "eval_steps_per_second": 0.199, "step": 1060 }, { "epoch": 0.1565473994835854, "grad_norm": 3.0337114334106445, "learning_rate": 1.9806027131302074e-05, "loss": 0.0425, "step": 1061 }, { "epoch": 0.1566949465142014, "grad_norm": 5.65107536315918, "learning_rate": 1.980501620469178e-05, "loss": 0.1849, "step": 1062 }, { "epoch": 0.1568424935448174, "grad_norm": 4.041816234588623, "learning_rate": 1.9804002676539832e-05, "loss": 0.0433, "step": 1063 }, { "epoch": 0.15699004057543342, "grad_norm": 1.5392462015151978, "learning_rate": 1.980298654711514e-05, "loss": 0.0307, "step": 1064 }, { "epoch": 0.15713758760604943, "grad_norm": 2.601379871368408, "learning_rate": 1.9801967816687312e-05, "loss": 0.0739, "step": 1065 }, { "epoch": 0.15728513463666544, "grad_norm": 2.614755630493164, "learning_rate": 1.9800946485526646e-05, "loss": 0.0743, "step": 1066 }, { "epoch": 0.15743268166728144, "grad_norm": 3.5770504474639893, "learning_rate": 1.9799922553904128e-05, "loss": 0.104, "step": 1067 }, { "epoch": 0.15758022869789745, "grad_norm": 2.813310384750366, "learning_rate": 1.9798896022091436e-05, "loss": 0.1036, "step": 1068 }, { "epoch": 0.15772777572851346, "grad_norm": 3.0933241844177246, "learning_rate": 1.9797866890360938e-05, "loss": 0.0626, "step": 1069 }, { "epoch": 0.15787532275912947, "grad_norm": 3.362506866455078, "learning_rate": 1.9796835158985695e-05, "loss": 0.1401, "step": 1070 }, { "epoch": 0.15802286978974547, "grad_norm": 3.2485926151275635, "learning_rate": 1.9795800828239446e-05, "loss": 0.0455, "step": 1071 }, { "epoch": 0.15817041682036148, "grad_norm": 3.0987308025360107, "learning_rate": 1.9794763898396633e-05, "loss": 0.0893, "step": 1072 }, { "epoch": 0.1583179638509775, "grad_norm": 1.906617283821106, "learning_rate": 1.979372436973238e-05, "loss": 0.0523, "step": 1073 }, { "epoch": 0.1584655108815935, "grad_norm": 2.1242833137512207, "learning_rate": 1.9792682242522506e-05, "loss": 0.0289, "step": 1074 }, { "epoch": 0.1586130579122095, "grad_norm": 3.080934762954712, "learning_rate": 1.9791637517043507e-05, "loss": 0.13, "step": 1075 }, { "epoch": 0.15876060494282554, "grad_norm": 3.077103853225708, "learning_rate": 1.979059019357259e-05, "loss": 0.0731, "step": 1076 }, { "epoch": 0.15890815197344155, "grad_norm": 3.670008897781372, "learning_rate": 1.978954027238763e-05, "loss": 0.0525, "step": 1077 }, { "epoch": 0.15905569900405755, "grad_norm": 2.0089237689971924, "learning_rate": 1.97884877537672e-05, "loss": 0.0693, "step": 1078 }, { "epoch": 0.15920324603467356, "grad_norm": 2.8120405673980713, "learning_rate": 1.978743263799057e-05, "loss": 0.0561, "step": 1079 }, { "epoch": 0.15935079306528957, "grad_norm": 4.657172203063965, "learning_rate": 1.9786374925337684e-05, "loss": 0.1153, "step": 1080 }, { "epoch": 0.15935079306528957, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9545454545454546, "eval_loss": 0.07713089138269424, "eval_precision": 0.9947368421052631, "eval_recall": 0.9174757281553398, "eval_runtime": 49.8612, "eval_samples_per_second": 5.836, "eval_steps_per_second": 0.201, "step": 1080 }, { "epoch": 0.15949834009590558, "grad_norm": 1.3010139465332031, "learning_rate": 1.9785314616089187e-05, "loss": 0.0638, "step": 1081 }, { "epoch": 0.15964588712652159, "grad_norm": 3.8000481128692627, "learning_rate": 1.9784251710526404e-05, "loss": 0.1156, "step": 1082 }, { "epoch": 0.1597934341571376, "grad_norm": 1.6202903985977173, "learning_rate": 1.9783186208931355e-05, "loss": 0.063, "step": 1083 }, { "epoch": 0.1599409811877536, "grad_norm": 1.6496022939682007, "learning_rate": 1.9782118111586746e-05, "loss": 0.0289, "step": 1084 }, { "epoch": 0.1600885282183696, "grad_norm": 2.3569116592407227, "learning_rate": 1.9781047418775977e-05, "loss": 0.0231, "step": 1085 }, { "epoch": 0.16023607524898562, "grad_norm": 3.200526237487793, "learning_rate": 1.977997413078313e-05, "loss": 0.0806, "step": 1086 }, { "epoch": 0.16038362227960162, "grad_norm": 2.9035394191741943, "learning_rate": 1.9778898247892973e-05, "loss": 0.1459, "step": 1087 }, { "epoch": 0.16053116931021763, "grad_norm": 1.2487620115280151, "learning_rate": 1.9777819770390974e-05, "loss": 0.0553, "step": 1088 }, { "epoch": 0.16067871634083364, "grad_norm": 2.998079538345337, "learning_rate": 1.977673869856328e-05, "loss": 0.1036, "step": 1089 }, { "epoch": 0.16082626337144965, "grad_norm": 3.8028757572174072, "learning_rate": 1.9775655032696733e-05, "loss": 0.1195, "step": 1090 }, { "epoch": 0.16097381040206565, "grad_norm": 4.384522914886475, "learning_rate": 1.9774568773078854e-05, "loss": 0.1117, "step": 1091 }, { "epoch": 0.16112135743268166, "grad_norm": 3.1089909076690674, "learning_rate": 1.977347991999786e-05, "loss": 0.0561, "step": 1092 }, { "epoch": 0.16126890446329767, "grad_norm": 3.275921106338501, "learning_rate": 1.9772388473742655e-05, "loss": 0.0686, "step": 1093 }, { "epoch": 0.16141645149391368, "grad_norm": 3.2788338661193848, "learning_rate": 1.977129443460283e-05, "loss": 0.0449, "step": 1094 }, { "epoch": 0.16156399852452968, "grad_norm": 2.0479440689086914, "learning_rate": 1.977019780286866e-05, "loss": 0.0393, "step": 1095 }, { "epoch": 0.1617115455551457, "grad_norm": 1.5337871313095093, "learning_rate": 1.9769098578831113e-05, "loss": 0.0493, "step": 1096 }, { "epoch": 0.1618590925857617, "grad_norm": 2.300659656524658, "learning_rate": 1.976799676278185e-05, "loss": 0.0479, "step": 1097 }, { "epoch": 0.1620066396163777, "grad_norm": 2.4802398681640625, "learning_rate": 1.9766892355013203e-05, "loss": 0.0938, "step": 1098 }, { "epoch": 0.16215418664699374, "grad_norm": 3.2316417694091797, "learning_rate": 1.976578535581821e-05, "loss": 0.0471, "step": 1099 }, { "epoch": 0.16230173367760975, "grad_norm": 2.0149641036987305, "learning_rate": 1.9764675765490585e-05, "loss": 0.0918, "step": 1100 }, { "epoch": 0.16230173367760975, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.949238578680203, "eval_loss": 0.07976354658603668, "eval_precision": 0.9946808510638298, "eval_recall": 0.9077669902912622, "eval_runtime": 49.9043, "eval_samples_per_second": 5.831, "eval_steps_per_second": 0.2, "step": 1100 }, { "epoch": 0.16244928070822576, "grad_norm": 1.5391192436218262, "learning_rate": 1.976356358432473e-05, "loss": 0.0909, "step": 1101 }, { "epoch": 0.16259682773884176, "grad_norm": 4.712087154388428, "learning_rate": 1.9762448812615746e-05, "loss": 0.0457, "step": 1102 }, { "epoch": 0.16274437476945777, "grad_norm": 1.3102666139602661, "learning_rate": 1.9761331450659402e-05, "loss": 0.0654, "step": 1103 }, { "epoch": 0.16289192180007378, "grad_norm": 1.8530757427215576, "learning_rate": 1.9760211498752174e-05, "loss": 0.0605, "step": 1104 }, { "epoch": 0.1630394688306898, "grad_norm": 3.2777750492095947, "learning_rate": 1.9759088957191214e-05, "loss": 0.0895, "step": 1105 }, { "epoch": 0.1631870158613058, "grad_norm": 4.860166549682617, "learning_rate": 1.9757963826274357e-05, "loss": 0.1321, "step": 1106 }, { "epoch": 0.1633345628919218, "grad_norm": 1.3685470819473267, "learning_rate": 1.975683610630014e-05, "loss": 0.0549, "step": 1107 }, { "epoch": 0.1634821099225378, "grad_norm": 3.2440357208251953, "learning_rate": 1.9755705797567773e-05, "loss": 0.0543, "step": 1108 }, { "epoch": 0.16362965695315382, "grad_norm": 2.0525951385498047, "learning_rate": 1.9754572900377157e-05, "loss": 0.063, "step": 1109 }, { "epoch": 0.16377720398376983, "grad_norm": 3.972214460372925, "learning_rate": 1.975343741502889e-05, "loss": 0.0556, "step": 1110 }, { "epoch": 0.16392475101438583, "grad_norm": 4.460510730743408, "learning_rate": 1.9752299341824237e-05, "loss": 0.1375, "step": 1111 }, { "epoch": 0.16407229804500184, "grad_norm": 1.478887677192688, "learning_rate": 1.9751158681065162e-05, "loss": 0.033, "step": 1112 }, { "epoch": 0.16421984507561785, "grad_norm": 1.9389015436172485, "learning_rate": 1.9750015433054322e-05, "loss": 0.0604, "step": 1113 }, { "epoch": 0.16436739210623386, "grad_norm": 5.091506004333496, "learning_rate": 1.974886959809504e-05, "loss": 0.1242, "step": 1114 }, { "epoch": 0.16451493913684986, "grad_norm": 2.724470853805542, "learning_rate": 1.974772117649135e-05, "loss": 0.0681, "step": 1115 }, { "epoch": 0.16466248616746587, "grad_norm": 2.056694269180298, "learning_rate": 1.9746570168547952e-05, "loss": 0.0756, "step": 1116 }, { "epoch": 0.16481003319808188, "grad_norm": 3.2380738258361816, "learning_rate": 1.9745416574570244e-05, "loss": 0.0886, "step": 1117 }, { "epoch": 0.1649575802286979, "grad_norm": 3.8210017681121826, "learning_rate": 1.97442603948643e-05, "loss": 0.0456, "step": 1118 }, { "epoch": 0.1651051272593139, "grad_norm": 5.30413818359375, "learning_rate": 1.9743101629736897e-05, "loss": 0.1309, "step": 1119 }, { "epoch": 0.1652526742899299, "grad_norm": 1.543357491493225, "learning_rate": 1.9741940279495476e-05, "loss": 0.0745, "step": 1120 }, { "epoch": 0.1652526742899299, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.07394512742757797, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.6554, "eval_samples_per_second": 5.86, "eval_steps_per_second": 0.201, "step": 1120 }, { "epoch": 0.16540022132054594, "grad_norm": 1.7845977544784546, "learning_rate": 1.9740776344448184e-05, "loss": 0.0875, "step": 1121 }, { "epoch": 0.16554776835116194, "grad_norm": 0.8256816267967224, "learning_rate": 1.9739609824903843e-05, "loss": 0.0091, "step": 1122 }, { "epoch": 0.16569531538177795, "grad_norm": 3.202981472015381, "learning_rate": 1.973844072117196e-05, "loss": 0.0466, "step": 1123 }, { "epoch": 0.16584286241239396, "grad_norm": 3.335444688796997, "learning_rate": 1.9737269033562732e-05, "loss": 0.127, "step": 1124 }, { "epoch": 0.16599040944300997, "grad_norm": 1.9391448497772217, "learning_rate": 1.973609476238704e-05, "loss": 0.0602, "step": 1125 }, { "epoch": 0.16613795647362598, "grad_norm": 4.718049049377441, "learning_rate": 1.9734917907956452e-05, "loss": 0.0597, "step": 1126 }, { "epoch": 0.16628550350424198, "grad_norm": 5.464658260345459, "learning_rate": 1.973373847058322e-05, "loss": 0.1208, "step": 1127 }, { "epoch": 0.166433050534858, "grad_norm": 13.993646621704102, "learning_rate": 1.973255645058028e-05, "loss": 0.0919, "step": 1128 }, { "epoch": 0.166580597565474, "grad_norm": 3.1344707012176514, "learning_rate": 1.9731371848261253e-05, "loss": 0.0596, "step": 1129 }, { "epoch": 0.16672814459609, "grad_norm": 2.166461706161499, "learning_rate": 1.973018466394045e-05, "loss": 0.0573, "step": 1130 }, { "epoch": 0.166875691626706, "grad_norm": 17.70781898498535, "learning_rate": 1.9728994897932862e-05, "loss": 0.1011, "step": 1131 }, { "epoch": 0.16702323865732202, "grad_norm": 2.5160629749298096, "learning_rate": 1.9727802550554163e-05, "loss": 0.061, "step": 1132 }, { "epoch": 0.16717078568793803, "grad_norm": 4.427779197692871, "learning_rate": 1.9726607622120727e-05, "loss": 0.0715, "step": 1133 }, { "epoch": 0.16731833271855404, "grad_norm": 1.6453346014022827, "learning_rate": 1.972541011294959e-05, "loss": 0.0515, "step": 1134 }, { "epoch": 0.16746587974917004, "grad_norm": 2.4062912464141846, "learning_rate": 1.972421002335849e-05, "loss": 0.0833, "step": 1135 }, { "epoch": 0.16761342677978605, "grad_norm": 2.4710896015167236, "learning_rate": 1.972300735366584e-05, "loss": 0.049, "step": 1136 }, { "epoch": 0.16776097381040206, "grad_norm": 5.580881118774414, "learning_rate": 1.9721802104190748e-05, "loss": 0.092, "step": 1137 }, { "epoch": 0.16790852084101807, "grad_norm": 2.5302517414093018, "learning_rate": 1.9720594275252995e-05, "loss": 0.076, "step": 1138 }, { "epoch": 0.16805606787163407, "grad_norm": 1.7984987497329712, "learning_rate": 1.9719383867173053e-05, "loss": 0.098, "step": 1139 }, { "epoch": 0.16820361490225008, "grad_norm": 1.3634850978851318, "learning_rate": 1.9718170880272074e-05, "loss": 0.0426, "step": 1140 }, { "epoch": 0.16820361490225008, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9521410579345088, "eval_loss": 0.07820115983486176, "eval_precision": 0.9895287958115183, "eval_recall": 0.9174757281553398, "eval_runtime": 49.5968, "eval_samples_per_second": 5.867, "eval_steps_per_second": 0.202, "step": 1140 }, { "epoch": 0.1683511619328661, "grad_norm": 1.8019294738769531, "learning_rate": 1.97169553148719e-05, "loss": 0.0434, "step": 1141 }, { "epoch": 0.1684987089634821, "grad_norm": 2.5386962890625, "learning_rate": 1.9715737171295052e-05, "loss": 0.0883, "step": 1142 }, { "epoch": 0.16864625599409813, "grad_norm": 2.2441985607147217, "learning_rate": 1.971451644986474e-05, "loss": 0.0464, "step": 1143 }, { "epoch": 0.16879380302471414, "grad_norm": 1.6799899339675903, "learning_rate": 1.9713293150904853e-05, "loss": 0.0796, "step": 1144 }, { "epoch": 0.16894135005533015, "grad_norm": 0.9977738261222839, "learning_rate": 1.9712067274739968e-05, "loss": 0.0281, "step": 1145 }, { "epoch": 0.16908889708594615, "grad_norm": 5.005007743835449, "learning_rate": 1.9710838821695337e-05, "loss": 0.1063, "step": 1146 }, { "epoch": 0.16923644411656216, "grad_norm": 6.595736503601074, "learning_rate": 1.9709607792096915e-05, "loss": 0.1034, "step": 1147 }, { "epoch": 0.16938399114717817, "grad_norm": 2.2267541885375977, "learning_rate": 1.9708374186271315e-05, "loss": 0.0359, "step": 1148 }, { "epoch": 0.16953153817779418, "grad_norm": 3.520979881286621, "learning_rate": 1.9707138004545853e-05, "loss": 0.1188, "step": 1149 }, { "epoch": 0.16967908520841019, "grad_norm": 2.3073904514312744, "learning_rate": 1.9705899247248525e-05, "loss": 0.0568, "step": 1150 }, { "epoch": 0.1698266322390262, "grad_norm": 2.7262933254241943, "learning_rate": 1.9704657914708004e-05, "loss": 0.066, "step": 1151 }, { "epoch": 0.1699741792696422, "grad_norm": 3.5414979457855225, "learning_rate": 1.9703414007253648e-05, "loss": 0.1063, "step": 1152 }, { "epoch": 0.1701217263002582, "grad_norm": 3.266883373260498, "learning_rate": 1.9702167525215504e-05, "loss": 0.1094, "step": 1153 }, { "epoch": 0.17026927333087422, "grad_norm": 2.5065932273864746, "learning_rate": 1.9700918468924293e-05, "loss": 0.0723, "step": 1154 }, { "epoch": 0.17041682036149022, "grad_norm": 2.3549139499664307, "learning_rate": 1.969966683871143e-05, "loss": 0.04, "step": 1155 }, { "epoch": 0.17056436739210623, "grad_norm": 1.7745110988616943, "learning_rate": 1.9698412634909006e-05, "loss": 0.0204, "step": 1156 }, { "epoch": 0.17071191442272224, "grad_norm": 3.4690945148468018, "learning_rate": 1.969715585784979e-05, "loss": 0.1341, "step": 1157 }, { "epoch": 0.17085946145333825, "grad_norm": 1.155771255493164, "learning_rate": 1.9695896507867248e-05, "loss": 0.0302, "step": 1158 }, { "epoch": 0.17100700848395425, "grad_norm": 4.164511203765869, "learning_rate": 1.9694634585295514e-05, "loss": 0.0716, "step": 1159 }, { "epoch": 0.17115455551457026, "grad_norm": 2.091667890548706, "learning_rate": 1.9693370090469412e-05, "loss": 0.0373, "step": 1160 }, { "epoch": 0.17115455551457026, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.07951486110687256, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.21, "eval_samples_per_second": 5.913, "eval_steps_per_second": 0.203, "step": 1160 }, { "epoch": 0.17130210254518627, "grad_norm": 2.886604070663452, "learning_rate": 1.969210302372445e-05, "loss": 0.1189, "step": 1161 }, { "epoch": 0.17144964957580228, "grad_norm": 5.074235916137695, "learning_rate": 1.9690833385396818e-05, "loss": 0.1441, "step": 1162 }, { "epoch": 0.17159719660641828, "grad_norm": 2.6262738704681396, "learning_rate": 1.9689561175823378e-05, "loss": 0.0443, "step": 1163 }, { "epoch": 0.1717447436370343, "grad_norm": 5.723478317260742, "learning_rate": 1.968828639534169e-05, "loss": 0.0314, "step": 1164 }, { "epoch": 0.17189229066765033, "grad_norm": 4.439426898956299, "learning_rate": 1.968700904428998e-05, "loss": 0.0865, "step": 1165 }, { "epoch": 0.17203983769826633, "grad_norm": 5.734502792358398, "learning_rate": 1.9685729123007178e-05, "loss": 0.0883, "step": 1166 }, { "epoch": 0.17218738472888234, "grad_norm": 0.765541672706604, "learning_rate": 1.968444663183287e-05, "loss": 0.0089, "step": 1167 }, { "epoch": 0.17233493175949835, "grad_norm": 2.7013490200042725, "learning_rate": 1.968316157110734e-05, "loss": 0.1004, "step": 1168 }, { "epoch": 0.17248247879011436, "grad_norm": 2.2848806381225586, "learning_rate": 1.9681873941171554e-05, "loss": 0.0538, "step": 1169 }, { "epoch": 0.17263002582073037, "grad_norm": 2.424731969833374, "learning_rate": 1.9680583742367152e-05, "loss": 0.0791, "step": 1170 }, { "epoch": 0.17277757285134637, "grad_norm": 9.543401718139648, "learning_rate": 1.9679290975036457e-05, "loss": 0.1065, "step": 1171 }, { "epoch": 0.17292511988196238, "grad_norm": 1.9008293151855469, "learning_rate": 1.9677995639522482e-05, "loss": 0.0281, "step": 1172 }, { "epoch": 0.1730726669125784, "grad_norm": 2.1277124881744385, "learning_rate": 1.967669773616891e-05, "loss": 0.094, "step": 1173 }, { "epoch": 0.1732202139431944, "grad_norm": 4.10353946685791, "learning_rate": 1.9675397265320114e-05, "loss": 0.0867, "step": 1174 }, { "epoch": 0.1733677609738104, "grad_norm": 2.1625311374664307, "learning_rate": 1.9674094227321145e-05, "loss": 0.0349, "step": 1175 }, { "epoch": 0.1735153080044264, "grad_norm": 4.3703436851501465, "learning_rate": 1.967278862251773e-05, "loss": 0.1463, "step": 1176 }, { "epoch": 0.17366285503504242, "grad_norm": 2.625303268432617, "learning_rate": 1.967148045125629e-05, "loss": 0.0614, "step": 1177 }, { "epoch": 0.17381040206565843, "grad_norm": 1.3506369590759277, "learning_rate": 1.9670169713883913e-05, "loss": 0.047, "step": 1178 }, { "epoch": 0.17395794909627443, "grad_norm": 2.7205722332000732, "learning_rate": 1.9668856410748377e-05, "loss": 0.0618, "step": 1179 }, { "epoch": 0.17410549612689044, "grad_norm": 2.453324794769287, "learning_rate": 1.9667540542198133e-05, "loss": 0.0635, "step": 1180 }, { "epoch": 0.17410549612689044, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.0754147320985794, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 48.9901, "eval_samples_per_second": 5.94, "eval_steps_per_second": 0.204, "step": 1180 }, { "epoch": 0.17425304315750645, "grad_norm": 5.400239944458008, "learning_rate": 1.966622210858232e-05, "loss": 0.0864, "step": 1181 }, { "epoch": 0.17440059018812246, "grad_norm": 2.175227642059326, "learning_rate": 1.966490111025076e-05, "loss": 0.0911, "step": 1182 }, { "epoch": 0.17454813721873846, "grad_norm": 3.4278488159179688, "learning_rate": 1.9663577547553942e-05, "loss": 0.0959, "step": 1183 }, { "epoch": 0.17469568424935447, "grad_norm": 2.260950803756714, "learning_rate": 1.9662251420843047e-05, "loss": 0.0945, "step": 1184 }, { "epoch": 0.17484323127997048, "grad_norm": 2.850489854812622, "learning_rate": 1.9660922730469935e-05, "loss": 0.1095, "step": 1185 }, { "epoch": 0.1749907783105865, "grad_norm": 3.7380781173706055, "learning_rate": 1.965959147678714e-05, "loss": 0.1314, "step": 1186 }, { "epoch": 0.17513832534120252, "grad_norm": 1.1512999534606934, "learning_rate": 1.9658257660147887e-05, "loss": 0.0635, "step": 1187 }, { "epoch": 0.17528587237181853, "grad_norm": 1.3392937183380127, "learning_rate": 1.965692128090607e-05, "loss": 0.0435, "step": 1188 }, { "epoch": 0.17543341940243454, "grad_norm": 1.7340505123138428, "learning_rate": 1.9655582339416262e-05, "loss": 0.0463, "step": 1189 }, { "epoch": 0.17558096643305054, "grad_norm": 2.113524913787842, "learning_rate": 1.9654240836033735e-05, "loss": 0.1057, "step": 1190 }, { "epoch": 0.17572851346366655, "grad_norm": 6.343625545501709, "learning_rate": 1.9652896771114416e-05, "loss": 0.1339, "step": 1191 }, { "epoch": 0.17587606049428256, "grad_norm": 2.841322898864746, "learning_rate": 1.9651550145014924e-05, "loss": 0.1088, "step": 1192 }, { "epoch": 0.17602360752489857, "grad_norm": 2.3897523880004883, "learning_rate": 1.9650200958092557e-05, "loss": 0.0771, "step": 1193 }, { "epoch": 0.17617115455551458, "grad_norm": 2.443484306335449, "learning_rate": 1.9648849210705295e-05, "loss": 0.0964, "step": 1194 }, { "epoch": 0.17631870158613058, "grad_norm": 2.3321216106414795, "learning_rate": 1.964749490321179e-05, "loss": 0.0716, "step": 1195 }, { "epoch": 0.1764662486167466, "grad_norm": 2.347444534301758, "learning_rate": 1.9646138035971374e-05, "loss": 0.136, "step": 1196 }, { "epoch": 0.1766137956473626, "grad_norm": 2.7857375144958496, "learning_rate": 1.964477860934407e-05, "loss": 0.0939, "step": 1197 }, { "epoch": 0.1767613426779786, "grad_norm": 1.407057762145996, "learning_rate": 1.9643416623690562e-05, "loss": 0.0485, "step": 1198 }, { "epoch": 0.1769088897085946, "grad_norm": 3.6051270961761475, "learning_rate": 1.9642052079372234e-05, "loss": 0.0683, "step": 1199 }, { "epoch": 0.17705643673921062, "grad_norm": 1.4427069425582886, "learning_rate": 1.9640684976751126e-05, "loss": 0.0733, "step": 1200 }, { "epoch": 0.17705643673921062, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.07437404990196228, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1138, "eval_samples_per_second": 5.925, "eval_steps_per_second": 0.204, "step": 1200 }, { "epoch": 0.17720398376982663, "grad_norm": 1.8686000108718872, "learning_rate": 1.9639315316189974e-05, "loss": 0.0745, "step": 1201 }, { "epoch": 0.17735153080044264, "grad_norm": 2.218090534210205, "learning_rate": 1.9637943098052186e-05, "loss": 0.0676, "step": 1202 }, { "epoch": 0.17749907783105864, "grad_norm": 1.7477823495864868, "learning_rate": 1.963656832270185e-05, "loss": 0.038, "step": 1203 }, { "epoch": 0.17764662486167465, "grad_norm": 3.691580295562744, "learning_rate": 1.963519099050373e-05, "loss": 0.0848, "step": 1204 }, { "epoch": 0.17779417189229066, "grad_norm": 9.843948364257812, "learning_rate": 1.963381110182327e-05, "loss": 0.1181, "step": 1205 }, { "epoch": 0.17794171892290667, "grad_norm": 1.3146629333496094, "learning_rate": 1.9632428657026593e-05, "loss": 0.0317, "step": 1206 }, { "epoch": 0.17808926595352267, "grad_norm": 1.4718788862228394, "learning_rate": 1.9631043656480505e-05, "loss": 0.0612, "step": 1207 }, { "epoch": 0.17823681298413868, "grad_norm": 9.122505187988281, "learning_rate": 1.9629656100552473e-05, "loss": 0.112, "step": 1208 }, { "epoch": 0.1783843600147547, "grad_norm": 2.6077795028686523, "learning_rate": 1.962826598961067e-05, "loss": 0.114, "step": 1209 }, { "epoch": 0.17853190704537072, "grad_norm": 1.836540699005127, "learning_rate": 1.9626873324023915e-05, "loss": 0.0632, "step": 1210 }, { "epoch": 0.17867945407598673, "grad_norm": 7.771944999694824, "learning_rate": 1.9625478104161732e-05, "loss": 0.1108, "step": 1211 }, { "epoch": 0.17882700110660274, "grad_norm": 1.3506052494049072, "learning_rate": 1.9624080330394308e-05, "loss": 0.019, "step": 1212 }, { "epoch": 0.17897454813721875, "grad_norm": 2.4817545413970947, "learning_rate": 1.9622680003092503e-05, "loss": 0.0808, "step": 1213 }, { "epoch": 0.17912209516783476, "grad_norm": 1.7728381156921387, "learning_rate": 1.962127712262788e-05, "loss": 0.0473, "step": 1214 }, { "epoch": 0.17926964219845076, "grad_norm": 4.336607456207275, "learning_rate": 1.9619871689372644e-05, "loss": 0.1478, "step": 1215 }, { "epoch": 0.17941718922906677, "grad_norm": 3.580509662628174, "learning_rate": 1.9618463703699703e-05, "loss": 0.0448, "step": 1216 }, { "epoch": 0.17956473625968278, "grad_norm": 1.3735064268112183, "learning_rate": 1.961705316598264e-05, "loss": 0.027, "step": 1217 }, { "epoch": 0.17971228329029879, "grad_norm": 3.108590602874756, "learning_rate": 1.9615640076595697e-05, "loss": 0.0535, "step": 1218 }, { "epoch": 0.1798598303209148, "grad_norm": 1.1712565422058105, "learning_rate": 1.961422443591382e-05, "loss": 0.0481, "step": 1219 }, { "epoch": 0.1800073773515308, "grad_norm": 3.844938039779663, "learning_rate": 1.9612806244312602e-05, "loss": 0.0914, "step": 1220 }, { "epoch": 0.1800073773515308, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.0765000432729721, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 49.7692, "eval_samples_per_second": 5.847, "eval_steps_per_second": 0.201, "step": 1220 }, { "epoch": 0.1801549243821468, "grad_norm": 1.383786916732788, "learning_rate": 1.961138550216834e-05, "loss": 0.0601, "step": 1221 }, { "epoch": 0.18030247141276282, "grad_norm": 1.9068822860717773, "learning_rate": 1.9609962209857996e-05, "loss": 0.0478, "step": 1222 }, { "epoch": 0.18045001844337882, "grad_norm": 2.4948177337646484, "learning_rate": 1.9608536367759204e-05, "loss": 0.0472, "step": 1223 }, { "epoch": 0.18059756547399483, "grad_norm": 2.1393113136291504, "learning_rate": 1.960710797625028e-05, "loss": 0.0976, "step": 1224 }, { "epoch": 0.18074511250461084, "grad_norm": 4.436635971069336, "learning_rate": 1.9605677035710216e-05, "loss": 0.1245, "step": 1225 }, { "epoch": 0.18089265953522685, "grad_norm": 5.252721786499023, "learning_rate": 1.9604243546518684e-05, "loss": 0.0739, "step": 1226 }, { "epoch": 0.18104020656584285, "grad_norm": 5.165715217590332, "learning_rate": 1.9602807509056017e-05, "loss": 0.0934, "step": 1227 }, { "epoch": 0.18118775359645886, "grad_norm": 2.585773229598999, "learning_rate": 1.960136892370325e-05, "loss": 0.0406, "step": 1228 }, { "epoch": 0.18133530062707487, "grad_norm": 1.3011809587478638, "learning_rate": 1.959992779084207e-05, "loss": 0.039, "step": 1229 }, { "epoch": 0.18148284765769088, "grad_norm": 3.3417413234710693, "learning_rate": 1.9598484110854853e-05, "loss": 0.0827, "step": 1230 }, { "epoch": 0.18163039468830688, "grad_norm": 3.0331408977508545, "learning_rate": 1.959703788412465e-05, "loss": 0.0811, "step": 1231 }, { "epoch": 0.18177794171892292, "grad_norm": 1.717118740081787, "learning_rate": 1.9595589111035176e-05, "loss": 0.0526, "step": 1232 }, { "epoch": 0.18192548874953893, "grad_norm": 1.806394338607788, "learning_rate": 1.959413779197084e-05, "loss": 0.0238, "step": 1233 }, { "epoch": 0.18207303578015493, "grad_norm": 1.730916142463684, "learning_rate": 1.959268392731671e-05, "loss": 0.0782, "step": 1234 }, { "epoch": 0.18222058281077094, "grad_norm": 3.566943883895874, "learning_rate": 1.9591227517458537e-05, "loss": 0.0768, "step": 1235 }, { "epoch": 0.18236812984138695, "grad_norm": 2.4168803691864014, "learning_rate": 1.958976856278275e-05, "loss": 0.0833, "step": 1236 }, { "epoch": 0.18251567687200296, "grad_norm": 1.5765150785446167, "learning_rate": 1.9588307063676453e-05, "loss": 0.065, "step": 1237 }, { "epoch": 0.18266322390261897, "grad_norm": 1.9019513130187988, "learning_rate": 1.9586843020527414e-05, "loss": 0.0511, "step": 1238 }, { "epoch": 0.18281077093323497, "grad_norm": 2.257558822631836, "learning_rate": 1.958537643372409e-05, "loss": 0.0817, "step": 1239 }, { "epoch": 0.18295831796385098, "grad_norm": 3.913238286972046, "learning_rate": 1.9583907303655602e-05, "loss": 0.1249, "step": 1240 }, { "epoch": 0.18295831796385098, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9523809523809523, "eval_loss": 0.07652975618839264, "eval_precision": 0.9844559585492227, "eval_recall": 0.9223300970873787, "eval_runtime": 49.7466, "eval_samples_per_second": 5.85, "eval_steps_per_second": 0.201, "step": 1240 }, { "epoch": 0.183105864994467, "grad_norm": 6.389395713806152, "learning_rate": 1.958243563071176e-05, "loss": 0.1093, "step": 1241 }, { "epoch": 0.183253412025083, "grad_norm": 1.2438921928405762, "learning_rate": 1.958096141528303e-05, "loss": 0.0725, "step": 1242 }, { "epoch": 0.183400959055699, "grad_norm": 2.012863874435425, "learning_rate": 1.9579484657760563e-05, "loss": 0.0969, "step": 1243 }, { "epoch": 0.183548506086315, "grad_norm": 1.1797195672988892, "learning_rate": 1.957800535853619e-05, "loss": 0.0168, "step": 1244 }, { "epoch": 0.18369605311693102, "grad_norm": 1.8514281511306763, "learning_rate": 1.9576523518002404e-05, "loss": 0.1002, "step": 1245 }, { "epoch": 0.18384360014754703, "grad_norm": 1.3554762601852417, "learning_rate": 1.9575039136552382e-05, "loss": 0.0534, "step": 1246 }, { "epoch": 0.18399114717816303, "grad_norm": 1.897148609161377, "learning_rate": 1.9573552214579968e-05, "loss": 0.0797, "step": 1247 }, { "epoch": 0.18413869420877904, "grad_norm": 2.973963975906372, "learning_rate": 1.9572062752479684e-05, "loss": 0.1043, "step": 1248 }, { "epoch": 0.18428624123939505, "grad_norm": 1.4574750661849976, "learning_rate": 1.9570570750646727e-05, "loss": 0.0285, "step": 1249 }, { "epoch": 0.18443378827001106, "grad_norm": 2.356619119644165, "learning_rate": 1.9569076209476967e-05, "loss": 0.0891, "step": 1250 }, { "epoch": 0.18458133530062706, "grad_norm": 0.8433429598808289, "learning_rate": 1.9567579129366944e-05, "loss": 0.0283, "step": 1251 }, { "epoch": 0.18472888233124307, "grad_norm": 1.5216174125671387, "learning_rate": 1.956607951071388e-05, "loss": 0.0461, "step": 1252 }, { "epoch": 0.18487642936185908, "grad_norm": 5.878219127655029, "learning_rate": 1.956457735391566e-05, "loss": 0.0749, "step": 1253 }, { "epoch": 0.18502397639247511, "grad_norm": 2.0145456790924072, "learning_rate": 1.956307265937085e-05, "loss": 0.0675, "step": 1254 }, { "epoch": 0.18517152342309112, "grad_norm": 2.759228467941284, "learning_rate": 1.9561565427478687e-05, "loss": 0.1105, "step": 1255 }, { "epoch": 0.18531907045370713, "grad_norm": 2.1275229454040527, "learning_rate": 1.956005565863908e-05, "loss": 0.0787, "step": 1256 }, { "epoch": 0.18546661748432314, "grad_norm": 3.5280590057373047, "learning_rate": 1.9558543353252614e-05, "loss": 0.0674, "step": 1257 }, { "epoch": 0.18561416451493915, "grad_norm": 2.8413875102996826, "learning_rate": 1.9557028511720545e-05, "loss": 0.1264, "step": 1258 }, { "epoch": 0.18576171154555515, "grad_norm": 3.2262542247772217, "learning_rate": 1.95555111344448e-05, "loss": 0.0555, "step": 1259 }, { "epoch": 0.18590925857617116, "grad_norm": 3.0910959243774414, "learning_rate": 1.9553991221827983e-05, "loss": 0.0829, "step": 1260 }, { "epoch": 0.18590925857617116, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9526184538653366, "eval_loss": 0.0714610368013382, "eval_precision": 0.9794871794871794, "eval_recall": 0.9271844660194175, "eval_runtime": 49.2946, "eval_samples_per_second": 5.903, "eval_steps_per_second": 0.203, "step": 1260 }, { "epoch": 0.18605680560678717, "grad_norm": 3.3781979084014893, "learning_rate": 1.955246877427337e-05, "loss": 0.0705, "step": 1261 }, { "epoch": 0.18620435263740318, "grad_norm": 3.884610652923584, "learning_rate": 1.9550943792184908e-05, "loss": 0.0811, "step": 1262 }, { "epoch": 0.18635189966801918, "grad_norm": 4.1358137130737305, "learning_rate": 1.9549416275967214e-05, "loss": 0.1435, "step": 1263 }, { "epoch": 0.1864994466986352, "grad_norm": 1.9579921960830688, "learning_rate": 1.9547886226025583e-05, "loss": 0.1082, "step": 1264 }, { "epoch": 0.1866469937292512, "grad_norm": 1.505574345588684, "learning_rate": 1.954635364276598e-05, "loss": 0.0808, "step": 1265 }, { "epoch": 0.1867945407598672, "grad_norm": 3.01751971244812, "learning_rate": 1.954481852659504e-05, "loss": 0.0785, "step": 1266 }, { "epoch": 0.1869420877904832, "grad_norm": 4.708956241607666, "learning_rate": 1.9543280877920073e-05, "loss": 0.1652, "step": 1267 }, { "epoch": 0.18708963482109922, "grad_norm": 1.6969563961029053, "learning_rate": 1.9541740697149057e-05, "loss": 0.0628, "step": 1268 }, { "epoch": 0.18723718185171523, "grad_norm": 1.3834433555603027, "learning_rate": 1.9540197984690646e-05, "loss": 0.0741, "step": 1269 }, { "epoch": 0.18738472888233124, "grad_norm": 1.8319188356399536, "learning_rate": 1.9538652740954168e-05, "loss": 0.036, "step": 1270 }, { "epoch": 0.18753227591294724, "grad_norm": 2.6822447776794434, "learning_rate": 1.9537104966349614e-05, "loss": 0.1084, "step": 1271 }, { "epoch": 0.18767982294356325, "grad_norm": 2.1942567825317383, "learning_rate": 1.9535554661287652e-05, "loss": 0.023, "step": 1272 }, { "epoch": 0.18782736997417926, "grad_norm": 1.8392460346221924, "learning_rate": 1.9534001826179622e-05, "loss": 0.0416, "step": 1273 }, { "epoch": 0.18797491700479527, "grad_norm": 1.673385500907898, "learning_rate": 1.9532446461437534e-05, "loss": 0.0499, "step": 1274 }, { "epoch": 0.18812246403541127, "grad_norm": 2.2422759532928467, "learning_rate": 1.953088856747407e-05, "loss": 0.0736, "step": 1275 }, { "epoch": 0.1882700110660273, "grad_norm": 8.048042297363281, "learning_rate": 1.952932814470258e-05, "loss": 0.1011, "step": 1276 }, { "epoch": 0.18841755809664332, "grad_norm": 5.37031364440918, "learning_rate": 1.9527765193537088e-05, "loss": 0.0734, "step": 1277 }, { "epoch": 0.18856510512725932, "grad_norm": 1.1488605737686157, "learning_rate": 1.952619971439229e-05, "loss": 0.057, "step": 1278 }, { "epoch": 0.18871265215787533, "grad_norm": 6.691167831420898, "learning_rate": 1.952463170768355e-05, "loss": 0.0984, "step": 1279 }, { "epoch": 0.18886019918849134, "grad_norm": 3.032681465148926, "learning_rate": 1.95230611738269e-05, "loss": 0.1013, "step": 1280 }, { "epoch": 0.18886019918849134, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.949238578680203, "eval_loss": 0.09274248033761978, "eval_precision": 0.9946808510638298, "eval_recall": 0.9077669902912622, "eval_runtime": 49.2681, "eval_samples_per_second": 5.906, "eval_steps_per_second": 0.203, "step": 1280 }, { "epoch": 0.18900774621910735, "grad_norm": 2.7321367263793945, "learning_rate": 1.9521488113239052e-05, "loss": 0.0776, "step": 1281 }, { "epoch": 0.18915529324972336, "grad_norm": 4.913203716278076, "learning_rate": 1.9519912526337378e-05, "loss": 0.0867, "step": 1282 }, { "epoch": 0.18930284028033936, "grad_norm": 4.8550519943237305, "learning_rate": 1.951833441353993e-05, "loss": 0.1422, "step": 1283 }, { "epoch": 0.18945038731095537, "grad_norm": 2.368396759033203, "learning_rate": 1.9516753775265415e-05, "loss": 0.0645, "step": 1284 }, { "epoch": 0.18959793434157138, "grad_norm": 2.3448708057403564, "learning_rate": 1.951517061193323e-05, "loss": 0.0523, "step": 1285 }, { "epoch": 0.18974548137218739, "grad_norm": 1.928807258605957, "learning_rate": 1.9513584923963426e-05, "loss": 0.0465, "step": 1286 }, { "epoch": 0.1898930284028034, "grad_norm": 1.2653396129608154, "learning_rate": 1.9511996711776732e-05, "loss": 0.0494, "step": 1287 }, { "epoch": 0.1900405754334194, "grad_norm": 1.3399567604064941, "learning_rate": 1.9510405975794545e-05, "loss": 0.0293, "step": 1288 }, { "epoch": 0.1901881224640354, "grad_norm": 2.8300299644470215, "learning_rate": 1.950881271643893e-05, "loss": 0.0491, "step": 1289 }, { "epoch": 0.19033566949465142, "grad_norm": 6.4869818687438965, "learning_rate": 1.9507216934132626e-05, "loss": 0.1417, "step": 1290 }, { "epoch": 0.19048321652526742, "grad_norm": 3.0066754817962646, "learning_rate": 1.9505618629299033e-05, "loss": 0.0619, "step": 1291 }, { "epoch": 0.19063076355588343, "grad_norm": 1.1571245193481445, "learning_rate": 1.9504017802362227e-05, "loss": 0.0125, "step": 1292 }, { "epoch": 0.19077831058649944, "grad_norm": 5.801309108734131, "learning_rate": 1.9502414453746957e-05, "loss": 0.0578, "step": 1293 }, { "epoch": 0.19092585761711545, "grad_norm": 2.1293575763702393, "learning_rate": 1.9500808583878625e-05, "loss": 0.0848, "step": 1294 }, { "epoch": 0.19107340464773145, "grad_norm": 1.6071228981018066, "learning_rate": 1.9499200193183324e-05, "loss": 0.0645, "step": 1295 }, { "epoch": 0.19122095167834746, "grad_norm": 6.371485233306885, "learning_rate": 1.9497589282087797e-05, "loss": 0.1073, "step": 1296 }, { "epoch": 0.19136849870896347, "grad_norm": 8.235854148864746, "learning_rate": 1.9495975851019466e-05, "loss": 0.1024, "step": 1297 }, { "epoch": 0.1915160457395795, "grad_norm": 0.6926367282867432, "learning_rate": 1.949435990040642e-05, "loss": 0.0074, "step": 1298 }, { "epoch": 0.1916635927701955, "grad_norm": 2.004610776901245, "learning_rate": 1.949274143067741e-05, "loss": 0.0739, "step": 1299 }, { "epoch": 0.19181113980081152, "grad_norm": 2.013143539428711, "learning_rate": 1.9491120442261867e-05, "loss": 0.0697, "step": 1300 }, { "epoch": 0.19181113980081152, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9521410579345088, "eval_loss": 0.08052647858858109, "eval_precision": 0.9895287958115183, "eval_recall": 0.9174757281553398, "eval_runtime": 49.4228, "eval_samples_per_second": 5.888, "eval_steps_per_second": 0.202, "step": 1300 }, { "epoch": 0.19195868683142753, "grad_norm": 1.5573711395263672, "learning_rate": 1.948949693558988e-05, "loss": 0.0434, "step": 1301 }, { "epoch": 0.19210623386204354, "grad_norm": 3.444673776626587, "learning_rate": 1.9487870911092214e-05, "loss": 0.0818, "step": 1302 }, { "epoch": 0.19225378089265954, "grad_norm": 2.6416754722595215, "learning_rate": 1.948624236920029e-05, "loss": 0.0863, "step": 1303 }, { "epoch": 0.19240132792327555, "grad_norm": 3.2755672931671143, "learning_rate": 1.948461131034622e-05, "loss": 0.0863, "step": 1304 }, { "epoch": 0.19254887495389156, "grad_norm": 3.8528685569763184, "learning_rate": 1.9482977734962753e-05, "loss": 0.0573, "step": 1305 }, { "epoch": 0.19269642198450757, "grad_norm": 2.1156210899353027, "learning_rate": 1.9481341643483332e-05, "loss": 0.0926, "step": 1306 }, { "epoch": 0.19284396901512357, "grad_norm": 1.6438225507736206, "learning_rate": 1.947970303634205e-05, "loss": 0.015, "step": 1307 }, { "epoch": 0.19299151604573958, "grad_norm": 3.650826930999756, "learning_rate": 1.9478061913973686e-05, "loss": 0.0645, "step": 1308 }, { "epoch": 0.1931390630763556, "grad_norm": 5.674660682678223, "learning_rate": 1.9476418276813662e-05, "loss": 0.0759, "step": 1309 }, { "epoch": 0.1932866101069716, "grad_norm": 5.307286262512207, "learning_rate": 1.9474772125298086e-05, "loss": 0.0681, "step": 1310 }, { "epoch": 0.1934341571375876, "grad_norm": 3.7402114868164062, "learning_rate": 1.9473123459863733e-05, "loss": 0.075, "step": 1311 }, { "epoch": 0.1935817041682036, "grad_norm": 3.2019646167755127, "learning_rate": 1.9471472280948026e-05, "loss": 0.1216, "step": 1312 }, { "epoch": 0.19372925119881962, "grad_norm": 2.5090363025665283, "learning_rate": 1.9469818588989083e-05, "loss": 0.0352, "step": 1313 }, { "epoch": 0.19387679822943563, "grad_norm": 2.1919286251068115, "learning_rate": 1.9468162384425663e-05, "loss": 0.0706, "step": 1314 }, { "epoch": 0.19402434526005163, "grad_norm": 3.9691972732543945, "learning_rate": 1.946650366769721e-05, "loss": 0.1012, "step": 1315 }, { "epoch": 0.19417189229066764, "grad_norm": 2.9805376529693604, "learning_rate": 1.9464842439243824e-05, "loss": 0.0546, "step": 1316 }, { "epoch": 0.19431943932128365, "grad_norm": 3.260493516921997, "learning_rate": 1.9463178699506276e-05, "loss": 0.0986, "step": 1317 }, { "epoch": 0.19446698635189966, "grad_norm": 1.3861193656921387, "learning_rate": 1.9461512448926e-05, "loss": 0.036, "step": 1318 }, { "epoch": 0.19461453338251566, "grad_norm": 2.0619916915893555, "learning_rate": 1.9459843687945103e-05, "loss": 0.0546, "step": 1319 }, { "epoch": 0.19476208041313167, "grad_norm": 2.1363534927368164, "learning_rate": 1.9458172417006347e-05, "loss": 0.0905, "step": 1320 }, { "epoch": 0.19476208041313167, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.07725830376148224, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.0982, "eval_samples_per_second": 5.927, "eval_steps_per_second": 0.204, "step": 1320 }, { "epoch": 0.1949096274437477, "grad_norm": 5.401767253875732, "learning_rate": 1.945649863655317e-05, "loss": 0.0846, "step": 1321 }, { "epoch": 0.19505717447436371, "grad_norm": 4.89719820022583, "learning_rate": 1.9454822347029675e-05, "loss": 0.0794, "step": 1322 }, { "epoch": 0.19520472150497972, "grad_norm": 5.015806674957275, "learning_rate": 1.9453143548880626e-05, "loss": 0.1078, "step": 1323 }, { "epoch": 0.19535226853559573, "grad_norm": 1.8391727209091187, "learning_rate": 1.945146224255145e-05, "loss": 0.0636, "step": 1324 }, { "epoch": 0.19549981556621174, "grad_norm": 1.650496482849121, "learning_rate": 1.944977842848825e-05, "loss": 0.0499, "step": 1325 }, { "epoch": 0.19564736259682775, "grad_norm": 3.3419551849365234, "learning_rate": 1.9448092107137778e-05, "loss": 0.1112, "step": 1326 }, { "epoch": 0.19579490962744375, "grad_norm": 1.771303653717041, "learning_rate": 1.9446403278947478e-05, "loss": 0.0488, "step": 1327 }, { "epoch": 0.19594245665805976, "grad_norm": 1.964579463005066, "learning_rate": 1.9444711944365433e-05, "loss": 0.1193, "step": 1328 }, { "epoch": 0.19609000368867577, "grad_norm": 2.816879987716675, "learning_rate": 1.9443018103840397e-05, "loss": 0.0901, "step": 1329 }, { "epoch": 0.19623755071929178, "grad_norm": 2.059269666671753, "learning_rate": 1.94413217578218e-05, "loss": 0.0812, "step": 1330 }, { "epoch": 0.19638509774990778, "grad_norm": 1.197261929512024, "learning_rate": 1.9439622906759734e-05, "loss": 0.0659, "step": 1331 }, { "epoch": 0.1965326447805238, "grad_norm": 1.4067429304122925, "learning_rate": 1.9437921551104934e-05, "loss": 0.025, "step": 1332 }, { "epoch": 0.1966801918111398, "grad_norm": 1.6476287841796875, "learning_rate": 1.9436217691308832e-05, "loss": 0.0502, "step": 1333 }, { "epoch": 0.1968277388417558, "grad_norm": 2.7130286693573, "learning_rate": 1.94345113278235e-05, "loss": 0.0814, "step": 1334 }, { "epoch": 0.1969752858723718, "grad_norm": 1.350067138671875, "learning_rate": 1.9432802461101694e-05, "loss": 0.0637, "step": 1335 }, { "epoch": 0.19712283290298782, "grad_norm": 1.2226768732070923, "learning_rate": 1.943109109159681e-05, "loss": 0.036, "step": 1336 }, { "epoch": 0.19727037993360383, "grad_norm": 6.2276692390441895, "learning_rate": 1.9429377219762933e-05, "loss": 0.1289, "step": 1337 }, { "epoch": 0.19741792696421984, "grad_norm": 3.1329100131988525, "learning_rate": 1.9427660846054795e-05, "loss": 0.0895, "step": 1338 }, { "epoch": 0.19756547399483584, "grad_norm": 1.1181056499481201, "learning_rate": 1.9425941970927798e-05, "loss": 0.0153, "step": 1339 }, { "epoch": 0.19771302102545185, "grad_norm": 2.2246644496917725, "learning_rate": 1.942422059483801e-05, "loss": 0.0417, "step": 1340 }, { "epoch": 0.19771302102545185, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.949748743718593, "eval_loss": 0.07904960215091705, "eval_precision": 0.984375, "eval_recall": 0.9174757281553398, "eval_runtime": 48.6769, "eval_samples_per_second": 5.978, "eval_steps_per_second": 0.205, "step": 1340 }, { "epoch": 0.19786056805606786, "grad_norm": 1.7612236738204956, "learning_rate": 1.9422496718242155e-05, "loss": 0.0452, "step": 1341 }, { "epoch": 0.19800811508668387, "grad_norm": 4.0994977951049805, "learning_rate": 1.9420770341597627e-05, "loss": 0.0534, "step": 1342 }, { "epoch": 0.1981556621172999, "grad_norm": 1.8626494407653809, "learning_rate": 1.9419041465362477e-05, "loss": 0.0553, "step": 1343 }, { "epoch": 0.1983032091479159, "grad_norm": 3.3860955238342285, "learning_rate": 1.9417310089995433e-05, "loss": 0.105, "step": 1344 }, { "epoch": 0.19845075617853192, "grad_norm": 3.4416463375091553, "learning_rate": 1.941557621595587e-05, "loss": 0.0878, "step": 1345 }, { "epoch": 0.19859830320914793, "grad_norm": 1.6001362800598145, "learning_rate": 1.9413839843703837e-05, "loss": 0.0448, "step": 1346 }, { "epoch": 0.19874585023976393, "grad_norm": 1.1885074377059937, "learning_rate": 1.941210097370004e-05, "loss": 0.0454, "step": 1347 }, { "epoch": 0.19889339727037994, "grad_norm": 4.245115756988525, "learning_rate": 1.9410359606405843e-05, "loss": 0.118, "step": 1348 }, { "epoch": 0.19904094430099595, "grad_norm": 1.709445834159851, "learning_rate": 1.940861574228329e-05, "loss": 0.0385, "step": 1349 }, { "epoch": 0.19918849133161196, "grad_norm": 2.4923059940338135, "learning_rate": 1.9406869381795067e-05, "loss": 0.0806, "step": 1350 }, { "epoch": 0.19933603836222796, "grad_norm": 1.4957060813903809, "learning_rate": 1.940512052540454e-05, "loss": 0.0687, "step": 1351 }, { "epoch": 0.19948358539284397, "grad_norm": 1.2899264097213745, "learning_rate": 1.9403369173575716e-05, "loss": 0.0364, "step": 1352 }, { "epoch": 0.19963113242345998, "grad_norm": 0.8532899618148804, "learning_rate": 1.9401615326773292e-05, "loss": 0.0116, "step": 1353 }, { "epoch": 0.19977867945407599, "grad_norm": 4.752753734588623, "learning_rate": 1.9399858985462602e-05, "loss": 0.1038, "step": 1354 }, { "epoch": 0.199926226484692, "grad_norm": 1.4301010370254517, "learning_rate": 1.9398100150109654e-05, "loss": 0.0384, "step": 1355 }, { "epoch": 0.200073773515308, "grad_norm": 5.159061908721924, "learning_rate": 1.939633882118112e-05, "loss": 0.1079, "step": 1356 }, { "epoch": 0.200221320545924, "grad_norm": 1.8330707550048828, "learning_rate": 1.939457499914432e-05, "loss": 0.0651, "step": 1357 }, { "epoch": 0.20036886757654002, "grad_norm": 1.412860631942749, "learning_rate": 1.939280868446726e-05, "loss": 0.0462, "step": 1358 }, { "epoch": 0.20051641460715602, "grad_norm": 1.6666032075881958, "learning_rate": 1.9391039877618576e-05, "loss": 0.032, "step": 1359 }, { "epoch": 0.20066396163777203, "grad_norm": 2.953582286834717, "learning_rate": 1.938926857906759e-05, "loss": 0.0541, "step": 1360 }, { "epoch": 0.20066396163777203, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.07594899833202362, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 49.1826, "eval_samples_per_second": 5.917, "eval_steps_per_second": 0.203, "step": 1360 }, { "epoch": 0.20081150866838804, "grad_norm": 1.8912771940231323, "learning_rate": 1.9387494789284274e-05, "loss": 0.0556, "step": 1361 }, { "epoch": 0.20095905569900405, "grad_norm": 3.8213112354278564, "learning_rate": 1.9385718508739263e-05, "loss": 0.1093, "step": 1362 }, { "epoch": 0.20110660272962005, "grad_norm": 2.005277156829834, "learning_rate": 1.9383939737903854e-05, "loss": 0.0703, "step": 1363 }, { "epoch": 0.20125414976023606, "grad_norm": 2.694446563720703, "learning_rate": 1.9382158477250005e-05, "loss": 0.0569, "step": 1364 }, { "epoch": 0.2014016967908521, "grad_norm": 2.007324457168579, "learning_rate": 1.938037472725033e-05, "loss": 0.0793, "step": 1365 }, { "epoch": 0.2015492438214681, "grad_norm": 1.8499947786331177, "learning_rate": 1.9378588488378115e-05, "loss": 0.078, "step": 1366 }, { "epoch": 0.2016967908520841, "grad_norm": 2.667909860610962, "learning_rate": 1.937679976110729e-05, "loss": 0.0289, "step": 1367 }, { "epoch": 0.20184433788270012, "grad_norm": 5.730069637298584, "learning_rate": 1.937500854591246e-05, "loss": 0.1179, "step": 1368 }, { "epoch": 0.20199188491331613, "grad_norm": 1.4177708625793457, "learning_rate": 1.9373214843268876e-05, "loss": 0.0321, "step": 1369 }, { "epoch": 0.20213943194393214, "grad_norm": 2.069298028945923, "learning_rate": 1.9371418653652467e-05, "loss": 0.0811, "step": 1370 }, { "epoch": 0.20228697897454814, "grad_norm": 2.562945604324341, "learning_rate": 1.9369619977539803e-05, "loss": 0.0456, "step": 1371 }, { "epoch": 0.20243452600516415, "grad_norm": 2.0184197425842285, "learning_rate": 1.936781881540813e-05, "loss": 0.0409, "step": 1372 }, { "epoch": 0.20258207303578016, "grad_norm": 7.014094352722168, "learning_rate": 1.936601516773534e-05, "loss": 0.0354, "step": 1373 }, { "epoch": 0.20272962006639617, "grad_norm": 0.8310100436210632, "learning_rate": 1.9364209034999992e-05, "loss": 0.0354, "step": 1374 }, { "epoch": 0.20287716709701217, "grad_norm": 2.520078182220459, "learning_rate": 1.9362400417681304e-05, "loss": 0.0985, "step": 1375 }, { "epoch": 0.20302471412762818, "grad_norm": 1.2535043954849243, "learning_rate": 1.9360589316259154e-05, "loss": 0.0141, "step": 1376 }, { "epoch": 0.2031722611582442, "grad_norm": 3.4522523880004883, "learning_rate": 1.9358775731214073e-05, "loss": 0.0687, "step": 1377 }, { "epoch": 0.2033198081888602, "grad_norm": 5.03420877456665, "learning_rate": 1.9356959663027256e-05, "loss": 0.1394, "step": 1378 }, { "epoch": 0.2034673552194762, "grad_norm": 2.143878698348999, "learning_rate": 1.935514111218056e-05, "loss": 0.0523, "step": 1379 }, { "epoch": 0.2036149022500922, "grad_norm": 1.6295140981674194, "learning_rate": 1.9353320079156494e-05, "loss": 0.0333, "step": 1380 }, { "epoch": 0.2036149022500922, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9438775510204082, "eval_loss": 0.0932467058300972, "eval_precision": 0.9946236559139785, "eval_recall": 0.8980582524271845, "eval_runtime": 51.7346, "eval_samples_per_second": 5.625, "eval_steps_per_second": 0.193, "step": 1380 }, { "epoch": 0.20376244928070822, "grad_norm": 2.827363967895508, "learning_rate": 1.9351496564438228e-05, "loss": 0.0675, "step": 1381 }, { "epoch": 0.20390999631132423, "grad_norm": 1.450039029121399, "learning_rate": 1.934967056850959e-05, "loss": 0.0341, "step": 1382 }, { "epoch": 0.20405754334194023, "grad_norm": 6.97248649597168, "learning_rate": 1.9347842091855068e-05, "loss": 0.0878, "step": 1383 }, { "epoch": 0.20420509037255624, "grad_norm": 2.276160478591919, "learning_rate": 1.934601113495981e-05, "loss": 0.0901, "step": 1384 }, { "epoch": 0.20435263740317225, "grad_norm": 3.3418705463409424, "learning_rate": 1.9344177698309616e-05, "loss": 0.1414, "step": 1385 }, { "epoch": 0.20450018443378826, "grad_norm": 2.274207353591919, "learning_rate": 1.9342341782390952e-05, "loss": 0.086, "step": 1386 }, { "epoch": 0.2046477314644043, "grad_norm": 9.955597877502441, "learning_rate": 1.934050338769093e-05, "loss": 0.091, "step": 1387 }, { "epoch": 0.2047952784950203, "grad_norm": 4.9306230545043945, "learning_rate": 1.9338662514697332e-05, "loss": 0.1047, "step": 1388 }, { "epoch": 0.2049428255256363, "grad_norm": 6.726152420043945, "learning_rate": 1.933681916389859e-05, "loss": 0.072, "step": 1389 }, { "epoch": 0.20509037255625231, "grad_norm": 4.446255207061768, "learning_rate": 1.93349733357838e-05, "loss": 0.147, "step": 1390 }, { "epoch": 0.20523791958686832, "grad_norm": 1.6504340171813965, "learning_rate": 1.9333125030842707e-05, "loss": 0.052, "step": 1391 }, { "epoch": 0.20538546661748433, "grad_norm": 3.5462560653686523, "learning_rate": 1.9331274249565716e-05, "loss": 0.1068, "step": 1392 }, { "epoch": 0.20553301364810034, "grad_norm": 2.614546537399292, "learning_rate": 1.9329420992443896e-05, "loss": 0.0475, "step": 1393 }, { "epoch": 0.20568056067871635, "grad_norm": 1.7455826997756958, "learning_rate": 1.9327565259968962e-05, "loss": 0.0271, "step": 1394 }, { "epoch": 0.20582810770933235, "grad_norm": 1.4279757738113403, "learning_rate": 1.93257070526333e-05, "loss": 0.0686, "step": 1395 }, { "epoch": 0.20597565473994836, "grad_norm": 1.767085313796997, "learning_rate": 1.932384637092993e-05, "loss": 0.0787, "step": 1396 }, { "epoch": 0.20612320177056437, "grad_norm": 5.151937961578369, "learning_rate": 1.9321983215352553e-05, "loss": 0.1253, "step": 1397 }, { "epoch": 0.20627074880118038, "grad_norm": 2.1151609420776367, "learning_rate": 1.932011758639551e-05, "loss": 0.0423, "step": 1398 }, { "epoch": 0.20641829583179638, "grad_norm": 2.2435364723205566, "learning_rate": 1.9318249484553808e-05, "loss": 0.0821, "step": 1399 }, { "epoch": 0.2065658428624124, "grad_norm": 2.060382604598999, "learning_rate": 1.93163789103231e-05, "loss": 0.0933, "step": 1400 }, { "epoch": 0.2065658428624124, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9547738693467337, "eval_loss": 0.0743865817785263, "eval_precision": 0.9895833333333334, "eval_recall": 0.9223300970873787, "eval_runtime": 48.7898, "eval_samples_per_second": 5.964, "eval_steps_per_second": 0.205, "step": 1400 }, { "epoch": 0.2067133898930284, "grad_norm": 1.9975576400756836, "learning_rate": 1.931450586419971e-05, "loss": 0.0679, "step": 1401 }, { "epoch": 0.2068609369236444, "grad_norm": 1.5937947034835815, "learning_rate": 1.93126303466806e-05, "loss": 0.0297, "step": 1402 }, { "epoch": 0.2070084839542604, "grad_norm": 2.6826157569885254, "learning_rate": 1.9310752358263404e-05, "loss": 0.0502, "step": 1403 }, { "epoch": 0.20715603098487642, "grad_norm": 1.3916103839874268, "learning_rate": 1.93088718994464e-05, "loss": 0.0551, "step": 1404 }, { "epoch": 0.20730357801549243, "grad_norm": 2.998272180557251, "learning_rate": 1.9306988970728527e-05, "loss": 0.0746, "step": 1405 }, { "epoch": 0.20745112504610844, "grad_norm": 1.3118089437484741, "learning_rate": 1.9305103572609374e-05, "loss": 0.0604, "step": 1406 }, { "epoch": 0.20759867207672444, "grad_norm": 2.0403177738189697, "learning_rate": 1.9303215705589195e-05, "loss": 0.0467, "step": 1407 }, { "epoch": 0.20774621910734045, "grad_norm": 7.963498115539551, "learning_rate": 1.930132537016889e-05, "loss": 0.0996, "step": 1408 }, { "epoch": 0.2078937661379565, "grad_norm": 2.4032022953033447, "learning_rate": 1.929943256685002e-05, "loss": 0.0182, "step": 1409 }, { "epoch": 0.2080413131685725, "grad_norm": 3.1507861614227295, "learning_rate": 1.9297537296134794e-05, "loss": 0.0478, "step": 1410 }, { "epoch": 0.2081888601991885, "grad_norm": 2.3151755332946777, "learning_rate": 1.9295639558526084e-05, "loss": 0.038, "step": 1411 }, { "epoch": 0.2083364072298045, "grad_norm": 10.23894214630127, "learning_rate": 1.9293739354527407e-05, "loss": 0.2211, "step": 1412 }, { "epoch": 0.20848395426042052, "grad_norm": 1.379333257675171, "learning_rate": 1.9291836684642944e-05, "loss": 0.0705, "step": 1413 }, { "epoch": 0.20863150129103653, "grad_norm": 2.622653007507324, "learning_rate": 1.9289931549377524e-05, "loss": 0.0754, "step": 1414 }, { "epoch": 0.20877904832165253, "grad_norm": 1.7936551570892334, "learning_rate": 1.9288023949236632e-05, "loss": 0.0833, "step": 1415 }, { "epoch": 0.20892659535226854, "grad_norm": 1.9044911861419678, "learning_rate": 1.928611388472641e-05, "loss": 0.052, "step": 1416 }, { "epoch": 0.20907414238288455, "grad_norm": 2.713116407394409, "learning_rate": 1.928420135635365e-05, "loss": 0.0501, "step": 1417 }, { "epoch": 0.20922168941350056, "grad_norm": 5.833685398101807, "learning_rate": 1.928228636462579e-05, "loss": 0.0357, "step": 1418 }, { "epoch": 0.20936923644411656, "grad_norm": 2.3489458560943604, "learning_rate": 1.9280368910050943e-05, "loss": 0.0551, "step": 1419 }, { "epoch": 0.20951678347473257, "grad_norm": 12.28634262084961, "learning_rate": 1.9278448993137856e-05, "loss": 0.0679, "step": 1420 }, { "epoch": 0.20951678347473257, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.07018116116523743, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 49.5429, "eval_samples_per_second": 5.874, "eval_steps_per_second": 0.202, "step": 1420 }, { "epoch": 0.20966433050534858, "grad_norm": 1.8340884447097778, "learning_rate": 1.9276526614395936e-05, "loss": 0.1097, "step": 1421 }, { "epoch": 0.20981187753596459, "grad_norm": 2.3088462352752686, "learning_rate": 1.9274601774335243e-05, "loss": 0.0715, "step": 1422 }, { "epoch": 0.2099594245665806, "grad_norm": 3.743189573287964, "learning_rate": 1.9272674473466493e-05, "loss": 0.1148, "step": 1423 }, { "epoch": 0.2101069715971966, "grad_norm": 2.3974030017852783, "learning_rate": 1.927074471230105e-05, "loss": 0.101, "step": 1424 }, { "epoch": 0.2102545186278126, "grad_norm": 1.2952502965927124, "learning_rate": 1.9268812491350935e-05, "loss": 0.0384, "step": 1425 }, { "epoch": 0.21040206565842862, "grad_norm": 3.054689407348633, "learning_rate": 1.9266877811128814e-05, "loss": 0.092, "step": 1426 }, { "epoch": 0.21054961268904462, "grad_norm": 3.6154634952545166, "learning_rate": 1.9264940672148018e-05, "loss": 0.0707, "step": 1427 }, { "epoch": 0.21069715971966063, "grad_norm": 2.354193925857544, "learning_rate": 1.9263001074922522e-05, "loss": 0.0755, "step": 1428 }, { "epoch": 0.21084470675027664, "grad_norm": 1.808956265449524, "learning_rate": 1.926105901996695e-05, "loss": 0.0802, "step": 1429 }, { "epoch": 0.21099225378089265, "grad_norm": 1.7122451066970825, "learning_rate": 1.925911450779659e-05, "loss": 0.0537, "step": 1430 }, { "epoch": 0.21113980081150868, "grad_norm": 1.3185975551605225, "learning_rate": 1.9257167538927368e-05, "loss": 0.05, "step": 1431 }, { "epoch": 0.2112873478421247, "grad_norm": 2.829758882522583, "learning_rate": 1.9255218113875873e-05, "loss": 0.0495, "step": 1432 }, { "epoch": 0.2114348948727407, "grad_norm": 4.546072959899902, "learning_rate": 1.925326623315934e-05, "loss": 0.0595, "step": 1433 }, { "epoch": 0.2115824419033567, "grad_norm": 2.8329343795776367, "learning_rate": 1.9251311897295655e-05, "loss": 0.0858, "step": 1434 }, { "epoch": 0.2117299889339727, "grad_norm": 4.411838054656982, "learning_rate": 1.924935510680336e-05, "loss": 0.1106, "step": 1435 }, { "epoch": 0.21187753596458872, "grad_norm": 2.662076473236084, "learning_rate": 1.9247395862201645e-05, "loss": 0.1094, "step": 1436 }, { "epoch": 0.21202508299520473, "grad_norm": 1.4620989561080933, "learning_rate": 1.924543416401035e-05, "loss": 0.0748, "step": 1437 }, { "epoch": 0.21217263002582074, "grad_norm": 2.6469125747680664, "learning_rate": 1.9243470012749968e-05, "loss": 0.0918, "step": 1438 }, { "epoch": 0.21232017705643674, "grad_norm": 1.3561089038848877, "learning_rate": 1.9241503408941646e-05, "loss": 0.046, "step": 1439 }, { "epoch": 0.21246772408705275, "grad_norm": 1.3046250343322754, "learning_rate": 1.9239534353107176e-05, "loss": 0.0482, "step": 1440 }, { "epoch": 0.21246772408705275, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.07073784619569778, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.7621, "eval_samples_per_second": 5.848, "eval_steps_per_second": 0.201, "step": 1440 }, { "epoch": 0.21261527111766876, "grad_norm": 2.6487796306610107, "learning_rate": 1.9237562845769e-05, "loss": 0.0884, "step": 1441 }, { "epoch": 0.21276281814828477, "grad_norm": 3.1482350826263428, "learning_rate": 1.9235588887450222e-05, "loss": 0.0887, "step": 1442 }, { "epoch": 0.21291036517890077, "grad_norm": 1.8714118003845215, "learning_rate": 1.9233612478674576e-05, "loss": 0.1148, "step": 1443 }, { "epoch": 0.21305791220951678, "grad_norm": 6.874730110168457, "learning_rate": 1.9231633619966468e-05, "loss": 0.0601, "step": 1444 }, { "epoch": 0.2132054592401328, "grad_norm": 2.4575579166412354, "learning_rate": 1.922965231185094e-05, "loss": 0.0477, "step": 1445 }, { "epoch": 0.2133530062707488, "grad_norm": 2.172178030014038, "learning_rate": 1.9227668554853684e-05, "loss": 0.0255, "step": 1446 }, { "epoch": 0.2135005533013648, "grad_norm": 5.253436088562012, "learning_rate": 1.922568234950105e-05, "loss": 0.1086, "step": 1447 }, { "epoch": 0.2136481003319808, "grad_norm": 4.240809440612793, "learning_rate": 1.922369369632003e-05, "loss": 0.0832, "step": 1448 }, { "epoch": 0.21379564736259682, "grad_norm": 1.1348377466201782, "learning_rate": 1.9221702595838274e-05, "loss": 0.0353, "step": 1449 }, { "epoch": 0.21394319439321283, "grad_norm": 3.167478561401367, "learning_rate": 1.9219709048584075e-05, "loss": 0.1263, "step": 1450 }, { "epoch": 0.21409074142382883, "grad_norm": 2.7938549518585205, "learning_rate": 1.921771305508637e-05, "loss": 0.0647, "step": 1451 }, { "epoch": 0.21423828845444484, "grad_norm": 2.226094961166382, "learning_rate": 1.9215714615874757e-05, "loss": 0.0421, "step": 1452 }, { "epoch": 0.21438583548506085, "grad_norm": 3.1141579151153564, "learning_rate": 1.9213713731479473e-05, "loss": 0.0806, "step": 1453 }, { "epoch": 0.21453338251567688, "grad_norm": 6.75639009475708, "learning_rate": 1.9211710402431413e-05, "loss": 0.095, "step": 1454 }, { "epoch": 0.2146809295462929, "grad_norm": 2.9658875465393066, "learning_rate": 1.920970462926211e-05, "loss": 0.1025, "step": 1455 }, { "epoch": 0.2148284765769089, "grad_norm": 1.3812388181686401, "learning_rate": 1.9207696412503753e-05, "loss": 0.0486, "step": 1456 }, { "epoch": 0.2149760236075249, "grad_norm": 3.175931453704834, "learning_rate": 1.9205685752689178e-05, "loss": 0.0613, "step": 1457 }, { "epoch": 0.21512357063814092, "grad_norm": 3.094967842102051, "learning_rate": 1.920367265035187e-05, "loss": 0.0893, "step": 1458 }, { "epoch": 0.21527111766875692, "grad_norm": 1.2051812410354614, "learning_rate": 1.920165710602596e-05, "loss": 0.0257, "step": 1459 }, { "epoch": 0.21541866469937293, "grad_norm": 1.6724923849105835, "learning_rate": 1.919963912024623e-05, "loss": 0.0408, "step": 1460 }, { "epoch": 0.21541866469937293, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.07402520626783371, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 49.7907, "eval_samples_per_second": 5.844, "eval_steps_per_second": 0.201, "step": 1460 }, { "epoch": 0.21556621172998894, "grad_norm": 9.526878356933594, "learning_rate": 1.91976186935481e-05, "loss": 0.1099, "step": 1461 }, { "epoch": 0.21571375876060495, "grad_norm": 2.1356711387634277, "learning_rate": 1.9195595826467647e-05, "loss": 0.0808, "step": 1462 }, { "epoch": 0.21586130579122095, "grad_norm": 2.3917713165283203, "learning_rate": 1.91935705195416e-05, "loss": 0.1027, "step": 1463 }, { "epoch": 0.21600885282183696, "grad_norm": 1.4529759883880615, "learning_rate": 1.919154277330732e-05, "loss": 0.0267, "step": 1464 }, { "epoch": 0.21615639985245297, "grad_norm": 2.229956865310669, "learning_rate": 1.9189512588302833e-05, "loss": 0.0611, "step": 1465 }, { "epoch": 0.21630394688306898, "grad_norm": 1.4243957996368408, "learning_rate": 1.9187479965066797e-05, "loss": 0.0211, "step": 1466 }, { "epoch": 0.21645149391368498, "grad_norm": 3.3039212226867676, "learning_rate": 1.9185444904138528e-05, "loss": 0.0545, "step": 1467 }, { "epoch": 0.216599040944301, "grad_norm": 1.929646372795105, "learning_rate": 1.918340740605798e-05, "loss": 0.0424, "step": 1468 }, { "epoch": 0.216746587974917, "grad_norm": 1.9063628911972046, "learning_rate": 1.9181367471365757e-05, "loss": 0.0873, "step": 1469 }, { "epoch": 0.216894135005533, "grad_norm": 5.495874881744385, "learning_rate": 1.917932510060311e-05, "loss": 0.0191, "step": 1470 }, { "epoch": 0.21704168203614901, "grad_norm": 2.174502372741699, "learning_rate": 1.917728029431194e-05, "loss": 0.0792, "step": 1471 }, { "epoch": 0.21718922906676502, "grad_norm": 1.781110405921936, "learning_rate": 1.917523305303479e-05, "loss": 0.0421, "step": 1472 }, { "epoch": 0.21733677609738103, "grad_norm": 3.269526481628418, "learning_rate": 1.9173183377314846e-05, "loss": 0.0727, "step": 1473 }, { "epoch": 0.21748432312799704, "grad_norm": 6.254942417144775, "learning_rate": 1.9171131267695945e-05, "loss": 0.12, "step": 1474 }, { "epoch": 0.21763187015861304, "grad_norm": 5.211254596710205, "learning_rate": 1.916907672472257e-05, "loss": 0.0846, "step": 1475 }, { "epoch": 0.21777941718922908, "grad_norm": 3.0138373374938965, "learning_rate": 1.9167019748939847e-05, "loss": 0.0562, "step": 1476 }, { "epoch": 0.2179269642198451, "grad_norm": 2.384809732437134, "learning_rate": 1.9164960340893547e-05, "loss": 0.0498, "step": 1477 }, { "epoch": 0.2180745112504611, "grad_norm": 1.630139708518982, "learning_rate": 1.9162898501130093e-05, "loss": 0.039, "step": 1478 }, { "epoch": 0.2182220582810771, "grad_norm": 2.263195276260376, "learning_rate": 1.9160834230196542e-05, "loss": 0.024, "step": 1479 }, { "epoch": 0.2183696053116931, "grad_norm": 3.641052484512329, "learning_rate": 1.9158767528640604e-05, "loss": 0.0819, "step": 1480 }, { "epoch": 0.2183696053116931, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.06750384718179703, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 51.3938, "eval_samples_per_second": 5.662, "eval_steps_per_second": 0.195, "step": 1480 }, { "epoch": 0.21851715234230912, "grad_norm": 5.724951267242432, "learning_rate": 1.9156698397010634e-05, "loss": 0.0913, "step": 1481 }, { "epoch": 0.21866469937292513, "grad_norm": 4.667200088500977, "learning_rate": 1.9154626835855628e-05, "loss": 0.0243, "step": 1482 }, { "epoch": 0.21881224640354113, "grad_norm": 1.1886905431747437, "learning_rate": 1.915255284572523e-05, "loss": 0.0209, "step": 1483 }, { "epoch": 0.21895979343415714, "grad_norm": 2.9417309761047363, "learning_rate": 1.9150476427169723e-05, "loss": 0.0849, "step": 1484 }, { "epoch": 0.21910734046477315, "grad_norm": 2.9135100841522217, "learning_rate": 1.9148397580740042e-05, "loss": 0.0854, "step": 1485 }, { "epoch": 0.21925488749538916, "grad_norm": 1.8095918893814087, "learning_rate": 1.914631630698776e-05, "loss": 0.0544, "step": 1486 }, { "epoch": 0.21940243452600516, "grad_norm": 1.706761121749878, "learning_rate": 1.91442326064651e-05, "loss": 0.0546, "step": 1487 }, { "epoch": 0.21954998155662117, "grad_norm": 9.358868598937988, "learning_rate": 1.9142146479724923e-05, "loss": 0.0975, "step": 1488 }, { "epoch": 0.21969752858723718, "grad_norm": 4.05071496963501, "learning_rate": 1.9140057927320733e-05, "loss": 0.0918, "step": 1489 }, { "epoch": 0.2198450756178532, "grad_norm": 3.119196891784668, "learning_rate": 1.913796694980669e-05, "loss": 0.1352, "step": 1490 }, { "epoch": 0.2199926226484692, "grad_norm": 2.773177146911621, "learning_rate": 1.9135873547737572e-05, "loss": 0.0846, "step": 1491 }, { "epoch": 0.2201401696790852, "grad_norm": 2.1122961044311523, "learning_rate": 1.9133777721668832e-05, "loss": 0.0675, "step": 1492 }, { "epoch": 0.2202877167097012, "grad_norm": 5.189138412475586, "learning_rate": 1.9131679472156545e-05, "loss": 0.0919, "step": 1493 }, { "epoch": 0.22043526374031722, "grad_norm": 5.06166934967041, "learning_rate": 1.9129578799757433e-05, "loss": 0.0563, "step": 1494 }, { "epoch": 0.22058281077093322, "grad_norm": 2.0916285514831543, "learning_rate": 1.9127475705028864e-05, "loss": 0.1085, "step": 1495 }, { "epoch": 0.22073035780154923, "grad_norm": 3.055936574935913, "learning_rate": 1.9125370188528846e-05, "loss": 0.0793, "step": 1496 }, { "epoch": 0.22087790483216524, "grad_norm": 6.338480472564697, "learning_rate": 1.9123262250816034e-05, "loss": 0.0492, "step": 1497 }, { "epoch": 0.22102545186278127, "grad_norm": 3.016126871109009, "learning_rate": 1.9121151892449717e-05, "loss": 0.1147, "step": 1498 }, { "epoch": 0.22117299889339728, "grad_norm": 2.2563865184783936, "learning_rate": 1.9119039113989836e-05, "loss": 0.05, "step": 1499 }, { "epoch": 0.2213205459240133, "grad_norm": 2.1269707679748535, "learning_rate": 1.9116923915996967e-05, "loss": 0.0622, "step": 1500 }, { "epoch": 0.2213205459240133, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.07007961720228195, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.6318, "eval_samples_per_second": 5.863, "eval_steps_per_second": 0.201, "step": 1500 }, { "epoch": 0.2214680929546293, "grad_norm": 1.5344703197479248, "learning_rate": 1.911480629903233e-05, "loss": 0.076, "step": 1501 }, { "epoch": 0.2216156399852453, "grad_norm": 1.7562031745910645, "learning_rate": 1.9112686263657788e-05, "loss": 0.074, "step": 1502 }, { "epoch": 0.2217631870158613, "grad_norm": 1.2847347259521484, "learning_rate": 1.9110563810435847e-05, "loss": 0.0252, "step": 1503 }, { "epoch": 0.22191073404647732, "grad_norm": 3.505068302154541, "learning_rate": 1.910843893992965e-05, "loss": 0.0809, "step": 1504 }, { "epoch": 0.22205828107709333, "grad_norm": 3.5792269706726074, "learning_rate": 1.9106311652702983e-05, "loss": 0.0406, "step": 1505 }, { "epoch": 0.22220582810770934, "grad_norm": 1.2998307943344116, "learning_rate": 1.9104181949320274e-05, "loss": 0.0443, "step": 1506 }, { "epoch": 0.22235337513832534, "grad_norm": 3.618804931640625, "learning_rate": 1.91020498303466e-05, "loss": 0.1234, "step": 1507 }, { "epoch": 0.22250092216894135, "grad_norm": 3.212873697280884, "learning_rate": 1.909991529634766e-05, "loss": 0.0441, "step": 1508 }, { "epoch": 0.22264846919955736, "grad_norm": 2.5931785106658936, "learning_rate": 1.909777834788981e-05, "loss": 0.0708, "step": 1509 }, { "epoch": 0.22279601623017337, "grad_norm": 5.498706817626953, "learning_rate": 1.909563898554004e-05, "loss": 0.0518, "step": 1510 }, { "epoch": 0.22294356326078937, "grad_norm": 3.2276811599731445, "learning_rate": 1.9093497209865987e-05, "loss": 0.0746, "step": 1511 }, { "epoch": 0.22309111029140538, "grad_norm": 1.5317574739456177, "learning_rate": 1.9091353021435913e-05, "loss": 0.0426, "step": 1512 }, { "epoch": 0.2232386573220214, "grad_norm": 2.8579018115997314, "learning_rate": 1.908920642081874e-05, "loss": 0.1185, "step": 1513 }, { "epoch": 0.2233862043526374, "grad_norm": 1.2200193405151367, "learning_rate": 1.908705740858402e-05, "loss": 0.0262, "step": 1514 }, { "epoch": 0.2235337513832534, "grad_norm": 1.1458063125610352, "learning_rate": 1.908490598530194e-05, "loss": 0.0404, "step": 1515 }, { "epoch": 0.2236812984138694, "grad_norm": 4.839862823486328, "learning_rate": 1.9082752151543334e-05, "loss": 0.0911, "step": 1516 }, { "epoch": 0.22382884544448542, "grad_norm": 8.113639831542969, "learning_rate": 1.9080595907879672e-05, "loss": 0.0736, "step": 1517 }, { "epoch": 0.22397639247510143, "grad_norm": 6.0743632316589355, "learning_rate": 1.907843725488307e-05, "loss": 0.089, "step": 1518 }, { "epoch": 0.22412393950571743, "grad_norm": 2.801781177520752, "learning_rate": 1.9076276193126277e-05, "loss": 0.0996, "step": 1519 }, { "epoch": 0.22427148653633347, "grad_norm": 0.7859647274017334, "learning_rate": 1.9074112723182677e-05, "loss": 0.0211, "step": 1520 }, { "epoch": 0.22427148653633347, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.0649338960647583, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 49.7969, "eval_samples_per_second": 5.844, "eval_steps_per_second": 0.201, "step": 1520 }, { "epoch": 0.22441903356694948, "grad_norm": 2.9750444889068604, "learning_rate": 1.9071946845626302e-05, "loss": 0.12, "step": 1521 }, { "epoch": 0.22456658059756548, "grad_norm": 2.7230350971221924, "learning_rate": 1.9069778561031823e-05, "loss": 0.1002, "step": 1522 }, { "epoch": 0.2247141276281815, "grad_norm": 13.253767013549805, "learning_rate": 1.906760786997454e-05, "loss": 0.0872, "step": 1523 }, { "epoch": 0.2248616746587975, "grad_norm": 6.408520698547363, "learning_rate": 1.9065434773030394e-05, "loss": 0.1308, "step": 1524 }, { "epoch": 0.2250092216894135, "grad_norm": 2.8635432720184326, "learning_rate": 1.906325927077598e-05, "loss": 0.0603, "step": 1525 }, { "epoch": 0.22515676872002952, "grad_norm": 9.502235412597656, "learning_rate": 1.906108136378851e-05, "loss": 0.1187, "step": 1526 }, { "epoch": 0.22530431575064552, "grad_norm": 3.6357860565185547, "learning_rate": 1.9058901052645843e-05, "loss": 0.0407, "step": 1527 }, { "epoch": 0.22545186278126153, "grad_norm": 5.088707447052002, "learning_rate": 1.9056718337926477e-05, "loss": 0.1315, "step": 1528 }, { "epoch": 0.22559940981187754, "grad_norm": 1.3516696691513062, "learning_rate": 1.905453322020955e-05, "loss": 0.0386, "step": 1529 }, { "epoch": 0.22574695684249355, "grad_norm": 2.0411782264709473, "learning_rate": 1.905234570007483e-05, "loss": 0.025, "step": 1530 }, { "epoch": 0.22589450387310955, "grad_norm": 1.3867287635803223, "learning_rate": 1.9050155778102727e-05, "loss": 0.0389, "step": 1531 }, { "epoch": 0.22604205090372556, "grad_norm": 1.9618216753005981, "learning_rate": 1.9047963454874287e-05, "loss": 0.0554, "step": 1532 }, { "epoch": 0.22618959793434157, "grad_norm": 2.296586513519287, "learning_rate": 1.9045768730971198e-05, "loss": 0.0598, "step": 1533 }, { "epoch": 0.22633714496495758, "grad_norm": 2.5132110118865967, "learning_rate": 1.9043571606975776e-05, "loss": 0.0844, "step": 1534 }, { "epoch": 0.22648469199557358, "grad_norm": 3.0915493965148926, "learning_rate": 1.9041372083470984e-05, "loss": 0.0823, "step": 1535 }, { "epoch": 0.2266322390261896, "grad_norm": 2.6400187015533447, "learning_rate": 1.9039170161040408e-05, "loss": 0.0863, "step": 1536 }, { "epoch": 0.2267797860568056, "grad_norm": 1.6036396026611328, "learning_rate": 1.903696584026829e-05, "loss": 0.035, "step": 1537 }, { "epoch": 0.2269273330874216, "grad_norm": 2.944317579269409, "learning_rate": 1.9034759121739486e-05, "loss": 0.0959, "step": 1538 }, { "epoch": 0.22707488011803761, "grad_norm": 2.2183780670166016, "learning_rate": 1.903255000603951e-05, "loss": 0.054, "step": 1539 }, { "epoch": 0.22722242714865362, "grad_norm": 2.041203022003174, "learning_rate": 1.903033849375449e-05, "loss": 0.0753, "step": 1540 }, { "epoch": 0.22722242714865362, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9554455445544554, "eval_loss": 0.06858236342668533, "eval_precision": 0.9747474747474747, "eval_recall": 0.9368932038834952, "eval_runtime": 49.5676, "eval_samples_per_second": 5.871, "eval_steps_per_second": 0.202, "step": 1540 }, { "epoch": 0.22736997417926963, "grad_norm": 5.1879401206970215, "learning_rate": 1.9028124585471207e-05, "loss": 0.0801, "step": 1541 }, { "epoch": 0.22751752120988566, "grad_norm": 4.049645900726318, "learning_rate": 1.902590828177708e-05, "loss": 0.0868, "step": 1542 }, { "epoch": 0.22766506824050167, "grad_norm": 1.6593950986862183, "learning_rate": 1.9023689583260144e-05, "loss": 0.0611, "step": 1543 }, { "epoch": 0.22781261527111768, "grad_norm": 3.2899882793426514, "learning_rate": 1.9021468490509082e-05, "loss": 0.0604, "step": 1544 }, { "epoch": 0.2279601623017337, "grad_norm": 4.629453182220459, "learning_rate": 1.9019245004113215e-05, "loss": 0.1888, "step": 1545 }, { "epoch": 0.2281077093323497, "grad_norm": 2.135707139968872, "learning_rate": 1.90170191246625e-05, "loss": 0.0334, "step": 1546 }, { "epoch": 0.2282552563629657, "grad_norm": 2.0412282943725586, "learning_rate": 1.9014790852747515e-05, "loss": 0.0666, "step": 1547 }, { "epoch": 0.2284028033935817, "grad_norm": 2.8245701789855957, "learning_rate": 1.9012560188959487e-05, "loss": 0.0952, "step": 1548 }, { "epoch": 0.22855035042419772, "grad_norm": 4.680109024047852, "learning_rate": 1.9010327133890268e-05, "loss": 0.1023, "step": 1549 }, { "epoch": 0.22869789745481373, "grad_norm": 3.1484830379486084, "learning_rate": 1.900809168813236e-05, "loss": 0.1129, "step": 1550 }, { "epoch": 0.22884544448542973, "grad_norm": 3.4137840270996094, "learning_rate": 1.900585385227887e-05, "loss": 0.0438, "step": 1551 }, { "epoch": 0.22899299151604574, "grad_norm": 3.38733172416687, "learning_rate": 1.900361362692358e-05, "loss": 0.0594, "step": 1552 }, { "epoch": 0.22914053854666175, "grad_norm": 1.8768424987792969, "learning_rate": 1.9001371012660867e-05, "loss": 0.0715, "step": 1553 }, { "epoch": 0.22928808557727776, "grad_norm": 1.5474003553390503, "learning_rate": 1.8999126010085767e-05, "loss": 0.0701, "step": 1554 }, { "epoch": 0.22943563260789376, "grad_norm": 1.305575966835022, "learning_rate": 1.899687861979394e-05, "loss": 0.058, "step": 1555 }, { "epoch": 0.22958317963850977, "grad_norm": 3.69191837310791, "learning_rate": 1.8994628842381675e-05, "loss": 0.0559, "step": 1556 }, { "epoch": 0.22973072666912578, "grad_norm": 1.1927210092544556, "learning_rate": 1.899237667844591e-05, "loss": 0.0395, "step": 1557 }, { "epoch": 0.2298782736997418, "grad_norm": 2.1002490520477295, "learning_rate": 1.8990122128584196e-05, "loss": 0.0708, "step": 1558 }, { "epoch": 0.2300258207303578, "grad_norm": 1.410414695739746, "learning_rate": 1.898786519339473e-05, "loss": 0.0555, "step": 1559 }, { "epoch": 0.2301733677609738, "grad_norm": 2.3505992889404297, "learning_rate": 1.8985605873476344e-05, "loss": 0.0839, "step": 1560 }, { "epoch": 0.2301733677609738, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.07852373272180557, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 49.5521, "eval_samples_per_second": 5.873, "eval_steps_per_second": 0.202, "step": 1560 }, { "epoch": 0.2303209147915898, "grad_norm": 1.7025160789489746, "learning_rate": 1.89833441694285e-05, "loss": 0.0542, "step": 1561 }, { "epoch": 0.23046846182220582, "grad_norm": 3.706110715866089, "learning_rate": 1.898108008185128e-05, "loss": 0.156, "step": 1562 }, { "epoch": 0.23061600885282182, "grad_norm": 2.6673829555511475, "learning_rate": 1.8978813611345414e-05, "loss": 0.0782, "step": 1563 }, { "epoch": 0.23076355588343783, "grad_norm": 2.4429564476013184, "learning_rate": 1.8976544758512262e-05, "loss": 0.0299, "step": 1564 }, { "epoch": 0.23091110291405387, "grad_norm": 2.807406425476074, "learning_rate": 1.8974273523953814e-05, "loss": 0.0667, "step": 1565 }, { "epoch": 0.23105864994466987, "grad_norm": 2.4867608547210693, "learning_rate": 1.8971999908272685e-05, "loss": 0.0559, "step": 1566 }, { "epoch": 0.23120619697528588, "grad_norm": 3.225327491760254, "learning_rate": 1.8969723912072133e-05, "loss": 0.0972, "step": 1567 }, { "epoch": 0.2313537440059019, "grad_norm": 1.0718013048171997, "learning_rate": 1.8967445535956042e-05, "loss": 0.0273, "step": 1568 }, { "epoch": 0.2315012910365179, "grad_norm": 1.6319961547851562, "learning_rate": 1.8965164780528925e-05, "loss": 0.0521, "step": 1569 }, { "epoch": 0.2316488380671339, "grad_norm": 3.548936128616333, "learning_rate": 1.8962881646395934e-05, "loss": 0.0681, "step": 1570 }, { "epoch": 0.2317963850977499, "grad_norm": 6.134999752044678, "learning_rate": 1.8960596134162845e-05, "loss": 0.0557, "step": 1571 }, { "epoch": 0.23194393212836592, "grad_norm": 3.2041032314300537, "learning_rate": 1.8958308244436066e-05, "loss": 0.0727, "step": 1572 }, { "epoch": 0.23209147915898193, "grad_norm": 2.943803548812866, "learning_rate": 1.895601797782264e-05, "loss": 0.0542, "step": 1573 }, { "epoch": 0.23223902618959794, "grad_norm": 2.4484059810638428, "learning_rate": 1.895372533493024e-05, "loss": 0.0586, "step": 1574 }, { "epoch": 0.23238657322021394, "grad_norm": 0.9962635636329651, "learning_rate": 1.8951430316367163e-05, "loss": 0.0113, "step": 1575 }, { "epoch": 0.23253412025082995, "grad_norm": 2.771796941757202, "learning_rate": 1.894913292274234e-05, "loss": 0.1163, "step": 1576 }, { "epoch": 0.23268166728144596, "grad_norm": 1.495766520500183, "learning_rate": 1.894683315466534e-05, "loss": 0.0332, "step": 1577 }, { "epoch": 0.23282921431206197, "grad_norm": 1.1597464084625244, "learning_rate": 1.894453101274635e-05, "loss": 0.0179, "step": 1578 }, { "epoch": 0.23297676134267797, "grad_norm": 3.0135273933410645, "learning_rate": 1.8942226497596194e-05, "loss": 0.053, "step": 1579 }, { "epoch": 0.23312430837329398, "grad_norm": 1.8259490728378296, "learning_rate": 1.8939919609826317e-05, "loss": 0.0861, "step": 1580 }, { "epoch": 0.23312430837329398, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.08110551536083221, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 49.6499, "eval_samples_per_second": 5.861, "eval_steps_per_second": 0.201, "step": 1580 }, { "epoch": 0.23327185540391, "grad_norm": 2.748650074005127, "learning_rate": 1.893761035004881e-05, "loss": 0.0441, "step": 1581 }, { "epoch": 0.233419402434526, "grad_norm": 2.156463861465454, "learning_rate": 1.8935298718876374e-05, "loss": 0.0759, "step": 1582 }, { "epoch": 0.233566949465142, "grad_norm": 2.9778733253479004, "learning_rate": 1.8932984716922354e-05, "loss": 0.1014, "step": 1583 }, { "epoch": 0.233714496495758, "grad_norm": 3.8208696842193604, "learning_rate": 1.893066834480072e-05, "loss": 0.1488, "step": 1584 }, { "epoch": 0.23386204352637402, "grad_norm": 3.3341753482818604, "learning_rate": 1.8928349603126067e-05, "loss": 0.0587, "step": 1585 }, { "epoch": 0.23400959055699003, "grad_norm": 1.781720519065857, "learning_rate": 1.892602849251362e-05, "loss": 0.0518, "step": 1586 }, { "epoch": 0.23415713758760606, "grad_norm": 2.1888794898986816, "learning_rate": 1.8923705013579236e-05, "loss": 0.0603, "step": 1587 }, { "epoch": 0.23430468461822207, "grad_norm": 5.221462726593018, "learning_rate": 1.8921379166939397e-05, "loss": 0.0561, "step": 1588 }, { "epoch": 0.23445223164883808, "grad_norm": 2.3900160789489746, "learning_rate": 1.8919050953211214e-05, "loss": 0.0181, "step": 1589 }, { "epoch": 0.23459977867945409, "grad_norm": 1.8215352296829224, "learning_rate": 1.8916720373012425e-05, "loss": 0.0452, "step": 1590 }, { "epoch": 0.2347473257100701, "grad_norm": 2.3176705837249756, "learning_rate": 1.89143874269614e-05, "loss": 0.0564, "step": 1591 }, { "epoch": 0.2348948727406861, "grad_norm": 2.7995049953460693, "learning_rate": 1.8912052115677138e-05, "loss": 0.0798, "step": 1592 }, { "epoch": 0.2350424197713021, "grad_norm": 4.901682376861572, "learning_rate": 1.8909714439779257e-05, "loss": 0.137, "step": 1593 }, { "epoch": 0.23518996680191812, "grad_norm": 1.7497882843017578, "learning_rate": 1.8907374399888002e-05, "loss": 0.0461, "step": 1594 }, { "epoch": 0.23533751383253412, "grad_norm": 2.6849687099456787, "learning_rate": 1.8905031996624262e-05, "loss": 0.0824, "step": 1595 }, { "epoch": 0.23548506086315013, "grad_norm": 4.540345191955566, "learning_rate": 1.890268723060953e-05, "loss": 0.1235, "step": 1596 }, { "epoch": 0.23563260789376614, "grad_norm": 5.917974472045898, "learning_rate": 1.890034010246594e-05, "loss": 0.1283, "step": 1597 }, { "epoch": 0.23578015492438215, "grad_norm": 2.0922887325286865, "learning_rate": 1.8897990612816256e-05, "loss": 0.0608, "step": 1598 }, { "epoch": 0.23592770195499815, "grad_norm": 3.9860498905181885, "learning_rate": 1.8895638762283855e-05, "loss": 0.1301, "step": 1599 }, { "epoch": 0.23607524898561416, "grad_norm": 1.354077696800232, "learning_rate": 1.889328455149275e-05, "loss": 0.0657, "step": 1600 }, { "epoch": 0.23607524898561416, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06800917536020279, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.8751, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.201, "step": 1600 }, { "epoch": 0.23622279601623017, "grad_norm": 3.008150815963745, "learning_rate": 1.8890927981067583e-05, "loss": 0.1196, "step": 1601 }, { "epoch": 0.23637034304684618, "grad_norm": 1.6604888439178467, "learning_rate": 1.8888569051633613e-05, "loss": 0.0921, "step": 1602 }, { "epoch": 0.23651789007746218, "grad_norm": 2.6593024730682373, "learning_rate": 1.8886207763816732e-05, "loss": 0.0476, "step": 1603 }, { "epoch": 0.2366654371080782, "grad_norm": 2.5641930103302, "learning_rate": 1.888384411824345e-05, "loss": 0.1078, "step": 1604 }, { "epoch": 0.2368129841386942, "grad_norm": 1.1254023313522339, "learning_rate": 1.8881478115540907e-05, "loss": 0.0171, "step": 1605 }, { "epoch": 0.2369605311693102, "grad_norm": 1.325793981552124, "learning_rate": 1.8879109756336876e-05, "loss": 0.0377, "step": 1606 }, { "epoch": 0.23710807819992621, "grad_norm": 7.710003852844238, "learning_rate": 1.8876739041259742e-05, "loss": 0.114, "step": 1607 }, { "epoch": 0.23725562523054222, "grad_norm": 1.0816516876220703, "learning_rate": 1.8874365970938524e-05, "loss": 0.0315, "step": 1608 }, { "epoch": 0.23740317226115826, "grad_norm": 1.852760910987854, "learning_rate": 1.887199054600286e-05, "loss": 0.0433, "step": 1609 }, { "epoch": 0.23755071929177426, "grad_norm": 4.503643989562988, "learning_rate": 1.886961276708302e-05, "loss": 0.0586, "step": 1610 }, { "epoch": 0.23769826632239027, "grad_norm": 2.2752246856689453, "learning_rate": 1.8867232634809895e-05, "loss": 0.0571, "step": 1611 }, { "epoch": 0.23784581335300628, "grad_norm": 1.762903094291687, "learning_rate": 1.8864850149814995e-05, "loss": 0.0236, "step": 1612 }, { "epoch": 0.2379933603836223, "grad_norm": 2.781127452850342, "learning_rate": 1.886246531273046e-05, "loss": 0.1219, "step": 1613 }, { "epoch": 0.2381409074142383, "grad_norm": 1.7541805505752563, "learning_rate": 1.886007812418906e-05, "loss": 0.0381, "step": 1614 }, { "epoch": 0.2382884544448543, "grad_norm": 1.8100532293319702, "learning_rate": 1.8857688584824172e-05, "loss": 0.0707, "step": 1615 }, { "epoch": 0.2384360014754703, "grad_norm": 5.112905025482178, "learning_rate": 1.8855296695269815e-05, "loss": 0.0493, "step": 1616 }, { "epoch": 0.23858354850608632, "grad_norm": 3.7628955841064453, "learning_rate": 1.8852902456160618e-05, "loss": 0.0739, "step": 1617 }, { "epoch": 0.23873109553670233, "grad_norm": 2.7358105182647705, "learning_rate": 1.885050586813184e-05, "loss": 0.1133, "step": 1618 }, { "epoch": 0.23887864256731833, "grad_norm": 4.206934928894043, "learning_rate": 1.8848106931819363e-05, "loss": 0.1211, "step": 1619 }, { "epoch": 0.23902618959793434, "grad_norm": 1.8442342281341553, "learning_rate": 1.8845705647859687e-05, "loss": 0.0693, "step": 1620 }, { "epoch": 0.23902618959793434, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9545454545454546, "eval_loss": 0.07409653812646866, "eval_precision": 0.9947368421052631, "eval_recall": 0.9174757281553398, "eval_runtime": 49.553, "eval_samples_per_second": 5.873, "eval_steps_per_second": 0.202, "step": 1620 }, { "epoch": 0.23917373662855035, "grad_norm": 10.018924713134766, "learning_rate": 1.884330201688995e-05, "loss": 0.197, "step": 1621 }, { "epoch": 0.23932128365916636, "grad_norm": 3.2351040840148926, "learning_rate": 1.8840896039547894e-05, "loss": 0.0677, "step": 1622 }, { "epoch": 0.23946883068978236, "grad_norm": 3.178816795349121, "learning_rate": 1.8838487716471885e-05, "loss": 0.0813, "step": 1623 }, { "epoch": 0.23961637772039837, "grad_norm": 3.182863235473633, "learning_rate": 1.883607704830093e-05, "loss": 0.1022, "step": 1624 }, { "epoch": 0.23976392475101438, "grad_norm": 2.7225515842437744, "learning_rate": 1.8833664035674643e-05, "loss": 0.0387, "step": 1625 }, { "epoch": 0.2399114717816304, "grad_norm": 2.1176841259002686, "learning_rate": 1.8831248679233255e-05, "loss": 0.0566, "step": 1626 }, { "epoch": 0.2400590188122464, "grad_norm": 2.227962017059326, "learning_rate": 1.882883097961763e-05, "loss": 0.0786, "step": 1627 }, { "epoch": 0.2402065658428624, "grad_norm": 2.132786750793457, "learning_rate": 1.8826410937469256e-05, "loss": 0.0624, "step": 1628 }, { "epoch": 0.2403541128734784, "grad_norm": 2.2545626163482666, "learning_rate": 1.8823988553430235e-05, "loss": 0.0538, "step": 1629 }, { "epoch": 0.24050165990409442, "grad_norm": 4.5056538581848145, "learning_rate": 1.8821563828143284e-05, "loss": 0.1264, "step": 1630 }, { "epoch": 0.24064920693471045, "grad_norm": 0.7923541069030762, "learning_rate": 1.8819136762251765e-05, "loss": 0.0122, "step": 1631 }, { "epoch": 0.24079675396532646, "grad_norm": 2.7172040939331055, "learning_rate": 1.881670735639963e-05, "loss": 0.0346, "step": 1632 }, { "epoch": 0.24094430099594247, "grad_norm": 1.0857584476470947, "learning_rate": 1.881427561123148e-05, "loss": 0.0368, "step": 1633 }, { "epoch": 0.24109184802655848, "grad_norm": 2.4387190341949463, "learning_rate": 1.881184152739252e-05, "loss": 0.1468, "step": 1634 }, { "epoch": 0.24123939505717448, "grad_norm": 3.6060807704925537, "learning_rate": 1.8809405105528572e-05, "loss": 0.1154, "step": 1635 }, { "epoch": 0.2413869420877905, "grad_norm": 2.9632816314697266, "learning_rate": 1.8806966346286095e-05, "loss": 0.0649, "step": 1636 }, { "epoch": 0.2415344891184065, "grad_norm": 2.280517816543579, "learning_rate": 1.8804525250312155e-05, "loss": 0.0458, "step": 1637 }, { "epoch": 0.2416820361490225, "grad_norm": 1.5791654586791992, "learning_rate": 1.8802081818254446e-05, "loss": 0.0591, "step": 1638 }, { "epoch": 0.2418295831796385, "grad_norm": 3.066272258758545, "learning_rate": 1.8799636050761276e-05, "loss": 0.0735, "step": 1639 }, { "epoch": 0.24197713021025452, "grad_norm": 3.7066493034362793, "learning_rate": 1.8797187948481576e-05, "loss": 0.0547, "step": 1640 }, { "epoch": 0.24197713021025452, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.07134856283664703, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 50.2461, "eval_samples_per_second": 5.791, "eval_steps_per_second": 0.199, "step": 1640 }, { "epoch": 0.24212467724087053, "grad_norm": 2.341954231262207, "learning_rate": 1.879473751206489e-05, "loss": 0.0773, "step": 1641 }, { "epoch": 0.24227222427148654, "grad_norm": 2.7641525268554688, "learning_rate": 1.8792284742161394e-05, "loss": 0.0995, "step": 1642 }, { "epoch": 0.24241977130210254, "grad_norm": 3.9379851818084717, "learning_rate": 1.8789829639421872e-05, "loss": 0.1247, "step": 1643 }, { "epoch": 0.24256731833271855, "grad_norm": 5.1747541427612305, "learning_rate": 1.8787372204497727e-05, "loss": 0.0858, "step": 1644 }, { "epoch": 0.24271486536333456, "grad_norm": 1.9137122631072998, "learning_rate": 1.878491243804099e-05, "loss": 0.092, "step": 1645 }, { "epoch": 0.24286241239395057, "grad_norm": 2.063175678253174, "learning_rate": 1.87824503407043e-05, "loss": 0.0578, "step": 1646 }, { "epoch": 0.24300995942456657, "grad_norm": 4.144733428955078, "learning_rate": 1.8779985913140927e-05, "loss": 0.0883, "step": 1647 }, { "epoch": 0.24315750645518258, "grad_norm": 2.499525308609009, "learning_rate": 1.8777519156004742e-05, "loss": 0.0933, "step": 1648 }, { "epoch": 0.2433050534857986, "grad_norm": 4.733231544494629, "learning_rate": 1.877505006995025e-05, "loss": 0.0867, "step": 1649 }, { "epoch": 0.2434526005164146, "grad_norm": 1.7096129655838013, "learning_rate": 1.8772578655632568e-05, "loss": 0.0595, "step": 1650 }, { "epoch": 0.2436001475470306, "grad_norm": 3.6752445697784424, "learning_rate": 1.8770104913707426e-05, "loss": 0.0921, "step": 1651 }, { "epoch": 0.2437476945776466, "grad_norm": 2.651184558868408, "learning_rate": 1.8767628844831183e-05, "loss": 0.0965, "step": 1652 }, { "epoch": 0.24389524160826265, "grad_norm": 2.46650767326355, "learning_rate": 1.8765150449660804e-05, "loss": 0.0642, "step": 1653 }, { "epoch": 0.24404278863887865, "grad_norm": 1.9928101301193237, "learning_rate": 1.876266972885387e-05, "loss": 0.0633, "step": 1654 }, { "epoch": 0.24419033566949466, "grad_norm": 1.8227845430374146, "learning_rate": 1.87601866830686e-05, "loss": 0.0282, "step": 1655 }, { "epoch": 0.24433788270011067, "grad_norm": 1.8954331874847412, "learning_rate": 1.87577013129638e-05, "loss": 0.0936, "step": 1656 }, { "epoch": 0.24448542973072668, "grad_norm": 1.4748156070709229, "learning_rate": 1.8755213619198915e-05, "loss": 0.0548, "step": 1657 }, { "epoch": 0.24463297676134269, "grad_norm": 2.6450955867767334, "learning_rate": 1.8752723602433997e-05, "loss": 0.0697, "step": 1658 }, { "epoch": 0.2447805237919587, "grad_norm": 1.495141625404358, "learning_rate": 1.8750231263329716e-05, "loss": 0.0529, "step": 1659 }, { "epoch": 0.2449280708225747, "grad_norm": 3.938822031021118, "learning_rate": 1.8747736602547358e-05, "loss": 0.0983, "step": 1660 }, { "epoch": 0.2449280708225747, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9547738693467337, "eval_loss": 0.07112825661897659, "eval_precision": 0.9895833333333334, "eval_recall": 0.9223300970873787, "eval_runtime": 49.5534, "eval_samples_per_second": 5.872, "eval_steps_per_second": 0.202, "step": 1660 }, { "epoch": 0.2450756178531907, "grad_norm": 1.5419602394104004, "learning_rate": 1.8745239620748822e-05, "loss": 0.0493, "step": 1661 }, { "epoch": 0.24522316488380672, "grad_norm": 2.9517505168914795, "learning_rate": 1.8742740318596632e-05, "loss": 0.0674, "step": 1662 }, { "epoch": 0.24537071191442272, "grad_norm": 1.3199691772460938, "learning_rate": 1.8740238696753925e-05, "loss": 0.0372, "step": 1663 }, { "epoch": 0.24551825894503873, "grad_norm": 5.415014743804932, "learning_rate": 1.873773475588444e-05, "loss": 0.06, "step": 1664 }, { "epoch": 0.24566580597565474, "grad_norm": 2.477362871170044, "learning_rate": 1.873522849665255e-05, "loss": 0.113, "step": 1665 }, { "epoch": 0.24581335300627075, "grad_norm": 2.2444581985473633, "learning_rate": 1.873271991972323e-05, "loss": 0.0704, "step": 1666 }, { "epoch": 0.24596090003688675, "grad_norm": 2.8318018913269043, "learning_rate": 1.8730209025762077e-05, "loss": 0.0688, "step": 1667 }, { "epoch": 0.24610844706750276, "grad_norm": 3.7584753036499023, "learning_rate": 1.87276958154353e-05, "loss": 0.1223, "step": 1668 }, { "epoch": 0.24625599409811877, "grad_norm": 2.150073289871216, "learning_rate": 1.8725180289409725e-05, "loss": 0.0549, "step": 1669 }, { "epoch": 0.24640354112873478, "grad_norm": 2.330817699432373, "learning_rate": 1.872266244835279e-05, "loss": 0.0545, "step": 1670 }, { "epoch": 0.24655108815935078, "grad_norm": 4.182545185089111, "learning_rate": 1.8720142292932544e-05, "loss": 0.0861, "step": 1671 }, { "epoch": 0.2466986351899668, "grad_norm": 2.2650673389434814, "learning_rate": 1.8717619823817655e-05, "loss": 0.0871, "step": 1672 }, { "epoch": 0.2468461822205828, "grad_norm": 2.7728540897369385, "learning_rate": 1.871509504167741e-05, "loss": 0.0798, "step": 1673 }, { "epoch": 0.2469937292511988, "grad_norm": 1.604771614074707, "learning_rate": 1.8712567947181695e-05, "loss": 0.0634, "step": 1674 }, { "epoch": 0.24714127628181484, "grad_norm": 1.5157824754714966, "learning_rate": 1.8710038541001024e-05, "loss": 0.045, "step": 1675 }, { "epoch": 0.24728882331243085, "grad_norm": 1.7582708597183228, "learning_rate": 1.8707506823806514e-05, "loss": 0.0654, "step": 1676 }, { "epoch": 0.24743637034304686, "grad_norm": 2.13720965385437, "learning_rate": 1.87049727962699e-05, "loss": 0.0637, "step": 1677 }, { "epoch": 0.24758391737366287, "grad_norm": 1.3255536556243896, "learning_rate": 1.8702436459063533e-05, "loss": 0.037, "step": 1678 }, { "epoch": 0.24773146440427887, "grad_norm": 3.8610284328460693, "learning_rate": 1.8699897812860374e-05, "loss": 0.1258, "step": 1679 }, { "epoch": 0.24787901143489488, "grad_norm": 3.3714330196380615, "learning_rate": 1.869735685833399e-05, "loss": 0.0599, "step": 1680 }, { "epoch": 0.24787901143489488, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.0660150945186615, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 50.4891, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.198, "step": 1680 }, { "epoch": 0.2480265584655109, "grad_norm": 7.666978359222412, "learning_rate": 1.8694813596158568e-05, "loss": 0.065, "step": 1681 }, { "epoch": 0.2481741054961269, "grad_norm": 1.7158877849578857, "learning_rate": 1.8692268027008912e-05, "loss": 0.0468, "step": 1682 }, { "epoch": 0.2483216525267429, "grad_norm": 2.407661199569702, "learning_rate": 1.8689720151560425e-05, "loss": 0.0526, "step": 1683 }, { "epoch": 0.2484691995573589, "grad_norm": 1.6817408800125122, "learning_rate": 1.868716997048913e-05, "loss": 0.0444, "step": 1684 }, { "epoch": 0.24861674658797492, "grad_norm": 1.7822405099868774, "learning_rate": 1.8684617484471662e-05, "loss": 0.0706, "step": 1685 }, { "epoch": 0.24876429361859093, "grad_norm": 1.842396855354309, "learning_rate": 1.8682062694185267e-05, "loss": 0.0323, "step": 1686 }, { "epoch": 0.24891184064920693, "grad_norm": 3.4837851524353027, "learning_rate": 1.86795056003078e-05, "loss": 0.1292, "step": 1687 }, { "epoch": 0.24905938767982294, "grad_norm": 3.1840720176696777, "learning_rate": 1.8676946203517728e-05, "loss": 0.1145, "step": 1688 }, { "epoch": 0.24920693471043895, "grad_norm": 4.872955799102783, "learning_rate": 1.867438450449413e-05, "loss": 0.0766, "step": 1689 }, { "epoch": 0.24935448174105496, "grad_norm": 3.049365758895874, "learning_rate": 1.8671820503916696e-05, "loss": 0.1221, "step": 1690 }, { "epoch": 0.24950202877167096, "grad_norm": 1.2856934070587158, "learning_rate": 1.8669254202465725e-05, "loss": 0.0264, "step": 1691 }, { "epoch": 0.24964957580228697, "grad_norm": 2.228694438934326, "learning_rate": 1.866668560082213e-05, "loss": 0.0331, "step": 1692 }, { "epoch": 0.24979712283290298, "grad_norm": 2.5879011154174805, "learning_rate": 1.8664114699667427e-05, "loss": 0.1078, "step": 1693 }, { "epoch": 0.249944669863519, "grad_norm": 4.204154968261719, "learning_rate": 1.8661541499683756e-05, "loss": 0.1442, "step": 1694 }, { "epoch": 0.250092216894135, "grad_norm": 1.7922683954238892, "learning_rate": 1.865896600155385e-05, "loss": 0.0385, "step": 1695 }, { "epoch": 0.250239763924751, "grad_norm": 2.9584996700286865, "learning_rate": 1.8656388205961066e-05, "loss": 0.0558, "step": 1696 }, { "epoch": 0.25038731095536704, "grad_norm": 2.5491790771484375, "learning_rate": 1.8653808113589357e-05, "loss": 0.022, "step": 1697 }, { "epoch": 0.250534857985983, "grad_norm": 2.3495829105377197, "learning_rate": 1.86512257251233e-05, "loss": 0.0433, "step": 1698 }, { "epoch": 0.25068240501659905, "grad_norm": 7.000943183898926, "learning_rate": 1.8648641041248067e-05, "loss": 0.1028, "step": 1699 }, { "epoch": 0.25082995204721503, "grad_norm": 3.87225604057312, "learning_rate": 1.864605406264945e-05, "loss": 0.1337, "step": 1700 }, { "epoch": 0.25082995204721503, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.06515882909297943, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 50.4012, "eval_samples_per_second": 5.774, "eval_steps_per_second": 0.198, "step": 1700 }, { "epoch": 0.25097749907783107, "grad_norm": 1.9432036876678467, "learning_rate": 1.8643464790013847e-05, "loss": 0.0727, "step": 1701 }, { "epoch": 0.25112504610844705, "grad_norm": 2.7673637866973877, "learning_rate": 1.8640873224028264e-05, "loss": 0.0993, "step": 1702 }, { "epoch": 0.2512725931390631, "grad_norm": 3.1381680965423584, "learning_rate": 1.8638279365380313e-05, "loss": 0.0613, "step": 1703 }, { "epoch": 0.25142014016967906, "grad_norm": 2.2705259323120117, "learning_rate": 1.8635683214758213e-05, "loss": 0.0616, "step": 1704 }, { "epoch": 0.2515676872002951, "grad_norm": 1.7805274724960327, "learning_rate": 1.86330847728508e-05, "loss": 0.0541, "step": 1705 }, { "epoch": 0.2517152342309111, "grad_norm": 2.0656790733337402, "learning_rate": 1.8630484040347513e-05, "loss": 0.0557, "step": 1706 }, { "epoch": 0.2518627812615271, "grad_norm": 2.5722081661224365, "learning_rate": 1.8627881017938392e-05, "loss": 0.0742, "step": 1707 }, { "epoch": 0.2520103282921431, "grad_norm": 3.1926660537719727, "learning_rate": 1.8625275706314094e-05, "loss": 0.077, "step": 1708 }, { "epoch": 0.25215787532275913, "grad_norm": 2.3915457725524902, "learning_rate": 1.8622668106165883e-05, "loss": 0.0217, "step": 1709 }, { "epoch": 0.25230542235337516, "grad_norm": 1.8211325407028198, "learning_rate": 1.8620058218185624e-05, "loss": 0.0851, "step": 1710 }, { "epoch": 0.25245296938399114, "grad_norm": 2.756812810897827, "learning_rate": 1.8617446043065796e-05, "loss": 0.047, "step": 1711 }, { "epoch": 0.2526005164146072, "grad_norm": 3.6415255069732666, "learning_rate": 1.8614831581499477e-05, "loss": 0.0554, "step": 1712 }, { "epoch": 0.25274806344522316, "grad_norm": 4.068582534790039, "learning_rate": 1.8612214834180357e-05, "loss": 0.1054, "step": 1713 }, { "epoch": 0.2528956104758392, "grad_norm": 1.3567254543304443, "learning_rate": 1.860959580180273e-05, "loss": 0.0349, "step": 1714 }, { "epoch": 0.2530431575064552, "grad_norm": 1.8216665983200073, "learning_rate": 1.8606974485061503e-05, "loss": 0.0471, "step": 1715 }, { "epoch": 0.2531907045370712, "grad_norm": 2.0203757286071777, "learning_rate": 1.8604350884652175e-05, "loss": 0.0454, "step": 1716 }, { "epoch": 0.2533382515676872, "grad_norm": 3.666323184967041, "learning_rate": 1.8601725001270868e-05, "loss": 0.0352, "step": 1717 }, { "epoch": 0.2534857985983032, "grad_norm": 2.4993855953216553, "learning_rate": 1.8599096835614298e-05, "loss": 0.0631, "step": 1718 }, { "epoch": 0.2536333456289192, "grad_norm": 0.7023165225982666, "learning_rate": 1.8596466388379793e-05, "loss": 0.007, "step": 1719 }, { "epoch": 0.25378089265953524, "grad_norm": 4.019131183624268, "learning_rate": 1.8593833660265274e-05, "loss": 0.0401, "step": 1720 }, { "epoch": 0.25378089265953524, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.08183550834655762, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 51.288, "eval_samples_per_second": 5.674, "eval_steps_per_second": 0.195, "step": 1720 }, { "epoch": 0.2539284396901512, "grad_norm": 1.3719967603683472, "learning_rate": 1.8591198651969286e-05, "loss": 0.0428, "step": 1721 }, { "epoch": 0.25407598672076726, "grad_norm": 2.008892774581909, "learning_rate": 1.858856136419097e-05, "loss": 0.0452, "step": 1722 }, { "epoch": 0.25422353375138323, "grad_norm": 0.8016170263290405, "learning_rate": 1.8585921797630064e-05, "loss": 0.0162, "step": 1723 }, { "epoch": 0.25437108078199927, "grad_norm": 6.163304328918457, "learning_rate": 1.8583279952986924e-05, "loss": 0.1614, "step": 1724 }, { "epoch": 0.25451862781261525, "grad_norm": 2.027590274810791, "learning_rate": 1.8580635830962498e-05, "loss": 0.0829, "step": 1725 }, { "epoch": 0.2546661748432313, "grad_norm": 2.659754753112793, "learning_rate": 1.8577989432258352e-05, "loss": 0.0459, "step": 1726 }, { "epoch": 0.25481372187384727, "grad_norm": 5.089884281158447, "learning_rate": 1.8575340757576647e-05, "loss": 0.1173, "step": 1727 }, { "epoch": 0.2549612689044633, "grad_norm": 3.7841427326202393, "learning_rate": 1.8572689807620144e-05, "loss": 0.1242, "step": 1728 }, { "epoch": 0.2551088159350793, "grad_norm": 1.6784816980361938, "learning_rate": 1.8570036583092218e-05, "loss": 0.0231, "step": 1729 }, { "epoch": 0.2552563629656953, "grad_norm": 2.5521578788757324, "learning_rate": 1.8567381084696846e-05, "loss": 0.0475, "step": 1730 }, { "epoch": 0.25540390999631135, "grad_norm": 2.2968292236328125, "learning_rate": 1.8564723313138595e-05, "loss": 0.0776, "step": 1731 }, { "epoch": 0.25555145702692733, "grad_norm": 5.221238136291504, "learning_rate": 1.8562063269122654e-05, "loss": 0.0677, "step": 1732 }, { "epoch": 0.25569900405754337, "grad_norm": 3.9754536151885986, "learning_rate": 1.8559400953354804e-05, "loss": 0.0611, "step": 1733 }, { "epoch": 0.25584655108815935, "grad_norm": 1.1239689588546753, "learning_rate": 1.855673636654143e-05, "loss": 0.0145, "step": 1734 }, { "epoch": 0.2559940981187754, "grad_norm": 2.755523443222046, "learning_rate": 1.8554069509389522e-05, "loss": 0.0504, "step": 1735 }, { "epoch": 0.25614164514939136, "grad_norm": 2.0224733352661133, "learning_rate": 1.8551400382606666e-05, "loss": 0.0286, "step": 1736 }, { "epoch": 0.2562891921800074, "grad_norm": 2.5080149173736572, "learning_rate": 1.8548728986901063e-05, "loss": 0.0417, "step": 1737 }, { "epoch": 0.2564367392106234, "grad_norm": 2.173332452774048, "learning_rate": 1.8546055322981498e-05, "loss": 0.0264, "step": 1738 }, { "epoch": 0.2565842862412394, "grad_norm": 4.695002555847168, "learning_rate": 1.854337939155738e-05, "loss": 0.0752, "step": 1739 }, { "epoch": 0.2567318332718554, "grad_norm": 1.4090908765792847, "learning_rate": 1.85407011933387e-05, "loss": 0.0215, "step": 1740 }, { "epoch": 0.2567318332718554, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.06773916631937027, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 50.6257, "eval_samples_per_second": 5.748, "eval_steps_per_second": 0.198, "step": 1740 }, { "epoch": 0.2568793803024714, "grad_norm": 1.6555758714675903, "learning_rate": 1.8538020729036056e-05, "loss": 0.0399, "step": 1741 }, { "epoch": 0.2570269273330874, "grad_norm": 2.4466981887817383, "learning_rate": 1.8535337999360655e-05, "loss": 0.0532, "step": 1742 }, { "epoch": 0.25717447436370344, "grad_norm": 1.5140111446380615, "learning_rate": 1.8532653005024296e-05, "loss": 0.0407, "step": 1743 }, { "epoch": 0.2573220213943194, "grad_norm": 4.404722690582275, "learning_rate": 1.8529965746739382e-05, "loss": 0.0865, "step": 1744 }, { "epoch": 0.25746956842493546, "grad_norm": 2.450817346572876, "learning_rate": 1.8527276225218917e-05, "loss": 0.0475, "step": 1745 }, { "epoch": 0.25761711545555144, "grad_norm": 4.185603618621826, "learning_rate": 1.852458444117651e-05, "loss": 0.0627, "step": 1746 }, { "epoch": 0.2577646624861675, "grad_norm": 2.810720443725586, "learning_rate": 1.8521890395326362e-05, "loss": 0.0803, "step": 1747 }, { "epoch": 0.25791220951678345, "grad_norm": 1.0313979387283325, "learning_rate": 1.851919408838327e-05, "loss": 0.0125, "step": 1748 }, { "epoch": 0.2580597565473995, "grad_norm": 3.1539950370788574, "learning_rate": 1.8516495521062656e-05, "loss": 0.0782, "step": 1749 }, { "epoch": 0.25820730357801547, "grad_norm": 6.985763072967529, "learning_rate": 1.851379469408051e-05, "loss": 0.1116, "step": 1750 }, { "epoch": 0.2583548506086315, "grad_norm": 1.3113516569137573, "learning_rate": 1.851109160815344e-05, "loss": 0.0102, "step": 1751 }, { "epoch": 0.2585023976392475, "grad_norm": 3.379911184310913, "learning_rate": 1.850838626399865e-05, "loss": 0.0657, "step": 1752 }, { "epoch": 0.2586499446698635, "grad_norm": 4.795577526092529, "learning_rate": 1.8505678662333945e-05, "loss": 0.1243, "step": 1753 }, { "epoch": 0.25879749170047955, "grad_norm": 0.8018746972084045, "learning_rate": 1.8502968803877724e-05, "loss": 0.0167, "step": 1754 }, { "epoch": 0.25894503873109553, "grad_norm": 5.773539066314697, "learning_rate": 1.8500256689348985e-05, "loss": 0.0964, "step": 1755 }, { "epoch": 0.25909258576171157, "grad_norm": 2.2165045738220215, "learning_rate": 1.849754231946733e-05, "loss": 0.1102, "step": 1756 }, { "epoch": 0.25924013279232755, "grad_norm": 1.5358134508132935, "learning_rate": 1.8494825694952955e-05, "loss": 0.022, "step": 1757 }, { "epoch": 0.2593876798229436, "grad_norm": 4.51975679397583, "learning_rate": 1.849210681652666e-05, "loss": 0.1497, "step": 1758 }, { "epoch": 0.25953522685355956, "grad_norm": 1.4902085065841675, "learning_rate": 1.8489385684909833e-05, "loss": 0.0418, "step": 1759 }, { "epoch": 0.2596827738841756, "grad_norm": 3.8184237480163574, "learning_rate": 1.8486662300824466e-05, "loss": 0.1742, "step": 1760 }, { "epoch": 0.2596827738841756, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9545454545454546, "eval_loss": 0.06750224530696869, "eval_precision": 0.9947368421052631, "eval_recall": 0.9174757281553398, "eval_runtime": 50.9325, "eval_samples_per_second": 5.713, "eval_steps_per_second": 0.196, "step": 1760 }, { "epoch": 0.2598303209147916, "grad_norm": 2.709332227706909, "learning_rate": 1.8483936664993152e-05, "loss": 0.0462, "step": 1761 }, { "epoch": 0.2599778679454076, "grad_norm": 3.011286497116089, "learning_rate": 1.8481208778139072e-05, "loss": 0.0473, "step": 1762 }, { "epoch": 0.2601254149760236, "grad_norm": 1.5091990232467651, "learning_rate": 1.8478478640986012e-05, "loss": 0.0645, "step": 1763 }, { "epoch": 0.26027296200663963, "grad_norm": 2.049199104309082, "learning_rate": 1.8475746254258355e-05, "loss": 0.0925, "step": 1764 }, { "epoch": 0.2604205090372556, "grad_norm": 1.6349753141403198, "learning_rate": 1.8473011618681075e-05, "loss": 0.0617, "step": 1765 }, { "epoch": 0.26056805606787165, "grad_norm": 3.4156811237335205, "learning_rate": 1.847027473497975e-05, "loss": 0.0825, "step": 1766 }, { "epoch": 0.2607156030984876, "grad_norm": 1.9899978637695312, "learning_rate": 1.846753560388055e-05, "loss": 0.1078, "step": 1767 }, { "epoch": 0.26086315012910366, "grad_norm": 3.2789146900177, "learning_rate": 1.846479422611024e-05, "loss": 0.0708, "step": 1768 }, { "epoch": 0.26101069715971964, "grad_norm": 1.5854333639144897, "learning_rate": 1.8462050602396185e-05, "loss": 0.0241, "step": 1769 }, { "epoch": 0.2611582441903357, "grad_norm": 2.8847148418426514, "learning_rate": 1.845930473346634e-05, "loss": 0.0582, "step": 1770 }, { "epoch": 0.26130579122095166, "grad_norm": 1.4418102502822876, "learning_rate": 1.8456556620049268e-05, "loss": 0.045, "step": 1771 }, { "epoch": 0.2614533382515677, "grad_norm": 2.6730871200561523, "learning_rate": 1.8453806262874113e-05, "loss": 0.1407, "step": 1772 }, { "epoch": 0.26160088528218367, "grad_norm": 1.6478065252304077, "learning_rate": 1.8451053662670627e-05, "loss": 0.0935, "step": 1773 }, { "epoch": 0.2617484323127997, "grad_norm": 1.2931264638900757, "learning_rate": 1.844829882016914e-05, "loss": 0.0254, "step": 1774 }, { "epoch": 0.26189597934341574, "grad_norm": 7.989828586578369, "learning_rate": 1.84455417361006e-05, "loss": 0.0289, "step": 1775 }, { "epoch": 0.2620435263740317, "grad_norm": 1.3203344345092773, "learning_rate": 1.8442782411196528e-05, "loss": 0.057, "step": 1776 }, { "epoch": 0.26219107340464776, "grad_norm": 1.7602546215057373, "learning_rate": 1.8440020846189056e-05, "loss": 0.0389, "step": 1777 }, { "epoch": 0.26233862043526374, "grad_norm": 2.9316866397857666, "learning_rate": 1.8437257041810898e-05, "loss": 0.0283, "step": 1778 }, { "epoch": 0.26248616746587977, "grad_norm": 2.4985477924346924, "learning_rate": 1.843449099879537e-05, "loss": 0.0767, "step": 1779 }, { "epoch": 0.26263371449649575, "grad_norm": 2.154252529144287, "learning_rate": 1.8431722717876383e-05, "loss": 0.0445, "step": 1780 }, { "epoch": 0.26263371449649575, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.06556418538093567, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 50.127, "eval_samples_per_second": 5.805, "eval_steps_per_second": 0.199, "step": 1780 }, { "epoch": 0.2627812615271118, "grad_norm": 4.1848578453063965, "learning_rate": 1.8428952199788433e-05, "loss": 0.0738, "step": 1781 }, { "epoch": 0.26292880855772777, "grad_norm": 4.228147029876709, "learning_rate": 1.8426179445266615e-05, "loss": 0.0622, "step": 1782 }, { "epoch": 0.2630763555883438, "grad_norm": 1.3042999505996704, "learning_rate": 1.8423404455046626e-05, "loss": 0.0486, "step": 1783 }, { "epoch": 0.2632239026189598, "grad_norm": 2.5874569416046143, "learning_rate": 1.8420627229864737e-05, "loss": 0.0597, "step": 1784 }, { "epoch": 0.2633714496495758, "grad_norm": 1.3495664596557617, "learning_rate": 1.8417847770457825e-05, "loss": 0.0321, "step": 1785 }, { "epoch": 0.2635189966801918, "grad_norm": 3.2655980587005615, "learning_rate": 1.8415066077563362e-05, "loss": 0.1073, "step": 1786 }, { "epoch": 0.26366654371080783, "grad_norm": 2.773749828338623, "learning_rate": 1.8412282151919405e-05, "loss": 0.0846, "step": 1787 }, { "epoch": 0.2638140907414238, "grad_norm": 3.9619085788726807, "learning_rate": 1.8409495994264607e-05, "loss": 0.0737, "step": 1788 }, { "epoch": 0.26396163777203985, "grad_norm": 4.892509937286377, "learning_rate": 1.840670760533821e-05, "loss": 0.1648, "step": 1789 }, { "epoch": 0.2641091848026558, "grad_norm": 2.7034366130828857, "learning_rate": 1.8403916985880054e-05, "loss": 0.074, "step": 1790 }, { "epoch": 0.26425673183327186, "grad_norm": 1.525233268737793, "learning_rate": 1.8401124136630566e-05, "loss": 0.0114, "step": 1791 }, { "epoch": 0.26440427886388784, "grad_norm": 2.4368274211883545, "learning_rate": 1.8398329058330767e-05, "loss": 0.0171, "step": 1792 }, { "epoch": 0.2645518258945039, "grad_norm": 1.877754807472229, "learning_rate": 1.8395531751722268e-05, "loss": 0.0866, "step": 1793 }, { "epoch": 0.26469937292511986, "grad_norm": 1.8865419626235962, "learning_rate": 1.8392732217547265e-05, "loss": 0.0747, "step": 1794 }, { "epoch": 0.2648469199557359, "grad_norm": 2.2851805686950684, "learning_rate": 1.8389930456548563e-05, "loss": 0.0929, "step": 1795 }, { "epoch": 0.2649944669863519, "grad_norm": 1.866339921951294, "learning_rate": 1.8387126469469542e-05, "loss": 0.0568, "step": 1796 }, { "epoch": 0.2651420140169679, "grad_norm": 3.043684720993042, "learning_rate": 1.838432025705418e-05, "loss": 0.0495, "step": 1797 }, { "epoch": 0.26528956104758394, "grad_norm": 3.269800901412964, "learning_rate": 1.8381511820047033e-05, "loss": 0.0995, "step": 1798 }, { "epoch": 0.2654371080781999, "grad_norm": 3.5884273052215576, "learning_rate": 1.837870115919327e-05, "loss": 0.0808, "step": 1799 }, { "epoch": 0.26558465510881596, "grad_norm": 2.3827102184295654, "learning_rate": 1.8375888275238625e-05, "loss": 0.0557, "step": 1800 }, { "epoch": 0.26558465510881596, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06993769109249115, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.1472, "eval_samples_per_second": 5.803, "eval_steps_per_second": 0.199, "step": 1800 }, { "epoch": 0.26573220213943194, "grad_norm": 4.462726593017578, "learning_rate": 1.837307316892944e-05, "loss": 0.0945, "step": 1801 }, { "epoch": 0.265879749170048, "grad_norm": 2.278245687484741, "learning_rate": 1.837025584101264e-05, "loss": 0.09, "step": 1802 }, { "epoch": 0.26602729620066395, "grad_norm": 2.3065855503082275, "learning_rate": 1.8367436292235743e-05, "loss": 0.0595, "step": 1803 }, { "epoch": 0.26617484323128, "grad_norm": 2.1850202083587646, "learning_rate": 1.8364614523346847e-05, "loss": 0.0452, "step": 1804 }, { "epoch": 0.26632239026189597, "grad_norm": 2.008924722671509, "learning_rate": 1.836179053509465e-05, "loss": 0.0625, "step": 1805 }, { "epoch": 0.266469937292512, "grad_norm": 2.2581794261932373, "learning_rate": 1.835896432822843e-05, "loss": 0.075, "step": 1806 }, { "epoch": 0.266617484323128, "grad_norm": 4.481566429138184, "learning_rate": 1.835613590349806e-05, "loss": 0.0996, "step": 1807 }, { "epoch": 0.266765031353744, "grad_norm": 2.1959474086761475, "learning_rate": 1.8353305261654003e-05, "loss": 0.0506, "step": 1808 }, { "epoch": 0.26691257838436, "grad_norm": 1.564278244972229, "learning_rate": 1.83504724034473e-05, "loss": 0.0766, "step": 1809 }, { "epoch": 0.26706012541497604, "grad_norm": 1.711566686630249, "learning_rate": 1.8347637329629585e-05, "loss": 0.0549, "step": 1810 }, { "epoch": 0.267207672445592, "grad_norm": 1.9015671014785767, "learning_rate": 1.8344800040953092e-05, "loss": 0.0871, "step": 1811 }, { "epoch": 0.26735521947620805, "grad_norm": 1.4092458486557007, "learning_rate": 1.8341960538170622e-05, "loss": 0.056, "step": 1812 }, { "epoch": 0.26750276650682403, "grad_norm": 1.2663614749908447, "learning_rate": 1.833911882203558e-05, "loss": 0.0361, "step": 1813 }, { "epoch": 0.26765031353744007, "grad_norm": 2.314241886138916, "learning_rate": 1.8336274893301947e-05, "loss": 0.0667, "step": 1814 }, { "epoch": 0.26779786056805605, "grad_norm": 1.9834822416305542, "learning_rate": 1.83334287527243e-05, "loss": 0.0494, "step": 1815 }, { "epoch": 0.2679454075986721, "grad_norm": 2.036881446838379, "learning_rate": 1.833058040105779e-05, "loss": 0.0756, "step": 1816 }, { "epoch": 0.26809295462928806, "grad_norm": 1.7831075191497803, "learning_rate": 1.8327729839058176e-05, "loss": 0.0371, "step": 1817 }, { "epoch": 0.2682405016599041, "grad_norm": 1.0665255784988403, "learning_rate": 1.8324877067481782e-05, "loss": 0.0075, "step": 1818 }, { "epoch": 0.2683880486905201, "grad_norm": 2.107034206390381, "learning_rate": 1.8322022087085533e-05, "loss": 0.0229, "step": 1819 }, { "epoch": 0.2685355957211361, "grad_norm": 0.44284266233444214, "learning_rate": 1.8319164898626927e-05, "loss": 0.0079, "step": 1820 }, { "epoch": 0.2685355957211361, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9545454545454546, "eval_loss": 0.07334955036640167, "eval_precision": 0.9947368421052631, "eval_recall": 0.9174757281553398, "eval_runtime": 49.9163, "eval_samples_per_second": 5.83, "eval_steps_per_second": 0.2, "step": 1820 }, { "epoch": 0.26868314275175215, "grad_norm": 5.438878536224365, "learning_rate": 1.8316305502864064e-05, "loss": 0.0422, "step": 1821 }, { "epoch": 0.2688306897823681, "grad_norm": 4.154041290283203, "learning_rate": 1.8313443900555615e-05, "loss": 0.1081, "step": 1822 }, { "epoch": 0.26897823681298416, "grad_norm": 1.2309131622314453, "learning_rate": 1.8310580092460838e-05, "loss": 0.0518, "step": 1823 }, { "epoch": 0.26912578384360014, "grad_norm": 1.512600064277649, "learning_rate": 1.830771407933959e-05, "loss": 0.0522, "step": 1824 }, { "epoch": 0.2692733308742162, "grad_norm": 1.9559988975524902, "learning_rate": 1.8304845861952295e-05, "loss": 0.0539, "step": 1825 }, { "epoch": 0.26942087790483216, "grad_norm": 2.5186820030212402, "learning_rate": 1.830197544105998e-05, "loss": 0.0358, "step": 1826 }, { "epoch": 0.2695684249354482, "grad_norm": 1.0470854043960571, "learning_rate": 1.8299102817424234e-05, "loss": 0.0123, "step": 1827 }, { "epoch": 0.26971597196606417, "grad_norm": 3.760580539703369, "learning_rate": 1.8296227991807253e-05, "loss": 0.045, "step": 1828 }, { "epoch": 0.2698635189966802, "grad_norm": 5.5353546142578125, "learning_rate": 1.829335096497181e-05, "loss": 0.1079, "step": 1829 }, { "epoch": 0.2700110660272962, "grad_norm": 5.633973598480225, "learning_rate": 1.829047173768125e-05, "loss": 0.0781, "step": 1830 }, { "epoch": 0.2701586130579122, "grad_norm": 2.8763039112091064, "learning_rate": 1.8287590310699515e-05, "loss": 0.0832, "step": 1831 }, { "epoch": 0.2703061600885282, "grad_norm": 7.963526248931885, "learning_rate": 1.828470668479113e-05, "loss": 0.1088, "step": 1832 }, { "epoch": 0.27045370711914424, "grad_norm": 14.26453685760498, "learning_rate": 1.82818208607212e-05, "loss": 0.1011, "step": 1833 }, { "epoch": 0.2706012541497602, "grad_norm": 2.874347448348999, "learning_rate": 1.8278932839255412e-05, "loss": 0.0487, "step": 1834 }, { "epoch": 0.27074880118037625, "grad_norm": 2.6893510818481445, "learning_rate": 1.827604262116004e-05, "loss": 0.0854, "step": 1835 }, { "epoch": 0.27089634821099223, "grad_norm": 2.124901056289673, "learning_rate": 1.8273150207201938e-05, "loss": 0.0723, "step": 1836 }, { "epoch": 0.27104389524160827, "grad_norm": 1.1771577596664429, "learning_rate": 1.8270255598148542e-05, "loss": 0.0286, "step": 1837 }, { "epoch": 0.27119144227222425, "grad_norm": 1.573038101196289, "learning_rate": 1.8267358794767873e-05, "loss": 0.0271, "step": 1838 }, { "epoch": 0.2713389893028403, "grad_norm": 4.747241020202637, "learning_rate": 1.8264459797828528e-05, "loss": 0.0473, "step": 1839 }, { "epoch": 0.27148653633345626, "grad_norm": 2.565274477005005, "learning_rate": 1.82615586080997e-05, "loss": 0.056, "step": 1840 }, { "epoch": 0.27148653633345626, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.07094237208366394, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.1776, "eval_samples_per_second": 5.917, "eval_steps_per_second": 0.203, "step": 1840 }, { "epoch": 0.2716340833640723, "grad_norm": 3.047848701477051, "learning_rate": 1.825865522635115e-05, "loss": 0.0683, "step": 1841 }, { "epoch": 0.27178163039468833, "grad_norm": 3.3757214546203613, "learning_rate": 1.8255749653353225e-05, "loss": 0.1105, "step": 1842 }, { "epoch": 0.2719291774253043, "grad_norm": 4.784446716308594, "learning_rate": 1.8252841889876854e-05, "loss": 0.0595, "step": 1843 }, { "epoch": 0.27207672445592035, "grad_norm": 2.870875835418701, "learning_rate": 1.824993193669355e-05, "loss": 0.0718, "step": 1844 }, { "epoch": 0.27222427148653633, "grad_norm": 3.5102977752685547, "learning_rate": 1.82470197945754e-05, "loss": 0.0824, "step": 1845 }, { "epoch": 0.27237181851715236, "grad_norm": 0.9509576559066772, "learning_rate": 1.8244105464295073e-05, "loss": 0.0225, "step": 1846 }, { "epoch": 0.27251936554776834, "grad_norm": 1.8587613105773926, "learning_rate": 1.8241188946625832e-05, "loss": 0.0779, "step": 1847 }, { "epoch": 0.2726669125783844, "grad_norm": 1.541709065437317, "learning_rate": 1.82382702423415e-05, "loss": 0.0283, "step": 1848 }, { "epoch": 0.27281445960900036, "grad_norm": 2.8956470489501953, "learning_rate": 1.8235349352216495e-05, "loss": 0.1142, "step": 1849 }, { "epoch": 0.2729620066396164, "grad_norm": 1.300102710723877, "learning_rate": 1.8232426277025804e-05, "loss": 0.0639, "step": 1850 }, { "epoch": 0.2731095536702324, "grad_norm": 1.6062426567077637, "learning_rate": 1.822950101754501e-05, "loss": 0.048, "step": 1851 }, { "epoch": 0.2732571007008484, "grad_norm": 4.50151252746582, "learning_rate": 1.8226573574550253e-05, "loss": 0.0561, "step": 1852 }, { "epoch": 0.2734046477314644, "grad_norm": 2.7688539028167725, "learning_rate": 1.822364394881827e-05, "loss": 0.0327, "step": 1853 }, { "epoch": 0.2735521947620804, "grad_norm": 1.9938652515411377, "learning_rate": 1.8220712141126375e-05, "loss": 0.0675, "step": 1854 }, { "epoch": 0.2736997417926964, "grad_norm": 2.556931734085083, "learning_rate": 1.821777815225245e-05, "loss": 0.0899, "step": 1855 }, { "epoch": 0.27384728882331244, "grad_norm": 4.636996269226074, "learning_rate": 1.8214841982974975e-05, "loss": 0.1662, "step": 1856 }, { "epoch": 0.2739948358539284, "grad_norm": 2.0064525604248047, "learning_rate": 1.8211903634072983e-05, "loss": 0.0573, "step": 1857 }, { "epoch": 0.27414238288454446, "grad_norm": 3.2013099193573, "learning_rate": 1.8208963106326108e-05, "loss": 0.0901, "step": 1858 }, { "epoch": 0.27428992991516044, "grad_norm": 2.746439218521118, "learning_rate": 1.820602040051455e-05, "loss": 0.0737, "step": 1859 }, { "epoch": 0.27443747694577647, "grad_norm": 3.1914734840393066, "learning_rate": 1.8203075517419092e-05, "loss": 0.0923, "step": 1860 }, { "epoch": 0.27443747694577647, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9556650246305419, "eval_loss": 0.06761012226343155, "eval_precision": 0.97, "eval_recall": 0.941747572815534, "eval_runtime": 49.2497, "eval_samples_per_second": 5.909, "eval_steps_per_second": 0.203, "step": 1860 }, { "epoch": 0.27458502397639245, "grad_norm": 1.8137160539627075, "learning_rate": 1.820012845782109e-05, "loss": 0.0452, "step": 1861 }, { "epoch": 0.2747325710070085, "grad_norm": 7.028554916381836, "learning_rate": 1.8197179222502486e-05, "loss": 0.0723, "step": 1862 }, { "epoch": 0.27488011803762447, "grad_norm": 2.008192539215088, "learning_rate": 1.819422781224579e-05, "loss": 0.0827, "step": 1863 }, { "epoch": 0.2750276650682405, "grad_norm": 2.3676629066467285, "learning_rate": 1.8191274227834087e-05, "loss": 0.0803, "step": 1864 }, { "epoch": 0.27517521209885654, "grad_norm": 2.65525221824646, "learning_rate": 1.8188318470051054e-05, "loss": 0.1222, "step": 1865 }, { "epoch": 0.2753227591294725, "grad_norm": 3.2418432235717773, "learning_rate": 1.818536053968093e-05, "loss": 0.1249, "step": 1866 }, { "epoch": 0.27547030616008855, "grad_norm": 1.959547519683838, "learning_rate": 1.8182400437508537e-05, "loss": 0.0256, "step": 1867 }, { "epoch": 0.27561785319070453, "grad_norm": 1.6460672616958618, "learning_rate": 1.817943816431927e-05, "loss": 0.0253, "step": 1868 }, { "epoch": 0.27576540022132057, "grad_norm": 2.181018590927124, "learning_rate": 1.8176473720899107e-05, "loss": 0.0893, "step": 1869 }, { "epoch": 0.27591294725193655, "grad_norm": 1.9750832319259644, "learning_rate": 1.817350710803459e-05, "loss": 0.0389, "step": 1870 }, { "epoch": 0.2760604942825526, "grad_norm": 3.312065601348877, "learning_rate": 1.817053832651285e-05, "loss": 0.0166, "step": 1871 }, { "epoch": 0.27620804131316856, "grad_norm": 1.3024770021438599, "learning_rate": 1.816756737712158e-05, "loss": 0.0368, "step": 1872 }, { "epoch": 0.2763555883437846, "grad_norm": 1.5345538854599, "learning_rate": 1.816459426064906e-05, "loss": 0.0656, "step": 1873 }, { "epoch": 0.2765031353744006, "grad_norm": 2.562371015548706, "learning_rate": 1.816161897788414e-05, "loss": 0.0422, "step": 1874 }, { "epoch": 0.2766506824050166, "grad_norm": 2.397543430328369, "learning_rate": 1.815864152961624e-05, "loss": 0.0366, "step": 1875 }, { "epoch": 0.2767982294356326, "grad_norm": 3.582508087158203, "learning_rate": 1.8155661916635362e-05, "loss": 0.1649, "step": 1876 }, { "epoch": 0.2769457764662486, "grad_norm": 4.200384140014648, "learning_rate": 1.8152680139732083e-05, "loss": 0.1155, "step": 1877 }, { "epoch": 0.2770933234968646, "grad_norm": 4.91301965713501, "learning_rate": 1.8149696199697547e-05, "loss": 0.1864, "step": 1878 }, { "epoch": 0.27724087052748064, "grad_norm": 1.6632002592086792, "learning_rate": 1.8146710097323473e-05, "loss": 0.0541, "step": 1879 }, { "epoch": 0.2773884175580966, "grad_norm": 2.451502561569214, "learning_rate": 1.8143721833402166e-05, "loss": 0.0769, "step": 1880 }, { "epoch": 0.2773884175580966, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.07145875692367554, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 50.6396, "eval_samples_per_second": 5.746, "eval_steps_per_second": 0.197, "step": 1880 }, { "epoch": 0.27753596458871266, "grad_norm": 3.385312080383301, "learning_rate": 1.8140731408726484e-05, "loss": 0.1248, "step": 1881 }, { "epoch": 0.27768351161932864, "grad_norm": 1.4377604722976685, "learning_rate": 1.8137738824089876e-05, "loss": 0.0452, "step": 1882 }, { "epoch": 0.2778310586499447, "grad_norm": 2.1186070442199707, "learning_rate": 1.813474408028635e-05, "loss": 0.0544, "step": 1883 }, { "epoch": 0.27797860568056065, "grad_norm": 4.867067337036133, "learning_rate": 1.8131747178110507e-05, "loss": 0.1193, "step": 1884 }, { "epoch": 0.2781261527111767, "grad_norm": 3.2188632488250732, "learning_rate": 1.81287481183575e-05, "loss": 0.1098, "step": 1885 }, { "epoch": 0.2782736997417927, "grad_norm": 2.8877511024475098, "learning_rate": 1.8125746901823064e-05, "loss": 0.0532, "step": 1886 }, { "epoch": 0.2784212467724087, "grad_norm": 1.40437650680542, "learning_rate": 1.81227435293035e-05, "loss": 0.0454, "step": 1887 }, { "epoch": 0.27856879380302474, "grad_norm": 2.2449872493743896, "learning_rate": 1.811973800159569e-05, "loss": 0.057, "step": 1888 }, { "epoch": 0.2787163408336407, "grad_norm": 3.3180947303771973, "learning_rate": 1.811673031949708e-05, "loss": 0.0645, "step": 1889 }, { "epoch": 0.27886388786425675, "grad_norm": 3.3282129764556885, "learning_rate": 1.8113720483805698e-05, "loss": 0.0552, "step": 1890 }, { "epoch": 0.27901143489487273, "grad_norm": 1.8338218927383423, "learning_rate": 1.8110708495320132e-05, "loss": 0.0447, "step": 1891 }, { "epoch": 0.27915898192548877, "grad_norm": 2.270963430404663, "learning_rate": 1.810769435483955e-05, "loss": 0.0932, "step": 1892 }, { "epoch": 0.27930652895610475, "grad_norm": 3.8612663745880127, "learning_rate": 1.810467806316368e-05, "loss": 0.087, "step": 1893 }, { "epoch": 0.2794540759867208, "grad_norm": 1.1670207977294922, "learning_rate": 1.8101659621092832e-05, "loss": 0.0496, "step": 1894 }, { "epoch": 0.27960162301733676, "grad_norm": 1.885435700416565, "learning_rate": 1.809863902942788e-05, "loss": 0.0275, "step": 1895 }, { "epoch": 0.2797491700479528, "grad_norm": 4.63778018951416, "learning_rate": 1.8095616288970268e-05, "loss": 0.0579, "step": 1896 }, { "epoch": 0.2798967170785688, "grad_norm": 2.977574110031128, "learning_rate": 1.8092591400522018e-05, "loss": 0.0993, "step": 1897 }, { "epoch": 0.2800442641091848, "grad_norm": 3.402623414993286, "learning_rate": 1.8089564364885716e-05, "loss": 0.0883, "step": 1898 }, { "epoch": 0.2801918111398008, "grad_norm": 2.1657140254974365, "learning_rate": 1.8086535182864513e-05, "loss": 0.0911, "step": 1899 }, { "epoch": 0.28033935817041683, "grad_norm": 2.973764181137085, "learning_rate": 1.808350385526214e-05, "loss": 0.1452, "step": 1900 }, { "epoch": 0.28033935817041683, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.0686851218342781, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 50.2754, "eval_samples_per_second": 5.788, "eval_steps_per_second": 0.199, "step": 1900 }, { "epoch": 0.2804869052010328, "grad_norm": 1.2506681680679321, "learning_rate": 1.808047038288289e-05, "loss": 0.0572, "step": 1901 }, { "epoch": 0.28063445223164885, "grad_norm": 1.06496262550354, "learning_rate": 1.8077434766531624e-05, "loss": 0.0329, "step": 1902 }, { "epoch": 0.2807819992622648, "grad_norm": 1.2955524921417236, "learning_rate": 1.807439700701378e-05, "loss": 0.0393, "step": 1903 }, { "epoch": 0.28092954629288086, "grad_norm": 2.02809739112854, "learning_rate": 1.807135710513536e-05, "loss": 0.0977, "step": 1904 }, { "epoch": 0.28107709332349684, "grad_norm": 3.2513322830200195, "learning_rate": 1.8068315061702927e-05, "loss": 0.0392, "step": 1905 }, { "epoch": 0.2812246403541129, "grad_norm": 1.3542325496673584, "learning_rate": 1.806527087752363e-05, "loss": 0.0318, "step": 1906 }, { "epoch": 0.28137218738472886, "grad_norm": 1.1355664730072021, "learning_rate": 1.806222455340516e-05, "loss": 0.0295, "step": 1907 }, { "epoch": 0.2815197344153449, "grad_norm": 5.970703125, "learning_rate": 1.8059176090155804e-05, "loss": 0.1103, "step": 1908 }, { "epoch": 0.2816672814459609, "grad_norm": 2.8276009559631348, "learning_rate": 1.80561254885844e-05, "loss": 0.1166, "step": 1909 }, { "epoch": 0.2818148284765769, "grad_norm": 3.3073973655700684, "learning_rate": 1.8053072749500354e-05, "loss": 0.0993, "step": 1910 }, { "epoch": 0.28196237550719294, "grad_norm": 3.4079155921936035, "learning_rate": 1.8050017873713646e-05, "loss": 0.0675, "step": 1911 }, { "epoch": 0.2821099225378089, "grad_norm": 1.537765383720398, "learning_rate": 1.804696086203481e-05, "loss": 0.0575, "step": 1912 }, { "epoch": 0.28225746956842496, "grad_norm": 1.6342790126800537, "learning_rate": 1.804390171527497e-05, "loss": 0.0391, "step": 1913 }, { "epoch": 0.28240501659904094, "grad_norm": 3.1278223991394043, "learning_rate": 1.8040840434245794e-05, "loss": 0.1122, "step": 1914 }, { "epoch": 0.28255256362965697, "grad_norm": 1.565006136894226, "learning_rate": 1.8037777019759523e-05, "loss": 0.0819, "step": 1915 }, { "epoch": 0.28270011066027295, "grad_norm": 1.9849228858947754, "learning_rate": 1.803471147262897e-05, "loss": 0.0428, "step": 1916 }, { "epoch": 0.282847657690889, "grad_norm": 1.583899736404419, "learning_rate": 1.8031643793667503e-05, "loss": 0.0257, "step": 1917 }, { "epoch": 0.28299520472150497, "grad_norm": 1.8216240406036377, "learning_rate": 1.802857398368907e-05, "loss": 0.0597, "step": 1918 }, { "epoch": 0.283142751752121, "grad_norm": 8.167851448059082, "learning_rate": 1.802550204350817e-05, "loss": 0.0548, "step": 1919 }, { "epoch": 0.283290298782737, "grad_norm": 2.49054217338562, "learning_rate": 1.8022427973939878e-05, "loss": 0.0938, "step": 1920 }, { "epoch": 0.283290298782737, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06758425384759903, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.7773, "eval_samples_per_second": 5.846, "eval_steps_per_second": 0.201, "step": 1920 }, { "epoch": 0.283437845813353, "grad_norm": 2.7514865398406982, "learning_rate": 1.8019351775799826e-05, "loss": 0.0554, "step": 1921 }, { "epoch": 0.283585392843969, "grad_norm": 4.395203113555908, "learning_rate": 1.8016273449904215e-05, "loss": 0.1256, "step": 1922 }, { "epoch": 0.28373293987458503, "grad_norm": 1.9275283813476562, "learning_rate": 1.801319299706981e-05, "loss": 0.0494, "step": 1923 }, { "epoch": 0.283880486905201, "grad_norm": 2.956646203994751, "learning_rate": 1.8010110418113945e-05, "loss": 0.0839, "step": 1924 }, { "epoch": 0.28402803393581705, "grad_norm": 0.38787606358528137, "learning_rate": 1.8007025713854503e-05, "loss": 0.0055, "step": 1925 }, { "epoch": 0.28417558096643303, "grad_norm": 2.3350155353546143, "learning_rate": 1.8003938885109954e-05, "loss": 0.0534, "step": 1926 }, { "epoch": 0.28432312799704906, "grad_norm": 2.094541072845459, "learning_rate": 1.8000849932699308e-05, "loss": 0.0539, "step": 1927 }, { "epoch": 0.28447067502766504, "grad_norm": 1.2548242807388306, "learning_rate": 1.7997758857442156e-05, "loss": 0.052, "step": 1928 }, { "epoch": 0.2846182220582811, "grad_norm": 5.784312725067139, "learning_rate": 1.7994665660158644e-05, "loss": 0.0866, "step": 1929 }, { "epoch": 0.28476576908889706, "grad_norm": 1.255893349647522, "learning_rate": 1.7991570341669483e-05, "loss": 0.0539, "step": 1930 }, { "epoch": 0.2849133161195131, "grad_norm": 2.5495920181274414, "learning_rate": 1.798847290279594e-05, "loss": 0.0781, "step": 1931 }, { "epoch": 0.28506086315012913, "grad_norm": 4.172173023223877, "learning_rate": 1.798537334435986e-05, "loss": 0.0765, "step": 1932 }, { "epoch": 0.2852084101807451, "grad_norm": 3.1734747886657715, "learning_rate": 1.798227166718364e-05, "loss": 0.0997, "step": 1933 }, { "epoch": 0.28535595721136114, "grad_norm": 1.5780705213546753, "learning_rate": 1.797916787209024e-05, "loss": 0.0802, "step": 1934 }, { "epoch": 0.2855035042419771, "grad_norm": 2.0439326763153076, "learning_rate": 1.797606195990318e-05, "loss": 0.0957, "step": 1935 }, { "epoch": 0.28565105127259316, "grad_norm": 1.3669174909591675, "learning_rate": 1.7972953931446543e-05, "loss": 0.0328, "step": 1936 }, { "epoch": 0.28579859830320914, "grad_norm": 2.3694560527801514, "learning_rate": 1.7969843787544983e-05, "loss": 0.076, "step": 1937 }, { "epoch": 0.2859461453338252, "grad_norm": 1.0859376192092896, "learning_rate": 1.7966731529023697e-05, "loss": 0.047, "step": 1938 }, { "epoch": 0.28609369236444115, "grad_norm": 2.6620748043060303, "learning_rate": 1.796361715670846e-05, "loss": 0.081, "step": 1939 }, { "epoch": 0.2862412393950572, "grad_norm": 1.7046546936035156, "learning_rate": 1.7960500671425597e-05, "loss": 0.0583, "step": 1940 }, { "epoch": 0.2862412393950572, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.06977508217096329, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 49.1369, "eval_samples_per_second": 5.922, "eval_steps_per_second": 0.204, "step": 1940 }, { "epoch": 0.28638878642567317, "grad_norm": 2.993781328201294, "learning_rate": 1.7957382074002002e-05, "loss": 0.1066, "step": 1941 }, { "epoch": 0.2865363334562892, "grad_norm": 2.2758100032806396, "learning_rate": 1.7954261365265125e-05, "loss": 0.0453, "step": 1942 }, { "epoch": 0.2866838804869052, "grad_norm": 2.030885696411133, "learning_rate": 1.795113854604297e-05, "loss": 0.0757, "step": 1943 }, { "epoch": 0.2868314275175212, "grad_norm": 5.546229839324951, "learning_rate": 1.794801361716411e-05, "loss": 0.105, "step": 1944 }, { "epoch": 0.2869789745481372, "grad_norm": 2.4605917930603027, "learning_rate": 1.794488657945768e-05, "loss": 0.0296, "step": 1945 }, { "epoch": 0.28712652157875324, "grad_norm": 2.6243858337402344, "learning_rate": 1.7941757433753362e-05, "loss": 0.0897, "step": 1946 }, { "epoch": 0.2872740686093692, "grad_norm": 1.5439924001693726, "learning_rate": 1.7938626180881408e-05, "loss": 0.0568, "step": 1947 }, { "epoch": 0.28742161563998525, "grad_norm": 3.3562021255493164, "learning_rate": 1.7935492821672628e-05, "loss": 0.0471, "step": 1948 }, { "epoch": 0.28756916267060123, "grad_norm": 1.4034656286239624, "learning_rate": 1.7932357356958387e-05, "loss": 0.0621, "step": 1949 }, { "epoch": 0.28771670970121727, "grad_norm": 1.2984174489974976, "learning_rate": 1.792921978757061e-05, "loss": 0.0433, "step": 1950 }, { "epoch": 0.28786425673183325, "grad_norm": 7.683390140533447, "learning_rate": 1.792608011434178e-05, "loss": 0.0962, "step": 1951 }, { "epoch": 0.2880118037624493, "grad_norm": 2.84192156791687, "learning_rate": 1.7922938338104942e-05, "loss": 0.0706, "step": 1952 }, { "epoch": 0.2881593507930653, "grad_norm": 4.856176853179932, "learning_rate": 1.7919794459693692e-05, "loss": 0.1189, "step": 1953 }, { "epoch": 0.2883068978236813, "grad_norm": 1.6959781646728516, "learning_rate": 1.7916648479942192e-05, "loss": 0.0578, "step": 1954 }, { "epoch": 0.28845444485429733, "grad_norm": 2.625617027282715, "learning_rate": 1.7913500399685155e-05, "loss": 0.094, "step": 1955 }, { "epoch": 0.2886019918849133, "grad_norm": 2.724163055419922, "learning_rate": 1.7910350219757854e-05, "loss": 0.0685, "step": 1956 }, { "epoch": 0.28874953891552935, "grad_norm": 3.802644729614258, "learning_rate": 1.7907197940996117e-05, "loss": 0.0401, "step": 1957 }, { "epoch": 0.2888970859461453, "grad_norm": 2.1602630615234375, "learning_rate": 1.7904043564236335e-05, "loss": 0.0626, "step": 1958 }, { "epoch": 0.28904463297676136, "grad_norm": 1.0709794759750366, "learning_rate": 1.7900887090315445e-05, "loss": 0.05, "step": 1959 }, { "epoch": 0.28919218000737734, "grad_norm": 3.7788944244384766, "learning_rate": 1.7897728520070955e-05, "loss": 0.0937, "step": 1960 }, { "epoch": 0.28919218000737734, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.06719287484884262, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.0582, "eval_samples_per_second": 5.932, "eval_steps_per_second": 0.204, "step": 1960 }, { "epoch": 0.2893397270379934, "grad_norm": 1.0289095640182495, "learning_rate": 1.7894567854340917e-05, "loss": 0.018, "step": 1961 }, { "epoch": 0.28948727406860936, "grad_norm": 3.2440502643585205, "learning_rate": 1.789140509396394e-05, "loss": 0.0549, "step": 1962 }, { "epoch": 0.2896348210992254, "grad_norm": 2.771836042404175, "learning_rate": 1.788824023977919e-05, "loss": 0.1131, "step": 1963 }, { "epoch": 0.2897823681298414, "grad_norm": 2.5311827659606934, "learning_rate": 1.78850732926264e-05, "loss": 0.0788, "step": 1964 }, { "epoch": 0.2899299151604574, "grad_norm": 1.3133442401885986, "learning_rate": 1.788190425334584e-05, "loss": 0.0321, "step": 1965 }, { "epoch": 0.2900774621910734, "grad_norm": 7.215641975402832, "learning_rate": 1.7878733122778346e-05, "loss": 0.097, "step": 1966 }, { "epoch": 0.2902250092216894, "grad_norm": 4.248411178588867, "learning_rate": 1.787555990176531e-05, "loss": 0.0723, "step": 1967 }, { "epoch": 0.2903725562523054, "grad_norm": 2.580868721008301, "learning_rate": 1.787238459114867e-05, "loss": 0.0899, "step": 1968 }, { "epoch": 0.29052010328292144, "grad_norm": 1.865951418876648, "learning_rate": 1.7869207191770926e-05, "loss": 0.0809, "step": 1969 }, { "epoch": 0.2906676503135374, "grad_norm": 1.7011492252349854, "learning_rate": 1.786602770447513e-05, "loss": 0.0587, "step": 1970 }, { "epoch": 0.29081519734415345, "grad_norm": 1.7207391262054443, "learning_rate": 1.7862846130104884e-05, "loss": 0.0846, "step": 1971 }, { "epoch": 0.29096274437476943, "grad_norm": 2.1384150981903076, "learning_rate": 1.7859662469504356e-05, "loss": 0.0563, "step": 1972 }, { "epoch": 0.29111029140538547, "grad_norm": 1.8367100954055786, "learning_rate": 1.7856476723518252e-05, "loss": 0.0738, "step": 1973 }, { "epoch": 0.29125783843600145, "grad_norm": 2.835747003555298, "learning_rate": 1.785328889299184e-05, "loss": 0.1234, "step": 1974 }, { "epoch": 0.2914053854666175, "grad_norm": 3.6200499534606934, "learning_rate": 1.7850098978770943e-05, "loss": 0.0418, "step": 1975 }, { "epoch": 0.2915529324972335, "grad_norm": 1.439263105392456, "learning_rate": 1.784690698170193e-05, "loss": 0.0588, "step": 1976 }, { "epoch": 0.2917004795278495, "grad_norm": 2.0826683044433594, "learning_rate": 1.7843712902631722e-05, "loss": 0.0366, "step": 1977 }, { "epoch": 0.29184802655846553, "grad_norm": 1.399391531944275, "learning_rate": 1.7840516742407804e-05, "loss": 0.0283, "step": 1978 }, { "epoch": 0.2919955735890815, "grad_norm": 1.0982340574264526, "learning_rate": 1.7837318501878204e-05, "loss": 0.0183, "step": 1979 }, { "epoch": 0.29214312061969755, "grad_norm": 1.996166706085205, "learning_rate": 1.7834118181891498e-05, "loss": 0.0391, "step": 1980 }, { "epoch": 0.29214312061969755, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06745325773954391, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 50.1494, "eval_samples_per_second": 5.803, "eval_steps_per_second": 0.199, "step": 1980 }, { "epoch": 0.29229066765031353, "grad_norm": 1.8825138807296753, "learning_rate": 1.7830915783296828e-05, "loss": 0.0375, "step": 1981 }, { "epoch": 0.29243821468092956, "grad_norm": 8.279768943786621, "learning_rate": 1.7827711306943868e-05, "loss": 0.1422, "step": 1982 }, { "epoch": 0.29258576171154554, "grad_norm": 5.702897071838379, "learning_rate": 1.7824504753682865e-05, "loss": 0.0899, "step": 1983 }, { "epoch": 0.2927333087421616, "grad_norm": 4.7787604331970215, "learning_rate": 1.78212961243646e-05, "loss": 0.0668, "step": 1984 }, { "epoch": 0.29288085577277756, "grad_norm": 1.4204812049865723, "learning_rate": 1.7818085419840412e-05, "loss": 0.0845, "step": 1985 }, { "epoch": 0.2930284028033936, "grad_norm": 1.6509289741516113, "learning_rate": 1.7814872640962192e-05, "loss": 0.0241, "step": 1986 }, { "epoch": 0.2931759498340096, "grad_norm": 3.7667734622955322, "learning_rate": 1.781165778858237e-05, "loss": 0.1058, "step": 1987 }, { "epoch": 0.2933234968646256, "grad_norm": 6.3251872062683105, "learning_rate": 1.7808440863553952e-05, "loss": 0.1092, "step": 1988 }, { "epoch": 0.2934710438952416, "grad_norm": 4.902050495147705, "learning_rate": 1.780522186673046e-05, "loss": 0.0908, "step": 1989 }, { "epoch": 0.2936185909258576, "grad_norm": 2.7209508419036865, "learning_rate": 1.7802000798965996e-05, "loss": 0.0821, "step": 1990 }, { "epoch": 0.2937661379564736, "grad_norm": 3.469336986541748, "learning_rate": 1.779877766111519e-05, "loss": 0.1272, "step": 1991 }, { "epoch": 0.29391368498708964, "grad_norm": 5.348670482635498, "learning_rate": 1.7795552454033226e-05, "loss": 0.1046, "step": 1992 }, { "epoch": 0.2940612320177056, "grad_norm": 2.974876642227173, "learning_rate": 1.779232517857585e-05, "loss": 0.0459, "step": 1993 }, { "epoch": 0.29420877904832166, "grad_norm": 8.443914413452148, "learning_rate": 1.7789095835599346e-05, "loss": 0.059, "step": 1994 }, { "epoch": 0.29435632607893764, "grad_norm": 1.456032156944275, "learning_rate": 1.7785864425960543e-05, "loss": 0.0207, "step": 1995 }, { "epoch": 0.29450387310955367, "grad_norm": 2.9038796424865723, "learning_rate": 1.7782630950516826e-05, "loss": 0.0917, "step": 1996 }, { "epoch": 0.2946514201401697, "grad_norm": 2.5437049865722656, "learning_rate": 1.7779395410126127e-05, "loss": 0.0708, "step": 1997 }, { "epoch": 0.2947989671707857, "grad_norm": 1.7724251747131348, "learning_rate": 1.777615780564692e-05, "loss": 0.0662, "step": 1998 }, { "epoch": 0.2949465142014017, "grad_norm": 1.7889469861984253, "learning_rate": 1.7772918137938234e-05, "loss": 0.0855, "step": 1999 }, { "epoch": 0.2950940612320177, "grad_norm": 1.1646666526794434, "learning_rate": 1.7769676407859644e-05, "loss": 0.0536, "step": 2000 }, { "epoch": 0.2950940612320177, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9521410579345088, "eval_loss": 0.06987255066633224, "eval_precision": 0.9895287958115183, "eval_recall": 0.9174757281553398, "eval_runtime": 49.3515, "eval_samples_per_second": 5.896, "eval_steps_per_second": 0.203, "step": 2000 }, { "epoch": 0.29524160826263374, "grad_norm": 1.9098221063613892, "learning_rate": 1.7766432616271263e-05, "loss": 0.0981, "step": 2001 }, { "epoch": 0.2953891552932497, "grad_norm": 2.293426513671875, "learning_rate": 1.776318676403377e-05, "loss": 0.0471, "step": 2002 }, { "epoch": 0.29553670232386575, "grad_norm": 2.658841133117676, "learning_rate": 1.7759938852008368e-05, "loss": 0.1074, "step": 2003 }, { "epoch": 0.29568424935448173, "grad_norm": 1.2824565172195435, "learning_rate": 1.7756688881056824e-05, "loss": 0.077, "step": 2004 }, { "epoch": 0.29583179638509777, "grad_norm": 2.054882287979126, "learning_rate": 1.775343685204144e-05, "loss": 0.0397, "step": 2005 }, { "epoch": 0.29597934341571375, "grad_norm": 2.451122522354126, "learning_rate": 1.775018276582508e-05, "loss": 0.0876, "step": 2006 }, { "epoch": 0.2961268904463298, "grad_norm": 1.9787144660949707, "learning_rate": 1.774692662327113e-05, "loss": 0.077, "step": 2007 }, { "epoch": 0.29627443747694576, "grad_norm": 1.9749809503555298, "learning_rate": 1.7743668425243547e-05, "loss": 0.0536, "step": 2008 }, { "epoch": 0.2964219845075618, "grad_norm": 1.7527657747268677, "learning_rate": 1.7740408172606808e-05, "loss": 0.0643, "step": 2009 }, { "epoch": 0.2965695315381778, "grad_norm": 2.3230299949645996, "learning_rate": 1.7737145866225958e-05, "loss": 0.09, "step": 2010 }, { "epoch": 0.2967170785687938, "grad_norm": 2.791459798812866, "learning_rate": 1.7733881506966574e-05, "loss": 0.0614, "step": 2011 }, { "epoch": 0.2968646255994098, "grad_norm": 1.4842112064361572, "learning_rate": 1.7730615095694777e-05, "loss": 0.0651, "step": 2012 }, { "epoch": 0.29701217263002583, "grad_norm": 1.992242693901062, "learning_rate": 1.772734663327724e-05, "loss": 0.0689, "step": 2013 }, { "epoch": 0.2971597196606418, "grad_norm": 1.381278157234192, "learning_rate": 1.7724076120581174e-05, "loss": 0.0315, "step": 2014 }, { "epoch": 0.29730726669125784, "grad_norm": 2.3684511184692383, "learning_rate": 1.772080355847434e-05, "loss": 0.0675, "step": 2015 }, { "epoch": 0.2974548137218738, "grad_norm": 3.954451560974121, "learning_rate": 1.771752894782504e-05, "loss": 0.0626, "step": 2016 }, { "epoch": 0.29760236075248986, "grad_norm": 1.9097782373428345, "learning_rate": 1.7714252289502115e-05, "loss": 0.0554, "step": 2017 }, { "epoch": 0.29774990778310584, "grad_norm": 1.399617314338684, "learning_rate": 1.7710973584374952e-05, "loss": 0.023, "step": 2018 }, { "epoch": 0.2978974548137219, "grad_norm": 2.006929636001587, "learning_rate": 1.7707692833313485e-05, "loss": 0.0436, "step": 2019 }, { "epoch": 0.2980450018443379, "grad_norm": 1.7862168550491333, "learning_rate": 1.7704410037188185e-05, "loss": 0.0647, "step": 2020 }, { "epoch": 0.2980450018443379, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9547738693467337, "eval_loss": 0.07232852280139923, "eval_precision": 0.9895833333333334, "eval_recall": 0.9223300970873787, "eval_runtime": 50.3001, "eval_samples_per_second": 5.785, "eval_steps_per_second": 0.199, "step": 2020 }, { "epoch": 0.2981925488749539, "grad_norm": 4.871994495391846, "learning_rate": 1.770112519687007e-05, "loss": 0.1089, "step": 2021 }, { "epoch": 0.2983400959055699, "grad_norm": 3.192202568054199, "learning_rate": 1.76978383132307e-05, "loss": 0.1199, "step": 2022 }, { "epoch": 0.2984876429361859, "grad_norm": 2.528630495071411, "learning_rate": 1.7694549387142177e-05, "loss": 0.0659, "step": 2023 }, { "epoch": 0.29863518996680194, "grad_norm": 2.4678826332092285, "learning_rate": 1.769125841947714e-05, "loss": 0.0606, "step": 2024 }, { "epoch": 0.2987827369974179, "grad_norm": 2.455389976501465, "learning_rate": 1.7687965411108778e-05, "loss": 0.0696, "step": 2025 }, { "epoch": 0.29893028402803395, "grad_norm": 3.206726312637329, "learning_rate": 1.768467036291081e-05, "loss": 0.0488, "step": 2026 }, { "epoch": 0.29907783105864993, "grad_norm": 1.5881465673446655, "learning_rate": 1.768137327575751e-05, "loss": 0.0653, "step": 2027 }, { "epoch": 0.29922537808926597, "grad_norm": 1.4814188480377197, "learning_rate": 1.767807415052369e-05, "loss": 0.0351, "step": 2028 }, { "epoch": 0.29937292511988195, "grad_norm": 0.7718455791473389, "learning_rate": 1.7674772988084688e-05, "loss": 0.0225, "step": 2029 }, { "epoch": 0.299520472150498, "grad_norm": 2.762436628341675, "learning_rate": 1.7671469789316397e-05, "loss": 0.0939, "step": 2030 }, { "epoch": 0.29966801918111396, "grad_norm": 0.6224008202552795, "learning_rate": 1.7668164555095252e-05, "loss": 0.0077, "step": 2031 }, { "epoch": 0.29981556621173, "grad_norm": 2.458287239074707, "learning_rate": 1.7664857286298224e-05, "loss": 0.0426, "step": 2032 }, { "epoch": 0.299963113242346, "grad_norm": 1.0516847372055054, "learning_rate": 1.766154798380281e-05, "loss": 0.0511, "step": 2033 }, { "epoch": 0.300110660272962, "grad_norm": 2.5216686725616455, "learning_rate": 1.7658236648487074e-05, "loss": 0.1235, "step": 2034 }, { "epoch": 0.300258207303578, "grad_norm": 1.444677710533142, "learning_rate": 1.7654923281229598e-05, "loss": 0.0572, "step": 2035 }, { "epoch": 0.30040575433419403, "grad_norm": 1.2242169380187988, "learning_rate": 1.765160788290951e-05, "loss": 0.0316, "step": 2036 }, { "epoch": 0.30055330136481, "grad_norm": 2.5934975147247314, "learning_rate": 1.7648290454406475e-05, "loss": 0.0302, "step": 2037 }, { "epoch": 0.30070084839542605, "grad_norm": 1.539410948753357, "learning_rate": 1.7644970996600706e-05, "loss": 0.0423, "step": 2038 }, { "epoch": 0.300848395426042, "grad_norm": 2.736931085586548, "learning_rate": 1.7641649510372938e-05, "loss": 0.0485, "step": 2039 }, { "epoch": 0.30099594245665806, "grad_norm": 3.35821795463562, "learning_rate": 1.7638325996604456e-05, "loss": 0.0637, "step": 2040 }, { "epoch": 0.30099594245665806, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9584352078239609, "eval_loss": 0.06988977640867233, "eval_precision": 0.9655172413793104, "eval_recall": 0.9514563106796117, "eval_runtime": 50.3385, "eval_samples_per_second": 5.781, "eval_steps_per_second": 0.199, "step": 2040 }, { "epoch": 0.30114348948727404, "grad_norm": 1.4133102893829346, "learning_rate": 1.7635000456177085e-05, "loss": 0.0186, "step": 2041 }, { "epoch": 0.3012910365178901, "grad_norm": 3.4392263889312744, "learning_rate": 1.7631672889973173e-05, "loss": 0.0771, "step": 2042 }, { "epoch": 0.3014385835485061, "grad_norm": 3.163463830947876, "learning_rate": 1.7628343298875626e-05, "loss": 0.0355, "step": 2043 }, { "epoch": 0.3015861305791221, "grad_norm": 3.125253200531006, "learning_rate": 1.7625011683767867e-05, "loss": 0.1054, "step": 2044 }, { "epoch": 0.3017336776097381, "grad_norm": 3.791630744934082, "learning_rate": 1.7621678045533873e-05, "loss": 0.0508, "step": 2045 }, { "epoch": 0.3018812246403541, "grad_norm": 3.480570077896118, "learning_rate": 1.7618342385058147e-05, "loss": 0.1314, "step": 2046 }, { "epoch": 0.30202877167097014, "grad_norm": 2.3844668865203857, "learning_rate": 1.7615004703225727e-05, "loss": 0.064, "step": 2047 }, { "epoch": 0.3021763187015861, "grad_norm": 6.523024082183838, "learning_rate": 1.7611665000922206e-05, "loss": 0.05, "step": 2048 }, { "epoch": 0.30232386573220216, "grad_norm": 2.540951728820801, "learning_rate": 1.7608323279033685e-05, "loss": 0.0403, "step": 2049 }, { "epoch": 0.30247141276281814, "grad_norm": 1.9776675701141357, "learning_rate": 1.7604979538446818e-05, "loss": 0.0376, "step": 2050 }, { "epoch": 0.3026189597934342, "grad_norm": 3.6032168865203857, "learning_rate": 1.7601633780048797e-05, "loss": 0.1119, "step": 2051 }, { "epoch": 0.30276650682405015, "grad_norm": 1.87938392162323, "learning_rate": 1.759828600472734e-05, "loss": 0.0541, "step": 2052 }, { "epoch": 0.3029140538546662, "grad_norm": 1.8706809282302856, "learning_rate": 1.7594936213370708e-05, "loss": 0.0685, "step": 2053 }, { "epoch": 0.30306160088528217, "grad_norm": 3.2565836906433105, "learning_rate": 1.759158440686769e-05, "loss": 0.0501, "step": 2054 }, { "epoch": 0.3032091479158982, "grad_norm": 1.5893309116363525, "learning_rate": 1.7588230586107613e-05, "loss": 0.0652, "step": 2055 }, { "epoch": 0.3033566949465142, "grad_norm": 1.8227797746658325, "learning_rate": 1.758487475198034e-05, "loss": 0.0597, "step": 2056 }, { "epoch": 0.3035042419771302, "grad_norm": 3.258263349533081, "learning_rate": 1.7581516905376266e-05, "loss": 0.0286, "step": 2057 }, { "epoch": 0.3036517890077462, "grad_norm": 4.387876033782959, "learning_rate": 1.7578157047186318e-05, "loss": 0.1144, "step": 2058 }, { "epoch": 0.30379933603836223, "grad_norm": 4.693922996520996, "learning_rate": 1.7574795178301963e-05, "loss": 0.1051, "step": 2059 }, { "epoch": 0.3039468830689782, "grad_norm": 4.034201622009277, "learning_rate": 1.7571431299615195e-05, "loss": 0.07, "step": 2060 }, { "epoch": 0.3039468830689782, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.06562598049640656, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.8669, "eval_samples_per_second": 5.836, "eval_steps_per_second": 0.201, "step": 2060 }, { "epoch": 0.30409443009959425, "grad_norm": 3.5295114517211914, "learning_rate": 1.7568065412018543e-05, "loss": 0.1046, "step": 2061 }, { "epoch": 0.30424197713021023, "grad_norm": 2.2974231243133545, "learning_rate": 1.7564697516405074e-05, "loss": 0.0658, "step": 2062 }, { "epoch": 0.30438952416082626, "grad_norm": 5.0034942626953125, "learning_rate": 1.7561327613668382e-05, "loss": 0.0556, "step": 2063 }, { "epoch": 0.3045370711914423, "grad_norm": 2.9695205688476562, "learning_rate": 1.7557955704702597e-05, "loss": 0.1117, "step": 2064 }, { "epoch": 0.3046846182220583, "grad_norm": 3.0170087814331055, "learning_rate": 1.7554581790402372e-05, "loss": 0.0589, "step": 2065 }, { "epoch": 0.3048321652526743, "grad_norm": 1.7038782835006714, "learning_rate": 1.7551205871662906e-05, "loss": 0.0362, "step": 2066 }, { "epoch": 0.3049797122832903, "grad_norm": 3.1643404960632324, "learning_rate": 1.7547827949379927e-05, "loss": 0.0417, "step": 2067 }, { "epoch": 0.30512725931390633, "grad_norm": 3.3591418266296387, "learning_rate": 1.7544448024449683e-05, "loss": 0.1141, "step": 2068 }, { "epoch": 0.3052748063445223, "grad_norm": 1.7569690942764282, "learning_rate": 1.7541066097768965e-05, "loss": 0.0871, "step": 2069 }, { "epoch": 0.30542235337513834, "grad_norm": 3.4121599197387695, "learning_rate": 1.753768217023509e-05, "loss": 0.0851, "step": 2070 }, { "epoch": 0.3055699004057543, "grad_norm": 2.230632781982422, "learning_rate": 1.7534296242745916e-05, "loss": 0.0668, "step": 2071 }, { "epoch": 0.30571744743637036, "grad_norm": 1.6636126041412354, "learning_rate": 1.753090831619981e-05, "loss": 0.0499, "step": 2072 }, { "epoch": 0.30586499446698634, "grad_norm": 1.216579794883728, "learning_rate": 1.752751839149569e-05, "loss": 0.0246, "step": 2073 }, { "epoch": 0.3060125414976024, "grad_norm": 1.7445745468139648, "learning_rate": 1.7524126469532997e-05, "loss": 0.0742, "step": 2074 }, { "epoch": 0.30616008852821835, "grad_norm": 2.8874173164367676, "learning_rate": 1.75207325512117e-05, "loss": 0.0552, "step": 2075 }, { "epoch": 0.3063076355588344, "grad_norm": 6.502613544464111, "learning_rate": 1.7517336637432297e-05, "loss": 0.0917, "step": 2076 }, { "epoch": 0.30645518258945037, "grad_norm": 1.183326244354248, "learning_rate": 1.751393872909582e-05, "loss": 0.0524, "step": 2077 }, { "epoch": 0.3066027296200664, "grad_norm": 3.1588449478149414, "learning_rate": 1.751053882710383e-05, "loss": 0.0655, "step": 2078 }, { "epoch": 0.3067502766506824, "grad_norm": 2.601128578186035, "learning_rate": 1.750713693235841e-05, "loss": 0.0582, "step": 2079 }, { "epoch": 0.3068978236812984, "grad_norm": 1.0544044971466064, "learning_rate": 1.7503733045762185e-05, "loss": 0.0264, "step": 2080 }, { "epoch": 0.3068978236812984, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9494949494949495, "eval_loss": 0.0671740248799324, "eval_precision": 0.9894736842105263, "eval_recall": 0.912621359223301, "eval_runtime": 49.9335, "eval_samples_per_second": 5.828, "eval_steps_per_second": 0.2, "step": 2080 }, { "epoch": 0.3070453707119144, "grad_norm": 5.65303897857666, "learning_rate": 1.7500327168218287e-05, "loss": 0.0727, "step": 2081 }, { "epoch": 0.30719291774253044, "grad_norm": 1.4922033548355103, "learning_rate": 1.7496919300630405e-05, "loss": 0.0614, "step": 2082 }, { "epoch": 0.3073404647731464, "grad_norm": 3.6531383991241455, "learning_rate": 1.7493509443902726e-05, "loss": 0.0938, "step": 2083 }, { "epoch": 0.30748801180376245, "grad_norm": 1.5921177864074707, "learning_rate": 1.749009759893999e-05, "loss": 0.0219, "step": 2084 }, { "epoch": 0.30763555883437843, "grad_norm": 1.0543262958526611, "learning_rate": 1.7486683766647447e-05, "loss": 0.0258, "step": 2085 }, { "epoch": 0.30778310586499447, "grad_norm": 1.6033868789672852, "learning_rate": 1.7483267947930884e-05, "loss": 0.0572, "step": 2086 }, { "epoch": 0.3079306528956105, "grad_norm": 2.187842845916748, "learning_rate": 1.7479850143696614e-05, "loss": 0.1, "step": 2087 }, { "epoch": 0.3080781999262265, "grad_norm": 1.839615821838379, "learning_rate": 1.747643035485147e-05, "loss": 0.0387, "step": 2088 }, { "epoch": 0.3082257469568425, "grad_norm": 2.7941935062408447, "learning_rate": 1.747300858230282e-05, "loss": 0.1078, "step": 2089 }, { "epoch": 0.3083732939874585, "grad_norm": 2.140803337097168, "learning_rate": 1.7469584826958554e-05, "loss": 0.0817, "step": 2090 }, { "epoch": 0.30852084101807453, "grad_norm": 1.14137864112854, "learning_rate": 1.7466159089727084e-05, "loss": 0.032, "step": 2091 }, { "epoch": 0.3086683880486905, "grad_norm": 1.5074483156204224, "learning_rate": 1.746273137151736e-05, "loss": 0.0822, "step": 2092 }, { "epoch": 0.30881593507930655, "grad_norm": 3.0813302993774414, "learning_rate": 1.7459301673238853e-05, "loss": 0.0903, "step": 2093 }, { "epoch": 0.3089634821099225, "grad_norm": 2.7230687141418457, "learning_rate": 1.7455869995801544e-05, "loss": 0.047, "step": 2094 }, { "epoch": 0.30911102914053856, "grad_norm": 1.7247334718704224, "learning_rate": 1.745243634011596e-05, "loss": 0.0621, "step": 2095 }, { "epoch": 0.30925857617115454, "grad_norm": 2.046072483062744, "learning_rate": 1.7449000707093145e-05, "loss": 0.0641, "step": 2096 }, { "epoch": 0.3094061232017706, "grad_norm": 4.232421875, "learning_rate": 1.7445563097644664e-05, "loss": 0.0799, "step": 2097 }, { "epoch": 0.30955367023238656, "grad_norm": 2.046140432357788, "learning_rate": 1.7442123512682615e-05, "loss": 0.1188, "step": 2098 }, { "epoch": 0.3097012172630026, "grad_norm": 5.812841892242432, "learning_rate": 1.743868195311961e-05, "loss": 0.1308, "step": 2099 }, { "epoch": 0.3098487642936186, "grad_norm": 3.1564996242523193, "learning_rate": 1.7435238419868793e-05, "loss": 0.1048, "step": 2100 }, { "epoch": 0.3098487642936186, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06561989337205887, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.0258, "eval_samples_per_second": 5.817, "eval_steps_per_second": 0.2, "step": 2100 }, { "epoch": 0.3099963113242346, "grad_norm": 3.3443610668182373, "learning_rate": 1.7431792913843826e-05, "loss": 0.1319, "step": 2101 }, { "epoch": 0.3101438583548506, "grad_norm": 1.7402970790863037, "learning_rate": 1.74283454359589e-05, "loss": 0.0387, "step": 2102 }, { "epoch": 0.3102914053854666, "grad_norm": 2.536003828048706, "learning_rate": 1.7424895987128723e-05, "loss": 0.032, "step": 2103 }, { "epoch": 0.3104389524160826, "grad_norm": 4.353453159332275, "learning_rate": 1.742144456826853e-05, "loss": 0.0702, "step": 2104 }, { "epoch": 0.31058649944669864, "grad_norm": 1.7974307537078857, "learning_rate": 1.7417991180294077e-05, "loss": 0.0597, "step": 2105 }, { "epoch": 0.3107340464773146, "grad_norm": 3.0360138416290283, "learning_rate": 1.7414535824121644e-05, "loss": 0.0842, "step": 2106 }, { "epoch": 0.31088159350793065, "grad_norm": 1.7299362421035767, "learning_rate": 1.7411078500668032e-05, "loss": 0.0664, "step": 2107 }, { "epoch": 0.3110291405385467, "grad_norm": 3.420109748840332, "learning_rate": 1.7407619210850568e-05, "loss": 0.1006, "step": 2108 }, { "epoch": 0.31117668756916267, "grad_norm": 2.340759754180908, "learning_rate": 1.7404157955587095e-05, "loss": 0.0929, "step": 2109 }, { "epoch": 0.3113242345997787, "grad_norm": 1.8463226556777954, "learning_rate": 1.7400694735795974e-05, "loss": 0.0536, "step": 2110 }, { "epoch": 0.3114717816303947, "grad_norm": 2.033116579055786, "learning_rate": 1.7397229552396104e-05, "loss": 0.0606, "step": 2111 }, { "epoch": 0.3116193286610107, "grad_norm": 1.240280270576477, "learning_rate": 1.739376240630688e-05, "loss": 0.0385, "step": 2112 }, { "epoch": 0.3117668756916267, "grad_norm": 2.432034969329834, "learning_rate": 1.7390293298448243e-05, "loss": 0.0819, "step": 2113 }, { "epoch": 0.31191442272224273, "grad_norm": 2.032050848007202, "learning_rate": 1.7386822229740642e-05, "loss": 0.0664, "step": 2114 }, { "epoch": 0.3120619697528587, "grad_norm": 2.3717551231384277, "learning_rate": 1.738334920110504e-05, "loss": 0.0783, "step": 2115 }, { "epoch": 0.31220951678347475, "grad_norm": 1.4648905992507935, "learning_rate": 1.7379874213462936e-05, "loss": 0.0784, "step": 2116 }, { "epoch": 0.31235706381409073, "grad_norm": 1.3918699026107788, "learning_rate": 1.7376397267736337e-05, "loss": 0.0238, "step": 2117 }, { "epoch": 0.31250461084470676, "grad_norm": 2.6029951572418213, "learning_rate": 1.7372918364847767e-05, "loss": 0.1066, "step": 2118 }, { "epoch": 0.31265215787532274, "grad_norm": 2.328977584838867, "learning_rate": 1.7369437505720284e-05, "loss": 0.0149, "step": 2119 }, { "epoch": 0.3127997049059388, "grad_norm": 3.0880587100982666, "learning_rate": 1.7365954691277453e-05, "loss": 0.136, "step": 2120 }, { "epoch": 0.3127997049059388, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9518987341772152, "eval_loss": 0.06848844885826111, "eval_precision": 0.9947089947089947, "eval_recall": 0.912621359223301, "eval_runtime": 49.3055, "eval_samples_per_second": 5.902, "eval_steps_per_second": 0.203, "step": 2120 }, { "epoch": 0.31294725193655476, "grad_norm": 1.6658941507339478, "learning_rate": 1.736246992244336e-05, "loss": 0.0554, "step": 2121 }, { "epoch": 0.3130947989671708, "grad_norm": 1.4149516820907593, "learning_rate": 1.7358983200142608e-05, "loss": 0.0629, "step": 2122 }, { "epoch": 0.3132423459977868, "grad_norm": 1.4125851392745972, "learning_rate": 1.7355494525300324e-05, "loss": 0.0598, "step": 2123 }, { "epoch": 0.3133898930284028, "grad_norm": 1.1009302139282227, "learning_rate": 1.7352003898842152e-05, "loss": 0.0234, "step": 2124 }, { "epoch": 0.3135374400590188, "grad_norm": 1.810051441192627, "learning_rate": 1.734851132169425e-05, "loss": 0.0447, "step": 2125 }, { "epoch": 0.3136849870896348, "grad_norm": 2.4140326976776123, "learning_rate": 1.7345016794783287e-05, "loss": 0.1102, "step": 2126 }, { "epoch": 0.3138325341202508, "grad_norm": 1.550392746925354, "learning_rate": 1.734152031903647e-05, "loss": 0.0842, "step": 2127 }, { "epoch": 0.31398008115086684, "grad_norm": 2.508880853652954, "learning_rate": 1.7338021895381506e-05, "loss": 0.0717, "step": 2128 }, { "epoch": 0.3141276281814828, "grad_norm": 2.0103600025177, "learning_rate": 1.7334521524746617e-05, "loss": 0.0264, "step": 2129 }, { "epoch": 0.31427517521209886, "grad_norm": 4.147952556610107, "learning_rate": 1.733101920806056e-05, "loss": 0.0752, "step": 2130 }, { "epoch": 0.3144227222427149, "grad_norm": 2.0161080360412598, "learning_rate": 1.7327514946252586e-05, "loss": 0.0328, "step": 2131 }, { "epoch": 0.31457026927333087, "grad_norm": 3.3436663150787354, "learning_rate": 1.7324008740252478e-05, "loss": 0.0976, "step": 2132 }, { "epoch": 0.3147178163039469, "grad_norm": 5.70300817489624, "learning_rate": 1.7320500590990525e-05, "loss": 0.1025, "step": 2133 }, { "epoch": 0.3148653633345629, "grad_norm": 2.568835496902466, "learning_rate": 1.7316990499397544e-05, "loss": 0.0993, "step": 2134 }, { "epoch": 0.3150129103651789, "grad_norm": 1.7607049942016602, "learning_rate": 1.7313478466404848e-05, "loss": 0.0714, "step": 2135 }, { "epoch": 0.3151604573957949, "grad_norm": 3.572340965270996, "learning_rate": 1.7309964492944288e-05, "loss": 0.0349, "step": 2136 }, { "epoch": 0.31530800442641094, "grad_norm": 1.9686864614486694, "learning_rate": 1.730644857994821e-05, "loss": 0.0598, "step": 2137 }, { "epoch": 0.3154555514570269, "grad_norm": 1.995460867881775, "learning_rate": 1.7302930728349485e-05, "loss": 0.0291, "step": 2138 }, { "epoch": 0.31560309848764295, "grad_norm": 2.1415622234344482, "learning_rate": 1.7299410939081498e-05, "loss": 0.0887, "step": 2139 }, { "epoch": 0.31575064551825893, "grad_norm": 2.0583574771881104, "learning_rate": 1.7295889213078143e-05, "loss": 0.0833, "step": 2140 }, { "epoch": 0.31575064551825893, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.06603804975748062, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 49.8262, "eval_samples_per_second": 5.84, "eval_steps_per_second": 0.201, "step": 2140 }, { "epoch": 0.31589819254887497, "grad_norm": 2.9655098915100098, "learning_rate": 1.7292365551273835e-05, "loss": 0.0847, "step": 2141 }, { "epoch": 0.31604573957949095, "grad_norm": 1.5837337970733643, "learning_rate": 1.7288839954603495e-05, "loss": 0.0563, "step": 2142 }, { "epoch": 0.316193286610107, "grad_norm": 3.348179578781128, "learning_rate": 1.7285312424002564e-05, "loss": 0.0959, "step": 2143 }, { "epoch": 0.31634083364072296, "grad_norm": 3.233232021331787, "learning_rate": 1.728178296040699e-05, "loss": 0.1235, "step": 2144 }, { "epoch": 0.316488380671339, "grad_norm": 5.451711654663086, "learning_rate": 1.7278251564753247e-05, "loss": 0.0784, "step": 2145 }, { "epoch": 0.316635927701955, "grad_norm": 4.001325607299805, "learning_rate": 1.7274718237978296e-05, "loss": 0.0405, "step": 2146 }, { "epoch": 0.316783474732571, "grad_norm": 3.600191831588745, "learning_rate": 1.727118298101964e-05, "loss": 0.086, "step": 2147 }, { "epoch": 0.316931021763187, "grad_norm": 1.718353033065796, "learning_rate": 1.7267645794815274e-05, "loss": 0.0982, "step": 2148 }, { "epoch": 0.31707856879380303, "grad_norm": 4.251434326171875, "learning_rate": 1.726410668030371e-05, "loss": 0.113, "step": 2149 }, { "epoch": 0.317226115824419, "grad_norm": 1.4363844394683838, "learning_rate": 1.7260565638423975e-05, "loss": 0.036, "step": 2150 }, { "epoch": 0.31737366285503504, "grad_norm": 1.8766740560531616, "learning_rate": 1.725702267011561e-05, "loss": 0.0589, "step": 2151 }, { "epoch": 0.3175212098856511, "grad_norm": 2.134807586669922, "learning_rate": 1.7253477776318653e-05, "loss": 0.0504, "step": 2152 }, { "epoch": 0.31766875691626706, "grad_norm": 1.7791506052017212, "learning_rate": 1.7249930957973668e-05, "loss": 0.0601, "step": 2153 }, { "epoch": 0.3178163039468831, "grad_norm": 0.8975431323051453, "learning_rate": 1.7246382216021725e-05, "loss": 0.0153, "step": 2154 }, { "epoch": 0.3179638509774991, "grad_norm": 1.7321504354476929, "learning_rate": 1.7242831551404397e-05, "loss": 0.056, "step": 2155 }, { "epoch": 0.3181113980081151, "grad_norm": 1.682796835899353, "learning_rate": 1.723927896506378e-05, "loss": 0.0257, "step": 2156 }, { "epoch": 0.3182589450387311, "grad_norm": 4.231181621551514, "learning_rate": 1.7235724457942468e-05, "loss": 0.0942, "step": 2157 }, { "epoch": 0.3184064920693471, "grad_norm": 1.9627528190612793, "learning_rate": 1.7232168030983577e-05, "loss": 0.0489, "step": 2158 }, { "epoch": 0.3185540390999631, "grad_norm": 1.3976247310638428, "learning_rate": 1.722860968513072e-05, "loss": 0.0415, "step": 2159 }, { "epoch": 0.31870158613057914, "grad_norm": 4.44366455078125, "learning_rate": 1.7225049421328024e-05, "loss": 0.0643, "step": 2160 }, { "epoch": 0.31870158613057914, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.06960581243038177, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 49.5923, "eval_samples_per_second": 5.868, "eval_steps_per_second": 0.202, "step": 2160 }, { "epoch": 0.3188491331611951, "grad_norm": 2.6021177768707275, "learning_rate": 1.7221487240520127e-05, "loss": 0.1243, "step": 2161 }, { "epoch": 0.31899668019181115, "grad_norm": 2.948322296142578, "learning_rate": 1.7217923143652173e-05, "loss": 0.0406, "step": 2162 }, { "epoch": 0.31914422722242713, "grad_norm": 2.37955379486084, "learning_rate": 1.721435713166982e-05, "loss": 0.1008, "step": 2163 }, { "epoch": 0.31929177425304317, "grad_norm": 3.536679744720459, "learning_rate": 1.7210789205519224e-05, "loss": 0.0458, "step": 2164 }, { "epoch": 0.31943932128365915, "grad_norm": 2.160754680633545, "learning_rate": 1.7207219366147053e-05, "loss": 0.0647, "step": 2165 }, { "epoch": 0.3195868683142752, "grad_norm": 1.1617878675460815, "learning_rate": 1.720364761450049e-05, "loss": 0.0294, "step": 2166 }, { "epoch": 0.31973441534489117, "grad_norm": 2.2255806922912598, "learning_rate": 1.7200073951527212e-05, "loss": 0.0595, "step": 2167 }, { "epoch": 0.3198819623755072, "grad_norm": 2.3716604709625244, "learning_rate": 1.7196498378175418e-05, "loss": 0.0784, "step": 2168 }, { "epoch": 0.3200295094061232, "grad_norm": 2.182983875274658, "learning_rate": 1.7192920895393806e-05, "loss": 0.0192, "step": 2169 }, { "epoch": 0.3201770564367392, "grad_norm": 9.193492889404297, "learning_rate": 1.7189341504131574e-05, "loss": 0.1631, "step": 2170 }, { "epoch": 0.3203246034673552, "grad_norm": 2.255892038345337, "learning_rate": 1.7185760205338443e-05, "loss": 0.0393, "step": 2171 }, { "epoch": 0.32047215049797123, "grad_norm": 1.9479318857192993, "learning_rate": 1.7182176999964618e-05, "loss": 0.0252, "step": 2172 }, { "epoch": 0.3206196975285872, "grad_norm": 0.6452316641807556, "learning_rate": 1.7178591888960835e-05, "loss": 0.007, "step": 2173 }, { "epoch": 0.32076724455920325, "grad_norm": 1.8716485500335693, "learning_rate": 1.7175004873278312e-05, "loss": 0.051, "step": 2174 }, { "epoch": 0.3209147915898193, "grad_norm": 2.7840285301208496, "learning_rate": 1.7171415953868793e-05, "loss": 0.0951, "step": 2175 }, { "epoch": 0.32106233862043526, "grad_norm": 3.441535711288452, "learning_rate": 1.7167825131684516e-05, "loss": 0.0594, "step": 2176 }, { "epoch": 0.3212098856510513, "grad_norm": 0.9892779588699341, "learning_rate": 1.7164232407678216e-05, "loss": 0.0242, "step": 2177 }, { "epoch": 0.3213574326816673, "grad_norm": 1.6680243015289307, "learning_rate": 1.7160637782803155e-05, "loss": 0.0971, "step": 2178 }, { "epoch": 0.3215049797122833, "grad_norm": 2.6267309188842773, "learning_rate": 1.7157041258013074e-05, "loss": 0.0564, "step": 2179 }, { "epoch": 0.3216525267428993, "grad_norm": 1.4788562059402466, "learning_rate": 1.715344283426224e-05, "loss": 0.032, "step": 2180 }, { "epoch": 0.3216525267428993, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.06722773611545563, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3598, "eval_samples_per_second": 5.895, "eval_steps_per_second": 0.203, "step": 2180 }, { "epoch": 0.3218000737735153, "grad_norm": 1.4854892492294312, "learning_rate": 1.7149842512505407e-05, "loss": 0.0409, "step": 2181 }, { "epoch": 0.3219476208041313, "grad_norm": 2.132868766784668, "learning_rate": 1.7146240293697847e-05, "loss": 0.0376, "step": 2182 }, { "epoch": 0.32209516783474734, "grad_norm": 3.089017152786255, "learning_rate": 1.7142636178795326e-05, "loss": 0.0878, "step": 2183 }, { "epoch": 0.3222427148653633, "grad_norm": 2.3784422874450684, "learning_rate": 1.7139030168754108e-05, "loss": 0.0594, "step": 2184 }, { "epoch": 0.32239026189597936, "grad_norm": 1.111702799797058, "learning_rate": 1.7135422264530977e-05, "loss": 0.0367, "step": 2185 }, { "epoch": 0.32253780892659534, "grad_norm": 3.1458306312561035, "learning_rate": 1.7131812467083204e-05, "loss": 0.0595, "step": 2186 }, { "epoch": 0.3226853559572114, "grad_norm": 4.715087890625, "learning_rate": 1.712820077736857e-05, "loss": 0.0814, "step": 2187 }, { "epoch": 0.32283290298782735, "grad_norm": 4.471789836883545, "learning_rate": 1.7124587196345354e-05, "loss": 0.0772, "step": 2188 }, { "epoch": 0.3229804500184434, "grad_norm": 4.2753376960754395, "learning_rate": 1.7120971724972343e-05, "loss": 0.0965, "step": 2189 }, { "epoch": 0.32312799704905937, "grad_norm": 1.4975053071975708, "learning_rate": 1.7117354364208816e-05, "loss": 0.0401, "step": 2190 }, { "epoch": 0.3232755440796754, "grad_norm": 1.8241612911224365, "learning_rate": 1.7113735115014563e-05, "loss": 0.0624, "step": 2191 }, { "epoch": 0.3234230911102914, "grad_norm": 2.369324207305908, "learning_rate": 1.7110113978349867e-05, "loss": 0.0869, "step": 2192 }, { "epoch": 0.3235706381409074, "grad_norm": 1.3327139616012573, "learning_rate": 1.710649095517552e-05, "loss": 0.0224, "step": 2193 }, { "epoch": 0.3237181851715234, "grad_norm": 2.886950969696045, "learning_rate": 1.7102866046452807e-05, "loss": 0.1318, "step": 2194 }, { "epoch": 0.32386573220213943, "grad_norm": 4.114792346954346, "learning_rate": 1.709923925314352e-05, "loss": 0.11, "step": 2195 }, { "epoch": 0.3240132792327554, "grad_norm": 1.9742048978805542, "learning_rate": 1.709561057620994e-05, "loss": 0.0389, "step": 2196 }, { "epoch": 0.32416082626337145, "grad_norm": 2.6809005737304688, "learning_rate": 1.7091980016614863e-05, "loss": 0.0838, "step": 2197 }, { "epoch": 0.3243083732939875, "grad_norm": 2.5167462825775146, "learning_rate": 1.7088347575321575e-05, "loss": 0.0896, "step": 2198 }, { "epoch": 0.32445592032460346, "grad_norm": 1.6761678457260132, "learning_rate": 1.708471325329386e-05, "loss": 0.0448, "step": 2199 }, { "epoch": 0.3246034673552195, "grad_norm": 2.724100351333618, "learning_rate": 1.708107705149601e-05, "loss": 0.0613, "step": 2200 }, { "epoch": 0.3246034673552195, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9605911330049262, "eval_loss": 0.06580810993909836, "eval_precision": 0.975, "eval_recall": 0.9466019417475728, "eval_runtime": 50.0082, "eval_samples_per_second": 5.819, "eval_steps_per_second": 0.2, "step": 2200 }, { "epoch": 0.3247510143858355, "grad_norm": 2.656367301940918, "learning_rate": 1.70774389708928e-05, "loss": 0.0927, "step": 2201 }, { "epoch": 0.3248985614164515, "grad_norm": 2.6321423053741455, "learning_rate": 1.7073799012449524e-05, "loss": 0.1059, "step": 2202 }, { "epoch": 0.3250461084470675, "grad_norm": 2.040905475616455, "learning_rate": 1.7070157177131954e-05, "loss": 0.0781, "step": 2203 }, { "epoch": 0.32519365547768353, "grad_norm": 1.4005239009857178, "learning_rate": 1.7066513465906375e-05, "loss": 0.0528, "step": 2204 }, { "epoch": 0.3253412025082995, "grad_norm": 2.356604814529419, "learning_rate": 1.7062867879739568e-05, "loss": 0.0751, "step": 2205 }, { "epoch": 0.32548874953891554, "grad_norm": 1.1565563678741455, "learning_rate": 1.70592204195988e-05, "loss": 0.0347, "step": 2206 }, { "epoch": 0.3256362965695315, "grad_norm": 1.0566600561141968, "learning_rate": 1.7055571086451842e-05, "loss": 0.0169, "step": 2207 }, { "epoch": 0.32578384360014756, "grad_norm": 1.6565196514129639, "learning_rate": 1.705191988126697e-05, "loss": 0.0689, "step": 2208 }, { "epoch": 0.32593139063076354, "grad_norm": 4.447436332702637, "learning_rate": 1.7048266805012948e-05, "loss": 0.0736, "step": 2209 }, { "epoch": 0.3260789376613796, "grad_norm": 2.0285775661468506, "learning_rate": 1.7044611858659036e-05, "loss": 0.0734, "step": 2210 }, { "epoch": 0.32622648469199556, "grad_norm": 2.754542350769043, "learning_rate": 1.7040955043174992e-05, "loss": 0.0771, "step": 2211 }, { "epoch": 0.3263740317226116, "grad_norm": 2.8806169033050537, "learning_rate": 1.703729635953107e-05, "loss": 0.0402, "step": 2212 }, { "epoch": 0.32652157875322757, "grad_norm": 2.1255829334259033, "learning_rate": 1.703363580869802e-05, "loss": 0.0644, "step": 2213 }, { "epoch": 0.3266691257838436, "grad_norm": 1.6564240455627441, "learning_rate": 1.702997339164709e-05, "loss": 0.0927, "step": 2214 }, { "epoch": 0.3268166728144596, "grad_norm": 1.8921287059783936, "learning_rate": 1.702630910935002e-05, "loss": 0.0788, "step": 2215 }, { "epoch": 0.3269642198450756, "grad_norm": 4.465273857116699, "learning_rate": 1.7022642962779037e-05, "loss": 0.0737, "step": 2216 }, { "epoch": 0.3271117668756916, "grad_norm": 2.584212303161621, "learning_rate": 1.7018974952906885e-05, "loss": 0.0978, "step": 2217 }, { "epoch": 0.32725931390630764, "grad_norm": 2.30361008644104, "learning_rate": 1.7015305080706776e-05, "loss": 0.0308, "step": 2218 }, { "epoch": 0.32740686093692367, "grad_norm": 2.4971728324890137, "learning_rate": 1.7011633347152437e-05, "loss": 0.0478, "step": 2219 }, { "epoch": 0.32755440796753965, "grad_norm": 1.7847650051116943, "learning_rate": 1.7007959753218073e-05, "loss": 0.0755, "step": 2220 }, { "epoch": 0.32755440796753965, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.06917760521173477, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 49.8729, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.201, "step": 2220 }, { "epoch": 0.3277019549981557, "grad_norm": 1.4912227392196655, "learning_rate": 1.7004284299878397e-05, "loss": 0.0567, "step": 2221 }, { "epoch": 0.32784950202877167, "grad_norm": 0.9491235613822937, "learning_rate": 1.7000606988108606e-05, "loss": 0.0369, "step": 2222 }, { "epoch": 0.3279970490593877, "grad_norm": 0.8188719749450684, "learning_rate": 1.699692781888439e-05, "loss": 0.0276, "step": 2223 }, { "epoch": 0.3281445960900037, "grad_norm": 1.8164491653442383, "learning_rate": 1.6993246793181934e-05, "loss": 0.0535, "step": 2224 }, { "epoch": 0.3282921431206197, "grad_norm": 1.3040269613265991, "learning_rate": 1.698956391197792e-05, "loss": 0.0151, "step": 2225 }, { "epoch": 0.3284396901512357, "grad_norm": 4.568538188934326, "learning_rate": 1.6985879176249513e-05, "loss": 0.044, "step": 2226 }, { "epoch": 0.32858723718185173, "grad_norm": 3.484936237335205, "learning_rate": 1.698219258697438e-05, "loss": 0.1221, "step": 2227 }, { "epoch": 0.3287347842124677, "grad_norm": 3.1631078720092773, "learning_rate": 1.6978504145130674e-05, "loss": 0.0445, "step": 2228 }, { "epoch": 0.32888233124308375, "grad_norm": 3.169933557510376, "learning_rate": 1.6974813851697037e-05, "loss": 0.0632, "step": 2229 }, { "epoch": 0.3290298782736997, "grad_norm": 2.9364147186279297, "learning_rate": 1.697112170765261e-05, "loss": 0.1092, "step": 2230 }, { "epoch": 0.32917742530431576, "grad_norm": 1.1063787937164307, "learning_rate": 1.6967427713977017e-05, "loss": 0.0304, "step": 2231 }, { "epoch": 0.32932497233493174, "grad_norm": 1.1559252738952637, "learning_rate": 1.6963731871650377e-05, "loss": 0.0158, "step": 2232 }, { "epoch": 0.3294725193655478, "grad_norm": 1.299078345298767, "learning_rate": 1.6960034181653303e-05, "loss": 0.0284, "step": 2233 }, { "epoch": 0.32962006639616376, "grad_norm": 2.1277706623077393, "learning_rate": 1.695633464496689e-05, "loss": 0.0631, "step": 2234 }, { "epoch": 0.3297676134267798, "grad_norm": 4.438483238220215, "learning_rate": 1.6952633262572734e-05, "loss": 0.1384, "step": 2235 }, { "epoch": 0.3299151604573958, "grad_norm": 1.8104290962219238, "learning_rate": 1.6948930035452905e-05, "loss": 0.0919, "step": 2236 }, { "epoch": 0.3300627074880118, "grad_norm": 3.712137222290039, "learning_rate": 1.6945224964589974e-05, "loss": 0.0628, "step": 2237 }, { "epoch": 0.3302102545186278, "grad_norm": 3.4406650066375732, "learning_rate": 1.6941518050967002e-05, "loss": 0.076, "step": 2238 }, { "epoch": 0.3303578015492438, "grad_norm": 3.186852216720581, "learning_rate": 1.6937809295567536e-05, "loss": 0.0632, "step": 2239 }, { "epoch": 0.3305053485798598, "grad_norm": 1.3787213563919067, "learning_rate": 1.6934098699375608e-05, "loss": 0.0122, "step": 2240 }, { "epoch": 0.3305053485798598, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9533169533169533, "eval_loss": 0.07235416024923325, "eval_precision": 0.9651741293532339, "eval_recall": 0.941747572815534, "eval_runtime": 49.7738, "eval_samples_per_second": 5.846, "eval_steps_per_second": 0.201, "step": 2240 }, { "epoch": 0.33065289561047584, "grad_norm": 2.335592031478882, "learning_rate": 1.6930386263375738e-05, "loss": 0.0762, "step": 2241 }, { "epoch": 0.3308004426410919, "grad_norm": 3.2321720123291016, "learning_rate": 1.692667198855295e-05, "loss": 0.0311, "step": 2242 }, { "epoch": 0.33094798967170785, "grad_norm": 5.0269775390625, "learning_rate": 1.6922955875892736e-05, "loss": 0.1519, "step": 2243 }, { "epoch": 0.3310955367023239, "grad_norm": 1.4114075899124146, "learning_rate": 1.691923792638108e-05, "loss": 0.0312, "step": 2244 }, { "epoch": 0.33124308373293987, "grad_norm": 2.941546678543091, "learning_rate": 1.691551814100446e-05, "loss": 0.0618, "step": 2245 }, { "epoch": 0.3313906307635559, "grad_norm": 1.8612760305404663, "learning_rate": 1.691179652074984e-05, "loss": 0.0432, "step": 2246 }, { "epoch": 0.3315381777941719, "grad_norm": 7.949804306030273, "learning_rate": 1.6908073066604665e-05, "loss": 0.094, "step": 2247 }, { "epoch": 0.3316857248247879, "grad_norm": 4.214132308959961, "learning_rate": 1.6904347779556875e-05, "loss": 0.0619, "step": 2248 }, { "epoch": 0.3318332718554039, "grad_norm": 3.301882743835449, "learning_rate": 1.6900620660594884e-05, "loss": 0.0829, "step": 2249 }, { "epoch": 0.33198081888601993, "grad_norm": 1.0096971988677979, "learning_rate": 1.689689171070761e-05, "loss": 0.0447, "step": 2250 }, { "epoch": 0.3321283659166359, "grad_norm": 4.101325511932373, "learning_rate": 1.6893160930884435e-05, "loss": 0.0691, "step": 2251 }, { "epoch": 0.33227591294725195, "grad_norm": 1.1518974304199219, "learning_rate": 1.6889428322115247e-05, "loss": 0.0306, "step": 2252 }, { "epoch": 0.33242345997786793, "grad_norm": 6.926194190979004, "learning_rate": 1.6885693885390405e-05, "loss": 0.0677, "step": 2253 }, { "epoch": 0.33257100700848397, "grad_norm": 3.5227413177490234, "learning_rate": 1.688195762170076e-05, "loss": 0.0887, "step": 2254 }, { "epoch": 0.33271855403909995, "grad_norm": 1.2965720891952515, "learning_rate": 1.687821953203765e-05, "loss": 0.0249, "step": 2255 }, { "epoch": 0.332866101069716, "grad_norm": 2.641340494155884, "learning_rate": 1.6874479617392884e-05, "loss": 0.0984, "step": 2256 }, { "epoch": 0.33301364810033196, "grad_norm": 2.2977609634399414, "learning_rate": 1.6870737878758775e-05, "loss": 0.0543, "step": 2257 }, { "epoch": 0.333161195130948, "grad_norm": 1.1189682483673096, "learning_rate": 1.6866994317128105e-05, "loss": 0.0566, "step": 2258 }, { "epoch": 0.333308742161564, "grad_norm": 2.5354154109954834, "learning_rate": 1.686324893349414e-05, "loss": 0.0822, "step": 2259 }, { "epoch": 0.33345628919218, "grad_norm": 1.2568128108978271, "learning_rate": 1.685950172885064e-05, "loss": 0.0472, "step": 2260 }, { "epoch": 0.33345628919218, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.949238578680203, "eval_loss": 0.07537344843149185, "eval_precision": 0.9946808510638298, "eval_recall": 0.9077669902912622, "eval_runtime": 49.8557, "eval_samples_per_second": 5.837, "eval_steps_per_second": 0.201, "step": 2260 }, { "epoch": 0.333603836222796, "grad_norm": 4.083811283111572, "learning_rate": 1.6855752704191843e-05, "loss": 0.1204, "step": 2261 }, { "epoch": 0.333751383253412, "grad_norm": 4.4965338706970215, "learning_rate": 1.685200186051246e-05, "loss": 0.078, "step": 2262 }, { "epoch": 0.33389893028402806, "grad_norm": 0.9255520105361938, "learning_rate": 1.68482491988077e-05, "loss": 0.0119, "step": 2263 }, { "epoch": 0.33404647731464404, "grad_norm": 1.2964768409729004, "learning_rate": 1.6844494720073247e-05, "loss": 0.0356, "step": 2264 }, { "epoch": 0.3341940243452601, "grad_norm": 2.200490713119507, "learning_rate": 1.684073842530527e-05, "loss": 0.1243, "step": 2265 }, { "epoch": 0.33434157137587606, "grad_norm": 1.4421501159667969, "learning_rate": 1.6836980315500417e-05, "loss": 0.0449, "step": 2266 }, { "epoch": 0.3344891184064921, "grad_norm": 3.5278217792510986, "learning_rate": 1.6833220391655812e-05, "loss": 0.0539, "step": 2267 }, { "epoch": 0.33463666543710807, "grad_norm": 1.55814528465271, "learning_rate": 1.6829458654769073e-05, "loss": 0.0481, "step": 2268 }, { "epoch": 0.3347842124677241, "grad_norm": 2.162083864212036, "learning_rate": 1.682569510583829e-05, "loss": 0.0087, "step": 2269 }, { "epoch": 0.3349317594983401, "grad_norm": 1.685699224472046, "learning_rate": 1.682192974586204e-05, "loss": 0.0269, "step": 2270 }, { "epoch": 0.3350793065289561, "grad_norm": 2.480480194091797, "learning_rate": 1.6818162575839375e-05, "loss": 0.0476, "step": 2271 }, { "epoch": 0.3352268535595721, "grad_norm": 4.631017684936523, "learning_rate": 1.6814393596769826e-05, "loss": 0.1842, "step": 2272 }, { "epoch": 0.33537440059018814, "grad_norm": 2.340878486633301, "learning_rate": 1.6810622809653415e-05, "loss": 0.0569, "step": 2273 }, { "epoch": 0.3355219476208041, "grad_norm": 1.0919411182403564, "learning_rate": 1.680685021549063e-05, "loss": 0.0343, "step": 2274 }, { "epoch": 0.33566949465142015, "grad_norm": 2.9381957054138184, "learning_rate": 1.6803075815282442e-05, "loss": 0.071, "step": 2275 }, { "epoch": 0.33581704168203613, "grad_norm": 3.323485851287842, "learning_rate": 1.6799299610030314e-05, "loss": 0.117, "step": 2276 }, { "epoch": 0.33596458871265217, "grad_norm": 1.3506728410720825, "learning_rate": 1.6795521600736166e-05, "loss": 0.055, "step": 2277 }, { "epoch": 0.33611213574326815, "grad_norm": 3.0928151607513428, "learning_rate": 1.6791741788402414e-05, "loss": 0.0299, "step": 2278 }, { "epoch": 0.3362596827738842, "grad_norm": 2.393380641937256, "learning_rate": 1.6787960174031945e-05, "loss": 0.1111, "step": 2279 }, { "epoch": 0.33640722980450016, "grad_norm": 2.1875338554382324, "learning_rate": 1.6784176758628127e-05, "loss": 0.0404, "step": 2280 }, { "epoch": 0.33640722980450016, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.06640757620334625, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 49.5572, "eval_samples_per_second": 5.872, "eval_steps_per_second": 0.202, "step": 2280 }, { "epoch": 0.3365547768351162, "grad_norm": 3.1600427627563477, "learning_rate": 1.6780391543194804e-05, "loss": 0.106, "step": 2281 }, { "epoch": 0.3367023238657322, "grad_norm": 5.332502841949463, "learning_rate": 1.67766045287363e-05, "loss": 0.1328, "step": 2282 }, { "epoch": 0.3368498708963482, "grad_norm": 12.91946792602539, "learning_rate": 1.6772815716257414e-05, "loss": 0.0924, "step": 2283 }, { "epoch": 0.3369974179269642, "grad_norm": 2.88958477973938, "learning_rate": 1.6769025106763417e-05, "loss": 0.0885, "step": 2284 }, { "epoch": 0.33714496495758023, "grad_norm": 1.5320292711257935, "learning_rate": 1.676523270126007e-05, "loss": 0.0241, "step": 2285 }, { "epoch": 0.33729251198819626, "grad_norm": 4.352901458740234, "learning_rate": 1.67614385007536e-05, "loss": 0.0684, "step": 2286 }, { "epoch": 0.33744005901881224, "grad_norm": 1.6219866275787354, "learning_rate": 1.6757642506250713e-05, "loss": 0.0619, "step": 2287 }, { "epoch": 0.3375876060494283, "grad_norm": 2.50010085105896, "learning_rate": 1.6753844718758592e-05, "loss": 0.0624, "step": 2288 }, { "epoch": 0.33773515308004426, "grad_norm": 1.923895239830017, "learning_rate": 1.6750045139284892e-05, "loss": 0.0436, "step": 2289 }, { "epoch": 0.3378827001106603, "grad_norm": 1.3771748542785645, "learning_rate": 1.6746243768837752e-05, "loss": 0.0488, "step": 2290 }, { "epoch": 0.3380302471412763, "grad_norm": 1.6006910800933838, "learning_rate": 1.6742440608425772e-05, "loss": 0.0466, "step": 2291 }, { "epoch": 0.3381777941718923, "grad_norm": 0.9986759424209595, "learning_rate": 1.6738635659058045e-05, "loss": 0.0322, "step": 2292 }, { "epoch": 0.3383253412025083, "grad_norm": 2.151573419570923, "learning_rate": 1.6734828921744127e-05, "loss": 0.0706, "step": 2293 }, { "epoch": 0.3384728882331243, "grad_norm": 1.4651777744293213, "learning_rate": 1.6731020397494044e-05, "loss": 0.084, "step": 2294 }, { "epoch": 0.3386204352637403, "grad_norm": 3.3459606170654297, "learning_rate": 1.6727210087318308e-05, "loss": 0.021, "step": 2295 }, { "epoch": 0.33876798229435634, "grad_norm": 1.4047874212265015, "learning_rate": 1.6723397992227898e-05, "loss": 0.0156, "step": 2296 }, { "epoch": 0.3389155293249723, "grad_norm": 2.0876870155334473, "learning_rate": 1.671958411323427e-05, "loss": 0.032, "step": 2297 }, { "epoch": 0.33906307635558836, "grad_norm": 2.436588764190674, "learning_rate": 1.671576845134935e-05, "loss": 0.0934, "step": 2298 }, { "epoch": 0.33921062338620434, "grad_norm": 2.535916566848755, "learning_rate": 1.671195100758554e-05, "loss": 0.0255, "step": 2299 }, { "epoch": 0.33935817041682037, "grad_norm": 3.2804372310638428, "learning_rate": 1.6708131782955713e-05, "loss": 0.0952, "step": 2300 }, { "epoch": 0.33935817041682037, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9465648854961832, "eval_loss": 0.08067480474710464, "eval_precision": 0.9946524064171123, "eval_recall": 0.9029126213592233, "eval_runtime": 49.5636, "eval_samples_per_second": 5.871, "eval_steps_per_second": 0.202, "step": 2300 }, { "epoch": 0.33950571744743635, "grad_norm": 3.165992498397827, "learning_rate": 1.6704310778473216e-05, "loss": 0.1267, "step": 2301 }, { "epoch": 0.3396532644780524, "grad_norm": 1.883520483970642, "learning_rate": 1.670048799515186e-05, "loss": 0.0585, "step": 2302 }, { "epoch": 0.33980081150866837, "grad_norm": 3.8987653255462646, "learning_rate": 1.669666343400594e-05, "loss": 0.0849, "step": 2303 }, { "epoch": 0.3399483585392844, "grad_norm": 4.0299506187438965, "learning_rate": 1.669283709605022e-05, "loss": 0.1268, "step": 2304 }, { "epoch": 0.3400959055699004, "grad_norm": 1.843334436416626, "learning_rate": 1.668900898229993e-05, "loss": 0.0545, "step": 2305 }, { "epoch": 0.3402434526005164, "grad_norm": 3.21661376953125, "learning_rate": 1.668517909377078e-05, "loss": 0.1062, "step": 2306 }, { "epoch": 0.3403909996311324, "grad_norm": 5.5056986808776855, "learning_rate": 1.6681347431478933e-05, "loss": 0.0502, "step": 2307 }, { "epoch": 0.34053854666174843, "grad_norm": 1.074285626411438, "learning_rate": 1.6677513996441044e-05, "loss": 0.0116, "step": 2308 }, { "epoch": 0.34068609369236447, "grad_norm": 4.0778727531433105, "learning_rate": 1.667367878967423e-05, "loss": 0.1103, "step": 2309 }, { "epoch": 0.34083364072298045, "grad_norm": 1.545832633972168, "learning_rate": 1.6669841812196074e-05, "loss": 0.0461, "step": 2310 }, { "epoch": 0.3409811877535965, "grad_norm": 2.930983543395996, "learning_rate": 1.6666003065024633e-05, "loss": 0.0922, "step": 2311 }, { "epoch": 0.34112873478421246, "grad_norm": 3.9265267848968506, "learning_rate": 1.6662162549178433e-05, "loss": 0.0512, "step": 2312 }, { "epoch": 0.3412762818148285, "grad_norm": 16.14114761352539, "learning_rate": 1.6658320265676465e-05, "loss": 0.1144, "step": 2313 }, { "epoch": 0.3414238288454445, "grad_norm": 2.545940637588501, "learning_rate": 1.66544762155382e-05, "loss": 0.0302, "step": 2314 }, { "epoch": 0.3415713758760605, "grad_norm": 1.3965038061141968, "learning_rate": 1.6650630399783567e-05, "loss": 0.0551, "step": 2315 }, { "epoch": 0.3417189229066765, "grad_norm": 4.396641254425049, "learning_rate": 1.6646782819432967e-05, "loss": 0.0921, "step": 2316 }, { "epoch": 0.3418664699372925, "grad_norm": 4.47985315322876, "learning_rate": 1.6642933475507268e-05, "loss": 0.1322, "step": 2317 }, { "epoch": 0.3420140169679085, "grad_norm": 1.0465424060821533, "learning_rate": 1.6639082369027812e-05, "loss": 0.0375, "step": 2318 }, { "epoch": 0.34216156399852454, "grad_norm": 2.912580966949463, "learning_rate": 1.6635229501016403e-05, "loss": 0.03, "step": 2319 }, { "epoch": 0.3423091110291405, "grad_norm": 1.5885111093521118, "learning_rate": 1.6631374872495303e-05, "loss": 0.0435, "step": 2320 }, { "epoch": 0.3423091110291405, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06419352442026138, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8539, "eval_samples_per_second": 5.837, "eval_steps_per_second": 0.201, "step": 2320 }, { "epoch": 0.34245665805975656, "grad_norm": 1.3954262733459473, "learning_rate": 1.6627518484487266e-05, "loss": 0.0511, "step": 2321 }, { "epoch": 0.34260420509037254, "grad_norm": 0.9614152908325195, "learning_rate": 1.662366033801549e-05, "loss": 0.0366, "step": 2322 }, { "epoch": 0.3427517521209886, "grad_norm": 5.605996608734131, "learning_rate": 1.661980043410365e-05, "loss": 0.1091, "step": 2323 }, { "epoch": 0.34289929915160455, "grad_norm": 1.5293532609939575, "learning_rate": 1.6615938773775885e-05, "loss": 0.0472, "step": 2324 }, { "epoch": 0.3430468461822206, "grad_norm": 1.7617485523223877, "learning_rate": 1.66120753580568e-05, "loss": 0.0416, "step": 2325 }, { "epoch": 0.34319439321283657, "grad_norm": 2.8528995513916016, "learning_rate": 1.6608210187971467e-05, "loss": 0.0875, "step": 2326 }, { "epoch": 0.3433419402434526, "grad_norm": 1.56826651096344, "learning_rate": 1.660434326454542e-05, "loss": 0.0456, "step": 2327 }, { "epoch": 0.3434894872740686, "grad_norm": 3.3070790767669678, "learning_rate": 1.6600474588804664e-05, "loss": 0.1317, "step": 2328 }, { "epoch": 0.3436370343046846, "grad_norm": 2.1225554943084717, "learning_rate": 1.659660416177566e-05, "loss": 0.0765, "step": 2329 }, { "epoch": 0.34378458133530065, "grad_norm": 0.9811033606529236, "learning_rate": 1.6592731984485343e-05, "loss": 0.0293, "step": 2330 }, { "epoch": 0.34393212836591663, "grad_norm": 2.6723105907440186, "learning_rate": 1.658885805796111e-05, "loss": 0.0435, "step": 2331 }, { "epoch": 0.34407967539653267, "grad_norm": 3.923781156539917, "learning_rate": 1.658498238323082e-05, "loss": 0.0906, "step": 2332 }, { "epoch": 0.34422722242714865, "grad_norm": 3.2581284046173096, "learning_rate": 1.65811049613228e-05, "loss": 0.0979, "step": 2333 }, { "epoch": 0.3443747694577647, "grad_norm": 1.3880892992019653, "learning_rate": 1.6577225793265826e-05, "loss": 0.0395, "step": 2334 }, { "epoch": 0.34452231648838066, "grad_norm": 1.3485021591186523, "learning_rate": 1.6573344880089157e-05, "loss": 0.0188, "step": 2335 }, { "epoch": 0.3446698635189967, "grad_norm": 4.487036228179932, "learning_rate": 1.6569462222822504e-05, "loss": 0.0493, "step": 2336 }, { "epoch": 0.3448174105496127, "grad_norm": 2.8430447578430176, "learning_rate": 1.6565577822496042e-05, "loss": 0.085, "step": 2337 }, { "epoch": 0.3449649575802287, "grad_norm": 8.752228736877441, "learning_rate": 1.6561691680140416e-05, "loss": 0.1306, "step": 2338 }, { "epoch": 0.3451125046108447, "grad_norm": 1.9699515104293823, "learning_rate": 1.6557803796786723e-05, "loss": 0.0232, "step": 2339 }, { "epoch": 0.34526005164146073, "grad_norm": 2.071045398712158, "learning_rate": 1.6553914173466523e-05, "loss": 0.027, "step": 2340 }, { "epoch": 0.34526005164146073, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.06641129404306412, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 50.2292, "eval_samples_per_second": 5.793, "eval_steps_per_second": 0.199, "step": 2340 }, { "epoch": 0.3454075986720767, "grad_norm": 1.799544334411621, "learning_rate": 1.6550022811211842e-05, "loss": 0.0379, "step": 2341 }, { "epoch": 0.34555514570269275, "grad_norm": 3.7917068004608154, "learning_rate": 1.6546129711055167e-05, "loss": 0.0667, "step": 2342 }, { "epoch": 0.3457026927333087, "grad_norm": 1.8615074157714844, "learning_rate": 1.6542234874029448e-05, "loss": 0.0434, "step": 2343 }, { "epoch": 0.34585023976392476, "grad_norm": 3.230102777481079, "learning_rate": 1.6538338301168088e-05, "loss": 0.0807, "step": 2344 }, { "epoch": 0.34599778679454074, "grad_norm": 3.9649109840393066, "learning_rate": 1.6534439993504956e-05, "loss": 0.135, "step": 2345 }, { "epoch": 0.3461453338251568, "grad_norm": 2.0120749473571777, "learning_rate": 1.653053995207438e-05, "loss": 0.0335, "step": 2346 }, { "epoch": 0.34629288085577276, "grad_norm": 3.698765754699707, "learning_rate": 1.652663817791116e-05, "loss": 0.0956, "step": 2347 }, { "epoch": 0.3464404278863888, "grad_norm": 1.215096116065979, "learning_rate": 1.6522734672050526e-05, "loss": 0.0425, "step": 2348 }, { "epoch": 0.34658797491700477, "grad_norm": 2.372612237930298, "learning_rate": 1.6518829435528195e-05, "loss": 0.0391, "step": 2349 }, { "epoch": 0.3467355219476208, "grad_norm": 1.7483614683151245, "learning_rate": 1.651492246938034e-05, "loss": 0.0361, "step": 2350 }, { "epoch": 0.3468830689782368, "grad_norm": 2.1177897453308105, "learning_rate": 1.6511013774643578e-05, "loss": 0.0804, "step": 2351 }, { "epoch": 0.3470306160088528, "grad_norm": 2.710125207901001, "learning_rate": 1.6507103352354998e-05, "loss": 0.101, "step": 2352 }, { "epoch": 0.34717816303946886, "grad_norm": 2.26253604888916, "learning_rate": 1.6503191203552138e-05, "loss": 0.0586, "step": 2353 }, { "epoch": 0.34732571007008484, "grad_norm": 2.8633363246917725, "learning_rate": 1.6499277329273006e-05, "loss": 0.0986, "step": 2354 }, { "epoch": 0.34747325710070087, "grad_norm": 3.533268690109253, "learning_rate": 1.649536173055606e-05, "loss": 0.1384, "step": 2355 }, { "epoch": 0.34762080413131685, "grad_norm": 1.1151478290557861, "learning_rate": 1.6491444408440212e-05, "loss": 0.045, "step": 2356 }, { "epoch": 0.3477683511619329, "grad_norm": 2.5397818088531494, "learning_rate": 1.648752536396484e-05, "loss": 0.0585, "step": 2357 }, { "epoch": 0.34791589819254887, "grad_norm": 2.7460968494415283, "learning_rate": 1.6483604598169768e-05, "loss": 0.0747, "step": 2358 }, { "epoch": 0.3480634452231649, "grad_norm": 2.952080488204956, "learning_rate": 1.6479682112095295e-05, "loss": 0.072, "step": 2359 }, { "epoch": 0.3482109922537809, "grad_norm": 2.2750353813171387, "learning_rate": 1.647575790678215e-05, "loss": 0.075, "step": 2360 }, { "epoch": 0.3482109922537809, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9584352078239609, "eval_loss": 0.0662899762392044, "eval_precision": 0.9655172413793104, "eval_recall": 0.9514563106796117, "eval_runtime": 50.1902, "eval_samples_per_second": 5.798, "eval_steps_per_second": 0.199, "step": 2360 }, { "epoch": 0.3483585392843969, "grad_norm": 1.847737431526184, "learning_rate": 1.6471831983271555e-05, "loss": 0.0352, "step": 2361 }, { "epoch": 0.3485060863150129, "grad_norm": 3.476930618286133, "learning_rate": 1.6467904342605142e-05, "loss": 0.1174, "step": 2362 }, { "epoch": 0.34865363334562893, "grad_norm": 1.9660117626190186, "learning_rate": 1.6463974985825038e-05, "loss": 0.0256, "step": 2363 }, { "epoch": 0.3488011803762449, "grad_norm": 2.007992744445801, "learning_rate": 1.64600439139738e-05, "loss": 0.0526, "step": 2364 }, { "epoch": 0.34894872740686095, "grad_norm": 2.0181987285614014, "learning_rate": 1.6456111128094458e-05, "loss": 0.0426, "step": 2365 }, { "epoch": 0.34909627443747693, "grad_norm": 1.851974606513977, "learning_rate": 1.6452176629230486e-05, "loss": 0.0249, "step": 2366 }, { "epoch": 0.34924382146809296, "grad_norm": 1.7971105575561523, "learning_rate": 1.6448240418425815e-05, "loss": 0.0513, "step": 2367 }, { "epoch": 0.34939136849870894, "grad_norm": 2.0325169563293457, "learning_rate": 1.644430249672483e-05, "loss": 0.0848, "step": 2368 }, { "epoch": 0.349538915529325, "grad_norm": 1.832484245300293, "learning_rate": 1.6440362865172373e-05, "loss": 0.0608, "step": 2369 }, { "epoch": 0.34968646255994096, "grad_norm": 2.5634117126464844, "learning_rate": 1.643642152481373e-05, "loss": 0.0657, "step": 2370 }, { "epoch": 0.349834009590557, "grad_norm": 2.4291880130767822, "learning_rate": 1.6432478476694654e-05, "loss": 0.1045, "step": 2371 }, { "epoch": 0.349981556621173, "grad_norm": 1.7768257856369019, "learning_rate": 1.6428533721861346e-05, "loss": 0.0642, "step": 2372 }, { "epoch": 0.350129103651789, "grad_norm": 2.673724889755249, "learning_rate": 1.6424587261360452e-05, "loss": 0.0779, "step": 2373 }, { "epoch": 0.35027665068240504, "grad_norm": 1.1445502042770386, "learning_rate": 1.642063909623908e-05, "loss": 0.0277, "step": 2374 }, { "epoch": 0.350424197713021, "grad_norm": 1.6839622259140015, "learning_rate": 1.641668922754479e-05, "loss": 0.0186, "step": 2375 }, { "epoch": 0.35057174474363706, "grad_norm": 0.8631309866905212, "learning_rate": 1.641273765632559e-05, "loss": 0.0226, "step": 2376 }, { "epoch": 0.35071929177425304, "grad_norm": 4.310630798339844, "learning_rate": 1.6408784383629937e-05, "loss": 0.127, "step": 2377 }, { "epoch": 0.3508668388048691, "grad_norm": 2.018258571624756, "learning_rate": 1.6404829410506747e-05, "loss": 0.0359, "step": 2378 }, { "epoch": 0.35101438583548505, "grad_norm": 1.7199050188064575, "learning_rate": 1.6400872738005385e-05, "loss": 0.0526, "step": 2379 }, { "epoch": 0.3511619328661011, "grad_norm": 3.265058755874634, "learning_rate": 1.639691436717566e-05, "loss": 0.071, "step": 2380 }, { "epoch": 0.3511619328661011, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.955, "eval_loss": 0.0693851187825203, "eval_precision": 0.9845360824742269, "eval_recall": 0.9271844660194175, "eval_runtime": 50.6047, "eval_samples_per_second": 5.75, "eval_steps_per_second": 0.198, "step": 2380 }, { "epoch": 0.35130947989671707, "grad_norm": 1.8791478872299194, "learning_rate": 1.6392954299067843e-05, "loss": 0.0339, "step": 2381 }, { "epoch": 0.3514570269273331, "grad_norm": 1.8809936046600342, "learning_rate": 1.6388992534732645e-05, "loss": 0.0407, "step": 2382 }, { "epoch": 0.3516045739579491, "grad_norm": 1.4170249700546265, "learning_rate": 1.6385029075221238e-05, "loss": 0.0351, "step": 2383 }, { "epoch": 0.3517521209885651, "grad_norm": 5.98276948928833, "learning_rate": 1.638106392158523e-05, "loss": 0.1323, "step": 2384 }, { "epoch": 0.3518996680191811, "grad_norm": 2.3200674057006836, "learning_rate": 1.6377097074876687e-05, "loss": 0.027, "step": 2385 }, { "epoch": 0.35204721504979714, "grad_norm": 7.031911373138428, "learning_rate": 1.637312853614813e-05, "loss": 0.111, "step": 2386 }, { "epoch": 0.3521947620804131, "grad_norm": 1.8072407245635986, "learning_rate": 1.636915830645251e-05, "loss": 0.0809, "step": 2387 }, { "epoch": 0.35234230911102915, "grad_norm": 1.564636468887329, "learning_rate": 1.636518638684325e-05, "loss": 0.0281, "step": 2388 }, { "epoch": 0.35248985614164513, "grad_norm": 2.847052812576294, "learning_rate": 1.6361212778374206e-05, "loss": 0.0926, "step": 2389 }, { "epoch": 0.35263740317226117, "grad_norm": 3.316835880279541, "learning_rate": 1.6357237482099682e-05, "loss": 0.1283, "step": 2390 }, { "epoch": 0.35278495020287715, "grad_norm": 1.7379379272460938, "learning_rate": 1.6353260499074437e-05, "loss": 0.049, "step": 2391 }, { "epoch": 0.3529324972334932, "grad_norm": 2.3816347122192383, "learning_rate": 1.6349281830353677e-05, "loss": 0.0739, "step": 2392 }, { "epoch": 0.35308004426410916, "grad_norm": 3.8929433822631836, "learning_rate": 1.6345301476993052e-05, "loss": 0.0741, "step": 2393 }, { "epoch": 0.3532275912947252, "grad_norm": 3.5631837844848633, "learning_rate": 1.6341319440048657e-05, "loss": 0.0788, "step": 2394 }, { "epoch": 0.3533751383253412, "grad_norm": 1.5988062620162964, "learning_rate": 1.6337335720577033e-05, "loss": 0.0634, "step": 2395 }, { "epoch": 0.3535226853559572, "grad_norm": 2.0330374240875244, "learning_rate": 1.633335031963518e-05, "loss": 0.0534, "step": 2396 }, { "epoch": 0.35367023238657325, "grad_norm": 2.1022183895111084, "learning_rate": 1.6329363238280528e-05, "loss": 0.0438, "step": 2397 }, { "epoch": 0.3538177794171892, "grad_norm": 2.7071707248687744, "learning_rate": 1.6325374477570965e-05, "loss": 0.0509, "step": 2398 }, { "epoch": 0.35396532644780526, "grad_norm": 2.6731655597686768, "learning_rate": 1.6321384038564816e-05, "loss": 0.0327, "step": 2399 }, { "epoch": 0.35411287347842124, "grad_norm": 1.992941975593567, "learning_rate": 1.6317391922320857e-05, "loss": 0.0724, "step": 2400 }, { "epoch": 0.35411287347842124, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.0716041848063469, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 50.0864, "eval_samples_per_second": 5.81, "eval_steps_per_second": 0.2, "step": 2400 }, { "epoch": 0.3542604205090373, "grad_norm": 1.9264169931411743, "learning_rate": 1.63133981298983e-05, "loss": 0.0579, "step": 2401 }, { "epoch": 0.35440796753965326, "grad_norm": 2.2756268978118896, "learning_rate": 1.6309402662356816e-05, "loss": 0.0785, "step": 2402 }, { "epoch": 0.3545555145702693, "grad_norm": 2.6466474533081055, "learning_rate": 1.630540552075651e-05, "loss": 0.0416, "step": 2403 }, { "epoch": 0.35470306160088527, "grad_norm": 1.7743314504623413, "learning_rate": 1.6301406706157937e-05, "loss": 0.0247, "step": 2404 }, { "epoch": 0.3548506086315013, "grad_norm": 2.087181568145752, "learning_rate": 1.6297406219622088e-05, "loss": 0.0479, "step": 2405 }, { "epoch": 0.3549981556621173, "grad_norm": 8.197646141052246, "learning_rate": 1.6293404062210406e-05, "loss": 0.0759, "step": 2406 }, { "epoch": 0.3551457026927333, "grad_norm": 7.282373428344727, "learning_rate": 1.628940023498477e-05, "loss": 0.1089, "step": 2407 }, { "epoch": 0.3552932497233493, "grad_norm": 1.1593992710113525, "learning_rate": 1.628539473900751e-05, "loss": 0.033, "step": 2408 }, { "epoch": 0.35544079675396534, "grad_norm": 5.951663970947266, "learning_rate": 1.6281387575341397e-05, "loss": 0.0383, "step": 2409 }, { "epoch": 0.3555883437845813, "grad_norm": 1.3353403806686401, "learning_rate": 1.627737874504964e-05, "loss": 0.0476, "step": 2410 }, { "epoch": 0.35573589081519735, "grad_norm": 1.3402293920516968, "learning_rate": 1.6273368249195885e-05, "loss": 0.0502, "step": 2411 }, { "epoch": 0.35588343784581333, "grad_norm": 2.364414930343628, "learning_rate": 1.626935608884424e-05, "loss": 0.0417, "step": 2412 }, { "epoch": 0.35603098487642937, "grad_norm": 4.8306498527526855, "learning_rate": 1.6265342265059235e-05, "loss": 0.1917, "step": 2413 }, { "epoch": 0.35617853190704535, "grad_norm": 1.529222011566162, "learning_rate": 1.6261326778905844e-05, "loss": 0.0148, "step": 2414 }, { "epoch": 0.3563260789376614, "grad_norm": 1.0430806875228882, "learning_rate": 1.6257309631449495e-05, "loss": 0.0152, "step": 2415 }, { "epoch": 0.35647362596827736, "grad_norm": 1.4311214685440063, "learning_rate": 1.6253290823756043e-05, "loss": 0.0464, "step": 2416 }, { "epoch": 0.3566211729988934, "grad_norm": 1.112471103668213, "learning_rate": 1.6249270356891788e-05, "loss": 0.033, "step": 2417 }, { "epoch": 0.3567687200295094, "grad_norm": 1.4111711978912354, "learning_rate": 1.624524823192348e-05, "loss": 0.0339, "step": 2418 }, { "epoch": 0.3569162670601254, "grad_norm": 2.091536283493042, "learning_rate": 1.624122444991829e-05, "loss": 0.0863, "step": 2419 }, { "epoch": 0.35706381409074145, "grad_norm": 0.88022780418396, "learning_rate": 1.623719901194384e-05, "loss": 0.0126, "step": 2420 }, { "epoch": 0.35706381409074145, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9545454545454546, "eval_loss": 0.08004338294267654, "eval_precision": 0.9947368421052631, "eval_recall": 0.9174757281553398, "eval_runtime": 49.8271, "eval_samples_per_second": 5.84, "eval_steps_per_second": 0.201, "step": 2420 }, { "epoch": 0.35721136112135743, "grad_norm": 2.082973003387451, "learning_rate": 1.623317191906819e-05, "loss": 0.0665, "step": 2421 }, { "epoch": 0.35735890815197346, "grad_norm": 1.9848361015319824, "learning_rate": 1.6229143172359846e-05, "loss": 0.0791, "step": 2422 }, { "epoch": 0.35750645518258944, "grad_norm": 2.008039951324463, "learning_rate": 1.6225112772887733e-05, "loss": 0.0622, "step": 2423 }, { "epoch": 0.3576540022132055, "grad_norm": 4.1017374992370605, "learning_rate": 1.6221080721721238e-05, "loss": 0.058, "step": 2424 }, { "epoch": 0.35780154924382146, "grad_norm": 2.1726508140563965, "learning_rate": 1.6217047019930173e-05, "loss": 0.0667, "step": 2425 }, { "epoch": 0.3579490962744375, "grad_norm": 0.7537729740142822, "learning_rate": 1.621301166858479e-05, "loss": 0.0185, "step": 2426 }, { "epoch": 0.3580966433050535, "grad_norm": 4.123482704162598, "learning_rate": 1.620897466875578e-05, "loss": 0.1422, "step": 2427 }, { "epoch": 0.3582441903356695, "grad_norm": 2.4677505493164062, "learning_rate": 1.6204936021514267e-05, "loss": 0.0612, "step": 2428 }, { "epoch": 0.3583917373662855, "grad_norm": 10.251601219177246, "learning_rate": 1.6200895727931816e-05, "loss": 0.1341, "step": 2429 }, { "epoch": 0.3585392843969015, "grad_norm": 5.24082088470459, "learning_rate": 1.6196853789080436e-05, "loss": 0.0935, "step": 2430 }, { "epoch": 0.3586868314275175, "grad_norm": 3.244222402572632, "learning_rate": 1.6192810206032558e-05, "loss": 0.1596, "step": 2431 }, { "epoch": 0.35883437845813354, "grad_norm": 5.2294535636901855, "learning_rate": 1.6188764979861057e-05, "loss": 0.087, "step": 2432 }, { "epoch": 0.3589819254887495, "grad_norm": 1.9989055395126343, "learning_rate": 1.618471811163924e-05, "loss": 0.068, "step": 2433 }, { "epoch": 0.35912947251936556, "grad_norm": 3.090933322906494, "learning_rate": 1.618066960244086e-05, "loss": 0.0651, "step": 2434 }, { "epoch": 0.35927701954998154, "grad_norm": 3.5918147563934326, "learning_rate": 1.61766194533401e-05, "loss": 0.0497, "step": 2435 }, { "epoch": 0.35942456658059757, "grad_norm": 4.346996307373047, "learning_rate": 1.6172567665411568e-05, "loss": 0.0944, "step": 2436 }, { "epoch": 0.35957211361121355, "grad_norm": 1.523473858833313, "learning_rate": 1.616851423973032e-05, "loss": 0.0277, "step": 2437 }, { "epoch": 0.3597196606418296, "grad_norm": 2.977506637573242, "learning_rate": 1.616445917737184e-05, "loss": 0.0796, "step": 2438 }, { "epoch": 0.35986720767244557, "grad_norm": 1.9257076978683472, "learning_rate": 1.6160402479412046e-05, "loss": 0.0406, "step": 2439 }, { "epoch": 0.3600147547030616, "grad_norm": 2.6524338722229004, "learning_rate": 1.61563441469273e-05, "loss": 0.0611, "step": 2440 }, { "epoch": 0.3600147547030616, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.06607688218355179, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.5964, "eval_samples_per_second": 5.867, "eval_steps_per_second": 0.202, "step": 2440 }, { "epoch": 0.36016230173367764, "grad_norm": 2.06833815574646, "learning_rate": 1.615228418099438e-05, "loss": 0.0578, "step": 2441 }, { "epoch": 0.3603098487642936, "grad_norm": 2.467794895172119, "learning_rate": 1.6148222582690518e-05, "loss": 0.0672, "step": 2442 }, { "epoch": 0.36045739579490965, "grad_norm": 2.2856760025024414, "learning_rate": 1.614415935309336e-05, "loss": 0.0639, "step": 2443 }, { "epoch": 0.36060494282552563, "grad_norm": 1.401296615600586, "learning_rate": 1.6140094493280992e-05, "loss": 0.0545, "step": 2444 }, { "epoch": 0.36075248985614167, "grad_norm": 4.119458198547363, "learning_rate": 1.613602800433194e-05, "loss": 0.0985, "step": 2445 }, { "epoch": 0.36090003688675765, "grad_norm": 2.8262550830841064, "learning_rate": 1.613195988732515e-05, "loss": 0.0547, "step": 2446 }, { "epoch": 0.3610475839173737, "grad_norm": 2.027926206588745, "learning_rate": 1.6127890143340012e-05, "loss": 0.07, "step": 2447 }, { "epoch": 0.36119513094798966, "grad_norm": 2.6602234840393066, "learning_rate": 1.6123818773456334e-05, "loss": 0.0634, "step": 2448 }, { "epoch": 0.3613426779786057, "grad_norm": 2.266278028488159, "learning_rate": 1.611974577875437e-05, "loss": 0.0596, "step": 2449 }, { "epoch": 0.3614902250092217, "grad_norm": 3.076375961303711, "learning_rate": 1.611567116031479e-05, "loss": 0.0451, "step": 2450 }, { "epoch": 0.3616377720398377, "grad_norm": 1.0650479793548584, "learning_rate": 1.6111594919218715e-05, "loss": 0.0388, "step": 2451 }, { "epoch": 0.3617853190704537, "grad_norm": 3.145294666290283, "learning_rate": 1.6107517056547673e-05, "loss": 0.1487, "step": 2452 }, { "epoch": 0.36193286610106973, "grad_norm": 2.0649852752685547, "learning_rate": 1.610343757338364e-05, "loss": 0.0367, "step": 2453 }, { "epoch": 0.3620804131316857, "grad_norm": 1.5061755180358887, "learning_rate": 1.609935647080901e-05, "loss": 0.0232, "step": 2454 }, { "epoch": 0.36222796016230174, "grad_norm": 1.9770785570144653, "learning_rate": 1.6095273749906614e-05, "loss": 0.0586, "step": 2455 }, { "epoch": 0.3623755071929177, "grad_norm": 2.9176297187805176, "learning_rate": 1.6091189411759717e-05, "loss": 0.0835, "step": 2456 }, { "epoch": 0.36252305422353376, "grad_norm": 2.026430130004883, "learning_rate": 1.6087103457452e-05, "loss": 0.0181, "step": 2457 }, { "epoch": 0.36267060125414974, "grad_norm": 1.9945342540740967, "learning_rate": 1.608301588806758e-05, "loss": 0.1087, "step": 2458 }, { "epoch": 0.3628181482847658, "grad_norm": 1.5255026817321777, "learning_rate": 1.6078926704691006e-05, "loss": 0.0585, "step": 2459 }, { "epoch": 0.36296569531538175, "grad_norm": 0.5948257446289062, "learning_rate": 1.6074835908407247e-05, "loss": 0.0067, "step": 2460 }, { "epoch": 0.36296569531538175, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9523809523809523, "eval_loss": 0.06712567806243896, "eval_precision": 0.9844559585492227, "eval_recall": 0.9223300970873787, "eval_runtime": 50.1299, "eval_samples_per_second": 5.805, "eval_steps_per_second": 0.199, "step": 2460 }, { "epoch": 0.3631132423459978, "grad_norm": 2.4280731678009033, "learning_rate": 1.6070743500301707e-05, "loss": 0.0688, "step": 2461 }, { "epoch": 0.36326078937661377, "grad_norm": 1.528167486190796, "learning_rate": 1.6066649481460212e-05, "loss": 0.0606, "step": 2462 }, { "epoch": 0.3634083364072298, "grad_norm": 4.370822429656982, "learning_rate": 1.606255385296902e-05, "loss": 0.0759, "step": 2463 }, { "epoch": 0.36355588343784584, "grad_norm": 2.9819388389587402, "learning_rate": 1.6058456615914815e-05, "loss": 0.0947, "step": 2464 }, { "epoch": 0.3637034304684618, "grad_norm": 6.783071517944336, "learning_rate": 1.6054357771384708e-05, "loss": 0.0887, "step": 2465 }, { "epoch": 0.36385097749907785, "grad_norm": 0.6223083734512329, "learning_rate": 1.605025732046624e-05, "loss": 0.0092, "step": 2466 }, { "epoch": 0.36399852452969383, "grad_norm": 0.8484699130058289, "learning_rate": 1.604615526424736e-05, "loss": 0.0334, "step": 2467 }, { "epoch": 0.36414607156030987, "grad_norm": 2.1984329223632812, "learning_rate": 1.604205160381647e-05, "loss": 0.0737, "step": 2468 }, { "epoch": 0.36429361859092585, "grad_norm": 2.140397071838379, "learning_rate": 1.6037946340262375e-05, "loss": 0.0776, "step": 2469 }, { "epoch": 0.3644411656215419, "grad_norm": 1.8038660287857056, "learning_rate": 1.6033839474674325e-05, "loss": 0.0902, "step": 2470 }, { "epoch": 0.36458871265215786, "grad_norm": 2.251108169555664, "learning_rate": 1.6029731008141977e-05, "loss": 0.0718, "step": 2471 }, { "epoch": 0.3647362596827739, "grad_norm": 1.1015743017196655, "learning_rate": 1.6025620941755425e-05, "loss": 0.046, "step": 2472 }, { "epoch": 0.3648838067133899, "grad_norm": 0.801582396030426, "learning_rate": 1.602150927660518e-05, "loss": 0.0418, "step": 2473 }, { "epoch": 0.3650313537440059, "grad_norm": 2.8035898208618164, "learning_rate": 1.6017396013782177e-05, "loss": 0.0896, "step": 2474 }, { "epoch": 0.3651789007746219, "grad_norm": 6.449028968811035, "learning_rate": 1.6013281154377788e-05, "loss": 0.0783, "step": 2475 }, { "epoch": 0.36532644780523793, "grad_norm": 0.8921969532966614, "learning_rate": 1.6009164699483793e-05, "loss": 0.0267, "step": 2476 }, { "epoch": 0.3654739948358539, "grad_norm": 3.9773943424224854, "learning_rate": 1.6005046650192403e-05, "loss": 0.116, "step": 2477 }, { "epoch": 0.36562154186646995, "grad_norm": 1.8626863956451416, "learning_rate": 1.6000927007596247e-05, "loss": 0.0838, "step": 2478 }, { "epoch": 0.3657690888970859, "grad_norm": 2.7865920066833496, "learning_rate": 1.5996805772788384e-05, "loss": 0.0636, "step": 2479 }, { "epoch": 0.36591663592770196, "grad_norm": 3.0623490810394287, "learning_rate": 1.5992682946862286e-05, "loss": 0.0804, "step": 2480 }, { "epoch": 0.36591663592770196, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06521567702293396, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.9821, "eval_samples_per_second": 5.822, "eval_steps_per_second": 0.2, "step": 2480 }, { "epoch": 0.36606418295831794, "grad_norm": 2.007986307144165, "learning_rate": 1.598855853091186e-05, "loss": 0.0292, "step": 2481 }, { "epoch": 0.366211729988934, "grad_norm": 2.4751055240631104, "learning_rate": 1.598443252603142e-05, "loss": 0.0426, "step": 2482 }, { "epoch": 0.36635927701954996, "grad_norm": 2.724830150604248, "learning_rate": 1.598030493331572e-05, "loss": 0.1108, "step": 2483 }, { "epoch": 0.366506824050166, "grad_norm": 1.4615898132324219, "learning_rate": 1.5976175753859912e-05, "loss": 0.0509, "step": 2484 }, { "epoch": 0.366654371080782, "grad_norm": 1.80696439743042, "learning_rate": 1.5972044988759587e-05, "loss": 0.0558, "step": 2485 }, { "epoch": 0.366801918111398, "grad_norm": 2.8761091232299805, "learning_rate": 1.596791263911075e-05, "loss": 0.1261, "step": 2486 }, { "epoch": 0.36694946514201404, "grad_norm": 1.5573318004608154, "learning_rate": 1.596377870600983e-05, "loss": 0.0583, "step": 2487 }, { "epoch": 0.36709701217263, "grad_norm": 2.090137481689453, "learning_rate": 1.5959643190553668e-05, "loss": 0.0684, "step": 2488 }, { "epoch": 0.36724455920324606, "grad_norm": 5.645848751068115, "learning_rate": 1.5955506093839537e-05, "loss": 0.1314, "step": 2489 }, { "epoch": 0.36739210623386204, "grad_norm": 1.7534146308898926, "learning_rate": 1.595136741696512e-05, "loss": 0.0368, "step": 2490 }, { "epoch": 0.3675396532644781, "grad_norm": 6.885266304016113, "learning_rate": 1.594722716102852e-05, "loss": 0.0878, "step": 2491 }, { "epoch": 0.36768720029509405, "grad_norm": 1.4330418109893799, "learning_rate": 1.594308532712827e-05, "loss": 0.0457, "step": 2492 }, { "epoch": 0.3678347473257101, "grad_norm": 1.5661605596542358, "learning_rate": 1.5938941916363302e-05, "loss": 0.0446, "step": 2493 }, { "epoch": 0.36798229435632607, "grad_norm": 3.1636805534362793, "learning_rate": 1.5934796929832984e-05, "loss": 0.0573, "step": 2494 }, { "epoch": 0.3681298413869421, "grad_norm": 2.598984479904175, "learning_rate": 1.5930650368637093e-05, "loss": 0.066, "step": 2495 }, { "epoch": 0.3682773884175581, "grad_norm": 1.6422033309936523, "learning_rate": 1.592650223387583e-05, "loss": 0.0507, "step": 2496 }, { "epoch": 0.3684249354481741, "grad_norm": 1.8862583637237549, "learning_rate": 1.5922352526649803e-05, "loss": 0.0789, "step": 2497 }, { "epoch": 0.3685724824787901, "grad_norm": 5.008487701416016, "learning_rate": 1.5918201248060045e-05, "loss": 0.0527, "step": 2498 }, { "epoch": 0.36872002950940613, "grad_norm": 0.9551709890365601, "learning_rate": 1.5914048399208012e-05, "loss": 0.0369, "step": 2499 }, { "epoch": 0.3688675765400221, "grad_norm": 2.677898645401001, "learning_rate": 1.5909893981195566e-05, "loss": 0.057, "step": 2500 }, { "epoch": 0.3688675765400221, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9521410579345088, "eval_loss": 0.07160571217536926, "eval_precision": 0.9895287958115183, "eval_recall": 0.9174757281553398, "eval_runtime": 50.2599, "eval_samples_per_second": 5.79, "eval_steps_per_second": 0.199, "step": 2500 }, { "epoch": 0.36901512357063815, "grad_norm": 2.3989951610565186, "learning_rate": 1.5905737995124983e-05, "loss": 0.1133, "step": 2501 }, { "epoch": 0.36916267060125413, "grad_norm": 1.354426622390747, "learning_rate": 1.590158044209897e-05, "loss": 0.0278, "step": 2502 }, { "epoch": 0.36931021763187016, "grad_norm": 2.5139896869659424, "learning_rate": 1.5897421323220633e-05, "loss": 0.066, "step": 2503 }, { "epoch": 0.36945776466248614, "grad_norm": 4.559173107147217, "learning_rate": 1.5893260639593508e-05, "loss": 0.0932, "step": 2504 }, { "epoch": 0.3696053116931022, "grad_norm": 0.9878551959991455, "learning_rate": 1.5889098392321532e-05, "loss": 0.0425, "step": 2505 }, { "epoch": 0.36975285872371816, "grad_norm": 3.37072491645813, "learning_rate": 1.588493458250907e-05, "loss": 0.1092, "step": 2506 }, { "epoch": 0.3699004057543342, "grad_norm": 1.257444977760315, "learning_rate": 1.5880769211260894e-05, "loss": 0.042, "step": 2507 }, { "epoch": 0.37004795278495023, "grad_norm": 2.199918746948242, "learning_rate": 1.587660227968219e-05, "loss": 0.0791, "step": 2508 }, { "epoch": 0.3701954998155662, "grad_norm": 0.9912312030792236, "learning_rate": 1.5872433788878566e-05, "loss": 0.0107, "step": 2509 }, { "epoch": 0.37034304684618224, "grad_norm": 2.2516462802886963, "learning_rate": 1.586826373995603e-05, "loss": 0.0759, "step": 2510 }, { "epoch": 0.3704905938767982, "grad_norm": 1.693091869354248, "learning_rate": 1.5864092134021017e-05, "loss": 0.0659, "step": 2511 }, { "epoch": 0.37063814090741426, "grad_norm": 1.32491934299469, "learning_rate": 1.5859918972180367e-05, "loss": 0.0313, "step": 2512 }, { "epoch": 0.37078568793803024, "grad_norm": 2.799652099609375, "learning_rate": 1.5855744255541334e-05, "loss": 0.0478, "step": 2513 }, { "epoch": 0.3709332349686463, "grad_norm": 4.083103656768799, "learning_rate": 1.5851567985211586e-05, "loss": 0.1405, "step": 2514 }, { "epoch": 0.37108078199926225, "grad_norm": 1.5154445171356201, "learning_rate": 1.5847390162299207e-05, "loss": 0.0492, "step": 2515 }, { "epoch": 0.3712283290298783, "grad_norm": 1.1110596656799316, "learning_rate": 1.5843210787912685e-05, "loss": 0.03, "step": 2516 }, { "epoch": 0.37137587606049427, "grad_norm": 1.641926646232605, "learning_rate": 1.5839029863160923e-05, "loss": 0.051, "step": 2517 }, { "epoch": 0.3715234230911103, "grad_norm": 1.944383978843689, "learning_rate": 1.5834847389153236e-05, "loss": 0.0352, "step": 2518 }, { "epoch": 0.3716709701217263, "grad_norm": 3.6904098987579346, "learning_rate": 1.583066336699935e-05, "loss": 0.0576, "step": 2519 }, { "epoch": 0.3718185171523423, "grad_norm": 1.6393698453903198, "learning_rate": 1.5826477797809405e-05, "loss": 0.0518, "step": 2520 }, { "epoch": 0.3718185171523423, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.07073094695806503, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 51.2003, "eval_samples_per_second": 5.684, "eval_steps_per_second": 0.195, "step": 2520 }, { "epoch": 0.3719660641829583, "grad_norm": 3.4592244625091553, "learning_rate": 1.5822290682693944e-05, "loss": 0.1144, "step": 2521 }, { "epoch": 0.37211361121357434, "grad_norm": 3.00449538230896, "learning_rate": 1.5818102022763925e-05, "loss": 0.0848, "step": 2522 }, { "epoch": 0.3722611582441903, "grad_norm": 3.234938383102417, "learning_rate": 1.581391181913072e-05, "loss": 0.1334, "step": 2523 }, { "epoch": 0.37240870527480635, "grad_norm": 2.543311595916748, "learning_rate": 1.58097200729061e-05, "loss": 0.0895, "step": 2524 }, { "epoch": 0.37255625230542233, "grad_norm": 4.5440592765808105, "learning_rate": 1.5805526785202253e-05, "loss": 0.0592, "step": 2525 }, { "epoch": 0.37270379933603837, "grad_norm": 0.908332884311676, "learning_rate": 1.5801331957131772e-05, "loss": 0.0355, "step": 2526 }, { "epoch": 0.37285134636665435, "grad_norm": 2.0351359844207764, "learning_rate": 1.5797135589807666e-05, "loss": 0.0539, "step": 2527 }, { "epoch": 0.3729988933972704, "grad_norm": 2.9527597427368164, "learning_rate": 1.5792937684343343e-05, "loss": 0.0935, "step": 2528 }, { "epoch": 0.37314644042788636, "grad_norm": 3.41292405128479, "learning_rate": 1.5788738241852624e-05, "loss": 0.0477, "step": 2529 }, { "epoch": 0.3732939874585024, "grad_norm": 1.2462111711502075, "learning_rate": 1.5784537263449736e-05, "loss": 0.0373, "step": 2530 }, { "epoch": 0.37344153448911843, "grad_norm": 3.910452127456665, "learning_rate": 1.5780334750249314e-05, "loss": 0.0394, "step": 2531 }, { "epoch": 0.3735890815197344, "grad_norm": 2.999420166015625, "learning_rate": 1.577613070336641e-05, "loss": 0.041, "step": 2532 }, { "epoch": 0.37373662855035045, "grad_norm": 1.388188123703003, "learning_rate": 1.5771925123916464e-05, "loss": 0.0353, "step": 2533 }, { "epoch": 0.3738841755809664, "grad_norm": 2.0514490604400635, "learning_rate": 1.5767718013015333e-05, "loss": 0.0654, "step": 2534 }, { "epoch": 0.37403172261158246, "grad_norm": 2.7276864051818848, "learning_rate": 1.576350937177928e-05, "loss": 0.0585, "step": 2535 }, { "epoch": 0.37417926964219844, "grad_norm": 2.511110305786133, "learning_rate": 1.5759299201324977e-05, "loss": 0.0775, "step": 2536 }, { "epoch": 0.3743268166728145, "grad_norm": 3.799807548522949, "learning_rate": 1.57550875027695e-05, "loss": 0.1337, "step": 2537 }, { "epoch": 0.37447436370343046, "grad_norm": 2.4047887325286865, "learning_rate": 1.5750874277230324e-05, "loss": 0.0829, "step": 2538 }, { "epoch": 0.3746219107340465, "grad_norm": 2.1289265155792236, "learning_rate": 1.5746659525825335e-05, "loss": 0.0539, "step": 2539 }, { "epoch": 0.3747694577646625, "grad_norm": 1.9650177955627441, "learning_rate": 1.574244324967283e-05, "loss": 0.0533, "step": 2540 }, { "epoch": 0.3747694577646625, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9494949494949495, "eval_loss": 0.07780446112155914, "eval_precision": 0.9894736842105263, "eval_recall": 0.912621359223301, "eval_runtime": 49.0742, "eval_samples_per_second": 5.93, "eval_steps_per_second": 0.204, "step": 2540 }, { "epoch": 0.3749170047952785, "grad_norm": 1.7750149965286255, "learning_rate": 1.573822544989149e-05, "loss": 0.0329, "step": 2541 }, { "epoch": 0.3750645518258945, "grad_norm": 3.3576529026031494, "learning_rate": 1.5734006127600423e-05, "loss": 0.1352, "step": 2542 }, { "epoch": 0.3752120988565105, "grad_norm": 0.9900513887405396, "learning_rate": 1.572978528391913e-05, "loss": 0.0399, "step": 2543 }, { "epoch": 0.3753596458871265, "grad_norm": 0.6436564922332764, "learning_rate": 1.5725562919967522e-05, "loss": 0.0168, "step": 2544 }, { "epoch": 0.37550719291774254, "grad_norm": 1.0064016580581665, "learning_rate": 1.57213390368659e-05, "loss": 0.0289, "step": 2545 }, { "epoch": 0.3756547399483585, "grad_norm": 1.2798937559127808, "learning_rate": 1.5717113635734983e-05, "loss": 0.0315, "step": 2546 }, { "epoch": 0.37580228697897455, "grad_norm": 1.8434165716171265, "learning_rate": 1.5712886717695887e-05, "loss": 0.0803, "step": 2547 }, { "epoch": 0.37594983400959053, "grad_norm": 4.3248820304870605, "learning_rate": 1.5708658283870125e-05, "loss": 0.1139, "step": 2548 }, { "epoch": 0.37609738104020657, "grad_norm": 2.611905336380005, "learning_rate": 1.5704428335379624e-05, "loss": 0.0198, "step": 2549 }, { "epoch": 0.37624492807082255, "grad_norm": 1.3592013120651245, "learning_rate": 1.5700196873346695e-05, "loss": 0.0715, "step": 2550 }, { "epoch": 0.3763924751014386, "grad_norm": 2.3580543994903564, "learning_rate": 1.5695963898894077e-05, "loss": 0.0415, "step": 2551 }, { "epoch": 0.3765400221320546, "grad_norm": 1.5415029525756836, "learning_rate": 1.5691729413144884e-05, "loss": 0.035, "step": 2552 }, { "epoch": 0.3766875691626706, "grad_norm": 4.153555870056152, "learning_rate": 1.568749341722265e-05, "loss": 0.0644, "step": 2553 }, { "epoch": 0.37683511619328663, "grad_norm": 3.2045950889587402, "learning_rate": 1.5683255912251292e-05, "loss": 0.0994, "step": 2554 }, { "epoch": 0.3769826632239026, "grad_norm": 1.5448007583618164, "learning_rate": 1.5679016899355142e-05, "loss": 0.0439, "step": 2555 }, { "epoch": 0.37713021025451865, "grad_norm": 2.089601755142212, "learning_rate": 1.567477637965893e-05, "loss": 0.0682, "step": 2556 }, { "epoch": 0.37727775728513463, "grad_norm": 1.6594382524490356, "learning_rate": 1.567053435428778e-05, "loss": 0.042, "step": 2557 }, { "epoch": 0.37742530431575066, "grad_norm": 1.2571824789047241, "learning_rate": 1.566629082436722e-05, "loss": 0.0221, "step": 2558 }, { "epoch": 0.37757285134636664, "grad_norm": 2.8939342498779297, "learning_rate": 1.566204579102317e-05, "loss": 0.0808, "step": 2559 }, { "epoch": 0.3777203983769827, "grad_norm": 1.391944408416748, "learning_rate": 1.5657799255381963e-05, "loss": 0.0371, "step": 2560 }, { "epoch": 0.3777203983769827, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.07141584903001785, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 48.8372, "eval_samples_per_second": 5.959, "eval_steps_per_second": 0.205, "step": 2560 }, { "epoch": 0.37786794540759866, "grad_norm": 0.8811967968940735, "learning_rate": 1.565355121857032e-05, "loss": 0.0085, "step": 2561 }, { "epoch": 0.3780154924382147, "grad_norm": 3.23637318611145, "learning_rate": 1.564930168171536e-05, "loss": 0.0438, "step": 2562 }, { "epoch": 0.3781630394688307, "grad_norm": 1.5000485181808472, "learning_rate": 1.5645050645944605e-05, "loss": 0.0543, "step": 2563 }, { "epoch": 0.3783105864994467, "grad_norm": 1.7088264226913452, "learning_rate": 1.564079811238597e-05, "loss": 0.0413, "step": 2564 }, { "epoch": 0.3784581335300627, "grad_norm": 1.382508397102356, "learning_rate": 1.563654408216777e-05, "loss": 0.0446, "step": 2565 }, { "epoch": 0.3786056805606787, "grad_norm": 1.1302746534347534, "learning_rate": 1.5632288556418718e-05, "loss": 0.0211, "step": 2566 }, { "epoch": 0.3787532275912947, "grad_norm": 3.420208692550659, "learning_rate": 1.562803153626792e-05, "loss": 0.0352, "step": 2567 }, { "epoch": 0.37890077462191074, "grad_norm": 1.7556750774383545, "learning_rate": 1.5623773022844882e-05, "loss": 0.0456, "step": 2568 }, { "epoch": 0.3790483216525267, "grad_norm": 5.931196689605713, "learning_rate": 1.561951301727951e-05, "loss": 0.0722, "step": 2569 }, { "epoch": 0.37919586868314276, "grad_norm": 5.869902610778809, "learning_rate": 1.5615251520702095e-05, "loss": 0.0739, "step": 2570 }, { "epoch": 0.37934341571375874, "grad_norm": 2.732017993927002, "learning_rate": 1.561098853424333e-05, "loss": 0.0894, "step": 2571 }, { "epoch": 0.37949096274437477, "grad_norm": 4.879835605621338, "learning_rate": 1.5606724059034307e-05, "loss": 0.1008, "step": 2572 }, { "epoch": 0.37963850977499075, "grad_norm": 1.5897046327590942, "learning_rate": 1.560245809620651e-05, "loss": 0.0443, "step": 2573 }, { "epoch": 0.3797860568056068, "grad_norm": 6.106495380401611, "learning_rate": 1.559819064689181e-05, "loss": 0.0394, "step": 2574 }, { "epoch": 0.3799336038362228, "grad_norm": 1.524953842163086, "learning_rate": 1.5593921712222487e-05, "loss": 0.0081, "step": 2575 }, { "epoch": 0.3800811508668388, "grad_norm": 2.946108102798462, "learning_rate": 1.55896512933312e-05, "loss": 0.1163, "step": 2576 }, { "epoch": 0.38022869789745484, "grad_norm": 4.350228786468506, "learning_rate": 1.558537939135101e-05, "loss": 0.0635, "step": 2577 }, { "epoch": 0.3803762449280708, "grad_norm": 4.12252950668335, "learning_rate": 1.5581106007415382e-05, "loss": 0.1282, "step": 2578 }, { "epoch": 0.38052379195868685, "grad_norm": 3.4724230766296387, "learning_rate": 1.5576831142658147e-05, "loss": 0.0461, "step": 2579 }, { "epoch": 0.38067133898930283, "grad_norm": 1.987731695175171, "learning_rate": 1.5572554798213554e-05, "loss": 0.1052, "step": 2580 }, { "epoch": 0.38067133898930283, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.06765548884868622, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 48.65, "eval_samples_per_second": 5.981, "eval_steps_per_second": 0.206, "step": 2580 }, { "epoch": 0.38081888601991887, "grad_norm": 1.3547906875610352, "learning_rate": 1.556827697521623e-05, "loss": 0.0471, "step": 2581 }, { "epoch": 0.38096643305053485, "grad_norm": 1.3106006383895874, "learning_rate": 1.5563997674801202e-05, "loss": 0.0349, "step": 2582 }, { "epoch": 0.3811139800811509, "grad_norm": 1.8305901288986206, "learning_rate": 1.5559716898103894e-05, "loss": 0.0562, "step": 2583 }, { "epoch": 0.38126152711176686, "grad_norm": 1.5525482892990112, "learning_rate": 1.55554346462601e-05, "loss": 0.0223, "step": 2584 }, { "epoch": 0.3814090741423829, "grad_norm": 2.5924134254455566, "learning_rate": 1.5551150920406032e-05, "loss": 0.104, "step": 2585 }, { "epoch": 0.3815566211729989, "grad_norm": 2.3563168048858643, "learning_rate": 1.5546865721678274e-05, "loss": 0.0773, "step": 2586 }, { "epoch": 0.3817041682036149, "grad_norm": 1.7580362558364868, "learning_rate": 1.554257905121381e-05, "loss": 0.0445, "step": 2587 }, { "epoch": 0.3818517152342309, "grad_norm": 3.968413829803467, "learning_rate": 1.553829091015001e-05, "loss": 0.0664, "step": 2588 }, { "epoch": 0.38199926226484693, "grad_norm": 7.13879919052124, "learning_rate": 1.553400129962464e-05, "loss": 0.0879, "step": 2589 }, { "epoch": 0.3821468092954629, "grad_norm": 3.904264211654663, "learning_rate": 1.552971022077585e-05, "loss": 0.1273, "step": 2590 }, { "epoch": 0.38229435632607894, "grad_norm": 2.8514137268066406, "learning_rate": 1.5525417674742182e-05, "loss": 0.0477, "step": 2591 }, { "epoch": 0.3824419033566949, "grad_norm": 2.156263828277588, "learning_rate": 1.5521123662662568e-05, "loss": 0.0737, "step": 2592 }, { "epoch": 0.38258945038731096, "grad_norm": 1.1735329627990723, "learning_rate": 1.5516828185676324e-05, "loss": 0.0203, "step": 2593 }, { "epoch": 0.38273699741792694, "grad_norm": 3.178234577178955, "learning_rate": 1.5512531244923167e-05, "loss": 0.0674, "step": 2594 }, { "epoch": 0.382884544448543, "grad_norm": 1.3014397621154785, "learning_rate": 1.5508232841543184e-05, "loss": 0.0351, "step": 2595 }, { "epoch": 0.383032091479159, "grad_norm": 2.792818307876587, "learning_rate": 1.5503932976676866e-05, "loss": 0.0489, "step": 2596 }, { "epoch": 0.383179638509775, "grad_norm": 1.0767203569412231, "learning_rate": 1.5499631651465086e-05, "loss": 0.0187, "step": 2597 }, { "epoch": 0.383327185540391, "grad_norm": 1.4761302471160889, "learning_rate": 1.5495328867049102e-05, "loss": 0.0408, "step": 2598 }, { "epoch": 0.383474732571007, "grad_norm": 2.7837605476379395, "learning_rate": 1.5491024624570562e-05, "loss": 0.0509, "step": 2599 }, { "epoch": 0.38362227960162304, "grad_norm": 2.75207257270813, "learning_rate": 1.54867189251715e-05, "loss": 0.0384, "step": 2600 }, { "epoch": 0.38362227960162304, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06409521400928497, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9433, "eval_samples_per_second": 5.946, "eval_steps_per_second": 0.204, "step": 2600 }, { "epoch": 0.383769826632239, "grad_norm": 2.3735427856445312, "learning_rate": 1.5482411769994345e-05, "loss": 0.0282, "step": 2601 }, { "epoch": 0.38391737366285505, "grad_norm": 1.8034306764602661, "learning_rate": 1.5478103160181894e-05, "loss": 0.0385, "step": 2602 }, { "epoch": 0.38406492069347103, "grad_norm": 4.5873703956604, "learning_rate": 1.5473793096877342e-05, "loss": 0.0937, "step": 2603 }, { "epoch": 0.38421246772408707, "grad_norm": 3.5777881145477295, "learning_rate": 1.5469481581224274e-05, "loss": 0.0632, "step": 2604 }, { "epoch": 0.38436001475470305, "grad_norm": 5.519190311431885, "learning_rate": 1.5465168614366642e-05, "loss": 0.1205, "step": 2605 }, { "epoch": 0.3845075617853191, "grad_norm": 2.084310531616211, "learning_rate": 1.5460854197448808e-05, "loss": 0.1105, "step": 2606 }, { "epoch": 0.38465510881593507, "grad_norm": 4.9759416580200195, "learning_rate": 1.54565383316155e-05, "loss": 0.1358, "step": 2607 }, { "epoch": 0.3848026558465511, "grad_norm": 7.058456897735596, "learning_rate": 1.545222101801184e-05, "loss": 0.1129, "step": 2608 }, { "epoch": 0.3849502028771671, "grad_norm": 7.210722923278809, "learning_rate": 1.5447902257783324e-05, "loss": 0.1591, "step": 2609 }, { "epoch": 0.3850977499077831, "grad_norm": 2.3055837154388428, "learning_rate": 1.5443582052075843e-05, "loss": 0.0695, "step": 2610 }, { "epoch": 0.3852452969383991, "grad_norm": 2.112778663635254, "learning_rate": 1.543926040203567e-05, "loss": 0.016, "step": 2611 }, { "epoch": 0.38539284396901513, "grad_norm": 4.085689544677734, "learning_rate": 1.5434937308809454e-05, "loss": 0.0682, "step": 2612 }, { "epoch": 0.3855403909996311, "grad_norm": 1.930567979812622, "learning_rate": 1.543061277354423e-05, "loss": 0.0481, "step": 2613 }, { "epoch": 0.38568793803024715, "grad_norm": 2.3646345138549805, "learning_rate": 1.5426286797387415e-05, "loss": 0.0781, "step": 2614 }, { "epoch": 0.3858354850608631, "grad_norm": 1.1108511686325073, "learning_rate": 1.5421959381486814e-05, "loss": 0.0443, "step": 2615 }, { "epoch": 0.38598303209147916, "grad_norm": 1.9388258457183838, "learning_rate": 1.5417630526990613e-05, "loss": 0.0693, "step": 2616 }, { "epoch": 0.38613057912209514, "grad_norm": 2.1014270782470703, "learning_rate": 1.5413300235047374e-05, "loss": 0.0223, "step": 2617 }, { "epoch": 0.3862781261527112, "grad_norm": 2.504620313644409, "learning_rate": 1.540896850680604e-05, "loss": 0.0857, "step": 2618 }, { "epoch": 0.3864256731833272, "grad_norm": 1.3942559957504272, "learning_rate": 1.5404635343415936e-05, "loss": 0.0474, "step": 2619 }, { "epoch": 0.3865732202139432, "grad_norm": 2.7268474102020264, "learning_rate": 1.5400300746026778e-05, "loss": 0.0482, "step": 2620 }, { "epoch": 0.3865732202139432, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.06430154293775558, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0159, "eval_samples_per_second": 5.937, "eval_steps_per_second": 0.204, "step": 2620 }, { "epoch": 0.3867207672445592, "grad_norm": 2.180053472518921, "learning_rate": 1.539596471578865e-05, "loss": 0.0615, "step": 2621 }, { "epoch": 0.3868683142751752, "grad_norm": 3.4556267261505127, "learning_rate": 1.539162725385202e-05, "loss": 0.0802, "step": 2622 }, { "epoch": 0.38701586130579124, "grad_norm": 1.9063140153884888, "learning_rate": 1.538728836136774e-05, "loss": 0.0955, "step": 2623 }, { "epoch": 0.3871634083364072, "grad_norm": 1.1004489660263062, "learning_rate": 1.5382948039487032e-05, "loss": 0.0403, "step": 2624 }, { "epoch": 0.38731095536702326, "grad_norm": 2.0437510013580322, "learning_rate": 1.537860628936151e-05, "loss": 0.0778, "step": 2625 }, { "epoch": 0.38745850239763924, "grad_norm": 3.103039503097534, "learning_rate": 1.5374263112143152e-05, "loss": 0.0898, "step": 2626 }, { "epoch": 0.3876060494282553, "grad_norm": 5.1956329345703125, "learning_rate": 1.5369918508984333e-05, "loss": 0.1251, "step": 2627 }, { "epoch": 0.38775359645887125, "grad_norm": 4.987479209899902, "learning_rate": 1.536557248103779e-05, "loss": 0.078, "step": 2628 }, { "epoch": 0.3879011434894873, "grad_norm": 2.1238086223602295, "learning_rate": 1.5361225029456644e-05, "loss": 0.0578, "step": 2629 }, { "epoch": 0.38804869052010327, "grad_norm": 3.2691850662231445, "learning_rate": 1.53568761553944e-05, "loss": 0.1406, "step": 2630 }, { "epoch": 0.3881962375507193, "grad_norm": 3.6067352294921875, "learning_rate": 1.535252586000493e-05, "loss": 0.0654, "step": 2631 }, { "epoch": 0.3883437845813353, "grad_norm": 1.4859058856964111, "learning_rate": 1.5348174144442484e-05, "loss": 0.0338, "step": 2632 }, { "epoch": 0.3884913316119513, "grad_norm": 3.181164026260376, "learning_rate": 1.5343821009861694e-05, "loss": 0.0542, "step": 2633 }, { "epoch": 0.3886388786425673, "grad_norm": 1.8944140672683716, "learning_rate": 1.533946645741757e-05, "loss": 0.0572, "step": 2634 }, { "epoch": 0.38878642567318333, "grad_norm": 2.441516637802124, "learning_rate": 1.5335110488265497e-05, "loss": 0.0797, "step": 2635 }, { "epoch": 0.3889339727037993, "grad_norm": 2.4447226524353027, "learning_rate": 1.533075310356123e-05, "loss": 0.0971, "step": 2636 }, { "epoch": 0.38908151973441535, "grad_norm": 6.448934078216553, "learning_rate": 1.53263943044609e-05, "loss": 0.0812, "step": 2637 }, { "epoch": 0.38922906676503133, "grad_norm": 1.7199122905731201, "learning_rate": 1.5322034092121026e-05, "loss": 0.0456, "step": 2638 }, { "epoch": 0.38937661379564736, "grad_norm": 1.9460617303848267, "learning_rate": 1.5317672467698485e-05, "loss": 0.1057, "step": 2639 }, { "epoch": 0.38952416082626334, "grad_norm": 2.730459690093994, "learning_rate": 1.5313309432350542e-05, "loss": 0.0892, "step": 2640 }, { "epoch": 0.38952416082626334, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.06760074198246002, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 48.787, "eval_samples_per_second": 5.965, "eval_steps_per_second": 0.205, "step": 2640 }, { "epoch": 0.3896717078568794, "grad_norm": 0.8042246699333191, "learning_rate": 1.5308944987234825e-05, "loss": 0.013, "step": 2641 }, { "epoch": 0.3898192548874954, "grad_norm": 1.227597713470459, "learning_rate": 1.530457913350935e-05, "loss": 0.0212, "step": 2642 }, { "epoch": 0.3899668019181114, "grad_norm": 1.4712393283843994, "learning_rate": 1.5300211872332486e-05, "loss": 0.0303, "step": 2643 }, { "epoch": 0.39011434894872743, "grad_norm": 3.382760524749756, "learning_rate": 1.5295843204863e-05, "loss": 0.0671, "step": 2644 }, { "epoch": 0.3902618959793434, "grad_norm": 4.514768600463867, "learning_rate": 1.5291473132260014e-05, "loss": 0.1636, "step": 2645 }, { "epoch": 0.39040944300995944, "grad_norm": 1.593783974647522, "learning_rate": 1.528710165568303e-05, "loss": 0.0537, "step": 2646 }, { "epoch": 0.3905569900405754, "grad_norm": 3.488246440887451, "learning_rate": 1.5282728776291925e-05, "loss": 0.0975, "step": 2647 }, { "epoch": 0.39070453707119146, "grad_norm": 2.83547306060791, "learning_rate": 1.527835449524694e-05, "loss": 0.0507, "step": 2648 }, { "epoch": 0.39085208410180744, "grad_norm": 0.731637716293335, "learning_rate": 1.52739788137087e-05, "loss": 0.0151, "step": 2649 }, { "epoch": 0.3909996311324235, "grad_norm": 1.1026577949523926, "learning_rate": 1.526960173283818e-05, "loss": 0.0514, "step": 2650 }, { "epoch": 0.39114717816303946, "grad_norm": 3.2436211109161377, "learning_rate": 1.5265223253796753e-05, "loss": 0.0681, "step": 2651 }, { "epoch": 0.3912947251936555, "grad_norm": 1.2560198307037354, "learning_rate": 1.526084337774615e-05, "loss": 0.0482, "step": 2652 }, { "epoch": 0.39144227222427147, "grad_norm": 1.882205605506897, "learning_rate": 1.5256462105848466e-05, "loss": 0.0497, "step": 2653 }, { "epoch": 0.3915898192548875, "grad_norm": 1.3920810222625732, "learning_rate": 1.5252079439266179e-05, "loss": 0.0175, "step": 2654 }, { "epoch": 0.3917373662855035, "grad_norm": 3.5424575805664062, "learning_rate": 1.5247695379162126e-05, "loss": 0.1318, "step": 2655 }, { "epoch": 0.3918849133161195, "grad_norm": 2.783203125, "learning_rate": 1.5243309926699528e-05, "loss": 0.0881, "step": 2656 }, { "epoch": 0.3920324603467355, "grad_norm": 7.389419078826904, "learning_rate": 1.5238923083041957e-05, "loss": 0.1177, "step": 2657 }, { "epoch": 0.39218000737735154, "grad_norm": 0.9015517830848694, "learning_rate": 1.5234534849353371e-05, "loss": 0.0208, "step": 2658 }, { "epoch": 0.3923275544079675, "grad_norm": 5.897048473358154, "learning_rate": 1.5230145226798088e-05, "loss": 0.1147, "step": 2659 }, { "epoch": 0.39247510143858355, "grad_norm": 2.223234176635742, "learning_rate": 1.5225754216540793e-05, "loss": 0.0999, "step": 2660 }, { "epoch": 0.39247510143858355, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06694638729095459, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 48.8797, "eval_samples_per_second": 5.953, "eval_steps_per_second": 0.205, "step": 2660 }, { "epoch": 0.39262264846919953, "grad_norm": 1.2350666522979736, "learning_rate": 1.5221361819746548e-05, "loss": 0.0281, "step": 2661 }, { "epoch": 0.39277019549981557, "grad_norm": 1.5647228956222534, "learning_rate": 1.5216968037580767e-05, "loss": 0.0405, "step": 2662 }, { "epoch": 0.3929177425304316, "grad_norm": 3.7081758975982666, "learning_rate": 1.5212572871209252e-05, "loss": 0.0871, "step": 2663 }, { "epoch": 0.3930652895610476, "grad_norm": 1.599417805671692, "learning_rate": 1.5208176321798163e-05, "loss": 0.0549, "step": 2664 }, { "epoch": 0.3932128365916636, "grad_norm": 3.1416401863098145, "learning_rate": 1.5203778390514017e-05, "loss": 0.0784, "step": 2665 }, { "epoch": 0.3933603836222796, "grad_norm": 3.2620067596435547, "learning_rate": 1.5199379078523713e-05, "loss": 0.0188, "step": 2666 }, { "epoch": 0.39350793065289563, "grad_norm": 4.300639629364014, "learning_rate": 1.5194978386994508e-05, "loss": 0.094, "step": 2667 }, { "epoch": 0.3936554776835116, "grad_norm": 2.2248342037200928, "learning_rate": 1.5190576317094025e-05, "loss": 0.0496, "step": 2668 }, { "epoch": 0.39380302471412765, "grad_norm": 1.6714344024658203, "learning_rate": 1.518617286999026e-05, "loss": 0.0534, "step": 2669 }, { "epoch": 0.3939505717447436, "grad_norm": 2.5148873329162598, "learning_rate": 1.5181768046851564e-05, "loss": 0.0468, "step": 2670 }, { "epoch": 0.39409811877535966, "grad_norm": 3.705939531326294, "learning_rate": 1.5177361848846661e-05, "loss": 0.0987, "step": 2671 }, { "epoch": 0.39424566580597564, "grad_norm": 3.149794816970825, "learning_rate": 1.5172954277144638e-05, "loss": 0.0939, "step": 2672 }, { "epoch": 0.3943932128365917, "grad_norm": 7.272430419921875, "learning_rate": 1.5168545332914942e-05, "loss": 0.0599, "step": 2673 }, { "epoch": 0.39454075986720766, "grad_norm": 2.7620885372161865, "learning_rate": 1.516413501732739e-05, "loss": 0.0436, "step": 2674 }, { "epoch": 0.3946883068978237, "grad_norm": 1.8695297241210938, "learning_rate": 1.5159723331552163e-05, "loss": 0.0965, "step": 2675 }, { "epoch": 0.3948358539284397, "grad_norm": 1.864319920539856, "learning_rate": 1.5155310276759795e-05, "loss": 0.036, "step": 2676 }, { "epoch": 0.3949834009590557, "grad_norm": 6.322586536407471, "learning_rate": 1.5150895854121204e-05, "loss": 0.1052, "step": 2677 }, { "epoch": 0.3951309479896717, "grad_norm": 1.7098565101623535, "learning_rate": 1.5146480064807648e-05, "loss": 0.057, "step": 2678 }, { "epoch": 0.3952784950202877, "grad_norm": 2.460897445678711, "learning_rate": 1.5142062909990758e-05, "loss": 0.0762, "step": 2679 }, { "epoch": 0.3954260420509037, "grad_norm": 1.830008625984192, "learning_rate": 1.5137644390842533e-05, "loss": 0.055, "step": 2680 }, { "epoch": 0.3954260420509037, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06576787680387497, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.2852, "eval_samples_per_second": 5.904, "eval_steps_per_second": 0.203, "step": 2680 }, { "epoch": 0.39557358908151974, "grad_norm": 1.5389025211334229, "learning_rate": 1.5133224508535325e-05, "loss": 0.0455, "step": 2681 }, { "epoch": 0.3957211361121357, "grad_norm": 2.203688859939575, "learning_rate": 1.5128803264241851e-05, "loss": 0.063, "step": 2682 }, { "epoch": 0.39586868314275175, "grad_norm": 4.756314754486084, "learning_rate": 1.5124380659135187e-05, "loss": 0.0933, "step": 2683 }, { "epoch": 0.39601623017336773, "grad_norm": 2.137584686279297, "learning_rate": 1.5119956694388778e-05, "loss": 0.096, "step": 2684 }, { "epoch": 0.39616377720398377, "grad_norm": 2.0340592861175537, "learning_rate": 1.511553137117642e-05, "loss": 0.0742, "step": 2685 }, { "epoch": 0.3963113242345998, "grad_norm": 0.8629946708679199, "learning_rate": 1.5111104690672274e-05, "loss": 0.035, "step": 2686 }, { "epoch": 0.3964588712652158, "grad_norm": 1.8027026653289795, "learning_rate": 1.5106676654050861e-05, "loss": 0.0558, "step": 2687 }, { "epoch": 0.3966064182958318, "grad_norm": 1.8760504722595215, "learning_rate": 1.5102247262487061e-05, "loss": 0.0454, "step": 2688 }, { "epoch": 0.3967539653264478, "grad_norm": 1.0887387990951538, "learning_rate": 1.5097816517156113e-05, "loss": 0.0422, "step": 2689 }, { "epoch": 0.39690151235706383, "grad_norm": 5.999011039733887, "learning_rate": 1.509338441923362e-05, "loss": 0.1624, "step": 2690 }, { "epoch": 0.3970490593876798, "grad_norm": 1.2648141384124756, "learning_rate": 1.5088950969895534e-05, "loss": 0.0412, "step": 2691 }, { "epoch": 0.39719660641829585, "grad_norm": 1.6419755220413208, "learning_rate": 1.5084516170318181e-05, "loss": 0.0408, "step": 2692 }, { "epoch": 0.39734415344891183, "grad_norm": 1.5324550867080688, "learning_rate": 1.5080080021678223e-05, "loss": 0.042, "step": 2693 }, { "epoch": 0.39749170047952787, "grad_norm": 3.000555992126465, "learning_rate": 1.5075642525152706e-05, "loss": 0.1203, "step": 2694 }, { "epoch": 0.39763924751014385, "grad_norm": 1.6850913763046265, "learning_rate": 1.5071203681919011e-05, "loss": 0.0497, "step": 2695 }, { "epoch": 0.3977867945407599, "grad_norm": 1.7757469415664673, "learning_rate": 1.5066763493154888e-05, "loss": 0.0802, "step": 2696 }, { "epoch": 0.39793434157137586, "grad_norm": 1.6993496417999268, "learning_rate": 1.5062321960038442e-05, "loss": 0.0312, "step": 2697 }, { "epoch": 0.3980818886019919, "grad_norm": 5.520252704620361, "learning_rate": 1.5057879083748134e-05, "loss": 0.1847, "step": 2698 }, { "epoch": 0.3982294356326079, "grad_norm": 1.8554192781448364, "learning_rate": 1.5053434865462789e-05, "loss": 0.0445, "step": 2699 }, { "epoch": 0.3983769826632239, "grad_norm": 2.941020965576172, "learning_rate": 1.5048989306361567e-05, "loss": 0.0561, "step": 2700 }, { "epoch": 0.3983769826632239, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.06555081158876419, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 48.8202, "eval_samples_per_second": 5.961, "eval_steps_per_second": 0.205, "step": 2700 }, { "epoch": 0.3985245296938399, "grad_norm": 3.5163369178771973, "learning_rate": 1.5044542407624009e-05, "loss": 0.1374, "step": 2701 }, { "epoch": 0.3986720767244559, "grad_norm": 2.070209264755249, "learning_rate": 1.5040094170429997e-05, "loss": 0.1201, "step": 2702 }, { "epoch": 0.3988196237550719, "grad_norm": 1.979637861251831, "learning_rate": 1.503564459595977e-05, "loss": 0.088, "step": 2703 }, { "epoch": 0.39896717078568794, "grad_norm": 3.4138991832733154, "learning_rate": 1.5031193685393923e-05, "loss": 0.1237, "step": 2704 }, { "epoch": 0.3991147178163039, "grad_norm": 3.020209550857544, "learning_rate": 1.5026741439913405e-05, "loss": 0.1206, "step": 2705 }, { "epoch": 0.39926226484691996, "grad_norm": 2.386651039123535, "learning_rate": 1.5022287860699525e-05, "loss": 0.0304, "step": 2706 }, { "epoch": 0.399409811877536, "grad_norm": 1.7568297386169434, "learning_rate": 1.5017832948933933e-05, "loss": 0.0509, "step": 2707 }, { "epoch": 0.39955735890815197, "grad_norm": 3.0309786796569824, "learning_rate": 1.5013376705798643e-05, "loss": 0.1172, "step": 2708 }, { "epoch": 0.399704905938768, "grad_norm": 8.826024055480957, "learning_rate": 1.5008919132476022e-05, "loss": 0.0542, "step": 2709 }, { "epoch": 0.399852452969384, "grad_norm": 2.5039260387420654, "learning_rate": 1.5004460230148785e-05, "loss": 0.0307, "step": 2710 }, { "epoch": 0.4, "grad_norm": 1.3823162317276, "learning_rate": 1.5000000000000002e-05, "loss": 0.0237, "step": 2711 }, { "epoch": 0.400147547030616, "grad_norm": 1.5165594816207886, "learning_rate": 1.4995538443213093e-05, "loss": 0.042, "step": 2712 }, { "epoch": 0.40029509406123204, "grad_norm": 3.518843412399292, "learning_rate": 1.499107556097184e-05, "loss": 0.0731, "step": 2713 }, { "epoch": 0.400442641091848, "grad_norm": 2.0842881202697754, "learning_rate": 1.498661135446036e-05, "loss": 0.0528, "step": 2714 }, { "epoch": 0.40059018812246405, "grad_norm": 7.332520484924316, "learning_rate": 1.4982145824863135e-05, "loss": 0.083, "step": 2715 }, { "epoch": 0.40073773515308003, "grad_norm": 1.7217494249343872, "learning_rate": 1.4977678973364993e-05, "loss": 0.0566, "step": 2716 }, { "epoch": 0.40088528218369607, "grad_norm": 2.752350330352783, "learning_rate": 1.4973210801151112e-05, "loss": 0.1581, "step": 2717 }, { "epoch": 0.40103282921431205, "grad_norm": 4.191921710968018, "learning_rate": 1.4968741309407024e-05, "loss": 0.1244, "step": 2718 }, { "epoch": 0.4011803762449281, "grad_norm": 2.3183047771453857, "learning_rate": 1.4964270499318602e-05, "loss": 0.0843, "step": 2719 }, { "epoch": 0.40132792327554406, "grad_norm": 1.3595219850540161, "learning_rate": 1.4959798372072085e-05, "loss": 0.0701, "step": 2720 }, { "epoch": 0.40132792327554406, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9571788413098237, "eval_loss": 0.06931287795305252, "eval_precision": 0.9947643979057592, "eval_recall": 0.9223300970873787, "eval_runtime": 50.6403, "eval_samples_per_second": 5.746, "eval_steps_per_second": 0.197, "step": 2720 }, { "epoch": 0.4014754703061601, "grad_norm": 2.4391093254089355, "learning_rate": 1.4955324928854044e-05, "loss": 0.0699, "step": 2721 }, { "epoch": 0.4016230173367761, "grad_norm": 1.50640869140625, "learning_rate": 1.4950850170851412e-05, "loss": 0.0497, "step": 2722 }, { "epoch": 0.4017705643673921, "grad_norm": 1.31010901927948, "learning_rate": 1.4946374099251464e-05, "loss": 0.0245, "step": 2723 }, { "epoch": 0.4019181113980081, "grad_norm": 3.041348695755005, "learning_rate": 1.4941896715241823e-05, "loss": 0.0872, "step": 2724 }, { "epoch": 0.40206565842862413, "grad_norm": 2.8297629356384277, "learning_rate": 1.4937418020010469e-05, "loss": 0.0416, "step": 2725 }, { "epoch": 0.4022132054592401, "grad_norm": 2.1849048137664795, "learning_rate": 1.4932938014745715e-05, "loss": 0.0964, "step": 2726 }, { "epoch": 0.40236075248985614, "grad_norm": 1.8231289386749268, "learning_rate": 1.4928456700636237e-05, "loss": 0.0423, "step": 2727 }, { "epoch": 0.4025082995204721, "grad_norm": 1.6884443759918213, "learning_rate": 1.4923974078871051e-05, "loss": 0.0633, "step": 2728 }, { "epoch": 0.40265584655108816, "grad_norm": 1.4499449729919434, "learning_rate": 1.4919490150639515e-05, "loss": 0.0454, "step": 2729 }, { "epoch": 0.4028033935817042, "grad_norm": 3.217435359954834, "learning_rate": 1.4915004917131345e-05, "loss": 0.0772, "step": 2730 }, { "epoch": 0.4029509406123202, "grad_norm": 1.5667036771774292, "learning_rate": 1.4910518379536597e-05, "loss": 0.0387, "step": 2731 }, { "epoch": 0.4030984876429362, "grad_norm": 3.124260663986206, "learning_rate": 1.4906030539045669e-05, "loss": 0.0587, "step": 2732 }, { "epoch": 0.4032460346735522, "grad_norm": 6.30237340927124, "learning_rate": 1.4901541396849308e-05, "loss": 0.1184, "step": 2733 }, { "epoch": 0.4033935817041682, "grad_norm": 0.9668976664543152, "learning_rate": 1.4897050954138617e-05, "loss": 0.0225, "step": 2734 }, { "epoch": 0.4035411287347842, "grad_norm": 2.487361431121826, "learning_rate": 1.4892559212105027e-05, "loss": 0.0758, "step": 2735 }, { "epoch": 0.40368867576540024, "grad_norm": 1.5224584341049194, "learning_rate": 1.4888066171940318e-05, "loss": 0.0484, "step": 2736 }, { "epoch": 0.4038362227960162, "grad_norm": 1.6367356777191162, "learning_rate": 1.4883571834836628e-05, "loss": 0.0325, "step": 2737 }, { "epoch": 0.40398376982663226, "grad_norm": 1.6221779584884644, "learning_rate": 1.4879076201986421e-05, "loss": 0.0611, "step": 2738 }, { "epoch": 0.40413131685724824, "grad_norm": 2.1196579933166504, "learning_rate": 1.4874579274582515e-05, "loss": 0.0664, "step": 2739 }, { "epoch": 0.40427886388786427, "grad_norm": 1.1505464315414429, "learning_rate": 1.4870081053818066e-05, "loss": 0.0307, "step": 2740 }, { "epoch": 0.40427886388786427, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.06367842853069305, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 49.7526, "eval_samples_per_second": 5.849, "eval_steps_per_second": 0.201, "step": 2740 }, { "epoch": 0.40442641091848025, "grad_norm": 1.1882596015930176, "learning_rate": 1.4865581540886582e-05, "loss": 0.0425, "step": 2741 }, { "epoch": 0.4045739579490963, "grad_norm": 4.099381446838379, "learning_rate": 1.4861080736981905e-05, "loss": 0.0244, "step": 2742 }, { "epoch": 0.40472150497971227, "grad_norm": 1.7206964492797852, "learning_rate": 1.485657864329822e-05, "loss": 0.0773, "step": 2743 }, { "epoch": 0.4048690520103283, "grad_norm": 1.7561384439468384, "learning_rate": 1.4852075261030063e-05, "loss": 0.0557, "step": 2744 }, { "epoch": 0.4050165990409443, "grad_norm": 2.1177151203155518, "learning_rate": 1.48475705913723e-05, "loss": 0.0799, "step": 2745 }, { "epoch": 0.4051641460715603, "grad_norm": 2.66115403175354, "learning_rate": 1.4843064635520148e-05, "loss": 0.0652, "step": 2746 }, { "epoch": 0.4053116931021763, "grad_norm": 1.9804788827896118, "learning_rate": 1.483855739466916e-05, "loss": 0.0237, "step": 2747 }, { "epoch": 0.40545924013279233, "grad_norm": 1.6353763341903687, "learning_rate": 1.4834048870015228e-05, "loss": 0.0123, "step": 2748 }, { "epoch": 0.4056067871634083, "grad_norm": 2.2910561561584473, "learning_rate": 1.4829539062754597e-05, "loss": 0.0215, "step": 2749 }, { "epoch": 0.40575433419402435, "grad_norm": 5.663002967834473, "learning_rate": 1.4825027974083834e-05, "loss": 0.1531, "step": 2750 }, { "epoch": 0.4059018812246404, "grad_norm": 5.3702239990234375, "learning_rate": 1.4820515605199861e-05, "loss": 0.0691, "step": 2751 }, { "epoch": 0.40604942825525636, "grad_norm": 3.6045732498168945, "learning_rate": 1.4816001957299933e-05, "loss": 0.1226, "step": 2752 }, { "epoch": 0.4061969752858724, "grad_norm": 3.9299819469451904, "learning_rate": 1.4811487031581645e-05, "loss": 0.0483, "step": 2753 }, { "epoch": 0.4063445223164884, "grad_norm": 10.226605415344238, "learning_rate": 1.4806970829242933e-05, "loss": 0.0871, "step": 2754 }, { "epoch": 0.4064920693471044, "grad_norm": 3.1816301345825195, "learning_rate": 1.4802453351482067e-05, "loss": 0.0527, "step": 2755 }, { "epoch": 0.4066396163777204, "grad_norm": 6.283945083618164, "learning_rate": 1.4797934599497662e-05, "loss": 0.0505, "step": 2756 }, { "epoch": 0.4067871634083364, "grad_norm": 2.2340939044952393, "learning_rate": 1.4793414574488663e-05, "loss": 0.043, "step": 2757 }, { "epoch": 0.4069347104389524, "grad_norm": 2.5966742038726807, "learning_rate": 1.4788893277654363e-05, "loss": 0.0987, "step": 2758 }, { "epoch": 0.40708225746956844, "grad_norm": 1.3183090686798096, "learning_rate": 1.4784370710194387e-05, "loss": 0.0415, "step": 2759 }, { "epoch": 0.4072298045001844, "grad_norm": 2.7218222618103027, "learning_rate": 1.4779846873308695e-05, "loss": 0.029, "step": 2760 }, { "epoch": 0.4072298045001844, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.06881628930568695, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.6728, "eval_samples_per_second": 5.858, "eval_steps_per_second": 0.201, "step": 2760 }, { "epoch": 0.40737735153080046, "grad_norm": 4.200390815734863, "learning_rate": 1.4775321768197581e-05, "loss": 0.0787, "step": 2761 }, { "epoch": 0.40752489856141644, "grad_norm": 3.1574833393096924, "learning_rate": 1.4770795396061688e-05, "loss": 0.0555, "step": 2762 }, { "epoch": 0.4076724455920325, "grad_norm": 2.7258169651031494, "learning_rate": 1.4766267758101987e-05, "loss": 0.1024, "step": 2763 }, { "epoch": 0.40781999262264845, "grad_norm": 3.123180866241455, "learning_rate": 1.4761738855519779e-05, "loss": 0.0417, "step": 2764 }, { "epoch": 0.4079675396532645, "grad_norm": 2.4268579483032227, "learning_rate": 1.4757208689516712e-05, "loss": 0.0895, "step": 2765 }, { "epoch": 0.40811508668388047, "grad_norm": 1.3478612899780273, "learning_rate": 1.4752677261294764e-05, "loss": 0.0468, "step": 2766 }, { "epoch": 0.4082626337144965, "grad_norm": 2.6363048553466797, "learning_rate": 1.474814457205625e-05, "loss": 0.106, "step": 2767 }, { "epoch": 0.4084101807451125, "grad_norm": 0.9977749586105347, "learning_rate": 1.474361062300381e-05, "loss": 0.0295, "step": 2768 }, { "epoch": 0.4085577277757285, "grad_norm": 1.5498818159103394, "learning_rate": 1.4739075415340431e-05, "loss": 0.072, "step": 2769 }, { "epoch": 0.4087052748063445, "grad_norm": 1.334392786026001, "learning_rate": 1.4734538950269435e-05, "loss": 0.0291, "step": 2770 }, { "epoch": 0.40885282183696053, "grad_norm": 1.635050654411316, "learning_rate": 1.473000122899446e-05, "loss": 0.0316, "step": 2771 }, { "epoch": 0.4090003688675765, "grad_norm": 9.07435131072998, "learning_rate": 1.4725462252719496e-05, "loss": 0.1369, "step": 2772 }, { "epoch": 0.40914791589819255, "grad_norm": 2.3774220943450928, "learning_rate": 1.4720922022648858e-05, "loss": 0.1066, "step": 2773 }, { "epoch": 0.4092954629288086, "grad_norm": 3.460172414779663, "learning_rate": 1.4716380539987192e-05, "loss": 0.088, "step": 2774 }, { "epoch": 0.40944300995942456, "grad_norm": 3.6848864555358887, "learning_rate": 1.4711837805939479e-05, "loss": 0.061, "step": 2775 }, { "epoch": 0.4095905569900406, "grad_norm": 1.2851831912994385, "learning_rate": 1.4707293821711033e-05, "loss": 0.044, "step": 2776 }, { "epoch": 0.4097381040206566, "grad_norm": 3.6719295978546143, "learning_rate": 1.4702748588507503e-05, "loss": 0.1967, "step": 2777 }, { "epoch": 0.4098856510512726, "grad_norm": 1.8939952850341797, "learning_rate": 1.4698202107534856e-05, "loss": 0.064, "step": 2778 }, { "epoch": 0.4100331980818886, "grad_norm": 1.3944340944290161, "learning_rate": 1.4693654379999404e-05, "loss": 0.0316, "step": 2779 }, { "epoch": 0.41018074511250463, "grad_norm": 1.997323751449585, "learning_rate": 1.4689105407107787e-05, "loss": 0.0687, "step": 2780 }, { "epoch": 0.41018074511250463, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9578163771712159, "eval_loss": 0.06621849536895752, "eval_precision": 0.9796954314720813, "eval_recall": 0.9368932038834952, "eval_runtime": 50.1189, "eval_samples_per_second": 5.806, "eval_steps_per_second": 0.2, "step": 2780 }, { "epoch": 0.4103282921431206, "grad_norm": 2.3787293434143066, "learning_rate": 1.4684555190066968e-05, "loss": 0.0948, "step": 2781 }, { "epoch": 0.41047583917373665, "grad_norm": 1.8337432146072388, "learning_rate": 1.4680003730084248e-05, "loss": 0.0382, "step": 2782 }, { "epoch": 0.4106233862043526, "grad_norm": 1.2472385168075562, "learning_rate": 1.4675451028367253e-05, "loss": 0.0354, "step": 2783 }, { "epoch": 0.41077093323496866, "grad_norm": 2.4104230403900146, "learning_rate": 1.4670897086123946e-05, "loss": 0.0513, "step": 2784 }, { "epoch": 0.41091848026558464, "grad_norm": 1.518510103225708, "learning_rate": 1.4666341904562611e-05, "loss": 0.0956, "step": 2785 }, { "epoch": 0.4110660272962007, "grad_norm": 2.87170147895813, "learning_rate": 1.4661785484891857e-05, "loss": 0.1163, "step": 2786 }, { "epoch": 0.41121357432681666, "grad_norm": 1.5524059534072876, "learning_rate": 1.4657227828320637e-05, "loss": 0.057, "step": 2787 }, { "epoch": 0.4113611213574327, "grad_norm": 1.718218207359314, "learning_rate": 1.4652668936058221e-05, "loss": 0.0442, "step": 2788 }, { "epoch": 0.41150866838804867, "grad_norm": 1.5808329582214355, "learning_rate": 1.4648108809314206e-05, "loss": 0.0238, "step": 2789 }, { "epoch": 0.4116562154186647, "grad_norm": 1.707741141319275, "learning_rate": 1.4643547449298518e-05, "loss": 0.0301, "step": 2790 }, { "epoch": 0.4118037624492807, "grad_norm": 1.4694123268127441, "learning_rate": 1.4638984857221418e-05, "loss": 0.0515, "step": 2791 }, { "epoch": 0.4119513094798967, "grad_norm": 2.5600531101226807, "learning_rate": 1.4634421034293488e-05, "loss": 0.0839, "step": 2792 }, { "epoch": 0.4120988565105127, "grad_norm": 2.050499439239502, "learning_rate": 1.4629855981725628e-05, "loss": 0.0782, "step": 2793 }, { "epoch": 0.41224640354112874, "grad_norm": 2.949308156967163, "learning_rate": 1.4625289700729082e-05, "loss": 0.0979, "step": 2794 }, { "epoch": 0.4123939505717447, "grad_norm": 3.772714853286743, "learning_rate": 1.4620722192515404e-05, "loss": 0.1116, "step": 2795 }, { "epoch": 0.41254149760236075, "grad_norm": 2.132383108139038, "learning_rate": 1.4616153458296482e-05, "loss": 0.0653, "step": 2796 }, { "epoch": 0.4126890446329768, "grad_norm": 1.8351786136627197, "learning_rate": 1.4611583499284527e-05, "loss": 0.0622, "step": 2797 }, { "epoch": 0.41283659166359277, "grad_norm": 1.5286303758621216, "learning_rate": 1.4607012316692077e-05, "loss": 0.0515, "step": 2798 }, { "epoch": 0.4129841386942088, "grad_norm": 3.168844699859619, "learning_rate": 1.4602439911731996e-05, "loss": 0.0694, "step": 2799 }, { "epoch": 0.4131316857248248, "grad_norm": 1.82865571975708, "learning_rate": 1.4597866285617462e-05, "loss": 0.0577, "step": 2800 }, { "epoch": 0.4131316857248248, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9521410579345088, "eval_loss": 0.06693486869335175, "eval_precision": 0.9895287958115183, "eval_recall": 0.9174757281553398, "eval_runtime": 51.4016, "eval_samples_per_second": 5.661, "eval_steps_per_second": 0.195, "step": 2800 }, { "epoch": 0.4132792327554408, "grad_norm": 1.5293619632720947, "learning_rate": 1.4593291439561987e-05, "loss": 0.0737, "step": 2801 }, { "epoch": 0.4134267797860568, "grad_norm": 3.1326496601104736, "learning_rate": 1.4588715374779408e-05, "loss": 0.0617, "step": 2802 }, { "epoch": 0.41357432681667283, "grad_norm": 1.7669413089752197, "learning_rate": 1.4584138092483876e-05, "loss": 0.0442, "step": 2803 }, { "epoch": 0.4137218738472888, "grad_norm": 1.1524748802185059, "learning_rate": 1.4579559593889871e-05, "loss": 0.0135, "step": 2804 }, { "epoch": 0.41386942087790485, "grad_norm": 2.5814433097839355, "learning_rate": 1.4574979880212196e-05, "loss": 0.0718, "step": 2805 }, { "epoch": 0.4140169679085208, "grad_norm": 2.010032892227173, "learning_rate": 1.4570398952665982e-05, "loss": 0.0282, "step": 2806 }, { "epoch": 0.41416451493913686, "grad_norm": 1.2112640142440796, "learning_rate": 1.4565816812466667e-05, "loss": 0.061, "step": 2807 }, { "epoch": 0.41431206196975284, "grad_norm": 0.8981008529663086, "learning_rate": 1.4561233460830022e-05, "loss": 0.0089, "step": 2808 }, { "epoch": 0.4144596090003689, "grad_norm": 1.5916980504989624, "learning_rate": 1.4556648898972137e-05, "loss": 0.055, "step": 2809 }, { "epoch": 0.41460715603098486, "grad_norm": 2.2386882305145264, "learning_rate": 1.4552063128109423e-05, "loss": 0.0895, "step": 2810 }, { "epoch": 0.4147547030616009, "grad_norm": 1.1865907907485962, "learning_rate": 1.4547476149458615e-05, "loss": 0.0327, "step": 2811 }, { "epoch": 0.4149022500922169, "grad_norm": 2.549903631210327, "learning_rate": 1.4542887964236756e-05, "loss": 0.0619, "step": 2812 }, { "epoch": 0.4150497971228329, "grad_norm": 2.8767945766448975, "learning_rate": 1.453829857366123e-05, "loss": 0.0844, "step": 2813 }, { "epoch": 0.4151973441534489, "grad_norm": 4.882362365722656, "learning_rate": 1.453370797894972e-05, "loss": 0.1263, "step": 2814 }, { "epoch": 0.4153448911840649, "grad_norm": 0.7119975090026855, "learning_rate": 1.4529116181320242e-05, "loss": 0.0077, "step": 2815 }, { "epoch": 0.4154924382146809, "grad_norm": 1.7586188316345215, "learning_rate": 1.4524523181991127e-05, "loss": 0.0559, "step": 2816 }, { "epoch": 0.41563998524529694, "grad_norm": 1.647408366203308, "learning_rate": 1.4519928982181021e-05, "loss": 0.0582, "step": 2817 }, { "epoch": 0.415787532275913, "grad_norm": 1.6541781425476074, "learning_rate": 1.4515333583108896e-05, "loss": 0.0387, "step": 2818 }, { "epoch": 0.41593507930652895, "grad_norm": 1.3552577495574951, "learning_rate": 1.4510736985994033e-05, "loss": 0.0317, "step": 2819 }, { "epoch": 0.416082626337145, "grad_norm": 1.2799692153930664, "learning_rate": 1.4506139192056045e-05, "loss": 0.0286, "step": 2820 }, { "epoch": 0.416082626337145, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.06240375339984894, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 50.6665, "eval_samples_per_second": 5.743, "eval_steps_per_second": 0.197, "step": 2820 }, { "epoch": 0.41623017336776097, "grad_norm": 1.1045743227005005, "learning_rate": 1.4501540202514845e-05, "loss": 0.0199, "step": 2821 }, { "epoch": 0.416377720398377, "grad_norm": 2.379251003265381, "learning_rate": 1.449694001859068e-05, "loss": 0.1016, "step": 2822 }, { "epoch": 0.416525267428993, "grad_norm": 1.0541225671768188, "learning_rate": 1.4492338641504098e-05, "loss": 0.0147, "step": 2823 }, { "epoch": 0.416672814459609, "grad_norm": 2.6938068866729736, "learning_rate": 1.4487736072475975e-05, "loss": 0.0518, "step": 2824 }, { "epoch": 0.416820361490225, "grad_norm": 6.698129177093506, "learning_rate": 1.4483132312727501e-05, "loss": 0.1062, "step": 2825 }, { "epoch": 0.41696790852084104, "grad_norm": 2.626894235610962, "learning_rate": 1.4478527363480177e-05, "loss": 0.1116, "step": 2826 }, { "epoch": 0.417115455551457, "grad_norm": 1.702731728553772, "learning_rate": 1.447392122595583e-05, "loss": 0.0203, "step": 2827 }, { "epoch": 0.41726300258207305, "grad_norm": 0.9909853935241699, "learning_rate": 1.4469313901376587e-05, "loss": 0.0205, "step": 2828 }, { "epoch": 0.41741054961268903, "grad_norm": 3.259319543838501, "learning_rate": 1.4464705390964904e-05, "loss": 0.0159, "step": 2829 }, { "epoch": 0.41755809664330507, "grad_norm": 1.2350735664367676, "learning_rate": 1.4460095695943546e-05, "loss": 0.0122, "step": 2830 }, { "epoch": 0.41770564367392105, "grad_norm": 4.924873352050781, "learning_rate": 1.445548481753559e-05, "loss": 0.0944, "step": 2831 }, { "epoch": 0.4178531907045371, "grad_norm": 3.3702070713043213, "learning_rate": 1.445087275696443e-05, "loss": 0.1016, "step": 2832 }, { "epoch": 0.41800073773515306, "grad_norm": 2.2954537868499756, "learning_rate": 1.444625951545377e-05, "loss": 0.0539, "step": 2833 }, { "epoch": 0.4181482847657691, "grad_norm": 0.8166601061820984, "learning_rate": 1.444164509422764e-05, "loss": 0.0181, "step": 2834 }, { "epoch": 0.4182958317963851, "grad_norm": 6.972323417663574, "learning_rate": 1.4437029494510366e-05, "loss": 0.1693, "step": 2835 }, { "epoch": 0.4184433788270011, "grad_norm": 2.469250202178955, "learning_rate": 1.4432412717526594e-05, "loss": 0.0677, "step": 2836 }, { "epoch": 0.4185909258576171, "grad_norm": 3.437309741973877, "learning_rate": 1.4427794764501285e-05, "loss": 0.0856, "step": 2837 }, { "epoch": 0.4187384728882331, "grad_norm": 2.0519073009490967, "learning_rate": 1.442317563665971e-05, "loss": 0.0551, "step": 2838 }, { "epoch": 0.4188860199188491, "grad_norm": 0.8575913310050964, "learning_rate": 1.441855533522745e-05, "loss": 0.0127, "step": 2839 }, { "epoch": 0.41903356694946514, "grad_norm": 5.212174415588379, "learning_rate": 1.4413933861430394e-05, "loss": 0.1144, "step": 2840 }, { "epoch": 0.41903356694946514, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.06470693647861481, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 50.2554, "eval_samples_per_second": 5.79, "eval_steps_per_second": 0.199, "step": 2840 }, { "epoch": 0.4191811139800812, "grad_norm": 2.56762957572937, "learning_rate": 1.4409311216494755e-05, "loss": 0.0178, "step": 2841 }, { "epoch": 0.41932866101069716, "grad_norm": 3.666867256164551, "learning_rate": 1.4404687401647044e-05, "loss": 0.1174, "step": 2842 }, { "epoch": 0.4194762080413132, "grad_norm": 5.381360054016113, "learning_rate": 1.4400062418114085e-05, "loss": 0.1072, "step": 2843 }, { "epoch": 0.41962375507192917, "grad_norm": 4.543903350830078, "learning_rate": 1.4395436267123017e-05, "loss": 0.2188, "step": 2844 }, { "epoch": 0.4197713021025452, "grad_norm": 3.5416975021362305, "learning_rate": 1.4390808949901285e-05, "loss": 0.0674, "step": 2845 }, { "epoch": 0.4199188491331612, "grad_norm": 4.192496299743652, "learning_rate": 1.4386180467676645e-05, "loss": 0.1477, "step": 2846 }, { "epoch": 0.4200663961637772, "grad_norm": 3.9027862548828125, "learning_rate": 1.4381550821677158e-05, "loss": 0.0729, "step": 2847 }, { "epoch": 0.4202139431943932, "grad_norm": 1.3977766036987305, "learning_rate": 1.4376920013131199e-05, "loss": 0.0419, "step": 2848 }, { "epoch": 0.42036149022500924, "grad_norm": 1.7689125537872314, "learning_rate": 1.4372288043267453e-05, "loss": 0.0576, "step": 2849 }, { "epoch": 0.4205090372556252, "grad_norm": 1.533967137336731, "learning_rate": 1.43676549133149e-05, "loss": 0.055, "step": 2850 }, { "epoch": 0.42065658428624125, "grad_norm": 1.7388272285461426, "learning_rate": 1.4363020624502851e-05, "loss": 0.0225, "step": 2851 }, { "epoch": 0.42080413131685723, "grad_norm": 1.160990595817566, "learning_rate": 1.4358385178060898e-05, "loss": 0.0577, "step": 2852 }, { "epoch": 0.42095167834747327, "grad_norm": 1.5778510570526123, "learning_rate": 1.435374857521896e-05, "loss": 0.0506, "step": 2853 }, { "epoch": 0.42109922537808925, "grad_norm": 1.1069244146347046, "learning_rate": 1.4349110817207254e-05, "loss": 0.0291, "step": 2854 }, { "epoch": 0.4212467724087053, "grad_norm": 3.0894153118133545, "learning_rate": 1.4344471905256305e-05, "loss": 0.0936, "step": 2855 }, { "epoch": 0.42139431943932126, "grad_norm": 4.4461894035339355, "learning_rate": 1.4339831840596948e-05, "loss": 0.0797, "step": 2856 }, { "epoch": 0.4215418664699373, "grad_norm": 1.1861770153045654, "learning_rate": 1.4335190624460311e-05, "loss": 0.0258, "step": 2857 }, { "epoch": 0.4216894135005533, "grad_norm": 2.4359805583953857, "learning_rate": 1.4330548258077845e-05, "loss": 0.0449, "step": 2858 }, { "epoch": 0.4218369605311693, "grad_norm": 1.5920735597610474, "learning_rate": 1.4325904742681297e-05, "loss": 0.0312, "step": 2859 }, { "epoch": 0.4219845075617853, "grad_norm": 2.276762008666992, "learning_rate": 1.4321260079502721e-05, "loss": 0.0424, "step": 2860 }, { "epoch": 0.4219845075617853, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9631449631449631, "eval_loss": 0.062026362866163254, "eval_precision": 0.9751243781094527, "eval_recall": 0.9514563106796117, "eval_runtime": 49.6881, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.201, "step": 2860 }, { "epoch": 0.42213205459240133, "grad_norm": 3.108128786087036, "learning_rate": 1.4316614269774472e-05, "loss": 0.0525, "step": 2861 }, { "epoch": 0.42227960162301736, "grad_norm": 1.6480109691619873, "learning_rate": 1.4311967314729209e-05, "loss": 0.0913, "step": 2862 }, { "epoch": 0.42242714865363334, "grad_norm": 3.100233316421509, "learning_rate": 1.4307319215599904e-05, "loss": 0.0787, "step": 2863 }, { "epoch": 0.4225746956842494, "grad_norm": 2.273146390914917, "learning_rate": 1.430266997361982e-05, "loss": 0.0709, "step": 2864 }, { "epoch": 0.42272224271486536, "grad_norm": 4.747122287750244, "learning_rate": 1.4298019590022534e-05, "loss": 0.0431, "step": 2865 }, { "epoch": 0.4228697897454814, "grad_norm": 2.219475030899048, "learning_rate": 1.4293368066041917e-05, "loss": 0.0316, "step": 2866 }, { "epoch": 0.4230173367760974, "grad_norm": 1.3505600690841675, "learning_rate": 1.4288715402912152e-05, "loss": 0.0382, "step": 2867 }, { "epoch": 0.4231648838067134, "grad_norm": 0.7899117469787598, "learning_rate": 1.4284061601867715e-05, "loss": 0.0329, "step": 2868 }, { "epoch": 0.4233124308373294, "grad_norm": 5.006072521209717, "learning_rate": 1.427940666414339e-05, "loss": 0.143, "step": 2869 }, { "epoch": 0.4234599778679454, "grad_norm": 2.4051613807678223, "learning_rate": 1.427475059097426e-05, "loss": 0.0283, "step": 2870 }, { "epoch": 0.4236075248985614, "grad_norm": 3.2632482051849365, "learning_rate": 1.4270093383595707e-05, "loss": 0.0924, "step": 2871 }, { "epoch": 0.42375507192917744, "grad_norm": 2.681210994720459, "learning_rate": 1.426543504324342e-05, "loss": 0.0383, "step": 2872 }, { "epoch": 0.4239026189597934, "grad_norm": 2.535755157470703, "learning_rate": 1.4260775571153387e-05, "loss": 0.0873, "step": 2873 }, { "epoch": 0.42405016599040946, "grad_norm": 2.574519634246826, "learning_rate": 1.4256114968561891e-05, "loss": 0.0943, "step": 2874 }, { "epoch": 0.42419771302102544, "grad_norm": 2.3423478603363037, "learning_rate": 1.425145323670552e-05, "loss": 0.0691, "step": 2875 }, { "epoch": 0.42434526005164147, "grad_norm": 1.4635498523712158, "learning_rate": 1.4246790376821158e-05, "loss": 0.0346, "step": 2876 }, { "epoch": 0.42449280708225745, "grad_norm": 7.789509296417236, "learning_rate": 1.4242126390145998e-05, "loss": 0.0369, "step": 2877 }, { "epoch": 0.4246403541128735, "grad_norm": 2.2766497135162354, "learning_rate": 1.4237461277917514e-05, "loss": 0.0564, "step": 2878 }, { "epoch": 0.42478790114348947, "grad_norm": 3.435554027557373, "learning_rate": 1.4232795041373497e-05, "loss": 0.0921, "step": 2879 }, { "epoch": 0.4249354481741055, "grad_norm": 4.713402271270752, "learning_rate": 1.4228127681752026e-05, "loss": 0.1575, "step": 2880 }, { "epoch": 0.4249354481741055, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06505037099123001, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 49.767, "eval_samples_per_second": 5.847, "eval_steps_per_second": 0.201, "step": 2880 }, { "epoch": 0.4250829952047215, "grad_norm": 1.5428632497787476, "learning_rate": 1.422345920029148e-05, "loss": 0.0855, "step": 2881 }, { "epoch": 0.4252305422353375, "grad_norm": 1.876064419746399, "learning_rate": 1.4218789598230536e-05, "loss": 0.0746, "step": 2882 }, { "epoch": 0.4253780892659535, "grad_norm": 0.9815390110015869, "learning_rate": 1.4214118876808167e-05, "loss": 0.0269, "step": 2883 }, { "epoch": 0.42552563629656953, "grad_norm": 1.062477707862854, "learning_rate": 1.4209447037263653e-05, "loss": 0.0264, "step": 2884 }, { "epoch": 0.42567318332718557, "grad_norm": 2.3003664016723633, "learning_rate": 1.4204774080836547e-05, "loss": 0.0859, "step": 2885 }, { "epoch": 0.42582073035780155, "grad_norm": 1.700762391090393, "learning_rate": 1.4200100008766726e-05, "loss": 0.0649, "step": 2886 }, { "epoch": 0.4259682773884176, "grad_norm": 1.427526593208313, "learning_rate": 1.4195424822294346e-05, "loss": 0.0638, "step": 2887 }, { "epoch": 0.42611582441903356, "grad_norm": 3.6006712913513184, "learning_rate": 1.4190748522659865e-05, "loss": 0.041, "step": 2888 }, { "epoch": 0.4262633714496496, "grad_norm": 2.776874542236328, "learning_rate": 1.418607111110403e-05, "loss": 0.0568, "step": 2889 }, { "epoch": 0.4264109184802656, "grad_norm": 2.376478433609009, "learning_rate": 1.418139258886789e-05, "loss": 0.0769, "step": 2890 }, { "epoch": 0.4265584655108816, "grad_norm": 1.7201793193817139, "learning_rate": 1.4176712957192791e-05, "loss": 0.0825, "step": 2891 }, { "epoch": 0.4267060125414976, "grad_norm": 2.0256354808807373, "learning_rate": 1.4172032217320362e-05, "loss": 0.0468, "step": 2892 }, { "epoch": 0.42685355957211363, "grad_norm": 3.5744376182556152, "learning_rate": 1.416735037049253e-05, "loss": 0.0401, "step": 2893 }, { "epoch": 0.4270011066027296, "grad_norm": 1.0371427536010742, "learning_rate": 1.4162667417951527e-05, "loss": 0.0551, "step": 2894 }, { "epoch": 0.42714865363334564, "grad_norm": 1.2663190364837646, "learning_rate": 1.4157983360939866e-05, "loss": 0.0385, "step": 2895 }, { "epoch": 0.4272962006639616, "grad_norm": 1.9672170877456665, "learning_rate": 1.4153298200700356e-05, "loss": 0.068, "step": 2896 }, { "epoch": 0.42744374769457766, "grad_norm": 2.2783992290496826, "learning_rate": 1.4148611938476097e-05, "loss": 0.0578, "step": 2897 }, { "epoch": 0.42759129472519364, "grad_norm": 1.3381931781768799, "learning_rate": 1.414392457551049e-05, "loss": 0.0466, "step": 2898 }, { "epoch": 0.4277388417558097, "grad_norm": 6.6983113288879395, "learning_rate": 1.413923611304722e-05, "loss": 0.1475, "step": 2899 }, { "epoch": 0.42788638878642565, "grad_norm": 1.3276938199996948, "learning_rate": 1.413454655233026e-05, "loss": 0.0651, "step": 2900 }, { "epoch": 0.42788638878642565, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.06203996762633324, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 50.6988, "eval_samples_per_second": 5.74, "eval_steps_per_second": 0.197, "step": 2900 }, { "epoch": 0.4280339358170417, "grad_norm": 2.4186997413635254, "learning_rate": 1.4129855894603885e-05, "loss": 0.0376, "step": 2901 }, { "epoch": 0.42818148284765767, "grad_norm": 3.0510151386260986, "learning_rate": 1.4125164141112656e-05, "loss": 0.0939, "step": 2902 }, { "epoch": 0.4283290298782737, "grad_norm": 2.0167698860168457, "learning_rate": 1.4120471293101425e-05, "loss": 0.0865, "step": 2903 }, { "epoch": 0.4284765769088897, "grad_norm": 1.6645357608795166, "learning_rate": 1.411577735181533e-05, "loss": 0.0358, "step": 2904 }, { "epoch": 0.4286241239395057, "grad_norm": 2.0547103881835938, "learning_rate": 1.411108231849981e-05, "loss": 0.0735, "step": 2905 }, { "epoch": 0.4287716709701217, "grad_norm": 1.8210386037826538, "learning_rate": 1.4106386194400583e-05, "loss": 0.0397, "step": 2906 }, { "epoch": 0.42891921800073773, "grad_norm": 2.793138027191162, "learning_rate": 1.4101688980763659e-05, "loss": 0.1364, "step": 2907 }, { "epoch": 0.42906676503135377, "grad_norm": 1.1232080459594727, "learning_rate": 1.4096990678835343e-05, "loss": 0.0413, "step": 2908 }, { "epoch": 0.42921431206196975, "grad_norm": 1.7142208814620972, "learning_rate": 1.409229128986222e-05, "loss": 0.0477, "step": 2909 }, { "epoch": 0.4293618590925858, "grad_norm": 1.2409260272979736, "learning_rate": 1.408759081509117e-05, "loss": 0.0204, "step": 2910 }, { "epoch": 0.42950940612320176, "grad_norm": 2.550419569015503, "learning_rate": 1.4082889255769359e-05, "loss": 0.1127, "step": 2911 }, { "epoch": 0.4296569531538178, "grad_norm": 1.4273427724838257, "learning_rate": 1.4078186613144238e-05, "loss": 0.0419, "step": 2912 }, { "epoch": 0.4298045001844338, "grad_norm": 2.231109857559204, "learning_rate": 1.4073482888463552e-05, "loss": 0.0785, "step": 2913 }, { "epoch": 0.4299520472150498, "grad_norm": 7.017048358917236, "learning_rate": 1.4068778082975325e-05, "loss": 0.0647, "step": 2914 }, { "epoch": 0.4300995942456658, "grad_norm": 10.177654266357422, "learning_rate": 1.4064072197927876e-05, "loss": 0.1263, "step": 2915 }, { "epoch": 0.43024714127628183, "grad_norm": 2.028869152069092, "learning_rate": 1.4059365234569804e-05, "loss": 0.0555, "step": 2916 }, { "epoch": 0.4303946883068978, "grad_norm": 1.6957443952560425, "learning_rate": 1.4054657194149996e-05, "loss": 0.0398, "step": 2917 }, { "epoch": 0.43054223533751385, "grad_norm": 1.7654411792755127, "learning_rate": 1.4049948077917626e-05, "loss": 0.0368, "step": 2918 }, { "epoch": 0.4306897823681298, "grad_norm": 2.582144021987915, "learning_rate": 1.404523788712215e-05, "loss": 0.0571, "step": 2919 }, { "epoch": 0.43083732939874586, "grad_norm": 14.307313919067383, "learning_rate": 1.4040526623013317e-05, "loss": 0.1524, "step": 2920 }, { "epoch": 0.43083732939874586, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05957566201686859, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9509, "eval_samples_per_second": 5.945, "eval_steps_per_second": 0.204, "step": 2920 }, { "epoch": 0.43098487642936184, "grad_norm": 2.7465403079986572, "learning_rate": 1.4035814286841152e-05, "loss": 0.0548, "step": 2921 }, { "epoch": 0.4311324234599779, "grad_norm": 8.710468292236328, "learning_rate": 1.4031100879855967e-05, "loss": 0.1184, "step": 2922 }, { "epoch": 0.43127997049059386, "grad_norm": 1.2768714427947998, "learning_rate": 1.4026386403308363e-05, "loss": 0.023, "step": 2923 }, { "epoch": 0.4314275175212099, "grad_norm": 3.4529173374176025, "learning_rate": 1.402167085844922e-05, "loss": 0.0994, "step": 2924 }, { "epoch": 0.43157506455182587, "grad_norm": 1.1402685642242432, "learning_rate": 1.4016954246529697e-05, "loss": 0.0168, "step": 2925 }, { "epoch": 0.4317226115824419, "grad_norm": 1.4269877672195435, "learning_rate": 1.4012236568801245e-05, "loss": 0.0106, "step": 2926 }, { "epoch": 0.4318701586130579, "grad_norm": 7.651778221130371, "learning_rate": 1.4007517826515598e-05, "loss": 0.1534, "step": 2927 }, { "epoch": 0.4320177056436739, "grad_norm": 2.558671236038208, "learning_rate": 1.4002798020924762e-05, "loss": 0.0688, "step": 2928 }, { "epoch": 0.43216525267428996, "grad_norm": 2.7218410968780518, "learning_rate": 1.3998077153281035e-05, "loss": 0.0471, "step": 2929 }, { "epoch": 0.43231279970490594, "grad_norm": 4.35391092300415, "learning_rate": 1.3993355224836997e-05, "loss": 0.0529, "step": 2930 }, { "epoch": 0.43246034673552197, "grad_norm": 2.4263267517089844, "learning_rate": 1.39886322368455e-05, "loss": 0.0679, "step": 2931 }, { "epoch": 0.43260789376613795, "grad_norm": 1.5176002979278564, "learning_rate": 1.3983908190559688e-05, "loss": 0.0404, "step": 2932 }, { "epoch": 0.432755440796754, "grad_norm": 3.3419029712677, "learning_rate": 1.3979183087232974e-05, "loss": 0.0812, "step": 2933 }, { "epoch": 0.43290298782736997, "grad_norm": 3.8462107181549072, "learning_rate": 1.3974456928119072e-05, "loss": 0.0801, "step": 2934 }, { "epoch": 0.433050534857986, "grad_norm": 4.845739364624023, "learning_rate": 1.3969729714471948e-05, "loss": 0.0742, "step": 2935 }, { "epoch": 0.433198081888602, "grad_norm": 2.009845495223999, "learning_rate": 1.3965001447545867e-05, "loss": 0.0655, "step": 2936 }, { "epoch": 0.433345628919218, "grad_norm": 3.665219783782959, "learning_rate": 1.396027212859537e-05, "loss": 0.0462, "step": 2937 }, { "epoch": 0.433493175949834, "grad_norm": 5.407234191894531, "learning_rate": 1.395554175887528e-05, "loss": 0.0695, "step": 2938 }, { "epoch": 0.43364072298045003, "grad_norm": 2.5863547325134277, "learning_rate": 1.3950810339640689e-05, "loss": 0.0912, "step": 2939 }, { "epoch": 0.433788270011066, "grad_norm": 2.1580841541290283, "learning_rate": 1.3946077872146973e-05, "loss": 0.0547, "step": 2940 }, { "epoch": 0.433788270011066, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05974743515253067, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3943, "eval_samples_per_second": 5.891, "eval_steps_per_second": 0.202, "step": 2940 }, { "epoch": 0.43393581704168205, "grad_norm": 0.9929150342941284, "learning_rate": 1.3941344357649791e-05, "loss": 0.0071, "step": 2941 }, { "epoch": 0.43408336407229803, "grad_norm": 1.4238746166229248, "learning_rate": 1.3936609797405071e-05, "loss": 0.0637, "step": 2942 }, { "epoch": 0.43423091110291406, "grad_norm": 2.932892084121704, "learning_rate": 1.3931874192669023e-05, "loss": 0.0788, "step": 2943 }, { "epoch": 0.43437845813353004, "grad_norm": 0.5835142135620117, "learning_rate": 1.3927137544698139e-05, "loss": 0.0103, "step": 2944 }, { "epoch": 0.4345260051641461, "grad_norm": 4.464839935302734, "learning_rate": 1.3922399854749176e-05, "loss": 0.0711, "step": 2945 }, { "epoch": 0.43467355219476206, "grad_norm": 1.7374027967453003, "learning_rate": 1.3917661124079175e-05, "loss": 0.0633, "step": 2946 }, { "epoch": 0.4348210992253781, "grad_norm": 1.175042748451233, "learning_rate": 1.3912921353945454e-05, "loss": 0.0125, "step": 2947 }, { "epoch": 0.4349686462559941, "grad_norm": 2.3182601928710938, "learning_rate": 1.390818054560561e-05, "loss": 0.069, "step": 2948 }, { "epoch": 0.4351161932866101, "grad_norm": 1.2398149967193604, "learning_rate": 1.39034387003175e-05, "loss": 0.0259, "step": 2949 }, { "epoch": 0.4352637403172261, "grad_norm": 1.1858731508255005, "learning_rate": 1.389869581933927e-05, "loss": 0.0338, "step": 2950 }, { "epoch": 0.4354112873478421, "grad_norm": 2.8445255756378174, "learning_rate": 1.3893951903929342e-05, "loss": 0.0427, "step": 2951 }, { "epoch": 0.43555883437845816, "grad_norm": 1.1522228717803955, "learning_rate": 1.3889206955346403e-05, "loss": 0.024, "step": 2952 }, { "epoch": 0.43570638140907414, "grad_norm": 1.824994444847107, "learning_rate": 1.388446097484942e-05, "loss": 0.0487, "step": 2953 }, { "epoch": 0.4358539284396902, "grad_norm": 0.9870152473449707, "learning_rate": 1.3879713963697633e-05, "loss": 0.0144, "step": 2954 }, { "epoch": 0.43600147547030615, "grad_norm": 4.8834943771362305, "learning_rate": 1.3874965923150561e-05, "loss": 0.1412, "step": 2955 }, { "epoch": 0.4361490225009222, "grad_norm": 3.2316534519195557, "learning_rate": 1.387021685446798e-05, "loss": 0.0965, "step": 2956 }, { "epoch": 0.43629656953153817, "grad_norm": 1.2645280361175537, "learning_rate": 1.3865466758909955e-05, "loss": 0.0592, "step": 2957 }, { "epoch": 0.4364441165621542, "grad_norm": 1.4252861738204956, "learning_rate": 1.3860715637736817e-05, "loss": 0.0399, "step": 2958 }, { "epoch": 0.4365916635927702, "grad_norm": 1.0654693841934204, "learning_rate": 1.385596349220917e-05, "loss": 0.0067, "step": 2959 }, { "epoch": 0.4367392106233862, "grad_norm": 0.529829204082489, "learning_rate": 1.3851210323587887e-05, "loss": 0.0064, "step": 2960 }, { "epoch": 0.4367392106233862, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.060430482029914856, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.0112, "eval_samples_per_second": 5.937, "eval_steps_per_second": 0.204, "step": 2960 }, { "epoch": 0.4368867576540022, "grad_norm": 2.846092462539673, "learning_rate": 1.3846456133134116e-05, "loss": 0.0428, "step": 2961 }, { "epoch": 0.43703430468461824, "grad_norm": 4.003254413604736, "learning_rate": 1.384170092210928e-05, "loss": 0.0631, "step": 2962 }, { "epoch": 0.4371818517152342, "grad_norm": 5.032021999359131, "learning_rate": 1.383694469177506e-05, "loss": 0.0974, "step": 2963 }, { "epoch": 0.43732939874585025, "grad_norm": 2.4210615158081055, "learning_rate": 1.3832187443393419e-05, "loss": 0.0679, "step": 2964 }, { "epoch": 0.43747694577646623, "grad_norm": 2.187182664871216, "learning_rate": 1.3827429178226586e-05, "loss": 0.0648, "step": 2965 }, { "epoch": 0.43762449280708227, "grad_norm": 1.2246345281600952, "learning_rate": 1.3822669897537062e-05, "loss": 0.0423, "step": 2966 }, { "epoch": 0.43777203983769825, "grad_norm": 1.3826806545257568, "learning_rate": 1.3817909602587614e-05, "loss": 0.0191, "step": 2967 }, { "epoch": 0.4379195868683143, "grad_norm": 1.8679994344711304, "learning_rate": 1.381314829464128e-05, "loss": 0.0448, "step": 2968 }, { "epoch": 0.43806713389893026, "grad_norm": 1.890270709991455, "learning_rate": 1.3808385974961365e-05, "loss": 0.0442, "step": 2969 }, { "epoch": 0.4382146809295463, "grad_norm": 0.7756760716438293, "learning_rate": 1.3803622644811447e-05, "loss": 0.0127, "step": 2970 }, { "epoch": 0.4383622279601623, "grad_norm": 2.30149245262146, "learning_rate": 1.3798858305455364e-05, "loss": 0.0669, "step": 2971 }, { "epoch": 0.4385097749907783, "grad_norm": 1.979562520980835, "learning_rate": 1.3794092958157235e-05, "loss": 0.0447, "step": 2972 }, { "epoch": 0.43865732202139435, "grad_norm": 2.545046091079712, "learning_rate": 1.378932660418143e-05, "loss": 0.0711, "step": 2973 }, { "epoch": 0.4388048690520103, "grad_norm": 4.241297721862793, "learning_rate": 1.3784559244792601e-05, "loss": 0.0886, "step": 2974 }, { "epoch": 0.43895241608262636, "grad_norm": 5.401245594024658, "learning_rate": 1.3779790881255654e-05, "loss": 0.0894, "step": 2975 }, { "epoch": 0.43909996311324234, "grad_norm": 1.664760708808899, "learning_rate": 1.3775021514835772e-05, "loss": 0.0442, "step": 2976 }, { "epoch": 0.4392475101438584, "grad_norm": 1.8569482564926147, "learning_rate": 1.3770251146798401e-05, "loss": 0.038, "step": 2977 }, { "epoch": 0.43939505717447436, "grad_norm": 5.758735179901123, "learning_rate": 1.3765479778409243e-05, "loss": 0.1074, "step": 2978 }, { "epoch": 0.4395426042050904, "grad_norm": 1.310660481452942, "learning_rate": 1.3760707410934285e-05, "loss": 0.0289, "step": 2979 }, { "epoch": 0.4396901512357064, "grad_norm": 3.755596160888672, "learning_rate": 1.3755934045639761e-05, "loss": 0.075, "step": 2980 }, { "epoch": 0.4396901512357064, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.05913817137479782, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.4704, "eval_samples_per_second": 5.882, "eval_steps_per_second": 0.202, "step": 2980 }, { "epoch": 0.4398376982663224, "grad_norm": 6.123476028442383, "learning_rate": 1.3751159683792178e-05, "loss": 0.1743, "step": 2981 }, { "epoch": 0.4399852452969384, "grad_norm": 2.4153153896331787, "learning_rate": 1.3746384326658304e-05, "loss": 0.0566, "step": 2982 }, { "epoch": 0.4401327923275544, "grad_norm": 3.284078598022461, "learning_rate": 1.3741607975505175e-05, "loss": 0.0527, "step": 2983 }, { "epoch": 0.4402803393581704, "grad_norm": 1.4554165601730347, "learning_rate": 1.3736830631600096e-05, "loss": 0.0336, "step": 2984 }, { "epoch": 0.44042788638878644, "grad_norm": 3.120377540588379, "learning_rate": 1.3732052296210614e-05, "loss": 0.048, "step": 2985 }, { "epoch": 0.4405754334194024, "grad_norm": 2.6256062984466553, "learning_rate": 1.3727272970604566e-05, "loss": 0.0377, "step": 2986 }, { "epoch": 0.44072298045001845, "grad_norm": 3.1758081912994385, "learning_rate": 1.3722492656050032e-05, "loss": 0.055, "step": 2987 }, { "epoch": 0.44087052748063443, "grad_norm": 1.6299469470977783, "learning_rate": 1.3717711353815363e-05, "loss": 0.0238, "step": 2988 }, { "epoch": 0.44101807451125047, "grad_norm": 3.81626558303833, "learning_rate": 1.3712929065169173e-05, "loss": 0.0744, "step": 2989 }, { "epoch": 0.44116562154186645, "grad_norm": 1.9643718004226685, "learning_rate": 1.3708145791380331e-05, "loss": 0.0482, "step": 2990 }, { "epoch": 0.4413131685724825, "grad_norm": 1.6884900331497192, "learning_rate": 1.370336153371798e-05, "loss": 0.0315, "step": 2991 }, { "epoch": 0.44146071560309846, "grad_norm": 2.5649969577789307, "learning_rate": 1.3698576293451504e-05, "loss": 0.0576, "step": 2992 }, { "epoch": 0.4416082626337145, "grad_norm": 9.247968673706055, "learning_rate": 1.3693790071850568e-05, "loss": 0.2358, "step": 2993 }, { "epoch": 0.4417558096643305, "grad_norm": 5.302760601043701, "learning_rate": 1.3689002870185086e-05, "loss": 0.0711, "step": 2994 }, { "epoch": 0.4419033566949465, "grad_norm": 1.7071747779846191, "learning_rate": 1.3684214689725233e-05, "loss": 0.0612, "step": 2995 }, { "epoch": 0.44205090372556255, "grad_norm": 3.928476572036743, "learning_rate": 1.367942553174145e-05, "loss": 0.0673, "step": 2996 }, { "epoch": 0.44219845075617853, "grad_norm": 6.280981063842773, "learning_rate": 1.3674635397504427e-05, "loss": 0.1014, "step": 2997 }, { "epoch": 0.44234599778679456, "grad_norm": 1.5455940961837769, "learning_rate": 1.3669844288285127e-05, "loss": 0.0607, "step": 2998 }, { "epoch": 0.44249354481741054, "grad_norm": 1.9261088371276855, "learning_rate": 1.3665052205354757e-05, "loss": 0.0468, "step": 2999 }, { "epoch": 0.4426410918480266, "grad_norm": 2.362344264984131, "learning_rate": 1.366025914998479e-05, "loss": 0.0542, "step": 3000 }, { "epoch": 0.4426410918480266, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9502487562189055, "eval_loss": 0.06608022749423981, "eval_precision": 0.9744897959183674, "eval_recall": 0.9271844660194175, "eval_runtime": 48.8974, "eval_samples_per_second": 5.951, "eval_steps_per_second": 0.205, "step": 3000 }, { "epoch": 0.44278863887864256, "grad_norm": 2.0436556339263916, "learning_rate": 1.3655465123446959e-05, "loss": 0.0417, "step": 3001 }, { "epoch": 0.4429361859092586, "grad_norm": 2.3471758365631104, "learning_rate": 1.3650670127013249e-05, "loss": 0.0539, "step": 3002 }, { "epoch": 0.4430837329398746, "grad_norm": 2.160348653793335, "learning_rate": 1.3645874161955906e-05, "loss": 0.0757, "step": 3003 }, { "epoch": 0.4432312799704906, "grad_norm": 2.4023683071136475, "learning_rate": 1.3641077229547427e-05, "loss": 0.0561, "step": 3004 }, { "epoch": 0.4433788270011066, "grad_norm": 3.0413734912872314, "learning_rate": 1.3636279331060582e-05, "loss": 0.0624, "step": 3005 }, { "epoch": 0.4435263740317226, "grad_norm": 3.7370595932006836, "learning_rate": 1.3631480467768379e-05, "loss": 0.0991, "step": 3006 }, { "epoch": 0.4436739210623386, "grad_norm": 3.022213935852051, "learning_rate": 1.3626680640944083e-05, "loss": 0.0564, "step": 3007 }, { "epoch": 0.44382146809295464, "grad_norm": 2.614274501800537, "learning_rate": 1.3621879851861233e-05, "loss": 0.109, "step": 3008 }, { "epoch": 0.4439690151235706, "grad_norm": 1.7476317882537842, "learning_rate": 1.3617078101793604e-05, "loss": 0.0484, "step": 3009 }, { "epoch": 0.44411656215418666, "grad_norm": 1.0840903520584106, "learning_rate": 1.3612275392015234e-05, "loss": 0.0331, "step": 3010 }, { "epoch": 0.44426410918480264, "grad_norm": 2.450360059738159, "learning_rate": 1.360747172380041e-05, "loss": 0.0844, "step": 3011 }, { "epoch": 0.44441165621541867, "grad_norm": 1.605069637298584, "learning_rate": 1.3602667098423688e-05, "loss": 0.04, "step": 3012 }, { "epoch": 0.44455920324603465, "grad_norm": 2.625332832336426, "learning_rate": 1.3597861517159863e-05, "loss": 0.0532, "step": 3013 }, { "epoch": 0.4447067502766507, "grad_norm": 2.7454235553741455, "learning_rate": 1.3593054981283983e-05, "loss": 0.1003, "step": 3014 }, { "epoch": 0.44485429730726667, "grad_norm": 2.795754909515381, "learning_rate": 1.358824749207136e-05, "loss": 0.0838, "step": 3015 }, { "epoch": 0.4450018443378827, "grad_norm": 1.675205111503601, "learning_rate": 1.3583439050797556e-05, "loss": 0.0482, "step": 3016 }, { "epoch": 0.4451493913684987, "grad_norm": 1.193233609199524, "learning_rate": 1.3578629658738379e-05, "loss": 0.0362, "step": 3017 }, { "epoch": 0.4452969383991147, "grad_norm": 2.819897174835205, "learning_rate": 1.3573819317169895e-05, "loss": 0.079, "step": 3018 }, { "epoch": 0.44544448542973075, "grad_norm": 2.0670878887176514, "learning_rate": 1.3569008027368419e-05, "loss": 0.0675, "step": 3019 }, { "epoch": 0.44559203246034673, "grad_norm": 2.0049493312835693, "learning_rate": 1.3564195790610523e-05, "loss": 0.0455, "step": 3020 }, { "epoch": 0.44559203246034673, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.06320898979902267, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 49.0469, "eval_samples_per_second": 5.933, "eval_steps_per_second": 0.204, "step": 3020 }, { "epoch": 0.44573957949096277, "grad_norm": 1.1069061756134033, "learning_rate": 1.3559382608173022e-05, "loss": 0.0262, "step": 3021 }, { "epoch": 0.44588712652157875, "grad_norm": 1.706955909729004, "learning_rate": 1.355456848133299e-05, "loss": 0.0594, "step": 3022 }, { "epoch": 0.4460346735521948, "grad_norm": 2.183849334716797, "learning_rate": 1.3549753411367746e-05, "loss": 0.0594, "step": 3023 }, { "epoch": 0.44618222058281076, "grad_norm": 2.619687557220459, "learning_rate": 1.3544937399554861e-05, "loss": 0.0803, "step": 3024 }, { "epoch": 0.4463297676134268, "grad_norm": 3.3790035247802734, "learning_rate": 1.3540120447172156e-05, "loss": 0.0945, "step": 3025 }, { "epoch": 0.4464773146440428, "grad_norm": 1.902966022491455, "learning_rate": 1.35353025554977e-05, "loss": 0.03, "step": 3026 }, { "epoch": 0.4466248616746588, "grad_norm": 1.8043938875198364, "learning_rate": 1.3530483725809817e-05, "loss": 0.0203, "step": 3027 }, { "epoch": 0.4467724087052748, "grad_norm": 5.156546115875244, "learning_rate": 1.352566395938707e-05, "loss": 0.1184, "step": 3028 }, { "epoch": 0.44691995573589083, "grad_norm": 2.7642292976379395, "learning_rate": 1.3520843257508282e-05, "loss": 0.0577, "step": 3029 }, { "epoch": 0.4470675027665068, "grad_norm": 2.0892961025238037, "learning_rate": 1.3516021621452515e-05, "loss": 0.0593, "step": 3030 }, { "epoch": 0.44721504979712284, "grad_norm": 1.9090710878372192, "learning_rate": 1.3511199052499083e-05, "loss": 0.0391, "step": 3031 }, { "epoch": 0.4473625968277388, "grad_norm": 2.736487627029419, "learning_rate": 1.3506375551927546e-05, "loss": 0.0534, "step": 3032 }, { "epoch": 0.44751014385835486, "grad_norm": 1.6985862255096436, "learning_rate": 1.350155112101771e-05, "loss": 0.025, "step": 3033 }, { "epoch": 0.44765769088897084, "grad_norm": 2.090182065963745, "learning_rate": 1.3496725761049637e-05, "loss": 0.0662, "step": 3034 }, { "epoch": 0.4478052379195869, "grad_norm": 1.589858055114746, "learning_rate": 1.349189947330362e-05, "loss": 0.0646, "step": 3035 }, { "epoch": 0.44795278495020285, "grad_norm": 3.1653220653533936, "learning_rate": 1.3487072259060212e-05, "loss": 0.0767, "step": 3036 }, { "epoch": 0.4481003319808189, "grad_norm": 2.277463436126709, "learning_rate": 1.3482244119600205e-05, "loss": 0.0955, "step": 3037 }, { "epoch": 0.44824787901143487, "grad_norm": 3.4395713806152344, "learning_rate": 1.3477415056204638e-05, "loss": 0.1108, "step": 3038 }, { "epoch": 0.4483954260420509, "grad_norm": 2.3666210174560547, "learning_rate": 1.3472585070154792e-05, "loss": 0.0652, "step": 3039 }, { "epoch": 0.44854297307266694, "grad_norm": 1.4150041341781616, "learning_rate": 1.3467754162732196e-05, "loss": 0.0152, "step": 3040 }, { "epoch": 0.44854297307266694, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9656862745098039, "eval_loss": 0.06047435104846954, "eval_precision": 0.9752475247524752, "eval_recall": 0.9563106796116505, "eval_runtime": 48.7018, "eval_samples_per_second": 5.975, "eval_steps_per_second": 0.205, "step": 3040 }, { "epoch": 0.4486905201032829, "grad_norm": 4.477144718170166, "learning_rate": 1.3462922335218628e-05, "loss": 0.0602, "step": 3041 }, { "epoch": 0.44883806713389895, "grad_norm": 8.590018272399902, "learning_rate": 1.34580895888961e-05, "loss": 0.1576, "step": 3042 }, { "epoch": 0.44898561416451493, "grad_norm": 2.6383631229400635, "learning_rate": 1.3453255925046878e-05, "loss": 0.0918, "step": 3043 }, { "epoch": 0.44913316119513097, "grad_norm": 2.722301721572876, "learning_rate": 1.3448421344953464e-05, "loss": 0.0644, "step": 3044 }, { "epoch": 0.44928070822574695, "grad_norm": 2.381572723388672, "learning_rate": 1.3443585849898604e-05, "loss": 0.0804, "step": 3045 }, { "epoch": 0.449428255256363, "grad_norm": 2.585970163345337, "learning_rate": 1.343874944116529e-05, "loss": 0.108, "step": 3046 }, { "epoch": 0.44957580228697896, "grad_norm": 0.8543123006820679, "learning_rate": 1.3433912120036754e-05, "loss": 0.0213, "step": 3047 }, { "epoch": 0.449723349317595, "grad_norm": 4.142187118530273, "learning_rate": 1.3429073887796475e-05, "loss": 0.0593, "step": 3048 }, { "epoch": 0.449870896348211, "grad_norm": 1.7097938060760498, "learning_rate": 1.3424234745728165e-05, "loss": 0.0383, "step": 3049 }, { "epoch": 0.450018443378827, "grad_norm": 1.9832948446273804, "learning_rate": 1.3419394695115781e-05, "loss": 0.0821, "step": 3050 }, { "epoch": 0.450165990409443, "grad_norm": 4.08234977722168, "learning_rate": 1.341455373724353e-05, "loss": 0.0632, "step": 3051 }, { "epoch": 0.45031353744005903, "grad_norm": 5.740954399108887, "learning_rate": 1.3409711873395843e-05, "loss": 0.1143, "step": 3052 }, { "epoch": 0.450461084470675, "grad_norm": 4.002523422241211, "learning_rate": 1.3404869104857405e-05, "loss": 0.0348, "step": 3053 }, { "epoch": 0.45060863150129105, "grad_norm": 2.5290651321411133, "learning_rate": 1.340002543291313e-05, "loss": 0.1293, "step": 3054 }, { "epoch": 0.450756178531907, "grad_norm": 1.5682281255722046, "learning_rate": 1.3395180858848192e-05, "loss": 0.0537, "step": 3055 }, { "epoch": 0.45090372556252306, "grad_norm": 3.2452805042266846, "learning_rate": 1.3390335383947977e-05, "loss": 0.0505, "step": 3056 }, { "epoch": 0.45105127259313904, "grad_norm": 1.6564780473709106, "learning_rate": 1.3385489009498125e-05, "loss": 0.0393, "step": 3057 }, { "epoch": 0.4511988196237551, "grad_norm": 2.3451671600341797, "learning_rate": 1.3380641736784519e-05, "loss": 0.0672, "step": 3058 }, { "epoch": 0.45134636665437106, "grad_norm": 1.4987071752548218, "learning_rate": 1.3375793567093268e-05, "loss": 0.0283, "step": 3059 }, { "epoch": 0.4514939136849871, "grad_norm": 1.7905908823013306, "learning_rate": 1.3370944501710732e-05, "loss": 0.0467, "step": 3060 }, { "epoch": 0.4514939136849871, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06455375254154205, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 49.0399, "eval_samples_per_second": 5.934, "eval_steps_per_second": 0.204, "step": 3060 }, { "epoch": 0.45164146071560307, "grad_norm": 1.055521845817566, "learning_rate": 1.3366094541923495e-05, "loss": 0.0255, "step": 3061 }, { "epoch": 0.4517890077462191, "grad_norm": 3.3218793869018555, "learning_rate": 1.3361243689018393e-05, "loss": 0.1044, "step": 3062 }, { "epoch": 0.45193655477683514, "grad_norm": 3.484714984893799, "learning_rate": 1.3356391944282486e-05, "loss": 0.1492, "step": 3063 }, { "epoch": 0.4520841018074511, "grad_norm": 1.4983792304992676, "learning_rate": 1.3351539309003075e-05, "loss": 0.0401, "step": 3064 }, { "epoch": 0.45223164883806716, "grad_norm": 3.907957077026367, "learning_rate": 1.3346685784467698e-05, "loss": 0.0942, "step": 3065 }, { "epoch": 0.45237919586868314, "grad_norm": 2.392016649246216, "learning_rate": 1.3341831371964136e-05, "loss": 0.0516, "step": 3066 }, { "epoch": 0.4525267428992992, "grad_norm": 1.7939438819885254, "learning_rate": 1.3336976072780392e-05, "loss": 0.025, "step": 3067 }, { "epoch": 0.45267428992991515, "grad_norm": 2.8417091369628906, "learning_rate": 1.3332119888204711e-05, "loss": 0.0716, "step": 3068 }, { "epoch": 0.4528218369605312, "grad_norm": 0.8938924074172974, "learning_rate": 1.3327262819525579e-05, "loss": 0.0316, "step": 3069 }, { "epoch": 0.45296938399114717, "grad_norm": 3.0555381774902344, "learning_rate": 1.3322404868031704e-05, "loss": 0.0701, "step": 3070 }, { "epoch": 0.4531169310217632, "grad_norm": 1.8799232244491577, "learning_rate": 1.3317546035012037e-05, "loss": 0.0827, "step": 3071 }, { "epoch": 0.4532644780523792, "grad_norm": 1.2035980224609375, "learning_rate": 1.331268632175576e-05, "loss": 0.0388, "step": 3072 }, { "epoch": 0.4534120250829952, "grad_norm": 2.7316396236419678, "learning_rate": 1.3307825729552293e-05, "loss": 0.0499, "step": 3073 }, { "epoch": 0.4535595721136112, "grad_norm": 3.673755407333374, "learning_rate": 1.330296425969128e-05, "loss": 0.0533, "step": 3074 }, { "epoch": 0.45370711914422723, "grad_norm": 2.7418811321258545, "learning_rate": 1.3298101913462604e-05, "loss": 0.0866, "step": 3075 }, { "epoch": 0.4538546661748432, "grad_norm": 1.6127675771713257, "learning_rate": 1.3293238692156385e-05, "loss": 0.0471, "step": 3076 }, { "epoch": 0.45400221320545925, "grad_norm": 2.945185899734497, "learning_rate": 1.3288374597062966e-05, "loss": 0.0882, "step": 3077 }, { "epoch": 0.45414976023607523, "grad_norm": 2.690981388092041, "learning_rate": 1.3283509629472921e-05, "loss": 0.047, "step": 3078 }, { "epoch": 0.45429730726669126, "grad_norm": 1.3387901782989502, "learning_rate": 1.327864379067707e-05, "loss": 0.0344, "step": 3079 }, { "epoch": 0.45444485429730724, "grad_norm": 1.102782130241394, "learning_rate": 1.3273777081966449e-05, "loss": 0.0418, "step": 3080 }, { "epoch": 0.45444485429730724, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9573934837092731, "eval_loss": 0.059664610773324966, "eval_precision": 0.9896373056994818, "eval_recall": 0.9271844660194175, "eval_runtime": 51.1031, "eval_samples_per_second": 5.694, "eval_steps_per_second": 0.196, "step": 3080 }, { "epoch": 0.4545924013279233, "grad_norm": 2.3887124061584473, "learning_rate": 1.326890950463233e-05, "loss": 0.0648, "step": 3081 }, { "epoch": 0.45473994835853926, "grad_norm": 2.8209123611450195, "learning_rate": 1.326404105996622e-05, "loss": 0.0878, "step": 3082 }, { "epoch": 0.4548874953891553, "grad_norm": 2.3824543952941895, "learning_rate": 1.3259171749259845e-05, "loss": 0.0491, "step": 3083 }, { "epoch": 0.45503504241977133, "grad_norm": 2.44303035736084, "learning_rate": 1.3254301573805173e-05, "loss": 0.0557, "step": 3084 }, { "epoch": 0.4551825894503873, "grad_norm": 2.8420848846435547, "learning_rate": 1.3249430534894392e-05, "loss": 0.0761, "step": 3085 }, { "epoch": 0.45533013648100334, "grad_norm": 4.012793064117432, "learning_rate": 1.3244558633819928e-05, "loss": 0.0607, "step": 3086 }, { "epoch": 0.4554776835116193, "grad_norm": 2.539480686187744, "learning_rate": 1.3239685871874428e-05, "loss": 0.0996, "step": 3087 }, { "epoch": 0.45562523054223536, "grad_norm": 2.6961123943328857, "learning_rate": 1.3234812250350773e-05, "loss": 0.0624, "step": 3088 }, { "epoch": 0.45577277757285134, "grad_norm": 3.9700441360473633, "learning_rate": 1.3229937770542065e-05, "loss": 0.0682, "step": 3089 }, { "epoch": 0.4559203246034674, "grad_norm": 3.1787490844726562, "learning_rate": 1.3225062433741638e-05, "loss": 0.0473, "step": 3090 }, { "epoch": 0.45606787163408335, "grad_norm": 1.041110873222351, "learning_rate": 1.3220186241243063e-05, "loss": 0.0193, "step": 3091 }, { "epoch": 0.4562154186646994, "grad_norm": 4.853977203369141, "learning_rate": 1.3215309194340114e-05, "loss": 0.0491, "step": 3092 }, { "epoch": 0.45636296569531537, "grad_norm": 2.1782522201538086, "learning_rate": 1.3210431294326818e-05, "loss": 0.0598, "step": 3093 }, { "epoch": 0.4565105127259314, "grad_norm": 1.3792306184768677, "learning_rate": 1.3205552542497413e-05, "loss": 0.0295, "step": 3094 }, { "epoch": 0.4566580597565474, "grad_norm": 1.9158390760421753, "learning_rate": 1.3200672940146363e-05, "loss": 0.0672, "step": 3095 }, { "epoch": 0.4568056067871634, "grad_norm": 3.026278495788574, "learning_rate": 1.3195792488568368e-05, "loss": 0.0231, "step": 3096 }, { "epoch": 0.4569531538177794, "grad_norm": 1.352473497390747, "learning_rate": 1.319091118905834e-05, "loss": 0.0463, "step": 3097 }, { "epoch": 0.45710070084839544, "grad_norm": 0.758575975894928, "learning_rate": 1.3186029042911428e-05, "loss": 0.0102, "step": 3098 }, { "epoch": 0.4572482478790114, "grad_norm": 1.7205479145050049, "learning_rate": 1.3181146051422995e-05, "loss": 0.06, "step": 3099 }, { "epoch": 0.45739579490962745, "grad_norm": 1.7466613054275513, "learning_rate": 1.317626221588864e-05, "loss": 0.0556, "step": 3100 }, { "epoch": 0.45739579490962745, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9547738693467337, "eval_loss": 0.06219971552491188, "eval_precision": 0.9895833333333334, "eval_recall": 0.9223300970873787, "eval_runtime": 48.8225, "eval_samples_per_second": 5.96, "eval_steps_per_second": 0.205, "step": 3100 }, { "epoch": 0.45754334194024343, "grad_norm": 2.1532015800476074, "learning_rate": 1.3171377537604172e-05, "loss": 0.0404, "step": 3101 }, { "epoch": 0.45769088897085947, "grad_norm": 1.7680665254592896, "learning_rate": 1.3166492017865637e-05, "loss": 0.0595, "step": 3102 }, { "epoch": 0.45783843600147545, "grad_norm": 1.6938695907592773, "learning_rate": 1.3161605657969298e-05, "loss": 0.0508, "step": 3103 }, { "epoch": 0.4579859830320915, "grad_norm": 2.0567679405212402, "learning_rate": 1.3156718459211634e-05, "loss": 0.0266, "step": 3104 }, { "epoch": 0.45813353006270746, "grad_norm": 1.748059868812561, "learning_rate": 1.3151830422889366e-05, "loss": 0.0695, "step": 3105 }, { "epoch": 0.4582810770933235, "grad_norm": 1.7691744565963745, "learning_rate": 1.3146941550299415e-05, "loss": 0.0379, "step": 3106 }, { "epoch": 0.45842862412393953, "grad_norm": 1.4011801481246948, "learning_rate": 1.314205184273894e-05, "loss": 0.0376, "step": 3107 }, { "epoch": 0.4585761711545555, "grad_norm": 3.8448193073272705, "learning_rate": 1.313716130150531e-05, "loss": 0.1246, "step": 3108 }, { "epoch": 0.45872371818517155, "grad_norm": 2.1899654865264893, "learning_rate": 1.3132269927896126e-05, "loss": 0.0527, "step": 3109 }, { "epoch": 0.4588712652157875, "grad_norm": 2.725059747695923, "learning_rate": 1.31273777232092e-05, "loss": 0.0879, "step": 3110 }, { "epoch": 0.45901881224640356, "grad_norm": 3.0200695991516113, "learning_rate": 1.3122484688742572e-05, "loss": 0.0778, "step": 3111 }, { "epoch": 0.45916635927701954, "grad_norm": 3.0048940181732178, "learning_rate": 1.31175908257945e-05, "loss": 0.146, "step": 3112 }, { "epoch": 0.4593139063076356, "grad_norm": 3.4377994537353516, "learning_rate": 1.3112696135663456e-05, "loss": 0.1139, "step": 3113 }, { "epoch": 0.45946145333825156, "grad_norm": 2.7629282474517822, "learning_rate": 1.3107800619648142e-05, "loss": 0.0724, "step": 3114 }, { "epoch": 0.4596090003688676, "grad_norm": 1.3059062957763672, "learning_rate": 1.310290427904747e-05, "loss": 0.0277, "step": 3115 }, { "epoch": 0.4597565473994836, "grad_norm": 3.3522562980651855, "learning_rate": 1.3098007115160578e-05, "loss": 0.0543, "step": 3116 }, { "epoch": 0.4599040944300996, "grad_norm": 2.7631375789642334, "learning_rate": 1.3093109129286818e-05, "loss": 0.0492, "step": 3117 }, { "epoch": 0.4600516414607156, "grad_norm": 2.275603771209717, "learning_rate": 1.3088210322725757e-05, "loss": 0.0692, "step": 3118 }, { "epoch": 0.4601991884913316, "grad_norm": 3.497124195098877, "learning_rate": 1.308331069677719e-05, "loss": 0.0912, "step": 3119 }, { "epoch": 0.4603467355219476, "grad_norm": 1.872408390045166, "learning_rate": 1.3078410252741117e-05, "loss": 0.0675, "step": 3120 }, { "epoch": 0.4603467355219476, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9560975609756097, "eval_loss": 0.06415249407291412, "eval_precision": 0.9607843137254902, "eval_recall": 0.9514563106796117, "eval_runtime": 49.5887, "eval_samples_per_second": 5.868, "eval_steps_per_second": 0.202, "step": 3120 }, { "epoch": 0.46049428255256364, "grad_norm": 2.0004093647003174, "learning_rate": 1.3073508991917763e-05, "loss": 0.0546, "step": 3121 }, { "epoch": 0.4606418295831796, "grad_norm": 3.1881308555603027, "learning_rate": 1.306860691560757e-05, "loss": 0.0877, "step": 3122 }, { "epoch": 0.46078937661379565, "grad_norm": 2.6397509574890137, "learning_rate": 1.3063704025111192e-05, "loss": 0.0962, "step": 3123 }, { "epoch": 0.46093692364441163, "grad_norm": 4.078423976898193, "learning_rate": 1.3058800321729503e-05, "loss": 0.0781, "step": 3124 }, { "epoch": 0.46108447067502767, "grad_norm": 1.8027663230895996, "learning_rate": 1.305389580676359e-05, "loss": 0.0521, "step": 3125 }, { "epoch": 0.46123201770564365, "grad_norm": 2.2851879596710205, "learning_rate": 1.304899048151476e-05, "loss": 0.0653, "step": 3126 }, { "epoch": 0.4613795647362597, "grad_norm": 2.1806986331939697, "learning_rate": 1.3044084347284525e-05, "loss": 0.1351, "step": 3127 }, { "epoch": 0.46152711176687566, "grad_norm": 2.5520267486572266, "learning_rate": 1.303917740537462e-05, "loss": 0.0657, "step": 3128 }, { "epoch": 0.4616746587974917, "grad_norm": 3.479849338531494, "learning_rate": 1.3034269657086993e-05, "loss": 0.0971, "step": 3129 }, { "epoch": 0.46182220582810773, "grad_norm": 1.7300620079040527, "learning_rate": 1.3029361103723805e-05, "loss": 0.0383, "step": 3130 }, { "epoch": 0.4619697528587237, "grad_norm": 2.0679094791412354, "learning_rate": 1.3024451746587431e-05, "loss": 0.0505, "step": 3131 }, { "epoch": 0.46211729988933975, "grad_norm": 2.6548893451690674, "learning_rate": 1.3019541586980461e-05, "loss": 0.0978, "step": 3132 }, { "epoch": 0.46226484691995573, "grad_norm": 3.29146409034729, "learning_rate": 1.3014630626205692e-05, "loss": 0.0512, "step": 3133 }, { "epoch": 0.46241239395057177, "grad_norm": 2.246340751647949, "learning_rate": 1.3009718865566143e-05, "loss": 0.1062, "step": 3134 }, { "epoch": 0.46255994098118774, "grad_norm": 1.6049399375915527, "learning_rate": 1.3004806306365035e-05, "loss": 0.0342, "step": 3135 }, { "epoch": 0.4627074880118038, "grad_norm": 1.81931734085083, "learning_rate": 1.2999892949905806e-05, "loss": 0.0593, "step": 3136 }, { "epoch": 0.46285503504241976, "grad_norm": 1.3295485973358154, "learning_rate": 1.2994978797492106e-05, "loss": 0.0347, "step": 3137 }, { "epoch": 0.4630025820730358, "grad_norm": 1.8008508682250977, "learning_rate": 1.2990063850427796e-05, "loss": 0.0575, "step": 3138 }, { "epoch": 0.4631501291036518, "grad_norm": 4.564444541931152, "learning_rate": 1.2985148110016947e-05, "loss": 0.0404, "step": 3139 }, { "epoch": 0.4632976761342678, "grad_norm": 2.6724398136138916, "learning_rate": 1.2980231577563841e-05, "loss": 0.0227, "step": 3140 }, { "epoch": 0.4632976761342678, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.06188115477561951, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 49.6754, "eval_samples_per_second": 5.858, "eval_steps_per_second": 0.201, "step": 3140 }, { "epoch": 0.4634452231648838, "grad_norm": 2.3146982192993164, "learning_rate": 1.297531425437297e-05, "loss": 0.0679, "step": 3141 }, { "epoch": 0.4635927701954998, "grad_norm": 2.479365348815918, "learning_rate": 1.2970396141749034e-05, "loss": 0.1109, "step": 3142 }, { "epoch": 0.4637403172261158, "grad_norm": 2.1055827140808105, "learning_rate": 1.2965477240996944e-05, "loss": 0.0349, "step": 3143 }, { "epoch": 0.46388786425673184, "grad_norm": 2.6727850437164307, "learning_rate": 1.2960557553421824e-05, "loss": 0.0494, "step": 3144 }, { "epoch": 0.4640354112873478, "grad_norm": 3.2281694412231445, "learning_rate": 1.2955637080329e-05, "loss": 0.0775, "step": 3145 }, { "epoch": 0.46418295831796386, "grad_norm": 2.0746448040008545, "learning_rate": 1.295071582302401e-05, "loss": 0.0564, "step": 3146 }, { "epoch": 0.46433050534857984, "grad_norm": 2.130284547805786, "learning_rate": 1.2945793782812601e-05, "loss": 0.0261, "step": 3147 }, { "epoch": 0.46447805237919587, "grad_norm": 3.4605906009674072, "learning_rate": 1.2940870961000725e-05, "loss": 0.0955, "step": 3148 }, { "epoch": 0.46462559940981185, "grad_norm": 2.5517578125, "learning_rate": 1.2935947358894539e-05, "loss": 0.067, "step": 3149 }, { "epoch": 0.4647731464404279, "grad_norm": 2.643298387527466, "learning_rate": 1.2931022977800417e-05, "loss": 0.0358, "step": 3150 }, { "epoch": 0.4649206934710439, "grad_norm": 3.536396026611328, "learning_rate": 1.2926097819024927e-05, "loss": 0.0791, "step": 3151 }, { "epoch": 0.4650682405016599, "grad_norm": 1.332674503326416, "learning_rate": 1.2921171883874857e-05, "loss": 0.0513, "step": 3152 }, { "epoch": 0.46521578753227594, "grad_norm": 3.1050949096679688, "learning_rate": 1.2916245173657189e-05, "loss": 0.1088, "step": 3153 }, { "epoch": 0.4653633345628919, "grad_norm": 1.3427537679672241, "learning_rate": 1.2911317689679113e-05, "loss": 0.035, "step": 3154 }, { "epoch": 0.46551088159350795, "grad_norm": 3.384613037109375, "learning_rate": 1.2906389433248032e-05, "loss": 0.0986, "step": 3155 }, { "epoch": 0.46565842862412393, "grad_norm": 2.1285743713378906, "learning_rate": 1.2901460405671547e-05, "loss": 0.042, "step": 3156 }, { "epoch": 0.46580597565473997, "grad_norm": 2.8513755798339844, "learning_rate": 1.2896530608257463e-05, "loss": 0.1048, "step": 3157 }, { "epoch": 0.46595352268535595, "grad_norm": 1.4096049070358276, "learning_rate": 1.2891600042313792e-05, "loss": 0.0266, "step": 3158 }, { "epoch": 0.466101069715972, "grad_norm": 3.39280366897583, "learning_rate": 1.2886668709148752e-05, "loss": 0.1037, "step": 3159 }, { "epoch": 0.46624861674658796, "grad_norm": 1.7988632917404175, "learning_rate": 1.2881736610070763e-05, "loss": 0.0502, "step": 3160 }, { "epoch": 0.46624861674658796, "eval_accuracy": 0.9725036179450073, "eval_f1": 0.9528535980148883, "eval_loss": 0.062221840023994446, "eval_precision": 0.9746192893401016, "eval_recall": 0.9320388349514563, "eval_runtime": 48.8448, "eval_samples_per_second": 5.958, "eval_steps_per_second": 0.205, "step": 3160 }, { "epoch": 0.466396163777204, "grad_norm": 1.5144652128219604, "learning_rate": 1.2876803746388438e-05, "loss": 0.0828, "step": 3161 }, { "epoch": 0.46654371080782, "grad_norm": 3.1683759689331055, "learning_rate": 1.2871870119410615e-05, "loss": 0.0509, "step": 3162 }, { "epoch": 0.466691257838436, "grad_norm": 1.1191962957382202, "learning_rate": 1.2866935730446309e-05, "loss": 0.0279, "step": 3163 }, { "epoch": 0.466838804869052, "grad_norm": 1.1399152278900146, "learning_rate": 1.2862000580804757e-05, "loss": 0.0259, "step": 3164 }, { "epoch": 0.46698635189966803, "grad_norm": 3.002091646194458, "learning_rate": 1.2857064671795389e-05, "loss": 0.0518, "step": 3165 }, { "epoch": 0.467133898930284, "grad_norm": 4.319186210632324, "learning_rate": 1.2852128004727842e-05, "loss": 0.0824, "step": 3166 }, { "epoch": 0.46728144596090004, "grad_norm": 2.147864580154419, "learning_rate": 1.2847190580911942e-05, "loss": 0.0815, "step": 3167 }, { "epoch": 0.467428992991516, "grad_norm": 2.2783377170562744, "learning_rate": 1.2842252401657727e-05, "loss": 0.0412, "step": 3168 }, { "epoch": 0.46757654002213206, "grad_norm": 1.3876876831054688, "learning_rate": 1.2837313468275438e-05, "loss": 0.0197, "step": 3169 }, { "epoch": 0.46772408705274804, "grad_norm": 1.5112508535385132, "learning_rate": 1.2832373782075504e-05, "loss": 0.0492, "step": 3170 }, { "epoch": 0.4678716340833641, "grad_norm": 2.5427772998809814, "learning_rate": 1.282743334436856e-05, "loss": 0.0591, "step": 3171 }, { "epoch": 0.46801918111398005, "grad_norm": 2.929298162460327, "learning_rate": 1.2822492156465445e-05, "loss": 0.103, "step": 3172 }, { "epoch": 0.4681667281445961, "grad_norm": 2.18363094329834, "learning_rate": 1.281755021967719e-05, "loss": 0.0733, "step": 3173 }, { "epoch": 0.4683142751752121, "grad_norm": 1.8866115808486938, "learning_rate": 1.281260753531503e-05, "loss": 0.0575, "step": 3174 }, { "epoch": 0.4684618222058281, "grad_norm": 2.4299280643463135, "learning_rate": 1.2807664104690387e-05, "loss": 0.0542, "step": 3175 }, { "epoch": 0.46860936923644414, "grad_norm": 1.3509178161621094, "learning_rate": 1.2802719929114904e-05, "loss": 0.0488, "step": 3176 }, { "epoch": 0.4687569162670601, "grad_norm": 2.8822741508483887, "learning_rate": 1.2797775009900397e-05, "loss": 0.08, "step": 3177 }, { "epoch": 0.46890446329767616, "grad_norm": 2.296353816986084, "learning_rate": 1.2792829348358888e-05, "loss": 0.0974, "step": 3178 }, { "epoch": 0.46905201032829213, "grad_norm": 3.2396507263183594, "learning_rate": 1.2787882945802607e-05, "loss": 0.0409, "step": 3179 }, { "epoch": 0.46919955735890817, "grad_norm": 1.5096304416656494, "learning_rate": 1.2782935803543963e-05, "loss": 0.0478, "step": 3180 }, { "epoch": 0.46919955735890817, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.0604877807199955, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 50.1581, "eval_samples_per_second": 5.802, "eval_steps_per_second": 0.199, "step": 3180 }, { "epoch": 0.46934710438952415, "grad_norm": 2.1557698249816895, "learning_rate": 1.2777987922895574e-05, "loss": 0.0891, "step": 3181 }, { "epoch": 0.4694946514201402, "grad_norm": 2.276653528213501, "learning_rate": 1.277303930517025e-05, "loss": 0.0462, "step": 3182 }, { "epoch": 0.46964219845075617, "grad_norm": 1.9296355247497559, "learning_rate": 1.276808995168099e-05, "loss": 0.0819, "step": 3183 }, { "epoch": 0.4697897454813722, "grad_norm": 1.889962077140808, "learning_rate": 1.2763139863741e-05, "loss": 0.0572, "step": 3184 }, { "epoch": 0.4699372925119882, "grad_norm": 3.549494743347168, "learning_rate": 1.2758189042663673e-05, "loss": 0.0345, "step": 3185 }, { "epoch": 0.4700848395426042, "grad_norm": 1.883277177810669, "learning_rate": 1.27532374897626e-05, "loss": 0.0284, "step": 3186 }, { "epoch": 0.4702323865732202, "grad_norm": 4.439860820770264, "learning_rate": 1.2748285206351563e-05, "loss": 0.1137, "step": 3187 }, { "epoch": 0.47037993360383623, "grad_norm": 2.090292453765869, "learning_rate": 1.274333219374454e-05, "loss": 0.0556, "step": 3188 }, { "epoch": 0.4705274806344522, "grad_norm": 3.4212419986724854, "learning_rate": 1.2738378453255702e-05, "loss": 0.0547, "step": 3189 }, { "epoch": 0.47067502766506825, "grad_norm": 1.6721760034561157, "learning_rate": 1.2733423986199416e-05, "loss": 0.034, "step": 3190 }, { "epoch": 0.4708225746956842, "grad_norm": 1.1416772603988647, "learning_rate": 1.2728468793890233e-05, "loss": 0.0531, "step": 3191 }, { "epoch": 0.47097012172630026, "grad_norm": 2.1340465545654297, "learning_rate": 1.2723512877642906e-05, "loss": 0.0359, "step": 3192 }, { "epoch": 0.47111766875691624, "grad_norm": 4.481593608856201, "learning_rate": 1.2718556238772378e-05, "loss": 0.0877, "step": 3193 }, { "epoch": 0.4712652157875323, "grad_norm": 1.3652770519256592, "learning_rate": 1.271359887859378e-05, "loss": 0.0437, "step": 3194 }, { "epoch": 0.4714127628181483, "grad_norm": 1.4086480140686035, "learning_rate": 1.2708640798422438e-05, "loss": 0.0339, "step": 3195 }, { "epoch": 0.4715603098487643, "grad_norm": 1.3899623155593872, "learning_rate": 1.2703681999573867e-05, "loss": 0.0331, "step": 3196 }, { "epoch": 0.4717078568793803, "grad_norm": 2.8398168087005615, "learning_rate": 1.2698722483363772e-05, "loss": 0.0918, "step": 3197 }, { "epoch": 0.4718554039099963, "grad_norm": 2.9988067150115967, "learning_rate": 1.269376225110805e-05, "loss": 0.0634, "step": 3198 }, { "epoch": 0.47200295094061234, "grad_norm": 1.8906351327896118, "learning_rate": 1.2688801304122791e-05, "loss": 0.0655, "step": 3199 }, { "epoch": 0.4721504979712283, "grad_norm": 3.4711852073669434, "learning_rate": 1.2683839643724264e-05, "loss": 0.1021, "step": 3200 }, { "epoch": 0.4721504979712283, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.06304433196783066, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 50.1014, "eval_samples_per_second": 5.808, "eval_steps_per_second": 0.2, "step": 3200 }, { "epoch": 0.47229804500184436, "grad_norm": 0.8298687934875488, "learning_rate": 1.2678877271228947e-05, "loss": 0.0113, "step": 3201 }, { "epoch": 0.47244559203246034, "grad_norm": 1.6253254413604736, "learning_rate": 1.2673914187953483e-05, "loss": 0.0342, "step": 3202 }, { "epoch": 0.4725931390630764, "grad_norm": 1.7428443431854248, "learning_rate": 1.2668950395214716e-05, "loss": 0.0383, "step": 3203 }, { "epoch": 0.47274068609369235, "grad_norm": 2.046128988265991, "learning_rate": 1.2663985894329685e-05, "loss": 0.0322, "step": 3204 }, { "epoch": 0.4728882331243084, "grad_norm": 1.685314416885376, "learning_rate": 1.2659020686615602e-05, "loss": 0.0352, "step": 3205 }, { "epoch": 0.47303578015492437, "grad_norm": 3.260664939880371, "learning_rate": 1.2654054773389874e-05, "loss": 0.0352, "step": 3206 }, { "epoch": 0.4731833271855404, "grad_norm": 1.4730430841445923, "learning_rate": 1.26490881559701e-05, "loss": 0.0198, "step": 3207 }, { "epoch": 0.4733308742161564, "grad_norm": 1.6515488624572754, "learning_rate": 1.2644120835674057e-05, "loss": 0.0423, "step": 3208 }, { "epoch": 0.4734784212467724, "grad_norm": 2.5386011600494385, "learning_rate": 1.2639152813819714e-05, "loss": 0.0911, "step": 3209 }, { "epoch": 0.4736259682773884, "grad_norm": 1.517215609550476, "learning_rate": 1.2634184091725223e-05, "loss": 0.0439, "step": 3210 }, { "epoch": 0.47377351530800443, "grad_norm": 2.2844605445861816, "learning_rate": 1.2629214670708922e-05, "loss": 0.0774, "step": 3211 }, { "epoch": 0.4739210623386204, "grad_norm": 2.74086594581604, "learning_rate": 1.2624244552089343e-05, "loss": 0.1076, "step": 3212 }, { "epoch": 0.47406860936923645, "grad_norm": 2.7842519283294678, "learning_rate": 1.2619273737185185e-05, "loss": 0.1019, "step": 3213 }, { "epoch": 0.47421615639985243, "grad_norm": 2.3967092037200928, "learning_rate": 1.2614302227315351e-05, "loss": 0.0942, "step": 3214 }, { "epoch": 0.47436370343046846, "grad_norm": 3.565133810043335, "learning_rate": 1.2609330023798919e-05, "loss": 0.1073, "step": 3215 }, { "epoch": 0.47451125046108444, "grad_norm": 3.2078261375427246, "learning_rate": 1.2604357127955145e-05, "loss": 0.0509, "step": 3216 }, { "epoch": 0.4746587974917005, "grad_norm": 4.051640033721924, "learning_rate": 1.2599383541103483e-05, "loss": 0.0706, "step": 3217 }, { "epoch": 0.4748063445223165, "grad_norm": 1.4189480543136597, "learning_rate": 1.2594409264563559e-05, "loss": 0.018, "step": 3218 }, { "epoch": 0.4749538915529325, "grad_norm": 2.131605863571167, "learning_rate": 1.258943429965519e-05, "loss": 0.1087, "step": 3219 }, { "epoch": 0.47510143858354853, "grad_norm": 1.2329082489013672, "learning_rate": 1.2584458647698368e-05, "loss": 0.0418, "step": 3220 }, { "epoch": 0.47510143858354853, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.06146646663546562, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1494, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.203, "step": 3220 }, { "epoch": 0.4752489856141645, "grad_norm": 1.6588473320007324, "learning_rate": 1.2579482310013269e-05, "loss": 0.0618, "step": 3221 }, { "epoch": 0.47539653264478055, "grad_norm": 3.2114436626434326, "learning_rate": 1.2574505287920258e-05, "loss": 0.0445, "step": 3222 }, { "epoch": 0.4755440796753965, "grad_norm": 2.014280080795288, "learning_rate": 1.2569527582739873e-05, "loss": 0.0833, "step": 3223 }, { "epoch": 0.47569162670601256, "grad_norm": 5.8074164390563965, "learning_rate": 1.2564549195792842e-05, "loss": 0.0808, "step": 3224 }, { "epoch": 0.47583917373662854, "grad_norm": 5.647659778594971, "learning_rate": 1.2559570128400056e-05, "loss": 0.1137, "step": 3225 }, { "epoch": 0.4759867207672446, "grad_norm": 2.1510605812072754, "learning_rate": 1.2554590381882617e-05, "loss": 0.0708, "step": 3226 }, { "epoch": 0.47613426779786056, "grad_norm": 1.5673420429229736, "learning_rate": 1.2549609957561777e-05, "loss": 0.0349, "step": 3227 }, { "epoch": 0.4762818148284766, "grad_norm": 2.7084009647369385, "learning_rate": 1.2544628856758978e-05, "loss": 0.1026, "step": 3228 }, { "epoch": 0.47642936185909257, "grad_norm": 1.2387560606002808, "learning_rate": 1.2539647080795853e-05, "loss": 0.0453, "step": 3229 }, { "epoch": 0.4765769088897086, "grad_norm": 2.017057418823242, "learning_rate": 1.2534664630994203e-05, "loss": 0.0551, "step": 3230 }, { "epoch": 0.4767244559203246, "grad_norm": 1.0644739866256714, "learning_rate": 1.2529681508676003e-05, "loss": 0.0194, "step": 3231 }, { "epoch": 0.4768720029509406, "grad_norm": 2.3492441177368164, "learning_rate": 1.2524697715163416e-05, "loss": 0.0588, "step": 3232 }, { "epoch": 0.4770195499815566, "grad_norm": 4.104902267456055, "learning_rate": 1.2519713251778787e-05, "loss": 0.1416, "step": 3233 }, { "epoch": 0.47716709701217264, "grad_norm": 1.880821704864502, "learning_rate": 1.2514728119844624e-05, "loss": 0.0476, "step": 3234 }, { "epoch": 0.4773146440427886, "grad_norm": 1.9657938480377197, "learning_rate": 1.2509742320683617e-05, "loss": 0.0665, "step": 3235 }, { "epoch": 0.47746219107340465, "grad_norm": 1.0189694166183472, "learning_rate": 1.2504755855618647e-05, "loss": 0.0276, "step": 3236 }, { "epoch": 0.47760973810402063, "grad_norm": 4.213890075683594, "learning_rate": 1.2499768725972755e-05, "loss": 0.0588, "step": 3237 }, { "epoch": 0.47775728513463667, "grad_norm": 3.22441029548645, "learning_rate": 1.2494780933069161e-05, "loss": 0.0851, "step": 3238 }, { "epoch": 0.47790483216525265, "grad_norm": 3.3729279041290283, "learning_rate": 1.2489792478231273e-05, "loss": 0.0402, "step": 3239 }, { "epoch": 0.4780523791958687, "grad_norm": 2.184178352355957, "learning_rate": 1.2484803362782659e-05, "loss": 0.0354, "step": 3240 }, { "epoch": 0.4780523791958687, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.0614546537399292, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 49.6764, "eval_samples_per_second": 5.858, "eval_steps_per_second": 0.201, "step": 3240 }, { "epoch": 0.4781999262264847, "grad_norm": 2.1899147033691406, "learning_rate": 1.247981358804707e-05, "loss": 0.0367, "step": 3241 }, { "epoch": 0.4783474732571007, "grad_norm": 1.4089144468307495, "learning_rate": 1.247482315534843e-05, "loss": 0.0493, "step": 3242 }, { "epoch": 0.47849502028771673, "grad_norm": 3.1078667640686035, "learning_rate": 1.2469832066010843e-05, "loss": 0.072, "step": 3243 }, { "epoch": 0.4786425673183327, "grad_norm": 2.568477153778076, "learning_rate": 1.246484032135858e-05, "loss": 0.0625, "step": 3244 }, { "epoch": 0.47879011434894875, "grad_norm": 4.655767917633057, "learning_rate": 1.245984792271609e-05, "loss": 0.0986, "step": 3245 }, { "epoch": 0.4789376613795647, "grad_norm": 2.5994324684143066, "learning_rate": 1.2454854871407993e-05, "loss": 0.0481, "step": 3246 }, { "epoch": 0.47908520841018076, "grad_norm": 1.9946426153182983, "learning_rate": 1.2449861168759086e-05, "loss": 0.0663, "step": 3247 }, { "epoch": 0.47923275544079674, "grad_norm": 1.1514058113098145, "learning_rate": 1.2444866816094332e-05, "loss": 0.0431, "step": 3248 }, { "epoch": 0.4793803024714128, "grad_norm": 3.6958229541778564, "learning_rate": 1.243987181473887e-05, "loss": 0.0571, "step": 3249 }, { "epoch": 0.47952784950202876, "grad_norm": 3.389021396636963, "learning_rate": 1.243487616601802e-05, "loss": 0.0577, "step": 3250 }, { "epoch": 0.4796753965326448, "grad_norm": 3.1361069679260254, "learning_rate": 1.242987987125726e-05, "loss": 0.043, "step": 3251 }, { "epoch": 0.4798229435632608, "grad_norm": 2.2842636108398438, "learning_rate": 1.2424882931782242e-05, "loss": 0.0595, "step": 3252 }, { "epoch": 0.4799704905938768, "grad_norm": 2.1793570518493652, "learning_rate": 1.24198853489188e-05, "loss": 0.0379, "step": 3253 }, { "epoch": 0.4801180376244928, "grad_norm": 2.192145347595215, "learning_rate": 1.2414887123992925e-05, "loss": 0.0631, "step": 3254 }, { "epoch": 0.4802655846551088, "grad_norm": 2.948481798171997, "learning_rate": 1.2409888258330785e-05, "loss": 0.0737, "step": 3255 }, { "epoch": 0.4804131316857248, "grad_norm": 2.3350939750671387, "learning_rate": 1.2404888753258715e-05, "loss": 0.0834, "step": 3256 }, { "epoch": 0.48056067871634084, "grad_norm": 1.58824622631073, "learning_rate": 1.2399888610103228e-05, "loss": 0.042, "step": 3257 }, { "epoch": 0.4807082257469568, "grad_norm": 1.3110638856887817, "learning_rate": 1.2394887830190998e-05, "loss": 0.0244, "step": 3258 }, { "epoch": 0.48085577277757285, "grad_norm": 1.9750962257385254, "learning_rate": 1.2389886414848863e-05, "loss": 0.0508, "step": 3259 }, { "epoch": 0.48100331980818883, "grad_norm": 4.089970111846924, "learning_rate": 1.2384884365403847e-05, "loss": 0.0575, "step": 3260 }, { "epoch": 0.48100331980818883, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9605911330049262, "eval_loss": 0.05934501439332962, "eval_precision": 0.975, "eval_recall": 0.9466019417475728, "eval_runtime": 48.8882, "eval_samples_per_second": 5.952, "eval_steps_per_second": 0.205, "step": 3260 }, { "epoch": 0.48115086683880487, "grad_norm": 1.447792649269104, "learning_rate": 1.2379881683183126e-05, "loss": 0.0597, "step": 3261 }, { "epoch": 0.4812984138694209, "grad_norm": 1.7710716724395752, "learning_rate": 1.237487836951405e-05, "loss": 0.0616, "step": 3262 }, { "epoch": 0.4814459609000369, "grad_norm": 1.9847614765167236, "learning_rate": 1.2369874425724135e-05, "loss": 0.0775, "step": 3263 }, { "epoch": 0.4815935079306529, "grad_norm": 3.6079742908477783, "learning_rate": 1.2364869853141072e-05, "loss": 0.1294, "step": 3264 }, { "epoch": 0.4817410549612689, "grad_norm": 4.106072902679443, "learning_rate": 1.2359864653092707e-05, "loss": 0.0657, "step": 3265 }, { "epoch": 0.48188860199188494, "grad_norm": 3.757965087890625, "learning_rate": 1.235485882690706e-05, "loss": 0.0304, "step": 3266 }, { "epoch": 0.4820361490225009, "grad_norm": 3.9054882526397705, "learning_rate": 1.2349852375912313e-05, "loss": 0.132, "step": 3267 }, { "epoch": 0.48218369605311695, "grad_norm": 5.853841781616211, "learning_rate": 1.2344845301436816e-05, "loss": 0.1587, "step": 3268 }, { "epoch": 0.48233124308373293, "grad_norm": 4.04955530166626, "learning_rate": 1.2339837604809086e-05, "loss": 0.0331, "step": 3269 }, { "epoch": 0.48247879011434897, "grad_norm": 2.3575868606567383, "learning_rate": 1.23348292873578e-05, "loss": 0.0817, "step": 3270 }, { "epoch": 0.48262633714496495, "grad_norm": 1.8895323276519775, "learning_rate": 1.2329820350411808e-05, "loss": 0.0383, "step": 3271 }, { "epoch": 0.482773884175581, "grad_norm": 1.5796021223068237, "learning_rate": 1.2324810795300115e-05, "loss": 0.0275, "step": 3272 }, { "epoch": 0.48292143120619696, "grad_norm": 1.9694874286651611, "learning_rate": 1.2319800623351897e-05, "loss": 0.0684, "step": 3273 }, { "epoch": 0.483068978236813, "grad_norm": 3.820848226547241, "learning_rate": 1.2314789835896488e-05, "loss": 0.1, "step": 3274 }, { "epoch": 0.483216525267429, "grad_norm": 1.395221471786499, "learning_rate": 1.2309778434263389e-05, "loss": 0.0316, "step": 3275 }, { "epoch": 0.483364072298045, "grad_norm": 4.243491172790527, "learning_rate": 1.2304766419782271e-05, "loss": 0.0591, "step": 3276 }, { "epoch": 0.483511619328661, "grad_norm": 1.959580421447754, "learning_rate": 1.2299753793782947e-05, "loss": 0.0358, "step": 3277 }, { "epoch": 0.483659166359277, "grad_norm": 2.0912506580352783, "learning_rate": 1.2294740557595413e-05, "loss": 0.0863, "step": 3278 }, { "epoch": 0.483806713389893, "grad_norm": 1.9119911193847656, "learning_rate": 1.2289726712549817e-05, "loss": 0.0474, "step": 3279 }, { "epoch": 0.48395426042050904, "grad_norm": 2.0787830352783203, "learning_rate": 1.2284712259976472e-05, "loss": 0.0901, "step": 3280 }, { "epoch": 0.48395426042050904, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06300003826618195, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.463, "eval_samples_per_second": 5.883, "eval_steps_per_second": 0.202, "step": 3280 }, { "epoch": 0.484101807451125, "grad_norm": 2.2309439182281494, "learning_rate": 1.2279697201205852e-05, "loss": 0.0561, "step": 3281 }, { "epoch": 0.48424935448174106, "grad_norm": 2.0479013919830322, "learning_rate": 1.2274681537568585e-05, "loss": 0.0606, "step": 3282 }, { "epoch": 0.48439690151235704, "grad_norm": 2.588655710220337, "learning_rate": 1.2269665270395473e-05, "loss": 0.0437, "step": 3283 }, { "epoch": 0.48454444854297307, "grad_norm": 2.420430898666382, "learning_rate": 1.2264648401017465e-05, "loss": 0.0828, "step": 3284 }, { "epoch": 0.4846919955735891, "grad_norm": 2.113615036010742, "learning_rate": 1.2259630930765672e-05, "loss": 0.0277, "step": 3285 }, { "epoch": 0.4848395426042051, "grad_norm": 2.4751641750335693, "learning_rate": 1.2254612860971373e-05, "loss": 0.0676, "step": 3286 }, { "epoch": 0.4849870896348211, "grad_norm": 2.615586757659912, "learning_rate": 1.2249594192966002e-05, "loss": 0.0863, "step": 3287 }, { "epoch": 0.4851346366654371, "grad_norm": 1.8583539724349976, "learning_rate": 1.2244574928081146e-05, "loss": 0.0452, "step": 3288 }, { "epoch": 0.48528218369605314, "grad_norm": 4.22299337387085, "learning_rate": 1.2239555067648551e-05, "loss": 0.0595, "step": 3289 }, { "epoch": 0.4854297307266691, "grad_norm": 1.649377703666687, "learning_rate": 1.2234534613000137e-05, "loss": 0.0734, "step": 3290 }, { "epoch": 0.48557727775728515, "grad_norm": 2.125147581100464, "learning_rate": 1.2229513565467958e-05, "loss": 0.0383, "step": 3291 }, { "epoch": 0.48572482478790113, "grad_norm": 1.9109916687011719, "learning_rate": 1.222449192638424e-05, "loss": 0.0315, "step": 3292 }, { "epoch": 0.48587237181851717, "grad_norm": 1.9446419477462769, "learning_rate": 1.2219469697081365e-05, "loss": 0.0367, "step": 3293 }, { "epoch": 0.48601991884913315, "grad_norm": 2.424917697906494, "learning_rate": 1.2214446878891866e-05, "loss": 0.063, "step": 3294 }, { "epoch": 0.4861674658797492, "grad_norm": 1.5332897901535034, "learning_rate": 1.2209423473148439e-05, "loss": 0.0584, "step": 3295 }, { "epoch": 0.48631501291036516, "grad_norm": 1.4473016262054443, "learning_rate": 1.2204399481183927e-05, "loss": 0.0432, "step": 3296 }, { "epoch": 0.4864625599409812, "grad_norm": 1.2174797058105469, "learning_rate": 1.2199374904331337e-05, "loss": 0.0175, "step": 3297 }, { "epoch": 0.4866101069715972, "grad_norm": 2.287526845932007, "learning_rate": 1.219434974392383e-05, "loss": 0.033, "step": 3298 }, { "epoch": 0.4867576540022132, "grad_norm": 1.1310158967971802, "learning_rate": 1.2189324001294716e-05, "loss": 0.0211, "step": 3299 }, { "epoch": 0.4869052010328292, "grad_norm": 1.383001685142517, "learning_rate": 1.2184297677777463e-05, "loss": 0.0166, "step": 3300 }, { "epoch": 0.4869052010328292, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.061642926186323166, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.6871, "eval_samples_per_second": 5.977, "eval_steps_per_second": 0.205, "step": 3300 }, { "epoch": 0.48705274806344523, "grad_norm": 4.178576469421387, "learning_rate": 1.21792707747057e-05, "loss": 0.182, "step": 3301 }, { "epoch": 0.4872002950940612, "grad_norm": 2.3253228664398193, "learning_rate": 1.2174243293413198e-05, "loss": 0.0322, "step": 3302 }, { "epoch": 0.48734784212467724, "grad_norm": 2.2082271575927734, "learning_rate": 1.2169215235233885e-05, "loss": 0.076, "step": 3303 }, { "epoch": 0.4874953891552932, "grad_norm": 2.8886120319366455, "learning_rate": 1.2164186601501848e-05, "loss": 0.0357, "step": 3304 }, { "epoch": 0.48764293618590926, "grad_norm": 1.3837871551513672, "learning_rate": 1.2159157393551319e-05, "loss": 0.0314, "step": 3305 }, { "epoch": 0.4877904832165253, "grad_norm": 3.8915963172912598, "learning_rate": 1.2154127612716688e-05, "loss": 0.1051, "step": 3306 }, { "epoch": 0.4879380302471413, "grad_norm": 2.2977776527404785, "learning_rate": 1.2149097260332494e-05, "loss": 0.0216, "step": 3307 }, { "epoch": 0.4880855772777573, "grad_norm": 2.2296063899993896, "learning_rate": 1.2144066337733432e-05, "loss": 0.0256, "step": 3308 }, { "epoch": 0.4882331243083733, "grad_norm": 2.327798366546631, "learning_rate": 1.2139034846254339e-05, "loss": 0.0636, "step": 3309 }, { "epoch": 0.4883806713389893, "grad_norm": 2.6611835956573486, "learning_rate": 1.213400278723021e-05, "loss": 0.0558, "step": 3310 }, { "epoch": 0.4885282183696053, "grad_norm": 2.4991540908813477, "learning_rate": 1.2128970161996191e-05, "loss": 0.0801, "step": 3311 }, { "epoch": 0.48867576540022134, "grad_norm": 3.6378371715545654, "learning_rate": 1.2123936971887578e-05, "loss": 0.0444, "step": 3312 }, { "epoch": 0.4888233124308373, "grad_norm": 1.7967331409454346, "learning_rate": 1.2118903218239807e-05, "loss": 0.0581, "step": 3313 }, { "epoch": 0.48897085946145336, "grad_norm": 1.1032989025115967, "learning_rate": 1.2113868902388485e-05, "loss": 0.0193, "step": 3314 }, { "epoch": 0.48911840649206934, "grad_norm": 1.5536303520202637, "learning_rate": 1.210883402566935e-05, "loss": 0.0444, "step": 3315 }, { "epoch": 0.48926595352268537, "grad_norm": 2.333468437194824, "learning_rate": 1.2103798589418285e-05, "loss": 0.0504, "step": 3316 }, { "epoch": 0.48941350055330135, "grad_norm": 2.8541698455810547, "learning_rate": 1.209876259497134e-05, "loss": 0.0622, "step": 3317 }, { "epoch": 0.4895610475839174, "grad_norm": 5.411912441253662, "learning_rate": 1.2093726043664701e-05, "loss": 0.0511, "step": 3318 }, { "epoch": 0.48970859461453337, "grad_norm": 2.42372989654541, "learning_rate": 1.2088688936834705e-05, "loss": 0.0447, "step": 3319 }, { "epoch": 0.4898561416451494, "grad_norm": 1.756615400314331, "learning_rate": 1.2083651275817834e-05, "loss": 0.0942, "step": 3320 }, { "epoch": 0.4898561416451494, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.06117742881178856, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 48.7452, "eval_samples_per_second": 5.97, "eval_steps_per_second": 0.205, "step": 3320 }, { "epoch": 0.4900036886757654, "grad_norm": 1.3376930952072144, "learning_rate": 1.207861306195072e-05, "loss": 0.0277, "step": 3321 }, { "epoch": 0.4901512357063814, "grad_norm": 2.201416254043579, "learning_rate": 1.2073574296570141e-05, "loss": 0.0495, "step": 3322 }, { "epoch": 0.4902987827369974, "grad_norm": 1.9058219194412231, "learning_rate": 1.2068534981013013e-05, "loss": 0.0701, "step": 3323 }, { "epoch": 0.49044632976761343, "grad_norm": 2.2066309452056885, "learning_rate": 1.206349511661642e-05, "loss": 0.0826, "step": 3324 }, { "epoch": 0.4905938767982294, "grad_norm": 1.5200670957565308, "learning_rate": 1.2058454704717564e-05, "loss": 0.0436, "step": 3325 }, { "epoch": 0.49074142382884545, "grad_norm": 4.031019687652588, "learning_rate": 1.2053413746653813e-05, "loss": 0.1096, "step": 3326 }, { "epoch": 0.4908889708594614, "grad_norm": 2.4115147590637207, "learning_rate": 1.204837224376267e-05, "loss": 0.0488, "step": 3327 }, { "epoch": 0.49103651789007746, "grad_norm": 3.219503879547119, "learning_rate": 1.2043330197381781e-05, "loss": 0.0903, "step": 3328 }, { "epoch": 0.4911840649206935, "grad_norm": 2.092334747314453, "learning_rate": 1.203828760884895e-05, "loss": 0.0766, "step": 3329 }, { "epoch": 0.4913316119513095, "grad_norm": 4.484579563140869, "learning_rate": 1.2033244479502106e-05, "loss": 0.0603, "step": 3330 }, { "epoch": 0.4914791589819255, "grad_norm": 1.6717311143875122, "learning_rate": 1.2028200810679336e-05, "loss": 0.0277, "step": 3331 }, { "epoch": 0.4916267060125415, "grad_norm": 3.2204461097717285, "learning_rate": 1.2023156603718859e-05, "loss": 0.0703, "step": 3332 }, { "epoch": 0.4917742530431575, "grad_norm": 3.173253297805786, "learning_rate": 1.2018111859959049e-05, "loss": 0.0613, "step": 3333 }, { "epoch": 0.4919218000737735, "grad_norm": 3.0369443893432617, "learning_rate": 1.2013066580738413e-05, "loss": 0.0223, "step": 3334 }, { "epoch": 0.49206934710438954, "grad_norm": 3.397386074066162, "learning_rate": 1.2008020767395601e-05, "loss": 0.0949, "step": 3335 }, { "epoch": 0.4922168941350055, "grad_norm": 7.01145076751709, "learning_rate": 1.200297442126941e-05, "loss": 0.1451, "step": 3336 }, { "epoch": 0.49236444116562156, "grad_norm": 2.1937735080718994, "learning_rate": 1.1997927543698773e-05, "loss": 0.0398, "step": 3337 }, { "epoch": 0.49251198819623754, "grad_norm": 1.7639225721359253, "learning_rate": 1.1992880136022766e-05, "loss": 0.048, "step": 3338 }, { "epoch": 0.4926595352268536, "grad_norm": 3.8040966987609863, "learning_rate": 1.1987832199580605e-05, "loss": 0.0992, "step": 3339 }, { "epoch": 0.49280708225746955, "grad_norm": 2.164365768432617, "learning_rate": 1.1982783735711652e-05, "loss": 0.1136, "step": 3340 }, { "epoch": 0.49280708225746955, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9504950495049505, "eval_loss": 0.06606943905353546, "eval_precision": 0.9696969696969697, "eval_recall": 0.9320388349514563, "eval_runtime": 49.2326, "eval_samples_per_second": 5.911, "eval_steps_per_second": 0.203, "step": 3340 }, { "epoch": 0.4929546292880856, "grad_norm": 3.3804688453674316, "learning_rate": 1.1977734745755398e-05, "loss": 0.0921, "step": 3341 }, { "epoch": 0.49310217631870157, "grad_norm": 2.3935983180999756, "learning_rate": 1.197268523105148e-05, "loss": 0.1284, "step": 3342 }, { "epoch": 0.4932497233493176, "grad_norm": 1.334424376487732, "learning_rate": 1.1967635192939676e-05, "loss": 0.0408, "step": 3343 }, { "epoch": 0.4933972703799336, "grad_norm": 1.6750712394714355, "learning_rate": 1.1962584632759904e-05, "loss": 0.0512, "step": 3344 }, { "epoch": 0.4935448174105496, "grad_norm": 2.5647964477539062, "learning_rate": 1.195753355185221e-05, "loss": 0.121, "step": 3345 }, { "epoch": 0.4936923644411656, "grad_norm": 1.7515825033187866, "learning_rate": 1.1952481951556787e-05, "loss": 0.0958, "step": 3346 }, { "epoch": 0.49383991147178163, "grad_norm": 2.156981945037842, "learning_rate": 1.1947429833213968e-05, "loss": 0.0775, "step": 3347 }, { "epoch": 0.4939874585023976, "grad_norm": 1.9456983804702759, "learning_rate": 1.1942377198164217e-05, "loss": 0.0661, "step": 3348 }, { "epoch": 0.49413500553301365, "grad_norm": 1.5547610521316528, "learning_rate": 1.1937324047748136e-05, "loss": 0.0382, "step": 3349 }, { "epoch": 0.4942825525636297, "grad_norm": 2.471306324005127, "learning_rate": 1.1932270383306468e-05, "loss": 0.0967, "step": 3350 }, { "epoch": 0.49443009959424566, "grad_norm": 4.576428413391113, "learning_rate": 1.192721620618009e-05, "loss": 0.1027, "step": 3351 }, { "epoch": 0.4945776466248617, "grad_norm": 1.274628758430481, "learning_rate": 1.1922161517710014e-05, "loss": 0.0741, "step": 3352 }, { "epoch": 0.4947251936554777, "grad_norm": 2.1708405017852783, "learning_rate": 1.1917106319237386e-05, "loss": 0.0812, "step": 3353 }, { "epoch": 0.4948727406860937, "grad_norm": 1.9123146533966064, "learning_rate": 1.1912050612103495e-05, "loss": 0.0516, "step": 3354 }, { "epoch": 0.4950202877167097, "grad_norm": 3.2317662239074707, "learning_rate": 1.1906994397649754e-05, "loss": 0.0367, "step": 3355 }, { "epoch": 0.49516783474732573, "grad_norm": 1.662158489227295, "learning_rate": 1.1901937677217719e-05, "loss": 0.0876, "step": 3356 }, { "epoch": 0.4953153817779417, "grad_norm": 2.2653963565826416, "learning_rate": 1.1896880452149077e-05, "loss": 0.1079, "step": 3357 }, { "epoch": 0.49546292880855775, "grad_norm": 1.229744791984558, "learning_rate": 1.1891822723785652e-05, "loss": 0.0672, "step": 3358 }, { "epoch": 0.4956104758391737, "grad_norm": 3.344952344894409, "learning_rate": 1.18867644934694e-05, "loss": 0.0767, "step": 3359 }, { "epoch": 0.49575802286978976, "grad_norm": 1.8295615911483765, "learning_rate": 1.1881705762542404e-05, "loss": 0.0865, "step": 3360 }, { "epoch": 0.49575802286978976, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9484029484029484, "eval_loss": 0.06659555435180664, "eval_precision": 0.9601990049751243, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1099, "eval_samples_per_second": 5.925, "eval_steps_per_second": 0.204, "step": 3360 }, { "epoch": 0.49590556990040574, "grad_norm": 1.6982154846191406, "learning_rate": 1.1876646532346889e-05, "loss": 0.0318, "step": 3361 }, { "epoch": 0.4960531169310218, "grad_norm": 1.907339334487915, "learning_rate": 1.1871586804225207e-05, "loss": 0.1013, "step": 3362 }, { "epoch": 0.49620066396163776, "grad_norm": 1.2680565118789673, "learning_rate": 1.186652657951984e-05, "loss": 0.0421, "step": 3363 }, { "epoch": 0.4963482109922538, "grad_norm": 1.6503113508224487, "learning_rate": 1.1861465859573413e-05, "loss": 0.093, "step": 3364 }, { "epoch": 0.49649575802286977, "grad_norm": 1.203139305114746, "learning_rate": 1.1856404645728672e-05, "loss": 0.0313, "step": 3365 }, { "epoch": 0.4966433050534858, "grad_norm": 1.1758270263671875, "learning_rate": 1.1851342939328494e-05, "loss": 0.0199, "step": 3366 }, { "epoch": 0.4967908520841018, "grad_norm": 1.7214784622192383, "learning_rate": 1.1846280741715892e-05, "loss": 0.0453, "step": 3367 }, { "epoch": 0.4969383991147178, "grad_norm": 2.7133383750915527, "learning_rate": 1.1841218054234005e-05, "loss": 0.0943, "step": 3368 }, { "epoch": 0.4970859461453338, "grad_norm": 3.3889307975769043, "learning_rate": 1.1836154878226107e-05, "loss": 0.074, "step": 3369 }, { "epoch": 0.49723349317594984, "grad_norm": 1.4902371168136597, "learning_rate": 1.1831091215035593e-05, "loss": 0.0415, "step": 3370 }, { "epoch": 0.4973810402065658, "grad_norm": 1.4596829414367676, "learning_rate": 1.1826027066006e-05, "loss": 0.0386, "step": 3371 }, { "epoch": 0.49752858723718185, "grad_norm": 1.033669352531433, "learning_rate": 1.1820962432480984e-05, "loss": 0.0352, "step": 3372 }, { "epoch": 0.4976761342677979, "grad_norm": 4.543931007385254, "learning_rate": 1.1815897315804328e-05, "loss": 0.0759, "step": 3373 }, { "epoch": 0.49782368129841387, "grad_norm": 3.2970826625823975, "learning_rate": 1.181083171731995e-05, "loss": 0.0591, "step": 3374 }, { "epoch": 0.4979712283290299, "grad_norm": 2.151228427886963, "learning_rate": 1.1805765638371897e-05, "loss": 0.0628, "step": 3375 }, { "epoch": 0.4981187753596459, "grad_norm": 5.1897711753845215, "learning_rate": 1.1800699080304333e-05, "loss": 0.0541, "step": 3376 }, { "epoch": 0.4982663223902619, "grad_norm": 2.8217885494232178, "learning_rate": 1.1795632044461561e-05, "loss": 0.1121, "step": 3377 }, { "epoch": 0.4984138694208779, "grad_norm": 2.626544237136841, "learning_rate": 1.1790564532188004e-05, "loss": 0.0577, "step": 3378 }, { "epoch": 0.49856141645149393, "grad_norm": 6.306614398956299, "learning_rate": 1.1785496544828217e-05, "loss": 0.0775, "step": 3379 }, { "epoch": 0.4987089634821099, "grad_norm": 2.6504967212677, "learning_rate": 1.1780428083726865e-05, "loss": 0.0684, "step": 3380 }, { "epoch": 0.4987089634821099, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.946078431372549, "eval_loss": 0.06167106330394745, "eval_precision": 0.9554455445544554, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1216, "eval_samples_per_second": 5.924, "eval_steps_per_second": 0.204, "step": 3380 }, { "epoch": 0.49885651051272595, "grad_norm": 2.5828070640563965, "learning_rate": 1.1775359150228763e-05, "loss": 0.09, "step": 3381 }, { "epoch": 0.49900405754334193, "grad_norm": 2.398191452026367, "learning_rate": 1.1770289745678833e-05, "loss": 0.065, "step": 3382 }, { "epoch": 0.49915160457395796, "grad_norm": 1.40326726436615, "learning_rate": 1.1765219871422132e-05, "loss": 0.0305, "step": 3383 }, { "epoch": 0.49929915160457394, "grad_norm": 2.270385503768921, "learning_rate": 1.1760149528803835e-05, "loss": 0.0348, "step": 3384 }, { "epoch": 0.49944669863519, "grad_norm": 0.7901647686958313, "learning_rate": 1.1755078719169241e-05, "loss": 0.0186, "step": 3385 }, { "epoch": 0.49959424566580596, "grad_norm": 2.2264490127563477, "learning_rate": 1.1750007443863784e-05, "loss": 0.0558, "step": 3386 }, { "epoch": 0.499741792696422, "grad_norm": 2.380129814147949, "learning_rate": 1.1744935704233005e-05, "loss": 0.0928, "step": 3387 }, { "epoch": 0.499889339727038, "grad_norm": 3.0580735206604004, "learning_rate": 1.173986350162258e-05, "loss": 0.0699, "step": 3388 }, { "epoch": 0.500036886757654, "grad_norm": 1.0945861339569092, "learning_rate": 1.1734790837378309e-05, "loss": 0.0223, "step": 3389 }, { "epoch": 0.50018443378827, "grad_norm": 3.3074185848236084, "learning_rate": 1.1729717712846105e-05, "loss": 0.0863, "step": 3390 }, { "epoch": 0.500331980818886, "grad_norm": 2.1055445671081543, "learning_rate": 1.1724644129372007e-05, "loss": 0.0685, "step": 3391 }, { "epoch": 0.500479527849502, "grad_norm": 2.0396888256073, "learning_rate": 1.1719570088302178e-05, "loss": 0.065, "step": 3392 }, { "epoch": 0.500627074880118, "grad_norm": 4.517836570739746, "learning_rate": 1.1714495590982904e-05, "loss": 0.0903, "step": 3393 }, { "epoch": 0.5007746219107341, "grad_norm": 2.1543447971343994, "learning_rate": 1.1709420638760586e-05, "loss": 0.0517, "step": 3394 }, { "epoch": 0.5009221689413501, "grad_norm": 1.9930365085601807, "learning_rate": 1.170434523298175e-05, "loss": 0.0399, "step": 3395 }, { "epoch": 0.501069715971966, "grad_norm": 1.5285929441452026, "learning_rate": 1.1699269374993043e-05, "loss": 0.0299, "step": 3396 }, { "epoch": 0.5012172630025821, "grad_norm": 3.59216046333313, "learning_rate": 1.169419306614123e-05, "loss": 0.0625, "step": 3397 }, { "epoch": 0.5013648100331981, "grad_norm": 1.5272828340530396, "learning_rate": 1.1689116307773194e-05, "loss": 0.0613, "step": 3398 }, { "epoch": 0.5015123570638141, "grad_norm": 3.010084390640259, "learning_rate": 1.1684039101235938e-05, "loss": 0.1231, "step": 3399 }, { "epoch": 0.5016599040944301, "grad_norm": 1.7401190996170044, "learning_rate": 1.1678961447876588e-05, "loss": 0.0556, "step": 3400 }, { "epoch": 0.5016599040944301, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.946078431372549, "eval_loss": 0.06494998186826706, "eval_precision": 0.9554455445544554, "eval_recall": 0.9368932038834952, "eval_runtime": 49.2005, "eval_samples_per_second": 5.915, "eval_steps_per_second": 0.203, "step": 3400 }, { "epoch": 0.5018074511250461, "grad_norm": 1.2393238544464111, "learning_rate": 1.1673883349042388e-05, "loss": 0.0372, "step": 3401 }, { "epoch": 0.5019549981556621, "grad_norm": 3.2965199947357178, "learning_rate": 1.1668804806080693e-05, "loss": 0.0383, "step": 3402 }, { "epoch": 0.5021025451862782, "grad_norm": 2.1697685718536377, "learning_rate": 1.1663725820338985e-05, "loss": 0.0325, "step": 3403 }, { "epoch": 0.5022500922168941, "grad_norm": 3.793184995651245, "learning_rate": 1.1658646393164853e-05, "loss": 0.0689, "step": 3404 }, { "epoch": 0.5023976392475101, "grad_norm": 4.530444145202637, "learning_rate": 1.1653566525906018e-05, "loss": 0.0915, "step": 3405 }, { "epoch": 0.5025451862781262, "grad_norm": 2.261080026626587, "learning_rate": 1.1648486219910297e-05, "loss": 0.0843, "step": 3406 }, { "epoch": 0.5026927333087422, "grad_norm": 3.6483511924743652, "learning_rate": 1.1643405476525648e-05, "loss": 0.0959, "step": 3407 }, { "epoch": 0.5028402803393581, "grad_norm": 3.5210161209106445, "learning_rate": 1.1638324297100129e-05, "loss": 0.0576, "step": 3408 }, { "epoch": 0.5029878273699742, "grad_norm": 2.061551094055176, "learning_rate": 1.1633242682981914e-05, "loss": 0.0625, "step": 3409 }, { "epoch": 0.5031353744005902, "grad_norm": 2.662553071975708, "learning_rate": 1.1628160635519298e-05, "loss": 0.096, "step": 3410 }, { "epoch": 0.5032829214312062, "grad_norm": 1.3100721836090088, "learning_rate": 1.1623078156060688e-05, "loss": 0.0342, "step": 3411 }, { "epoch": 0.5034304684618222, "grad_norm": 1.8624759912490845, "learning_rate": 1.1617995245954604e-05, "loss": 0.0368, "step": 3412 }, { "epoch": 0.5035780154924382, "grad_norm": 4.2568440437316895, "learning_rate": 1.1612911906549687e-05, "loss": 0.084, "step": 3413 }, { "epoch": 0.5037255625230542, "grad_norm": 1.8648234605789185, "learning_rate": 1.1607828139194683e-05, "loss": 0.0643, "step": 3414 }, { "epoch": 0.5038731095536703, "grad_norm": 3.886632204055786, "learning_rate": 1.160274394523846e-05, "loss": 0.0728, "step": 3415 }, { "epoch": 0.5040206565842862, "grad_norm": 1.649542212486267, "learning_rate": 1.159765932602999e-05, "loss": 0.0425, "step": 3416 }, { "epoch": 0.5041682036149022, "grad_norm": 3.0311574935913086, "learning_rate": 1.159257428291837e-05, "loss": 0.0612, "step": 3417 }, { "epoch": 0.5043157506455183, "grad_norm": 2.325615406036377, "learning_rate": 1.1587488817252796e-05, "loss": 0.0544, "step": 3418 }, { "epoch": 0.5044632976761343, "grad_norm": 2.380730152130127, "learning_rate": 1.1582402930382585e-05, "loss": 0.0806, "step": 3419 }, { "epoch": 0.5046108447067503, "grad_norm": 2.337064504623413, "learning_rate": 1.1577316623657163e-05, "loss": 0.079, "step": 3420 }, { "epoch": 0.5046108447067503, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9512195121951219, "eval_loss": 0.06396031379699707, "eval_precision": 0.9558823529411765, "eval_recall": 0.9466019417475728, "eval_runtime": 49.329, "eval_samples_per_second": 5.899, "eval_steps_per_second": 0.203, "step": 3420 }, { "epoch": 0.5047583917373663, "grad_norm": 0.9309882521629333, "learning_rate": 1.1572229898426068e-05, "loss": 0.0165, "step": 3421 }, { "epoch": 0.5049059387679823, "grad_norm": 2.355309247970581, "learning_rate": 1.1567142756038954e-05, "loss": 0.0363, "step": 3422 }, { "epoch": 0.5050534857985983, "grad_norm": 1.6004250049591064, "learning_rate": 1.1562055197845567e-05, "loss": 0.0368, "step": 3423 }, { "epoch": 0.5052010328292144, "grad_norm": 3.023813486099243, "learning_rate": 1.1556967225195787e-05, "loss": 0.0749, "step": 3424 }, { "epoch": 0.5053485798598303, "grad_norm": 1.7470098733901978, "learning_rate": 1.1551878839439592e-05, "loss": 0.0399, "step": 3425 }, { "epoch": 0.5054961268904463, "grad_norm": 4.3731560707092285, "learning_rate": 1.1546790041927072e-05, "loss": 0.1128, "step": 3426 }, { "epoch": 0.5056436739210624, "grad_norm": 3.562906265258789, "learning_rate": 1.1541700834008423e-05, "loss": 0.0642, "step": 3427 }, { "epoch": 0.5057912209516784, "grad_norm": 2.7933309078216553, "learning_rate": 1.1536611217033953e-05, "loss": 0.0836, "step": 3428 }, { "epoch": 0.5059387679822943, "grad_norm": 6.0216546058654785, "learning_rate": 1.1531521192354083e-05, "loss": 0.1012, "step": 3429 }, { "epoch": 0.5060863150129103, "grad_norm": 1.867408275604248, "learning_rate": 1.1526430761319328e-05, "loss": 0.0672, "step": 3430 }, { "epoch": 0.5062338620435264, "grad_norm": 5.641247749328613, "learning_rate": 1.1521339925280326e-05, "loss": 0.1349, "step": 3431 }, { "epoch": 0.5063814090741424, "grad_norm": 4.582265853881836, "learning_rate": 1.1516248685587815e-05, "loss": 0.0876, "step": 3432 }, { "epoch": 0.5065289561047583, "grad_norm": 3.5694081783294678, "learning_rate": 1.1511157043592642e-05, "loss": 0.0701, "step": 3433 }, { "epoch": 0.5066765031353744, "grad_norm": 2.2162024974823, "learning_rate": 1.1506065000645758e-05, "loss": 0.0705, "step": 3434 }, { "epoch": 0.5068240501659904, "grad_norm": 1.5952503681182861, "learning_rate": 1.1500972558098229e-05, "loss": 0.0464, "step": 3435 }, { "epoch": 0.5069715971966064, "grad_norm": 1.7615381479263306, "learning_rate": 1.1495879717301217e-05, "loss": 0.0403, "step": 3436 }, { "epoch": 0.5071191442272224, "grad_norm": 2.962190866470337, "learning_rate": 1.149078647960599e-05, "loss": 0.0682, "step": 3437 }, { "epoch": 0.5072666912578384, "grad_norm": 2.2013590335845947, "learning_rate": 1.1485692846363927e-05, "loss": 0.0723, "step": 3438 }, { "epoch": 0.5074142382884544, "grad_norm": 2.3486366271972656, "learning_rate": 1.1480598818926511e-05, "loss": 0.0722, "step": 3439 }, { "epoch": 0.5075617853190705, "grad_norm": 5.332746505737305, "learning_rate": 1.1475504398645328e-05, "loss": 0.0976, "step": 3440 }, { "epoch": 0.5075617853190705, "eval_accuracy": 0.9696092619392185, "eval_f1": 0.9484029484029484, "eval_loss": 0.06323620676994324, "eval_precision": 0.9601990049751243, "eval_recall": 0.9368932038834952, "eval_runtime": 48.7257, "eval_samples_per_second": 5.972, "eval_steps_per_second": 0.205, "step": 3440 }, { "epoch": 0.5077093323496865, "grad_norm": 5.610714435577393, "learning_rate": 1.1470409586872067e-05, "loss": 0.0861, "step": 3441 }, { "epoch": 0.5078568793803024, "grad_norm": 4.403682231903076, "learning_rate": 1.1465314384958521e-05, "loss": 0.1044, "step": 3442 }, { "epoch": 0.5080044264109185, "grad_norm": 1.1849644184112549, "learning_rate": 1.1460218794256594e-05, "loss": 0.0313, "step": 3443 }, { "epoch": 0.5081519734415345, "grad_norm": 2.254742383956909, "learning_rate": 1.1455122816118281e-05, "loss": 0.0532, "step": 3444 }, { "epoch": 0.5082995204721505, "grad_norm": 3.868600368499756, "learning_rate": 1.1450026451895686e-05, "loss": 0.0209, "step": 3445 }, { "epoch": 0.5084470675027665, "grad_norm": 2.215444326400757, "learning_rate": 1.1444929702941019e-05, "loss": 0.0761, "step": 3446 }, { "epoch": 0.5085946145333825, "grad_norm": 2.021327018737793, "learning_rate": 1.1439832570606585e-05, "loss": 0.0784, "step": 3447 }, { "epoch": 0.5087421615639985, "grad_norm": 1.8668911457061768, "learning_rate": 1.1434735056244795e-05, "loss": 0.0389, "step": 3448 }, { "epoch": 0.5088897085946146, "grad_norm": 1.9178575277328491, "learning_rate": 1.142963716120816e-05, "loss": 0.0527, "step": 3449 }, { "epoch": 0.5090372556252305, "grad_norm": 2.1388702392578125, "learning_rate": 1.1424538886849294e-05, "loss": 0.0997, "step": 3450 }, { "epoch": 0.5091848026558465, "grad_norm": 1.4415851831436157, "learning_rate": 1.1419440234520906e-05, "loss": 0.0421, "step": 3451 }, { "epoch": 0.5093323496864626, "grad_norm": 3.691077947616577, "learning_rate": 1.1414341205575817e-05, "loss": 0.1069, "step": 3452 }, { "epoch": 0.5094798967170786, "grad_norm": 3.339951753616333, "learning_rate": 1.1409241801366932e-05, "loss": 0.0795, "step": 3453 }, { "epoch": 0.5096274437476945, "grad_norm": 3.0262269973754883, "learning_rate": 1.140414202324727e-05, "loss": 0.0616, "step": 3454 }, { "epoch": 0.5097749907783106, "grad_norm": 1.3839607238769531, "learning_rate": 1.1399041872569938e-05, "loss": 0.0338, "step": 3455 }, { "epoch": 0.5099225378089266, "grad_norm": 1.7392637729644775, "learning_rate": 1.1393941350688147e-05, "loss": 0.0559, "step": 3456 }, { "epoch": 0.5100700848395426, "grad_norm": 1.4031736850738525, "learning_rate": 1.1388840458955212e-05, "loss": 0.0362, "step": 3457 }, { "epoch": 0.5102176318701586, "grad_norm": 2.8859026432037354, "learning_rate": 1.1383739198724537e-05, "loss": 0.1129, "step": 3458 }, { "epoch": 0.5103651789007746, "grad_norm": 2.3893625736236572, "learning_rate": 1.1378637571349626e-05, "loss": 0.1154, "step": 3459 }, { "epoch": 0.5105127259313906, "grad_norm": 2.337246894836426, "learning_rate": 1.1373535578184083e-05, "loss": 0.0756, "step": 3460 }, { "epoch": 0.5105127259313906, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9509803921568627, "eval_loss": 0.06456489115953445, "eval_precision": 0.9603960396039604, "eval_recall": 0.941747572815534, "eval_runtime": 49.1501, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.203, "step": 3460 }, { "epoch": 0.5106602729620067, "grad_norm": 2.6703622341156006, "learning_rate": 1.136843322058161e-05, "loss": 0.0705, "step": 3461 }, { "epoch": 0.5108078199926227, "grad_norm": 2.123413562774658, "learning_rate": 1.1363330499895996e-05, "loss": 0.0663, "step": 3462 }, { "epoch": 0.5109553670232386, "grad_norm": 2.953259229660034, "learning_rate": 1.1358227417481143e-05, "loss": 0.0867, "step": 3463 }, { "epoch": 0.5111029140538547, "grad_norm": 1.2372174263000488, "learning_rate": 1.1353123974691034e-05, "loss": 0.0416, "step": 3464 }, { "epoch": 0.5112504610844707, "grad_norm": 1.8095917701721191, "learning_rate": 1.1348020172879755e-05, "loss": 0.0484, "step": 3465 }, { "epoch": 0.5113980081150867, "grad_norm": 2.4721059799194336, "learning_rate": 1.1342916013401486e-05, "loss": 0.0664, "step": 3466 }, { "epoch": 0.5115455551457027, "grad_norm": 0.934148907661438, "learning_rate": 1.13378114976105e-05, "loss": 0.0274, "step": 3467 }, { "epoch": 0.5116931021763187, "grad_norm": 1.3078500032424927, "learning_rate": 1.1332706626861167e-05, "loss": 0.0392, "step": 3468 }, { "epoch": 0.5118406492069347, "grad_norm": 2.1768994331359863, "learning_rate": 1.1327601402507953e-05, "loss": 0.0382, "step": 3469 }, { "epoch": 0.5119881962375508, "grad_norm": 8.157400131225586, "learning_rate": 1.1322495825905406e-05, "loss": 0.0549, "step": 3470 }, { "epoch": 0.5121357432681667, "grad_norm": 1.0004053115844727, "learning_rate": 1.1317389898408188e-05, "loss": 0.0223, "step": 3471 }, { "epoch": 0.5122832902987827, "grad_norm": 2.1725616455078125, "learning_rate": 1.1312283621371036e-05, "loss": 0.0589, "step": 3472 }, { "epoch": 0.5124308373293988, "grad_norm": 1.3505425453186035, "learning_rate": 1.130717699614879e-05, "loss": 0.0352, "step": 3473 }, { "epoch": 0.5125783843600148, "grad_norm": 4.985128402709961, "learning_rate": 1.1302070024096374e-05, "loss": 0.0559, "step": 3474 }, { "epoch": 0.5127259313906307, "grad_norm": 2.157815456390381, "learning_rate": 1.1296962706568814e-05, "loss": 0.0188, "step": 3475 }, { "epoch": 0.5128734784212468, "grad_norm": 2.7989468574523926, "learning_rate": 1.1291855044921218e-05, "loss": 0.0461, "step": 3476 }, { "epoch": 0.5130210254518628, "grad_norm": 2.34574031829834, "learning_rate": 1.128674704050879e-05, "loss": 0.0784, "step": 3477 }, { "epoch": 0.5131685724824788, "grad_norm": 1.7945078611373901, "learning_rate": 1.1281638694686827e-05, "loss": 0.0593, "step": 3478 }, { "epoch": 0.5133161195130947, "grad_norm": 2.6016180515289307, "learning_rate": 1.1276530008810718e-05, "loss": 0.1086, "step": 3479 }, { "epoch": 0.5134636665437108, "grad_norm": 2.5083601474761963, "learning_rate": 1.1271420984235928e-05, "loss": 0.0786, "step": 3480 }, { "epoch": 0.5134636665437108, "eval_accuracy": 0.9681620839363242, "eval_f1": 0.9455445544554455, "eval_loss": 0.06514859199523926, "eval_precision": 0.9646464646464646, "eval_recall": 0.9271844660194175, "eval_runtime": 49.166, "eval_samples_per_second": 5.919, "eval_steps_per_second": 0.203, "step": 3480 }, { "epoch": 0.5136112135743268, "grad_norm": 3.769692897796631, "learning_rate": 1.126631162231803e-05, "loss": 0.0271, "step": 3481 }, { "epoch": 0.5137587606049429, "grad_norm": 1.8234009742736816, "learning_rate": 1.1261201924412677e-05, "loss": 0.0455, "step": 3482 }, { "epoch": 0.5139063076355588, "grad_norm": 2.0844228267669678, "learning_rate": 1.1256091891875616e-05, "loss": 0.0673, "step": 3483 }, { "epoch": 0.5140538546661748, "grad_norm": 1.3127630949020386, "learning_rate": 1.1250981526062672e-05, "loss": 0.0451, "step": 3484 }, { "epoch": 0.5142014016967908, "grad_norm": 2.6537387371063232, "learning_rate": 1.1245870828329775e-05, "loss": 0.0963, "step": 3485 }, { "epoch": 0.5143489487274069, "grad_norm": 2.382176160812378, "learning_rate": 1.1240759800032929e-05, "loss": 0.0614, "step": 3486 }, { "epoch": 0.5144964957580229, "grad_norm": 2.104100227355957, "learning_rate": 1.1235648442528232e-05, "loss": 0.0351, "step": 3487 }, { "epoch": 0.5146440427886388, "grad_norm": 2.1463024616241455, "learning_rate": 1.1230536757171871e-05, "loss": 0.0708, "step": 3488 }, { "epoch": 0.5147915898192549, "grad_norm": 0.8108016848564148, "learning_rate": 1.1225424745320113e-05, "loss": 0.0116, "step": 3489 }, { "epoch": 0.5149391368498709, "grad_norm": 2.8194987773895264, "learning_rate": 1.122031240832932e-05, "loss": 0.0578, "step": 3490 }, { "epoch": 0.515086683880487, "grad_norm": 1.612223505973816, "learning_rate": 1.1215199747555932e-05, "loss": 0.0583, "step": 3491 }, { "epoch": 0.5152342309111029, "grad_norm": 7.699445724487305, "learning_rate": 1.121008676435648e-05, "loss": 0.0716, "step": 3492 }, { "epoch": 0.5153817779417189, "grad_norm": 2.0828969478607178, "learning_rate": 1.1204973460087587e-05, "loss": 0.0631, "step": 3493 }, { "epoch": 0.515529324972335, "grad_norm": 1.2769685983657837, "learning_rate": 1.1199859836105941e-05, "loss": 0.009, "step": 3494 }, { "epoch": 0.515676872002951, "grad_norm": 7.922608852386475, "learning_rate": 1.119474589376834e-05, "loss": 0.082, "step": 3495 }, { "epoch": 0.5158244190335669, "grad_norm": 1.4526466131210327, "learning_rate": 1.1189631634431649e-05, "loss": 0.0181, "step": 3496 }, { "epoch": 0.5159719660641829, "grad_norm": 1.4317309856414795, "learning_rate": 1.118451705945282e-05, "loss": 0.0333, "step": 3497 }, { "epoch": 0.516119513094799, "grad_norm": 3.841702938079834, "learning_rate": 1.1179402170188893e-05, "loss": 0.1089, "step": 3498 }, { "epoch": 0.516267060125415, "grad_norm": 3.362285852432251, "learning_rate": 1.1174286967996988e-05, "loss": 0.0657, "step": 3499 }, { "epoch": 0.5164146071560309, "grad_norm": 2.0894968509674072, "learning_rate": 1.1169171454234315e-05, "loss": 0.0663, "step": 3500 }, { "epoch": 0.5164146071560309, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9512195121951219, "eval_loss": 0.05881144851446152, "eval_precision": 0.9558823529411765, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3038, "eval_samples_per_second": 5.902, "eval_steps_per_second": 0.203, "step": 3500 }, { "epoch": 0.516562154186647, "grad_norm": 2.469064712524414, "learning_rate": 1.1164055630258155e-05, "loss": 0.1101, "step": 3501 }, { "epoch": 0.516709701217263, "grad_norm": 1.4656027555465698, "learning_rate": 1.1158939497425881e-05, "loss": 0.037, "step": 3502 }, { "epoch": 0.516857248247879, "grad_norm": 3.337543249130249, "learning_rate": 1.1153823057094941e-05, "loss": 0.0785, "step": 3503 }, { "epoch": 0.517004795278495, "grad_norm": 2.9996700286865234, "learning_rate": 1.1148706310622872e-05, "loss": 0.0834, "step": 3504 }, { "epoch": 0.517152342309111, "grad_norm": 1.8667278289794922, "learning_rate": 1.1143589259367287e-05, "loss": 0.0438, "step": 3505 }, { "epoch": 0.517299889339727, "grad_norm": 3.0531399250030518, "learning_rate": 1.1138471904685875e-05, "loss": 0.0689, "step": 3506 }, { "epoch": 0.5174474363703431, "grad_norm": 2.248572587966919, "learning_rate": 1.1133354247936423e-05, "loss": 0.0348, "step": 3507 }, { "epoch": 0.5175949834009591, "grad_norm": 2.1095054149627686, "learning_rate": 1.1128236290476778e-05, "loss": 0.0305, "step": 3508 }, { "epoch": 0.517742530431575, "grad_norm": 4.427886009216309, "learning_rate": 1.1123118033664877e-05, "loss": 0.0373, "step": 3509 }, { "epoch": 0.5178900774621911, "grad_norm": 1.3057057857513428, "learning_rate": 1.1117999478858737e-05, "loss": 0.0132, "step": 3510 }, { "epoch": 0.5180376244928071, "grad_norm": 3.117243766784668, "learning_rate": 1.1112880627416449e-05, "loss": 0.0896, "step": 3511 }, { "epoch": 0.5181851715234231, "grad_norm": 1.9899787902832031, "learning_rate": 1.110776148069619e-05, "loss": 0.0578, "step": 3512 }, { "epoch": 0.5183327185540391, "grad_norm": 8.810548782348633, "learning_rate": 1.1102642040056206e-05, "loss": 0.1897, "step": 3513 }, { "epoch": 0.5184802655846551, "grad_norm": 1.2062597274780273, "learning_rate": 1.1097522306854831e-05, "loss": 0.0264, "step": 3514 }, { "epoch": 0.5186278126152711, "grad_norm": 1.4792219400405884, "learning_rate": 1.1092402282450468e-05, "loss": 0.0592, "step": 3515 }, { "epoch": 0.5187753596458872, "grad_norm": 2.792485475540161, "learning_rate": 1.1087281968201603e-05, "loss": 0.06, "step": 3516 }, { "epoch": 0.5189229066765031, "grad_norm": 2.4529595375061035, "learning_rate": 1.1082161365466797e-05, "loss": 0.0824, "step": 3517 }, { "epoch": 0.5190704537071191, "grad_norm": 3.275951623916626, "learning_rate": 1.1077040475604684e-05, "loss": 0.0914, "step": 3518 }, { "epoch": 0.5192180007377352, "grad_norm": 1.783270001411438, "learning_rate": 1.107191929997398e-05, "loss": 0.0651, "step": 3519 }, { "epoch": 0.5193655477683512, "grad_norm": 2.4270193576812744, "learning_rate": 1.1066797839933473e-05, "loss": 0.1079, "step": 3520 }, { "epoch": 0.5193655477683512, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.05977718532085419, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 50.4104, "eval_samples_per_second": 5.773, "eval_steps_per_second": 0.198, "step": 3520 }, { "epoch": 0.5195130947989671, "grad_norm": 3.448223829269409, "learning_rate": 1.1061676096842033e-05, "loss": 0.067, "step": 3521 }, { "epoch": 0.5196606418295832, "grad_norm": 3.3494110107421875, "learning_rate": 1.1056554072058596e-05, "loss": 0.0937, "step": 3522 }, { "epoch": 0.5198081888601992, "grad_norm": 2.503899097442627, "learning_rate": 1.1051431766942176e-05, "loss": 0.0774, "step": 3523 }, { "epoch": 0.5199557358908152, "grad_norm": 5.1114821434021, "learning_rate": 1.1046309182851863e-05, "loss": 0.0394, "step": 3524 }, { "epoch": 0.5201032829214312, "grad_norm": 1.1896251440048218, "learning_rate": 1.1041186321146818e-05, "loss": 0.0321, "step": 3525 }, { "epoch": 0.5202508299520472, "grad_norm": 1.3631348609924316, "learning_rate": 1.103606318318628e-05, "loss": 0.048, "step": 3526 }, { "epoch": 0.5203983769826632, "grad_norm": 1.6505790948867798, "learning_rate": 1.1030939770329557e-05, "loss": 0.0526, "step": 3527 }, { "epoch": 0.5205459240132793, "grad_norm": 1.5522688627243042, "learning_rate": 1.1025816083936036e-05, "loss": 0.0587, "step": 3528 }, { "epoch": 0.5206934710438953, "grad_norm": 3.606168270111084, "learning_rate": 1.1020692125365171e-05, "loss": 0.1196, "step": 3529 }, { "epoch": 0.5208410180745112, "grad_norm": 2.4806244373321533, "learning_rate": 1.1015567895976486e-05, "loss": 0.0705, "step": 3530 }, { "epoch": 0.5209885651051273, "grad_norm": 1.4611976146697998, "learning_rate": 1.1010443397129584e-05, "loss": 0.0367, "step": 3531 }, { "epoch": 0.5211361121357433, "grad_norm": 2.171333074569702, "learning_rate": 1.100531863018414e-05, "loss": 0.0199, "step": 3532 }, { "epoch": 0.5212836591663593, "grad_norm": 1.693715214729309, "learning_rate": 1.1000193596499886e-05, "loss": 0.0364, "step": 3533 }, { "epoch": 0.5214312061969752, "grad_norm": 2.185756206512451, "learning_rate": 1.0995068297436643e-05, "loss": 0.0883, "step": 3534 }, { "epoch": 0.5215787532275913, "grad_norm": 5.272037982940674, "learning_rate": 1.098994273435429e-05, "loss": 0.0791, "step": 3535 }, { "epoch": 0.5217263002582073, "grad_norm": 3.4218335151672363, "learning_rate": 1.0984816908612787e-05, "loss": 0.0471, "step": 3536 }, { "epoch": 0.5218738472888234, "grad_norm": 2.2550787925720215, "learning_rate": 1.097969082157215e-05, "loss": 0.0442, "step": 3537 }, { "epoch": 0.5220213943194393, "grad_norm": 4.873686790466309, "learning_rate": 1.0974564474592475e-05, "loss": 0.071, "step": 3538 }, { "epoch": 0.5221689413500553, "grad_norm": 2.3027725219726562, "learning_rate": 1.0969437869033923e-05, "loss": 0.0323, "step": 3539 }, { "epoch": 0.5223164883806714, "grad_norm": 1.5242536067962646, "learning_rate": 1.0964311006256725e-05, "loss": 0.0547, "step": 3540 }, { "epoch": 0.5223164883806714, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9558823529411765, "eval_loss": 0.057289425283670425, "eval_precision": 0.9653465346534653, "eval_recall": 0.9466019417475728, "eval_runtime": 49.7325, "eval_samples_per_second": 5.851, "eval_steps_per_second": 0.201, "step": 3540 }, { "epoch": 0.5224640354112874, "grad_norm": 1.0918387174606323, "learning_rate": 1.0959183887621177e-05, "loss": 0.0143, "step": 3541 }, { "epoch": 0.5226115824419033, "grad_norm": 2.3980836868286133, "learning_rate": 1.0954056514487645e-05, "loss": 0.061, "step": 3542 }, { "epoch": 0.5227591294725193, "grad_norm": 4.099221706390381, "learning_rate": 1.094892888821657e-05, "loss": 0.0662, "step": 3543 }, { "epoch": 0.5229066765031354, "grad_norm": 3.6991629600524902, "learning_rate": 1.094380101016844e-05, "loss": 0.111, "step": 3544 }, { "epoch": 0.5230542235337514, "grad_norm": 2.514206886291504, "learning_rate": 1.0938672881703833e-05, "loss": 0.0417, "step": 3545 }, { "epoch": 0.5232017705643673, "grad_norm": 2.7861063480377197, "learning_rate": 1.093354450418338e-05, "loss": 0.0457, "step": 3546 }, { "epoch": 0.5233493175949834, "grad_norm": 2.9248032569885254, "learning_rate": 1.0928415878967781e-05, "loss": 0.0946, "step": 3547 }, { "epoch": 0.5234968646255994, "grad_norm": 1.7036411762237549, "learning_rate": 1.0923287007417802e-05, "loss": 0.0275, "step": 3548 }, { "epoch": 0.5236444116562154, "grad_norm": 1.976135492324829, "learning_rate": 1.0918157890894274e-05, "loss": 0.0384, "step": 3549 }, { "epoch": 0.5237919586868315, "grad_norm": 2.986879587173462, "learning_rate": 1.0913028530758096e-05, "loss": 0.0647, "step": 3550 }, { "epoch": 0.5239395057174474, "grad_norm": 4.570195198059082, "learning_rate": 1.0907898928370222e-05, "loss": 0.1014, "step": 3551 }, { "epoch": 0.5240870527480634, "grad_norm": 2.2751247882843018, "learning_rate": 1.0902769085091687e-05, "loss": 0.0924, "step": 3552 }, { "epoch": 0.5242345997786795, "grad_norm": 1.7858340740203857, "learning_rate": 1.089763900228357e-05, "loss": 0.0527, "step": 3553 }, { "epoch": 0.5243821468092955, "grad_norm": 1.6592340469360352, "learning_rate": 1.0892508681307032e-05, "loss": 0.0619, "step": 3554 }, { "epoch": 0.5245296938399114, "grad_norm": 2.0850613117218018, "learning_rate": 1.0887378123523287e-05, "loss": 0.0467, "step": 3555 }, { "epoch": 0.5246772408705275, "grad_norm": 4.593194961547852, "learning_rate": 1.0882247330293607e-05, "loss": 0.0592, "step": 3556 }, { "epoch": 0.5248247879011435, "grad_norm": 3.315997838973999, "learning_rate": 1.0877116302979345e-05, "loss": 0.0922, "step": 3557 }, { "epoch": 0.5249723349317595, "grad_norm": 2.3332576751708984, "learning_rate": 1.0871985042941893e-05, "loss": 0.0618, "step": 3558 }, { "epoch": 0.5251198819623755, "grad_norm": 5.566976547241211, "learning_rate": 1.0866853551542723e-05, "loss": 0.1125, "step": 3559 }, { "epoch": 0.5252674289929915, "grad_norm": 1.3656704425811768, "learning_rate": 1.086172183014336e-05, "loss": 0.0204, "step": 3560 }, { "epoch": 0.5252674289929915, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.05937637761235237, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 50.7983, "eval_samples_per_second": 5.729, "eval_steps_per_second": 0.197, "step": 3560 }, { "epoch": 0.5254149760236075, "grad_norm": 1.3643617630004883, "learning_rate": 1.0856589880105388e-05, "loss": 0.0296, "step": 3561 }, { "epoch": 0.5255625230542236, "grad_norm": 1.5215461254119873, "learning_rate": 1.085145770279046e-05, "loss": 0.0296, "step": 3562 }, { "epoch": 0.5257100700848395, "grad_norm": 2.7468132972717285, "learning_rate": 1.0846325299560278e-05, "loss": 0.0865, "step": 3563 }, { "epoch": 0.5258576171154555, "grad_norm": 2.774982452392578, "learning_rate": 1.0841192671776621e-05, "loss": 0.0902, "step": 3564 }, { "epoch": 0.5260051641460716, "grad_norm": 2.7266316413879395, "learning_rate": 1.0836059820801305e-05, "loss": 0.1313, "step": 3565 }, { "epoch": 0.5261527111766876, "grad_norm": 1.377890706062317, "learning_rate": 1.0830926747996225e-05, "loss": 0.0301, "step": 3566 }, { "epoch": 0.5263002582073035, "grad_norm": 1.1492201089859009, "learning_rate": 1.0825793454723325e-05, "loss": 0.0336, "step": 3567 }, { "epoch": 0.5264478052379196, "grad_norm": 2.800081968307495, "learning_rate": 1.0820659942344609e-05, "loss": 0.0639, "step": 3568 }, { "epoch": 0.5265953522685356, "grad_norm": 1.6292766332626343, "learning_rate": 1.0815526212222141e-05, "loss": 0.0731, "step": 3569 }, { "epoch": 0.5267428992991516, "grad_norm": 1.7465286254882812, "learning_rate": 1.0810392265718034e-05, "loss": 0.0214, "step": 3570 }, { "epoch": 0.5268904463297676, "grad_norm": 4.0834197998046875, "learning_rate": 1.080525810419448e-05, "loss": 0.0991, "step": 3571 }, { "epoch": 0.5270379933603836, "grad_norm": 1.60231614112854, "learning_rate": 1.0800123729013703e-05, "loss": 0.0668, "step": 3572 }, { "epoch": 0.5271855403909996, "grad_norm": 2.3066658973693848, "learning_rate": 1.0794989141537997e-05, "loss": 0.1016, "step": 3573 }, { "epoch": 0.5273330874216157, "grad_norm": 1.236547589302063, "learning_rate": 1.078985434312971e-05, "loss": 0.0292, "step": 3574 }, { "epoch": 0.5274806344522317, "grad_norm": 1.3361313343048096, "learning_rate": 1.0784719335151248e-05, "loss": 0.0466, "step": 3575 }, { "epoch": 0.5276281814828476, "grad_norm": 1.3268588781356812, "learning_rate": 1.0779584118965068e-05, "loss": 0.0321, "step": 3576 }, { "epoch": 0.5277757285134637, "grad_norm": 1.567400336265564, "learning_rate": 1.0774448695933683e-05, "loss": 0.0688, "step": 3577 }, { "epoch": 0.5279232755440797, "grad_norm": 2.6386070251464844, "learning_rate": 1.0769313067419666e-05, "loss": 0.016, "step": 3578 }, { "epoch": 0.5280708225746957, "grad_norm": 2.8470680713653564, "learning_rate": 1.0764177234785644e-05, "loss": 0.0581, "step": 3579 }, { "epoch": 0.5282183696053117, "grad_norm": 1.7170813083648682, "learning_rate": 1.0759041199394289e-05, "loss": 0.0595, "step": 3580 }, { "epoch": 0.5282183696053117, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9578163771712159, "eval_loss": 0.06021639332175255, "eval_precision": 0.9796954314720813, "eval_recall": 0.9368932038834952, "eval_runtime": 49.6835, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.201, "step": 3580 }, { "epoch": 0.5283659166359277, "grad_norm": 3.634925127029419, "learning_rate": 1.0753904962608334e-05, "loss": 0.0781, "step": 3581 }, { "epoch": 0.5285134636665437, "grad_norm": 0.9435338973999023, "learning_rate": 1.074876852579057e-05, "loss": 0.0271, "step": 3582 }, { "epoch": 0.5286610106971598, "grad_norm": 0.8338215351104736, "learning_rate": 1.074363189030383e-05, "loss": 0.0165, "step": 3583 }, { "epoch": 0.5288085577277757, "grad_norm": 0.8123714923858643, "learning_rate": 1.0738495057511003e-05, "loss": 0.0207, "step": 3584 }, { "epoch": 0.5289561047583917, "grad_norm": 3.053124189376831, "learning_rate": 1.073335802877504e-05, "loss": 0.0471, "step": 3585 }, { "epoch": 0.5291036517890078, "grad_norm": 1.5023503303527832, "learning_rate": 1.0728220805458935e-05, "loss": 0.052, "step": 3586 }, { "epoch": 0.5292511988196238, "grad_norm": 3.0605030059814453, "learning_rate": 1.0723083388925731e-05, "loss": 0.0758, "step": 3587 }, { "epoch": 0.5293987458502397, "grad_norm": 4.905116558074951, "learning_rate": 1.0717945780538527e-05, "loss": 0.2057, "step": 3588 }, { "epoch": 0.5295462928808558, "grad_norm": 2.5521600246429443, "learning_rate": 1.0712807981660478e-05, "loss": 0.0507, "step": 3589 }, { "epoch": 0.5296938399114718, "grad_norm": 1.739632248878479, "learning_rate": 1.0707669993654778e-05, "loss": 0.0502, "step": 3590 }, { "epoch": 0.5298413869420878, "grad_norm": 2.5149118900299072, "learning_rate": 1.0702531817884681e-05, "loss": 0.0704, "step": 3591 }, { "epoch": 0.5299889339727037, "grad_norm": 2.5621867179870605, "learning_rate": 1.0697393455713483e-05, "loss": 0.0577, "step": 3592 }, { "epoch": 0.5301364810033198, "grad_norm": 2.1689069271087646, "learning_rate": 1.0692254908504542e-05, "loss": 0.0489, "step": 3593 }, { "epoch": 0.5302840280339358, "grad_norm": 1.0757484436035156, "learning_rate": 1.0687116177621244e-05, "loss": 0.0278, "step": 3594 }, { "epoch": 0.5304315750645519, "grad_norm": 1.871010422706604, "learning_rate": 1.0681977264427046e-05, "loss": 0.0642, "step": 3595 }, { "epoch": 0.5305791220951679, "grad_norm": 2.093369483947754, "learning_rate": 1.067683817028544e-05, "loss": 0.0527, "step": 3596 }, { "epoch": 0.5307266691257838, "grad_norm": 2.046318769454956, "learning_rate": 1.0671698896559968e-05, "loss": 0.0725, "step": 3597 }, { "epoch": 0.5308742161563998, "grad_norm": 2.6404740810394287, "learning_rate": 1.0666559444614225e-05, "loss": 0.0556, "step": 3598 }, { "epoch": 0.5310217631870159, "grad_norm": 2.6905758380889893, "learning_rate": 1.0661419815811846e-05, "loss": 0.0428, "step": 3599 }, { "epoch": 0.5311693102176319, "grad_norm": 5.128182411193848, "learning_rate": 1.0656280011516523e-05, "loss": 0.0574, "step": 3600 }, { "epoch": 0.5311693102176319, "eval_accuracy": 0.9710564399421129, "eval_f1": 0.9512195121951219, "eval_loss": 0.06035442277789116, "eval_precision": 0.9558823529411765, "eval_recall": 0.9466019417475728, "eval_runtime": 50.258, "eval_samples_per_second": 5.79, "eval_steps_per_second": 0.199, "step": 3600 }, { "epoch": 0.5313168572482478, "grad_norm": 2.427949905395508, "learning_rate": 1.0651140033091978e-05, "loss": 0.0438, "step": 3601 }, { "epoch": 0.5314644042788639, "grad_norm": 2.9920191764831543, "learning_rate": 1.0645999881901995e-05, "loss": 0.0779, "step": 3602 }, { "epoch": 0.5316119513094799, "grad_norm": 2.497612714767456, "learning_rate": 1.0640859559310402e-05, "loss": 0.0447, "step": 3603 }, { "epoch": 0.531759498340096, "grad_norm": 1.1314340829849243, "learning_rate": 1.0635719066681064e-05, "loss": 0.0263, "step": 3604 }, { "epoch": 0.5319070453707119, "grad_norm": 1.788355827331543, "learning_rate": 1.0630578405377894e-05, "loss": 0.0318, "step": 3605 }, { "epoch": 0.5320545924013279, "grad_norm": 2.9042913913726807, "learning_rate": 1.062543757676485e-05, "loss": 0.0462, "step": 3606 }, { "epoch": 0.5322021394319439, "grad_norm": 3.9081053733825684, "learning_rate": 1.0620296582205946e-05, "loss": 0.1665, "step": 3607 }, { "epoch": 0.53234968646256, "grad_norm": 2.334308385848999, "learning_rate": 1.0615155423065218e-05, "loss": 0.0879, "step": 3608 }, { "epoch": 0.5324972334931759, "grad_norm": 2.932739734649658, "learning_rate": 1.0610014100706768e-05, "loss": 0.1005, "step": 3609 }, { "epoch": 0.5326447805237919, "grad_norm": 1.4589275121688843, "learning_rate": 1.0604872616494722e-05, "loss": 0.0296, "step": 3610 }, { "epoch": 0.532792327554408, "grad_norm": 2.4524049758911133, "learning_rate": 1.059973097179326e-05, "loss": 0.0809, "step": 3611 }, { "epoch": 0.532939874585024, "grad_norm": 4.211121082305908, "learning_rate": 1.0594589167966606e-05, "loss": 0.1157, "step": 3612 }, { "epoch": 0.5330874216156399, "grad_norm": 2.3657655715942383, "learning_rate": 1.058944720637902e-05, "loss": 0.0435, "step": 3613 }, { "epoch": 0.533234968646256, "grad_norm": 3.1218788623809814, "learning_rate": 1.058430508839481e-05, "loss": 0.1024, "step": 3614 }, { "epoch": 0.533382515676872, "grad_norm": 6.471559524536133, "learning_rate": 1.0579162815378314e-05, "loss": 0.0711, "step": 3615 }, { "epoch": 0.533530062707488, "grad_norm": 2.9321231842041016, "learning_rate": 1.0574020388693926e-05, "loss": 0.0695, "step": 3616 }, { "epoch": 0.5336776097381041, "grad_norm": 3.0945262908935547, "learning_rate": 1.0568877809706072e-05, "loss": 0.0614, "step": 3617 }, { "epoch": 0.53382515676872, "grad_norm": 2.731660842895508, "learning_rate": 1.056373507977922e-05, "loss": 0.1243, "step": 3618 }, { "epoch": 0.533972703799336, "grad_norm": 2.141627550125122, "learning_rate": 1.0558592200277878e-05, "loss": 0.0508, "step": 3619 }, { "epoch": 0.5341202508299521, "grad_norm": 2.444028377532959, "learning_rate": 1.0553449172566592e-05, "loss": 0.0249, "step": 3620 }, { "epoch": 0.5341202508299521, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9552238805970149, "eval_loss": 0.06539621949195862, "eval_precision": 0.9795918367346939, "eval_recall": 0.9320388349514563, "eval_runtime": 48.9358, "eval_samples_per_second": 5.947, "eval_steps_per_second": 0.204, "step": 3620 }, { "epoch": 0.5342677978605681, "grad_norm": 1.7489478588104248, "learning_rate": 1.0548305998009957e-05, "loss": 0.0331, "step": 3621 }, { "epoch": 0.534415344891184, "grad_norm": 3.899421453475952, "learning_rate": 1.054316267797259e-05, "loss": 0.0584, "step": 3622 }, { "epoch": 0.5345628919218001, "grad_norm": 1.6799280643463135, "learning_rate": 1.053801921381916e-05, "loss": 0.0229, "step": 3623 }, { "epoch": 0.5347104389524161, "grad_norm": 2.7103888988494873, "learning_rate": 1.0532875606914372e-05, "loss": 0.1122, "step": 3624 }, { "epoch": 0.5348579859830321, "grad_norm": 2.4744038581848145, "learning_rate": 1.0527731858622962e-05, "loss": 0.0386, "step": 3625 }, { "epoch": 0.5350055330136481, "grad_norm": 1.2592852115631104, "learning_rate": 1.0522587970309715e-05, "loss": 0.0332, "step": 3626 }, { "epoch": 0.5351530800442641, "grad_norm": 1.2552969455718994, "learning_rate": 1.0517443943339438e-05, "loss": 0.0467, "step": 3627 }, { "epoch": 0.5353006270748801, "grad_norm": 2.6253955364227295, "learning_rate": 1.0512299779076995e-05, "loss": 0.0578, "step": 3628 }, { "epoch": 0.5354481741054962, "grad_norm": 3.4663279056549072, "learning_rate": 1.0507155478887263e-05, "loss": 0.0606, "step": 3629 }, { "epoch": 0.5355957211361121, "grad_norm": 2.210115432739258, "learning_rate": 1.0502011044135176e-05, "loss": 0.0837, "step": 3630 }, { "epoch": 0.5357432681667281, "grad_norm": 1.6879552602767944, "learning_rate": 1.049686647618569e-05, "loss": 0.0416, "step": 3631 }, { "epoch": 0.5358908151973442, "grad_norm": 2.3775551319122314, "learning_rate": 1.0491721776403802e-05, "loss": 0.0544, "step": 3632 }, { "epoch": 0.5360383622279602, "grad_norm": 2.4179673194885254, "learning_rate": 1.0486576946154545e-05, "loss": 0.0796, "step": 3633 }, { "epoch": 0.5361859092585761, "grad_norm": 4.693197250366211, "learning_rate": 1.048143198680298e-05, "loss": 0.1065, "step": 3634 }, { "epoch": 0.5363334562891922, "grad_norm": 1.2805655002593994, "learning_rate": 1.047628689971421e-05, "loss": 0.0424, "step": 3635 }, { "epoch": 0.5364810033198082, "grad_norm": 1.463444709777832, "learning_rate": 1.0471141686253367e-05, "loss": 0.0536, "step": 3636 }, { "epoch": 0.5366285503504242, "grad_norm": 1.8686429262161255, "learning_rate": 1.0465996347785621e-05, "loss": 0.052, "step": 3637 }, { "epoch": 0.5367760973810402, "grad_norm": 2.3696820735931396, "learning_rate": 1.0460850885676173e-05, "loss": 0.0637, "step": 3638 }, { "epoch": 0.5369236444116562, "grad_norm": 1.668507695198059, "learning_rate": 1.045570530129025e-05, "loss": 0.0323, "step": 3639 }, { "epoch": 0.5370711914422722, "grad_norm": 6.714794158935547, "learning_rate": 1.0450559595993126e-05, "loss": 0.131, "step": 3640 }, { "epoch": 0.5370711914422722, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9560975609756097, "eval_loss": 0.05963349714875221, "eval_precision": 0.9607843137254902, "eval_recall": 0.9514563106796117, "eval_runtime": 49.0203, "eval_samples_per_second": 5.936, "eval_steps_per_second": 0.204, "step": 3640 }, { "epoch": 0.5372187384728883, "grad_norm": 1.5180429220199585, "learning_rate": 1.044541377115009e-05, "loss": 0.0472, "step": 3641 }, { "epoch": 0.5373662855035043, "grad_norm": 2.118589162826538, "learning_rate": 1.0440267828126478e-05, "loss": 0.0403, "step": 3642 }, { "epoch": 0.5375138325341202, "grad_norm": 1.6448218822479248, "learning_rate": 1.0435121768287652e-05, "loss": 0.0486, "step": 3643 }, { "epoch": 0.5376613795647363, "grad_norm": 2.6264731884002686, "learning_rate": 1.0429975592998998e-05, "loss": 0.0305, "step": 3644 }, { "epoch": 0.5378089265953523, "grad_norm": 1.7372239828109741, "learning_rate": 1.0424829303625941e-05, "loss": 0.0364, "step": 3645 }, { "epoch": 0.5379564736259683, "grad_norm": 1.8526792526245117, "learning_rate": 1.0419682901533934e-05, "loss": 0.0309, "step": 3646 }, { "epoch": 0.5381040206565842, "grad_norm": 1.8069508075714111, "learning_rate": 1.041453638808846e-05, "loss": 0.0466, "step": 3647 }, { "epoch": 0.5382515676872003, "grad_norm": 1.4902995824813843, "learning_rate": 1.0409389764655031e-05, "loss": 0.0443, "step": 3648 }, { "epoch": 0.5383991147178163, "grad_norm": 3.2903122901916504, "learning_rate": 1.0404243032599188e-05, "loss": 0.0458, "step": 3649 }, { "epoch": 0.5385466617484324, "grad_norm": 2.076983690261841, "learning_rate": 1.0399096193286504e-05, "loss": 0.0518, "step": 3650 }, { "epoch": 0.5386942087790483, "grad_norm": 1.6475075483322144, "learning_rate": 1.0393949248082572e-05, "loss": 0.02, "step": 3651 }, { "epoch": 0.5388417558096643, "grad_norm": 1.3995980024337769, "learning_rate": 1.038880219835302e-05, "loss": 0.0402, "step": 3652 }, { "epoch": 0.5389893028402803, "grad_norm": 3.903316020965576, "learning_rate": 1.0383655045463508e-05, "loss": 0.0502, "step": 3653 }, { "epoch": 0.5391368498708964, "grad_norm": 2.024240016937256, "learning_rate": 1.0378507790779712e-05, "loss": 0.0568, "step": 3654 }, { "epoch": 0.5392843969015123, "grad_norm": 6.113285541534424, "learning_rate": 1.0373360435667346e-05, "loss": 0.1567, "step": 3655 }, { "epoch": 0.5394319439321283, "grad_norm": 1.540674090385437, "learning_rate": 1.0368212981492136e-05, "loss": 0.0136, "step": 3656 }, { "epoch": 0.5395794909627444, "grad_norm": 3.9668731689453125, "learning_rate": 1.036306542961986e-05, "loss": 0.0825, "step": 3657 }, { "epoch": 0.5397270379933604, "grad_norm": 3.607522487640381, "learning_rate": 1.0357917781416287e-05, "loss": 0.0944, "step": 3658 }, { "epoch": 0.5398745850239763, "grad_norm": 2.4661123752593994, "learning_rate": 1.0352770038247242e-05, "loss": 0.0518, "step": 3659 }, { "epoch": 0.5400221320545924, "grad_norm": 1.7464462518692017, "learning_rate": 1.0347622201478563e-05, "loss": 0.0454, "step": 3660 }, { "epoch": 0.5400221320545924, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06375893950462341, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.2446, "eval_samples_per_second": 5.909, "eval_steps_per_second": 0.203, "step": 3660 }, { "epoch": 0.5401696790852084, "grad_norm": 5.017095565795898, "learning_rate": 1.0342474272476108e-05, "loss": 0.0776, "step": 3661 }, { "epoch": 0.5403172261158244, "grad_norm": 2.051027774810791, "learning_rate": 1.0337326252605768e-05, "loss": 0.04, "step": 3662 }, { "epoch": 0.5404647731464405, "grad_norm": 3.897878408432007, "learning_rate": 1.0332178143233453e-05, "loss": 0.0741, "step": 3663 }, { "epoch": 0.5406123201770564, "grad_norm": 1.8527952432632446, "learning_rate": 1.0327029945725107e-05, "loss": 0.0352, "step": 3664 }, { "epoch": 0.5407598672076724, "grad_norm": 2.326930522918701, "learning_rate": 1.0321881661446675e-05, "loss": 0.0915, "step": 3665 }, { "epoch": 0.5409074142382885, "grad_norm": 1.5918142795562744, "learning_rate": 1.0316733291764152e-05, "loss": 0.0153, "step": 3666 }, { "epoch": 0.5410549612689045, "grad_norm": 5.2848334312438965, "learning_rate": 1.0311584838043533e-05, "loss": 0.1226, "step": 3667 }, { "epoch": 0.5412025082995204, "grad_norm": 3.7213995456695557, "learning_rate": 1.030643630165085e-05, "loss": 0.05, "step": 3668 }, { "epoch": 0.5413500553301365, "grad_norm": 3.5198309421539307, "learning_rate": 1.0301287683952153e-05, "loss": 0.0339, "step": 3669 }, { "epoch": 0.5414976023607525, "grad_norm": 3.1995413303375244, "learning_rate": 1.0296138986313506e-05, "loss": 0.0395, "step": 3670 }, { "epoch": 0.5416451493913685, "grad_norm": 4.593168258666992, "learning_rate": 1.0290990210101011e-05, "loss": 0.0376, "step": 3671 }, { "epoch": 0.5417926964219845, "grad_norm": 1.1846117973327637, "learning_rate": 1.0285841356680771e-05, "loss": 0.0281, "step": 3672 }, { "epoch": 0.5419402434526005, "grad_norm": 2.156420946121216, "learning_rate": 1.0280692427418923e-05, "loss": 0.0522, "step": 3673 }, { "epoch": 0.5420877904832165, "grad_norm": 2.1500136852264404, "learning_rate": 1.0275543423681622e-05, "loss": 0.0592, "step": 3674 }, { "epoch": 0.5422353375138326, "grad_norm": 2.7693979740142822, "learning_rate": 1.0270394346835038e-05, "loss": 0.1162, "step": 3675 }, { "epoch": 0.5423828845444485, "grad_norm": 1.5747324228286743, "learning_rate": 1.0265245198245364e-05, "loss": 0.0453, "step": 3676 }, { "epoch": 0.5425304315750645, "grad_norm": 1.7522733211517334, "learning_rate": 1.0260095979278809e-05, "loss": 0.0501, "step": 3677 }, { "epoch": 0.5426779786056806, "grad_norm": 1.2786890268325806, "learning_rate": 1.025494669130161e-05, "loss": 0.0501, "step": 3678 }, { "epoch": 0.5428255256362966, "grad_norm": 1.827549695968628, "learning_rate": 1.0249797335680007e-05, "loss": 0.0779, "step": 3679 }, { "epoch": 0.5429730726669125, "grad_norm": 0.6877404451370239, "learning_rate": 1.0244647913780272e-05, "loss": 0.0219, "step": 3680 }, { "epoch": 0.5429730726669125, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06361615657806396, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 48.9893, "eval_samples_per_second": 5.94, "eval_steps_per_second": 0.204, "step": 3680 }, { "epoch": 0.5431206196975286, "grad_norm": 2.077110767364502, "learning_rate": 1.0239498426968689e-05, "loss": 0.0537, "step": 3681 }, { "epoch": 0.5432681667281446, "grad_norm": 2.2908997535705566, "learning_rate": 1.0234348876611556e-05, "loss": 0.0915, "step": 3682 }, { "epoch": 0.5434157137587606, "grad_norm": 2.7487967014312744, "learning_rate": 1.0229199264075192e-05, "loss": 0.0575, "step": 3683 }, { "epoch": 0.5435632607893767, "grad_norm": 5.112874984741211, "learning_rate": 1.0224049590725929e-05, "loss": 0.1078, "step": 3684 }, { "epoch": 0.5437108078199926, "grad_norm": 1.2296621799468994, "learning_rate": 1.0218899857930125e-05, "loss": 0.0174, "step": 3685 }, { "epoch": 0.5438583548506086, "grad_norm": 0.8048681020736694, "learning_rate": 1.0213750067054137e-05, "loss": 0.0085, "step": 3686 }, { "epoch": 0.5440059018812247, "grad_norm": 2.3535094261169434, "learning_rate": 1.0208600219464355e-05, "loss": 0.0791, "step": 3687 }, { "epoch": 0.5441534489118407, "grad_norm": 2.2542917728424072, "learning_rate": 1.0203450316527173e-05, "loss": 0.0739, "step": 3688 }, { "epoch": 0.5443009959424566, "grad_norm": 1.5458574295043945, "learning_rate": 1.0198300359608998e-05, "loss": 0.0474, "step": 3689 }, { "epoch": 0.5444485429730727, "grad_norm": 2.1573433876037598, "learning_rate": 1.0193150350076265e-05, "loss": 0.0721, "step": 3690 }, { "epoch": 0.5445960900036887, "grad_norm": 5.027381896972656, "learning_rate": 1.01880002892954e-05, "loss": 0.0401, "step": 3691 }, { "epoch": 0.5447436370343047, "grad_norm": 3.6800339221954346, "learning_rate": 1.0182850178632872e-05, "loss": 0.0526, "step": 3692 }, { "epoch": 0.5448911840649207, "grad_norm": 1.688063621520996, "learning_rate": 1.017770001945514e-05, "loss": 0.052, "step": 3693 }, { "epoch": 0.5450387310955367, "grad_norm": 3.0386409759521484, "learning_rate": 1.017254981312868e-05, "loss": 0.1287, "step": 3694 }, { "epoch": 0.5451862781261527, "grad_norm": 2.4379842281341553, "learning_rate": 1.016739956101999e-05, "loss": 0.0852, "step": 3695 }, { "epoch": 0.5453338251567688, "grad_norm": 1.9624031782150269, "learning_rate": 1.0162249264495573e-05, "loss": 0.0392, "step": 3696 }, { "epoch": 0.5454813721873847, "grad_norm": 2.183525323867798, "learning_rate": 1.0157098924921944e-05, "loss": 0.0902, "step": 3697 }, { "epoch": 0.5456289192180007, "grad_norm": 2.8118762969970703, "learning_rate": 1.0151948543665627e-05, "loss": 0.0995, "step": 3698 }, { "epoch": 0.5457764662486168, "grad_norm": 2.5438857078552246, "learning_rate": 1.0146798122093167e-05, "loss": 0.0924, "step": 3699 }, { "epoch": 0.5459240132792328, "grad_norm": 1.5641134977340698, "learning_rate": 1.0141647661571112e-05, "loss": 0.0547, "step": 3700 }, { "epoch": 0.5459240132792328, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.06087513640522957, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.054, "eval_samples_per_second": 5.932, "eval_steps_per_second": 0.204, "step": 3700 }, { "epoch": 0.5460715603098487, "grad_norm": 0.927797257900238, "learning_rate": 1.0136497163466014e-05, "loss": 0.0212, "step": 3701 }, { "epoch": 0.5462191073404647, "grad_norm": 1.628825068473816, "learning_rate": 1.0131346629144452e-05, "loss": 0.0271, "step": 3702 }, { "epoch": 0.5463666543710808, "grad_norm": 0.6106228232383728, "learning_rate": 1.0126196059973e-05, "loss": 0.0175, "step": 3703 }, { "epoch": 0.5465142014016968, "grad_norm": 1.9554325342178345, "learning_rate": 1.0121045457318247e-05, "loss": 0.0704, "step": 3704 }, { "epoch": 0.5466617484323129, "grad_norm": 2.2551918029785156, "learning_rate": 1.0115894822546788e-05, "loss": 0.0811, "step": 3705 }, { "epoch": 0.5468092954629288, "grad_norm": 1.5803611278533936, "learning_rate": 1.0110744157025234e-05, "loss": 0.0474, "step": 3706 }, { "epoch": 0.5469568424935448, "grad_norm": 1.926823377609253, "learning_rate": 1.0105593462120196e-05, "loss": 0.0307, "step": 3707 }, { "epoch": 0.5471043895241609, "grad_norm": 1.9144130945205688, "learning_rate": 1.0100442739198293e-05, "loss": 0.0449, "step": 3708 }, { "epoch": 0.5472519365547769, "grad_norm": 1.6707820892333984, "learning_rate": 1.009529198962616e-05, "loss": 0.041, "step": 3709 }, { "epoch": 0.5473994835853928, "grad_norm": 1.5497639179229736, "learning_rate": 1.0090141214770426e-05, "loss": 0.0295, "step": 3710 }, { "epoch": 0.5475470306160088, "grad_norm": 2.402508020401001, "learning_rate": 1.008499041599774e-05, "loss": 0.0531, "step": 3711 }, { "epoch": 0.5476945776466249, "grad_norm": 1.2764594554901123, "learning_rate": 1.0079839594674749e-05, "loss": 0.0321, "step": 3712 }, { "epoch": 0.5478421246772409, "grad_norm": 1.8888596296310425, "learning_rate": 1.0074688752168104e-05, "loss": 0.038, "step": 3713 }, { "epoch": 0.5479896717078568, "grad_norm": 2.022836923599243, "learning_rate": 1.0069537889844476e-05, "loss": 0.0605, "step": 3714 }, { "epoch": 0.5481372187384729, "grad_norm": 1.7803722620010376, "learning_rate": 1.006438700907052e-05, "loss": 0.0681, "step": 3715 }, { "epoch": 0.5482847657690889, "grad_norm": 4.56878662109375, "learning_rate": 1.0059236111212914e-05, "loss": 0.1236, "step": 3716 }, { "epoch": 0.548432312799705, "grad_norm": 0.7959854006767273, "learning_rate": 1.005408519763833e-05, "loss": 0.0145, "step": 3717 }, { "epoch": 0.5485798598303209, "grad_norm": 6.06070613861084, "learning_rate": 1.004893426971345e-05, "loss": 0.0377, "step": 3718 }, { "epoch": 0.5487274068609369, "grad_norm": 3.700913190841675, "learning_rate": 1.004378332880496e-05, "loss": 0.0278, "step": 3719 }, { "epoch": 0.5488749538915529, "grad_norm": 5.320037364959717, "learning_rate": 1.0038632376279535e-05, "loss": 0.077, "step": 3720 }, { "epoch": 0.5488749538915529, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9578163771712159, "eval_loss": 0.05850488319993019, "eval_precision": 0.9796954314720813, "eval_recall": 0.9368932038834952, "eval_runtime": 49.9661, "eval_samples_per_second": 5.824, "eval_steps_per_second": 0.2, "step": 3720 }, { "epoch": 0.549022500922169, "grad_norm": 2.794523000717163, "learning_rate": 1.0033481413503883e-05, "loss": 0.0327, "step": 3721 }, { "epoch": 0.5491700479527849, "grad_norm": 1.0081136226654053, "learning_rate": 1.0028330441844678e-05, "loss": 0.0218, "step": 3722 }, { "epoch": 0.5493175949834009, "grad_norm": 2.727607011795044, "learning_rate": 1.002317946266863e-05, "loss": 0.0634, "step": 3723 }, { "epoch": 0.549465142014017, "grad_norm": 0.6229975819587708, "learning_rate": 1.001802847734243e-05, "loss": 0.0194, "step": 3724 }, { "epoch": 0.549612689044633, "grad_norm": 5.12562894821167, "learning_rate": 1.0012877487232774e-05, "loss": 0.0319, "step": 3725 }, { "epoch": 0.5497602360752489, "grad_norm": 2.87324595451355, "learning_rate": 1.0007726493706367e-05, "loss": 0.0708, "step": 3726 }, { "epoch": 0.549907783105865, "grad_norm": 5.746551036834717, "learning_rate": 1.0002575498129905e-05, "loss": 0.0559, "step": 3727 }, { "epoch": 0.550055330136481, "grad_norm": 2.7586886882781982, "learning_rate": 9.997424501870095e-06, "loss": 0.0571, "step": 3728 }, { "epoch": 0.550202877167097, "grad_norm": 2.8424432277679443, "learning_rate": 9.992273506293636e-06, "loss": 0.1035, "step": 3729 }, { "epoch": 0.5503504241977131, "grad_norm": 3.6614279747009277, "learning_rate": 9.987122512767229e-06, "loss": 0.0861, "step": 3730 }, { "epoch": 0.550497971228329, "grad_norm": 1.9463998079299927, "learning_rate": 9.981971522657575e-06, "loss": 0.0671, "step": 3731 }, { "epoch": 0.550645518258945, "grad_norm": 1.3599612712860107, "learning_rate": 9.976820537331375e-06, "loss": 0.0494, "step": 3732 }, { "epoch": 0.5507930652895611, "grad_norm": 2.320894956588745, "learning_rate": 9.971669558155322e-06, "loss": 0.0593, "step": 3733 }, { "epoch": 0.5509406123201771, "grad_norm": 2.7197000980377197, "learning_rate": 9.96651858649612e-06, "loss": 0.1013, "step": 3734 }, { "epoch": 0.551088159350793, "grad_norm": 1.2852067947387695, "learning_rate": 9.961367623720467e-06, "loss": 0.0303, "step": 3735 }, { "epoch": 0.5512357063814091, "grad_norm": 2.8085477352142334, "learning_rate": 9.956216671195047e-06, "loss": 0.0701, "step": 3736 }, { "epoch": 0.5513832534120251, "grad_norm": 2.4579927921295166, "learning_rate": 9.951065730286553e-06, "loss": 0.0677, "step": 3737 }, { "epoch": 0.5515308004426411, "grad_norm": 2.144411325454712, "learning_rate": 9.945914802361672e-06, "loss": 0.0607, "step": 3738 }, { "epoch": 0.5516783474732571, "grad_norm": 1.9104830026626587, "learning_rate": 9.940763888787088e-06, "loss": 0.0403, "step": 3739 }, { "epoch": 0.5518258945038731, "grad_norm": 2.6878914833068848, "learning_rate": 9.935612990929482e-06, "loss": 0.0403, "step": 3740 }, { "epoch": 0.5518258945038731, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05521642044186592, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.6835, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.201, "step": 3740 }, { "epoch": 0.5519734415344891, "grad_norm": 1.8800990581512451, "learning_rate": 9.930462110155529e-06, "loss": 0.0719, "step": 3741 }, { "epoch": 0.5521209885651052, "grad_norm": 1.9986878633499146, "learning_rate": 9.925311247831896e-06, "loss": 0.0581, "step": 3742 }, { "epoch": 0.5522685355957211, "grad_norm": 3.641836166381836, "learning_rate": 9.920160405325254e-06, "loss": 0.0726, "step": 3743 }, { "epoch": 0.5524160826263371, "grad_norm": 5.52808141708374, "learning_rate": 9.915009584002263e-06, "loss": 0.0852, "step": 3744 }, { "epoch": 0.5525636296569532, "grad_norm": 1.0799669027328491, "learning_rate": 9.909858785229576e-06, "loss": 0.0342, "step": 3745 }, { "epoch": 0.5527111766875692, "grad_norm": 3.680673599243164, "learning_rate": 9.904708010373847e-06, "loss": 0.0844, "step": 3746 }, { "epoch": 0.5528587237181851, "grad_norm": 1.3180592060089111, "learning_rate": 9.899557260801707e-06, "loss": 0.0474, "step": 3747 }, { "epoch": 0.5530062707488012, "grad_norm": 2.9669387340545654, "learning_rate": 9.894406537879805e-06, "loss": 0.083, "step": 3748 }, { "epoch": 0.5531538177794172, "grad_norm": 3.1382808685302734, "learning_rate": 9.88925584297477e-06, "loss": 0.0916, "step": 3749 }, { "epoch": 0.5533013648100332, "grad_norm": 4.144131183624268, "learning_rate": 9.884105177453216e-06, "loss": 0.0752, "step": 3750 }, { "epoch": 0.5534489118406493, "grad_norm": 1.2315720319747925, "learning_rate": 9.878954542681758e-06, "loss": 0.0259, "step": 3751 }, { "epoch": 0.5535964588712652, "grad_norm": 3.0691041946411133, "learning_rate": 9.873803940027004e-06, "loss": 0.0566, "step": 3752 }, { "epoch": 0.5537440059018812, "grad_norm": 1.3682444095611572, "learning_rate": 9.868653370855552e-06, "loss": 0.0314, "step": 3753 }, { "epoch": 0.5538915529324973, "grad_norm": 1.8178353309631348, "learning_rate": 9.863502836533988e-06, "loss": 0.0739, "step": 3754 }, { "epoch": 0.5540390999631133, "grad_norm": 1.305626392364502, "learning_rate": 9.858352338428894e-06, "loss": 0.0497, "step": 3755 }, { "epoch": 0.5541866469937292, "grad_norm": 2.9078972339630127, "learning_rate": 9.853201877906836e-06, "loss": 0.0698, "step": 3756 }, { "epoch": 0.5543341940243453, "grad_norm": 2.165985584259033, "learning_rate": 9.848051456334373e-06, "loss": 0.0694, "step": 3757 }, { "epoch": 0.5544817410549613, "grad_norm": 2.040043830871582, "learning_rate": 9.84290107507806e-06, "loss": 0.0768, "step": 3758 }, { "epoch": 0.5546292880855773, "grad_norm": 3.4539248943328857, "learning_rate": 9.83775073550443e-06, "loss": 0.0918, "step": 3759 }, { "epoch": 0.5547768351161932, "grad_norm": 1.5912948846817017, "learning_rate": 9.832600438980012e-06, "loss": 0.0228, "step": 3760 }, { "epoch": 0.5547768351161932, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.054135989397764206, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.6086, "eval_samples_per_second": 5.866, "eval_steps_per_second": 0.202, "step": 3760 }, { "epoch": 0.5549243821468093, "grad_norm": 2.391852378845215, "learning_rate": 9.82745018687132e-06, "loss": 0.117, "step": 3761 }, { "epoch": 0.5550719291774253, "grad_norm": 2.916215419769287, "learning_rate": 9.822299980544862e-06, "loss": 0.0331, "step": 3762 }, { "epoch": 0.5552194762080414, "grad_norm": 1.3798753023147583, "learning_rate": 9.81714982136713e-06, "loss": 0.0258, "step": 3763 }, { "epoch": 0.5553670232386573, "grad_norm": 5.286415100097656, "learning_rate": 9.811999710704601e-06, "loss": 0.0462, "step": 3764 }, { "epoch": 0.5555145702692733, "grad_norm": 1.4460058212280273, "learning_rate": 9.806849649923742e-06, "loss": 0.0545, "step": 3765 }, { "epoch": 0.5556621172998893, "grad_norm": 1.8188203573226929, "learning_rate": 9.801699640391001e-06, "loss": 0.0381, "step": 3766 }, { "epoch": 0.5558096643305054, "grad_norm": 1.858678936958313, "learning_rate": 9.79654968347283e-06, "loss": 0.0585, "step": 3767 }, { "epoch": 0.5559572113611213, "grad_norm": 2.441951036453247, "learning_rate": 9.791399780535648e-06, "loss": 0.0571, "step": 3768 }, { "epoch": 0.5561047583917373, "grad_norm": 1.4246195554733276, "learning_rate": 9.786249932945865e-06, "loss": 0.0715, "step": 3769 }, { "epoch": 0.5562523054223534, "grad_norm": 1.7215012311935425, "learning_rate": 9.781100142069879e-06, "loss": 0.0237, "step": 3770 }, { "epoch": 0.5563998524529694, "grad_norm": 1.2334809303283691, "learning_rate": 9.775950409274071e-06, "loss": 0.048, "step": 3771 }, { "epoch": 0.5565473994835854, "grad_norm": 1.7480311393737793, "learning_rate": 9.770800735924811e-06, "loss": 0.0436, "step": 3772 }, { "epoch": 0.5566949465142014, "grad_norm": 1.593654751777649, "learning_rate": 9.765651123388448e-06, "loss": 0.0335, "step": 3773 }, { "epoch": 0.5568424935448174, "grad_norm": 1.513624668121338, "learning_rate": 9.760501573031314e-06, "loss": 0.0529, "step": 3774 }, { "epoch": 0.5569900405754334, "grad_norm": 1.5197149515151978, "learning_rate": 9.755352086219733e-06, "loss": 0.0559, "step": 3775 }, { "epoch": 0.5571375876060495, "grad_norm": 1.9495011568069458, "learning_rate": 9.750202664319993e-06, "loss": 0.0655, "step": 3776 }, { "epoch": 0.5572851346366654, "grad_norm": 0.9071054458618164, "learning_rate": 9.745053308698392e-06, "loss": 0.0193, "step": 3777 }, { "epoch": 0.5574326816672814, "grad_norm": 1.7866899967193604, "learning_rate": 9.739904020721193e-06, "loss": 0.0312, "step": 3778 }, { "epoch": 0.5575802286978975, "grad_norm": 3.7618916034698486, "learning_rate": 9.734754801754643e-06, "loss": 0.0442, "step": 3779 }, { "epoch": 0.5577277757285135, "grad_norm": 1.682389497756958, "learning_rate": 9.729605653164962e-06, "loss": 0.0321, "step": 3780 }, { "epoch": 0.5577277757285135, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.053348224610090256, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 49.4818, "eval_samples_per_second": 5.881, "eval_steps_per_second": 0.202, "step": 3780 }, { "epoch": 0.5578753227591294, "grad_norm": 2.075875759124756, "learning_rate": 9.724456576318383e-06, "loss": 0.0552, "step": 3781 }, { "epoch": 0.5580228697897455, "grad_norm": 5.893950462341309, "learning_rate": 9.71930757258108e-06, "loss": 0.1236, "step": 3782 }, { "epoch": 0.5581704168203615, "grad_norm": 1.4630471467971802, "learning_rate": 9.714158643319232e-06, "loss": 0.0175, "step": 3783 }, { "epoch": 0.5583179638509775, "grad_norm": 6.606502532958984, "learning_rate": 9.709009789898994e-06, "loss": 0.0888, "step": 3784 }, { "epoch": 0.5584655108815935, "grad_norm": 1.5867756605148315, "learning_rate": 9.703861013686494e-06, "loss": 0.0337, "step": 3785 }, { "epoch": 0.5586130579122095, "grad_norm": 3.8968021869659424, "learning_rate": 9.69871231604785e-06, "loss": 0.0457, "step": 3786 }, { "epoch": 0.5587606049428255, "grad_norm": 0.7190189957618713, "learning_rate": 9.693563698349151e-06, "loss": 0.0195, "step": 3787 }, { "epoch": 0.5589081519734416, "grad_norm": 1.2359848022460938, "learning_rate": 9.688415161956468e-06, "loss": 0.0381, "step": 3788 }, { "epoch": 0.5590556990040575, "grad_norm": 2.6988964080810547, "learning_rate": 9.683266708235855e-06, "loss": 0.0293, "step": 3789 }, { "epoch": 0.5592032460346735, "grad_norm": 3.265552043914795, "learning_rate": 9.678118338553325e-06, "loss": 0.1007, "step": 3790 }, { "epoch": 0.5593507930652896, "grad_norm": 1.6083853244781494, "learning_rate": 9.672970054274896e-06, "loss": 0.0314, "step": 3791 }, { "epoch": 0.5594983400959056, "grad_norm": 2.069735050201416, "learning_rate": 9.667821856766548e-06, "loss": 0.032, "step": 3792 }, { "epoch": 0.5596458871265215, "grad_norm": 3.529930830001831, "learning_rate": 9.662673747394237e-06, "loss": 0.133, "step": 3793 }, { "epoch": 0.5597934341571376, "grad_norm": 1.4641337394714355, "learning_rate": 9.657525727523897e-06, "loss": 0.0386, "step": 3794 }, { "epoch": 0.5599409811877536, "grad_norm": 2.047414779663086, "learning_rate": 9.652377798521442e-06, "loss": 0.0156, "step": 3795 }, { "epoch": 0.5600885282183696, "grad_norm": 2.2744388580322266, "learning_rate": 9.647229961752761e-06, "loss": 0.0385, "step": 3796 }, { "epoch": 0.5602360752489857, "grad_norm": 8.05351448059082, "learning_rate": 9.642082218583718e-06, "loss": 0.0934, "step": 3797 }, { "epoch": 0.5603836222796016, "grad_norm": 1.128142237663269, "learning_rate": 9.636934570380147e-06, "loss": 0.0389, "step": 3798 }, { "epoch": 0.5605311693102176, "grad_norm": 1.107566237449646, "learning_rate": 9.631787018507864e-06, "loss": 0.0389, "step": 3799 }, { "epoch": 0.5606787163408337, "grad_norm": 2.5889945030212402, "learning_rate": 9.626639564332659e-06, "loss": 0.0698, "step": 3800 }, { "epoch": 0.5606787163408337, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9655172413793104, "eval_loss": 0.05252087116241455, "eval_precision": 0.98, "eval_recall": 0.9514563106796117, "eval_runtime": 50.0962, "eval_samples_per_second": 5.809, "eval_steps_per_second": 0.2, "step": 3800 }, { "epoch": 0.5608262633714497, "grad_norm": 3.1710586547851562, "learning_rate": 9.62149220922029e-06, "loss": 0.0688, "step": 3801 }, { "epoch": 0.5609738104020656, "grad_norm": 2.8163652420043945, "learning_rate": 9.616344954536495e-06, "loss": 0.0649, "step": 3802 }, { "epoch": 0.5611213574326817, "grad_norm": 0.7462462186813354, "learning_rate": 9.611197801646984e-06, "loss": 0.0154, "step": 3803 }, { "epoch": 0.5612689044632977, "grad_norm": 1.615426778793335, "learning_rate": 9.60605075191743e-06, "loss": 0.0407, "step": 3804 }, { "epoch": 0.5614164514939137, "grad_norm": 4.731834411621094, "learning_rate": 9.6009038067135e-06, "loss": 0.1288, "step": 3805 }, { "epoch": 0.5615639985245297, "grad_norm": 2.108180284500122, "learning_rate": 9.595756967400815e-06, "loss": 0.0409, "step": 3806 }, { "epoch": 0.5617115455551457, "grad_norm": 1.363415241241455, "learning_rate": 9.590610235344972e-06, "loss": 0.0313, "step": 3807 }, { "epoch": 0.5618590925857617, "grad_norm": 3.3501205444335938, "learning_rate": 9.585463611911545e-06, "loss": 0.0754, "step": 3808 }, { "epoch": 0.5620066396163778, "grad_norm": 1.5632984638214111, "learning_rate": 9.580317098466068e-06, "loss": 0.0625, "step": 3809 }, { "epoch": 0.5621541866469937, "grad_norm": 1.50901198387146, "learning_rate": 9.575170696374062e-06, "loss": 0.0351, "step": 3810 }, { "epoch": 0.5623017336776097, "grad_norm": 1.1810615062713623, "learning_rate": 9.570024407001005e-06, "loss": 0.0332, "step": 3811 }, { "epoch": 0.5624492807082258, "grad_norm": 2.9707541465759277, "learning_rate": 9.564878231712353e-06, "loss": 0.0586, "step": 3812 }, { "epoch": 0.5625968277388418, "grad_norm": 0.9940451383590698, "learning_rate": 9.559732171873524e-06, "loss": 0.0112, "step": 3813 }, { "epoch": 0.5627443747694577, "grad_norm": 1.6572270393371582, "learning_rate": 9.55458622884991e-06, "loss": 0.0519, "step": 3814 }, { "epoch": 0.5628919218000737, "grad_norm": 1.559641718864441, "learning_rate": 9.549440404006877e-06, "loss": 0.0327, "step": 3815 }, { "epoch": 0.5630394688306898, "grad_norm": 4.432082653045654, "learning_rate": 9.544294698709751e-06, "loss": 0.1221, "step": 3816 }, { "epoch": 0.5631870158613058, "grad_norm": 3.4049956798553467, "learning_rate": 9.53914911432383e-06, "loss": 0.0732, "step": 3817 }, { "epoch": 0.5633345628919219, "grad_norm": 1.6620103120803833, "learning_rate": 9.534003652214379e-06, "loss": 0.0318, "step": 3818 }, { "epoch": 0.5634821099225378, "grad_norm": 2.7731335163116455, "learning_rate": 9.528858313746634e-06, "loss": 0.048, "step": 3819 }, { "epoch": 0.5636296569531538, "grad_norm": 1.7576640844345093, "learning_rate": 9.523713100285791e-06, "loss": 0.0555, "step": 3820 }, { "epoch": 0.5636296569531538, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9607843137254902, "eval_loss": 0.0535358227789402, "eval_precision": 0.9702970297029703, "eval_recall": 0.9514563106796117, "eval_runtime": 48.7904, "eval_samples_per_second": 5.964, "eval_steps_per_second": 0.205, "step": 3820 }, { "epoch": 0.5637772039837698, "grad_norm": 2.026264190673828, "learning_rate": 9.518568013197026e-06, "loss": 0.0632, "step": 3821 }, { "epoch": 0.5639247510143859, "grad_norm": 2.458831310272217, "learning_rate": 9.513423053845462e-06, "loss": 0.0328, "step": 3822 }, { "epoch": 0.5640722980450018, "grad_norm": 6.9917378425598145, "learning_rate": 9.508278223596198e-06, "loss": 0.0631, "step": 3823 }, { "epoch": 0.5642198450756178, "grad_norm": 2.0488762855529785, "learning_rate": 9.503133523814311e-06, "loss": 0.0604, "step": 3824 }, { "epoch": 0.5643673921062339, "grad_norm": 0.830234706401825, "learning_rate": 9.497988955864826e-06, "loss": 0.0087, "step": 3825 }, { "epoch": 0.5645149391368499, "grad_norm": 3.167125940322876, "learning_rate": 9.492844521112738e-06, "loss": 0.0443, "step": 3826 }, { "epoch": 0.5646624861674658, "grad_norm": 2.5500237941741943, "learning_rate": 9.48770022092301e-06, "loss": 0.0239, "step": 3827 }, { "epoch": 0.5648100331980819, "grad_norm": 6.909282207489014, "learning_rate": 9.48255605666056e-06, "loss": 0.1248, "step": 3828 }, { "epoch": 0.5649575802286979, "grad_norm": 2.240964651107788, "learning_rate": 9.477412029690288e-06, "loss": 0.0617, "step": 3829 }, { "epoch": 0.5651051272593139, "grad_norm": 1.944341778755188, "learning_rate": 9.472268141377041e-06, "loss": 0.0484, "step": 3830 }, { "epoch": 0.5652526742899299, "grad_norm": 1.236659049987793, "learning_rate": 9.467124393085631e-06, "loss": 0.0184, "step": 3831 }, { "epoch": 0.5654002213205459, "grad_norm": 5.857029438018799, "learning_rate": 9.461980786180844e-06, "loss": 0.077, "step": 3832 }, { "epoch": 0.5655477683511619, "grad_norm": 1.798120141029358, "learning_rate": 9.456837322027412e-06, "loss": 0.0528, "step": 3833 }, { "epoch": 0.565695315381778, "grad_norm": 1.5542166233062744, "learning_rate": 9.451694001990046e-06, "loss": 0.0315, "step": 3834 }, { "epoch": 0.5658428624123939, "grad_norm": 3.6191813945770264, "learning_rate": 9.44655082743341e-06, "loss": 0.0492, "step": 3835 }, { "epoch": 0.5659904094430099, "grad_norm": 3.5314884185791016, "learning_rate": 9.441407799722127e-06, "loss": 0.0662, "step": 3836 }, { "epoch": 0.566137956473626, "grad_norm": 1.5825424194335938, "learning_rate": 9.436264920220781e-06, "loss": 0.0247, "step": 3837 }, { "epoch": 0.566285503504242, "grad_norm": 3.2723326683044434, "learning_rate": 9.431122190293931e-06, "loss": 0.0462, "step": 3838 }, { "epoch": 0.566433050534858, "grad_norm": 3.276448965072632, "learning_rate": 9.425979611306077e-06, "loss": 0.0162, "step": 3839 }, { "epoch": 0.566580597565474, "grad_norm": 3.932093858718872, "learning_rate": 9.420837184621689e-06, "loss": 0.0853, "step": 3840 }, { "epoch": 0.566580597565474, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.053437910974025726, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.686, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.201, "step": 3840 }, { "epoch": 0.56672814459609, "grad_norm": 4.619809627532959, "learning_rate": 9.415694911605195e-06, "loss": 0.0282, "step": 3841 }, { "epoch": 0.566875691626706, "grad_norm": 1.1630444526672363, "learning_rate": 9.41055279362098e-06, "loss": 0.047, "step": 3842 }, { "epoch": 0.5670232386573221, "grad_norm": 1.8689054250717163, "learning_rate": 9.405410832033395e-06, "loss": 0.0444, "step": 3843 }, { "epoch": 0.567170785687938, "grad_norm": 1.823365569114685, "learning_rate": 9.400269028206742e-06, "loss": 0.0457, "step": 3844 }, { "epoch": 0.567318332718554, "grad_norm": 4.076317310333252, "learning_rate": 9.395127383505281e-06, "loss": 0.0416, "step": 3845 }, { "epoch": 0.5674658797491701, "grad_norm": 1.7907932996749878, "learning_rate": 9.389985899293239e-06, "loss": 0.0209, "step": 3846 }, { "epoch": 0.5676134267797861, "grad_norm": 1.7579771280288696, "learning_rate": 9.384844576934782e-06, "loss": 0.0402, "step": 3847 }, { "epoch": 0.567760973810402, "grad_norm": 1.7290488481521606, "learning_rate": 9.379703417794057e-06, "loss": 0.04, "step": 3848 }, { "epoch": 0.5679085208410181, "grad_norm": 2.796541929244995, "learning_rate": 9.374562423235151e-06, "loss": 0.1218, "step": 3849 }, { "epoch": 0.5680560678716341, "grad_norm": 2.8260679244995117, "learning_rate": 9.369421594622112e-06, "loss": 0.0814, "step": 3850 }, { "epoch": 0.5682036149022501, "grad_norm": 5.476070880889893, "learning_rate": 9.364280933318943e-06, "loss": 0.0636, "step": 3851 }, { "epoch": 0.5683511619328661, "grad_norm": 2.3253324031829834, "learning_rate": 9.359140440689602e-06, "loss": 0.0498, "step": 3852 }, { "epoch": 0.5684987089634821, "grad_norm": 3.8428282737731934, "learning_rate": 9.354000118098006e-06, "loss": 0.0782, "step": 3853 }, { "epoch": 0.5686462559940981, "grad_norm": 2.1177563667297363, "learning_rate": 9.348859966908025e-06, "loss": 0.0568, "step": 3854 }, { "epoch": 0.5687938030247142, "grad_norm": 1.1606041193008423, "learning_rate": 9.343719988483482e-06, "loss": 0.0374, "step": 3855 }, { "epoch": 0.5689413500553301, "grad_norm": 2.144253969192505, "learning_rate": 9.338580184188156e-06, "loss": 0.0401, "step": 3856 }, { "epoch": 0.5690888970859461, "grad_norm": 1.7679747343063354, "learning_rate": 9.333440555385777e-06, "loss": 0.0174, "step": 3857 }, { "epoch": 0.5692364441165622, "grad_norm": 2.921172618865967, "learning_rate": 9.328301103440034e-06, "loss": 0.0615, "step": 3858 }, { "epoch": 0.5693839911471782, "grad_norm": 4.199213981628418, "learning_rate": 9.323161829714565e-06, "loss": 0.079, "step": 3859 }, { "epoch": 0.5695315381777941, "grad_norm": 1.8991618156433105, "learning_rate": 9.31802273557296e-06, "loss": 0.0385, "step": 3860 }, { "epoch": 0.5695315381777941, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05809960886836052, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1622, "eval_samples_per_second": 5.919, "eval_steps_per_second": 0.203, "step": 3860 }, { "epoch": 0.5696790852084102, "grad_norm": 3.209085464477539, "learning_rate": 9.312883822378756e-06, "loss": 0.0699, "step": 3861 }, { "epoch": 0.5698266322390262, "grad_norm": 1.4736233949661255, "learning_rate": 9.307745091495461e-06, "loss": 0.0336, "step": 3862 }, { "epoch": 0.5699741792696422, "grad_norm": 3.236189365386963, "learning_rate": 9.302606544286519e-06, "loss": 0.1302, "step": 3863 }, { "epoch": 0.5701217263002583, "grad_norm": 4.287188529968262, "learning_rate": 9.297468182115324e-06, "loss": 0.0981, "step": 3864 }, { "epoch": 0.5702692733308742, "grad_norm": 4.168046951293945, "learning_rate": 9.292330006345227e-06, "loss": 0.1042, "step": 3865 }, { "epoch": 0.5704168203614902, "grad_norm": 2.1270487308502197, "learning_rate": 9.287192018339525e-06, "loss": 0.0317, "step": 3866 }, { "epoch": 0.5705643673921063, "grad_norm": 2.488492727279663, "learning_rate": 9.282054219461474e-06, "loss": 0.0855, "step": 3867 }, { "epoch": 0.5707119144227223, "grad_norm": 0.9665935039520264, "learning_rate": 9.276916611074274e-06, "loss": 0.0153, "step": 3868 }, { "epoch": 0.5708594614533382, "grad_norm": 1.3190501928329468, "learning_rate": 9.27177919454107e-06, "loss": 0.0168, "step": 3869 }, { "epoch": 0.5710070084839542, "grad_norm": 1.27963125705719, "learning_rate": 9.266641971224963e-06, "loss": 0.0772, "step": 3870 }, { "epoch": 0.5711545555145703, "grad_norm": 2.482548475265503, "learning_rate": 9.261504942488999e-06, "loss": 0.0403, "step": 3871 }, { "epoch": 0.5713021025451863, "grad_norm": 1.0966283082962036, "learning_rate": 9.256368109696175e-06, "loss": 0.0294, "step": 3872 }, { "epoch": 0.5714496495758022, "grad_norm": 1.081498146057129, "learning_rate": 9.251231474209434e-06, "loss": 0.0279, "step": 3873 }, { "epoch": 0.5715971966064183, "grad_norm": 1.884391188621521, "learning_rate": 9.246095037391669e-06, "loss": 0.0568, "step": 3874 }, { "epoch": 0.5717447436370343, "grad_norm": 1.778388500213623, "learning_rate": 9.240958800605711e-06, "loss": 0.0309, "step": 3875 }, { "epoch": 0.5718922906676503, "grad_norm": 2.042877674102783, "learning_rate": 9.235822765214358e-06, "loss": 0.0625, "step": 3876 }, { "epoch": 0.5720398376982663, "grad_norm": 1.6187607049942017, "learning_rate": 9.230686932580335e-06, "loss": 0.0361, "step": 3877 }, { "epoch": 0.5721873847288823, "grad_norm": 3.0899477005004883, "learning_rate": 9.22555130406632e-06, "loss": 0.092, "step": 3878 }, { "epoch": 0.5723349317594983, "grad_norm": 1.573171615600586, "learning_rate": 9.220415881034937e-06, "loss": 0.0334, "step": 3879 }, { "epoch": 0.5724824787901144, "grad_norm": 2.0591092109680176, "learning_rate": 9.215280664848754e-06, "loss": 0.0516, "step": 3880 }, { "epoch": 0.5724824787901144, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.05518487095832825, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.4983, "eval_samples_per_second": 5.879, "eval_steps_per_second": 0.202, "step": 3880 }, { "epoch": 0.5726300258207303, "grad_norm": 1.4262274503707886, "learning_rate": 9.210145656870293e-06, "loss": 0.0252, "step": 3881 }, { "epoch": 0.5727775728513463, "grad_norm": 4.3779096603393555, "learning_rate": 9.205010858462007e-06, "loss": 0.0489, "step": 3882 }, { "epoch": 0.5729251198819624, "grad_norm": 3.449279546737671, "learning_rate": 9.199876270986302e-06, "loss": 0.1386, "step": 3883 }, { "epoch": 0.5730726669125784, "grad_norm": 1.4920333623886108, "learning_rate": 9.194741895805525e-06, "loss": 0.0465, "step": 3884 }, { "epoch": 0.5732202139431944, "grad_norm": 6.747317790985107, "learning_rate": 9.189607734281966e-06, "loss": 0.0791, "step": 3885 }, { "epoch": 0.5733677609738104, "grad_norm": 1.4162299633026123, "learning_rate": 9.184473787777864e-06, "loss": 0.0138, "step": 3886 }, { "epoch": 0.5735153080044264, "grad_norm": 4.019728183746338, "learning_rate": 9.179340057655393e-06, "loss": 0.0781, "step": 3887 }, { "epoch": 0.5736628550350424, "grad_norm": 2.296534776687622, "learning_rate": 9.174206545276678e-06, "loss": 0.0214, "step": 3888 }, { "epoch": 0.5738104020656585, "grad_norm": 1.3734296560287476, "learning_rate": 9.16907325200378e-06, "loss": 0.0346, "step": 3889 }, { "epoch": 0.5739579490962744, "grad_norm": 1.6411160230636597, "learning_rate": 9.163940179198697e-06, "loss": 0.0384, "step": 3890 }, { "epoch": 0.5741054961268904, "grad_norm": 3.931699514389038, "learning_rate": 9.158807328223382e-06, "loss": 0.1044, "step": 3891 }, { "epoch": 0.5742530431575065, "grad_norm": 3.5760281085968018, "learning_rate": 9.153674700439724e-06, "loss": 0.094, "step": 3892 }, { "epoch": 0.5744005901881225, "grad_norm": 2.080636978149414, "learning_rate": 9.148542297209547e-06, "loss": 0.0376, "step": 3893 }, { "epoch": 0.5745481372187384, "grad_norm": 1.0770453214645386, "learning_rate": 9.143410119894612e-06, "loss": 0.0304, "step": 3894 }, { "epoch": 0.5746956842493545, "grad_norm": 5.011770725250244, "learning_rate": 9.138278169856645e-06, "loss": 0.0811, "step": 3895 }, { "epoch": 0.5748432312799705, "grad_norm": 1.8867977857589722, "learning_rate": 9.13314644845728e-06, "loss": 0.0308, "step": 3896 }, { "epoch": 0.5749907783105865, "grad_norm": 2.4041988849639893, "learning_rate": 9.12801495705811e-06, "loss": 0.0976, "step": 3897 }, { "epoch": 0.5751383253412025, "grad_norm": 3.5025932788848877, "learning_rate": 9.12288369702066e-06, "loss": 0.0924, "step": 3898 }, { "epoch": 0.5752858723718185, "grad_norm": 2.0537772178649902, "learning_rate": 9.117752669706393e-06, "loss": 0.0505, "step": 3899 }, { "epoch": 0.5754334194024345, "grad_norm": 1.910369873046875, "learning_rate": 9.112621876476716e-06, "loss": 0.0248, "step": 3900 }, { "epoch": 0.5754334194024345, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9605911330049262, "eval_loss": 0.056240469217300415, "eval_precision": 0.975, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9633, "eval_samples_per_second": 5.943, "eval_steps_per_second": 0.204, "step": 3900 }, { "epoch": 0.5755809664330506, "grad_norm": 1.5095288753509521, "learning_rate": 9.10749131869297e-06, "loss": 0.0397, "step": 3901 }, { "epoch": 0.5757285134636665, "grad_norm": 1.6505756378173828, "learning_rate": 9.10236099771643e-06, "loss": 0.0721, "step": 3902 }, { "epoch": 0.5758760604942825, "grad_norm": 7.723455429077148, "learning_rate": 9.09723091490832e-06, "loss": 0.066, "step": 3903 }, { "epoch": 0.5760236075248986, "grad_norm": 4.502751350402832, "learning_rate": 9.092101071629778e-06, "loss": 0.0542, "step": 3904 }, { "epoch": 0.5761711545555146, "grad_norm": 1.6639736890792847, "learning_rate": 9.086971469241908e-06, "loss": 0.0512, "step": 3905 }, { "epoch": 0.5763187015861306, "grad_norm": 5.314734935760498, "learning_rate": 9.08184210910573e-06, "loss": 0.0539, "step": 3906 }, { "epoch": 0.5764662486167466, "grad_norm": 1.896054983139038, "learning_rate": 9.076712992582202e-06, "loss": 0.086, "step": 3907 }, { "epoch": 0.5766137956473626, "grad_norm": 2.7757315635681152, "learning_rate": 9.071584121032224e-06, "loss": 0.0547, "step": 3908 }, { "epoch": 0.5767613426779786, "grad_norm": 1.4387867450714111, "learning_rate": 9.066455495816623e-06, "loss": 0.0185, "step": 3909 }, { "epoch": 0.5769088897085947, "grad_norm": 1.908809781074524, "learning_rate": 9.061327118296169e-06, "loss": 0.0672, "step": 3910 }, { "epoch": 0.5770564367392106, "grad_norm": 2.655132532119751, "learning_rate": 9.056198989831563e-06, "loss": 0.0357, "step": 3911 }, { "epoch": 0.5772039837698266, "grad_norm": 1.7110357284545898, "learning_rate": 9.051071111783436e-06, "loss": 0.0283, "step": 3912 }, { "epoch": 0.5773515308004427, "grad_norm": 2.7300167083740234, "learning_rate": 9.045943485512355e-06, "loss": 0.0712, "step": 3913 }, { "epoch": 0.5774990778310587, "grad_norm": 0.30095189809799194, "learning_rate": 9.040816112378824e-06, "loss": 0.0036, "step": 3914 }, { "epoch": 0.5776466248616746, "grad_norm": 0.934050977230072, "learning_rate": 9.035688993743278e-06, "loss": 0.0322, "step": 3915 }, { "epoch": 0.5777941718922907, "grad_norm": 1.222118854522705, "learning_rate": 9.030562130966078e-06, "loss": 0.0286, "step": 3916 }, { "epoch": 0.5779417189229067, "grad_norm": 1.0570924282073975, "learning_rate": 9.02543552540753e-06, "loss": 0.027, "step": 3917 }, { "epoch": 0.5780892659535227, "grad_norm": 1.1076701879501343, "learning_rate": 9.020309178427851e-06, "loss": 0.0176, "step": 3918 }, { "epoch": 0.5782368129841386, "grad_norm": 3.0885276794433594, "learning_rate": 9.015183091387215e-06, "loss": 0.1081, "step": 3919 }, { "epoch": 0.5783843600147547, "grad_norm": 1.6919373273849487, "learning_rate": 9.010057265645712e-06, "loss": 0.0351, "step": 3920 }, { "epoch": 0.5783843600147547, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.06167769059538841, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.7166, "eval_samples_per_second": 5.853, "eval_steps_per_second": 0.201, "step": 3920 }, { "epoch": 0.5785319070453707, "grad_norm": 3.549628496170044, "learning_rate": 9.004931702563362e-06, "loss": 0.0253, "step": 3921 }, { "epoch": 0.5786794540759868, "grad_norm": 4.592872619628906, "learning_rate": 8.999806403500119e-06, "loss": 0.1232, "step": 3922 }, { "epoch": 0.5788270011066027, "grad_norm": 2.5092933177948, "learning_rate": 8.994681369815866e-06, "loss": 0.0625, "step": 3923 }, { "epoch": 0.5789745481372187, "grad_norm": 2.0754332542419434, "learning_rate": 8.989556602870417e-06, "loss": 0.0536, "step": 3924 }, { "epoch": 0.5791220951678347, "grad_norm": 1.9622751474380493, "learning_rate": 8.984432104023517e-06, "loss": 0.0591, "step": 3925 }, { "epoch": 0.5792696421984508, "grad_norm": 1.9531738758087158, "learning_rate": 8.979307874634834e-06, "loss": 0.0747, "step": 3926 }, { "epoch": 0.5794171892290668, "grad_norm": 0.795220673084259, "learning_rate": 8.974183916063967e-06, "loss": 0.0136, "step": 3927 }, { "epoch": 0.5795647362596827, "grad_norm": 2.075864791870117, "learning_rate": 8.969060229670443e-06, "loss": 0.0507, "step": 3928 }, { "epoch": 0.5797122832902988, "grad_norm": 2.9399094581604004, "learning_rate": 8.963936816813723e-06, "loss": 0.0533, "step": 3929 }, { "epoch": 0.5798598303209148, "grad_norm": 1.125855803489685, "learning_rate": 8.958813678853185e-06, "loss": 0.0146, "step": 3930 }, { "epoch": 0.5800073773515309, "grad_norm": 1.9573447704315186, "learning_rate": 8.953690817148144e-06, "loss": 0.0329, "step": 3931 }, { "epoch": 0.5801549243821468, "grad_norm": 1.6442813873291016, "learning_rate": 8.948568233057826e-06, "loss": 0.0478, "step": 3932 }, { "epoch": 0.5803024714127628, "grad_norm": 2.580252170562744, "learning_rate": 8.943445927941405e-06, "loss": 0.071, "step": 3933 }, { "epoch": 0.5804500184433788, "grad_norm": 3.6204605102539062, "learning_rate": 8.938323903157969e-06, "loss": 0.0363, "step": 3934 }, { "epoch": 0.5805975654739949, "grad_norm": 1.011398196220398, "learning_rate": 8.933202160066528e-06, "loss": 0.0113, "step": 3935 }, { "epoch": 0.5807451125046108, "grad_norm": 7.602753639221191, "learning_rate": 8.928080700026023e-06, "loss": 0.0718, "step": 3936 }, { "epoch": 0.5808926595352268, "grad_norm": 2.8507704734802246, "learning_rate": 8.922959524395318e-06, "loss": 0.0393, "step": 3937 }, { "epoch": 0.5810402065658429, "grad_norm": 1.1147637367248535, "learning_rate": 8.917838634533206e-06, "loss": 0.0333, "step": 3938 }, { "epoch": 0.5811877535964589, "grad_norm": 2.194823741912842, "learning_rate": 8.912718031798399e-06, "loss": 0.0534, "step": 3939 }, { "epoch": 0.5813353006270748, "grad_norm": 2.4377574920654297, "learning_rate": 8.907597717549534e-06, "loss": 0.0629, "step": 3940 }, { "epoch": 0.5813353006270748, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9554455445544554, "eval_loss": 0.05873502418398857, "eval_precision": 0.9747474747474747, "eval_recall": 0.9368932038834952, "eval_runtime": 50.6818, "eval_samples_per_second": 5.742, "eval_steps_per_second": 0.197, "step": 3940 }, { "epoch": 0.5814828476576909, "grad_norm": 2.1661298274993896, "learning_rate": 8.902477693145172e-06, "loss": 0.0316, "step": 3941 }, { "epoch": 0.5816303946883069, "grad_norm": 3.152526378631592, "learning_rate": 8.897357959943796e-06, "loss": 0.0763, "step": 3942 }, { "epoch": 0.5817779417189229, "grad_norm": 3.010192632675171, "learning_rate": 8.892238519303813e-06, "loss": 0.1028, "step": 3943 }, { "epoch": 0.5819254887495389, "grad_norm": 4.747084140777588, "learning_rate": 8.887119372583554e-06, "loss": 0.1743, "step": 3944 }, { "epoch": 0.5820730357801549, "grad_norm": 2.437009811401367, "learning_rate": 8.882000521141267e-06, "loss": 0.0293, "step": 3945 }, { "epoch": 0.5822205828107709, "grad_norm": 3.940436840057373, "learning_rate": 8.876881966335128e-06, "loss": 0.0697, "step": 3946 }, { "epoch": 0.582368129841387, "grad_norm": 0.942025363445282, "learning_rate": 8.871763709523224e-06, "loss": 0.0121, "step": 3947 }, { "epoch": 0.5825156768720029, "grad_norm": 3.339005947113037, "learning_rate": 8.86664575206358e-06, "loss": 0.0953, "step": 3948 }, { "epoch": 0.5826632239026189, "grad_norm": 3.1999847888946533, "learning_rate": 8.861528095314127e-06, "loss": 0.0656, "step": 3949 }, { "epoch": 0.582810770933235, "grad_norm": 1.3015685081481934, "learning_rate": 8.85641074063272e-06, "loss": 0.0176, "step": 3950 }, { "epoch": 0.582958317963851, "grad_norm": 3.261951208114624, "learning_rate": 8.851293689377128e-06, "loss": 0.0569, "step": 3951 }, { "epoch": 0.583105864994467, "grad_norm": 1.490349531173706, "learning_rate": 8.84617694290506e-06, "loss": 0.0394, "step": 3952 }, { "epoch": 0.583253412025083, "grad_norm": 1.292184591293335, "learning_rate": 8.841060502574122e-06, "loss": 0.0264, "step": 3953 }, { "epoch": 0.583400959055699, "grad_norm": 2.875925302505493, "learning_rate": 8.835944369741848e-06, "loss": 0.0634, "step": 3954 }, { "epoch": 0.583548506086315, "grad_norm": 1.9885458946228027, "learning_rate": 8.830828545765688e-06, "loss": 0.0552, "step": 3955 }, { "epoch": 0.5836960531169311, "grad_norm": 2.189422845840454, "learning_rate": 8.825713032003012e-06, "loss": 0.0297, "step": 3956 }, { "epoch": 0.583843600147547, "grad_norm": 1.6836330890655518, "learning_rate": 8.820597829811109e-06, "loss": 0.0542, "step": 3957 }, { "epoch": 0.583991147178163, "grad_norm": 1.595002293586731, "learning_rate": 8.815482940547185e-06, "loss": 0.0296, "step": 3958 }, { "epoch": 0.5841386942087791, "grad_norm": 1.6693047285079956, "learning_rate": 8.810368365568356e-06, "loss": 0.0156, "step": 3959 }, { "epoch": 0.5842862412393951, "grad_norm": 2.1079792976379395, "learning_rate": 8.805254106231665e-06, "loss": 0.0564, "step": 3960 }, { "epoch": 0.5842862412393951, "eval_accuracy": 0.9739507959479016, "eval_f1": 0.9558823529411765, "eval_loss": 0.058652084320783615, "eval_precision": 0.9653465346534653, "eval_recall": 0.9466019417475728, "eval_runtime": 48.8663, "eval_samples_per_second": 5.955, "eval_steps_per_second": 0.205, "step": 3960 }, { "epoch": 0.584433788270011, "grad_norm": 2.7561991214752197, "learning_rate": 8.800140163894058e-06, "loss": 0.0895, "step": 3961 }, { "epoch": 0.5845813353006271, "grad_norm": 2.003634214401245, "learning_rate": 8.795026539912417e-06, "loss": 0.0609, "step": 3962 }, { "epoch": 0.5847288823312431, "grad_norm": 1.7471199035644531, "learning_rate": 8.789913235643522e-06, "loss": 0.06, "step": 3963 }, { "epoch": 0.5848764293618591, "grad_norm": 1.7889010906219482, "learning_rate": 8.784800252444073e-06, "loss": 0.0286, "step": 3964 }, { "epoch": 0.585023976392475, "grad_norm": 1.4778516292572021, "learning_rate": 8.779687591670687e-06, "loss": 0.0236, "step": 3965 }, { "epoch": 0.5851715234230911, "grad_norm": 2.9709126949310303, "learning_rate": 8.77457525467989e-06, "loss": 0.1013, "step": 3966 }, { "epoch": 0.5853190704537071, "grad_norm": 0.9231874942779541, "learning_rate": 8.769463242828134e-06, "loss": 0.0131, "step": 3967 }, { "epoch": 0.5854666174843232, "grad_norm": 3.977604866027832, "learning_rate": 8.764351557471771e-06, "loss": 0.0326, "step": 3968 }, { "epoch": 0.5856141645149391, "grad_norm": 5.999762058258057, "learning_rate": 8.759240199967076e-06, "loss": 0.0759, "step": 3969 }, { "epoch": 0.5857617115455551, "grad_norm": 2.7629787921905518, "learning_rate": 8.754129171670226e-06, "loss": 0.0274, "step": 3970 }, { "epoch": 0.5859092585761712, "grad_norm": 2.6080102920532227, "learning_rate": 8.74901847393733e-06, "loss": 0.1104, "step": 3971 }, { "epoch": 0.5860568056067872, "grad_norm": 3.7312004566192627, "learning_rate": 8.743908108124389e-06, "loss": 0.1154, "step": 3972 }, { "epoch": 0.5862043526374032, "grad_norm": 7.975347518920898, "learning_rate": 8.738798075587324e-06, "loss": 0.0867, "step": 3973 }, { "epoch": 0.5863518996680191, "grad_norm": 0.8750149607658386, "learning_rate": 8.733688377681975e-06, "loss": 0.0119, "step": 3974 }, { "epoch": 0.5864994466986352, "grad_norm": 6.730655670166016, "learning_rate": 8.728579015764073e-06, "loss": 0.1155, "step": 3975 }, { "epoch": 0.5866469937292512, "grad_norm": 2.230890989303589, "learning_rate": 8.723469991189285e-06, "loss": 0.0547, "step": 3976 }, { "epoch": 0.5867945407598673, "grad_norm": 1.6347341537475586, "learning_rate": 8.718361305313174e-06, "loss": 0.0319, "step": 3977 }, { "epoch": 0.5869420877904832, "grad_norm": 2.3533952236175537, "learning_rate": 8.713252959491213e-06, "loss": 0.0391, "step": 3978 }, { "epoch": 0.5870896348210992, "grad_norm": 5.144891738891602, "learning_rate": 8.708144955078787e-06, "loss": 0.0724, "step": 3979 }, { "epoch": 0.5872371818517153, "grad_norm": 1.9312024116516113, "learning_rate": 8.703037293431189e-06, "loss": 0.0364, "step": 3980 }, { "epoch": 0.5872371818517153, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.05960436910390854, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 48.9385, "eval_samples_per_second": 5.946, "eval_steps_per_second": 0.204, "step": 3980 }, { "epoch": 0.5873847288823313, "grad_norm": 1.8058987855911255, "learning_rate": 8.697929975903628e-06, "loss": 0.0848, "step": 3981 }, { "epoch": 0.5875322759129472, "grad_norm": 3.032989263534546, "learning_rate": 8.692823003851214e-06, "loss": 0.0182, "step": 3982 }, { "epoch": 0.5876798229435632, "grad_norm": 2.738995313644409, "learning_rate": 8.687716378628966e-06, "loss": 0.0457, "step": 3983 }, { "epoch": 0.5878273699741793, "grad_norm": 5.541809558868408, "learning_rate": 8.682610101591813e-06, "loss": 0.1565, "step": 3984 }, { "epoch": 0.5879749170047953, "grad_norm": 1.7722115516662598, "learning_rate": 8.677504174094594e-06, "loss": 0.0384, "step": 3985 }, { "epoch": 0.5881224640354112, "grad_norm": 2.3504600524902344, "learning_rate": 8.672398597492052e-06, "loss": 0.0886, "step": 3986 }, { "epoch": 0.5882700110660273, "grad_norm": 3.7052090167999268, "learning_rate": 8.667293373138835e-06, "loss": 0.0594, "step": 3987 }, { "epoch": 0.5884175580966433, "grad_norm": 3.6242613792419434, "learning_rate": 8.662188502389505e-06, "loss": 0.0609, "step": 3988 }, { "epoch": 0.5885651051272593, "grad_norm": 2.6701276302337646, "learning_rate": 8.657083986598516e-06, "loss": 0.0789, "step": 3989 }, { "epoch": 0.5887126521578753, "grad_norm": 2.211733341217041, "learning_rate": 8.651979827120247e-06, "loss": 0.0776, "step": 3990 }, { "epoch": 0.5888601991884913, "grad_norm": 1.3977913856506348, "learning_rate": 8.64687602530897e-06, "loss": 0.0552, "step": 3991 }, { "epoch": 0.5890077462191073, "grad_norm": 1.2899699211120605, "learning_rate": 8.641772582518862e-06, "loss": 0.0142, "step": 3992 }, { "epoch": 0.5891552932497234, "grad_norm": 1.311137318611145, "learning_rate": 8.636669500104007e-06, "loss": 0.04, "step": 3993 }, { "epoch": 0.5893028402803394, "grad_norm": 2.065599203109741, "learning_rate": 8.631566779418394e-06, "loss": 0.0909, "step": 3994 }, { "epoch": 0.5894503873109553, "grad_norm": 1.879962682723999, "learning_rate": 8.626464421815919e-06, "loss": 0.0595, "step": 3995 }, { "epoch": 0.5895979343415714, "grad_norm": 2.1021625995635986, "learning_rate": 8.621362428650377e-06, "loss": 0.056, "step": 3996 }, { "epoch": 0.5897454813721874, "grad_norm": 1.290037989616394, "learning_rate": 8.616260801275467e-06, "loss": 0.0213, "step": 3997 }, { "epoch": 0.5898930284028034, "grad_norm": 2.81037974357605, "learning_rate": 8.611159541044791e-06, "loss": 0.104, "step": 3998 }, { "epoch": 0.5900405754334194, "grad_norm": 2.338932991027832, "learning_rate": 8.606058649311853e-06, "loss": 0.1156, "step": 3999 }, { "epoch": 0.5901881224640354, "grad_norm": 2.4306275844573975, "learning_rate": 8.600958127430065e-06, "loss": 0.0995, "step": 4000 }, { "epoch": 0.5901881224640354, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06034836545586586, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 48.5059, "eval_samples_per_second": 5.999, "eval_steps_per_second": 0.206, "step": 4000 }, { "epoch": 0.5903356694946514, "grad_norm": 1.614266276359558, "learning_rate": 8.595857976752734e-06, "loss": 0.0638, "step": 4001 }, { "epoch": 0.5904832165252675, "grad_norm": 2.430948495864868, "learning_rate": 8.590758198633071e-06, "loss": 0.0608, "step": 4002 }, { "epoch": 0.5906307635558834, "grad_norm": 1.02809476852417, "learning_rate": 8.585658794424188e-06, "loss": 0.0369, "step": 4003 }, { "epoch": 0.5907783105864994, "grad_norm": 1.6501526832580566, "learning_rate": 8.580559765479092e-06, "loss": 0.0464, "step": 4004 }, { "epoch": 0.5909258576171155, "grad_norm": 1.7904622554779053, "learning_rate": 8.575461113150708e-06, "loss": 0.0353, "step": 4005 }, { "epoch": 0.5910734046477315, "grad_norm": 1.6023961305618286, "learning_rate": 8.570362838791843e-06, "loss": 0.0531, "step": 4006 }, { "epoch": 0.5912209516783474, "grad_norm": 2.241295337677002, "learning_rate": 8.56526494375521e-06, "loss": 0.0251, "step": 4007 }, { "epoch": 0.5913684987089635, "grad_norm": 1.1265102624893188, "learning_rate": 8.560167429393416e-06, "loss": 0.0591, "step": 4008 }, { "epoch": 0.5915160457395795, "grad_norm": 0.996741771697998, "learning_rate": 8.555070297058985e-06, "loss": 0.0219, "step": 4009 }, { "epoch": 0.5916635927701955, "grad_norm": 2.147981882095337, "learning_rate": 8.549973548104316e-06, "loss": 0.0758, "step": 4010 }, { "epoch": 0.5918111398008115, "grad_norm": 4.8437604904174805, "learning_rate": 8.544877183881724e-06, "loss": 0.0822, "step": 4011 }, { "epoch": 0.5919586868314275, "grad_norm": 2.6284608840942383, "learning_rate": 8.53978120574341e-06, "loss": 0.0591, "step": 4012 }, { "epoch": 0.5921062338620435, "grad_norm": 2.1678919792175293, "learning_rate": 8.534685615041479e-06, "loss": 0.0597, "step": 4013 }, { "epoch": 0.5922537808926596, "grad_norm": 3.9284839630126953, "learning_rate": 8.529590413127934e-06, "loss": 0.0968, "step": 4014 }, { "epoch": 0.5924013279232755, "grad_norm": 1.429666519165039, "learning_rate": 8.524495601354674e-06, "loss": 0.0253, "step": 4015 }, { "epoch": 0.5925488749538915, "grad_norm": 1.6633669137954712, "learning_rate": 8.51940118107349e-06, "loss": 0.0599, "step": 4016 }, { "epoch": 0.5926964219845076, "grad_norm": 1.4551674127578735, "learning_rate": 8.514307153636078e-06, "loss": 0.031, "step": 4017 }, { "epoch": 0.5928439690151236, "grad_norm": 1.0635619163513184, "learning_rate": 8.509213520394012e-06, "loss": 0.0334, "step": 4018 }, { "epoch": 0.5929915160457396, "grad_norm": 1.568542242050171, "learning_rate": 8.504120282698786e-06, "loss": 0.0523, "step": 4019 }, { "epoch": 0.5931390630763556, "grad_norm": 1.6317611932754517, "learning_rate": 8.499027441901774e-06, "loss": 0.0686, "step": 4020 }, { "epoch": 0.5931390630763556, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.058240365236997604, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 50.1337, "eval_samples_per_second": 5.804, "eval_steps_per_second": 0.199, "step": 4020 }, { "epoch": 0.5932866101069716, "grad_norm": 2.486640453338623, "learning_rate": 8.493934999354243e-06, "loss": 0.0781, "step": 4021 }, { "epoch": 0.5934341571375876, "grad_norm": 1.7704969644546509, "learning_rate": 8.488842956407361e-06, "loss": 0.0276, "step": 4022 }, { "epoch": 0.5935817041682037, "grad_norm": 3.37807559967041, "learning_rate": 8.483751314412187e-06, "loss": 0.0469, "step": 4023 }, { "epoch": 0.5937292511988196, "grad_norm": 1.8829337358474731, "learning_rate": 8.478660074719678e-06, "loss": 0.0549, "step": 4024 }, { "epoch": 0.5938767982294356, "grad_norm": 3.660616159439087, "learning_rate": 8.473569238680677e-06, "loss": 0.049, "step": 4025 }, { "epoch": 0.5940243452600517, "grad_norm": 2.0410664081573486, "learning_rate": 8.468478807645924e-06, "loss": 0.0365, "step": 4026 }, { "epoch": 0.5941718922906677, "grad_norm": 2.373417854309082, "learning_rate": 8.463388782966048e-06, "loss": 0.068, "step": 4027 }, { "epoch": 0.5943194393212836, "grad_norm": 2.731009006500244, "learning_rate": 8.458299165991579e-06, "loss": 0.0239, "step": 4028 }, { "epoch": 0.5944669863518997, "grad_norm": 1.7002266645431519, "learning_rate": 8.45320995807293e-06, "loss": 0.0444, "step": 4029 }, { "epoch": 0.5946145333825157, "grad_norm": 2.2088232040405273, "learning_rate": 8.44812116056041e-06, "loss": 0.0417, "step": 4030 }, { "epoch": 0.5947620804131317, "grad_norm": 1.181602120399475, "learning_rate": 8.443032774804218e-06, "loss": 0.0197, "step": 4031 }, { "epoch": 0.5949096274437476, "grad_norm": 1.2374571561813354, "learning_rate": 8.437944802154434e-06, "loss": 0.0119, "step": 4032 }, { "epoch": 0.5950571744743637, "grad_norm": 1.5645852088928223, "learning_rate": 8.43285724396105e-06, "loss": 0.034, "step": 4033 }, { "epoch": 0.5952047215049797, "grad_norm": 1.6383975744247437, "learning_rate": 8.427770101573933e-06, "loss": 0.0459, "step": 4034 }, { "epoch": 0.5953522685355958, "grad_norm": 1.1739534139633179, "learning_rate": 8.422683376342839e-06, "loss": 0.0214, "step": 4035 }, { "epoch": 0.5954998155662117, "grad_norm": 2.949166774749756, "learning_rate": 8.417597069617418e-06, "loss": 0.0837, "step": 4036 }, { "epoch": 0.5956473625968277, "grad_norm": 1.6540577411651611, "learning_rate": 8.412511182747205e-06, "loss": 0.0374, "step": 4037 }, { "epoch": 0.5957949096274437, "grad_norm": 0.655131459236145, "learning_rate": 8.407425717081632e-06, "loss": 0.0124, "step": 4038 }, { "epoch": 0.5959424566580598, "grad_norm": 2.0629687309265137, "learning_rate": 8.402340673970012e-06, "loss": 0.0561, "step": 4039 }, { "epoch": 0.5960900036886758, "grad_norm": 1.758529782295227, "learning_rate": 8.397256054761544e-06, "loss": 0.0269, "step": 4040 }, { "epoch": 0.5960900036886758, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.062456224113702774, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 50.2756, "eval_samples_per_second": 5.788, "eval_steps_per_second": 0.199, "step": 4040 }, { "epoch": 0.5962375507192917, "grad_norm": 2.973538398742676, "learning_rate": 8.39217186080532e-06, "loss": 0.0644, "step": 4041 }, { "epoch": 0.5963850977499078, "grad_norm": 2.3122551441192627, "learning_rate": 8.387088093450315e-06, "loss": 0.0636, "step": 4042 }, { "epoch": 0.5965326447805238, "grad_norm": 1.2486623525619507, "learning_rate": 8.382004754045397e-06, "loss": 0.0249, "step": 4043 }, { "epoch": 0.5966801918111398, "grad_norm": 3.0583341121673584, "learning_rate": 8.376921843939315e-06, "loss": 0.0912, "step": 4044 }, { "epoch": 0.5968277388417558, "grad_norm": 3.5858421325683594, "learning_rate": 8.371839364480707e-06, "loss": 0.0306, "step": 4045 }, { "epoch": 0.5969752858723718, "grad_norm": 4.066762447357178, "learning_rate": 8.366757317018086e-06, "loss": 0.0965, "step": 4046 }, { "epoch": 0.5971228329029878, "grad_norm": 1.7109265327453613, "learning_rate": 8.361675702899873e-06, "loss": 0.0576, "step": 4047 }, { "epoch": 0.5972703799336039, "grad_norm": 1.577673316001892, "learning_rate": 8.356594523474353e-06, "loss": 0.031, "step": 4048 }, { "epoch": 0.5974179269642198, "grad_norm": 3.5720999240875244, "learning_rate": 8.351513780089704e-06, "loss": 0.0749, "step": 4049 }, { "epoch": 0.5975654739948358, "grad_norm": 1.7540377378463745, "learning_rate": 8.346433474093989e-06, "loss": 0.0471, "step": 4050 }, { "epoch": 0.5977130210254519, "grad_norm": 1.669903039932251, "learning_rate": 8.341353606835148e-06, "loss": 0.0229, "step": 4051 }, { "epoch": 0.5978605680560679, "grad_norm": 3.7939202785491943, "learning_rate": 8.336274179661018e-06, "loss": 0.1389, "step": 4052 }, { "epoch": 0.5980081150866838, "grad_norm": 5.558625221252441, "learning_rate": 8.331195193919308e-06, "loss": 0.1058, "step": 4053 }, { "epoch": 0.5981556621172999, "grad_norm": 2.3846969604492188, "learning_rate": 8.326116650957615e-06, "loss": 0.0832, "step": 4054 }, { "epoch": 0.5983032091479159, "grad_norm": 3.1761257648468018, "learning_rate": 8.321038552123413e-06, "loss": 0.0729, "step": 4055 }, { "epoch": 0.5984507561785319, "grad_norm": 2.646514654159546, "learning_rate": 8.315960898764063e-06, "loss": 0.0708, "step": 4056 }, { "epoch": 0.5985983032091479, "grad_norm": 3.5044147968292236, "learning_rate": 8.31088369222681e-06, "loss": 0.0732, "step": 4057 }, { "epoch": 0.5987458502397639, "grad_norm": 1.22723388671875, "learning_rate": 8.305806933858773e-06, "loss": 0.0127, "step": 4058 }, { "epoch": 0.5988933972703799, "grad_norm": 3.667985200881958, "learning_rate": 8.30073062500696e-06, "loss": 0.0657, "step": 4059 }, { "epoch": 0.599040944300996, "grad_norm": 2.531170606613159, "learning_rate": 8.295654767018254e-06, "loss": 0.0478, "step": 4060 }, { "epoch": 0.599040944300996, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9582309582309583, "eval_loss": 0.05882137268781662, "eval_precision": 0.9701492537313433, "eval_recall": 0.9466019417475728, "eval_runtime": 49.6056, "eval_samples_per_second": 5.866, "eval_steps_per_second": 0.202, "step": 4060 }, { "epoch": 0.599188491331612, "grad_norm": 4.068436622619629, "learning_rate": 8.290579361239415e-06, "loss": 0.1229, "step": 4061 }, { "epoch": 0.5993360383622279, "grad_norm": 3.970015287399292, "learning_rate": 8.2855044090171e-06, "loss": 0.0344, "step": 4062 }, { "epoch": 0.599483585392844, "grad_norm": 3.391155242919922, "learning_rate": 8.280429911697825e-06, "loss": 0.07, "step": 4063 }, { "epoch": 0.59963113242346, "grad_norm": 2.5408804416656494, "learning_rate": 8.275355870627998e-06, "loss": 0.0836, "step": 4064 }, { "epoch": 0.599778679454076, "grad_norm": 3.025726795196533, "learning_rate": 8.2702822871539e-06, "loss": 0.1013, "step": 4065 }, { "epoch": 0.599926226484692, "grad_norm": 2.77030348777771, "learning_rate": 8.265209162621694e-06, "loss": 0.0593, "step": 4066 }, { "epoch": 0.600073773515308, "grad_norm": 1.9434685707092285, "learning_rate": 8.260136498377421e-06, "loss": 0.043, "step": 4067 }, { "epoch": 0.600221320545924, "grad_norm": 1.3754554986953735, "learning_rate": 8.255064295766997e-06, "loss": 0.0251, "step": 4068 }, { "epoch": 0.6003688675765401, "grad_norm": 2.7446134090423584, "learning_rate": 8.249992556136221e-06, "loss": 0.0717, "step": 4069 }, { "epoch": 0.600516414607156, "grad_norm": 3.1507766246795654, "learning_rate": 8.24492128083076e-06, "loss": 0.0741, "step": 4070 }, { "epoch": 0.600663961637772, "grad_norm": 1.6349372863769531, "learning_rate": 8.239850471196169e-06, "loss": 0.0221, "step": 4071 }, { "epoch": 0.6008115086683881, "grad_norm": 1.8611254692077637, "learning_rate": 8.23478012857787e-06, "loss": 0.0359, "step": 4072 }, { "epoch": 0.6009590556990041, "grad_norm": 2.038020372390747, "learning_rate": 8.229710254321169e-06, "loss": 0.0434, "step": 4073 }, { "epoch": 0.60110660272962, "grad_norm": 2.05490779876709, "learning_rate": 8.224640849771242e-06, "loss": 0.0667, "step": 4074 }, { "epoch": 0.6012541497602361, "grad_norm": 2.0123698711395264, "learning_rate": 8.219571916273134e-06, "loss": 0.0536, "step": 4075 }, { "epoch": 0.6014016967908521, "grad_norm": 2.4281039237976074, "learning_rate": 8.214503455171788e-06, "loss": 0.0722, "step": 4076 }, { "epoch": 0.6015492438214681, "grad_norm": 3.9477169513702393, "learning_rate": 8.209435467811998e-06, "loss": 0.0925, "step": 4077 }, { "epoch": 0.601696790852084, "grad_norm": 1.8821567296981812, "learning_rate": 8.20436795553844e-06, "loss": 0.0727, "step": 4078 }, { "epoch": 0.6018443378827001, "grad_norm": 1.2030612230300903, "learning_rate": 8.19930091969567e-06, "loss": 0.0227, "step": 4079 }, { "epoch": 0.6019918849133161, "grad_norm": 2.11921763420105, "learning_rate": 8.194234361628106e-06, "loss": 0.0405, "step": 4080 }, { "epoch": 0.6019918849133161, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9605911330049262, "eval_loss": 0.05782787501811981, "eval_precision": 0.975, "eval_recall": 0.9466019417475728, "eval_runtime": 50.1986, "eval_samples_per_second": 5.797, "eval_steps_per_second": 0.199, "step": 4080 }, { "epoch": 0.6021394319439322, "grad_norm": 2.6245100498199463, "learning_rate": 8.189168282680052e-06, "loss": 0.0816, "step": 4081 }, { "epoch": 0.6022869789745481, "grad_norm": 1.9298090934753418, "learning_rate": 8.184102684195677e-06, "loss": 0.0542, "step": 4082 }, { "epoch": 0.6024345260051641, "grad_norm": 2.777143716812134, "learning_rate": 8.17903756751902e-06, "loss": 0.0844, "step": 4083 }, { "epoch": 0.6025820730357802, "grad_norm": 1.914462685585022, "learning_rate": 8.173972933994003e-06, "loss": 0.0559, "step": 4084 }, { "epoch": 0.6027296200663962, "grad_norm": 4.3868279457092285, "learning_rate": 8.168908784964407e-06, "loss": 0.0825, "step": 4085 }, { "epoch": 0.6028771670970122, "grad_norm": 1.8260278701782227, "learning_rate": 8.163845121773896e-06, "loss": 0.0479, "step": 4086 }, { "epoch": 0.6030247141276281, "grad_norm": 3.0764830112457275, "learning_rate": 8.158781945765998e-06, "loss": 0.0525, "step": 4087 }, { "epoch": 0.6031722611582442, "grad_norm": 2.1458253860473633, "learning_rate": 8.153719258284113e-06, "loss": 0.0484, "step": 4088 }, { "epoch": 0.6033198081888602, "grad_norm": 2.5246200561523438, "learning_rate": 8.148657060671507e-06, "loss": 0.0476, "step": 4089 }, { "epoch": 0.6034673552194763, "grad_norm": 2.05627703666687, "learning_rate": 8.14359535427133e-06, "loss": 0.0211, "step": 4090 }, { "epoch": 0.6036149022500922, "grad_norm": 2.763218879699707, "learning_rate": 8.13853414042659e-06, "loss": 0.0548, "step": 4091 }, { "epoch": 0.6037624492807082, "grad_norm": 3.247293472290039, "learning_rate": 8.133473420480163e-06, "loss": 0.0679, "step": 4092 }, { "epoch": 0.6039099963113242, "grad_norm": 1.4476892948150635, "learning_rate": 8.128413195774798e-06, "loss": 0.0401, "step": 4093 }, { "epoch": 0.6040575433419403, "grad_norm": 1.8784327507019043, "learning_rate": 8.123353467653115e-06, "loss": 0.0586, "step": 4094 }, { "epoch": 0.6042050903725562, "grad_norm": 3.985814332962036, "learning_rate": 8.118294237457598e-06, "loss": 0.087, "step": 4095 }, { "epoch": 0.6043526374031722, "grad_norm": 1.7837779521942139, "learning_rate": 8.113235506530603e-06, "loss": 0.0377, "step": 4096 }, { "epoch": 0.6045001844337883, "grad_norm": 1.724265694618225, "learning_rate": 8.10817727621435e-06, "loss": 0.0259, "step": 4097 }, { "epoch": 0.6046477314644043, "grad_norm": 1.8024345636367798, "learning_rate": 8.103119547850924e-06, "loss": 0.0447, "step": 4098 }, { "epoch": 0.6047952784950202, "grad_norm": 2.173997640609741, "learning_rate": 8.098062322782283e-06, "loss": 0.0394, "step": 4099 }, { "epoch": 0.6049428255256363, "grad_norm": 1.8738340139389038, "learning_rate": 8.093005602350249e-06, "loss": 0.0824, "step": 4100 }, { "epoch": 0.6049428255256363, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9631449631449631, "eval_loss": 0.05601954460144043, "eval_precision": 0.9751243781094527, "eval_recall": 0.9514563106796117, "eval_runtime": 49.1339, "eval_samples_per_second": 5.923, "eval_steps_per_second": 0.204, "step": 4100 }, { "epoch": 0.6050903725562523, "grad_norm": 3.8552191257476807, "learning_rate": 8.08794938789651e-06, "loss": 0.0902, "step": 4101 }, { "epoch": 0.6052379195868683, "grad_norm": 2.390991449356079, "learning_rate": 8.082893680762619e-06, "loss": 0.0643, "step": 4102 }, { "epoch": 0.6053854666174843, "grad_norm": 2.3756585121154785, "learning_rate": 8.077838482289992e-06, "loss": 0.0814, "step": 4103 }, { "epoch": 0.6055330136481003, "grad_norm": 5.707764625549316, "learning_rate": 8.072783793819912e-06, "loss": 0.0824, "step": 4104 }, { "epoch": 0.6056805606787163, "grad_norm": 0.8822938203811646, "learning_rate": 8.067729616693535e-06, "loss": 0.0216, "step": 4105 }, { "epoch": 0.6058281077093324, "grad_norm": 2.632237672805786, "learning_rate": 8.062675952251868e-06, "loss": 0.0541, "step": 4106 }, { "epoch": 0.6059756547399484, "grad_norm": 2.279637575149536, "learning_rate": 8.057622801835788e-06, "loss": 0.0651, "step": 4107 }, { "epoch": 0.6061232017705643, "grad_norm": 0.6592308878898621, "learning_rate": 8.052570166786035e-06, "loss": 0.013, "step": 4108 }, { "epoch": 0.6062707488011804, "grad_norm": 1.0987926721572876, "learning_rate": 8.047518048443215e-06, "loss": 0.0268, "step": 4109 }, { "epoch": 0.6064182958317964, "grad_norm": 1.4274981021881104, "learning_rate": 8.042466448147794e-06, "loss": 0.0437, "step": 4110 }, { "epoch": 0.6065658428624124, "grad_norm": 1.6543102264404297, "learning_rate": 8.0374153672401e-06, "loss": 0.0228, "step": 4111 }, { "epoch": 0.6067133898930284, "grad_norm": 1.1552343368530273, "learning_rate": 8.032364807060325e-06, "loss": 0.0304, "step": 4112 }, { "epoch": 0.6068609369236444, "grad_norm": 4.618214130401611, "learning_rate": 8.02731476894852e-06, "loss": 0.1189, "step": 4113 }, { "epoch": 0.6070084839542604, "grad_norm": 1.773003101348877, "learning_rate": 8.022265254244605e-06, "loss": 0.0426, "step": 4114 }, { "epoch": 0.6071560309848765, "grad_norm": 1.1350699663162231, "learning_rate": 8.017216264288351e-06, "loss": 0.0317, "step": 4115 }, { "epoch": 0.6073035780154924, "grad_norm": 4.476233959197998, "learning_rate": 8.012167800419398e-06, "loss": 0.0298, "step": 4116 }, { "epoch": 0.6074511250461084, "grad_norm": 1.932405710220337, "learning_rate": 8.00711986397724e-06, "loss": 0.0539, "step": 4117 }, { "epoch": 0.6075986720767245, "grad_norm": 0.7615958452224731, "learning_rate": 8.002072456301229e-06, "loss": 0.0088, "step": 4118 }, { "epoch": 0.6077462191073405, "grad_norm": 2.148376703262329, "learning_rate": 7.997025578730594e-06, "loss": 0.0695, "step": 4119 }, { "epoch": 0.6078937661379564, "grad_norm": 0.6418356895446777, "learning_rate": 7.991979232604402e-06, "loss": 0.008, "step": 4120 }, { "epoch": 0.6078937661379564, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05638247728347778, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 50.0504, "eval_samples_per_second": 5.814, "eval_steps_per_second": 0.2, "step": 4120 }, { "epoch": 0.6080413131685725, "grad_norm": 3.168421745300293, "learning_rate": 7.986933419261592e-06, "loss": 0.0976, "step": 4121 }, { "epoch": 0.6081888601991885, "grad_norm": 2.0744261741638184, "learning_rate": 7.981888140040956e-06, "loss": 0.0443, "step": 4122 }, { "epoch": 0.6083364072298045, "grad_norm": 1.7259821891784668, "learning_rate": 7.976843396281143e-06, "loss": 0.0132, "step": 4123 }, { "epoch": 0.6084839542604205, "grad_norm": 1.163658618927002, "learning_rate": 7.971799189320669e-06, "loss": 0.0555, "step": 4124 }, { "epoch": 0.6086315012910365, "grad_norm": 2.0477635860443115, "learning_rate": 7.966755520497897e-06, "loss": 0.0537, "step": 4125 }, { "epoch": 0.6087790483216525, "grad_norm": 3.661975145339966, "learning_rate": 7.961712391151055e-06, "loss": 0.0702, "step": 4126 }, { "epoch": 0.6089265953522686, "grad_norm": 2.977675199508667, "learning_rate": 7.956669802618219e-06, "loss": 0.1197, "step": 4127 }, { "epoch": 0.6090741423828846, "grad_norm": 0.9189351797103882, "learning_rate": 7.951627756237333e-06, "loss": 0.0203, "step": 4128 }, { "epoch": 0.6092216894135005, "grad_norm": 1.9204919338226318, "learning_rate": 7.94658625334619e-06, "loss": 0.0884, "step": 4129 }, { "epoch": 0.6093692364441166, "grad_norm": 2.1584606170654297, "learning_rate": 7.94154529528244e-06, "loss": 0.0493, "step": 4130 }, { "epoch": 0.6095167834747326, "grad_norm": 2.547930955886841, "learning_rate": 7.936504883383587e-06, "loss": 0.0353, "step": 4131 }, { "epoch": 0.6096643305053486, "grad_norm": 1.9196710586547852, "learning_rate": 7.931465018986985e-06, "loss": 0.0385, "step": 4132 }, { "epoch": 0.6098118775359646, "grad_norm": 3.0897343158721924, "learning_rate": 7.926425703429862e-06, "loss": 0.0544, "step": 4133 }, { "epoch": 0.6099594245665806, "grad_norm": 2.7181038856506348, "learning_rate": 7.921386938049285e-06, "loss": 0.0901, "step": 4134 }, { "epoch": 0.6101069715971966, "grad_norm": 2.0732929706573486, "learning_rate": 7.91634872418217e-06, "loss": 0.0682, "step": 4135 }, { "epoch": 0.6102545186278127, "grad_norm": 2.726588249206543, "learning_rate": 7.911311063165298e-06, "loss": 0.0378, "step": 4136 }, { "epoch": 0.6104020656584286, "grad_norm": 2.0474002361297607, "learning_rate": 7.9062739563353e-06, "loss": 0.0409, "step": 4137 }, { "epoch": 0.6105496126890446, "grad_norm": 2.4663820266723633, "learning_rate": 7.901237405028663e-06, "loss": 0.0563, "step": 4138 }, { "epoch": 0.6106971597196607, "grad_norm": 2.1829729080200195, "learning_rate": 7.896201410581718e-06, "loss": 0.0446, "step": 4139 }, { "epoch": 0.6108447067502767, "grad_norm": 1.3930015563964844, "learning_rate": 7.891165974330657e-06, "loss": 0.0488, "step": 4140 }, { "epoch": 0.6108447067502767, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05426744371652603, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.0419, "eval_samples_per_second": 5.815, "eval_steps_per_second": 0.2, "step": 4140 }, { "epoch": 0.6109922537808926, "grad_norm": 1.1906510591506958, "learning_rate": 7.886131097611518e-06, "loss": 0.0305, "step": 4141 }, { "epoch": 0.6111398008115086, "grad_norm": 3.0324180126190186, "learning_rate": 7.88109678176019e-06, "loss": 0.0613, "step": 4142 }, { "epoch": 0.6112873478421247, "grad_norm": 3.933577060699463, "learning_rate": 7.876063028112427e-06, "loss": 0.0843, "step": 4143 }, { "epoch": 0.6114348948727407, "grad_norm": 3.2984468936920166, "learning_rate": 7.871029838003812e-06, "loss": 0.0428, "step": 4144 }, { "epoch": 0.6115824419033566, "grad_norm": 1.0057547092437744, "learning_rate": 7.865997212769795e-06, "loss": 0.0147, "step": 4145 }, { "epoch": 0.6117299889339727, "grad_norm": 1.3197414875030518, "learning_rate": 7.860965153745662e-06, "loss": 0.0257, "step": 4146 }, { "epoch": 0.6118775359645887, "grad_norm": 1.6716506481170654, "learning_rate": 7.85593366226657e-06, "loss": 0.0402, "step": 4147 }, { "epoch": 0.6120250829952047, "grad_norm": 2.6991477012634277, "learning_rate": 7.850902739667508e-06, "loss": 0.1088, "step": 4148 }, { "epoch": 0.6121726300258208, "grad_norm": 2.7861545085906982, "learning_rate": 7.845872387283315e-06, "loss": 0.0734, "step": 4149 }, { "epoch": 0.6123201770564367, "grad_norm": 3.647874355316162, "learning_rate": 7.840842606448685e-06, "loss": 0.1207, "step": 4150 }, { "epoch": 0.6124677240870527, "grad_norm": 2.441539764404297, "learning_rate": 7.835813398498155e-06, "loss": 0.0657, "step": 4151 }, { "epoch": 0.6126152711176688, "grad_norm": 1.9289296865463257, "learning_rate": 7.830784764766117e-06, "loss": 0.065, "step": 4152 }, { "epoch": 0.6127628181482848, "grad_norm": 2.469740867614746, "learning_rate": 7.825756706586807e-06, "loss": 0.0729, "step": 4153 }, { "epoch": 0.6129103651789007, "grad_norm": 2.932300567626953, "learning_rate": 7.820729225294303e-06, "loss": 0.0476, "step": 4154 }, { "epoch": 0.6130579122095168, "grad_norm": 1.9549894332885742, "learning_rate": 7.815702322222539e-06, "loss": 0.0408, "step": 4155 }, { "epoch": 0.6132054592401328, "grad_norm": 4.48233699798584, "learning_rate": 7.810675998705285e-06, "loss": 0.1081, "step": 4156 }, { "epoch": 0.6133530062707488, "grad_norm": 3.0865495204925537, "learning_rate": 7.805650256076173e-06, "loss": 0.0614, "step": 4157 }, { "epoch": 0.6135005533013648, "grad_norm": 2.7239348888397217, "learning_rate": 7.800625095668664e-06, "loss": 0.0521, "step": 4158 }, { "epoch": 0.6136481003319808, "grad_norm": 0.8912237882614136, "learning_rate": 7.795600518816078e-06, "loss": 0.0296, "step": 4159 }, { "epoch": 0.6137956473625968, "grad_norm": 2.5065150260925293, "learning_rate": 7.790576526851568e-06, "loss": 0.0837, "step": 4160 }, { "epoch": 0.6137956473625968, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055733777582645416, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.1279, "eval_samples_per_second": 5.805, "eval_steps_per_second": 0.199, "step": 4160 }, { "epoch": 0.6139431943932129, "grad_norm": 1.7436301708221436, "learning_rate": 7.785553121108134e-06, "loss": 0.0589, "step": 4161 }, { "epoch": 0.6140907414238288, "grad_norm": 3.0089666843414307, "learning_rate": 7.780530302918638e-06, "loss": 0.0499, "step": 4162 }, { "epoch": 0.6142382884544448, "grad_norm": 1.1288248300552368, "learning_rate": 7.775508073615763e-06, "loss": 0.0295, "step": 4163 }, { "epoch": 0.6143858354850609, "grad_norm": 2.924893617630005, "learning_rate": 7.770486434532045e-06, "loss": 0.0571, "step": 4164 }, { "epoch": 0.6145333825156769, "grad_norm": 5.625210285186768, "learning_rate": 7.765465386999865e-06, "loss": 0.1014, "step": 4165 }, { "epoch": 0.6146809295462928, "grad_norm": 3.9194157123565674, "learning_rate": 7.760444932351449e-06, "loss": 0.1229, "step": 4166 }, { "epoch": 0.6148284765769089, "grad_norm": 2.162351369857788, "learning_rate": 7.755425071918859e-06, "loss": 0.0458, "step": 4167 }, { "epoch": 0.6149760236075249, "grad_norm": 1.519978404045105, "learning_rate": 7.750405807034002e-06, "loss": 0.0359, "step": 4168 }, { "epoch": 0.6151235706381409, "grad_norm": 1.8831725120544434, "learning_rate": 7.745387139028628e-06, "loss": 0.0847, "step": 4169 }, { "epoch": 0.6152711176687569, "grad_norm": 4.288518905639648, "learning_rate": 7.74036906923433e-06, "loss": 0.0834, "step": 4170 }, { "epoch": 0.6154186646993729, "grad_norm": 3.176762819290161, "learning_rate": 7.735351598982538e-06, "loss": 0.0501, "step": 4171 }, { "epoch": 0.6155662117299889, "grad_norm": 2.158085346221924, "learning_rate": 7.730334729604529e-06, "loss": 0.0227, "step": 4172 }, { "epoch": 0.615713758760605, "grad_norm": 1.5917812585830688, "learning_rate": 7.725318462431418e-06, "loss": 0.0435, "step": 4173 }, { "epoch": 0.615861305791221, "grad_norm": 1.8492577075958252, "learning_rate": 7.720302798794153e-06, "loss": 0.0609, "step": 4174 }, { "epoch": 0.6160088528218369, "grad_norm": 4.046690940856934, "learning_rate": 7.715287740023528e-06, "loss": 0.065, "step": 4175 }, { "epoch": 0.616156399852453, "grad_norm": 1.6979717016220093, "learning_rate": 7.710273287450184e-06, "loss": 0.055, "step": 4176 }, { "epoch": 0.616303946883069, "grad_norm": 3.7858529090881348, "learning_rate": 7.705259442404592e-06, "loss": 0.0821, "step": 4177 }, { "epoch": 0.616451493913685, "grad_norm": 2.090599536895752, "learning_rate": 7.700246206217056e-06, "loss": 0.0651, "step": 4178 }, { "epoch": 0.616599040944301, "grad_norm": 2.0852935314178467, "learning_rate": 7.695233580217736e-06, "loss": 0.067, "step": 4179 }, { "epoch": 0.616746587974917, "grad_norm": 1.2941761016845703, "learning_rate": 7.690221565736613e-06, "loss": 0.021, "step": 4180 }, { "epoch": 0.616746587974917, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9605911330049262, "eval_loss": 0.058517731726169586, "eval_precision": 0.975, "eval_recall": 0.9466019417475728, "eval_runtime": 49.4095, "eval_samples_per_second": 5.89, "eval_steps_per_second": 0.202, "step": 4180 }, { "epoch": 0.616894135005533, "grad_norm": 2.8819026947021484, "learning_rate": 7.685210164103516e-06, "loss": 0.094, "step": 4181 }, { "epoch": 0.6170416820361491, "grad_norm": 1.7932785749435425, "learning_rate": 7.680199376648108e-06, "loss": 0.0569, "step": 4182 }, { "epoch": 0.617189229066765, "grad_norm": 1.9176204204559326, "learning_rate": 7.675189204699888e-06, "loss": 0.0708, "step": 4183 }, { "epoch": 0.617336776097381, "grad_norm": 1.369228482246399, "learning_rate": 7.670179649588194e-06, "loss": 0.0394, "step": 4184 }, { "epoch": 0.6174843231279971, "grad_norm": 1.5957475900650024, "learning_rate": 7.665170712642202e-06, "loss": 0.0231, "step": 4185 }, { "epoch": 0.6176318701586131, "grad_norm": 2.3948867321014404, "learning_rate": 7.660162395190917e-06, "loss": 0.0369, "step": 4186 }, { "epoch": 0.617779417189229, "grad_norm": 2.401414155960083, "learning_rate": 7.655154698563188e-06, "loss": 0.0505, "step": 4187 }, { "epoch": 0.617926964219845, "grad_norm": 3.1531596183776855, "learning_rate": 7.650147624087692e-06, "loss": 0.0762, "step": 4188 }, { "epoch": 0.6180745112504611, "grad_norm": 1.573538899421692, "learning_rate": 7.64514117309294e-06, "loss": 0.0117, "step": 4189 }, { "epoch": 0.6182220582810771, "grad_norm": 1.0233681201934814, "learning_rate": 7.640135346907294e-06, "loss": 0.0083, "step": 4190 }, { "epoch": 0.618369605311693, "grad_norm": 1.5323415994644165, "learning_rate": 7.635130146858931e-06, "loss": 0.0627, "step": 4191 }, { "epoch": 0.6185171523423091, "grad_norm": 1.9722416400909424, "learning_rate": 7.630125574275867e-06, "loss": 0.0418, "step": 4192 }, { "epoch": 0.6186646993729251, "grad_norm": 2.8584020137786865, "learning_rate": 7.6251216304859555e-06, "loss": 0.071, "step": 4193 }, { "epoch": 0.6188122464035412, "grad_norm": 2.9157702922821045, "learning_rate": 7.620118316816878e-06, "loss": 0.0734, "step": 4194 }, { "epoch": 0.6189597934341572, "grad_norm": 4.185041427612305, "learning_rate": 7.615115634596156e-06, "loss": 0.1528, "step": 4195 }, { "epoch": 0.6191073404647731, "grad_norm": 4.080869197845459, "learning_rate": 7.61011358515114e-06, "loss": 0.1039, "step": 4196 }, { "epoch": 0.6192548874953892, "grad_norm": 2.566082239151001, "learning_rate": 7.605112169809008e-06, "loss": 0.0694, "step": 4197 }, { "epoch": 0.6194024345260052, "grad_norm": 4.36874532699585, "learning_rate": 7.600111389896776e-06, "loss": 0.1159, "step": 4198 }, { "epoch": 0.6195499815566212, "grad_norm": 1.262406826019287, "learning_rate": 7.595111246741286e-06, "loss": 0.0181, "step": 4199 }, { "epoch": 0.6196975285872371, "grad_norm": 2.8586490154266357, "learning_rate": 7.590111741669218e-06, "loss": 0.0661, "step": 4200 }, { "epoch": 0.6196975285872371, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.06000620871782303, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 49.8978, "eval_samples_per_second": 5.832, "eval_steps_per_second": 0.2, "step": 4200 }, { "epoch": 0.6198450756178532, "grad_norm": 1.7657350301742554, "learning_rate": 7.585112876007078e-06, "loss": 0.0415, "step": 4201 }, { "epoch": 0.6199926226484692, "grad_norm": 1.5066734552383423, "learning_rate": 7.5801146510812054e-06, "loss": 0.0318, "step": 4202 }, { "epoch": 0.6201401696790853, "grad_norm": 7.8486008644104, "learning_rate": 7.575117068217756e-06, "loss": 0.0818, "step": 4203 }, { "epoch": 0.6202877167097012, "grad_norm": 3.317439317703247, "learning_rate": 7.570120128742742e-06, "loss": 0.0841, "step": 4204 }, { "epoch": 0.6204352637403172, "grad_norm": 1.4075021743774414, "learning_rate": 7.565123833981982e-06, "loss": 0.0456, "step": 4205 }, { "epoch": 0.6205828107709332, "grad_norm": 3.1329197883605957, "learning_rate": 7.560128185261131e-06, "loss": 0.0413, "step": 4206 }, { "epoch": 0.6207303578015493, "grad_norm": 1.794213891029358, "learning_rate": 7.555133183905673e-06, "loss": 0.0483, "step": 4207 }, { "epoch": 0.6208779048321652, "grad_norm": 7.027262210845947, "learning_rate": 7.550138831240919e-06, "loss": 0.0912, "step": 4208 }, { "epoch": 0.6210254518627812, "grad_norm": 2.2453949451446533, "learning_rate": 7.545145128592009e-06, "loss": 0.0748, "step": 4209 }, { "epoch": 0.6211729988933973, "grad_norm": 0.9941662549972534, "learning_rate": 7.5401520772839135e-06, "loss": 0.0255, "step": 4210 }, { "epoch": 0.6213205459240133, "grad_norm": 1.269698143005371, "learning_rate": 7.535159678641422e-06, "loss": 0.0261, "step": 4211 }, { "epoch": 0.6214680929546292, "grad_norm": 3.2999441623687744, "learning_rate": 7.530167933989161e-06, "loss": 0.0562, "step": 4212 }, { "epoch": 0.6216156399852453, "grad_norm": 2.0216445922851562, "learning_rate": 7.5251768446515714e-06, "loss": 0.0394, "step": 4213 }, { "epoch": 0.6217631870158613, "grad_norm": 1.8177595138549805, "learning_rate": 7.5201864119529345e-06, "loss": 0.0632, "step": 4214 }, { "epoch": 0.6219107340464773, "grad_norm": 2.0241763591766357, "learning_rate": 7.515196637217345e-06, "loss": 0.0422, "step": 4215 }, { "epoch": 0.6220582810770934, "grad_norm": 1.2666562795639038, "learning_rate": 7.510207521768733e-06, "loss": 0.0381, "step": 4216 }, { "epoch": 0.6222058281077093, "grad_norm": 2.903526782989502, "learning_rate": 7.505219066930842e-06, "loss": 0.0487, "step": 4217 }, { "epoch": 0.6223533751383253, "grad_norm": 17.313955307006836, "learning_rate": 7.500231274027248e-06, "loss": 0.1157, "step": 4218 }, { "epoch": 0.6225009221689414, "grad_norm": 7.110257148742676, "learning_rate": 7.495244144381356e-06, "loss": 0.1107, "step": 4219 }, { "epoch": 0.6226484691995574, "grad_norm": 2.883481025695801, "learning_rate": 7.490257679316385e-06, "loss": 0.0784, "step": 4220 }, { "epoch": 0.6226484691995574, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.05933501943945885, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 49.4576, "eval_samples_per_second": 5.884, "eval_steps_per_second": 0.202, "step": 4220 }, { "epoch": 0.6227960162301733, "grad_norm": 3.8103809356689453, "learning_rate": 7.485271880155383e-06, "loss": 0.0307, "step": 4221 }, { "epoch": 0.6229435632607894, "grad_norm": 3.953392744064331, "learning_rate": 7.480286748221215e-06, "loss": 0.0391, "step": 4222 }, { "epoch": 0.6230911102914054, "grad_norm": 1.8104286193847656, "learning_rate": 7.475302284836583e-06, "loss": 0.0557, "step": 4223 }, { "epoch": 0.6232386573220214, "grad_norm": 2.2431328296661377, "learning_rate": 7.470318491324e-06, "loss": 0.0513, "step": 4224 }, { "epoch": 0.6233862043526374, "grad_norm": 3.2055277824401855, "learning_rate": 7.4653353690058015e-06, "loss": 0.0867, "step": 4225 }, { "epoch": 0.6235337513832534, "grad_norm": 4.622310638427734, "learning_rate": 7.460352919204148e-06, "loss": 0.032, "step": 4226 }, { "epoch": 0.6236812984138694, "grad_norm": 4.114480972290039, "learning_rate": 7.455371143241021e-06, "loss": 0.0759, "step": 4227 }, { "epoch": 0.6238288454444855, "grad_norm": 2.682769775390625, "learning_rate": 7.450390042438227e-06, "loss": 0.132, "step": 4228 }, { "epoch": 0.6239763924751014, "grad_norm": 2.0491926670074463, "learning_rate": 7.445409618117386e-06, "loss": 0.0903, "step": 4229 }, { "epoch": 0.6241239395057174, "grad_norm": 1.7917917966842651, "learning_rate": 7.440429871599945e-06, "loss": 0.0393, "step": 4230 }, { "epoch": 0.6242714865363335, "grad_norm": 4.333006858825684, "learning_rate": 7.435450804207165e-06, "loss": 0.1534, "step": 4231 }, { "epoch": 0.6244190335669495, "grad_norm": 1.886030912399292, "learning_rate": 7.430472417260127e-06, "loss": 0.0515, "step": 4232 }, { "epoch": 0.6245665805975654, "grad_norm": 2.254577159881592, "learning_rate": 7.425494712079746e-06, "loss": 0.0638, "step": 4233 }, { "epoch": 0.6247141276281815, "grad_norm": 1.4084246158599854, "learning_rate": 7.4205176899867346e-06, "loss": 0.0243, "step": 4234 }, { "epoch": 0.6248616746587975, "grad_norm": 1.7003649473190308, "learning_rate": 7.415541352301638e-06, "loss": 0.0425, "step": 4235 }, { "epoch": 0.6250092216894135, "grad_norm": 3.0880212783813477, "learning_rate": 7.410565700344815e-06, "loss": 0.0977, "step": 4236 }, { "epoch": 0.6251567687200295, "grad_norm": 2.6423213481903076, "learning_rate": 7.405590735436443e-06, "loss": 0.0829, "step": 4237 }, { "epoch": 0.6253043157506455, "grad_norm": 2.087374210357666, "learning_rate": 7.400616458896519e-06, "loss": 0.0514, "step": 4238 }, { "epoch": 0.6254518627812615, "grad_norm": 1.6531434059143066, "learning_rate": 7.395642872044857e-06, "loss": 0.044, "step": 4239 }, { "epoch": 0.6255994098118776, "grad_norm": 3.461820363998413, "learning_rate": 7.390669976201086e-06, "loss": 0.0598, "step": 4240 }, { "epoch": 0.6255994098118776, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.06126275286078453, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 48.6406, "eval_samples_per_second": 5.983, "eval_steps_per_second": 0.206, "step": 4240 }, { "epoch": 0.6257469568424936, "grad_norm": 3.890740394592285, "learning_rate": 7.385697772684649e-06, "loss": 0.0869, "step": 4241 }, { "epoch": 0.6258945038731095, "grad_norm": 2.101179361343384, "learning_rate": 7.3807262628148145e-06, "loss": 0.0763, "step": 4242 }, { "epoch": 0.6260420509037256, "grad_norm": 2.4949724674224854, "learning_rate": 7.37575544791066e-06, "loss": 0.0789, "step": 4243 }, { "epoch": 0.6261895979343416, "grad_norm": 2.3185555934906006, "learning_rate": 7.370785329291079e-06, "loss": 0.0406, "step": 4244 }, { "epoch": 0.6263371449649576, "grad_norm": 4.3666462898254395, "learning_rate": 7.365815908274781e-06, "loss": 0.065, "step": 4245 }, { "epoch": 0.6264846919955736, "grad_norm": 1.7676814794540405, "learning_rate": 7.3608471861802865e-06, "loss": 0.07, "step": 4246 }, { "epoch": 0.6266322390261896, "grad_norm": 3.035621166229248, "learning_rate": 7.355879164325946e-06, "loss": 0.0387, "step": 4247 }, { "epoch": 0.6267797860568056, "grad_norm": 1.4600419998168945, "learning_rate": 7.350911844029904e-06, "loss": 0.0282, "step": 4248 }, { "epoch": 0.6269273330874217, "grad_norm": 3.34195613861084, "learning_rate": 7.3459452266101295e-06, "loss": 0.0936, "step": 4249 }, { "epoch": 0.6270748801180376, "grad_norm": 2.3505427837371826, "learning_rate": 7.340979313384404e-06, "loss": 0.0654, "step": 4250 }, { "epoch": 0.6272224271486536, "grad_norm": 4.918964862823486, "learning_rate": 7.336014105670319e-06, "loss": 0.116, "step": 4251 }, { "epoch": 0.6273699741792697, "grad_norm": 1.7442243099212646, "learning_rate": 7.3310496047852865e-06, "loss": 0.0515, "step": 4252 }, { "epoch": 0.6275175212098857, "grad_norm": 2.795384168624878, "learning_rate": 7.326085812046521e-06, "loss": 0.0363, "step": 4253 }, { "epoch": 0.6276650682405016, "grad_norm": 1.1311917304992676, "learning_rate": 7.321122728771057e-06, "loss": 0.0346, "step": 4254 }, { "epoch": 0.6278126152711176, "grad_norm": 1.2705563306808472, "learning_rate": 7.3161603562757365e-06, "loss": 0.0326, "step": 4255 }, { "epoch": 0.6279601623017337, "grad_norm": 1.188093662261963, "learning_rate": 7.311198695877211e-06, "loss": 0.0314, "step": 4256 }, { "epoch": 0.6281077093323497, "grad_norm": 1.0960687398910522, "learning_rate": 7.306237748891951e-06, "loss": 0.028, "step": 4257 }, { "epoch": 0.6282552563629656, "grad_norm": 3.610050678253174, "learning_rate": 7.30127751663623e-06, "loss": 0.0718, "step": 4258 }, { "epoch": 0.6284028033935817, "grad_norm": 1.8842809200286865, "learning_rate": 7.296318000426139e-06, "loss": 0.0377, "step": 4259 }, { "epoch": 0.6285503504241977, "grad_norm": 2.776057481765747, "learning_rate": 7.291359201577563e-06, "loss": 0.0856, "step": 4260 }, { "epoch": 0.6285503504241977, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.06147691234946251, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 48.8498, "eval_samples_per_second": 5.957, "eval_steps_per_second": 0.205, "step": 4260 }, { "epoch": 0.6286978974548137, "grad_norm": 5.599775791168213, "learning_rate": 7.2864011214062204e-06, "loss": 0.0706, "step": 4261 }, { "epoch": 0.6288454444854298, "grad_norm": 7.060204029083252, "learning_rate": 7.281443761227624e-06, "loss": 0.1015, "step": 4262 }, { "epoch": 0.6289929915160457, "grad_norm": 2.257662534713745, "learning_rate": 7.276487122357097e-06, "loss": 0.073, "step": 4263 }, { "epoch": 0.6291405385466617, "grad_norm": 7.968137741088867, "learning_rate": 7.271531206109772e-06, "loss": 0.0955, "step": 4264 }, { "epoch": 0.6292880855772778, "grad_norm": 1.9258322715759277, "learning_rate": 7.266576013800588e-06, "loss": 0.0652, "step": 4265 }, { "epoch": 0.6294356326078938, "grad_norm": 2.357192039489746, "learning_rate": 7.2616215467443e-06, "loss": 0.0424, "step": 4266 }, { "epoch": 0.6295831796385097, "grad_norm": 3.2165818214416504, "learning_rate": 7.256667806255463e-06, "loss": 0.0916, "step": 4267 }, { "epoch": 0.6297307266691258, "grad_norm": 2.9980475902557373, "learning_rate": 7.25171479364844e-06, "loss": 0.1144, "step": 4268 }, { "epoch": 0.6298782736997418, "grad_norm": 1.3218269348144531, "learning_rate": 7.246762510237404e-06, "loss": 0.0473, "step": 4269 }, { "epoch": 0.6300258207303578, "grad_norm": 1.7165931463241577, "learning_rate": 7.2418109573363285e-06, "loss": 0.0487, "step": 4270 }, { "epoch": 0.6301733677609738, "grad_norm": 3.0025315284729004, "learning_rate": 7.236860136259001e-06, "loss": 0.0943, "step": 4271 }, { "epoch": 0.6303209147915898, "grad_norm": 1.295914649963379, "learning_rate": 7.231910048319011e-06, "loss": 0.0281, "step": 4272 }, { "epoch": 0.6304684618222058, "grad_norm": 3.7335450649261475, "learning_rate": 7.226960694829756e-06, "loss": 0.1197, "step": 4273 }, { "epoch": 0.6306160088528219, "grad_norm": 1.2858421802520752, "learning_rate": 7.22201207710443e-06, "loss": 0.0151, "step": 4274 }, { "epoch": 0.6307635558834378, "grad_norm": 2.043832302093506, "learning_rate": 7.217064196456036e-06, "loss": 0.0558, "step": 4275 }, { "epoch": 0.6309111029140538, "grad_norm": 1.936403512954712, "learning_rate": 7.212117054197396e-06, "loss": 0.071, "step": 4276 }, { "epoch": 0.6310586499446699, "grad_norm": 1.6205322742462158, "learning_rate": 7.207170651641113e-06, "loss": 0.08, "step": 4277 }, { "epoch": 0.6312061969752859, "grad_norm": 4.227931976318359, "learning_rate": 7.202224990099609e-06, "loss": 0.0555, "step": 4278 }, { "epoch": 0.6313537440059018, "grad_norm": 0.9507524371147156, "learning_rate": 7.197280070885099e-06, "loss": 0.0248, "step": 4279 }, { "epoch": 0.6315012910365179, "grad_norm": 4.14948034286499, "learning_rate": 7.192335895309613e-06, "loss": 0.0634, "step": 4280 }, { "epoch": 0.6315012910365179, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9655172413793104, "eval_loss": 0.05828209966421127, "eval_precision": 0.98, "eval_recall": 0.9514563106796117, "eval_runtime": 49.2632, "eval_samples_per_second": 5.907, "eval_steps_per_second": 0.203, "step": 4280 }, { "epoch": 0.6316488380671339, "grad_norm": 1.2990920543670654, "learning_rate": 7.187392464684975e-06, "loss": 0.0434, "step": 4281 }, { "epoch": 0.6317963850977499, "grad_norm": 1.4383738040924072, "learning_rate": 7.1824497803228125e-06, "loss": 0.0416, "step": 4282 }, { "epoch": 0.631943932128366, "grad_norm": 5.892875671386719, "learning_rate": 7.177507843534557e-06, "loss": 0.0922, "step": 4283 }, { "epoch": 0.6320914791589819, "grad_norm": 2.504098653793335, "learning_rate": 7.1725666556314415e-06, "loss": 0.0764, "step": 4284 }, { "epoch": 0.6322390261895979, "grad_norm": 6.054599761962891, "learning_rate": 7.1676262179244995e-06, "loss": 0.0561, "step": 4285 }, { "epoch": 0.632386573220214, "grad_norm": 4.2917256355285645, "learning_rate": 7.162686531724565e-06, "loss": 0.0316, "step": 4286 }, { "epoch": 0.63253412025083, "grad_norm": 4.427036285400391, "learning_rate": 7.157747598342275e-06, "loss": 0.1017, "step": 4287 }, { "epoch": 0.6326816672814459, "grad_norm": 2.0212011337280273, "learning_rate": 7.1528094190880625e-06, "loss": 0.058, "step": 4288 }, { "epoch": 0.632829214312062, "grad_norm": 0.8010533452033997, "learning_rate": 7.14787199527216e-06, "loss": 0.0387, "step": 4289 }, { "epoch": 0.632976761342678, "grad_norm": 1.603652000427246, "learning_rate": 7.142935328204612e-06, "loss": 0.0638, "step": 4290 }, { "epoch": 0.633124308373294, "grad_norm": 2.390528678894043, "learning_rate": 7.137999419195245e-06, "loss": 0.083, "step": 4291 }, { "epoch": 0.63327185540391, "grad_norm": 2.5104868412017822, "learning_rate": 7.133064269553695e-06, "loss": 0.0766, "step": 4292 }, { "epoch": 0.633419402434526, "grad_norm": 0.9960589408874512, "learning_rate": 7.128129880589391e-06, "loss": 0.0255, "step": 4293 }, { "epoch": 0.633566949465142, "grad_norm": 1.167784333229065, "learning_rate": 7.1231962536115626e-06, "loss": 0.0359, "step": 4294 }, { "epoch": 0.6337144964957581, "grad_norm": 1.447001576423645, "learning_rate": 7.118263389929241e-06, "loss": 0.0493, "step": 4295 }, { "epoch": 0.633862043526374, "grad_norm": 1.721623182296753, "learning_rate": 7.1133312908512486e-06, "loss": 0.04, "step": 4296 }, { "epoch": 0.63400959055699, "grad_norm": 1.4011988639831543, "learning_rate": 7.1083999576862095e-06, "loss": 0.0595, "step": 4297 }, { "epoch": 0.6341571375876061, "grad_norm": 2.552780866622925, "learning_rate": 7.103469391742538e-06, "loss": 0.0444, "step": 4298 }, { "epoch": 0.6343046846182221, "grad_norm": 5.8526482582092285, "learning_rate": 7.098539594328455e-06, "loss": 0.1093, "step": 4299 }, { "epoch": 0.634452231648838, "grad_norm": 1.3136979341506958, "learning_rate": 7.093610566751969e-06, "loss": 0.0396, "step": 4300 }, { "epoch": 0.634452231648838, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05825934186577797, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.5181, "eval_samples_per_second": 5.76, "eval_steps_per_second": 0.198, "step": 4300 }, { "epoch": 0.634599778679454, "grad_norm": 1.335571050643921, "learning_rate": 7.088682310320892e-06, "loss": 0.0347, "step": 4301 }, { "epoch": 0.6347473257100701, "grad_norm": 2.1791419982910156, "learning_rate": 7.083754826342817e-06, "loss": 0.0466, "step": 4302 }, { "epoch": 0.6348948727406861, "grad_norm": 0.7265709638595581, "learning_rate": 7.078828116125145e-06, "loss": 0.0082, "step": 4303 }, { "epoch": 0.6350424197713022, "grad_norm": 2.2156481742858887, "learning_rate": 7.0739021809750755e-06, "loss": 0.0303, "step": 4304 }, { "epoch": 0.6351899668019181, "grad_norm": 9.068015098571777, "learning_rate": 7.0689770221995875e-06, "loss": 0.1245, "step": 4305 }, { "epoch": 0.6353375138325341, "grad_norm": 1.998000144958496, "learning_rate": 7.064052641105465e-06, "loss": 0.0746, "step": 4306 }, { "epoch": 0.6354850608631502, "grad_norm": 5.119497776031494, "learning_rate": 7.059129038999282e-06, "loss": 0.1227, "step": 4307 }, { "epoch": 0.6356326078937662, "grad_norm": 1.8573399782180786, "learning_rate": 7.054206217187401e-06, "loss": 0.061, "step": 4308 }, { "epoch": 0.6357801549243821, "grad_norm": 2.4584293365478516, "learning_rate": 7.049284176975991e-06, "loss": 0.0407, "step": 4309 }, { "epoch": 0.6359277019549981, "grad_norm": 1.060968279838562, "learning_rate": 7.044362919671001e-06, "loss": 0.0539, "step": 4310 }, { "epoch": 0.6360752489856142, "grad_norm": 0.9224686026573181, "learning_rate": 7.039442446578178e-06, "loss": 0.0166, "step": 4311 }, { "epoch": 0.6362227960162302, "grad_norm": 1.2175883054733276, "learning_rate": 7.034522759003057e-06, "loss": 0.0329, "step": 4312 }, { "epoch": 0.6363703430468461, "grad_norm": 1.4985488653182983, "learning_rate": 7.029603858250969e-06, "loss": 0.0416, "step": 4313 }, { "epoch": 0.6365178900774622, "grad_norm": 2.3336963653564453, "learning_rate": 7.024685745627033e-06, "loss": 0.0612, "step": 4314 }, { "epoch": 0.6366654371080782, "grad_norm": 1.72765052318573, "learning_rate": 7.019768422436162e-06, "loss": 0.0305, "step": 4315 }, { "epoch": 0.6368129841386942, "grad_norm": 3.6524429321289062, "learning_rate": 7.014851889983058e-06, "loss": 0.1069, "step": 4316 }, { "epoch": 0.6369605311693102, "grad_norm": 3.2659170627593994, "learning_rate": 7.009936149572205e-06, "loss": 0.0389, "step": 4317 }, { "epoch": 0.6371080781999262, "grad_norm": 3.4207839965820312, "learning_rate": 7.005021202507896e-06, "loss": 0.0527, "step": 4318 }, { "epoch": 0.6372556252305422, "grad_norm": 1.6852847337722778, "learning_rate": 7.000107050094198e-06, "loss": 0.0708, "step": 4319 }, { "epoch": 0.6374031722611583, "grad_norm": 1.3607537746429443, "learning_rate": 6.995193693634969e-06, "loss": 0.0307, "step": 4320 }, { "epoch": 0.6374031722611583, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05951828137040138, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 49.1014, "eval_samples_per_second": 5.927, "eval_steps_per_second": 0.204, "step": 4320 }, { "epoch": 0.6375507192917742, "grad_norm": 12.323122024536133, "learning_rate": 6.990281134433861e-06, "loss": 0.1313, "step": 4321 }, { "epoch": 0.6376982663223902, "grad_norm": 1.0568668842315674, "learning_rate": 6.985369373794308e-06, "loss": 0.0141, "step": 4322 }, { "epoch": 0.6378458133530063, "grad_norm": 1.780819296836853, "learning_rate": 6.98045841301954e-06, "loss": 0.0259, "step": 4323 }, { "epoch": 0.6379933603836223, "grad_norm": 2.2122550010681152, "learning_rate": 6.975548253412569e-06, "loss": 0.0476, "step": 4324 }, { "epoch": 0.6381409074142382, "grad_norm": 2.8033788204193115, "learning_rate": 6.970638896276197e-06, "loss": 0.0997, "step": 4325 }, { "epoch": 0.6382884544448543, "grad_norm": 2.2346558570861816, "learning_rate": 6.965730342913011e-06, "loss": 0.0519, "step": 4326 }, { "epoch": 0.6384360014754703, "grad_norm": 3.35166597366333, "learning_rate": 6.960822594625383e-06, "loss": 0.0589, "step": 4327 }, { "epoch": 0.6385835485060863, "grad_norm": 2.0227742195129395, "learning_rate": 6.955915652715478e-06, "loss": 0.0574, "step": 4328 }, { "epoch": 0.6387310955367024, "grad_norm": 4.834615230560303, "learning_rate": 6.951009518485243e-06, "loss": 0.0529, "step": 4329 }, { "epoch": 0.6388786425673183, "grad_norm": 2.0972015857696533, "learning_rate": 6.9461041932364126e-06, "loss": 0.0724, "step": 4330 }, { "epoch": 0.6390261895979343, "grad_norm": 2.6968727111816406, "learning_rate": 6.9411996782705e-06, "loss": 0.05, "step": 4331 }, { "epoch": 0.6391737366285504, "grad_norm": 1.9320448637008667, "learning_rate": 6.936295974888808e-06, "loss": 0.0464, "step": 4332 }, { "epoch": 0.6393212836591664, "grad_norm": 2.0188467502593994, "learning_rate": 6.931393084392433e-06, "loss": 0.0516, "step": 4333 }, { "epoch": 0.6394688306897823, "grad_norm": 3.2714526653289795, "learning_rate": 6.926491008082241e-06, "loss": 0.0354, "step": 4334 }, { "epoch": 0.6396163777203984, "grad_norm": 2.6314733028411865, "learning_rate": 6.921589747258888e-06, "loss": 0.0679, "step": 4335 }, { "epoch": 0.6397639247510144, "grad_norm": 1.1279199123382568, "learning_rate": 6.916689303222813e-06, "loss": 0.0222, "step": 4336 }, { "epoch": 0.6399114717816304, "grad_norm": 1.7717738151550293, "learning_rate": 6.911789677274244e-06, "loss": 0.0393, "step": 4337 }, { "epoch": 0.6400590188122464, "grad_norm": 3.078800916671753, "learning_rate": 6.906890870713186e-06, "loss": 0.0533, "step": 4338 }, { "epoch": 0.6402065658428624, "grad_norm": 2.007112741470337, "learning_rate": 6.901992884839423e-06, "loss": 0.0472, "step": 4339 }, { "epoch": 0.6403541128734784, "grad_norm": 2.3135464191436768, "learning_rate": 6.897095720952531e-06, "loss": 0.0549, "step": 4340 }, { "epoch": 0.6403541128734784, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05770711600780487, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8718, "eval_samples_per_second": 5.835, "eval_steps_per_second": 0.201, "step": 4340 }, { "epoch": 0.6405016599040945, "grad_norm": 1.0793713331222534, "learning_rate": 6.89219938035186e-06, "loss": 0.0195, "step": 4341 }, { "epoch": 0.6406492069347104, "grad_norm": 0.9406227469444275, "learning_rate": 6.887303864336545e-06, "loss": 0.0257, "step": 4342 }, { "epoch": 0.6407967539653264, "grad_norm": 5.699306964874268, "learning_rate": 6.882409174205505e-06, "loss": 0.0913, "step": 4343 }, { "epoch": 0.6409443009959425, "grad_norm": 1.8053065538406372, "learning_rate": 6.877515311257432e-06, "loss": 0.0541, "step": 4344 }, { "epoch": 0.6410918480265585, "grad_norm": 1.0377061367034912, "learning_rate": 6.872622276790804e-06, "loss": 0.0224, "step": 4345 }, { "epoch": 0.6412393950571744, "grad_norm": 2.0995044708251953, "learning_rate": 6.867730072103875e-06, "loss": 0.0557, "step": 4346 }, { "epoch": 0.6413869420877905, "grad_norm": 4.648935794830322, "learning_rate": 6.862838698494693e-06, "loss": 0.0892, "step": 4347 }, { "epoch": 0.6415344891184065, "grad_norm": 3.6482994556427, "learning_rate": 6.857948157261064e-06, "loss": 0.1102, "step": 4348 }, { "epoch": 0.6416820361490225, "grad_norm": 4.0318989753723145, "learning_rate": 6.853058449700587e-06, "loss": 0.0738, "step": 4349 }, { "epoch": 0.6418295831796386, "grad_norm": 1.5417934656143188, "learning_rate": 6.848169577110637e-06, "loss": 0.0456, "step": 4350 }, { "epoch": 0.6419771302102545, "grad_norm": 1.3491369485855103, "learning_rate": 6.843281540788365e-06, "loss": 0.0386, "step": 4351 }, { "epoch": 0.6421246772408705, "grad_norm": 1.9848618507385254, "learning_rate": 6.838394342030705e-06, "loss": 0.0603, "step": 4352 }, { "epoch": 0.6422722242714866, "grad_norm": 2.239560127258301, "learning_rate": 6.833507982134364e-06, "loss": 0.068, "step": 4353 }, { "epoch": 0.6424197713021026, "grad_norm": 1.493889331817627, "learning_rate": 6.82862246239583e-06, "loss": 0.0407, "step": 4354 }, { "epoch": 0.6425673183327185, "grad_norm": 2.0036933422088623, "learning_rate": 6.823737784111361e-06, "loss": 0.0538, "step": 4355 }, { "epoch": 0.6427148653633346, "grad_norm": 1.0764005184173584, "learning_rate": 6.818853948577006e-06, "loss": 0.0479, "step": 4356 }, { "epoch": 0.6428624123939506, "grad_norm": 0.9348846077919006, "learning_rate": 6.813970957088573e-06, "loss": 0.0169, "step": 4357 }, { "epoch": 0.6430099594245666, "grad_norm": 3.483400344848633, "learning_rate": 6.809088810941664e-06, "loss": 0.1008, "step": 4358 }, { "epoch": 0.6431575064551825, "grad_norm": 1.706274390220642, "learning_rate": 6.804207511431638e-06, "loss": 0.0711, "step": 4359 }, { "epoch": 0.6433050534857986, "grad_norm": 1.971518635749817, "learning_rate": 6.799327059853637e-06, "loss": 0.0647, "step": 4360 }, { "epoch": 0.6433050534857986, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05963670834898949, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 49.0238, "eval_samples_per_second": 5.936, "eval_steps_per_second": 0.204, "step": 4360 }, { "epoch": 0.6434526005164146, "grad_norm": 1.544968605041504, "learning_rate": 6.794447457502591e-06, "loss": 0.0317, "step": 4361 }, { "epoch": 0.6436001475470307, "grad_norm": 2.4929938316345215, "learning_rate": 6.789568705673184e-06, "loss": 0.0908, "step": 4362 }, { "epoch": 0.6437476945776466, "grad_norm": 1.2263356447219849, "learning_rate": 6.784690805659888e-06, "loss": 0.0451, "step": 4363 }, { "epoch": 0.6438952416082626, "grad_norm": 3.6235475540161133, "learning_rate": 6.779813758756943e-06, "loss": 0.0539, "step": 4364 }, { "epoch": 0.6440427886388786, "grad_norm": 4.680814743041992, "learning_rate": 6.774937566258361e-06, "loss": 0.0338, "step": 4365 }, { "epoch": 0.6441903356694947, "grad_norm": 2.933483123779297, "learning_rate": 6.770062229457938e-06, "loss": 0.0955, "step": 4366 }, { "epoch": 0.6443378827001106, "grad_norm": 2.347795009613037, "learning_rate": 6.765187749649232e-06, "loss": 0.0976, "step": 4367 }, { "epoch": 0.6444854297307266, "grad_norm": 1.3517539501190186, "learning_rate": 6.760314128125573e-06, "loss": 0.0285, "step": 4368 }, { "epoch": 0.6446329767613427, "grad_norm": 2.4707748889923096, "learning_rate": 6.755441366180074e-06, "loss": 0.0555, "step": 4369 }, { "epoch": 0.6447805237919587, "grad_norm": 1.6405353546142578, "learning_rate": 6.750569465105607e-06, "loss": 0.0459, "step": 4370 }, { "epoch": 0.6449280708225748, "grad_norm": 1.1564868688583374, "learning_rate": 6.745698426194829e-06, "loss": 0.0323, "step": 4371 }, { "epoch": 0.6450756178531907, "grad_norm": 4.656186103820801, "learning_rate": 6.7408282507401565e-06, "loss": 0.0511, "step": 4372 }, { "epoch": 0.6452231648838067, "grad_norm": 1.9538291692733765, "learning_rate": 6.7359589400337865e-06, "loss": 0.0463, "step": 4373 }, { "epoch": 0.6453707119144227, "grad_norm": 2.5232486724853516, "learning_rate": 6.731090495367674e-06, "loss": 0.0582, "step": 4374 }, { "epoch": 0.6455182589450388, "grad_norm": 1.2026257514953613, "learning_rate": 6.726222918033551e-06, "loss": 0.0133, "step": 4375 }, { "epoch": 0.6456658059756547, "grad_norm": 0.8682292103767395, "learning_rate": 6.7213562093229334e-06, "loss": 0.022, "step": 4376 }, { "epoch": 0.6458133530062707, "grad_norm": 2.8112149238586426, "learning_rate": 6.716490370527081e-06, "loss": 0.0272, "step": 4377 }, { "epoch": 0.6459609000368868, "grad_norm": 1.589779019355774, "learning_rate": 6.711625402937041e-06, "loss": 0.0368, "step": 4378 }, { "epoch": 0.6461084470675028, "grad_norm": 2.021059513092041, "learning_rate": 6.706761307843618e-06, "loss": 0.0552, "step": 4379 }, { "epoch": 0.6462559940981187, "grad_norm": 1.0665171146392822, "learning_rate": 6.701898086537397e-06, "loss": 0.0181, "step": 4380 }, { "epoch": 0.6462559940981187, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05858885869383812, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 48.9119, "eval_samples_per_second": 5.949, "eval_steps_per_second": 0.204, "step": 4380 }, { "epoch": 0.6464035411287348, "grad_norm": 2.5532329082489014, "learning_rate": 6.697035740308724e-06, "loss": 0.0511, "step": 4381 }, { "epoch": 0.6465510881593508, "grad_norm": 2.250873327255249, "learning_rate": 6.692174270447711e-06, "loss": 0.0465, "step": 4382 }, { "epoch": 0.6466986351899668, "grad_norm": 1.262923002243042, "learning_rate": 6.687313678244243e-06, "loss": 0.045, "step": 4383 }, { "epoch": 0.6468461822205828, "grad_norm": 2.211190938949585, "learning_rate": 6.682453964987965e-06, "loss": 0.0458, "step": 4384 }, { "epoch": 0.6469937292511988, "grad_norm": 3.513896942138672, "learning_rate": 6.677595131968297e-06, "loss": 0.0726, "step": 4385 }, { "epoch": 0.6471412762818148, "grad_norm": 1.591567873954773, "learning_rate": 6.6727371804744234e-06, "loss": 0.0256, "step": 4386 }, { "epoch": 0.6472888233124309, "grad_norm": 2.3493664264678955, "learning_rate": 6.667880111795292e-06, "loss": 0.0683, "step": 4387 }, { "epoch": 0.6474363703430468, "grad_norm": 2.122281074523926, "learning_rate": 6.663023927219613e-06, "loss": 0.0574, "step": 4388 }, { "epoch": 0.6475839173736628, "grad_norm": 3.3464603424072266, "learning_rate": 6.658168628035865e-06, "loss": 0.0679, "step": 4389 }, { "epoch": 0.6477314644042789, "grad_norm": 2.1836118698120117, "learning_rate": 6.6533142155323025e-06, "loss": 0.0751, "step": 4390 }, { "epoch": 0.6478790114348949, "grad_norm": 3.5879385471343994, "learning_rate": 6.648460690996929e-06, "loss": 0.1238, "step": 4391 }, { "epoch": 0.6480265584655108, "grad_norm": 6.010129451751709, "learning_rate": 6.6436080557175185e-06, "loss": 0.1163, "step": 4392 }, { "epoch": 0.6481741054961269, "grad_norm": 4.628668785095215, "learning_rate": 6.638756310981611e-06, "loss": 0.0828, "step": 4393 }, { "epoch": 0.6483216525267429, "grad_norm": 2.6903927326202393, "learning_rate": 6.633905458076505e-06, "loss": 0.0362, "step": 4394 }, { "epoch": 0.6484691995573589, "grad_norm": 3.0415492057800293, "learning_rate": 6.629055498289269e-06, "loss": 0.0916, "step": 4395 }, { "epoch": 0.648616746587975, "grad_norm": 3.397050380706787, "learning_rate": 6.624206432906732e-06, "loss": 0.1034, "step": 4396 }, { "epoch": 0.6487642936185909, "grad_norm": 5.7768449783325195, "learning_rate": 6.619358263215485e-06, "loss": 0.0957, "step": 4397 }, { "epoch": 0.6489118406492069, "grad_norm": 2.6319448947906494, "learning_rate": 6.614510990501876e-06, "loss": 0.0783, "step": 4398 }, { "epoch": 0.649059387679823, "grad_norm": 2.8324670791625977, "learning_rate": 6.6096646160520264e-06, "loss": 0.0438, "step": 4399 }, { "epoch": 0.649206934710439, "grad_norm": 5.01246976852417, "learning_rate": 6.604819141151812e-06, "loss": 0.0798, "step": 4400 }, { "epoch": 0.649206934710439, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05617249384522438, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1949, "eval_samples_per_second": 5.915, "eval_steps_per_second": 0.203, "step": 4400 }, { "epoch": 0.6493544817410549, "grad_norm": 2.757441520690918, "learning_rate": 6.599974567086871e-06, "loss": 0.0286, "step": 4401 }, { "epoch": 0.649502028771671, "grad_norm": 3.9749069213867188, "learning_rate": 6.595130895142601e-06, "loss": 0.033, "step": 4402 }, { "epoch": 0.649649575802287, "grad_norm": 2.7191781997680664, "learning_rate": 6.590288126604159e-06, "loss": 0.0503, "step": 4403 }, { "epoch": 0.649797122832903, "grad_norm": 1.5683810710906982, "learning_rate": 6.585446262756474e-06, "loss": 0.0423, "step": 4404 }, { "epoch": 0.649944669863519, "grad_norm": 2.9194579124450684, "learning_rate": 6.58060530488422e-06, "loss": 0.0579, "step": 4405 }, { "epoch": 0.650092216894135, "grad_norm": 1.7084243297576904, "learning_rate": 6.5757652542718385e-06, "loss": 0.0357, "step": 4406 }, { "epoch": 0.650239763924751, "grad_norm": 6.491199493408203, "learning_rate": 6.570926112203528e-06, "loss": 0.023, "step": 4407 }, { "epoch": 0.6503873109553671, "grad_norm": 4.800782680511475, "learning_rate": 6.566087879963245e-06, "loss": 0.1412, "step": 4408 }, { "epoch": 0.650534857985983, "grad_norm": 3.0766396522521973, "learning_rate": 6.561250558834712e-06, "loss": 0.049, "step": 4409 }, { "epoch": 0.650682405016599, "grad_norm": 1.7469078302383423, "learning_rate": 6.556414150101399e-06, "loss": 0.0304, "step": 4410 }, { "epoch": 0.650829952047215, "grad_norm": 2.0975399017333984, "learning_rate": 6.55157865504654e-06, "loss": 0.0704, "step": 4411 }, { "epoch": 0.6509774990778311, "grad_norm": 1.7246694564819336, "learning_rate": 6.546744074953126e-06, "loss": 0.0428, "step": 4412 }, { "epoch": 0.651125046108447, "grad_norm": 6.032076835632324, "learning_rate": 6.5419104111039e-06, "loss": 0.1259, "step": 4413 }, { "epoch": 0.651272593139063, "grad_norm": 2.2777020931243896, "learning_rate": 6.537077664781374e-06, "loss": 0.0643, "step": 4414 }, { "epoch": 0.6514201401696791, "grad_norm": 3.9178431034088135, "learning_rate": 6.532245837267807e-06, "loss": 0.1072, "step": 4415 }, { "epoch": 0.6515676872002951, "grad_norm": 2.0883002281188965, "learning_rate": 6.527414929845214e-06, "loss": 0.077, "step": 4416 }, { "epoch": 0.6517152342309112, "grad_norm": 2.4088566303253174, "learning_rate": 6.522584943795365e-06, "loss": 0.0674, "step": 4417 }, { "epoch": 0.6518627812615271, "grad_norm": 2.5000414848327637, "learning_rate": 6.5177558803997976e-06, "loss": 0.0632, "step": 4418 }, { "epoch": 0.6520103282921431, "grad_norm": 3.176330327987671, "learning_rate": 6.512927740939789e-06, "loss": 0.0972, "step": 4419 }, { "epoch": 0.6521578753227592, "grad_norm": 1.9688628911972046, "learning_rate": 6.508100526696381e-06, "loss": 0.0386, "step": 4420 }, { "epoch": 0.6521578753227592, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.057804737240076065, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 53.0749, "eval_samples_per_second": 5.483, "eval_steps_per_second": 0.188, "step": 4420 }, { "epoch": 0.6523054223533752, "grad_norm": 3.077688217163086, "learning_rate": 6.5032742389503676e-06, "loss": 0.0519, "step": 4421 }, { "epoch": 0.6524529693839911, "grad_norm": 1.935166835784912, "learning_rate": 6.4984488789822905e-06, "loss": 0.0555, "step": 4422 }, { "epoch": 0.6526005164146071, "grad_norm": 2.046032190322876, "learning_rate": 6.4936244480724575e-06, "loss": 0.0555, "step": 4423 }, { "epoch": 0.6527480634452232, "grad_norm": 12.51380443572998, "learning_rate": 6.488800947500921e-06, "loss": 0.0686, "step": 4424 }, { "epoch": 0.6528956104758392, "grad_norm": 1.8526471853256226, "learning_rate": 6.4839783785474884e-06, "loss": 0.0701, "step": 4425 }, { "epoch": 0.6530431575064551, "grad_norm": 3.5263936519622803, "learning_rate": 6.479156742491721e-06, "loss": 0.0854, "step": 4426 }, { "epoch": 0.6531907045370712, "grad_norm": 3.3337717056274414, "learning_rate": 6.4743360406129294e-06, "loss": 0.0904, "step": 4427 }, { "epoch": 0.6533382515676872, "grad_norm": 2.0364885330200195, "learning_rate": 6.469516274190185e-06, "loss": 0.0579, "step": 4428 }, { "epoch": 0.6534857985983032, "grad_norm": 1.7969423532485962, "learning_rate": 6.464697444502301e-06, "loss": 0.0366, "step": 4429 }, { "epoch": 0.6536333456289192, "grad_norm": 1.5288689136505127, "learning_rate": 6.4598795528278495e-06, "loss": 0.0425, "step": 4430 }, { "epoch": 0.6537808926595352, "grad_norm": 1.8399264812469482, "learning_rate": 6.455062600445143e-06, "loss": 0.0547, "step": 4431 }, { "epoch": 0.6539284396901512, "grad_norm": 1.8224679231643677, "learning_rate": 6.450246588632254e-06, "loss": 0.0411, "step": 4432 }, { "epoch": 0.6540759867207673, "grad_norm": 2.015737771987915, "learning_rate": 6.4454315186670125e-06, "loss": 0.0847, "step": 4433 }, { "epoch": 0.6542235337513832, "grad_norm": 1.4043141603469849, "learning_rate": 6.44061739182698e-06, "loss": 0.056, "step": 4434 }, { "epoch": 0.6543710807819992, "grad_norm": 8.94338607788086, "learning_rate": 6.43580420938948e-06, "loss": 0.1093, "step": 4435 }, { "epoch": 0.6545186278126153, "grad_norm": 1.838114857673645, "learning_rate": 6.430991972631582e-06, "loss": 0.0174, "step": 4436 }, { "epoch": 0.6546661748432313, "grad_norm": 3.10587477684021, "learning_rate": 6.426180682830108e-06, "loss": 0.0434, "step": 4437 }, { "epoch": 0.6548137218738473, "grad_norm": 2.1863956451416016, "learning_rate": 6.421370341261624e-06, "loss": 0.1018, "step": 4438 }, { "epoch": 0.6549612689044633, "grad_norm": 3.246185064315796, "learning_rate": 6.416560949202449e-06, "loss": 0.1015, "step": 4439 }, { "epoch": 0.6551088159350793, "grad_norm": 2.234623670578003, "learning_rate": 6.411752507928643e-06, "loss": 0.0986, "step": 4440 }, { "epoch": 0.6551088159350793, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.06005861982703209, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 50.2076, "eval_samples_per_second": 5.796, "eval_steps_per_second": 0.199, "step": 4440 }, { "epoch": 0.6552563629656953, "grad_norm": 1.7946083545684814, "learning_rate": 6.40694501871602e-06, "loss": 0.0633, "step": 4441 }, { "epoch": 0.6554039099963114, "grad_norm": 1.7781100273132324, "learning_rate": 6.402138482840141e-06, "loss": 0.0698, "step": 4442 }, { "epoch": 0.6555514570269273, "grad_norm": 1.7382947206497192, "learning_rate": 6.397332901576314e-06, "loss": 0.036, "step": 4443 }, { "epoch": 0.6556990040575433, "grad_norm": 2.0687174797058105, "learning_rate": 6.392528276199592e-06, "loss": 0.0691, "step": 4444 }, { "epoch": 0.6558465510881594, "grad_norm": 2.8955514430999756, "learning_rate": 6.387724607984772e-06, "loss": 0.0881, "step": 4445 }, { "epoch": 0.6559940981187754, "grad_norm": 1.676020622253418, "learning_rate": 6.382921898206398e-06, "loss": 0.0279, "step": 4446 }, { "epoch": 0.6561416451493913, "grad_norm": 2.6807148456573486, "learning_rate": 6.37812014813877e-06, "loss": 0.0223, "step": 4447 }, { "epoch": 0.6562891921800074, "grad_norm": 1.6488813161849976, "learning_rate": 6.373319359055919e-06, "loss": 0.0535, "step": 4448 }, { "epoch": 0.6564367392106234, "grad_norm": 2.6411168575286865, "learning_rate": 6.368519532231627e-06, "loss": 0.0428, "step": 4449 }, { "epoch": 0.6565842862412394, "grad_norm": 2.7337758541107178, "learning_rate": 6.363720668939421e-06, "loss": 0.0866, "step": 4450 }, { "epoch": 0.6567318332718554, "grad_norm": 1.1719896793365479, "learning_rate": 6.358922770452573e-06, "loss": 0.0301, "step": 4451 }, { "epoch": 0.6568793803024714, "grad_norm": 1.586561918258667, "learning_rate": 6.354125838044099e-06, "loss": 0.0819, "step": 4452 }, { "epoch": 0.6570269273330874, "grad_norm": 1.3872873783111572, "learning_rate": 6.349329872986754e-06, "loss": 0.0434, "step": 4453 }, { "epoch": 0.6571744743637035, "grad_norm": 2.2281954288482666, "learning_rate": 6.344534876553044e-06, "loss": 0.0736, "step": 4454 }, { "epoch": 0.6573220213943194, "grad_norm": 2.525529146194458, "learning_rate": 6.33974085001521e-06, "loss": 0.0239, "step": 4455 }, { "epoch": 0.6574695684249354, "grad_norm": 2.3365206718444824, "learning_rate": 6.334947794645244e-06, "loss": 0.0944, "step": 4456 }, { "epoch": 0.6576171154555515, "grad_norm": 3.1038765907287598, "learning_rate": 6.3301557117148746e-06, "loss": 0.0443, "step": 4457 }, { "epoch": 0.6577646624861675, "grad_norm": 1.8159865140914917, "learning_rate": 6.325364602495575e-06, "loss": 0.0413, "step": 4458 }, { "epoch": 0.6579122095167834, "grad_norm": 1.2702845335006714, "learning_rate": 6.3205744682585545e-06, "loss": 0.0395, "step": 4459 }, { "epoch": 0.6580597565473995, "grad_norm": 2.11480450630188, "learning_rate": 6.3157853102747665e-06, "loss": 0.0731, "step": 4460 }, { "epoch": 0.6580597565473995, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.05957798287272453, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 50.0277, "eval_samples_per_second": 5.817, "eval_steps_per_second": 0.2, "step": 4460 }, { "epoch": 0.6582073035780155, "grad_norm": 2.299861192703247, "learning_rate": 6.310997129814917e-06, "loss": 0.0435, "step": 4461 }, { "epoch": 0.6583548506086315, "grad_norm": 2.645285129547119, "learning_rate": 6.306209928149436e-06, "loss": 0.0596, "step": 4462 }, { "epoch": 0.6585023976392476, "grad_norm": 1.9211374521255493, "learning_rate": 6.301423706548499e-06, "loss": 0.0587, "step": 4463 }, { "epoch": 0.6586499446698635, "grad_norm": 2.9688777923583984, "learning_rate": 6.296638466282026e-06, "loss": 0.061, "step": 4464 }, { "epoch": 0.6587974917004795, "grad_norm": 0.9423332810401917, "learning_rate": 6.291854208619669e-06, "loss": 0.0282, "step": 4465 }, { "epoch": 0.6589450387310956, "grad_norm": 2.3356895446777344, "learning_rate": 6.287070934830829e-06, "loss": 0.0624, "step": 4466 }, { "epoch": 0.6590925857617116, "grad_norm": 3.511073350906372, "learning_rate": 6.282288646184638e-06, "loss": 0.0648, "step": 4467 }, { "epoch": 0.6592401327923275, "grad_norm": 3.802366018295288, "learning_rate": 6.277507343949971e-06, "loss": 0.1404, "step": 4468 }, { "epoch": 0.6593876798229436, "grad_norm": 1.36722993850708, "learning_rate": 6.2727270293954405e-06, "loss": 0.021, "step": 4469 }, { "epoch": 0.6595352268535596, "grad_norm": 1.5654717683792114, "learning_rate": 6.267947703789385e-06, "loss": 0.0595, "step": 4470 }, { "epoch": 0.6596827738841756, "grad_norm": 1.5576167106628418, "learning_rate": 6.263169368399908e-06, "loss": 0.0602, "step": 4471 }, { "epoch": 0.6598303209147915, "grad_norm": 0.7984778881072998, "learning_rate": 6.258392024494826e-06, "loss": 0.0172, "step": 4472 }, { "epoch": 0.6599778679454076, "grad_norm": 2.393357276916504, "learning_rate": 6.2536156733417e-06, "loss": 0.0725, "step": 4473 }, { "epoch": 0.6601254149760236, "grad_norm": 1.6943670511245728, "learning_rate": 6.248840316207825e-06, "loss": 0.0409, "step": 4474 }, { "epoch": 0.6602729620066397, "grad_norm": 1.4864270687103271, "learning_rate": 6.244065954360243e-06, "loss": 0.0323, "step": 4475 }, { "epoch": 0.6604205090372556, "grad_norm": 0.8731735348701477, "learning_rate": 6.239292589065719e-06, "loss": 0.0275, "step": 4476 }, { "epoch": 0.6605680560678716, "grad_norm": 2.9448368549346924, "learning_rate": 6.234520221590759e-06, "loss": 0.0817, "step": 4477 }, { "epoch": 0.6607156030984876, "grad_norm": 1.8645683526992798, "learning_rate": 6.229748853201605e-06, "loss": 0.0478, "step": 4478 }, { "epoch": 0.6608631501291037, "grad_norm": 1.7074979543685913, "learning_rate": 6.224978485164229e-06, "loss": 0.0486, "step": 4479 }, { "epoch": 0.6610106971597196, "grad_norm": 1.5328609943389893, "learning_rate": 6.220209118744347e-06, "loss": 0.0269, "step": 4480 }, { "epoch": 0.6610106971597196, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9580246913580247, "eval_loss": 0.057335324585437775, "eval_precision": 0.9748743718592965, "eval_recall": 0.941747572815534, "eval_runtime": 50.1223, "eval_samples_per_second": 5.806, "eval_steps_per_second": 0.2, "step": 4480 }, { "epoch": 0.6611582441903356, "grad_norm": 1.9285787343978882, "learning_rate": 6.215440755207402e-06, "loss": 0.0423, "step": 4481 }, { "epoch": 0.6613057912209517, "grad_norm": 1.8386911153793335, "learning_rate": 6.210673395818572e-06, "loss": 0.0366, "step": 4482 }, { "epoch": 0.6614533382515677, "grad_norm": 2.162466049194336, "learning_rate": 6.205907041842771e-06, "loss": 0.0675, "step": 4483 }, { "epoch": 0.6616008852821837, "grad_norm": 1.4594266414642334, "learning_rate": 6.201141694544637e-06, "loss": 0.0317, "step": 4484 }, { "epoch": 0.6617484323127997, "grad_norm": 0.9458202719688416, "learning_rate": 6.196377355188556e-06, "loss": 0.0123, "step": 4485 }, { "epoch": 0.6618959793434157, "grad_norm": 1.6956408023834229, "learning_rate": 6.191614025038639e-06, "loss": 0.0245, "step": 4486 }, { "epoch": 0.6620435263740317, "grad_norm": 2.439507007598877, "learning_rate": 6.186851705358725e-06, "loss": 0.0253, "step": 4487 }, { "epoch": 0.6621910734046478, "grad_norm": 2.100860357284546, "learning_rate": 6.182090397412391e-06, "loss": 0.0743, "step": 4488 }, { "epoch": 0.6623386204352637, "grad_norm": 1.2288669347763062, "learning_rate": 6.177330102462941e-06, "loss": 0.0504, "step": 4489 }, { "epoch": 0.6624861674658797, "grad_norm": 3.726640224456787, "learning_rate": 6.1725708217734155e-06, "loss": 0.0605, "step": 4490 }, { "epoch": 0.6626337144964958, "grad_norm": 2.50217604637146, "learning_rate": 6.167812556606585e-06, "loss": 0.1003, "step": 4491 }, { "epoch": 0.6627812615271118, "grad_norm": 9.090914726257324, "learning_rate": 6.163055308224944e-06, "loss": 0.0448, "step": 4492 }, { "epoch": 0.6629288085577277, "grad_norm": 2.791994094848633, "learning_rate": 6.158299077890724e-06, "loss": 0.1023, "step": 4493 }, { "epoch": 0.6630763555883438, "grad_norm": 2.8435914516448975, "learning_rate": 6.1535438668658855e-06, "loss": 0.0526, "step": 4494 }, { "epoch": 0.6632239026189598, "grad_norm": 1.862433671951294, "learning_rate": 6.148789676412114e-06, "loss": 0.0369, "step": 4495 }, { "epoch": 0.6633714496495758, "grad_norm": 1.6116923093795776, "learning_rate": 6.1440365077908335e-06, "loss": 0.0285, "step": 4496 }, { "epoch": 0.6635189966801918, "grad_norm": 2.023977756500244, "learning_rate": 6.139284362263185e-06, "loss": 0.0303, "step": 4497 }, { "epoch": 0.6636665437108078, "grad_norm": 2.118455648422241, "learning_rate": 6.134533241090046e-06, "loss": 0.0377, "step": 4498 }, { "epoch": 0.6638140907414238, "grad_norm": 1.1348005533218384, "learning_rate": 6.12978314553202e-06, "loss": 0.0251, "step": 4499 }, { "epoch": 0.6639616377720399, "grad_norm": 2.13145112991333, "learning_rate": 6.125034076849441e-06, "loss": 0.0509, "step": 4500 }, { "epoch": 0.6639616377720399, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05893443897366524, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 49.9972, "eval_samples_per_second": 5.82, "eval_steps_per_second": 0.2, "step": 4500 }, { "epoch": 0.6641091848026558, "grad_norm": 1.4929237365722656, "learning_rate": 6.120286036302367e-06, "loss": 0.0399, "step": 4501 }, { "epoch": 0.6642567318332718, "grad_norm": 1.828254222869873, "learning_rate": 6.115539025150583e-06, "loss": 0.0578, "step": 4502 }, { "epoch": 0.6644042788638879, "grad_norm": 2.7728404998779297, "learning_rate": 6.1107930446535975e-06, "loss": 0.0828, "step": 4503 }, { "epoch": 0.6645518258945039, "grad_norm": 1.4778112173080444, "learning_rate": 6.106048096070662e-06, "loss": 0.0257, "step": 4504 }, { "epoch": 0.6646993729251199, "grad_norm": 1.056089162826538, "learning_rate": 6.101304180660732e-06, "loss": 0.0294, "step": 4505 }, { "epoch": 0.6648469199557359, "grad_norm": 1.326404333114624, "learning_rate": 6.096561299682505e-06, "loss": 0.0172, "step": 4506 }, { "epoch": 0.6649944669863519, "grad_norm": 2.7227141857147217, "learning_rate": 6.091819454394397e-06, "loss": 0.0241, "step": 4507 }, { "epoch": 0.6651420140169679, "grad_norm": 3.35990309715271, "learning_rate": 6.087078646054546e-06, "loss": 0.0682, "step": 4508 }, { "epoch": 0.665289561047584, "grad_norm": 2.0490784645080566, "learning_rate": 6.0823388759208255e-06, "loss": 0.0859, "step": 4509 }, { "epoch": 0.6654371080781999, "grad_norm": 1.0189433097839355, "learning_rate": 6.077600145250827e-06, "loss": 0.0129, "step": 4510 }, { "epoch": 0.6655846551088159, "grad_norm": 3.876119375228882, "learning_rate": 6.0728624553018645e-06, "loss": 0.0569, "step": 4511 }, { "epoch": 0.665732202139432, "grad_norm": 3.0607597827911377, "learning_rate": 6.068125807330975e-06, "loss": 0.0448, "step": 4512 }, { "epoch": 0.665879749170048, "grad_norm": 2.093217611312866, "learning_rate": 6.06339020259493e-06, "loss": 0.0451, "step": 4513 }, { "epoch": 0.6660272962006639, "grad_norm": 1.8887628316879272, "learning_rate": 6.058655642350211e-06, "loss": 0.047, "step": 4514 }, { "epoch": 0.66617484323128, "grad_norm": 2.465681552886963, "learning_rate": 6.05392212785303e-06, "loss": 0.0533, "step": 4515 }, { "epoch": 0.666322390261896, "grad_norm": 3.3657305240631104, "learning_rate": 6.049189660359316e-06, "loss": 0.0748, "step": 4516 }, { "epoch": 0.666469937292512, "grad_norm": 4.978641510009766, "learning_rate": 6.044458241124721e-06, "loss": 0.0326, "step": 4517 }, { "epoch": 0.666617484323128, "grad_norm": 1.293929100036621, "learning_rate": 6.03972787140463e-06, "loss": 0.0466, "step": 4518 }, { "epoch": 0.666765031353744, "grad_norm": 2.5016493797302246, "learning_rate": 6.034998552454136e-06, "loss": 0.0945, "step": 4519 }, { "epoch": 0.66691257838436, "grad_norm": 2.860934257507324, "learning_rate": 6.030270285528057e-06, "loss": 0.0554, "step": 4520 }, { "epoch": 0.66691257838436, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.05680849030613899, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 49.3284, "eval_samples_per_second": 5.899, "eval_steps_per_second": 0.203, "step": 4520 }, { "epoch": 0.6670601254149761, "grad_norm": 1.9814680814743042, "learning_rate": 6.025543071880934e-06, "loss": 0.0484, "step": 4521 }, { "epoch": 0.667207672445592, "grad_norm": 2.749361991882324, "learning_rate": 6.020816912767024e-06, "loss": 0.058, "step": 4522 }, { "epoch": 0.667355219476208, "grad_norm": 2.8421387672424316, "learning_rate": 6.016091809440314e-06, "loss": 0.0496, "step": 4523 }, { "epoch": 0.667502766506824, "grad_norm": 3.5100228786468506, "learning_rate": 6.011367763154501e-06, "loss": 0.0656, "step": 4524 }, { "epoch": 0.6676503135374401, "grad_norm": 1.4443405866622925, "learning_rate": 6.006644775163006e-06, "loss": 0.0408, "step": 4525 }, { "epoch": 0.6677978605680561, "grad_norm": 1.6495566368103027, "learning_rate": 6.0019228467189685e-06, "loss": 0.0411, "step": 4526 }, { "epoch": 0.667945407598672, "grad_norm": 4.903963565826416, "learning_rate": 5.997201979075239e-06, "loss": 0.1453, "step": 4527 }, { "epoch": 0.6680929546292881, "grad_norm": 1.5589641332626343, "learning_rate": 5.992482173484404e-06, "loss": 0.0462, "step": 4528 }, { "epoch": 0.6682405016599041, "grad_norm": 2.184011220932007, "learning_rate": 5.9877634311987585e-06, "loss": 0.0409, "step": 4529 }, { "epoch": 0.6683880486905202, "grad_norm": 1.4928693771362305, "learning_rate": 5.983045753470308e-06, "loss": 0.0295, "step": 4530 }, { "epoch": 0.6685355957211361, "grad_norm": 4.098515033721924, "learning_rate": 5.978329141550783e-06, "loss": 0.1329, "step": 4531 }, { "epoch": 0.6686831427517521, "grad_norm": 1.2859034538269043, "learning_rate": 5.97361359669164e-06, "loss": 0.0436, "step": 4532 }, { "epoch": 0.6688306897823681, "grad_norm": 3.2776687145233154, "learning_rate": 5.968899120144035e-06, "loss": 0.0901, "step": 4533 }, { "epoch": 0.6689782368129842, "grad_norm": 2.1422314643859863, "learning_rate": 5.964185713158852e-06, "loss": 0.0956, "step": 4534 }, { "epoch": 0.6691257838436001, "grad_norm": 1.6884253025054932, "learning_rate": 5.959473376986686e-06, "loss": 0.0686, "step": 4535 }, { "epoch": 0.6692733308742161, "grad_norm": 1.7804298400878906, "learning_rate": 5.954762112877851e-06, "loss": 0.0646, "step": 4536 }, { "epoch": 0.6694208779048322, "grad_norm": 1.5012410879135132, "learning_rate": 5.950051922082377e-06, "loss": 0.0878, "step": 4537 }, { "epoch": 0.6695684249354482, "grad_norm": 1.1877696514129639, "learning_rate": 5.9453428058500075e-06, "loss": 0.0147, "step": 4538 }, { "epoch": 0.6697159719660641, "grad_norm": 2.5986015796661377, "learning_rate": 5.9406347654302e-06, "loss": 0.0556, "step": 4539 }, { "epoch": 0.6698635189966802, "grad_norm": 5.018265724182129, "learning_rate": 5.935927802072129e-06, "loss": 0.0536, "step": 4540 }, { "epoch": 0.6698635189966802, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05546742305159569, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0025, "eval_samples_per_second": 5.938, "eval_steps_per_second": 0.204, "step": 4540 }, { "epoch": 0.6700110660272962, "grad_norm": 1.5961254835128784, "learning_rate": 5.931221917024676e-06, "loss": 0.0453, "step": 4541 }, { "epoch": 0.6701586130579122, "grad_norm": 1.4644670486450195, "learning_rate": 5.9265171115364495e-06, "loss": 0.0406, "step": 4542 }, { "epoch": 0.6703061600885282, "grad_norm": 2.0708227157592773, "learning_rate": 5.921813386855766e-06, "loss": 0.0556, "step": 4543 }, { "epoch": 0.6704537071191442, "grad_norm": 1.5247308015823364, "learning_rate": 5.917110744230647e-06, "loss": 0.0394, "step": 4544 }, { "epoch": 0.6706012541497602, "grad_norm": 4.288096904754639, "learning_rate": 5.912409184908835e-06, "loss": 0.0287, "step": 4545 }, { "epoch": 0.6707488011803763, "grad_norm": 2.6560306549072266, "learning_rate": 5.9077087101377824e-06, "loss": 0.0823, "step": 4546 }, { "epoch": 0.6708963482109922, "grad_norm": 1.3913815021514893, "learning_rate": 5.903009321164661e-06, "loss": 0.0331, "step": 4547 }, { "epoch": 0.6710438952416082, "grad_norm": 4.867445468902588, "learning_rate": 5.898311019236344e-06, "loss": 0.0784, "step": 4548 }, { "epoch": 0.6711914422722243, "grad_norm": 1.5966745615005493, "learning_rate": 5.8936138055994204e-06, "loss": 0.0379, "step": 4549 }, { "epoch": 0.6713389893028403, "grad_norm": 1.4375993013381958, "learning_rate": 5.888917681500192e-06, "loss": 0.042, "step": 4550 }, { "epoch": 0.6714865363334563, "grad_norm": 1.0158500671386719, "learning_rate": 5.884222648184671e-06, "loss": 0.0366, "step": 4551 }, { "epoch": 0.6716340833640723, "grad_norm": 1.9147976636886597, "learning_rate": 5.879528706898578e-06, "loss": 0.1084, "step": 4552 }, { "epoch": 0.6717816303946883, "grad_norm": 3.1156363487243652, "learning_rate": 5.874835858887347e-06, "loss": 0.0847, "step": 4553 }, { "epoch": 0.6719291774253043, "grad_norm": 1.3792158365249634, "learning_rate": 5.8701441053961185e-06, "loss": 0.0369, "step": 4554 }, { "epoch": 0.6720767244559204, "grad_norm": 1.6121898889541626, "learning_rate": 5.865453447669742e-06, "loss": 0.0549, "step": 4555 }, { "epoch": 0.6722242714865363, "grad_norm": 2.166245222091675, "learning_rate": 5.860763886952783e-06, "loss": 0.0736, "step": 4556 }, { "epoch": 0.6723718185171523, "grad_norm": 1.5299535989761353, "learning_rate": 5.856075424489511e-06, "loss": 0.0517, "step": 4557 }, { "epoch": 0.6725193655477684, "grad_norm": 1.8735179901123047, "learning_rate": 5.851388061523906e-06, "loss": 0.0313, "step": 4558 }, { "epoch": 0.6726669125783844, "grad_norm": 1.2511532306671143, "learning_rate": 5.846701799299649e-06, "loss": 0.0321, "step": 4559 }, { "epoch": 0.6728144596090003, "grad_norm": 2.152923822402954, "learning_rate": 5.842016639060135e-06, "loss": 0.0681, "step": 4560 }, { "epoch": 0.6728144596090003, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.056243639439344406, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 48.8199, "eval_samples_per_second": 5.961, "eval_steps_per_second": 0.205, "step": 4560 }, { "epoch": 0.6729620066396164, "grad_norm": 8.56369400024414, "learning_rate": 5.837332582048476e-06, "loss": 0.1148, "step": 4561 }, { "epoch": 0.6731095536702324, "grad_norm": 1.1628910303115845, "learning_rate": 5.8326496295074695e-06, "loss": 0.0419, "step": 4562 }, { "epoch": 0.6732571007008484, "grad_norm": 0.9492687582969666, "learning_rate": 5.827967782679644e-06, "loss": 0.0248, "step": 4563 }, { "epoch": 0.6734046477314644, "grad_norm": 4.251297473907471, "learning_rate": 5.8232870428072165e-06, "loss": 0.1106, "step": 4564 }, { "epoch": 0.6735521947620804, "grad_norm": 1.2760663032531738, "learning_rate": 5.818607411132109e-06, "loss": 0.0399, "step": 4565 }, { "epoch": 0.6736997417926964, "grad_norm": 2.141134262084961, "learning_rate": 5.813928888895972e-06, "loss": 0.0412, "step": 4566 }, { "epoch": 0.6738472888233125, "grad_norm": 1.8894126415252686, "learning_rate": 5.809251477340141e-06, "loss": 0.0652, "step": 4567 }, { "epoch": 0.6739948358539284, "grad_norm": 2.264436960220337, "learning_rate": 5.804575177705656e-06, "loss": 0.0533, "step": 4568 }, { "epoch": 0.6741423828845444, "grad_norm": 3.341160774230957, "learning_rate": 5.799899991233274e-06, "loss": 0.1103, "step": 4569 }, { "epoch": 0.6742899299151605, "grad_norm": 2.6579737663269043, "learning_rate": 5.795225919163455e-06, "loss": 0.0633, "step": 4570 }, { "epoch": 0.6744374769457765, "grad_norm": 2.684652090072632, "learning_rate": 5.790552962736351e-06, "loss": 0.0401, "step": 4571 }, { "epoch": 0.6745850239763925, "grad_norm": 6.264983654022217, "learning_rate": 5.7858811231918345e-06, "loss": 0.0476, "step": 4572 }, { "epoch": 0.6747325710070085, "grad_norm": 2.9257707595825195, "learning_rate": 5.781210401769466e-06, "loss": 0.0885, "step": 4573 }, { "epoch": 0.6748801180376245, "grad_norm": 2.52628755569458, "learning_rate": 5.776540799708521e-06, "loss": 0.0889, "step": 4574 }, { "epoch": 0.6750276650682405, "grad_norm": 0.8999586701393127, "learning_rate": 5.771872318247978e-06, "loss": 0.0213, "step": 4575 }, { "epoch": 0.6751752120988566, "grad_norm": 1.6948333978652954, "learning_rate": 5.7672049586265045e-06, "loss": 0.0449, "step": 4576 }, { "epoch": 0.6753227591294725, "grad_norm": 1.96860933303833, "learning_rate": 5.762538722082489e-06, "loss": 0.0589, "step": 4577 }, { "epoch": 0.6754703061600885, "grad_norm": 1.3142787218093872, "learning_rate": 5.75787360985401e-06, "loss": 0.0214, "step": 4578 }, { "epoch": 0.6756178531907046, "grad_norm": 2.433887243270874, "learning_rate": 5.753209623178842e-06, "loss": 0.0982, "step": 4579 }, { "epoch": 0.6757654002213206, "grad_norm": 2.526639938354492, "learning_rate": 5.7485467632944825e-06, "loss": 0.0446, "step": 4580 }, { "epoch": 0.6757654002213206, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.056997910141944885, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3581, "eval_samples_per_second": 5.896, "eval_steps_per_second": 0.203, "step": 4580 }, { "epoch": 0.6759129472519365, "grad_norm": 2.1252338886260986, "learning_rate": 5.743885031438115e-06, "loss": 0.0691, "step": 4581 }, { "epoch": 0.6760604942825525, "grad_norm": 3.2747371196746826, "learning_rate": 5.739224428846617e-06, "loss": 0.1189, "step": 4582 }, { "epoch": 0.6762080413131686, "grad_norm": 1.904829740524292, "learning_rate": 5.7345649567565855e-06, "loss": 0.0348, "step": 4583 }, { "epoch": 0.6763555883437846, "grad_norm": 2.2084009647369385, "learning_rate": 5.729906616404297e-06, "loss": 0.0413, "step": 4584 }, { "epoch": 0.6765031353744005, "grad_norm": 3.237769603729248, "learning_rate": 5.725249409025744e-06, "loss": 0.0656, "step": 4585 }, { "epoch": 0.6766506824050166, "grad_norm": 1.5133224725723267, "learning_rate": 5.7205933358566146e-06, "loss": 0.0364, "step": 4586 }, { "epoch": 0.6767982294356326, "grad_norm": 2.2344539165496826, "learning_rate": 5.7159383981322866e-06, "loss": 0.05, "step": 4587 }, { "epoch": 0.6769457764662486, "grad_norm": 2.808020830154419, "learning_rate": 5.711284597087848e-06, "loss": 0.0776, "step": 4588 }, { "epoch": 0.6770933234968646, "grad_norm": 2.5633034706115723, "learning_rate": 5.706631933958084e-06, "loss": 0.0553, "step": 4589 }, { "epoch": 0.6772408705274806, "grad_norm": 1.4727919101715088, "learning_rate": 5.701980409977468e-06, "loss": 0.0262, "step": 4590 }, { "epoch": 0.6773884175580966, "grad_norm": 2.089125394821167, "learning_rate": 5.697330026380183e-06, "loss": 0.0457, "step": 4591 }, { "epoch": 0.6775359645887127, "grad_norm": 1.4878268241882324, "learning_rate": 5.692680784400102e-06, "loss": 0.0571, "step": 4592 }, { "epoch": 0.6776835116193287, "grad_norm": 3.494631290435791, "learning_rate": 5.68803268527079e-06, "loss": 0.0596, "step": 4593 }, { "epoch": 0.6778310586499446, "grad_norm": 3.8416221141815186, "learning_rate": 5.683385730225532e-06, "loss": 0.0532, "step": 4594 }, { "epoch": 0.6779786056805607, "grad_norm": 2.775015354156494, "learning_rate": 5.678739920497284e-06, "loss": 0.0491, "step": 4595 }, { "epoch": 0.6781261527111767, "grad_norm": 1.2298039197921753, "learning_rate": 5.674095257318703e-06, "loss": 0.0426, "step": 4596 }, { "epoch": 0.6782736997417927, "grad_norm": 2.8514065742492676, "learning_rate": 5.669451741922159e-06, "loss": 0.1025, "step": 4597 }, { "epoch": 0.6784212467724087, "grad_norm": 1.3261842727661133, "learning_rate": 5.664809375539692e-06, "loss": 0.0266, "step": 4598 }, { "epoch": 0.6785687938030247, "grad_norm": 1.6873785257339478, "learning_rate": 5.660168159403056e-06, "loss": 0.0628, "step": 4599 }, { "epoch": 0.6787163408336407, "grad_norm": 1.8481849431991577, "learning_rate": 5.655528094743698e-06, "loss": 0.0238, "step": 4600 }, { "epoch": 0.6787163408336407, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05593443289399147, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3756, "eval_samples_per_second": 5.894, "eval_steps_per_second": 0.203, "step": 4600 }, { "epoch": 0.6788638878642568, "grad_norm": 2.081822395324707, "learning_rate": 5.650889182792748e-06, "loss": 0.0279, "step": 4601 }, { "epoch": 0.6790114348948727, "grad_norm": 1.4815239906311035, "learning_rate": 5.6462514247810444e-06, "loss": 0.052, "step": 4602 }, { "epoch": 0.6791589819254887, "grad_norm": 1.4074938297271729, "learning_rate": 5.641614821939104e-06, "loss": 0.0142, "step": 4603 }, { "epoch": 0.6793065289561048, "grad_norm": 2.3841392993927, "learning_rate": 5.636979375497151e-06, "loss": 0.0638, "step": 4604 }, { "epoch": 0.6794540759867208, "grad_norm": 2.219118595123291, "learning_rate": 5.6323450866851e-06, "loss": 0.0562, "step": 4605 }, { "epoch": 0.6796016230173367, "grad_norm": 5.843283176422119, "learning_rate": 5.627711956732553e-06, "loss": 0.0699, "step": 4606 }, { "epoch": 0.6797491700479528, "grad_norm": 3.2192442417144775, "learning_rate": 5.623079986868799e-06, "loss": 0.096, "step": 4607 }, { "epoch": 0.6798967170785688, "grad_norm": 3.709993362426758, "learning_rate": 5.618449178322844e-06, "loss": 0.0356, "step": 4608 }, { "epoch": 0.6800442641091848, "grad_norm": 4.850296974182129, "learning_rate": 5.61381953232336e-06, "loss": 0.0652, "step": 4609 }, { "epoch": 0.6801918111398008, "grad_norm": 2.156365156173706, "learning_rate": 5.6091910500987166e-06, "loss": 0.0412, "step": 4610 }, { "epoch": 0.6803393581704168, "grad_norm": 1.5853725671768188, "learning_rate": 5.604563732876989e-06, "loss": 0.037, "step": 4611 }, { "epoch": 0.6804869052010328, "grad_norm": 1.649251937866211, "learning_rate": 5.59993758188592e-06, "loss": 0.0571, "step": 4612 }, { "epoch": 0.6806344522316489, "grad_norm": 3.2777063846588135, "learning_rate": 5.59531259835296e-06, "loss": 0.0849, "step": 4613 }, { "epoch": 0.6807819992622648, "grad_norm": 1.2833290100097656, "learning_rate": 5.590688783505249e-06, "loss": 0.0248, "step": 4614 }, { "epoch": 0.6809295462928808, "grad_norm": 3.259247303009033, "learning_rate": 5.586066138569608e-06, "loss": 0.0439, "step": 4615 }, { "epoch": 0.6810770933234969, "grad_norm": 1.504201889038086, "learning_rate": 5.5814446647725574e-06, "loss": 0.0436, "step": 4616 }, { "epoch": 0.6812246403541129, "grad_norm": 3.5449728965759277, "learning_rate": 5.576824363340293e-06, "loss": 0.0643, "step": 4617 }, { "epoch": 0.6813721873847289, "grad_norm": 4.240931987762451, "learning_rate": 5.5722052354987154e-06, "loss": 0.103, "step": 4618 }, { "epoch": 0.6815197344153449, "grad_norm": 4.5008368492126465, "learning_rate": 5.567587282473408e-06, "loss": 0.0847, "step": 4619 }, { "epoch": 0.6816672814459609, "grad_norm": 2.1032140254974365, "learning_rate": 5.56297050548964e-06, "loss": 0.0653, "step": 4620 }, { "epoch": 0.6816672814459609, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.0547698512673378, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8649, "eval_samples_per_second": 5.836, "eval_steps_per_second": 0.201, "step": 4620 }, { "epoch": 0.6818148284765769, "grad_norm": 1.6405268907546997, "learning_rate": 5.558354905772363e-06, "loss": 0.0211, "step": 4621 }, { "epoch": 0.681962375507193, "grad_norm": 3.434992551803589, "learning_rate": 5.553740484546229e-06, "loss": 0.1316, "step": 4622 }, { "epoch": 0.6821099225378089, "grad_norm": 2.5380048751831055, "learning_rate": 5.549127243035574e-06, "loss": 0.0606, "step": 4623 }, { "epoch": 0.6822574695684249, "grad_norm": 1.9085692167282104, "learning_rate": 5.544515182464414e-06, "loss": 0.0258, "step": 4624 }, { "epoch": 0.682405016599041, "grad_norm": 2.6448922157287598, "learning_rate": 5.53990430405646e-06, "loss": 0.0737, "step": 4625 }, { "epoch": 0.682552563629657, "grad_norm": 0.7642562985420227, "learning_rate": 5.535294609035099e-06, "loss": 0.0093, "step": 4626 }, { "epoch": 0.6827001106602729, "grad_norm": 1.6839114427566528, "learning_rate": 5.530686098623414e-06, "loss": 0.0351, "step": 4627 }, { "epoch": 0.682847657690889, "grad_norm": 1.5677410364151, "learning_rate": 5.526078774044174e-06, "loss": 0.0597, "step": 4628 }, { "epoch": 0.682995204721505, "grad_norm": 4.345653533935547, "learning_rate": 5.521472636519823e-06, "loss": 0.1201, "step": 4629 }, { "epoch": 0.683142751752121, "grad_norm": 1.3154315948486328, "learning_rate": 5.516867687272504e-06, "loss": 0.0283, "step": 4630 }, { "epoch": 0.683290298782737, "grad_norm": 1.605300784111023, "learning_rate": 5.512263927524026e-06, "loss": 0.0409, "step": 4631 }, { "epoch": 0.683437845813353, "grad_norm": 3.8189263343811035, "learning_rate": 5.5076613584959035e-06, "loss": 0.053, "step": 4632 }, { "epoch": 0.683585392843969, "grad_norm": 4.527725696563721, "learning_rate": 5.503059981409325e-06, "loss": 0.046, "step": 4633 }, { "epoch": 0.683732939874585, "grad_norm": 6.160957336425781, "learning_rate": 5.498459797485158e-06, "loss": 0.095, "step": 4634 }, { "epoch": 0.683880486905201, "grad_norm": 2.801215171813965, "learning_rate": 5.493860807943958e-06, "loss": 0.0699, "step": 4635 }, { "epoch": 0.684028033935817, "grad_norm": 5.829277515411377, "learning_rate": 5.4892630140059665e-06, "loss": 0.0592, "step": 4636 }, { "epoch": 0.684175580966433, "grad_norm": 2.45528244972229, "learning_rate": 5.484666416891109e-06, "loss": 0.0606, "step": 4637 }, { "epoch": 0.6843231279970491, "grad_norm": 1.8605555295944214, "learning_rate": 5.480071017818982e-06, "loss": 0.0521, "step": 4638 }, { "epoch": 0.6844706750276651, "grad_norm": 1.5198736190795898, "learning_rate": 5.475476818008878e-06, "loss": 0.0358, "step": 4639 }, { "epoch": 0.684618222058281, "grad_norm": 2.3037924766540527, "learning_rate": 5.470883818679761e-06, "loss": 0.05, "step": 4640 }, { "epoch": 0.684618222058281, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.05457611009478569, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 49.3996, "eval_samples_per_second": 5.891, "eval_steps_per_second": 0.202, "step": 4640 }, { "epoch": 0.6847657690888971, "grad_norm": 2.474771499633789, "learning_rate": 5.466292021050281e-06, "loss": 0.048, "step": 4641 }, { "epoch": 0.6849133161195131, "grad_norm": 0.7616437673568726, "learning_rate": 5.461701426338775e-06, "loss": 0.0069, "step": 4642 }, { "epoch": 0.6850608631501292, "grad_norm": 3.2531471252441406, "learning_rate": 5.457112035763243e-06, "loss": 0.0453, "step": 4643 }, { "epoch": 0.6852084101807451, "grad_norm": 3.4857687950134277, "learning_rate": 5.452523850541391e-06, "loss": 0.1229, "step": 4644 }, { "epoch": 0.6853559572113611, "grad_norm": 1.3255691528320312, "learning_rate": 5.447936871890578e-06, "loss": 0.037, "step": 4645 }, { "epoch": 0.6855035042419771, "grad_norm": 2.3378636837005615, "learning_rate": 5.443351101027863e-06, "loss": 0.0385, "step": 4646 }, { "epoch": 0.6856510512725932, "grad_norm": 2.0383617877960205, "learning_rate": 5.438766539169982e-06, "loss": 0.0611, "step": 4647 }, { "epoch": 0.6857985983032091, "grad_norm": 3.790567398071289, "learning_rate": 5.434183187533335e-06, "loss": 0.1215, "step": 4648 }, { "epoch": 0.6859461453338251, "grad_norm": 2.1201913356781006, "learning_rate": 5.429601047334022e-06, "loss": 0.0217, "step": 4649 }, { "epoch": 0.6860936923644412, "grad_norm": 1.737138032913208, "learning_rate": 5.425020119787803e-06, "loss": 0.0609, "step": 4650 }, { "epoch": 0.6862412393950572, "grad_norm": 2.2892613410949707, "learning_rate": 5.420440406110129e-06, "loss": 0.0591, "step": 4651 }, { "epoch": 0.6863887864256731, "grad_norm": 2.4004242420196533, "learning_rate": 5.415861907516129e-06, "loss": 0.0333, "step": 4652 }, { "epoch": 0.6865363334562892, "grad_norm": 2.855923891067505, "learning_rate": 5.4112846252205985e-06, "loss": 0.0308, "step": 4653 }, { "epoch": 0.6866838804869052, "grad_norm": 1.8993141651153564, "learning_rate": 5.406708560438015e-06, "loss": 0.0476, "step": 4654 }, { "epoch": 0.6868314275175212, "grad_norm": 2.9163031578063965, "learning_rate": 5.40213371438254e-06, "loss": 0.0792, "step": 4655 }, { "epoch": 0.6869789745481372, "grad_norm": 1.5787954330444336, "learning_rate": 5.397560088268008e-06, "loss": 0.0358, "step": 4656 }, { "epoch": 0.6871265215787532, "grad_norm": 2.2038626670837402, "learning_rate": 5.392987683307922e-06, "loss": 0.0468, "step": 4657 }, { "epoch": 0.6872740686093692, "grad_norm": 2.4745113849639893, "learning_rate": 5.388416500715474e-06, "loss": 0.0929, "step": 4658 }, { "epoch": 0.6874216156399853, "grad_norm": 1.4698036909103394, "learning_rate": 5.383846541703519e-06, "loss": 0.0305, "step": 4659 }, { "epoch": 0.6875691626706013, "grad_norm": 1.6903125047683716, "learning_rate": 5.379277807484596e-06, "loss": 0.048, "step": 4660 }, { "epoch": 0.6875691626706013, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.054064974188804626, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 50.136, "eval_samples_per_second": 5.804, "eval_steps_per_second": 0.199, "step": 4660 }, { "epoch": 0.6877167097012172, "grad_norm": 1.3866111040115356, "learning_rate": 5.374710299270921e-06, "loss": 0.0273, "step": 4661 }, { "epoch": 0.6878642567318333, "grad_norm": 3.9755289554595947, "learning_rate": 5.370144018274372e-06, "loss": 0.0478, "step": 4662 }, { "epoch": 0.6880118037624493, "grad_norm": 2.0998730659484863, "learning_rate": 5.365578965706516e-06, "loss": 0.0599, "step": 4663 }, { "epoch": 0.6881593507930653, "grad_norm": 2.7026755809783936, "learning_rate": 5.361015142778582e-06, "loss": 0.0394, "step": 4664 }, { "epoch": 0.6883068978236813, "grad_norm": 3.7726657390594482, "learning_rate": 5.356452550701481e-06, "loss": 0.0823, "step": 4665 }, { "epoch": 0.6884544448542973, "grad_norm": 3.169844627380371, "learning_rate": 5.3518911906857985e-06, "loss": 0.1038, "step": 4666 }, { "epoch": 0.6886019918849133, "grad_norm": 1.8277506828308105, "learning_rate": 5.347331063941786e-06, "loss": 0.023, "step": 4667 }, { "epoch": 0.6887495389155294, "grad_norm": 1.5094001293182373, "learning_rate": 5.342772171679364e-06, "loss": 0.0413, "step": 4668 }, { "epoch": 0.6888970859461453, "grad_norm": 2.628838539123535, "learning_rate": 5.3382145151081435e-06, "loss": 0.0724, "step": 4669 }, { "epoch": 0.6890446329767613, "grad_norm": 2.540367364883423, "learning_rate": 5.333658095437395e-06, "loss": 0.0624, "step": 4670 }, { "epoch": 0.6891921800073774, "grad_norm": 2.2520368099212646, "learning_rate": 5.329102913876055e-06, "loss": 0.0609, "step": 4671 }, { "epoch": 0.6893397270379934, "grad_norm": 1.8281601667404175, "learning_rate": 5.324548971632748e-06, "loss": 0.0628, "step": 4672 }, { "epoch": 0.6894872740686093, "grad_norm": 2.0493619441986084, "learning_rate": 5.319996269915755e-06, "loss": 0.0245, "step": 4673 }, { "epoch": 0.6896348210992254, "grad_norm": 4.497646331787109, "learning_rate": 5.315444809933032e-06, "loss": 0.1471, "step": 4674 }, { "epoch": 0.6897823681298414, "grad_norm": 0.9106382727622986, "learning_rate": 5.310894592892217e-06, "loss": 0.0064, "step": 4675 }, { "epoch": 0.6899299151604574, "grad_norm": 1.2549246549606323, "learning_rate": 5.3063456200005965e-06, "loss": 0.0402, "step": 4676 }, { "epoch": 0.6900774621910734, "grad_norm": 3.292407989501953, "learning_rate": 5.301797892465148e-06, "loss": 0.0655, "step": 4677 }, { "epoch": 0.6902250092216894, "grad_norm": 1.2164607048034668, "learning_rate": 5.297251411492503e-06, "loss": 0.0145, "step": 4678 }, { "epoch": 0.6903725562523054, "grad_norm": 3.410823106765747, "learning_rate": 5.292706178288966e-06, "loss": 0.0675, "step": 4679 }, { "epoch": 0.6905201032829215, "grad_norm": 3.0682482719421387, "learning_rate": 5.288162194060523e-06, "loss": 0.0746, "step": 4680 }, { "epoch": 0.6905201032829215, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.056046705693006516, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 50.476, "eval_samples_per_second": 5.765, "eval_steps_per_second": 0.198, "step": 4680 }, { "epoch": 0.6906676503135374, "grad_norm": 2.1872940063476562, "learning_rate": 5.283619460012814e-06, "loss": 0.0564, "step": 4681 }, { "epoch": 0.6908151973441534, "grad_norm": 3.5949339866638184, "learning_rate": 5.279077977351146e-06, "loss": 0.1179, "step": 4682 }, { "epoch": 0.6909627443747695, "grad_norm": 1.711521863937378, "learning_rate": 5.274537747280505e-06, "loss": 0.0486, "step": 4683 }, { "epoch": 0.6911102914053855, "grad_norm": 1.7035961151123047, "learning_rate": 5.269998771005542e-06, "loss": 0.072, "step": 4684 }, { "epoch": 0.6912578384360015, "grad_norm": 3.3933184146881104, "learning_rate": 5.265461049730568e-06, "loss": 0.1412, "step": 4685 }, { "epoch": 0.6914053854666175, "grad_norm": 1.9112924337387085, "learning_rate": 5.26092458465957e-06, "loss": 0.0739, "step": 4686 }, { "epoch": 0.6915529324972335, "grad_norm": 3.6496407985687256, "learning_rate": 5.256389376996192e-06, "loss": 0.0548, "step": 4687 }, { "epoch": 0.6917004795278495, "grad_norm": 1.4398243427276611, "learning_rate": 5.251855427943753e-06, "loss": 0.0415, "step": 4688 }, { "epoch": 0.6918480265584656, "grad_norm": 3.6281752586364746, "learning_rate": 5.247322738705239e-06, "loss": 0.1014, "step": 4689 }, { "epoch": 0.6919955735890815, "grad_norm": 2.4000320434570312, "learning_rate": 5.242791310483288e-06, "loss": 0.0639, "step": 4690 }, { "epoch": 0.6921431206196975, "grad_norm": 2.0405635833740234, "learning_rate": 5.238261144480225e-06, "loss": 0.0538, "step": 4691 }, { "epoch": 0.6922906676503136, "grad_norm": 1.8086013793945312, "learning_rate": 5.233732241898021e-06, "loss": 0.0505, "step": 4692 }, { "epoch": 0.6924382146809296, "grad_norm": 2.220877170562744, "learning_rate": 5.2292046039383115e-06, "loss": 0.042, "step": 4693 }, { "epoch": 0.6925857617115455, "grad_norm": 2.452057361602783, "learning_rate": 5.22467823180242e-06, "loss": 0.0892, "step": 4694 }, { "epoch": 0.6927333087421615, "grad_norm": 1.4002968072891235, "learning_rate": 5.220153126691312e-06, "loss": 0.0412, "step": 4695 }, { "epoch": 0.6928808557727776, "grad_norm": 2.3197379112243652, "learning_rate": 5.215629289805616e-06, "loss": 0.048, "step": 4696 }, { "epoch": 0.6930284028033936, "grad_norm": 1.9746086597442627, "learning_rate": 5.21110672234564e-06, "loss": 0.0294, "step": 4697 }, { "epoch": 0.6931759498340095, "grad_norm": 2.7834994792938232, "learning_rate": 5.206585425511339e-06, "loss": 0.0497, "step": 4698 }, { "epoch": 0.6933234968646256, "grad_norm": 2.4984586238861084, "learning_rate": 5.202065400502341e-06, "loss": 0.058, "step": 4699 }, { "epoch": 0.6934710438952416, "grad_norm": 1.4515494108200073, "learning_rate": 5.197546648517937e-06, "loss": 0.036, "step": 4700 }, { "epoch": 0.6934710438952416, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.05514227971434593, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 49.5267, "eval_samples_per_second": 5.876, "eval_steps_per_second": 0.202, "step": 4700 }, { "epoch": 0.6936185909258576, "grad_norm": 8.75732707977295, "learning_rate": 5.19302917075707e-06, "loss": 0.0309, "step": 4701 }, { "epoch": 0.6937661379564736, "grad_norm": 2.6954081058502197, "learning_rate": 5.18851296841836e-06, "loss": 0.0874, "step": 4702 }, { "epoch": 0.6939136849870896, "grad_norm": 2.049119472503662, "learning_rate": 5.1839980427000695e-06, "loss": 0.1053, "step": 4703 }, { "epoch": 0.6940612320177056, "grad_norm": 1.6941841840744019, "learning_rate": 5.1794843948001396e-06, "loss": 0.0369, "step": 4704 }, { "epoch": 0.6942087790483217, "grad_norm": 5.694141387939453, "learning_rate": 5.1749720259161695e-06, "loss": 0.0585, "step": 4705 }, { "epoch": 0.6943563260789377, "grad_norm": 4.27250862121582, "learning_rate": 5.17046093724541e-06, "loss": 0.0332, "step": 4706 }, { "epoch": 0.6945038731095536, "grad_norm": 1.8809525966644287, "learning_rate": 5.16595112998477e-06, "loss": 0.0549, "step": 4707 }, { "epoch": 0.6946514201401697, "grad_norm": 1.3173998594284058, "learning_rate": 5.161442605330844e-06, "loss": 0.0141, "step": 4708 }, { "epoch": 0.6947989671707857, "grad_norm": 2.760469436645508, "learning_rate": 5.156935364479856e-06, "loss": 0.0847, "step": 4709 }, { "epoch": 0.6949465142014017, "grad_norm": 1.119059443473816, "learning_rate": 5.152429408627701e-06, "loss": 0.0219, "step": 4710 }, { "epoch": 0.6950940612320177, "grad_norm": 4.338699817657471, "learning_rate": 5.147924738969942e-06, "loss": 0.0608, "step": 4711 }, { "epoch": 0.6952416082626337, "grad_norm": 3.4097437858581543, "learning_rate": 5.143421356701781e-06, "loss": 0.0407, "step": 4712 }, { "epoch": 0.6953891552932497, "grad_norm": 3.3604140281677246, "learning_rate": 5.138919263018096e-06, "loss": 0.0866, "step": 4713 }, { "epoch": 0.6955367023238658, "grad_norm": 1.3192962408065796, "learning_rate": 5.134418459113422e-06, "loss": 0.0258, "step": 4714 }, { "epoch": 0.6956842493544817, "grad_norm": 2.792595148086548, "learning_rate": 5.129918946181934e-06, "loss": 0.0566, "step": 4715 }, { "epoch": 0.6958317963850977, "grad_norm": 1.9875514507293701, "learning_rate": 5.12542072541749e-06, "loss": 0.0257, "step": 4716 }, { "epoch": 0.6959793434157138, "grad_norm": 1.403470516204834, "learning_rate": 5.120923798013582e-06, "loss": 0.0213, "step": 4717 }, { "epoch": 0.6961268904463298, "grad_norm": 2.1419942378997803, "learning_rate": 5.116428165163373e-06, "loss": 0.0458, "step": 4718 }, { "epoch": 0.6962744374769457, "grad_norm": 1.3339232206344604, "learning_rate": 5.111933828059683e-06, "loss": 0.0318, "step": 4719 }, { "epoch": 0.6964219845075618, "grad_norm": 3.28307843208313, "learning_rate": 5.1074407878949795e-06, "loss": 0.0498, "step": 4720 }, { "epoch": 0.6964219845075618, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.055511847138404846, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 52.8324, "eval_samples_per_second": 5.508, "eval_steps_per_second": 0.189, "step": 4720 }, { "epoch": 0.6965695315381778, "grad_norm": 2.7041454315185547, "learning_rate": 5.102949045861387e-06, "loss": 0.0656, "step": 4721 }, { "epoch": 0.6967170785687938, "grad_norm": 1.473251223564148, "learning_rate": 5.098458603150691e-06, "loss": 0.023, "step": 4722 }, { "epoch": 0.6968646255994098, "grad_norm": 1.7501991987228394, "learning_rate": 5.0939694609543355e-06, "loss": 0.0344, "step": 4723 }, { "epoch": 0.6970121726300258, "grad_norm": 1.2850037813186646, "learning_rate": 5.089481620463407e-06, "loss": 0.024, "step": 4724 }, { "epoch": 0.6971597196606418, "grad_norm": 2.0505523681640625, "learning_rate": 5.084995082868658e-06, "loss": 0.0433, "step": 4725 }, { "epoch": 0.6973072666912579, "grad_norm": 1.732586145401001, "learning_rate": 5.0805098493604866e-06, "loss": 0.0381, "step": 4726 }, { "epoch": 0.6974548137218739, "grad_norm": 0.7503189444541931, "learning_rate": 5.076025921128951e-06, "loss": 0.0183, "step": 4727 }, { "epoch": 0.6976023607524898, "grad_norm": 1.3052852153778076, "learning_rate": 5.071543299363766e-06, "loss": 0.0256, "step": 4728 }, { "epoch": 0.6977499077831059, "grad_norm": 1.709081768989563, "learning_rate": 5.067061985254287e-06, "loss": 0.0469, "step": 4729 }, { "epoch": 0.6978974548137219, "grad_norm": 2.1274194717407227, "learning_rate": 5.062581979989537e-06, "loss": 0.0336, "step": 4730 }, { "epoch": 0.6980450018443379, "grad_norm": 1.1657981872558594, "learning_rate": 5.058103284758179e-06, "loss": 0.046, "step": 4731 }, { "epoch": 0.6981925488749539, "grad_norm": 2.0870044231414795, "learning_rate": 5.053625900748538e-06, "loss": 0.0471, "step": 4732 }, { "epoch": 0.6983400959055699, "grad_norm": 4.870445728302002, "learning_rate": 5.0491498291485915e-06, "loss": 0.1134, "step": 4733 }, { "epoch": 0.6984876429361859, "grad_norm": 7.5975213050842285, "learning_rate": 5.04467507114596e-06, "loss": 0.1201, "step": 4734 }, { "epoch": 0.698635189966802, "grad_norm": 1.4844144582748413, "learning_rate": 5.040201627927918e-06, "loss": 0.029, "step": 4735 }, { "epoch": 0.6987827369974179, "grad_norm": 1.3683654069900513, "learning_rate": 5.035729500681399e-06, "loss": 0.0345, "step": 4736 }, { "epoch": 0.6989302840280339, "grad_norm": 2.903507947921753, "learning_rate": 5.031258690592982e-06, "loss": 0.0789, "step": 4737 }, { "epoch": 0.69907783105865, "grad_norm": 1.0233534574508667, "learning_rate": 5.0267891988488895e-06, "loss": 0.0261, "step": 4738 }, { "epoch": 0.699225378089266, "grad_norm": 4.892428874969482, "learning_rate": 5.022321026635011e-06, "loss": 0.1184, "step": 4739 }, { "epoch": 0.6993729251198819, "grad_norm": 3.0594170093536377, "learning_rate": 5.017854175136867e-06, "loss": 0.0327, "step": 4740 }, { "epoch": 0.6993729251198819, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.06025285646319389, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 51.3682, "eval_samples_per_second": 5.665, "eval_steps_per_second": 0.195, "step": 4740 }, { "epoch": 0.699520472150498, "grad_norm": 2.0277719497680664, "learning_rate": 5.013388645539641e-06, "loss": 0.0156, "step": 4741 }, { "epoch": 0.699668019181114, "grad_norm": 3.091407537460327, "learning_rate": 5.0089244390281645e-06, "loss": 0.0646, "step": 4742 }, { "epoch": 0.69981556621173, "grad_norm": 0.809323251247406, "learning_rate": 5.004461556786906e-06, "loss": 0.016, "step": 4743 }, { "epoch": 0.699963113242346, "grad_norm": 4.385626316070557, "learning_rate": 5.000000000000003e-06, "loss": 0.0838, "step": 4744 }, { "epoch": 0.700110660272962, "grad_norm": 4.867463111877441, "learning_rate": 4.995539769851218e-06, "loss": 0.115, "step": 4745 }, { "epoch": 0.700258207303578, "grad_norm": 1.2739475965499878, "learning_rate": 4.9910808675239785e-06, "loss": 0.0246, "step": 4746 }, { "epoch": 0.700405754334194, "grad_norm": 1.8665733337402344, "learning_rate": 4.986623294201359e-06, "loss": 0.0216, "step": 4747 }, { "epoch": 0.7005533013648101, "grad_norm": 3.2292773723602295, "learning_rate": 4.982167051066072e-06, "loss": 0.0992, "step": 4748 }, { "epoch": 0.700700848395426, "grad_norm": 1.9645966291427612, "learning_rate": 4.977712139300479e-06, "loss": 0.06, "step": 4749 }, { "epoch": 0.700848395426042, "grad_norm": 2.251530647277832, "learning_rate": 4.973258560086595e-06, "loss": 0.0505, "step": 4750 }, { "epoch": 0.7009959424566581, "grad_norm": 3.1392159461975098, "learning_rate": 4.9688063146060805e-06, "loss": 0.0586, "step": 4751 }, { "epoch": 0.7011434894872741, "grad_norm": 2.2067291736602783, "learning_rate": 4.964355404040232e-06, "loss": 0.0477, "step": 4752 }, { "epoch": 0.70129103651789, "grad_norm": 1.406163215637207, "learning_rate": 4.959905829570008e-06, "loss": 0.0291, "step": 4753 }, { "epoch": 0.7014385835485061, "grad_norm": 4.8565826416015625, "learning_rate": 4.9554575923759926e-06, "loss": 0.0792, "step": 4754 }, { "epoch": 0.7015861305791221, "grad_norm": 2.68526554107666, "learning_rate": 4.951010693638433e-06, "loss": 0.0391, "step": 4755 }, { "epoch": 0.7017336776097381, "grad_norm": 1.7152152061462402, "learning_rate": 4.946565134537216e-06, "loss": 0.0265, "step": 4756 }, { "epoch": 0.7018812246403541, "grad_norm": 6.578200817108154, "learning_rate": 4.942120916251866e-06, "loss": 0.0637, "step": 4757 }, { "epoch": 0.7020287716709701, "grad_norm": 3.0830886363983154, "learning_rate": 4.937678039961562e-06, "loss": 0.0234, "step": 4758 }, { "epoch": 0.7021763187015861, "grad_norm": 3.7739224433898926, "learning_rate": 4.933236506845119e-06, "loss": 0.0807, "step": 4759 }, { "epoch": 0.7023238657322022, "grad_norm": 3.905097723007202, "learning_rate": 4.928796318080991e-06, "loss": 0.0637, "step": 4760 }, { "epoch": 0.7023238657322022, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9655172413793104, "eval_loss": 0.05525914207100868, "eval_precision": 0.98, "eval_recall": 0.9514563106796117, "eval_runtime": 49.4588, "eval_samples_per_second": 5.884, "eval_steps_per_second": 0.202, "step": 4760 }, { "epoch": 0.7024714127628181, "grad_norm": 9.33144474029541, "learning_rate": 4.924357474847299e-06, "loss": 0.0732, "step": 4761 }, { "epoch": 0.7026189597934341, "grad_norm": 2.88041615486145, "learning_rate": 4.91991997832178e-06, "loss": 0.0888, "step": 4762 }, { "epoch": 0.7027665068240502, "grad_norm": 4.127414226531982, "learning_rate": 4.9154838296818246e-06, "loss": 0.0478, "step": 4763 }, { "epoch": 0.7029140538546662, "grad_norm": 2.960581064224243, "learning_rate": 4.911049030104466e-06, "loss": 0.0557, "step": 4764 }, { "epoch": 0.7030616008852821, "grad_norm": 2.913341999053955, "learning_rate": 4.906615580766384e-06, "loss": 0.0587, "step": 4765 }, { "epoch": 0.7032091479158982, "grad_norm": 2.4078097343444824, "learning_rate": 4.902183482843888e-06, "loss": 0.041, "step": 4766 }, { "epoch": 0.7033566949465142, "grad_norm": 4.419480323791504, "learning_rate": 4.897752737512944e-06, "loss": 0.0448, "step": 4767 }, { "epoch": 0.7035042419771302, "grad_norm": 2.7044637203216553, "learning_rate": 4.8933233459491424e-06, "loss": 0.0855, "step": 4768 }, { "epoch": 0.7036517890077462, "grad_norm": 3.614534378051758, "learning_rate": 4.888895309327728e-06, "loss": 0.0531, "step": 4769 }, { "epoch": 0.7037993360383622, "grad_norm": 5.617615699768066, "learning_rate": 4.884468628823583e-06, "loss": 0.0801, "step": 4770 }, { "epoch": 0.7039468830689782, "grad_norm": 1.2464123964309692, "learning_rate": 4.880043305611224e-06, "loss": 0.0223, "step": 4771 }, { "epoch": 0.7040944300995943, "grad_norm": 3.384556770324707, "learning_rate": 4.875619340864815e-06, "loss": 0.0683, "step": 4772 }, { "epoch": 0.7042419771302103, "grad_norm": 3.4770638942718506, "learning_rate": 4.871196735758153e-06, "loss": 0.1092, "step": 4773 }, { "epoch": 0.7043895241608262, "grad_norm": 3.406653642654419, "learning_rate": 4.866775491464676e-06, "loss": 0.1264, "step": 4774 }, { "epoch": 0.7045370711914423, "grad_norm": 4.548017501831055, "learning_rate": 4.86235560915747e-06, "loss": 0.0511, "step": 4775 }, { "epoch": 0.7046846182220583, "grad_norm": 1.751783847808838, "learning_rate": 4.857937090009243e-06, "loss": 0.0455, "step": 4776 }, { "epoch": 0.7048321652526743, "grad_norm": 2.5198867321014404, "learning_rate": 4.853519935192357e-06, "loss": 0.0956, "step": 4777 }, { "epoch": 0.7049797122832903, "grad_norm": 2.1479275226593018, "learning_rate": 4.849104145878803e-06, "loss": 0.0538, "step": 4778 }, { "epoch": 0.7051272593139063, "grad_norm": 3.9840872287750244, "learning_rate": 4.844689723240202e-06, "loss": 0.0641, "step": 4779 }, { "epoch": 0.7052748063445223, "grad_norm": 1.1084744930267334, "learning_rate": 4.8402766684478396e-06, "loss": 0.0089, "step": 4780 }, { "epoch": 0.7052748063445223, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.06107376888394356, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 50.0538, "eval_samples_per_second": 5.814, "eval_steps_per_second": 0.2, "step": 4780 }, { "epoch": 0.7054223533751384, "grad_norm": 1.3147999048233032, "learning_rate": 4.835864982672612e-06, "loss": 0.0576, "step": 4781 }, { "epoch": 0.7055699004057543, "grad_norm": 1.7662733793258667, "learning_rate": 4.831454667085059e-06, "loss": 0.0417, "step": 4782 }, { "epoch": 0.7057174474363703, "grad_norm": 2.976698398590088, "learning_rate": 4.827045722855364e-06, "loss": 0.1089, "step": 4783 }, { "epoch": 0.7058649944669864, "grad_norm": 1.1642670631408691, "learning_rate": 4.822638151153341e-06, "loss": 0.0158, "step": 4784 }, { "epoch": 0.7060125414976024, "grad_norm": 2.3276305198669434, "learning_rate": 4.818231953148437e-06, "loss": 0.0227, "step": 4785 }, { "epoch": 0.7061600885282183, "grad_norm": 3.0932188034057617, "learning_rate": 4.813827130009744e-06, "loss": 0.0716, "step": 4786 }, { "epoch": 0.7063076355588344, "grad_norm": 3.571537971496582, "learning_rate": 4.809423682905977e-06, "loss": 0.0715, "step": 4787 }, { "epoch": 0.7064551825894504, "grad_norm": 3.9377129077911377, "learning_rate": 4.805021613005494e-06, "loss": 0.0279, "step": 4788 }, { "epoch": 0.7066027296200664, "grad_norm": 2.9944138526916504, "learning_rate": 4.80062092147629e-06, "loss": 0.0664, "step": 4789 }, { "epoch": 0.7067502766506824, "grad_norm": 1.9466123580932617, "learning_rate": 4.796221609485983e-06, "loss": 0.0783, "step": 4790 }, { "epoch": 0.7068978236812984, "grad_norm": 1.0460115671157837, "learning_rate": 4.791823678201841e-06, "loss": 0.0294, "step": 4791 }, { "epoch": 0.7070453707119144, "grad_norm": 2.0468013286590576, "learning_rate": 4.78742712879075e-06, "loss": 0.0377, "step": 4792 }, { "epoch": 0.7071929177425305, "grad_norm": 1.687163233757019, "learning_rate": 4.783031962419231e-06, "loss": 0.0272, "step": 4793 }, { "epoch": 0.7073404647731465, "grad_norm": 2.560441017150879, "learning_rate": 4.778638180253456e-06, "loss": 0.0734, "step": 4794 }, { "epoch": 0.7074880118037624, "grad_norm": 2.3611509799957275, "learning_rate": 4.774245783459212e-06, "loss": 0.0925, "step": 4795 }, { "epoch": 0.7076355588343785, "grad_norm": 1.7989883422851562, "learning_rate": 4.769854773201915e-06, "loss": 0.0644, "step": 4796 }, { "epoch": 0.7077831058649945, "grad_norm": 2.3656089305877686, "learning_rate": 4.765465150646633e-06, "loss": 0.0647, "step": 4797 }, { "epoch": 0.7079306528956105, "grad_norm": 2.227722406387329, "learning_rate": 4.761076916958045e-06, "loss": 0.0739, "step": 4798 }, { "epoch": 0.7080781999262264, "grad_norm": 1.5509002208709717, "learning_rate": 4.756690073300475e-06, "loss": 0.0793, "step": 4799 }, { "epoch": 0.7082257469568425, "grad_norm": 2.06139874458313, "learning_rate": 4.752304620837876e-06, "loss": 0.0219, "step": 4800 }, { "epoch": 0.7082257469568425, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9655172413793104, "eval_loss": 0.057671889662742615, "eval_precision": 0.98, "eval_recall": 0.9514563106796117, "eval_runtime": 48.7845, "eval_samples_per_second": 5.965, "eval_steps_per_second": 0.205, "step": 4800 }, { "epoch": 0.7083732939874585, "grad_norm": 1.3491781949996948, "learning_rate": 4.747920560733825e-06, "loss": 0.0497, "step": 4801 }, { "epoch": 0.7085208410180746, "grad_norm": 2.348538398742676, "learning_rate": 4.743537894151535e-06, "loss": 0.0424, "step": 4802 }, { "epoch": 0.7086683880486905, "grad_norm": 3.854368209838867, "learning_rate": 4.739156622253854e-06, "loss": 0.0865, "step": 4803 }, { "epoch": 0.7088159350793065, "grad_norm": 2.624882698059082, "learning_rate": 4.7347767462032465e-06, "loss": 0.087, "step": 4804 }, { "epoch": 0.7089634821099225, "grad_norm": 1.3223381042480469, "learning_rate": 4.730398267161822e-06, "loss": 0.0526, "step": 4805 }, { "epoch": 0.7091110291405386, "grad_norm": 1.99947190284729, "learning_rate": 4.726021186291309e-06, "loss": 0.0633, "step": 4806 }, { "epoch": 0.7092585761711545, "grad_norm": 1.5057780742645264, "learning_rate": 4.721645504753059e-06, "loss": 0.0285, "step": 4807 }, { "epoch": 0.7094061232017705, "grad_norm": 1.374821662902832, "learning_rate": 4.717271223708076e-06, "loss": 0.0353, "step": 4808 }, { "epoch": 0.7095536702323866, "grad_norm": 1.7715858221054077, "learning_rate": 4.712898344316973e-06, "loss": 0.0333, "step": 4809 }, { "epoch": 0.7097012172630026, "grad_norm": 4.284231662750244, "learning_rate": 4.708526867739988e-06, "loss": 0.1314, "step": 4810 }, { "epoch": 0.7098487642936185, "grad_norm": 1.2447797060012817, "learning_rate": 4.704156795137006e-06, "loss": 0.0256, "step": 4811 }, { "epoch": 0.7099963113242346, "grad_norm": 2.242856502532959, "learning_rate": 4.699788127667517e-06, "loss": 0.0563, "step": 4812 }, { "epoch": 0.7101438583548506, "grad_norm": 2.6883840560913086, "learning_rate": 4.695420866490655e-06, "loss": 0.1042, "step": 4813 }, { "epoch": 0.7102914053854666, "grad_norm": 2.614682197570801, "learning_rate": 4.691055012765179e-06, "loss": 0.035, "step": 4814 }, { "epoch": 0.7104389524160827, "grad_norm": 4.825432300567627, "learning_rate": 4.686690567649461e-06, "loss": 0.0966, "step": 4815 }, { "epoch": 0.7105864994466986, "grad_norm": 2.8247244358062744, "learning_rate": 4.682327532301518e-06, "loss": 0.0735, "step": 4816 }, { "epoch": 0.7107340464773146, "grad_norm": 2.1444296836853027, "learning_rate": 4.677965907878976e-06, "loss": 0.0604, "step": 4817 }, { "epoch": 0.7108815935079307, "grad_norm": 0.9742994904518127, "learning_rate": 4.6736056955391e-06, "loss": 0.0223, "step": 4818 }, { "epoch": 0.7110291405385467, "grad_norm": 6.483880519866943, "learning_rate": 4.669246896438774e-06, "loss": 0.0939, "step": 4819 }, { "epoch": 0.7111766875691626, "grad_norm": 1.5703705549240112, "learning_rate": 4.664889511734509e-06, "loss": 0.0171, "step": 4820 }, { "epoch": 0.7111766875691626, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.05624791607260704, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 48.9838, "eval_samples_per_second": 5.941, "eval_steps_per_second": 0.204, "step": 4820 }, { "epoch": 0.7113242345997787, "grad_norm": 1.573594093322754, "learning_rate": 4.660533542582427e-06, "loss": 0.0643, "step": 4821 }, { "epoch": 0.7114717816303947, "grad_norm": 1.3411532640457153, "learning_rate": 4.656178990138307e-06, "loss": 0.0131, "step": 4822 }, { "epoch": 0.7116193286610107, "grad_norm": 2.8468737602233887, "learning_rate": 4.651825855557522e-06, "loss": 0.041, "step": 4823 }, { "epoch": 0.7117668756916267, "grad_norm": 3.8967766761779785, "learning_rate": 4.647474139995075e-06, "loss": 0.051, "step": 4824 }, { "epoch": 0.7119144227222427, "grad_norm": 4.845344543457031, "learning_rate": 4.643123844605603e-06, "loss": 0.12, "step": 4825 }, { "epoch": 0.7120619697528587, "grad_norm": 2.19871187210083, "learning_rate": 4.638774970543356e-06, "loss": 0.0703, "step": 4826 }, { "epoch": 0.7122095167834748, "grad_norm": 2.758091688156128, "learning_rate": 4.634427518962209e-06, "loss": 0.0515, "step": 4827 }, { "epoch": 0.7123570638140907, "grad_norm": 3.0133373737335205, "learning_rate": 4.630081491015669e-06, "loss": 0.0366, "step": 4828 }, { "epoch": 0.7125046108447067, "grad_norm": 2.8721578121185303, "learning_rate": 4.625736887856848e-06, "loss": 0.0813, "step": 4829 }, { "epoch": 0.7126521578753228, "grad_norm": 2.369920253753662, "learning_rate": 4.621393710638496e-06, "loss": 0.0935, "step": 4830 }, { "epoch": 0.7127997049059388, "grad_norm": 1.4751105308532715, "learning_rate": 4.61705196051297e-06, "loss": 0.021, "step": 4831 }, { "epoch": 0.7129472519365547, "grad_norm": 1.4269704818725586, "learning_rate": 4.612711638632263e-06, "loss": 0.0262, "step": 4832 }, { "epoch": 0.7130947989671708, "grad_norm": 3.1407742500305176, "learning_rate": 4.608372746147983e-06, "loss": 0.0233, "step": 4833 }, { "epoch": 0.7132423459977868, "grad_norm": 2.9399356842041016, "learning_rate": 4.604035284211356e-06, "loss": 0.0394, "step": 4834 }, { "epoch": 0.7133898930284028, "grad_norm": 2.724482774734497, "learning_rate": 4.5996992539732255e-06, "loss": 0.0161, "step": 4835 }, { "epoch": 0.7135374400590188, "grad_norm": 2.8107852935791016, "learning_rate": 4.595364656584066e-06, "loss": 0.0552, "step": 4836 }, { "epoch": 0.7136849870896348, "grad_norm": 1.3565391302108765, "learning_rate": 4.591031493193967e-06, "loss": 0.0463, "step": 4837 }, { "epoch": 0.7138325341202508, "grad_norm": 1.0748212337493896, "learning_rate": 4.58669976495263e-06, "loss": 0.0237, "step": 4838 }, { "epoch": 0.7139800811508669, "grad_norm": 2.125095844268799, "learning_rate": 4.58236947300939e-06, "loss": 0.059, "step": 4839 }, { "epoch": 0.7141276281814829, "grad_norm": 4.943905830383301, "learning_rate": 4.5780406185131866e-06, "loss": 0.0388, "step": 4840 }, { "epoch": 0.7141276281814829, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.05590560659766197, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 49.5376, "eval_samples_per_second": 5.874, "eval_steps_per_second": 0.202, "step": 4840 }, { "epoch": 0.7142751752120988, "grad_norm": 5.2195143699646, "learning_rate": 4.573713202612585e-06, "loss": 0.0585, "step": 4841 }, { "epoch": 0.7144227222427149, "grad_norm": 2.1044042110443115, "learning_rate": 4.569387226455776e-06, "loss": 0.0464, "step": 4842 }, { "epoch": 0.7145702692733309, "grad_norm": 3.341923236846924, "learning_rate": 4.565062691190549e-06, "loss": 0.0573, "step": 4843 }, { "epoch": 0.7147178163039469, "grad_norm": 2.710599899291992, "learning_rate": 4.560739597964334e-06, "loss": 0.0401, "step": 4844 }, { "epoch": 0.7148653633345629, "grad_norm": 1.703153371810913, "learning_rate": 4.556417947924158e-06, "loss": 0.0293, "step": 4845 }, { "epoch": 0.7150129103651789, "grad_norm": 1.2568097114562988, "learning_rate": 4.552097742216676e-06, "loss": 0.0319, "step": 4846 }, { "epoch": 0.7151604573957949, "grad_norm": 1.016923427581787, "learning_rate": 4.5477789819881645e-06, "loss": 0.0239, "step": 4847 }, { "epoch": 0.715308004426411, "grad_norm": 1.6680554151535034, "learning_rate": 4.543461668384505e-06, "loss": 0.0476, "step": 4848 }, { "epoch": 0.7154555514570269, "grad_norm": 2.5092504024505615, "learning_rate": 4.539145802551195e-06, "loss": 0.0806, "step": 4849 }, { "epoch": 0.7156030984876429, "grad_norm": 2.4317209720611572, "learning_rate": 4.5348313856333596e-06, "loss": 0.073, "step": 4850 }, { "epoch": 0.715750645518259, "grad_norm": 1.3609871864318848, "learning_rate": 4.530518418775734e-06, "loss": 0.0152, "step": 4851 }, { "epoch": 0.715898192548875, "grad_norm": 3.874760150909424, "learning_rate": 4.526206903122661e-06, "loss": 0.0327, "step": 4852 }, { "epoch": 0.7160457395794909, "grad_norm": 1.1470457315444946, "learning_rate": 4.521896839818112e-06, "loss": 0.035, "step": 4853 }, { "epoch": 0.716193286610107, "grad_norm": 2.0697035789489746, "learning_rate": 4.517588230005659e-06, "loss": 0.0278, "step": 4854 }, { "epoch": 0.716340833640723, "grad_norm": 2.5554862022399902, "learning_rate": 4.5132810748284975e-06, "loss": 0.0793, "step": 4855 }, { "epoch": 0.716488380671339, "grad_norm": 1.2551517486572266, "learning_rate": 4.5089753754294394e-06, "loss": 0.0272, "step": 4856 }, { "epoch": 0.716635927701955, "grad_norm": 8.998085975646973, "learning_rate": 4.5046711329509e-06, "loss": 0.028, "step": 4857 }, { "epoch": 0.716783474732571, "grad_norm": 1.6881085634231567, "learning_rate": 4.500368348534918e-06, "loss": 0.0705, "step": 4858 }, { "epoch": 0.716931021763187, "grad_norm": 2.618722677230835, "learning_rate": 4.496067023323137e-06, "loss": 0.0557, "step": 4859 }, { "epoch": 0.717078568793803, "grad_norm": 0.3381759524345398, "learning_rate": 4.491767158456817e-06, "loss": 0.0057, "step": 4860 }, { "epoch": 0.717078568793803, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.05862228944897652, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.7409, "eval_samples_per_second": 5.85, "eval_steps_per_second": 0.201, "step": 4860 }, { "epoch": 0.7172261158244191, "grad_norm": 2.75651216506958, "learning_rate": 4.487468755076837e-06, "loss": 0.0498, "step": 4861 }, { "epoch": 0.717373662855035, "grad_norm": 3.354355573654175, "learning_rate": 4.483171814323679e-06, "loss": 0.0943, "step": 4862 }, { "epoch": 0.717521209885651, "grad_norm": 3.9032034873962402, "learning_rate": 4.478876337337435e-06, "loss": 0.1044, "step": 4863 }, { "epoch": 0.7176687569162671, "grad_norm": 1.7555657625198364, "learning_rate": 4.474582325257818e-06, "loss": 0.0443, "step": 4864 }, { "epoch": 0.7178163039468831, "grad_norm": 3.1032423973083496, "learning_rate": 4.470289779224152e-06, "loss": 0.0742, "step": 4865 }, { "epoch": 0.717963850977499, "grad_norm": 2.7565388679504395, "learning_rate": 4.46599870037536e-06, "loss": 0.0395, "step": 4866 }, { "epoch": 0.7181113980081151, "grad_norm": 1.0605816841125488, "learning_rate": 4.461709089849993e-06, "loss": 0.0187, "step": 4867 }, { "epoch": 0.7182589450387311, "grad_norm": 2.2450878620147705, "learning_rate": 4.457420948786193e-06, "loss": 0.0306, "step": 4868 }, { "epoch": 0.7184064920693471, "grad_norm": 3.1171584129333496, "learning_rate": 4.453134278321727e-06, "loss": 0.0923, "step": 4869 }, { "epoch": 0.7185540390999631, "grad_norm": 2.8394124507904053, "learning_rate": 4.448849079593972e-06, "loss": 0.0305, "step": 4870 }, { "epoch": 0.7187015861305791, "grad_norm": 2.4069812297821045, "learning_rate": 4.4445653537399e-06, "loss": 0.0549, "step": 4871 }, { "epoch": 0.7188491331611951, "grad_norm": 3.646878242492676, "learning_rate": 4.440283101896113e-06, "loss": 0.0394, "step": 4872 }, { "epoch": 0.7189966801918112, "grad_norm": 1.8418575525283813, "learning_rate": 4.436002325198801e-06, "loss": 0.0738, "step": 4873 }, { "epoch": 0.7191442272224271, "grad_norm": 5.827348709106445, "learning_rate": 4.431723024783771e-06, "loss": 0.0814, "step": 4874 }, { "epoch": 0.7192917742530431, "grad_norm": 2.071404218673706, "learning_rate": 4.427445201786451e-06, "loss": 0.0459, "step": 4875 }, { "epoch": 0.7194393212836592, "grad_norm": 0.9559571743011475, "learning_rate": 4.423168857341859e-06, "loss": 0.0068, "step": 4876 }, { "epoch": 0.7195868683142752, "grad_norm": 5.078649044036865, "learning_rate": 4.418893992584624e-06, "loss": 0.086, "step": 4877 }, { "epoch": 0.7197344153448911, "grad_norm": 1.5742383003234863, "learning_rate": 4.414620608648989e-06, "loss": 0.0372, "step": 4878 }, { "epoch": 0.7198819623755072, "grad_norm": 2.6624889373779297, "learning_rate": 4.410348706668805e-06, "loss": 0.0686, "step": 4879 }, { "epoch": 0.7200295094061232, "grad_norm": 2.357564926147461, "learning_rate": 4.406078287777517e-06, "loss": 0.0507, "step": 4880 }, { "epoch": 0.7200295094061232, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9576059850374065, "eval_loss": 0.05922112986445427, "eval_precision": 0.9846153846153847, "eval_recall": 0.9320388349514563, "eval_runtime": 49.9173, "eval_samples_per_second": 5.83, "eval_steps_per_second": 0.2, "step": 4880 }, { "epoch": 0.7201770564367392, "grad_norm": 2.5022599697113037, "learning_rate": 4.401809353108194e-06, "loss": 0.0502, "step": 4881 }, { "epoch": 0.7203246034673553, "grad_norm": 2.529521942138672, "learning_rate": 4.397541903793493e-06, "loss": 0.074, "step": 4882 }, { "epoch": 0.7204721504979712, "grad_norm": 2.8516416549682617, "learning_rate": 4.393275940965692e-06, "loss": 0.0966, "step": 4883 }, { "epoch": 0.7206196975285872, "grad_norm": 3.647569179534912, "learning_rate": 4.389011465756672e-06, "loss": 0.0859, "step": 4884 }, { "epoch": 0.7207672445592033, "grad_norm": 2.2108426094055176, "learning_rate": 4.3847484792979065e-06, "loss": 0.077, "step": 4885 }, { "epoch": 0.7209147915898193, "grad_norm": 4.508794784545898, "learning_rate": 4.380486982720494e-06, "loss": 0.0146, "step": 4886 }, { "epoch": 0.7210623386204352, "grad_norm": 1.7417070865631104, "learning_rate": 4.376226977155118e-06, "loss": 0.0699, "step": 4887 }, { "epoch": 0.7212098856510513, "grad_norm": 3.349015951156616, "learning_rate": 4.37196846373208e-06, "loss": 0.0757, "step": 4888 }, { "epoch": 0.7213574326816673, "grad_norm": 5.347111225128174, "learning_rate": 4.367711443581286e-06, "loss": 0.1619, "step": 4889 }, { "epoch": 0.7215049797122833, "grad_norm": 1.6683688163757324, "learning_rate": 4.363455917832231e-06, "loss": 0.0316, "step": 4890 }, { "epoch": 0.7216525267428993, "grad_norm": 3.199214220046997, "learning_rate": 4.359201887614034e-06, "loss": 0.0764, "step": 4891 }, { "epoch": 0.7218000737735153, "grad_norm": 2.8147830963134766, "learning_rate": 4.3549493540554e-06, "loss": 0.0564, "step": 4892 }, { "epoch": 0.7219476208041313, "grad_norm": 2.7976462841033936, "learning_rate": 4.350698318284638e-06, "loss": 0.0635, "step": 4893 }, { "epoch": 0.7220951678347474, "grad_norm": 2.589905261993408, "learning_rate": 4.346448781429681e-06, "loss": 0.046, "step": 4894 }, { "epoch": 0.7222427148653633, "grad_norm": 3.977156400680542, "learning_rate": 4.342200744618038e-06, "loss": 0.0791, "step": 4895 }, { "epoch": 0.7223902618959793, "grad_norm": 2.2973368167877197, "learning_rate": 4.33795420897683e-06, "loss": 0.0559, "step": 4896 }, { "epoch": 0.7225378089265954, "grad_norm": 2.4549851417541504, "learning_rate": 4.333709175632782e-06, "loss": 0.0492, "step": 4897 }, { "epoch": 0.7226853559572114, "grad_norm": 1.2905851602554321, "learning_rate": 4.329465645712223e-06, "loss": 0.0361, "step": 4898 }, { "epoch": 0.7228329029878273, "grad_norm": 1.1179414987564087, "learning_rate": 4.325223620341072e-06, "loss": 0.018, "step": 4899 }, { "epoch": 0.7229804500184434, "grad_norm": 3.536717653274536, "learning_rate": 4.320983100644861e-06, "loss": 0.0478, "step": 4900 }, { "epoch": 0.7229804500184434, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.056641675531864166, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 50.1675, "eval_samples_per_second": 5.801, "eval_steps_per_second": 0.199, "step": 4900 }, { "epoch": 0.7231279970490594, "grad_norm": 1.9635512828826904, "learning_rate": 4.316744087748711e-06, "loss": 0.0454, "step": 4901 }, { "epoch": 0.7232755440796754, "grad_norm": 1.4981812238693237, "learning_rate": 4.312506582777354e-06, "loss": 0.0163, "step": 4902 }, { "epoch": 0.7234230911102915, "grad_norm": 1.09555983543396, "learning_rate": 4.308270586855118e-06, "loss": 0.0179, "step": 4903 }, { "epoch": 0.7235706381409074, "grad_norm": 2.8694074153900146, "learning_rate": 4.304036101105924e-06, "loss": 0.0578, "step": 4904 }, { "epoch": 0.7237181851715234, "grad_norm": 3.82897686958313, "learning_rate": 4.299803126653306e-06, "loss": 0.0699, "step": 4905 }, { "epoch": 0.7238657322021395, "grad_norm": 3.1547136306762695, "learning_rate": 4.295571664620384e-06, "loss": 0.112, "step": 4906 }, { "epoch": 0.7240132792327555, "grad_norm": 2.945875406265259, "learning_rate": 4.291341716129876e-06, "loss": 0.0526, "step": 4907 }, { "epoch": 0.7241608262633714, "grad_norm": 1.0003485679626465, "learning_rate": 4.287113282304116e-06, "loss": 0.031, "step": 4908 }, { "epoch": 0.7243083732939875, "grad_norm": 3.5373058319091797, "learning_rate": 4.28288636426502e-06, "loss": 0.1333, "step": 4909 }, { "epoch": 0.7244559203246035, "grad_norm": 2.889504909515381, "learning_rate": 4.278660963134102e-06, "loss": 0.0721, "step": 4910 }, { "epoch": 0.7246034673552195, "grad_norm": 1.1867845058441162, "learning_rate": 4.274437080032483e-06, "loss": 0.0205, "step": 4911 }, { "epoch": 0.7247510143858354, "grad_norm": 3.941420316696167, "learning_rate": 4.270214716080872e-06, "loss": 0.1031, "step": 4912 }, { "epoch": 0.7248985614164515, "grad_norm": 3.12939715385437, "learning_rate": 4.265993872399579e-06, "loss": 0.0461, "step": 4913 }, { "epoch": 0.7250461084470675, "grad_norm": 4.692959785461426, "learning_rate": 4.261774550108515e-06, "loss": 0.0471, "step": 4914 }, { "epoch": 0.7251936554776836, "grad_norm": 4.04848051071167, "learning_rate": 4.257556750327176e-06, "loss": 0.0424, "step": 4915 }, { "epoch": 0.7253412025082995, "grad_norm": 2.4679744243621826, "learning_rate": 4.253340474174665e-06, "loss": 0.0606, "step": 4916 }, { "epoch": 0.7254887495389155, "grad_norm": 2.272331476211548, "learning_rate": 4.249125722769679e-06, "loss": 0.0252, "step": 4917 }, { "epoch": 0.7256362965695315, "grad_norm": 3.518855094909668, "learning_rate": 4.244912497230501e-06, "loss": 0.0453, "step": 4918 }, { "epoch": 0.7257838436001476, "grad_norm": 1.4808404445648193, "learning_rate": 4.240700798675024e-06, "loss": 0.0284, "step": 4919 }, { "epoch": 0.7259313906307635, "grad_norm": 2.8190481662750244, "learning_rate": 4.236490628220724e-06, "loss": 0.0949, "step": 4920 }, { "epoch": 0.7259313906307635, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.9679012345679012, "eval_loss": 0.05497198924422264, "eval_precision": 0.9849246231155779, "eval_recall": 0.9514563106796117, "eval_runtime": 50.1118, "eval_samples_per_second": 5.807, "eval_steps_per_second": 0.2, "step": 4920 }, { "epoch": 0.7260789376613795, "grad_norm": 2.051560878753662, "learning_rate": 4.232281986984668e-06, "loss": 0.025, "step": 4921 }, { "epoch": 0.7262264846919956, "grad_norm": 3.728090286254883, "learning_rate": 4.22807487608354e-06, "loss": 0.1165, "step": 4922 }, { "epoch": 0.7263740317226116, "grad_norm": 1.673712968826294, "learning_rate": 4.223869296633595e-06, "loss": 0.0448, "step": 4923 }, { "epoch": 0.7265215787532275, "grad_norm": 5.206331729888916, "learning_rate": 4.2196652497506856e-06, "loss": 0.112, "step": 4924 }, { "epoch": 0.7266691257838436, "grad_norm": 5.384866237640381, "learning_rate": 4.215462736550267e-06, "loss": 0.0636, "step": 4925 }, { "epoch": 0.7268166728144596, "grad_norm": 4.246710777282715, "learning_rate": 4.211261758147379e-06, "loss": 0.0769, "step": 4926 }, { "epoch": 0.7269642198450756, "grad_norm": 2.2620885372161865, "learning_rate": 4.2070623156566594e-06, "loss": 0.0385, "step": 4927 }, { "epoch": 0.7271117668756917, "grad_norm": 2.2140722274780273, "learning_rate": 4.202864410192338e-06, "loss": 0.0643, "step": 4928 }, { "epoch": 0.7272593139063076, "grad_norm": 3.6797232627868652, "learning_rate": 4.198668042868228e-06, "loss": 0.0907, "step": 4929 }, { "epoch": 0.7274068609369236, "grad_norm": 2.7158584594726562, "learning_rate": 4.194473214797751e-06, "loss": 0.0674, "step": 4930 }, { "epoch": 0.7275544079675397, "grad_norm": 3.0263805389404297, "learning_rate": 4.190279927093902e-06, "loss": 0.0823, "step": 4931 }, { "epoch": 0.7277019549981557, "grad_norm": 2.7703890800476074, "learning_rate": 4.18608818086928e-06, "loss": 0.0785, "step": 4932 }, { "epoch": 0.7278495020287716, "grad_norm": 4.104133605957031, "learning_rate": 4.181897977236076e-06, "loss": 0.0887, "step": 4933 }, { "epoch": 0.7279970490593877, "grad_norm": 1.7006194591522217, "learning_rate": 4.17770931730606e-06, "loss": 0.047, "step": 4934 }, { "epoch": 0.7281445960900037, "grad_norm": 1.969412088394165, "learning_rate": 4.173522202190595e-06, "loss": 0.0189, "step": 4935 }, { "epoch": 0.7282921431206197, "grad_norm": 1.2648470401763916, "learning_rate": 4.169336633000652e-06, "loss": 0.0314, "step": 4936 }, { "epoch": 0.7284396901512357, "grad_norm": 1.2264314889907837, "learning_rate": 4.165152610846769e-06, "loss": 0.0369, "step": 4937 }, { "epoch": 0.7285872371818517, "grad_norm": 3.0328118801116943, "learning_rate": 4.160970136839081e-06, "loss": 0.0754, "step": 4938 }, { "epoch": 0.7287347842124677, "grad_norm": 2.6485111713409424, "learning_rate": 4.156789212087321e-06, "loss": 0.0765, "step": 4939 }, { "epoch": 0.7288823312430838, "grad_norm": 2.0226709842681885, "learning_rate": 4.152609837700796e-06, "loss": 0.0567, "step": 4940 }, { "epoch": 0.7288823312430838, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05580342188477516, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 51.094, "eval_samples_per_second": 5.695, "eval_steps_per_second": 0.196, "step": 4940 }, { "epoch": 0.7290298782736997, "grad_norm": 2.157094717025757, "learning_rate": 4.1484320147884134e-06, "loss": 0.0675, "step": 4941 }, { "epoch": 0.7291774253043157, "grad_norm": 2.5645432472229004, "learning_rate": 4.14425574445867e-06, "loss": 0.0709, "step": 4942 }, { "epoch": 0.7293249723349318, "grad_norm": 1.4542399644851685, "learning_rate": 4.140081027819636e-06, "loss": 0.0175, "step": 4943 }, { "epoch": 0.7294725193655478, "grad_norm": 3.29392409324646, "learning_rate": 4.135907865978987e-06, "loss": 0.1216, "step": 4944 }, { "epoch": 0.7296200663961637, "grad_norm": 3.0780889987945557, "learning_rate": 4.131736260043972e-06, "loss": 0.0888, "step": 4945 }, { "epoch": 0.7297676134267798, "grad_norm": 2.905585527420044, "learning_rate": 4.127566211121435e-06, "loss": 0.0204, "step": 4946 }, { "epoch": 0.7299151604573958, "grad_norm": 3.287302017211914, "learning_rate": 4.12339772031781e-06, "loss": 0.0848, "step": 4947 }, { "epoch": 0.7300627074880118, "grad_norm": 3.854551315307617, "learning_rate": 4.1192307887391105e-06, "loss": 0.1238, "step": 4948 }, { "epoch": 0.7302102545186279, "grad_norm": 1.3358299732208252, "learning_rate": 4.115065417490932e-06, "loss": 0.0332, "step": 4949 }, { "epoch": 0.7303578015492438, "grad_norm": 1.9069033861160278, "learning_rate": 4.1109016076784695e-06, "loss": 0.0675, "step": 4950 }, { "epoch": 0.7305053485798598, "grad_norm": 2.7234113216400146, "learning_rate": 4.106739360406497e-06, "loss": 0.0688, "step": 4951 }, { "epoch": 0.7306528956104759, "grad_norm": 1.232861876487732, "learning_rate": 4.102578676779368e-06, "loss": 0.0497, "step": 4952 }, { "epoch": 0.7308004426410919, "grad_norm": 1.5629462003707886, "learning_rate": 4.098419557901036e-06, "loss": 0.0299, "step": 4953 }, { "epoch": 0.7309479896717078, "grad_norm": 2.002155065536499, "learning_rate": 4.0942620048750195e-06, "loss": 0.0352, "step": 4954 }, { "epoch": 0.7310955367023239, "grad_norm": 1.2821314334869385, "learning_rate": 4.090106018804437e-06, "loss": 0.03, "step": 4955 }, { "epoch": 0.7312430837329399, "grad_norm": 1.617584466934204, "learning_rate": 4.085951600791991e-06, "loss": 0.0275, "step": 4956 }, { "epoch": 0.7313906307635559, "grad_norm": 1.2304725646972656, "learning_rate": 4.081798751939955e-06, "loss": 0.0173, "step": 4957 }, { "epoch": 0.7315381777941719, "grad_norm": 2.5429775714874268, "learning_rate": 4.077647473350201e-06, "loss": 0.0989, "step": 4958 }, { "epoch": 0.7316857248247879, "grad_norm": 4.065185546875, "learning_rate": 4.073497766124174e-06, "loss": 0.0731, "step": 4959 }, { "epoch": 0.7318332718554039, "grad_norm": 1.9886144399642944, "learning_rate": 4.0693496313629065e-06, "loss": 0.025, "step": 4960 }, { "epoch": 0.7318332718554039, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.05799402296543121, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 50.9829, "eval_samples_per_second": 5.708, "eval_steps_per_second": 0.196, "step": 4960 }, { "epoch": 0.73198081888602, "grad_norm": 3.3338429927825928, "learning_rate": 4.065203070167018e-06, "loss": 0.0824, "step": 4961 }, { "epoch": 0.7321283659166359, "grad_norm": 1.6041611433029175, "learning_rate": 4.061058083636702e-06, "loss": 0.051, "step": 4962 }, { "epoch": 0.7322759129472519, "grad_norm": 2.8508100509643555, "learning_rate": 4.056914672871733e-06, "loss": 0.1187, "step": 4963 }, { "epoch": 0.732423459977868, "grad_norm": 1.3557360172271729, "learning_rate": 4.0527728389714785e-06, "loss": 0.0334, "step": 4964 }, { "epoch": 0.732571007008484, "grad_norm": 0.5890713334083557, "learning_rate": 4.048632583034884e-06, "loss": 0.0081, "step": 4965 }, { "epoch": 0.7327185540390999, "grad_norm": 1.1613587141036987, "learning_rate": 4.044493906160465e-06, "loss": 0.0189, "step": 4966 }, { "epoch": 0.732866101069716, "grad_norm": 1.4688493013381958, "learning_rate": 4.040356809446335e-06, "loss": 0.0255, "step": 4967 }, { "epoch": 0.733013648100332, "grad_norm": 2.065842628479004, "learning_rate": 4.036221293990174e-06, "loss": 0.0394, "step": 4968 }, { "epoch": 0.733161195130948, "grad_norm": 2.254481315612793, "learning_rate": 4.032087360889252e-06, "loss": 0.0512, "step": 4969 }, { "epoch": 0.733308742161564, "grad_norm": 1.6552678346633911, "learning_rate": 4.027955011240417e-06, "loss": 0.0329, "step": 4970 }, { "epoch": 0.73345628919218, "grad_norm": 1.8482474088668823, "learning_rate": 4.023824246140091e-06, "loss": 0.0573, "step": 4971 }, { "epoch": 0.733603836222796, "grad_norm": 2.3554043769836426, "learning_rate": 4.019695066684285e-06, "loss": 0.081, "step": 4972 }, { "epoch": 0.733751383253412, "grad_norm": 2.734476327896118, "learning_rate": 4.015567473968579e-06, "loss": 0.0691, "step": 4973 }, { "epoch": 0.7338989302840281, "grad_norm": 3.21683669090271, "learning_rate": 4.0114414690881395e-06, "loss": 0.081, "step": 4974 }, { "epoch": 0.734046477314644, "grad_norm": 4.958075046539307, "learning_rate": 4.0073170531377145e-06, "loss": 0.0705, "step": 4975 }, { "epoch": 0.73419402434526, "grad_norm": 3.259782314300537, "learning_rate": 4.003194227211622e-06, "loss": 0.1247, "step": 4976 }, { "epoch": 0.7343415713758761, "grad_norm": 2.7615573406219482, "learning_rate": 3.999072992403757e-06, "loss": 0.0368, "step": 4977 }, { "epoch": 0.7344891184064921, "grad_norm": 1.0847185850143433, "learning_rate": 3.994953349807598e-06, "loss": 0.0235, "step": 4978 }, { "epoch": 0.734636665437108, "grad_norm": 2.452235460281372, "learning_rate": 3.99083530051621e-06, "loss": 0.0221, "step": 4979 }, { "epoch": 0.7347842124677241, "grad_norm": 2.13835072517395, "learning_rate": 3.9867188456222126e-06, "loss": 0.0488, "step": 4980 }, { "epoch": 0.7347842124677241, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.96, "eval_loss": 0.06122511625289917, "eval_precision": 0.9896907216494846, "eval_recall": 0.9320388349514563, "eval_runtime": 49.6079, "eval_samples_per_second": 5.866, "eval_steps_per_second": 0.202, "step": 4980 }, { "epoch": 0.7349317594983401, "grad_norm": 5.491005897521973, "learning_rate": 3.982603986217826e-06, "loss": 0.0402, "step": 4981 }, { "epoch": 0.7350793065289561, "grad_norm": 3.362074375152588, "learning_rate": 3.9784907233948245e-06, "loss": 0.0878, "step": 4982 }, { "epoch": 0.7352268535595721, "grad_norm": 5.016378879547119, "learning_rate": 3.974379058244577e-06, "loss": 0.0406, "step": 4983 }, { "epoch": 0.7353744005901881, "grad_norm": 3.493664503097534, "learning_rate": 3.970268991858026e-06, "loss": 0.1325, "step": 4984 }, { "epoch": 0.7355219476208041, "grad_norm": 1.981062412261963, "learning_rate": 3.966160525325677e-06, "loss": 0.0528, "step": 4985 }, { "epoch": 0.7356694946514202, "grad_norm": 5.002155780792236, "learning_rate": 3.962053659737625e-06, "loss": 0.0476, "step": 4986 }, { "epoch": 0.7358170416820361, "grad_norm": 1.775918960571289, "learning_rate": 3.957948396183536e-06, "loss": 0.0418, "step": 4987 }, { "epoch": 0.7359645887126521, "grad_norm": 2.9521892070770264, "learning_rate": 3.95384473575264e-06, "loss": 0.0787, "step": 4988 }, { "epoch": 0.7361121357432682, "grad_norm": 6.41652774810791, "learning_rate": 3.9497426795337646e-06, "loss": 0.0715, "step": 4989 }, { "epoch": 0.7362596827738842, "grad_norm": 3.5608973503112793, "learning_rate": 3.945642228615293e-06, "loss": 0.0684, "step": 4990 }, { "epoch": 0.7364072298045001, "grad_norm": 2.6047325134277344, "learning_rate": 3.9415433840851845e-06, "loss": 0.0385, "step": 4991 }, { "epoch": 0.7365547768351162, "grad_norm": 2.0453879833221436, "learning_rate": 3.93744614703098e-06, "loss": 0.0508, "step": 4992 }, { "epoch": 0.7367023238657322, "grad_norm": 3.014782428741455, "learning_rate": 3.933350518539791e-06, "loss": 0.029, "step": 4993 }, { "epoch": 0.7368498708963482, "grad_norm": 1.8775115013122559, "learning_rate": 3.929256499698296e-06, "loss": 0.0595, "step": 4994 }, { "epoch": 0.7369974179269643, "grad_norm": 3.319164991378784, "learning_rate": 3.925164091592758e-06, "loss": 0.069, "step": 4995 }, { "epoch": 0.7371449649575802, "grad_norm": 2.6777350902557373, "learning_rate": 3.921073295308998e-06, "loss": 0.0803, "step": 4996 }, { "epoch": 0.7372925119881962, "grad_norm": 1.0832637548446655, "learning_rate": 3.91698411193242e-06, "loss": 0.0282, "step": 4997 }, { "epoch": 0.7374400590188123, "grad_norm": 2.4308724403381348, "learning_rate": 3.912896542548004e-06, "loss": 0.032, "step": 4998 }, { "epoch": 0.7375876060494283, "grad_norm": 3.247943162918091, "learning_rate": 3.908810588240285e-06, "loss": 0.1269, "step": 4999 }, { "epoch": 0.7377351530800442, "grad_norm": 1.6741664409637451, "learning_rate": 3.904726250093388e-06, "loss": 0.0313, "step": 5000 }, { "epoch": 0.7377351530800442, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9655172413793104, "eval_loss": 0.05591617152094841, "eval_precision": 0.98, "eval_recall": 0.9514563106796117, "eval_runtime": 50.6909, "eval_samples_per_second": 5.741, "eval_steps_per_second": 0.197, "step": 5000 }, { "epoch": 0.7378827001106603, "grad_norm": 1.8698846101760864, "learning_rate": 3.900643529190996e-06, "loss": 0.0383, "step": 5001 }, { "epoch": 0.7380302471412763, "grad_norm": 1.9142612218856812, "learning_rate": 3.896562426616363e-06, "loss": 0.0399, "step": 5002 }, { "epoch": 0.7381777941718923, "grad_norm": 2.3804843425750732, "learning_rate": 3.892482943452329e-06, "loss": 0.0589, "step": 5003 }, { "epoch": 0.7383253412025083, "grad_norm": 3.9642651081085205, "learning_rate": 3.888405080781289e-06, "loss": 0.0608, "step": 5004 }, { "epoch": 0.7384728882331243, "grad_norm": 1.6783424615859985, "learning_rate": 3.884328839685209e-06, "loss": 0.0297, "step": 5005 }, { "epoch": 0.7386204352637403, "grad_norm": 2.49522066116333, "learning_rate": 3.880254221245635e-06, "loss": 0.0955, "step": 5006 }, { "epoch": 0.7387679822943564, "grad_norm": 2.9898521900177, "learning_rate": 3.876181226543669e-06, "loss": 0.0392, "step": 5007 }, { "epoch": 0.7389155293249723, "grad_norm": 3.909217357635498, "learning_rate": 3.87210985665999e-06, "loss": 0.0569, "step": 5008 }, { "epoch": 0.7390630763555883, "grad_norm": 2.383328676223755, "learning_rate": 3.868040112674852e-06, "loss": 0.0678, "step": 5009 }, { "epoch": 0.7392106233862044, "grad_norm": 2.1897435188293457, "learning_rate": 3.8639719956680624e-06, "loss": 0.0296, "step": 5010 }, { "epoch": 0.7393581704168204, "grad_norm": 1.9390332698822021, "learning_rate": 3.859905506719007e-06, "loss": 0.031, "step": 5011 }, { "epoch": 0.7395057174474363, "grad_norm": 4.121059894561768, "learning_rate": 3.855840646906644e-06, "loss": 0.0518, "step": 5012 }, { "epoch": 0.7396532644780524, "grad_norm": 2.5393035411834717, "learning_rate": 3.851777417309484e-06, "loss": 0.0801, "step": 5013 }, { "epoch": 0.7398008115086684, "grad_norm": 2.933671474456787, "learning_rate": 3.8477158190056196e-06, "loss": 0.0276, "step": 5014 }, { "epoch": 0.7399483585392844, "grad_norm": 0.9197447299957275, "learning_rate": 3.843655853072702e-06, "loss": 0.0143, "step": 5015 }, { "epoch": 0.7400959055699005, "grad_norm": 1.2443636655807495, "learning_rate": 3.839597520587952e-06, "loss": 0.0233, "step": 5016 }, { "epoch": 0.7402434526005164, "grad_norm": 4.511955261230469, "learning_rate": 3.835540822628163e-06, "loss": 0.0593, "step": 5017 }, { "epoch": 0.7403909996311324, "grad_norm": 3.8662686347961426, "learning_rate": 3.831485760269682e-06, "loss": 0.053, "step": 5018 }, { "epoch": 0.7405385466617485, "grad_norm": 2.3428421020507812, "learning_rate": 3.827432334588436e-06, "loss": 0.0393, "step": 5019 }, { "epoch": 0.7406860936923645, "grad_norm": 14.282426834106445, "learning_rate": 3.8233805466599065e-06, "loss": 0.0552, "step": 5020 }, { "epoch": 0.7406860936923645, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.0588710755109787, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 48.7662, "eval_samples_per_second": 5.967, "eval_steps_per_second": 0.205, "step": 5020 }, { "epoch": 0.7408336407229804, "grad_norm": 0.8560268878936768, "learning_rate": 3.819330397559137e-06, "loss": 0.0171, "step": 5021 }, { "epoch": 0.7409811877535964, "grad_norm": 1.0845798254013062, "learning_rate": 3.81528188836076e-06, "loss": 0.0207, "step": 5022 }, { "epoch": 0.7411287347842125, "grad_norm": 2.9367661476135254, "learning_rate": 3.811235020138949e-06, "loss": 0.0875, "step": 5023 }, { "epoch": 0.7412762818148285, "grad_norm": 3.378262996673584, "learning_rate": 3.807189793967446e-06, "loss": 0.0669, "step": 5024 }, { "epoch": 0.7414238288454444, "grad_norm": 6.66485595703125, "learning_rate": 3.8031462109195692e-06, "loss": 0.0725, "step": 5025 }, { "epoch": 0.7415713758760605, "grad_norm": 1.393086552619934, "learning_rate": 3.799104272068185e-06, "loss": 0.0204, "step": 5026 }, { "epoch": 0.7417189229066765, "grad_norm": 3.0999999046325684, "learning_rate": 3.7950639784857347e-06, "loss": 0.0569, "step": 5027 }, { "epoch": 0.7418664699372925, "grad_norm": 2.4492030143737793, "learning_rate": 3.7910253312442245e-06, "loss": 0.0946, "step": 5028 }, { "epoch": 0.7420140169679085, "grad_norm": 2.5732202529907227, "learning_rate": 3.7869883314152114e-06, "loss": 0.0242, "step": 5029 }, { "epoch": 0.7421615639985245, "grad_norm": 6.5129594802856445, "learning_rate": 3.7829529800698295e-06, "loss": 0.0963, "step": 5030 }, { "epoch": 0.7423091110291405, "grad_norm": 3.6957192420959473, "learning_rate": 3.778919278278762e-06, "loss": 0.077, "step": 5031 }, { "epoch": 0.7424566580597566, "grad_norm": 4.02890682220459, "learning_rate": 3.774887227112266e-06, "loss": 0.0765, "step": 5032 }, { "epoch": 0.7426042050903725, "grad_norm": 2.6238794326782227, "learning_rate": 3.770856827640159e-06, "loss": 0.0528, "step": 5033 }, { "epoch": 0.7427517521209885, "grad_norm": 3.193618059158325, "learning_rate": 3.7668280809318136e-06, "loss": 0.0609, "step": 5034 }, { "epoch": 0.7428992991516046, "grad_norm": 2.332149028778076, "learning_rate": 3.7628009880561612e-06, "loss": 0.1053, "step": 5035 }, { "epoch": 0.7430468461822206, "grad_norm": 3.265127658843994, "learning_rate": 3.7587755500817136e-06, "loss": 0.0664, "step": 5036 }, { "epoch": 0.7431943932128366, "grad_norm": 1.7198021411895752, "learning_rate": 3.7547517680765243e-06, "loss": 0.0359, "step": 5037 }, { "epoch": 0.7433419402434526, "grad_norm": 1.2974985837936401, "learning_rate": 3.750729643108212e-06, "loss": 0.0113, "step": 5038 }, { "epoch": 0.7434894872740686, "grad_norm": 2.256070852279663, "learning_rate": 3.746709176243961e-06, "loss": 0.0417, "step": 5039 }, { "epoch": 0.7436370343046846, "grad_norm": 2.7929069995880127, "learning_rate": 3.742690368550509e-06, "loss": 0.0579, "step": 5040 }, { "epoch": 0.7436370343046846, "eval_accuracy": 0.9826338639652678, "eval_f1": 0.9702970297029703, "eval_loss": 0.055557604879140854, "eval_precision": 0.98989898989899, "eval_recall": 0.9514563106796117, "eval_runtime": 49.1896, "eval_samples_per_second": 5.916, "eval_steps_per_second": 0.203, "step": 5040 }, { "epoch": 0.7437845813353007, "grad_norm": 2.1392483711242676, "learning_rate": 3.7386732210941566e-06, "loss": 0.0368, "step": 5041 }, { "epoch": 0.7439321283659166, "grad_norm": 3.4329309463500977, "learning_rate": 3.734657734940771e-06, "loss": 0.0703, "step": 5042 }, { "epoch": 0.7440796753965326, "grad_norm": 2.0319983959198, "learning_rate": 3.7306439111557622e-06, "loss": 0.0539, "step": 5043 }, { "epoch": 0.7442272224271487, "grad_norm": 1.8808428049087524, "learning_rate": 3.726631750804116e-06, "loss": 0.0394, "step": 5044 }, { "epoch": 0.7443747694577647, "grad_norm": 1.6444365978240967, "learning_rate": 3.7226212549503637e-06, "loss": 0.0496, "step": 5045 }, { "epoch": 0.7445223164883806, "grad_norm": 2.433212995529175, "learning_rate": 3.718612424658602e-06, "loss": 0.0189, "step": 5046 }, { "epoch": 0.7446698635189967, "grad_norm": 2.4419362545013428, "learning_rate": 3.714605260992489e-06, "loss": 0.0453, "step": 5047 }, { "epoch": 0.7448174105496127, "grad_norm": 2.760617733001709, "learning_rate": 3.7105997650152326e-06, "loss": 0.0597, "step": 5048 }, { "epoch": 0.7449649575802287, "grad_norm": 2.417325973510742, "learning_rate": 3.7065959377895975e-06, "loss": 0.0807, "step": 5049 }, { "epoch": 0.7451125046108447, "grad_norm": 2.111337423324585, "learning_rate": 3.7025937803779146e-06, "loss": 0.0377, "step": 5050 }, { "epoch": 0.7452600516414607, "grad_norm": 3.3178672790527344, "learning_rate": 3.698593293842068e-06, "loss": 0.0656, "step": 5051 }, { "epoch": 0.7454075986720767, "grad_norm": 2.8166885375976562, "learning_rate": 3.6945944792434908e-06, "loss": 0.0554, "step": 5052 }, { "epoch": 0.7455551457026928, "grad_norm": 2.8551924228668213, "learning_rate": 3.690597337643188e-06, "loss": 0.0957, "step": 5053 }, { "epoch": 0.7457026927333087, "grad_norm": 0.8701147437095642, "learning_rate": 3.6866018701017026e-06, "loss": 0.01, "step": 5054 }, { "epoch": 0.7458502397639247, "grad_norm": 1.3501099348068237, "learning_rate": 3.682608077679146e-06, "loss": 0.0307, "step": 5055 }, { "epoch": 0.7459977867945408, "grad_norm": 5.349863052368164, "learning_rate": 3.678615961435187e-06, "loss": 0.0943, "step": 5056 }, { "epoch": 0.7461453338251568, "grad_norm": 1.8712959289550781, "learning_rate": 3.674625522429036e-06, "loss": 0.0298, "step": 5057 }, { "epoch": 0.7462928808557727, "grad_norm": 7.770257949829102, "learning_rate": 3.670636761719474e-06, "loss": 0.1044, "step": 5058 }, { "epoch": 0.7464404278863888, "grad_norm": 2.9371540546417236, "learning_rate": 3.666649680364822e-06, "loss": 0.0807, "step": 5059 }, { "epoch": 0.7465879749170048, "grad_norm": 2.7339513301849365, "learning_rate": 3.662664279422966e-06, "loss": 0.0704, "step": 5060 }, { "epoch": 0.7465879749170048, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05509909987449646, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0151, "eval_samples_per_second": 5.937, "eval_steps_per_second": 0.204, "step": 5060 }, { "epoch": 0.7467355219476208, "grad_norm": 3.5106041431427, "learning_rate": 3.6586805599513477e-06, "loss": 0.0763, "step": 5061 }, { "epoch": 0.7468830689782369, "grad_norm": 1.7270084619522095, "learning_rate": 3.6546985230069543e-06, "loss": 0.0419, "step": 5062 }, { "epoch": 0.7470306160088528, "grad_norm": 2.038734197616577, "learning_rate": 3.6507181696463257e-06, "loss": 0.0303, "step": 5063 }, { "epoch": 0.7471781630394688, "grad_norm": 2.03090500831604, "learning_rate": 3.6467395009255634e-06, "loss": 0.056, "step": 5064 }, { "epoch": 0.7473257100700849, "grad_norm": 2.465895652770996, "learning_rate": 3.6427625179003223e-06, "loss": 0.0762, "step": 5065 }, { "epoch": 0.7474732571007009, "grad_norm": 3.926563024520874, "learning_rate": 3.638787221625798e-06, "loss": 0.0405, "step": 5066 }, { "epoch": 0.7476208041313168, "grad_norm": 1.244516372680664, "learning_rate": 3.6348136131567537e-06, "loss": 0.0153, "step": 5067 }, { "epoch": 0.7477683511619329, "grad_norm": 1.2845311164855957, "learning_rate": 3.630841693547491e-06, "loss": 0.0511, "step": 5068 }, { "epoch": 0.7479158981925489, "grad_norm": 5.366288661956787, "learning_rate": 3.626871463851873e-06, "loss": 0.0841, "step": 5069 }, { "epoch": 0.7480634452231649, "grad_norm": 2.0090479850769043, "learning_rate": 3.622902925123314e-06, "loss": 0.0874, "step": 5070 }, { "epoch": 0.7482109922537808, "grad_norm": 2.04909348487854, "learning_rate": 3.618936078414772e-06, "loss": 0.0245, "step": 5071 }, { "epoch": 0.7483585392843969, "grad_norm": 3.025183916091919, "learning_rate": 3.6149709247787656e-06, "loss": 0.0509, "step": 5072 }, { "epoch": 0.7485060863150129, "grad_norm": 3.0461366176605225, "learning_rate": 3.611007465267354e-06, "loss": 0.0822, "step": 5073 }, { "epoch": 0.748653633345629, "grad_norm": 1.8610765933990479, "learning_rate": 3.607045700932157e-06, "loss": 0.0331, "step": 5074 }, { "epoch": 0.7488011803762449, "grad_norm": 3.327871561050415, "learning_rate": 3.603085632824341e-06, "loss": 0.0886, "step": 5075 }, { "epoch": 0.7489487274068609, "grad_norm": 1.6442581415176392, "learning_rate": 3.5991272619946205e-06, "loss": 0.0356, "step": 5076 }, { "epoch": 0.749096274437477, "grad_norm": 2.01371693611145, "learning_rate": 3.5951705894932555e-06, "loss": 0.0665, "step": 5077 }, { "epoch": 0.749243821468093, "grad_norm": 4.2385053634643555, "learning_rate": 3.591215616370064e-06, "loss": 0.049, "step": 5078 }, { "epoch": 0.7493913684987089, "grad_norm": 4.80850887298584, "learning_rate": 3.587262343674414e-06, "loss": 0.0274, "step": 5079 }, { "epoch": 0.749538915529325, "grad_norm": 8.128585815429688, "learning_rate": 3.583310772455211e-06, "loss": 0.0537, "step": 5080 }, { "epoch": 0.749538915529325, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.057520877569913864, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 50.2491, "eval_samples_per_second": 5.791, "eval_steps_per_second": 0.199, "step": 5080 }, { "epoch": 0.749686462559941, "grad_norm": 2.640580177307129, "learning_rate": 3.5793609037609225e-06, "loss": 0.0937, "step": 5081 }, { "epoch": 0.749834009590557, "grad_norm": 1.4052523374557495, "learning_rate": 3.57541273863955e-06, "loss": 0.0338, "step": 5082 }, { "epoch": 0.749981556621173, "grad_norm": 4.573547840118408, "learning_rate": 3.5714662781386554e-06, "loss": 0.0252, "step": 5083 }, { "epoch": 0.750129103651789, "grad_norm": 5.047595500946045, "learning_rate": 3.5675215233053473e-06, "loss": 0.0884, "step": 5084 }, { "epoch": 0.750276650682405, "grad_norm": 3.0312185287475586, "learning_rate": 3.563578475186271e-06, "loss": 0.1193, "step": 5085 }, { "epoch": 0.750424197713021, "grad_norm": 1.0070552825927734, "learning_rate": 3.5596371348276325e-06, "loss": 0.0239, "step": 5086 }, { "epoch": 0.7505717447436371, "grad_norm": 3.070418357849121, "learning_rate": 3.5556975032751752e-06, "loss": 0.0659, "step": 5087 }, { "epoch": 0.750719291774253, "grad_norm": 6.563639163970947, "learning_rate": 3.5517595815741857e-06, "loss": 0.0389, "step": 5088 }, { "epoch": 0.750866838804869, "grad_norm": 3.832509756088257, "learning_rate": 3.5478233707695155e-06, "loss": 0.0566, "step": 5089 }, { "epoch": 0.7510143858354851, "grad_norm": 1.9744356870651245, "learning_rate": 3.543888871905545e-06, "loss": 0.0319, "step": 5090 }, { "epoch": 0.7511619328661011, "grad_norm": 1.9785645008087158, "learning_rate": 3.539956086026202e-06, "loss": 0.0246, "step": 5091 }, { "epoch": 0.751309479896717, "grad_norm": 1.8142704963684082, "learning_rate": 3.5360250141749652e-06, "loss": 0.044, "step": 5092 }, { "epoch": 0.7514570269273331, "grad_norm": 1.4420737028121948, "learning_rate": 3.5320956573948616e-06, "loss": 0.0523, "step": 5093 }, { "epoch": 0.7516045739579491, "grad_norm": 4.183411598205566, "learning_rate": 3.528168016728449e-06, "loss": 0.0892, "step": 5094 }, { "epoch": 0.7517521209885651, "grad_norm": 3.7427468299865723, "learning_rate": 3.524242093217849e-06, "loss": 0.158, "step": 5095 }, { "epoch": 0.7518996680191811, "grad_norm": 1.429665207862854, "learning_rate": 3.5203178879047085e-06, "loss": 0.0182, "step": 5096 }, { "epoch": 0.7520472150497971, "grad_norm": 0.6736586689949036, "learning_rate": 3.5163954018302317e-06, "loss": 0.0081, "step": 5097 }, { "epoch": 0.7521947620804131, "grad_norm": 2.3966901302337646, "learning_rate": 3.512474636035166e-06, "loss": 0.0322, "step": 5098 }, { "epoch": 0.7523423091110292, "grad_norm": 1.7501212358474731, "learning_rate": 3.5085555915597902e-06, "loss": 0.0398, "step": 5099 }, { "epoch": 0.7524898561416451, "grad_norm": 2.2130255699157715, "learning_rate": 3.5046382694439453e-06, "loss": 0.038, "step": 5100 }, { "epoch": 0.7524898561416451, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05771620571613312, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 50.1558, "eval_samples_per_second": 5.802, "eval_steps_per_second": 0.199, "step": 5100 }, { "epoch": 0.7526374031722611, "grad_norm": 1.081679344177246, "learning_rate": 3.500722670726999e-06, "loss": 0.0128, "step": 5101 }, { "epoch": 0.7527849502028772, "grad_norm": 1.6140114068984985, "learning_rate": 3.4968087964478613e-06, "loss": 0.048, "step": 5102 }, { "epoch": 0.7529324972334932, "grad_norm": 1.6351380348205566, "learning_rate": 3.4928966476450065e-06, "loss": 0.0255, "step": 5103 }, { "epoch": 0.7530800442641092, "grad_norm": 0.8849024772644043, "learning_rate": 3.4889862253564267e-06, "loss": 0.008, "step": 5104 }, { "epoch": 0.7532275912947252, "grad_norm": 4.266582489013672, "learning_rate": 3.485077530619664e-06, "loss": 0.1164, "step": 5105 }, { "epoch": 0.7533751383253412, "grad_norm": 2.3778281211853027, "learning_rate": 3.4811705644718073e-06, "loss": 0.0232, "step": 5106 }, { "epoch": 0.7535226853559572, "grad_norm": 0.9353477358818054, "learning_rate": 3.477265327949477e-06, "loss": 0.029, "step": 5107 }, { "epoch": 0.7536702323865733, "grad_norm": 2.7445995807647705, "learning_rate": 3.4733618220888443e-06, "loss": 0.0429, "step": 5108 }, { "epoch": 0.7538177794171892, "grad_norm": 2.1860287189483643, "learning_rate": 3.4694600479256202e-06, "loss": 0.0752, "step": 5109 }, { "epoch": 0.7539653264478052, "grad_norm": 4.554017543792725, "learning_rate": 3.4655600064950457e-06, "loss": 0.1008, "step": 5110 }, { "epoch": 0.7541128734784213, "grad_norm": 2.78831148147583, "learning_rate": 3.461661698831913e-06, "loss": 0.1126, "step": 5111 }, { "epoch": 0.7542604205090373, "grad_norm": 1.1810117959976196, "learning_rate": 3.4577651259705547e-06, "loss": 0.0264, "step": 5112 }, { "epoch": 0.7544079675396532, "grad_norm": 1.072582483291626, "learning_rate": 3.453870288944833e-06, "loss": 0.0087, "step": 5113 }, { "epoch": 0.7545555145702693, "grad_norm": 2.7644457817077637, "learning_rate": 3.4499771887881606e-06, "loss": 0.0317, "step": 5114 }, { "epoch": 0.7547030616008853, "grad_norm": 1.348696231842041, "learning_rate": 3.446085826533483e-06, "loss": 0.0388, "step": 5115 }, { "epoch": 0.7548506086315013, "grad_norm": 1.3266764879226685, "learning_rate": 3.4421962032132784e-06, "loss": 0.0397, "step": 5116 }, { "epoch": 0.7549981556621173, "grad_norm": 3.281674385070801, "learning_rate": 3.4383083198595855e-06, "loss": 0.0622, "step": 5117 }, { "epoch": 0.7551457026927333, "grad_norm": 3.0210163593292236, "learning_rate": 3.43442217750396e-06, "loss": 0.0494, "step": 5118 }, { "epoch": 0.7552932497233493, "grad_norm": 1.837327241897583, "learning_rate": 3.4305377771774993e-06, "loss": 0.0441, "step": 5119 }, { "epoch": 0.7554407967539654, "grad_norm": 4.557116985321045, "learning_rate": 3.426655119910849e-06, "loss": 0.1133, "step": 5120 }, { "epoch": 0.7554407967539654, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.0592590793967247, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 51.133, "eval_samples_per_second": 5.691, "eval_steps_per_second": 0.196, "step": 5120 }, { "epoch": 0.7555883437845813, "grad_norm": 2.635477304458618, "learning_rate": 3.4227742067341784e-06, "loss": 0.0423, "step": 5121 }, { "epoch": 0.7557358908151973, "grad_norm": 2.3272955417633057, "learning_rate": 3.4188950386772045e-06, "loss": 0.058, "step": 5122 }, { "epoch": 0.7558834378458134, "grad_norm": 3.9071474075317383, "learning_rate": 3.4150176167691816e-06, "loss": 0.0905, "step": 5123 }, { "epoch": 0.7560309848764294, "grad_norm": 1.2327368259429932, "learning_rate": 3.4111419420388904e-06, "loss": 0.0282, "step": 5124 }, { "epoch": 0.7561785319070454, "grad_norm": 3.7281596660614014, "learning_rate": 3.4072680155146586e-06, "loss": 0.0877, "step": 5125 }, { "epoch": 0.7563260789376614, "grad_norm": 2.743711471557617, "learning_rate": 3.4033958382243424e-06, "loss": 0.0692, "step": 5126 }, { "epoch": 0.7564736259682774, "grad_norm": 4.099350452423096, "learning_rate": 3.399525411195339e-06, "loss": 0.0871, "step": 5127 }, { "epoch": 0.7566211729988934, "grad_norm": 2.237304925918579, "learning_rate": 3.3956567354545833e-06, "loss": 0.0999, "step": 5128 }, { "epoch": 0.7567687200295095, "grad_norm": 1.9612205028533936, "learning_rate": 3.3917898120285354e-06, "loss": 0.0563, "step": 5129 }, { "epoch": 0.7569162670601254, "grad_norm": 1.1322190761566162, "learning_rate": 3.387924641943201e-06, "loss": 0.0129, "step": 5130 }, { "epoch": 0.7570638140907414, "grad_norm": 2.140263795852661, "learning_rate": 3.384061226224118e-06, "loss": 0.0505, "step": 5131 }, { "epoch": 0.7572113611213575, "grad_norm": 1.5361111164093018, "learning_rate": 3.3801995658963517e-06, "loss": 0.0572, "step": 5132 }, { "epoch": 0.7573589081519735, "grad_norm": 2.38028621673584, "learning_rate": 3.3763396619845135e-06, "loss": 0.0683, "step": 5133 }, { "epoch": 0.7575064551825894, "grad_norm": 5.6130690574646, "learning_rate": 3.37248151551274e-06, "loss": 0.1424, "step": 5134 }, { "epoch": 0.7576540022132054, "grad_norm": 5.945466041564941, "learning_rate": 3.3686251275046965e-06, "loss": 0.0666, "step": 5135 }, { "epoch": 0.7578015492438215, "grad_norm": 1.9206665754318237, "learning_rate": 3.364770498983603e-06, "loss": 0.0628, "step": 5136 }, { "epoch": 0.7579490962744375, "grad_norm": 1.562780737876892, "learning_rate": 3.3609176309721923e-06, "loss": 0.0591, "step": 5137 }, { "epoch": 0.7580966433050534, "grad_norm": 1.0350431203842163, "learning_rate": 3.3570665244927336e-06, "loss": 0.0129, "step": 5138 }, { "epoch": 0.7582441903356695, "grad_norm": 1.7675925493240356, "learning_rate": 3.3532171805670377e-06, "loss": 0.0419, "step": 5139 }, { "epoch": 0.7583917373662855, "grad_norm": 2.1110544204711914, "learning_rate": 3.3493696002164367e-06, "loss": 0.072, "step": 5140 }, { "epoch": 0.7583917373662855, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05753913149237633, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 49.9012, "eval_samples_per_second": 5.832, "eval_steps_per_second": 0.2, "step": 5140 }, { "epoch": 0.7585392843969015, "grad_norm": 1.4793115854263306, "learning_rate": 3.345523784461802e-06, "loss": 0.0188, "step": 5141 }, { "epoch": 0.7586868314275175, "grad_norm": 3.675874710083008, "learning_rate": 3.3416797343235375e-06, "loss": 0.0655, "step": 5142 }, { "epoch": 0.7588343784581335, "grad_norm": 2.748819351196289, "learning_rate": 3.3378374508215704e-06, "loss": 0.1014, "step": 5143 }, { "epoch": 0.7589819254887495, "grad_norm": 1.8901461362838745, "learning_rate": 3.333996934975372e-06, "loss": 0.0535, "step": 5144 }, { "epoch": 0.7591294725193656, "grad_norm": 2.983309745788574, "learning_rate": 3.330158187803929e-06, "loss": 0.0643, "step": 5145 }, { "epoch": 0.7592770195499815, "grad_norm": 0.45692744851112366, "learning_rate": 3.3263212103257713e-06, "loss": 0.0051, "step": 5146 }, { "epoch": 0.7594245665805975, "grad_norm": 1.113645315170288, "learning_rate": 3.322486003558958e-06, "loss": 0.024, "step": 5147 }, { "epoch": 0.7595721136112136, "grad_norm": 2.4810791015625, "learning_rate": 3.3186525685210726e-06, "loss": 0.0617, "step": 5148 }, { "epoch": 0.7597196606418296, "grad_norm": 5.08605432510376, "learning_rate": 3.3148209062292235e-06, "loss": 0.0717, "step": 5149 }, { "epoch": 0.7598672076724456, "grad_norm": 4.354735374450684, "learning_rate": 3.3109910177000715e-06, "loss": 0.1019, "step": 5150 }, { "epoch": 0.7600147547030616, "grad_norm": 3.3769724369049072, "learning_rate": 3.307162903949783e-06, "loss": 0.0818, "step": 5151 }, { "epoch": 0.7601623017336776, "grad_norm": 1.6954307556152344, "learning_rate": 3.3033365659940607e-06, "loss": 0.0362, "step": 5152 }, { "epoch": 0.7603098487642936, "grad_norm": 2.8093931674957275, "learning_rate": 3.2995120048481454e-06, "loss": 0.0997, "step": 5153 }, { "epoch": 0.7604573957949097, "grad_norm": 3.8435254096984863, "learning_rate": 3.2956892215267887e-06, "loss": 0.0324, "step": 5154 }, { "epoch": 0.7606049428255256, "grad_norm": 5.64138650894165, "learning_rate": 3.291868217044287e-06, "loss": 0.0701, "step": 5155 }, { "epoch": 0.7607524898561416, "grad_norm": 2.0043938159942627, "learning_rate": 3.288048992414461e-06, "loss": 0.0389, "step": 5156 }, { "epoch": 0.7609000368867577, "grad_norm": 5.118934631347656, "learning_rate": 3.284231548650649e-06, "loss": 0.0527, "step": 5157 }, { "epoch": 0.7610475839173737, "grad_norm": 3.248034715652466, "learning_rate": 3.280415886765732e-06, "loss": 0.0854, "step": 5158 }, { "epoch": 0.7611951309479896, "grad_norm": 3.6389718055725098, "learning_rate": 3.276602007772103e-06, "loss": 0.1142, "step": 5159 }, { "epoch": 0.7613426779786057, "grad_norm": 3.4509336948394775, "learning_rate": 3.272789912681693e-06, "loss": 0.0663, "step": 5160 }, { "epoch": 0.7613426779786057, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.055882327258586884, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 49.1697, "eval_samples_per_second": 5.918, "eval_steps_per_second": 0.203, "step": 5160 }, { "epoch": 0.7614902250092217, "grad_norm": 2.4059665203094482, "learning_rate": 3.2689796025059593e-06, "loss": 0.0404, "step": 5161 }, { "epoch": 0.7616377720398377, "grad_norm": 0.8011085391044617, "learning_rate": 3.2651710782558798e-06, "loss": 0.0217, "step": 5162 }, { "epoch": 0.7617853190704537, "grad_norm": 1.9200159311294556, "learning_rate": 3.261364340941957e-06, "loss": 0.0354, "step": 5163 }, { "epoch": 0.7619328661010697, "grad_norm": 1.6926106214523315, "learning_rate": 3.2575593915742288e-06, "loss": 0.0304, "step": 5164 }, { "epoch": 0.7620804131316857, "grad_norm": 1.37638258934021, "learning_rate": 3.2537562311622528e-06, "loss": 0.0139, "step": 5165 }, { "epoch": 0.7622279601623018, "grad_norm": 3.854456901550293, "learning_rate": 3.24995486071511e-06, "loss": 0.1176, "step": 5166 }, { "epoch": 0.7623755071929177, "grad_norm": 1.3088114261627197, "learning_rate": 3.246155281241413e-06, "loss": 0.0423, "step": 5167 }, { "epoch": 0.7625230542235337, "grad_norm": 2.321991443634033, "learning_rate": 3.2423574937492895e-06, "loss": 0.089, "step": 5168 }, { "epoch": 0.7626706012541498, "grad_norm": 1.9959030151367188, "learning_rate": 3.238561499246401e-06, "loss": 0.0307, "step": 5169 }, { "epoch": 0.7628181482847658, "grad_norm": 3.2400119304656982, "learning_rate": 3.2347672987399325e-06, "loss": 0.0827, "step": 5170 }, { "epoch": 0.7629656953153818, "grad_norm": 0.8505774736404419, "learning_rate": 3.2309748932365834e-06, "loss": 0.0204, "step": 5171 }, { "epoch": 0.7631132423459978, "grad_norm": 2.5697474479675293, "learning_rate": 3.2271842837425917e-06, "loss": 0.0374, "step": 5172 }, { "epoch": 0.7632607893766138, "grad_norm": 1.2055716514587402, "learning_rate": 3.223395471263702e-06, "loss": 0.0282, "step": 5173 }, { "epoch": 0.7634083364072298, "grad_norm": 1.3824479579925537, "learning_rate": 3.2196084568051957e-06, "loss": 0.0245, "step": 5174 }, { "epoch": 0.7635558834378459, "grad_norm": 2.052051067352295, "learning_rate": 3.2158232413718747e-06, "loss": 0.0419, "step": 5175 }, { "epoch": 0.7637034304684618, "grad_norm": 2.7622101306915283, "learning_rate": 3.21203982596806e-06, "loss": 0.0937, "step": 5176 }, { "epoch": 0.7638509774990778, "grad_norm": 2.4138739109039307, "learning_rate": 3.20825821159759e-06, "loss": 0.0851, "step": 5177 }, { "epoch": 0.7639985245296939, "grad_norm": 1.903193712234497, "learning_rate": 3.204478399263836e-06, "loss": 0.0525, "step": 5178 }, { "epoch": 0.7641460715603099, "grad_norm": 2.280181646347046, "learning_rate": 3.200700389969692e-06, "loss": 0.099, "step": 5179 }, { "epoch": 0.7642936185909258, "grad_norm": 2.3096065521240234, "learning_rate": 3.196924184717558e-06, "loss": 0.0416, "step": 5180 }, { "epoch": 0.7642936185909258, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.0562606044113636, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 48.7641, "eval_samples_per_second": 5.968, "eval_steps_per_second": 0.205, "step": 5180 }, { "epoch": 0.7644411656215419, "grad_norm": 1.4210330247879028, "learning_rate": 3.1931497845093753e-06, "loss": 0.0239, "step": 5181 }, { "epoch": 0.7645887126521579, "grad_norm": 1.720584511756897, "learning_rate": 3.1893771903465876e-06, "loss": 0.0318, "step": 5182 }, { "epoch": 0.7647362596827739, "grad_norm": 3.058134078979492, "learning_rate": 3.1856064032301724e-06, "loss": 0.079, "step": 5183 }, { "epoch": 0.7648838067133898, "grad_norm": 1.3148670196533203, "learning_rate": 3.1818374241606275e-06, "loss": 0.0598, "step": 5184 }, { "epoch": 0.7650313537440059, "grad_norm": 1.7986829280853271, "learning_rate": 3.1780702541379605e-06, "loss": 0.0542, "step": 5185 }, { "epoch": 0.7651789007746219, "grad_norm": 4.018672943115234, "learning_rate": 3.1743048941617117e-06, "loss": 0.1001, "step": 5186 }, { "epoch": 0.765326447805238, "grad_norm": 5.976879596710205, "learning_rate": 3.17054134523093e-06, "loss": 0.0931, "step": 5187 }, { "epoch": 0.7654739948358539, "grad_norm": 1.592237949371338, "learning_rate": 3.1667796083441893e-06, "loss": 0.0579, "step": 5188 }, { "epoch": 0.7656215418664699, "grad_norm": 2.6272668838500977, "learning_rate": 3.163019684499589e-06, "loss": 0.0332, "step": 5189 }, { "epoch": 0.765769088897086, "grad_norm": 1.2269562482833862, "learning_rate": 3.159261574694734e-06, "loss": 0.0534, "step": 5190 }, { "epoch": 0.765916635927702, "grad_norm": 1.5458407402038574, "learning_rate": 3.1555052799267547e-06, "loss": 0.0256, "step": 5191 }, { "epoch": 0.766064182958318, "grad_norm": 2.287184715270996, "learning_rate": 3.1517508011923017e-06, "loss": 0.0535, "step": 5192 }, { "epoch": 0.7662117299889339, "grad_norm": 2.458590030670166, "learning_rate": 3.147998139487545e-06, "loss": 0.0577, "step": 5193 }, { "epoch": 0.76635927701955, "grad_norm": 3.7397634983062744, "learning_rate": 3.1442472958081614e-06, "loss": 0.0587, "step": 5194 }, { "epoch": 0.766506824050166, "grad_norm": 1.2630696296691895, "learning_rate": 3.1404982711493637e-06, "loss": 0.047, "step": 5195 }, { "epoch": 0.766654371080782, "grad_norm": 1.2866291999816895, "learning_rate": 3.136751066505862e-06, "loss": 0.0197, "step": 5196 }, { "epoch": 0.766801918111398, "grad_norm": 2.062364101409912, "learning_rate": 3.1330056828718967e-06, "loss": 0.0232, "step": 5197 }, { "epoch": 0.766949465142014, "grad_norm": 2.595435619354248, "learning_rate": 3.1292621212412265e-06, "loss": 0.0415, "step": 5198 }, { "epoch": 0.76709701217263, "grad_norm": 1.2132503986358643, "learning_rate": 3.1255203826071147e-06, "loss": 0.0481, "step": 5199 }, { "epoch": 0.7672445592032461, "grad_norm": 1.734116792678833, "learning_rate": 3.121780467962353e-06, "loss": 0.0462, "step": 5200 }, { "epoch": 0.7672445592032461, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05774553865194321, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 48.6431, "eval_samples_per_second": 5.982, "eval_steps_per_second": 0.206, "step": 5200 }, { "epoch": 0.767392106233862, "grad_norm": 1.9193569421768188, "learning_rate": 3.118042378299243e-06, "loss": 0.017, "step": 5201 }, { "epoch": 0.767539653264478, "grad_norm": 3.5385794639587402, "learning_rate": 3.1143061146095943e-06, "loss": 0.0724, "step": 5202 }, { "epoch": 0.7676872002950941, "grad_norm": 1.2794243097305298, "learning_rate": 3.110571677884755e-06, "loss": 0.0168, "step": 5203 }, { "epoch": 0.7678347473257101, "grad_norm": 2.951565980911255, "learning_rate": 3.106839069115568e-06, "loss": 0.0772, "step": 5204 }, { "epoch": 0.767982294356326, "grad_norm": 1.701050877571106, "learning_rate": 3.103108289292395e-06, "loss": 0.0396, "step": 5205 }, { "epoch": 0.7681298413869421, "grad_norm": 1.6941574811935425, "learning_rate": 3.0993793394051164e-06, "loss": 0.0222, "step": 5206 }, { "epoch": 0.7682773884175581, "grad_norm": 3.3940978050231934, "learning_rate": 3.0956522204431305e-06, "loss": 0.0473, "step": 5207 }, { "epoch": 0.7684249354481741, "grad_norm": 2.65412974357605, "learning_rate": 3.091926933395337e-06, "loss": 0.0745, "step": 5208 }, { "epoch": 0.7685724824787901, "grad_norm": 1.8963632583618164, "learning_rate": 3.0882034792501648e-06, "loss": 0.0452, "step": 5209 }, { "epoch": 0.7687200295094061, "grad_norm": 5.716403484344482, "learning_rate": 3.0844818589955427e-06, "loss": 0.0788, "step": 5210 }, { "epoch": 0.7688675765400221, "grad_norm": 3.9042954444885254, "learning_rate": 3.0807620736189225e-06, "loss": 0.1066, "step": 5211 }, { "epoch": 0.7690151235706382, "grad_norm": 2.744814395904541, "learning_rate": 3.077044124107269e-06, "loss": 0.066, "step": 5212 }, { "epoch": 0.7691626706012541, "grad_norm": 3.1964786052703857, "learning_rate": 3.073328011447051e-06, "loss": 0.1027, "step": 5213 }, { "epoch": 0.7693102176318701, "grad_norm": 1.8805004358291626, "learning_rate": 3.0696137366242617e-06, "loss": 0.0747, "step": 5214 }, { "epoch": 0.7694577646624862, "grad_norm": 1.7196863889694214, "learning_rate": 3.0659013006243978e-06, "loss": 0.0239, "step": 5215 }, { "epoch": 0.7696053116931022, "grad_norm": 1.4553349018096924, "learning_rate": 3.062190704432464e-06, "loss": 0.0675, "step": 5216 }, { "epoch": 0.7697528587237182, "grad_norm": 1.0774309635162354, "learning_rate": 3.058481949032999e-06, "loss": 0.0303, "step": 5217 }, { "epoch": 0.7699004057543342, "grad_norm": 0.8425986766815186, "learning_rate": 3.0547750354100293e-06, "loss": 0.0143, "step": 5218 }, { "epoch": 0.7700479527849502, "grad_norm": 3.5233235359191895, "learning_rate": 3.0510699645470988e-06, "loss": 0.1084, "step": 5219 }, { "epoch": 0.7701954998155662, "grad_norm": 2.555990695953369, "learning_rate": 3.047366737427272e-06, "loss": 0.0268, "step": 5220 }, { "epoch": 0.7701954998155662, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05848207324743271, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 49.145, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.203, "step": 5220 }, { "epoch": 0.7703430468461823, "grad_norm": 3.001358985900879, "learning_rate": 3.0436653550331118e-06, "loss": 0.0602, "step": 5221 }, { "epoch": 0.7704905938767982, "grad_norm": 2.2504324913024902, "learning_rate": 3.0399658183466986e-06, "loss": 0.0409, "step": 5222 }, { "epoch": 0.7706381409074142, "grad_norm": 2.4788732528686523, "learning_rate": 3.0362681283496265e-06, "loss": 0.0466, "step": 5223 }, { "epoch": 0.7707856879380303, "grad_norm": 2.33152437210083, "learning_rate": 3.032572286022987e-06, "loss": 0.0841, "step": 5224 }, { "epoch": 0.7709332349686463, "grad_norm": 1.1605726480484009, "learning_rate": 3.0288782923473923e-06, "loss": 0.0289, "step": 5225 }, { "epoch": 0.7710807819992622, "grad_norm": 0.8467137813568115, "learning_rate": 3.0251861483029663e-06, "loss": 0.0291, "step": 5226 }, { "epoch": 0.7712283290298783, "grad_norm": 0.3946162760257721, "learning_rate": 3.0214958548693283e-06, "loss": 0.005, "step": 5227 }, { "epoch": 0.7713758760604943, "grad_norm": 2.120964765548706, "learning_rate": 3.017807413025623e-06, "loss": 0.0446, "step": 5228 }, { "epoch": 0.7715234230911103, "grad_norm": 1.7589060068130493, "learning_rate": 3.0141208237504914e-06, "loss": 0.0465, "step": 5229 }, { "epoch": 0.7716709701217263, "grad_norm": 1.674916386604309, "learning_rate": 3.0104360880220805e-06, "loss": 0.0482, "step": 5230 }, { "epoch": 0.7718185171523423, "grad_norm": 2.2239298820495605, "learning_rate": 3.0067532068180683e-06, "loss": 0.0875, "step": 5231 }, { "epoch": 0.7719660641829583, "grad_norm": 1.9524692296981812, "learning_rate": 3.003072181115615e-06, "loss": 0.0151, "step": 5232 }, { "epoch": 0.7721136112135744, "grad_norm": 5.365119934082031, "learning_rate": 2.9993930118913973e-06, "loss": 0.1871, "step": 5233 }, { "epoch": 0.7722611582441903, "grad_norm": 5.179162502288818, "learning_rate": 2.9957157001216066e-06, "loss": 0.0461, "step": 5234 }, { "epoch": 0.7724087052748063, "grad_norm": 3.075514793395996, "learning_rate": 2.9920402467819287e-06, "loss": 0.092, "step": 5235 }, { "epoch": 0.7725562523054224, "grad_norm": 5.156469345092773, "learning_rate": 2.988366652847565e-06, "loss": 0.1425, "step": 5236 }, { "epoch": 0.7727037993360384, "grad_norm": 3.7485973834991455, "learning_rate": 2.984694919293226e-06, "loss": 0.0344, "step": 5237 }, { "epoch": 0.7728513463666544, "grad_norm": 1.7240041494369507, "learning_rate": 2.981025047093118e-06, "loss": 0.0283, "step": 5238 }, { "epoch": 0.7729988933972703, "grad_norm": 0.9162787795066833, "learning_rate": 2.9773570372209646e-06, "loss": 0.0176, "step": 5239 }, { "epoch": 0.7731464404278864, "grad_norm": 1.6263129711151123, "learning_rate": 2.973690890649985e-06, "loss": 0.0406, "step": 5240 }, { "epoch": 0.7731464404278864, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9603960396039604, "eval_loss": 0.05597265809774399, "eval_precision": 0.9797979797979798, "eval_recall": 0.941747572815534, "eval_runtime": 50.3581, "eval_samples_per_second": 5.779, "eval_steps_per_second": 0.199, "step": 5240 }, { "epoch": 0.7732939874585024, "grad_norm": 9.431975364685059, "learning_rate": 2.9700266083529115e-06, "loss": 0.0876, "step": 5241 }, { "epoch": 0.7734415344891185, "grad_norm": 2.355358600616455, "learning_rate": 2.9663641913019824e-06, "loss": 0.0835, "step": 5242 }, { "epoch": 0.7735890815197344, "grad_norm": 1.5662035942077637, "learning_rate": 2.9627036404689358e-06, "loss": 0.0188, "step": 5243 }, { "epoch": 0.7737366285503504, "grad_norm": 2.697141647338867, "learning_rate": 2.95904495682501e-06, "loss": 0.0449, "step": 5244 }, { "epoch": 0.7738841755809664, "grad_norm": 2.2803616523742676, "learning_rate": 2.955388141340967e-06, "loss": 0.0767, "step": 5245 }, { "epoch": 0.7740317226115825, "grad_norm": 1.724706768989563, "learning_rate": 2.9517331949870564e-06, "loss": 0.0372, "step": 5246 }, { "epoch": 0.7741792696421984, "grad_norm": 1.7451180219650269, "learning_rate": 2.948080118733031e-06, "loss": 0.0676, "step": 5247 }, { "epoch": 0.7743268166728144, "grad_norm": 1.5027248859405518, "learning_rate": 2.9444289135481606e-06, "loss": 0.0152, "step": 5248 }, { "epoch": 0.7744743637034305, "grad_norm": 2.300201654434204, "learning_rate": 2.940779580401205e-06, "loss": 0.0845, "step": 5249 }, { "epoch": 0.7746219107340465, "grad_norm": 1.5628377199172974, "learning_rate": 2.9371321202604354e-06, "loss": 0.0464, "step": 5250 }, { "epoch": 0.7747694577646624, "grad_norm": 1.8562650680541992, "learning_rate": 2.933486534093626e-06, "loss": 0.0174, "step": 5251 }, { "epoch": 0.7749170047952785, "grad_norm": 2.797605037689209, "learning_rate": 2.9298428228680466e-06, "loss": 0.0761, "step": 5252 }, { "epoch": 0.7750645518258945, "grad_norm": 2.299274444580078, "learning_rate": 2.9262009875504803e-06, "loss": 0.0439, "step": 5253 }, { "epoch": 0.7752120988565105, "grad_norm": 5.1961212158203125, "learning_rate": 2.9225610291072002e-06, "loss": 0.1431, "step": 5254 }, { "epoch": 0.7753596458871265, "grad_norm": 1.4678155183792114, "learning_rate": 2.9189229485039917e-06, "loss": 0.0421, "step": 5255 }, { "epoch": 0.7755071929177425, "grad_norm": 1.2804898023605347, "learning_rate": 2.9152867467061407e-06, "loss": 0.0226, "step": 5256 }, { "epoch": 0.7756547399483585, "grad_norm": 1.6000317335128784, "learning_rate": 2.911652424678425e-06, "loss": 0.0241, "step": 5257 }, { "epoch": 0.7758022869789746, "grad_norm": 2.390232563018799, "learning_rate": 2.9080199833851375e-06, "loss": 0.0608, "step": 5258 }, { "epoch": 0.7759498340095906, "grad_norm": 3.5458486080169678, "learning_rate": 2.9043894237900593e-06, "loss": 0.0993, "step": 5259 }, { "epoch": 0.7760973810402065, "grad_norm": 3.5456619262695312, "learning_rate": 2.9007607468564815e-06, "loss": 0.0304, "step": 5260 }, { "epoch": 0.7760973810402065, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.059240952134132385, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 50.3608, "eval_samples_per_second": 5.778, "eval_steps_per_second": 0.199, "step": 5260 }, { "epoch": 0.7762449280708226, "grad_norm": 2.1354801654815674, "learning_rate": 2.8971339535471943e-06, "loss": 0.0842, "step": 5261 }, { "epoch": 0.7763924751014386, "grad_norm": 3.436898708343506, "learning_rate": 2.893509044824484e-06, "loss": 0.0869, "step": 5262 }, { "epoch": 0.7765400221320546, "grad_norm": 2.397908926010132, "learning_rate": 2.889886021650132e-06, "loss": 0.0432, "step": 5263 }, { "epoch": 0.7766875691626706, "grad_norm": 6.913381576538086, "learning_rate": 2.8862648849854393e-06, "loss": 0.1307, "step": 5264 }, { "epoch": 0.7768351161932866, "grad_norm": 2.0887036323547363, "learning_rate": 2.8826456357911883e-06, "loss": 0.0561, "step": 5265 }, { "epoch": 0.7769826632239026, "grad_norm": 2.4647176265716553, "learning_rate": 2.87902827502766e-06, "loss": 0.0783, "step": 5266 }, { "epoch": 0.7771302102545187, "grad_norm": 1.824882984161377, "learning_rate": 2.8754128036546493e-06, "loss": 0.0685, "step": 5267 }, { "epoch": 0.7772777572851346, "grad_norm": 0.9750489592552185, "learning_rate": 2.8717992226314327e-06, "loss": 0.0345, "step": 5268 }, { "epoch": 0.7774253043157506, "grad_norm": 2.5599873065948486, "learning_rate": 2.8681875329167975e-06, "loss": 0.0414, "step": 5269 }, { "epoch": 0.7775728513463667, "grad_norm": 1.7387886047363281, "learning_rate": 2.864577735469026e-06, "loss": 0.0482, "step": 5270 }, { "epoch": 0.7777203983769827, "grad_norm": 2.2460641860961914, "learning_rate": 2.860969831245892e-06, "loss": 0.0619, "step": 5271 }, { "epoch": 0.7778679454075986, "grad_norm": 3.2521746158599854, "learning_rate": 2.857363821204678e-06, "loss": 0.0731, "step": 5272 }, { "epoch": 0.7780154924382147, "grad_norm": 2.0009443759918213, "learning_rate": 2.853759706302154e-06, "loss": 0.0487, "step": 5273 }, { "epoch": 0.7781630394688307, "grad_norm": 5.98586368560791, "learning_rate": 2.8501574874945913e-06, "loss": 0.1677, "step": 5274 }, { "epoch": 0.7783105864994467, "grad_norm": 1.6888837814331055, "learning_rate": 2.8465571657377622e-06, "loss": 0.0289, "step": 5275 }, { "epoch": 0.7784581335300627, "grad_norm": 4.116874694824219, "learning_rate": 2.8429587419869288e-06, "loss": 0.0579, "step": 5276 }, { "epoch": 0.7786056805606787, "grad_norm": 0.8754475712776184, "learning_rate": 2.8393622171968495e-06, "loss": 0.0216, "step": 5277 }, { "epoch": 0.7787532275912947, "grad_norm": 2.2235543727874756, "learning_rate": 2.835767592321784e-06, "loss": 0.0438, "step": 5278 }, { "epoch": 0.7789007746219108, "grad_norm": 2.6578171253204346, "learning_rate": 2.8321748683154893e-06, "loss": 0.0405, "step": 5279 }, { "epoch": 0.7790483216525267, "grad_norm": 6.510404109954834, "learning_rate": 2.8285840461312074e-06, "loss": 0.0758, "step": 5280 }, { "epoch": 0.7790483216525267, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.05800038203597069, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.0171, "eval_samples_per_second": 5.937, "eval_steps_per_second": 0.204, "step": 5280 }, { "epoch": 0.7791958686831427, "grad_norm": 1.615730881690979, "learning_rate": 2.824995126721689e-06, "loss": 0.0347, "step": 5281 }, { "epoch": 0.7793434157137588, "grad_norm": 4.460781097412109, "learning_rate": 2.821408111039169e-06, "loss": 0.097, "step": 5282 }, { "epoch": 0.7794909627443748, "grad_norm": 4.584042549133301, "learning_rate": 2.817823000035382e-06, "loss": 0.084, "step": 5283 }, { "epoch": 0.7796385097749908, "grad_norm": 2.201601028442383, "learning_rate": 2.814239794661563e-06, "loss": 0.0288, "step": 5284 }, { "epoch": 0.7797860568056068, "grad_norm": 1.7005723714828491, "learning_rate": 2.810658495868427e-06, "loss": 0.0756, "step": 5285 }, { "epoch": 0.7799336038362228, "grad_norm": 1.3044039011001587, "learning_rate": 2.8070791046061984e-06, "loss": 0.0273, "step": 5286 }, { "epoch": 0.7800811508668388, "grad_norm": 1.7226636409759521, "learning_rate": 2.8035016218245825e-06, "loss": 0.0416, "step": 5287 }, { "epoch": 0.7802286978974549, "grad_norm": 2.6224257946014404, "learning_rate": 2.7999260484727864e-06, "loss": 0.0386, "step": 5288 }, { "epoch": 0.7803762449280708, "grad_norm": 1.1985563039779663, "learning_rate": 2.7963523854995135e-06, "loss": 0.0269, "step": 5289 }, { "epoch": 0.7805237919586868, "grad_norm": 0.6113758683204651, "learning_rate": 2.7927806338529517e-06, "loss": 0.0058, "step": 5290 }, { "epoch": 0.7806713389893029, "grad_norm": 3.3421359062194824, "learning_rate": 2.7892107944807813e-06, "loss": 0.0541, "step": 5291 }, { "epoch": 0.7808188860199189, "grad_norm": 5.942457675933838, "learning_rate": 2.785642868330183e-06, "loss": 0.0936, "step": 5292 }, { "epoch": 0.7809664330505348, "grad_norm": 3.3285698890686035, "learning_rate": 2.7820768563478285e-06, "loss": 0.0472, "step": 5293 }, { "epoch": 0.7811139800811508, "grad_norm": 2.533691883087158, "learning_rate": 2.7785127594798746e-06, "loss": 0.0592, "step": 5294 }, { "epoch": 0.7812615271117669, "grad_norm": 2.4985992908477783, "learning_rate": 2.77495057867198e-06, "loss": 0.0767, "step": 5295 }, { "epoch": 0.7814090741423829, "grad_norm": 2.9149117469787598, "learning_rate": 2.771390314869282e-06, "loss": 0.075, "step": 5296 }, { "epoch": 0.7815566211729988, "grad_norm": 2.872842788696289, "learning_rate": 2.7678319690164234e-06, "loss": 0.0104, "step": 5297 }, { "epoch": 0.7817041682036149, "grad_norm": 4.3801774978637695, "learning_rate": 2.764275542057533e-06, "loss": 0.0347, "step": 5298 }, { "epoch": 0.7818517152342309, "grad_norm": 2.8672521114349365, "learning_rate": 2.7607210349362212e-06, "loss": 0.078, "step": 5299 }, { "epoch": 0.781999262264847, "grad_norm": 1.9409520626068115, "learning_rate": 2.7571684485956064e-06, "loss": 0.0623, "step": 5300 }, { "epoch": 0.781999262264847, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.05822517350316048, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 49.1412, "eval_samples_per_second": 5.922, "eval_steps_per_second": 0.203, "step": 5300 }, { "epoch": 0.7821468092954629, "grad_norm": 1.5656439065933228, "learning_rate": 2.7536177839782786e-06, "loss": 0.0463, "step": 5301 }, { "epoch": 0.7822943563260789, "grad_norm": 2.506927728652954, "learning_rate": 2.750069042026332e-06, "loss": 0.0595, "step": 5302 }, { "epoch": 0.782441903356695, "grad_norm": 1.9016752243041992, "learning_rate": 2.746522223681349e-06, "loss": 0.0678, "step": 5303 }, { "epoch": 0.782589450387311, "grad_norm": 2.0928428173065186, "learning_rate": 2.7429773298843952e-06, "loss": 0.0629, "step": 5304 }, { "epoch": 0.782736997417927, "grad_norm": 1.3887627124786377, "learning_rate": 2.7394343615760265e-06, "loss": 0.0364, "step": 5305 }, { "epoch": 0.7828845444485429, "grad_norm": 3.573763608932495, "learning_rate": 2.735893319696291e-06, "loss": 0.0537, "step": 5306 }, { "epoch": 0.783032091479159, "grad_norm": 0.8567660450935364, "learning_rate": 2.7323542051847308e-06, "loss": 0.0237, "step": 5307 }, { "epoch": 0.783179638509775, "grad_norm": 2.4486968517303467, "learning_rate": 2.728817018980362e-06, "loss": 0.064, "step": 5308 }, { "epoch": 0.783327185540391, "grad_norm": 1.3299813270568848, "learning_rate": 2.7252817620217066e-06, "loss": 0.0321, "step": 5309 }, { "epoch": 0.783474732571007, "grad_norm": 1.498084545135498, "learning_rate": 2.7217484352467582e-06, "loss": 0.0168, "step": 5310 }, { "epoch": 0.783622279601623, "grad_norm": 3.375323534011841, "learning_rate": 2.7182170395930084e-06, "loss": 0.0588, "step": 5311 }, { "epoch": 0.783769826632239, "grad_norm": 0.9653108716011047, "learning_rate": 2.714687575997439e-06, "loss": 0.0142, "step": 5312 }, { "epoch": 0.7839173736628551, "grad_norm": 1.4389169216156006, "learning_rate": 2.711160045396507e-06, "loss": 0.0254, "step": 5313 }, { "epoch": 0.784064920693471, "grad_norm": 2.4575436115264893, "learning_rate": 2.7076344487261695e-06, "loss": 0.0447, "step": 5314 }, { "epoch": 0.784212467724087, "grad_norm": 2.1882901191711426, "learning_rate": 2.704110786921862e-06, "loss": 0.0654, "step": 5315 }, { "epoch": 0.7843600147547031, "grad_norm": 1.6151487827301025, "learning_rate": 2.7005890609185036e-06, "loss": 0.034, "step": 5316 }, { "epoch": 0.7845075617853191, "grad_norm": 2.9926345348358154, "learning_rate": 2.6970692716505175e-06, "loss": 0.0426, "step": 5317 }, { "epoch": 0.784655108815935, "grad_norm": 1.633247971534729, "learning_rate": 2.6935514200517943e-06, "loss": 0.0344, "step": 5318 }, { "epoch": 0.7848026558465511, "grad_norm": 5.228299617767334, "learning_rate": 2.690035507055715e-06, "loss": 0.0786, "step": 5319 }, { "epoch": 0.7849502028771671, "grad_norm": 3.2154364585876465, "learning_rate": 2.6865215335951513e-06, "loss": 0.0472, "step": 5320 }, { "epoch": 0.7849502028771671, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05855966731905937, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 51.3509, "eval_samples_per_second": 5.667, "eval_steps_per_second": 0.195, "step": 5320 }, { "epoch": 0.7850977499077831, "grad_norm": 2.0289244651794434, "learning_rate": 2.683009500602459e-06, "loss": 0.0461, "step": 5321 }, { "epoch": 0.7852452969383991, "grad_norm": 1.6469234228134155, "learning_rate": 2.679499409009475e-06, "loss": 0.0758, "step": 5322 }, { "epoch": 0.7853928439690151, "grad_norm": 2.004470109939575, "learning_rate": 2.6759912597475257e-06, "loss": 0.0281, "step": 5323 }, { "epoch": 0.7855403909996311, "grad_norm": 1.8586945533752441, "learning_rate": 2.6724850537474157e-06, "loss": 0.093, "step": 5324 }, { "epoch": 0.7856879380302472, "grad_norm": 1.0184153318405151, "learning_rate": 2.6689807919394418e-06, "loss": 0.0159, "step": 5325 }, { "epoch": 0.7858354850608632, "grad_norm": 3.390073776245117, "learning_rate": 2.6654784752533836e-06, "loss": 0.0966, "step": 5326 }, { "epoch": 0.7859830320914791, "grad_norm": 1.927875280380249, "learning_rate": 2.661978104618498e-06, "loss": 0.0308, "step": 5327 }, { "epoch": 0.7861305791220952, "grad_norm": 1.220982551574707, "learning_rate": 2.6584796809635326e-06, "loss": 0.0255, "step": 5328 }, { "epoch": 0.7862781261527112, "grad_norm": 3.14682674407959, "learning_rate": 2.6549832052167167e-06, "loss": 0.0681, "step": 5329 }, { "epoch": 0.7864256731833272, "grad_norm": 2.710415840148926, "learning_rate": 2.6514886783057536e-06, "loss": 0.072, "step": 5330 }, { "epoch": 0.7865732202139432, "grad_norm": 2.1022698879241943, "learning_rate": 2.64799610115785e-06, "loss": 0.0582, "step": 5331 }, { "epoch": 0.7867207672445592, "grad_norm": 2.8391401767730713, "learning_rate": 2.644505474699679e-06, "loss": 0.0821, "step": 5332 }, { "epoch": 0.7868683142751752, "grad_norm": 3.7496092319488525, "learning_rate": 2.6410167998573945e-06, "loss": 0.1063, "step": 5333 }, { "epoch": 0.7870158613057913, "grad_norm": 2.5423924922943115, "learning_rate": 2.637530077556646e-06, "loss": 0.1281, "step": 5334 }, { "epoch": 0.7871634083364072, "grad_norm": 2.38148832321167, "learning_rate": 2.6340453087225506e-06, "loss": 0.0409, "step": 5335 }, { "epoch": 0.7873109553670232, "grad_norm": 2.238295078277588, "learning_rate": 2.630562494279717e-06, "loss": 0.0626, "step": 5336 }, { "epoch": 0.7874585023976393, "grad_norm": 1.4229605197906494, "learning_rate": 2.6270816351522353e-06, "loss": 0.0491, "step": 5337 }, { "epoch": 0.7876060494282553, "grad_norm": 1.0119794607162476, "learning_rate": 2.623602732263667e-06, "loss": 0.023, "step": 5338 }, { "epoch": 0.7877535964588712, "grad_norm": 1.6918085813522339, "learning_rate": 2.620125786537068e-06, "loss": 0.0485, "step": 5339 }, { "epoch": 0.7879011434894873, "grad_norm": 2.1017305850982666, "learning_rate": 2.6166507988949618e-06, "loss": 0.0349, "step": 5340 }, { "epoch": 0.7879011434894873, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.055785804986953735, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 50.9288, "eval_samples_per_second": 5.714, "eval_steps_per_second": 0.196, "step": 5340 }, { "epoch": 0.7880486905201033, "grad_norm": 2.438767433166504, "learning_rate": 2.613177770259361e-06, "loss": 0.0311, "step": 5341 }, { "epoch": 0.7881962375507193, "grad_norm": 1.9589561223983765, "learning_rate": 2.6097067015517584e-06, "loss": 0.0206, "step": 5342 }, { "epoch": 0.7883437845813352, "grad_norm": 2.1601192951202393, "learning_rate": 2.6062375936931237e-06, "loss": 0.0722, "step": 5343 }, { "epoch": 0.7884913316119513, "grad_norm": 0.8042226433753967, "learning_rate": 2.6027704476038997e-06, "loss": 0.0164, "step": 5344 }, { "epoch": 0.7886388786425673, "grad_norm": 3.878230333328247, "learning_rate": 2.5993052642040272e-06, "loss": 0.0614, "step": 5345 }, { "epoch": 0.7887864256731834, "grad_norm": 5.761648178100586, "learning_rate": 2.5958420444129106e-06, "loss": 0.0831, "step": 5346 }, { "epoch": 0.7889339727037994, "grad_norm": 1.7258800268173218, "learning_rate": 2.5923807891494344e-06, "loss": 0.0635, "step": 5347 }, { "epoch": 0.7890815197344153, "grad_norm": 1.34648859500885, "learning_rate": 2.58892149933197e-06, "loss": 0.0234, "step": 5348 }, { "epoch": 0.7892290667650314, "grad_norm": 1.4770740270614624, "learning_rate": 2.585464175878358e-06, "loss": 0.0385, "step": 5349 }, { "epoch": 0.7893766137956474, "grad_norm": 1.7273871898651123, "learning_rate": 2.5820088197059257e-06, "loss": 0.0207, "step": 5350 }, { "epoch": 0.7895241608262634, "grad_norm": 2.5592103004455566, "learning_rate": 2.5785554317314743e-06, "loss": 0.055, "step": 5351 }, { "epoch": 0.7896717078568793, "grad_norm": 0.9181654453277588, "learning_rate": 2.57510401287128e-06, "loss": 0.0244, "step": 5352 }, { "epoch": 0.7898192548874954, "grad_norm": 4.085352420806885, "learning_rate": 2.5716545640411038e-06, "loss": 0.0526, "step": 5353 }, { "epoch": 0.7899668019181114, "grad_norm": 2.349663257598877, "learning_rate": 2.5682070861561746e-06, "loss": 0.0454, "step": 5354 }, { "epoch": 0.7901143489487275, "grad_norm": 2.401130199432373, "learning_rate": 2.5647615801312075e-06, "loss": 0.0912, "step": 5355 }, { "epoch": 0.7902618959793434, "grad_norm": 4.645292282104492, "learning_rate": 2.561318046880391e-06, "loss": 0.0439, "step": 5356 }, { "epoch": 0.7904094430099594, "grad_norm": 1.8019828796386719, "learning_rate": 2.557876487317389e-06, "loss": 0.0349, "step": 5357 }, { "epoch": 0.7905569900405754, "grad_norm": 4.21326208114624, "learning_rate": 2.5544369023553374e-06, "loss": 0.0663, "step": 5358 }, { "epoch": 0.7907045370711915, "grad_norm": 2.248645782470703, "learning_rate": 2.5509992929068574e-06, "loss": 0.0488, "step": 5359 }, { "epoch": 0.7908520841018074, "grad_norm": 1.2899450063705444, "learning_rate": 2.5475636598840436e-06, "loss": 0.0311, "step": 5360 }, { "epoch": 0.7908520841018074, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05607510730624199, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 50.099, "eval_samples_per_second": 5.809, "eval_steps_per_second": 0.2, "step": 5360 }, { "epoch": 0.7909996311324234, "grad_norm": 1.009870171546936, "learning_rate": 2.5441300041984597e-06, "loss": 0.0173, "step": 5361 }, { "epoch": 0.7911471781630395, "grad_norm": 2.5811681747436523, "learning_rate": 2.540698326761155e-06, "loss": 0.0604, "step": 5362 }, { "epoch": 0.7912947251936555, "grad_norm": 1.6870110034942627, "learning_rate": 2.537268628482641e-06, "loss": 0.0499, "step": 5363 }, { "epoch": 0.7914422722242714, "grad_norm": 3.89064884185791, "learning_rate": 2.5338409102729167e-06, "loss": 0.0769, "step": 5364 }, { "epoch": 0.7915898192548875, "grad_norm": 2.933382987976074, "learning_rate": 2.5304151730414515e-06, "loss": 0.1065, "step": 5365 }, { "epoch": 0.7917373662855035, "grad_norm": 2.460965156555176, "learning_rate": 2.5269914176971833e-06, "loss": 0.0238, "step": 5366 }, { "epoch": 0.7918849133161195, "grad_norm": 1.991490364074707, "learning_rate": 2.5235696451485338e-06, "loss": 0.04, "step": 5367 }, { "epoch": 0.7920324603467355, "grad_norm": 1.4925801753997803, "learning_rate": 2.520149856303389e-06, "loss": 0.0355, "step": 5368 }, { "epoch": 0.7921800073773515, "grad_norm": 3.1280343532562256, "learning_rate": 2.516732052069115e-06, "loss": 0.0649, "step": 5369 }, { "epoch": 0.7923275544079675, "grad_norm": 1.5759592056274414, "learning_rate": 2.5133162333525542e-06, "loss": 0.0257, "step": 5370 }, { "epoch": 0.7924751014385836, "grad_norm": 2.7479288578033447, "learning_rate": 2.5099024010600136e-06, "loss": 0.0994, "step": 5371 }, { "epoch": 0.7926226484691996, "grad_norm": 1.898484230041504, "learning_rate": 2.506490556097275e-06, "loss": 0.0695, "step": 5372 }, { "epoch": 0.7927701954998155, "grad_norm": 5.363552093505859, "learning_rate": 2.503080699369598e-06, "loss": 0.0814, "step": 5373 }, { "epoch": 0.7929177425304316, "grad_norm": 4.32010555267334, "learning_rate": 2.499672831781714e-06, "loss": 0.061, "step": 5374 }, { "epoch": 0.7930652895610476, "grad_norm": 3.316005229949951, "learning_rate": 2.4962669542378192e-06, "loss": 0.0392, "step": 5375 }, { "epoch": 0.7932128365916636, "grad_norm": 1.608512282371521, "learning_rate": 2.4928630676415934e-06, "loss": 0.0159, "step": 5376 }, { "epoch": 0.7933603836222796, "grad_norm": 3.7987148761749268, "learning_rate": 2.489461172896174e-06, "loss": 0.0827, "step": 5377 }, { "epoch": 0.7935079306528956, "grad_norm": 2.835662603378296, "learning_rate": 2.486061270904182e-06, "loss": 0.0588, "step": 5378 }, { "epoch": 0.7936554776835116, "grad_norm": 3.811069965362549, "learning_rate": 2.4826633625677076e-06, "loss": 0.0465, "step": 5379 }, { "epoch": 0.7938030247141277, "grad_norm": 6.750401973724365, "learning_rate": 2.479267448788305e-06, "loss": 0.0943, "step": 5380 }, { "epoch": 0.7938030247141277, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.05801532045006752, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 50.7029, "eval_samples_per_second": 5.739, "eval_steps_per_second": 0.197, "step": 5380 }, { "epoch": 0.7939505717447436, "grad_norm": 1.1002846956253052, "learning_rate": 2.475873530467008e-06, "loss": 0.0337, "step": 5381 }, { "epoch": 0.7940981187753596, "grad_norm": 1.5650755167007446, "learning_rate": 2.4724816085043122e-06, "loss": 0.0179, "step": 5382 }, { "epoch": 0.7942456658059757, "grad_norm": 0.6831604838371277, "learning_rate": 2.4690916838001922e-06, "loss": 0.0105, "step": 5383 }, { "epoch": 0.7943932128365917, "grad_norm": 3.756869077682495, "learning_rate": 2.46570375725409e-06, "loss": 0.0926, "step": 5384 }, { "epoch": 0.7945407598672076, "grad_norm": 2.83058500289917, "learning_rate": 2.4623178297649096e-06, "loss": 0.1012, "step": 5385 }, { "epoch": 0.7946883068978237, "grad_norm": 3.7224292755126953, "learning_rate": 2.4589339022310386e-06, "loss": 0.0692, "step": 5386 }, { "epoch": 0.7948358539284397, "grad_norm": 1.9389256238937378, "learning_rate": 2.45555197555032e-06, "loss": 0.0629, "step": 5387 }, { "epoch": 0.7949834009590557, "grad_norm": 5.5249738693237305, "learning_rate": 2.452172050620075e-06, "loss": 0.1869, "step": 5388 }, { "epoch": 0.7951309479896717, "grad_norm": 1.9063117504119873, "learning_rate": 2.4487941283370954e-06, "loss": 0.0326, "step": 5389 }, { "epoch": 0.7952784950202877, "grad_norm": 2.4969637393951416, "learning_rate": 2.445418209597632e-06, "loss": 0.0569, "step": 5390 }, { "epoch": 0.7954260420509037, "grad_norm": 1.2057452201843262, "learning_rate": 2.442044295297409e-06, "loss": 0.0214, "step": 5391 }, { "epoch": 0.7955735890815198, "grad_norm": 2.613948106765747, "learning_rate": 2.43867238633162e-06, "loss": 0.0917, "step": 5392 }, { "epoch": 0.7957211361121358, "grad_norm": 1.5845582485198975, "learning_rate": 2.435302483594928e-06, "loss": 0.0626, "step": 5393 }, { "epoch": 0.7958686831427517, "grad_norm": 2.9313783645629883, "learning_rate": 2.4319345879814572e-06, "loss": 0.0397, "step": 5394 }, { "epoch": 0.7960162301733678, "grad_norm": 4.550658226013184, "learning_rate": 2.428568700384809e-06, "loss": 0.0916, "step": 5395 }, { "epoch": 0.7961637772039838, "grad_norm": 0.9189959764480591, "learning_rate": 2.4252048216980395e-06, "loss": 0.0205, "step": 5396 }, { "epoch": 0.7963113242345998, "grad_norm": 1.97365140914917, "learning_rate": 2.4218429528136834e-06, "loss": 0.0289, "step": 5397 }, { "epoch": 0.7964588712652158, "grad_norm": 4.259189605712891, "learning_rate": 2.4184830946237382e-06, "loss": 0.0488, "step": 5398 }, { "epoch": 0.7966064182958318, "grad_norm": 3.0501973628997803, "learning_rate": 2.415125248019662e-06, "loss": 0.1202, "step": 5399 }, { "epoch": 0.7967539653264478, "grad_norm": 1.3943859338760376, "learning_rate": 2.4117694138923897e-06, "loss": 0.0458, "step": 5400 }, { "epoch": 0.7967539653264478, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.05791114270687103, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 51.2864, "eval_samples_per_second": 5.674, "eval_steps_per_second": 0.195, "step": 5400 }, { "epoch": 0.7969015123570639, "grad_norm": 1.2007352113723755, "learning_rate": 2.408415593132312e-06, "loss": 0.017, "step": 5401 }, { "epoch": 0.7970490593876798, "grad_norm": 3.607623338699341, "learning_rate": 2.4050637866292923e-06, "loss": 0.0923, "step": 5402 }, { "epoch": 0.7971966064182958, "grad_norm": 1.9466493129730225, "learning_rate": 2.401713995272661e-06, "loss": 0.0218, "step": 5403 }, { "epoch": 0.7973441534489119, "grad_norm": 2.285327672958374, "learning_rate": 2.398366219951207e-06, "loss": 0.0657, "step": 5404 }, { "epoch": 0.7974917004795279, "grad_norm": 4.971370220184326, "learning_rate": 2.395020461553185e-06, "loss": 0.0864, "step": 5405 }, { "epoch": 0.7976392475101438, "grad_norm": 2.9141769409179688, "learning_rate": 2.3916767209663195e-06, "loss": 0.061, "step": 5406 }, { "epoch": 0.7977867945407598, "grad_norm": 1.888851523399353, "learning_rate": 2.3883349990778005e-06, "loss": 0.0386, "step": 5407 }, { "epoch": 0.7979343415713759, "grad_norm": 4.009326457977295, "learning_rate": 2.3849952967742727e-06, "loss": 0.0952, "step": 5408 }, { "epoch": 0.7980818886019919, "grad_norm": 2.857234239578247, "learning_rate": 2.381657614941858e-06, "loss": 0.0881, "step": 5409 }, { "epoch": 0.7982294356326078, "grad_norm": 4.285737991333008, "learning_rate": 2.3783219544661294e-06, "loss": 0.1084, "step": 5410 }, { "epoch": 0.7983769826632239, "grad_norm": 1.116903305053711, "learning_rate": 2.374988316232132e-06, "loss": 0.0141, "step": 5411 }, { "epoch": 0.7985245296938399, "grad_norm": 0.8273770809173584, "learning_rate": 2.3716567011243762e-06, "loss": 0.0172, "step": 5412 }, { "epoch": 0.798672076724456, "grad_norm": 4.864143371582031, "learning_rate": 2.3683271100268257e-06, "loss": 0.0996, "step": 5413 }, { "epoch": 0.798819623755072, "grad_norm": 3.367530584335327, "learning_rate": 2.364999543822919e-06, "loss": 0.059, "step": 5414 }, { "epoch": 0.7989671707856879, "grad_norm": 1.7367578744888306, "learning_rate": 2.3616740033955466e-06, "loss": 0.0689, "step": 5415 }, { "epoch": 0.7991147178163039, "grad_norm": 2.719728708267212, "learning_rate": 2.3583504896270627e-06, "loss": 0.0893, "step": 5416 }, { "epoch": 0.79926226484692, "grad_norm": 1.859072208404541, "learning_rate": 2.3550290033992974e-06, "loss": 0.0616, "step": 5417 }, { "epoch": 0.799409811877536, "grad_norm": 2.1265299320220947, "learning_rate": 2.3517095455935267e-06, "loss": 0.0355, "step": 5418 }, { "epoch": 0.7995573589081519, "grad_norm": 2.4309635162353516, "learning_rate": 2.348392117090493e-06, "loss": 0.0311, "step": 5419 }, { "epoch": 0.799704905938768, "grad_norm": 2.0215108394622803, "learning_rate": 2.345076718770404e-06, "loss": 0.0744, "step": 5420 }, { "epoch": 0.799704905938768, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05763740837574005, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 50.5402, "eval_samples_per_second": 5.758, "eval_steps_per_second": 0.198, "step": 5420 }, { "epoch": 0.799852452969384, "grad_norm": 3.9636318683624268, "learning_rate": 2.341763351512929e-06, "loss": 0.1156, "step": 5421 }, { "epoch": 0.8, "grad_norm": 2.876650810241699, "learning_rate": 2.3384520161971903e-06, "loss": 0.1039, "step": 5422 }, { "epoch": 0.800147547030616, "grad_norm": 3.375288248062134, "learning_rate": 2.335142713701782e-06, "loss": 0.067, "step": 5423 }, { "epoch": 0.800295094061232, "grad_norm": 1.431705355644226, "learning_rate": 2.3318354449047486e-06, "loss": 0.0308, "step": 5424 }, { "epoch": 0.800442641091848, "grad_norm": 1.4194387197494507, "learning_rate": 2.328530210683603e-06, "loss": 0.0223, "step": 5425 }, { "epoch": 0.8005901881224641, "grad_norm": 2.721086263656616, "learning_rate": 2.3252270119153165e-06, "loss": 0.1125, "step": 5426 }, { "epoch": 0.80073773515308, "grad_norm": 2.3908205032348633, "learning_rate": 2.321925849476314e-06, "loss": 0.067, "step": 5427 }, { "epoch": 0.800885282183696, "grad_norm": 1.74275803565979, "learning_rate": 2.318626724242491e-06, "loss": 0.0751, "step": 5428 }, { "epoch": 0.8010328292143121, "grad_norm": 2.4928338527679443, "learning_rate": 2.315329637089193e-06, "loss": 0.0475, "step": 5429 }, { "epoch": 0.8011803762449281, "grad_norm": 2.3432343006134033, "learning_rate": 2.3120345888912245e-06, "loss": 0.0922, "step": 5430 }, { "epoch": 0.801327923275544, "grad_norm": 3.3698785305023193, "learning_rate": 2.308741580522862e-06, "loss": 0.0574, "step": 5431 }, { "epoch": 0.8014754703061601, "grad_norm": 3.8240702152252197, "learning_rate": 2.305450612857827e-06, "loss": 0.049, "step": 5432 }, { "epoch": 0.8016230173367761, "grad_norm": 3.203747510910034, "learning_rate": 2.302161686769302e-06, "loss": 0.1235, "step": 5433 }, { "epoch": 0.8017705643673921, "grad_norm": 2.2558703422546387, "learning_rate": 2.2988748031299335e-06, "loss": 0.0375, "step": 5434 }, { "epoch": 0.8019181113980081, "grad_norm": 3.6345064640045166, "learning_rate": 2.2955899628118183e-06, "loss": 0.0836, "step": 5435 }, { "epoch": 0.8020656584286241, "grad_norm": 1.8519630432128906, "learning_rate": 2.292307166686518e-06, "loss": 0.0356, "step": 5436 }, { "epoch": 0.8022132054592401, "grad_norm": 2.69828462600708, "learning_rate": 2.289026415625052e-06, "loss": 0.0606, "step": 5437 }, { "epoch": 0.8023607524898562, "grad_norm": 2.2864813804626465, "learning_rate": 2.2857477104978877e-06, "loss": 0.0189, "step": 5438 }, { "epoch": 0.8025082995204722, "grad_norm": 1.0424892902374268, "learning_rate": 2.2824710521749604e-06, "loss": 0.0159, "step": 5439 }, { "epoch": 0.8026558465510881, "grad_norm": 3.116734027862549, "learning_rate": 2.2791964415256596e-06, "loss": 0.1293, "step": 5440 }, { "epoch": 0.8026558465510881, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05615336447954178, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 50.6315, "eval_samples_per_second": 5.747, "eval_steps_per_second": 0.198, "step": 5440 }, { "epoch": 0.8028033935817042, "grad_norm": 1.384605884552002, "learning_rate": 2.2759238794188243e-06, "loss": 0.0401, "step": 5441 }, { "epoch": 0.8029509406123202, "grad_norm": 1.5033726692199707, "learning_rate": 2.2726533667227625e-06, "loss": 0.0378, "step": 5442 }, { "epoch": 0.8030984876429362, "grad_norm": 2.265686273574829, "learning_rate": 2.2693849043052284e-06, "loss": 0.0602, "step": 5443 }, { "epoch": 0.8032460346735522, "grad_norm": 1.1287288665771484, "learning_rate": 2.266118493033429e-06, "loss": 0.0287, "step": 5444 }, { "epoch": 0.8033935817041682, "grad_norm": 3.111320734024048, "learning_rate": 2.262854133774044e-06, "loss": 0.0235, "step": 5445 }, { "epoch": 0.8035411287347842, "grad_norm": 3.1071579456329346, "learning_rate": 2.259591827393195e-06, "loss": 0.04, "step": 5446 }, { "epoch": 0.8036886757654003, "grad_norm": 1.9131447076797485, "learning_rate": 2.2563315747564575e-06, "loss": 0.0381, "step": 5447 }, { "epoch": 0.8038362227960162, "grad_norm": 2.323465347290039, "learning_rate": 2.2530733767288715e-06, "loss": 0.0997, "step": 5448 }, { "epoch": 0.8039837698266322, "grad_norm": 3.0455734729766846, "learning_rate": 2.2498172341749234e-06, "loss": 0.0766, "step": 5449 }, { "epoch": 0.8041313168572483, "grad_norm": 1.7361172437667847, "learning_rate": 2.2465631479585593e-06, "loss": 0.0266, "step": 5450 }, { "epoch": 0.8042788638878643, "grad_norm": 3.0005178451538086, "learning_rate": 2.24331111894318e-06, "loss": 0.1193, "step": 5451 }, { "epoch": 0.8044264109184802, "grad_norm": 2.052396297454834, "learning_rate": 2.2400611479916356e-06, "loss": 0.0424, "step": 5452 }, { "epoch": 0.8045739579490963, "grad_norm": 2.114999532699585, "learning_rate": 2.2368132359662365e-06, "loss": 0.0682, "step": 5453 }, { "epoch": 0.8047215049797123, "grad_norm": 2.3181893825531006, "learning_rate": 2.2335673837287385e-06, "loss": 0.0669, "step": 5454 }, { "epoch": 0.8048690520103283, "grad_norm": 1.6198588609695435, "learning_rate": 2.230323592140359e-06, "loss": 0.0739, "step": 5455 }, { "epoch": 0.8050165990409442, "grad_norm": 2.042635917663574, "learning_rate": 2.2270818620617685e-06, "loss": 0.0469, "step": 5456 }, { "epoch": 0.8051641460715603, "grad_norm": 2.2596940994262695, "learning_rate": 2.223842194353084e-06, "loss": 0.0553, "step": 5457 }, { "epoch": 0.8053116931021763, "grad_norm": 2.0843050479888916, "learning_rate": 2.2206045898738737e-06, "loss": 0.0287, "step": 5458 }, { "epoch": 0.8054592401327924, "grad_norm": 1.1255006790161133, "learning_rate": 2.2173690494831755e-06, "loss": 0.0296, "step": 5459 }, { "epoch": 0.8056067871634084, "grad_norm": 1.4823483228683472, "learning_rate": 2.21413557403946e-06, "loss": 0.0526, "step": 5460 }, { "epoch": 0.8056067871634084, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05573568865656853, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1803, "eval_samples_per_second": 5.917, "eval_steps_per_second": 0.203, "step": 5460 }, { "epoch": 0.8057543341940243, "grad_norm": 4.884945869445801, "learning_rate": 2.210904164400657e-06, "loss": 0.1202, "step": 5461 }, { "epoch": 0.8059018812246403, "grad_norm": 2.093738079071045, "learning_rate": 2.2076748214241527e-06, "loss": 0.0398, "step": 5462 }, { "epoch": 0.8060494282552564, "grad_norm": 3.1090734004974365, "learning_rate": 2.204447545966776e-06, "loss": 0.0783, "step": 5463 }, { "epoch": 0.8061969752858724, "grad_norm": 2.0073373317718506, "learning_rate": 2.2012223388848154e-06, "loss": 0.0247, "step": 5464 }, { "epoch": 0.8063445223164883, "grad_norm": 4.456292152404785, "learning_rate": 2.19799920103401e-06, "loss": 0.0691, "step": 5465 }, { "epoch": 0.8064920693471044, "grad_norm": 1.8321577310562134, "learning_rate": 2.1947781332695406e-06, "loss": 0.0731, "step": 5466 }, { "epoch": 0.8066396163777204, "grad_norm": 3.1030261516571045, "learning_rate": 2.191559136446053e-06, "loss": 0.1024, "step": 5467 }, { "epoch": 0.8067871634083364, "grad_norm": 4.386995792388916, "learning_rate": 2.188342211417629e-06, "loss": 0.1063, "step": 5468 }, { "epoch": 0.8069347104389524, "grad_norm": 2.436401128768921, "learning_rate": 2.185127359037811e-06, "loss": 0.0761, "step": 5469 }, { "epoch": 0.8070822574695684, "grad_norm": 1.8892295360565186, "learning_rate": 2.181914580159591e-06, "loss": 0.082, "step": 5470 }, { "epoch": 0.8072298045001844, "grad_norm": 1.5109373331069946, "learning_rate": 2.178703875635405e-06, "loss": 0.022, "step": 5471 }, { "epoch": 0.8073773515308005, "grad_norm": 3.947361946105957, "learning_rate": 2.1754952463171385e-06, "loss": 0.0493, "step": 5472 }, { "epoch": 0.8075248985614164, "grad_norm": 0.4869849681854248, "learning_rate": 2.1722886930561328e-06, "loss": 0.0065, "step": 5473 }, { "epoch": 0.8076724455920324, "grad_norm": 1.2354382276535034, "learning_rate": 2.169084216703178e-06, "loss": 0.0268, "step": 5474 }, { "epoch": 0.8078199926226485, "grad_norm": 0.9162119030952454, "learning_rate": 2.165881818108504e-06, "loss": 0.0383, "step": 5475 }, { "epoch": 0.8079675396532645, "grad_norm": 0.9065362811088562, "learning_rate": 2.1626814981218013e-06, "loss": 0.0193, "step": 5476 }, { "epoch": 0.8081150866838804, "grad_norm": 1.2631582021713257, "learning_rate": 2.159483257592199e-06, "loss": 0.036, "step": 5477 }, { "epoch": 0.8082626337144965, "grad_norm": 1.5097599029541016, "learning_rate": 2.1562870973682794e-06, "loss": 0.0254, "step": 5478 }, { "epoch": 0.8084101807451125, "grad_norm": 1.760907530784607, "learning_rate": 2.1530930182980756e-06, "loss": 0.0467, "step": 5479 }, { "epoch": 0.8085577277757285, "grad_norm": 1.9933987855911255, "learning_rate": 2.1499010212290595e-06, "loss": 0.0609, "step": 5480 }, { "epoch": 0.8085577277757285, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05714625120162964, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8495, "eval_samples_per_second": 5.838, "eval_steps_per_second": 0.201, "step": 5480 }, { "epoch": 0.8087052748063446, "grad_norm": 2.1747310161590576, "learning_rate": 2.1467111070081627e-06, "loss": 0.067, "step": 5481 }, { "epoch": 0.8088528218369605, "grad_norm": 1.3904106616973877, "learning_rate": 2.143523276481749e-06, "loss": 0.0485, "step": 5482 }, { "epoch": 0.8090003688675765, "grad_norm": 3.125833511352539, "learning_rate": 2.140337530495645e-06, "loss": 0.0531, "step": 5483 }, { "epoch": 0.8091479158981926, "grad_norm": 2.7471671104431152, "learning_rate": 2.1371538698951168e-06, "loss": 0.0858, "step": 5484 }, { "epoch": 0.8092954629288086, "grad_norm": 3.0611491203308105, "learning_rate": 2.133972295524875e-06, "loss": 0.0437, "step": 5485 }, { "epoch": 0.8094430099594245, "grad_norm": 3.135230541229248, "learning_rate": 2.1307928082290773e-06, "loss": 0.092, "step": 5486 }, { "epoch": 0.8095905569900406, "grad_norm": 5.204007148742676, "learning_rate": 2.1276154088513322e-06, "loss": 0.0534, "step": 5487 }, { "epoch": 0.8097381040206566, "grad_norm": 1.881319284439087, "learning_rate": 2.1244400982346945e-06, "loss": 0.0409, "step": 5488 }, { "epoch": 0.8098856510512726, "grad_norm": 2.79185152053833, "learning_rate": 2.121266877221656e-06, "loss": 0.011, "step": 5489 }, { "epoch": 0.8100331980818886, "grad_norm": 1.5726232528686523, "learning_rate": 2.118095746654164e-06, "loss": 0.0431, "step": 5490 }, { "epoch": 0.8101807451125046, "grad_norm": 3.3432977199554443, "learning_rate": 2.1149267073736036e-06, "loss": 0.0426, "step": 5491 }, { "epoch": 0.8103282921431206, "grad_norm": 2.675602674484253, "learning_rate": 2.1117597602208105e-06, "loss": 0.0705, "step": 5492 }, { "epoch": 0.8104758391737367, "grad_norm": 2.9666032791137695, "learning_rate": 2.1085949060360654e-06, "loss": 0.1274, "step": 5493 }, { "epoch": 0.8106233862043526, "grad_norm": 1.940152883529663, "learning_rate": 2.1054321456590863e-06, "loss": 0.0356, "step": 5494 }, { "epoch": 0.8107709332349686, "grad_norm": 1.9591424465179443, "learning_rate": 2.102271479929048e-06, "loss": 0.0379, "step": 5495 }, { "epoch": 0.8109184802655847, "grad_norm": 2.405116319656372, "learning_rate": 2.0991129096845543e-06, "loss": 0.0452, "step": 5496 }, { "epoch": 0.8110660272962007, "grad_norm": 1.4649170637130737, "learning_rate": 2.0959564357636654e-06, "loss": 0.063, "step": 5497 }, { "epoch": 0.8112135743268166, "grad_norm": 2.354795455932617, "learning_rate": 2.0928020590038845e-06, "loss": 0.0789, "step": 5498 }, { "epoch": 0.8113611213574327, "grad_norm": 2.549468994140625, "learning_rate": 2.0896497802421477e-06, "loss": 0.0693, "step": 5499 }, { "epoch": 0.8115086683880487, "grad_norm": 1.6205826997756958, "learning_rate": 2.0864996003148473e-06, "loss": 0.0684, "step": 5500 }, { "epoch": 0.8115086683880487, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.05832415819168091, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 48.9014, "eval_samples_per_second": 5.951, "eval_steps_per_second": 0.204, "step": 5500 }, { "epoch": 0.8116562154186647, "grad_norm": 1.650113582611084, "learning_rate": 2.0833515200578093e-06, "loss": 0.0287, "step": 5501 }, { "epoch": 0.8118037624492808, "grad_norm": 1.5239334106445312, "learning_rate": 2.0802055403063072e-06, "loss": 0.0319, "step": 5502 }, { "epoch": 0.8119513094798967, "grad_norm": 2.426981210708618, "learning_rate": 2.0770616618950603e-06, "loss": 0.0783, "step": 5503 }, { "epoch": 0.8120988565105127, "grad_norm": 4.627350807189941, "learning_rate": 2.073919885658223e-06, "loss": 0.1236, "step": 5504 }, { "epoch": 0.8122464035411288, "grad_norm": 2.5734071731567383, "learning_rate": 2.070780212429393e-06, "loss": 0.0757, "step": 5505 }, { "epoch": 0.8123939505717448, "grad_norm": 1.922461986541748, "learning_rate": 2.067642643041614e-06, "loss": 0.0277, "step": 5506 }, { "epoch": 0.8125414976023607, "grad_norm": 1.8315447568893433, "learning_rate": 2.064507178327374e-06, "loss": 0.0923, "step": 5507 }, { "epoch": 0.8126890446329768, "grad_norm": 2.1510634422302246, "learning_rate": 2.061373819118593e-06, "loss": 0.0572, "step": 5508 }, { "epoch": 0.8128365916635928, "grad_norm": 2.1236438751220703, "learning_rate": 2.058242566246642e-06, "loss": 0.0778, "step": 5509 }, { "epoch": 0.8129841386942088, "grad_norm": 1.2244946956634521, "learning_rate": 2.055113420542324e-06, "loss": 0.0172, "step": 5510 }, { "epoch": 0.8131316857248247, "grad_norm": 1.8886568546295166, "learning_rate": 2.0519863828358898e-06, "loss": 0.0278, "step": 5511 }, { "epoch": 0.8132792327554408, "grad_norm": 0.7750194668769836, "learning_rate": 2.0488614539570327e-06, "loss": 0.023, "step": 5512 }, { "epoch": 0.8134267797860568, "grad_norm": 2.702937602996826, "learning_rate": 2.0457386347348772e-06, "loss": 0.048, "step": 5513 }, { "epoch": 0.8135743268166729, "grad_norm": 3.9338552951812744, "learning_rate": 2.0426179259979995e-06, "loss": 0.0541, "step": 5514 }, { "epoch": 0.8137218738472888, "grad_norm": 3.6060047149658203, "learning_rate": 2.0394993285744025e-06, "loss": 0.1098, "step": 5515 }, { "epoch": 0.8138694208779048, "grad_norm": 1.3368220329284668, "learning_rate": 2.03638284329154e-06, "loss": 0.0307, "step": 5516 }, { "epoch": 0.8140169679085208, "grad_norm": 2.128084182739258, "learning_rate": 2.033268470976304e-06, "loss": 0.0678, "step": 5517 }, { "epoch": 0.8141645149391369, "grad_norm": 1.9943575859069824, "learning_rate": 2.030156212455021e-06, "loss": 0.0531, "step": 5518 }, { "epoch": 0.8143120619697528, "grad_norm": 4.4735565185546875, "learning_rate": 2.0270460685534577e-06, "loss": 0.0871, "step": 5519 }, { "epoch": 0.8144596090003688, "grad_norm": 2.26501202583313, "learning_rate": 2.023938040096821e-06, "loss": 0.0579, "step": 5520 }, { "epoch": 0.8144596090003688, "eval_accuracy": 0.9768451519536903, "eval_f1": 0.9601990049751243, "eval_loss": 0.05940677970647812, "eval_precision": 0.9846938775510204, "eval_recall": 0.9368932038834952, "eval_runtime": 50.9723, "eval_samples_per_second": 5.709, "eval_steps_per_second": 0.196, "step": 5520 }, { "epoch": 0.8146071560309849, "grad_norm": 1.4533849954605103, "learning_rate": 2.0208321279097633e-06, "loss": 0.0284, "step": 5521 }, { "epoch": 0.8147547030616009, "grad_norm": 4.059301376342773, "learning_rate": 2.01772833281636e-06, "loss": 0.1158, "step": 5522 }, { "epoch": 0.8149022500922168, "grad_norm": 0.8373374342918396, "learning_rate": 2.0146266556401405e-06, "loss": 0.0093, "step": 5523 }, { "epoch": 0.8150497971228329, "grad_norm": 2.7619588375091553, "learning_rate": 2.01152709720406e-06, "loss": 0.0603, "step": 5524 }, { "epoch": 0.8151973441534489, "grad_norm": 0.9868106245994568, "learning_rate": 2.00842965833052e-06, "loss": 0.0348, "step": 5525 }, { "epoch": 0.815344891184065, "grad_norm": 2.8609325885772705, "learning_rate": 2.0053343398413593e-06, "loss": 0.0666, "step": 5526 }, { "epoch": 0.815492438214681, "grad_norm": 3.3137271404266357, "learning_rate": 2.002241142557845e-06, "loss": 0.0442, "step": 5527 }, { "epoch": 0.8156399852452969, "grad_norm": 1.7064261436462402, "learning_rate": 1.999150067300694e-06, "loss": 0.0453, "step": 5528 }, { "epoch": 0.8157875322759129, "grad_norm": 7.342414855957031, "learning_rate": 1.9960611148900512e-06, "loss": 0.0922, "step": 5529 }, { "epoch": 0.815935079306529, "grad_norm": 1.4057012796401978, "learning_rate": 1.992974286145496e-06, "loss": 0.0421, "step": 5530 }, { "epoch": 0.816082626337145, "grad_norm": 2.3584372997283936, "learning_rate": 1.9898895818860587e-06, "loss": 0.0853, "step": 5531 }, { "epoch": 0.8162301733677609, "grad_norm": 3.214398145675659, "learning_rate": 1.986807002930192e-06, "loss": 0.1406, "step": 5532 }, { "epoch": 0.816377720398377, "grad_norm": 2.6488702297210693, "learning_rate": 1.983726550095788e-06, "loss": 0.0736, "step": 5533 }, { "epoch": 0.816525267428993, "grad_norm": 1.6178686618804932, "learning_rate": 1.9806482242001757e-06, "loss": 0.0517, "step": 5534 }, { "epoch": 0.816672814459609, "grad_norm": 1.2991693019866943, "learning_rate": 1.977572026060126e-06, "loss": 0.0266, "step": 5535 }, { "epoch": 0.816820361490225, "grad_norm": 3.655134916305542, "learning_rate": 1.974497956491831e-06, "loss": 0.1264, "step": 5536 }, { "epoch": 0.816967908520841, "grad_norm": 2.377314805984497, "learning_rate": 1.9714260163109333e-06, "loss": 0.0421, "step": 5537 }, { "epoch": 0.817115455551457, "grad_norm": 1.8061245679855347, "learning_rate": 1.9683562063324967e-06, "loss": 0.0468, "step": 5538 }, { "epoch": 0.8172630025820731, "grad_norm": 5.690897464752197, "learning_rate": 1.965288527371031e-06, "loss": 0.0458, "step": 5539 }, { "epoch": 0.817410549612689, "grad_norm": 2.7435295581817627, "learning_rate": 1.962222980240478e-06, "loss": 0.0728, "step": 5540 }, { "epoch": 0.817410549612689, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.059236492961645126, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 49.5566, "eval_samples_per_second": 5.872, "eval_steps_per_second": 0.202, "step": 5540 }, { "epoch": 0.817558096643305, "grad_norm": 2.3198349475860596, "learning_rate": 1.959159565754206e-06, "loss": 0.0756, "step": 5541 }, { "epoch": 0.8177056436739211, "grad_norm": 1.5689759254455566, "learning_rate": 1.956098284725031e-06, "loss": 0.0347, "step": 5542 }, { "epoch": 0.8178531907045371, "grad_norm": 1.8071298599243164, "learning_rate": 1.9530391379651904e-06, "loss": 0.0868, "step": 5543 }, { "epoch": 0.818000737735153, "grad_norm": 2.6697700023651123, "learning_rate": 1.949982126286356e-06, "loss": 0.0863, "step": 5544 }, { "epoch": 0.8181482847657691, "grad_norm": 1.0986168384552002, "learning_rate": 1.9469272504996485e-06, "loss": 0.0148, "step": 5545 }, { "epoch": 0.8182958317963851, "grad_norm": 0.9309797883033752, "learning_rate": 1.9438745114156045e-06, "loss": 0.0265, "step": 5546 }, { "epoch": 0.8184433788270011, "grad_norm": 1.8434813022613525, "learning_rate": 1.9408239098441983e-06, "loss": 0.0643, "step": 5547 }, { "epoch": 0.8185909258576172, "grad_norm": 1.6878859996795654, "learning_rate": 1.937775446594843e-06, "loss": 0.0464, "step": 5548 }, { "epoch": 0.8187384728882331, "grad_norm": 4.890466213226318, "learning_rate": 1.9347291224763763e-06, "loss": 0.1108, "step": 5549 }, { "epoch": 0.8188860199188491, "grad_norm": 2.195685386657715, "learning_rate": 1.931684938297074e-06, "loss": 0.1185, "step": 5550 }, { "epoch": 0.8190335669494652, "grad_norm": 3.9062345027923584, "learning_rate": 1.928642894864644e-06, "loss": 0.0323, "step": 5551 }, { "epoch": 0.8191811139800812, "grad_norm": 2.8789162635803223, "learning_rate": 1.925602992986221e-06, "loss": 0.0452, "step": 5552 }, { "epoch": 0.8193286610106971, "grad_norm": 1.9474332332611084, "learning_rate": 1.9225652334683753e-06, "loss": 0.0828, "step": 5553 }, { "epoch": 0.8194762080413132, "grad_norm": 0.850060224533081, "learning_rate": 1.919529617117114e-06, "loss": 0.0177, "step": 5554 }, { "epoch": 0.8196237550719292, "grad_norm": 3.326565980911255, "learning_rate": 1.9164961447378616e-06, "loss": 0.0562, "step": 5555 }, { "epoch": 0.8197713021025452, "grad_norm": 1.6768985986709595, "learning_rate": 1.91346481713549e-06, "loss": 0.0321, "step": 5556 }, { "epoch": 0.8199188491331612, "grad_norm": 1.6117336750030518, "learning_rate": 1.9104356351142907e-06, "loss": 0.044, "step": 5557 }, { "epoch": 0.8200663961637772, "grad_norm": 1.4517720937728882, "learning_rate": 1.9074085994779825e-06, "loss": 0.0778, "step": 5558 }, { "epoch": 0.8202139431943932, "grad_norm": 2.334791421890259, "learning_rate": 1.9043837110297336e-06, "loss": 0.0569, "step": 5559 }, { "epoch": 0.8203614902250093, "grad_norm": 1.485473871231079, "learning_rate": 1.9013609705721259e-06, "loss": 0.0304, "step": 5560 }, { "epoch": 0.8203614902250093, "eval_accuracy": 0.975397973950796, "eval_f1": 0.9578163771712159, "eval_loss": 0.05924277380108833, "eval_precision": 0.9796954314720813, "eval_recall": 0.9368932038834952, "eval_runtime": 48.5437, "eval_samples_per_second": 5.995, "eval_steps_per_second": 0.206, "step": 5560 }, { "epoch": 0.8205090372556252, "grad_norm": 2.336688280105591, "learning_rate": 1.898340378907172e-06, "loss": 0.0511, "step": 5561 }, { "epoch": 0.8206565842862412, "grad_norm": 2.928649663925171, "learning_rate": 1.895321936836324e-06, "loss": 0.0366, "step": 5562 }, { "epoch": 0.8208041313168573, "grad_norm": 2.8058505058288574, "learning_rate": 1.8923056451604527e-06, "loss": 0.0819, "step": 5563 }, { "epoch": 0.8209516783474733, "grad_norm": 1.5189590454101562, "learning_rate": 1.8892915046798665e-06, "loss": 0.0464, "step": 5564 }, { "epoch": 0.8210992253780892, "grad_norm": 1.5630435943603516, "learning_rate": 1.8862795161943027e-06, "loss": 0.0536, "step": 5565 }, { "epoch": 0.8212467724087053, "grad_norm": 1.761367678642273, "learning_rate": 1.8832696805029194e-06, "loss": 0.0592, "step": 5566 }, { "epoch": 0.8213943194393213, "grad_norm": 1.1648681163787842, "learning_rate": 1.8802619984043147e-06, "loss": 0.0276, "step": 5567 }, { "epoch": 0.8215418664699373, "grad_norm": 3.4482805728912354, "learning_rate": 1.8772564706965036e-06, "loss": 0.1262, "step": 5568 }, { "epoch": 0.8216894135005534, "grad_norm": 3.8894097805023193, "learning_rate": 1.8742530981769402e-06, "loss": 0.0965, "step": 5569 }, { "epoch": 0.8218369605311693, "grad_norm": 3.6397407054901123, "learning_rate": 1.8712518816425029e-06, "loss": 0.0823, "step": 5570 }, { "epoch": 0.8219845075617853, "grad_norm": 4.3590240478515625, "learning_rate": 1.8682528218894958e-06, "loss": 0.0487, "step": 5571 }, { "epoch": 0.8221320545924014, "grad_norm": 3.2629575729370117, "learning_rate": 1.8652559197136467e-06, "loss": 0.0776, "step": 5572 }, { "epoch": 0.8222796016230174, "grad_norm": 2.547247886657715, "learning_rate": 1.8622611759101272e-06, "loss": 0.0513, "step": 5573 }, { "epoch": 0.8224271486536333, "grad_norm": 1.8660508394241333, "learning_rate": 1.8592685912735199e-06, "loss": 0.0333, "step": 5574 }, { "epoch": 0.8225746956842493, "grad_norm": 2.1486570835113525, "learning_rate": 1.856278166597838e-06, "loss": 0.0724, "step": 5575 }, { "epoch": 0.8227222427148654, "grad_norm": 1.9876329898834229, "learning_rate": 1.8532899026765283e-06, "loss": 0.0258, "step": 5576 }, { "epoch": 0.8228697897454814, "grad_norm": 2.840949296951294, "learning_rate": 1.8503038003024565e-06, "loss": 0.0441, "step": 5577 }, { "epoch": 0.8230173367760973, "grad_norm": 4.701907634735107, "learning_rate": 1.8473198602679188e-06, "loss": 0.0877, "step": 5578 }, { "epoch": 0.8231648838067134, "grad_norm": 1.2933789491653442, "learning_rate": 1.8443380833646397e-06, "loss": 0.0188, "step": 5579 }, { "epoch": 0.8233124308373294, "grad_norm": 1.5784660577774048, "learning_rate": 1.8413584703837618e-06, "loss": 0.0519, "step": 5580 }, { "epoch": 0.8233124308373294, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05753292515873909, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9103, "eval_samples_per_second": 5.95, "eval_steps_per_second": 0.204, "step": 5580 }, { "epoch": 0.8234599778679454, "grad_norm": 2.5424628257751465, "learning_rate": 1.8383810221158638e-06, "loss": 0.0899, "step": 5581 }, { "epoch": 0.8236075248985614, "grad_norm": 2.4343628883361816, "learning_rate": 1.8354057393509416e-06, "loss": 0.0477, "step": 5582 }, { "epoch": 0.8237550719291774, "grad_norm": 4.214339256286621, "learning_rate": 1.8324326228784206e-06, "loss": 0.0617, "step": 5583 }, { "epoch": 0.8239026189597934, "grad_norm": 5.240863800048828, "learning_rate": 1.8294616734871528e-06, "loss": 0.1021, "step": 5584 }, { "epoch": 0.8240501659904095, "grad_norm": 3.220684289932251, "learning_rate": 1.8264928919654134e-06, "loss": 0.1246, "step": 5585 }, { "epoch": 0.8241977130210254, "grad_norm": 1.1685765981674194, "learning_rate": 1.8235262791008968e-06, "loss": 0.0272, "step": 5586 }, { "epoch": 0.8243452600516414, "grad_norm": 1.6796470880508423, "learning_rate": 1.8205618356807309e-06, "loss": 0.0302, "step": 5587 }, { "epoch": 0.8244928070822575, "grad_norm": 1.9728130102157593, "learning_rate": 1.8175995624914666e-06, "loss": 0.0499, "step": 5588 }, { "epoch": 0.8246403541128735, "grad_norm": 1.6222174167633057, "learning_rate": 1.814639460319072e-06, "loss": 0.0214, "step": 5589 }, { "epoch": 0.8247879011434894, "grad_norm": 1.2235058546066284, "learning_rate": 1.8116815299489498e-06, "loss": 0.0084, "step": 5590 }, { "epoch": 0.8249354481741055, "grad_norm": 1.5723819732666016, "learning_rate": 1.8087257721659145e-06, "loss": 0.0617, "step": 5591 }, { "epoch": 0.8250829952047215, "grad_norm": 1.5226105451583862, "learning_rate": 1.8057721877542133e-06, "loss": 0.0272, "step": 5592 }, { "epoch": 0.8252305422353375, "grad_norm": 1.7810994386672974, "learning_rate": 1.802820777497516e-06, "loss": 0.0353, "step": 5593 }, { "epoch": 0.8253780892659536, "grad_norm": 1.6320428848266602, "learning_rate": 1.7998715421789093e-06, "loss": 0.0489, "step": 5594 }, { "epoch": 0.8255256362965695, "grad_norm": 1.0215790271759033, "learning_rate": 1.7969244825809096e-06, "loss": 0.0106, "step": 5595 }, { "epoch": 0.8256731833271855, "grad_norm": 2.0916707515716553, "learning_rate": 1.7939795994854504e-06, "loss": 0.0769, "step": 5596 }, { "epoch": 0.8258207303578016, "grad_norm": 2.6067142486572266, "learning_rate": 1.791036893673892e-06, "loss": 0.1043, "step": 5597 }, { "epoch": 0.8259682773884176, "grad_norm": 3.1246252059936523, "learning_rate": 1.7880963659270178e-06, "loss": 0.0861, "step": 5598 }, { "epoch": 0.8261158244190335, "grad_norm": 1.8496789932250977, "learning_rate": 1.7851580170250304e-06, "loss": 0.0478, "step": 5599 }, { "epoch": 0.8262633714496496, "grad_norm": 1.2401660680770874, "learning_rate": 1.7822218477475496e-06, "loss": 0.0091, "step": 5600 }, { "epoch": 0.8262633714496496, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05679009482264519, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 48.8892, "eval_samples_per_second": 5.952, "eval_steps_per_second": 0.205, "step": 5600 }, { "epoch": 0.8264109184802656, "grad_norm": 1.5956645011901855, "learning_rate": 1.7792878588736262e-06, "loss": 0.0605, "step": 5601 }, { "epoch": 0.8265584655108816, "grad_norm": 2.7995615005493164, "learning_rate": 1.7763560511817313e-06, "loss": 0.0761, "step": 5602 }, { "epoch": 0.8267060125414976, "grad_norm": 3.7268588542938232, "learning_rate": 1.7734264254497492e-06, "loss": 0.0905, "step": 5603 }, { "epoch": 0.8268535595721136, "grad_norm": 1.0305407047271729, "learning_rate": 1.7704989824549955e-06, "loss": 0.019, "step": 5604 }, { "epoch": 0.8270011066027296, "grad_norm": 2.4888436794281006, "learning_rate": 1.7675737229741963e-06, "loss": 0.0588, "step": 5605 }, { "epoch": 0.8271486536333457, "grad_norm": 2.30344557762146, "learning_rate": 1.7646506477835068e-06, "loss": 0.0518, "step": 5606 }, { "epoch": 0.8272962006639616, "grad_norm": 1.7901335954666138, "learning_rate": 1.7617297576585023e-06, "loss": 0.0673, "step": 5607 }, { "epoch": 0.8274437476945776, "grad_norm": 2.1730308532714844, "learning_rate": 1.7588110533741698e-06, "loss": 0.0287, "step": 5608 }, { "epoch": 0.8275912947251937, "grad_norm": 3.6601784229278564, "learning_rate": 1.7558945357049273e-06, "loss": 0.0583, "step": 5609 }, { "epoch": 0.8277388417558097, "grad_norm": 1.5753626823425293, "learning_rate": 1.7529802054246027e-06, "loss": 0.0249, "step": 5610 }, { "epoch": 0.8278863887864256, "grad_norm": 2.235729694366455, "learning_rate": 1.7500680633064515e-06, "loss": 0.0934, "step": 5611 }, { "epoch": 0.8280339358170417, "grad_norm": 1.4514387845993042, "learning_rate": 1.7471581101231472e-06, "loss": 0.0258, "step": 5612 }, { "epoch": 0.8281814828476577, "grad_norm": 1.4143081903457642, "learning_rate": 1.7442503466467785e-06, "loss": 0.0555, "step": 5613 }, { "epoch": 0.8283290298782737, "grad_norm": 2.5379421710968018, "learning_rate": 1.7413447736488531e-06, "loss": 0.0422, "step": 5614 }, { "epoch": 0.8284765769088898, "grad_norm": 3.532888174057007, "learning_rate": 1.7384413919003008e-06, "loss": 0.0462, "step": 5615 }, { "epoch": 0.8286241239395057, "grad_norm": 5.810385227203369, "learning_rate": 1.735540202171474e-06, "loss": 0.09, "step": 5616 }, { "epoch": 0.8287716709701217, "grad_norm": 2.430147647857666, "learning_rate": 1.7326412052321317e-06, "loss": 0.0377, "step": 5617 }, { "epoch": 0.8289192180007378, "grad_norm": 1.4339234828948975, "learning_rate": 1.729744401851463e-06, "loss": 0.019, "step": 5618 }, { "epoch": 0.8290667650313538, "grad_norm": 2.2027747631073, "learning_rate": 1.7268497927980655e-06, "loss": 0.0351, "step": 5619 }, { "epoch": 0.8292143120619697, "grad_norm": 1.9734925031661987, "learning_rate": 1.723957378839961e-06, "loss": 0.0306, "step": 5620 }, { "epoch": 0.8292143120619697, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.0573807954788208, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.7321, "eval_samples_per_second": 5.971, "eval_steps_per_second": 0.205, "step": 5620 }, { "epoch": 0.8293618590925858, "grad_norm": 1.7956327199935913, "learning_rate": 1.7210671607445905e-06, "loss": 0.0343, "step": 5621 }, { "epoch": 0.8295094061232018, "grad_norm": 1.7271618843078613, "learning_rate": 1.7181791392788017e-06, "loss": 0.0357, "step": 5622 }, { "epoch": 0.8296569531538178, "grad_norm": 1.1316182613372803, "learning_rate": 1.7152933152088725e-06, "loss": 0.0153, "step": 5623 }, { "epoch": 0.8298045001844337, "grad_norm": 4.591516971588135, "learning_rate": 1.712409689300487e-06, "loss": 0.0781, "step": 5624 }, { "epoch": 0.8299520472150498, "grad_norm": 2.654439687728882, "learning_rate": 1.7095282623187526e-06, "loss": 0.0458, "step": 5625 }, { "epoch": 0.8300995942456658, "grad_norm": 2.633575439453125, "learning_rate": 1.706649035028194e-06, "loss": 0.0396, "step": 5626 }, { "epoch": 0.8302471412762819, "grad_norm": 2.340981960296631, "learning_rate": 1.7037720081927468e-06, "loss": 0.0524, "step": 5627 }, { "epoch": 0.8303946883068978, "grad_norm": 2.159475326538086, "learning_rate": 1.7008971825757669e-06, "loss": 0.0867, "step": 5628 }, { "epoch": 0.8305422353375138, "grad_norm": 2.9038820266723633, "learning_rate": 1.6980245589400234e-06, "loss": 0.0921, "step": 5629 }, { "epoch": 0.8306897823681298, "grad_norm": 8.652804374694824, "learning_rate": 1.6951541380477032e-06, "loss": 0.0816, "step": 5630 }, { "epoch": 0.8308373293987459, "grad_norm": 1.2753736972808838, "learning_rate": 1.6922859206604126e-06, "loss": 0.0314, "step": 5631 }, { "epoch": 0.8309848764293618, "grad_norm": 1.063549280166626, "learning_rate": 1.6894199075391638e-06, "loss": 0.0159, "step": 5632 }, { "epoch": 0.8311324234599778, "grad_norm": 2.372098922729492, "learning_rate": 1.6865560994443886e-06, "loss": 0.0532, "step": 5633 }, { "epoch": 0.8312799704905939, "grad_norm": 1.112519383430481, "learning_rate": 1.683694497135937e-06, "loss": 0.0319, "step": 5634 }, { "epoch": 0.8314275175212099, "grad_norm": 1.4782158136367798, "learning_rate": 1.6808351013730727e-06, "loss": 0.0266, "step": 5635 }, { "epoch": 0.831575064551826, "grad_norm": 2.3959615230560303, "learning_rate": 1.677977912914468e-06, "loss": 0.0283, "step": 5636 }, { "epoch": 0.8317226115824419, "grad_norm": 7.045351982116699, "learning_rate": 1.6751229325182194e-06, "loss": 0.0397, "step": 5637 }, { "epoch": 0.8318701586130579, "grad_norm": 7.252335548400879, "learning_rate": 1.6722701609418257e-06, "loss": 0.0823, "step": 5638 }, { "epoch": 0.8320177056436739, "grad_norm": 1.0566500425338745, "learning_rate": 1.669419598942209e-06, "loss": 0.023, "step": 5639 }, { "epoch": 0.83216525267429, "grad_norm": 0.6527475118637085, "learning_rate": 1.6665712472757057e-06, "loss": 0.0112, "step": 5640 }, { "epoch": 0.83216525267429, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9625935162094763, "eval_loss": 0.0587141215801239, "eval_precision": 0.9897435897435898, "eval_recall": 0.9368932038834952, "eval_runtime": 48.6591, "eval_samples_per_second": 5.98, "eval_steps_per_second": 0.206, "step": 5640 }, { "epoch": 0.8323127997049059, "grad_norm": 1.9920316934585571, "learning_rate": 1.6637251066980554e-06, "loss": 0.0494, "step": 5641 }, { "epoch": 0.8324603467355219, "grad_norm": 5.416084289550781, "learning_rate": 1.660881177964424e-06, "loss": 0.0762, "step": 5642 }, { "epoch": 0.832607893766138, "grad_norm": 4.887792587280273, "learning_rate": 1.6580394618293816e-06, "loss": 0.0516, "step": 5643 }, { "epoch": 0.832755440796754, "grad_norm": 4.527590274810791, "learning_rate": 1.6551999590469091e-06, "loss": 0.0678, "step": 5644 }, { "epoch": 0.8329029878273699, "grad_norm": 1.1300346851348877, "learning_rate": 1.6523626703704155e-06, "loss": 0.0253, "step": 5645 }, { "epoch": 0.833050534857986, "grad_norm": 3.3112246990203857, "learning_rate": 1.6495275965527057e-06, "loss": 0.0288, "step": 5646 }, { "epoch": 0.833198081888602, "grad_norm": 1.675912618637085, "learning_rate": 1.6466947383460007e-06, "loss": 0.0304, "step": 5647 }, { "epoch": 0.833345628919218, "grad_norm": 2.1557228565216064, "learning_rate": 1.6438640965019403e-06, "loss": 0.0278, "step": 5648 }, { "epoch": 0.833493175949834, "grad_norm": 3.1431148052215576, "learning_rate": 1.6410356717715724e-06, "loss": 0.0571, "step": 5649 }, { "epoch": 0.83364072298045, "grad_norm": 1.0991026163101196, "learning_rate": 1.6382094649053525e-06, "loss": 0.033, "step": 5650 }, { "epoch": 0.833788270011066, "grad_norm": 4.678040981292725, "learning_rate": 1.6353854766531552e-06, "loss": 0.1281, "step": 5651 }, { "epoch": 0.8339358170416821, "grad_norm": 3.0620951652526855, "learning_rate": 1.6325637077642598e-06, "loss": 0.0513, "step": 5652 }, { "epoch": 0.834083364072298, "grad_norm": 2.518862724304199, "learning_rate": 1.6297441589873587e-06, "loss": 0.0453, "step": 5653 }, { "epoch": 0.834230911102914, "grad_norm": 1.7291972637176514, "learning_rate": 1.626926831070561e-06, "loss": 0.052, "step": 5654 }, { "epoch": 0.8343784581335301, "grad_norm": 1.7597038745880127, "learning_rate": 1.624111724761377e-06, "loss": 0.0265, "step": 5655 }, { "epoch": 0.8345260051641461, "grad_norm": 3.891979217529297, "learning_rate": 1.6212988408067354e-06, "loss": 0.041, "step": 5656 }, { "epoch": 0.834673552194762, "grad_norm": 3.0513861179351807, "learning_rate": 1.6184881799529695e-06, "loss": 0.0833, "step": 5657 }, { "epoch": 0.8348210992253781, "grad_norm": 1.5519582033157349, "learning_rate": 1.6156797429458227e-06, "loss": 0.0377, "step": 5658 }, { "epoch": 0.8349686462559941, "grad_norm": 0.7139090895652771, "learning_rate": 1.6128735305304588e-06, "loss": 0.0099, "step": 5659 }, { "epoch": 0.8351161932866101, "grad_norm": 0.9821717143058777, "learning_rate": 1.6100695434514379e-06, "loss": 0.0214, "step": 5660 }, { "epoch": 0.8351161932866101, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.057609084993600845, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 48.5709, "eval_samples_per_second": 5.991, "eval_steps_per_second": 0.206, "step": 5660 }, { "epoch": 0.8352637403172262, "grad_norm": 1.504457950592041, "learning_rate": 1.607267782452735e-06, "loss": 0.0335, "step": 5661 }, { "epoch": 0.8354112873478421, "grad_norm": 2.8167669773101807, "learning_rate": 1.6044682482777386e-06, "loss": 0.1283, "step": 5662 }, { "epoch": 0.8355588343784581, "grad_norm": 1.8938806056976318, "learning_rate": 1.6016709416692367e-06, "loss": 0.0409, "step": 5663 }, { "epoch": 0.8357063814090742, "grad_norm": 1.4544540643692017, "learning_rate": 1.5988758633694356e-06, "loss": 0.0494, "step": 5664 }, { "epoch": 0.8358539284396902, "grad_norm": 1.8806601762771606, "learning_rate": 1.596083014119948e-06, "loss": 0.0656, "step": 5665 }, { "epoch": 0.8360014754703061, "grad_norm": 2.814997673034668, "learning_rate": 1.5932923946617907e-06, "loss": 0.084, "step": 5666 }, { "epoch": 0.8361490225009222, "grad_norm": 2.6153440475463867, "learning_rate": 1.590504005735396e-06, "loss": 0.0475, "step": 5667 }, { "epoch": 0.8362965695315382, "grad_norm": 4.640908718109131, "learning_rate": 1.5877178480805954e-06, "loss": 0.1177, "step": 5668 }, { "epoch": 0.8364441165621542, "grad_norm": 1.7950429916381836, "learning_rate": 1.5849339224366377e-06, "loss": 0.0437, "step": 5669 }, { "epoch": 0.8365916635927702, "grad_norm": 3.886131525039673, "learning_rate": 1.5821522295421753e-06, "loss": 0.0694, "step": 5670 }, { "epoch": 0.8367392106233862, "grad_norm": 3.2617337703704834, "learning_rate": 1.579372770135267e-06, "loss": 0.1242, "step": 5671 }, { "epoch": 0.8368867576540022, "grad_norm": 1.9004480838775635, "learning_rate": 1.5765955449533743e-06, "loss": 0.0319, "step": 5672 }, { "epoch": 0.8370343046846183, "grad_norm": 1.8157849311828613, "learning_rate": 1.573820554733384e-06, "loss": 0.0164, "step": 5673 }, { "epoch": 0.8371818517152342, "grad_norm": 1.7056431770324707, "learning_rate": 1.5710478002115692e-06, "loss": 0.0316, "step": 5674 }, { "epoch": 0.8373293987458502, "grad_norm": 5.834029674530029, "learning_rate": 1.5682772821236192e-06, "loss": 0.1158, "step": 5675 }, { "epoch": 0.8374769457764663, "grad_norm": 1.8330997228622437, "learning_rate": 1.5655090012046315e-06, "loss": 0.0559, "step": 5676 }, { "epoch": 0.8376244928070823, "grad_norm": 3.6596038341522217, "learning_rate": 1.562742958189104e-06, "loss": 0.0708, "step": 5677 }, { "epoch": 0.8377720398376982, "grad_norm": 1.3743090629577637, "learning_rate": 1.5599791538109466e-06, "loss": 0.023, "step": 5678 }, { "epoch": 0.8379195868683142, "grad_norm": 2.024369716644287, "learning_rate": 1.5572175888034747e-06, "loss": 0.0466, "step": 5679 }, { "epoch": 0.8380671338989303, "grad_norm": 1.4012556076049805, "learning_rate": 1.554458263899402e-06, "loss": 0.02, "step": 5680 }, { "epoch": 0.8380671338989303, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05637633427977562, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 48.6506, "eval_samples_per_second": 5.981, "eval_steps_per_second": 0.206, "step": 5680 }, { "epoch": 0.8382146809295463, "grad_norm": 2.209381103515625, "learning_rate": 1.5517011798308612e-06, "loss": 0.0758, "step": 5681 }, { "epoch": 0.8383622279601624, "grad_norm": 2.144597291946411, "learning_rate": 1.5489463373293768e-06, "loss": 0.0765, "step": 5682 }, { "epoch": 0.8385097749907783, "grad_norm": 4.175070762634277, "learning_rate": 1.5461937371258862e-06, "loss": 0.1402, "step": 5683 }, { "epoch": 0.8386573220213943, "grad_norm": 3.235828399658203, "learning_rate": 1.5434433799507331e-06, "loss": 0.0699, "step": 5684 }, { "epoch": 0.8388048690520103, "grad_norm": 6.306621551513672, "learning_rate": 1.5406952665336616e-06, "loss": 0.1684, "step": 5685 }, { "epoch": 0.8389524160826264, "grad_norm": 2.249080181121826, "learning_rate": 1.5379493976038185e-06, "loss": 0.0817, "step": 5686 }, { "epoch": 0.8390999631132423, "grad_norm": 4.360023498535156, "learning_rate": 1.535205773889762e-06, "loss": 0.0979, "step": 5687 }, { "epoch": 0.8392475101438583, "grad_norm": 3.345550298690796, "learning_rate": 1.532464396119453e-06, "loss": 0.0952, "step": 5688 }, { "epoch": 0.8393950571744744, "grad_norm": 0.9074431657791138, "learning_rate": 1.529725265020251e-06, "loss": 0.0163, "step": 5689 }, { "epoch": 0.8395426042050904, "grad_norm": 1.0018547773361206, "learning_rate": 1.5269883813189268e-06, "loss": 0.0213, "step": 5690 }, { "epoch": 0.8396901512357063, "grad_norm": 1.0021008253097534, "learning_rate": 1.5242537457416472e-06, "loss": 0.0224, "step": 5691 }, { "epoch": 0.8398376982663224, "grad_norm": 2.912518262863159, "learning_rate": 1.5215213590139888e-06, "loss": 0.1009, "step": 5692 }, { "epoch": 0.8399852452969384, "grad_norm": 1.1571531295776367, "learning_rate": 1.5187912218609313e-06, "loss": 0.0313, "step": 5693 }, { "epoch": 0.8401327923275544, "grad_norm": 2.414637565612793, "learning_rate": 1.516063335006851e-06, "loss": 0.0815, "step": 5694 }, { "epoch": 0.8402803393581704, "grad_norm": 2.117668628692627, "learning_rate": 1.5133376991755366e-06, "loss": 0.0555, "step": 5695 }, { "epoch": 0.8404278863887864, "grad_norm": 2.501960515975952, "learning_rate": 1.5106143150901698e-06, "loss": 0.0639, "step": 5696 }, { "epoch": 0.8405754334194024, "grad_norm": 1.5345666408538818, "learning_rate": 1.5078931834733412e-06, "loss": 0.0571, "step": 5697 }, { "epoch": 0.8407229804500185, "grad_norm": 2.431872844696045, "learning_rate": 1.5051743050470447e-06, "loss": 0.0193, "step": 5698 }, { "epoch": 0.8408705274806344, "grad_norm": 1.7443726062774658, "learning_rate": 1.502457680532673e-06, "loss": 0.0305, "step": 5699 }, { "epoch": 0.8410180745112504, "grad_norm": 3.444777488708496, "learning_rate": 1.4997433106510185e-06, "loss": 0.0866, "step": 5700 }, { "epoch": 0.8410180745112504, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.0559396967291832, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.9431, "eval_samples_per_second": 5.827, "eval_steps_per_second": 0.2, "step": 5700 }, { "epoch": 0.8411656215418665, "grad_norm": 2.1464152336120605, "learning_rate": 1.4970311961222795e-06, "loss": 0.0256, "step": 5701 }, { "epoch": 0.8413131685724825, "grad_norm": 1.440287709236145, "learning_rate": 1.4943213376660593e-06, "loss": 0.0346, "step": 5702 }, { "epoch": 0.8414607156030985, "grad_norm": 4.759016036987305, "learning_rate": 1.4916137360013515e-06, "loss": 0.1046, "step": 5703 }, { "epoch": 0.8416082626337145, "grad_norm": 1.751717448234558, "learning_rate": 1.488908391846564e-06, "loss": 0.0309, "step": 5704 }, { "epoch": 0.8417558096643305, "grad_norm": 2.9776530265808105, "learning_rate": 1.4862053059194936e-06, "loss": 0.0618, "step": 5705 }, { "epoch": 0.8419033566949465, "grad_norm": 4.0823564529418945, "learning_rate": 1.4835044789373476e-06, "loss": 0.0832, "step": 5706 }, { "epoch": 0.8420509037255626, "grad_norm": 2.550696849822998, "learning_rate": 1.4808059116167306e-06, "loss": 0.0437, "step": 5707 }, { "epoch": 0.8421984507561785, "grad_norm": 1.0314558744430542, "learning_rate": 1.4781096046736421e-06, "loss": 0.0139, "step": 5708 }, { "epoch": 0.8423459977867945, "grad_norm": 2.7498323917388916, "learning_rate": 1.475415558823493e-06, "loss": 0.0488, "step": 5709 }, { "epoch": 0.8424935448174106, "grad_norm": 4.794541835784912, "learning_rate": 1.472723774781083e-06, "loss": 0.0557, "step": 5710 }, { "epoch": 0.8426410918480266, "grad_norm": 4.705298900604248, "learning_rate": 1.4700342532606193e-06, "loss": 0.022, "step": 5711 }, { "epoch": 0.8427886388786425, "grad_norm": 1.9125524759292603, "learning_rate": 1.467346994975708e-06, "loss": 0.0452, "step": 5712 }, { "epoch": 0.8429361859092586, "grad_norm": 2.408352851867676, "learning_rate": 1.4646620006393497e-06, "loss": 0.0401, "step": 5713 }, { "epoch": 0.8430837329398746, "grad_norm": 1.7403894662857056, "learning_rate": 1.461979270963947e-06, "loss": 0.0423, "step": 5714 }, { "epoch": 0.8432312799704906, "grad_norm": 2.193599224090576, "learning_rate": 1.4592988066613045e-06, "loss": 0.0303, "step": 5715 }, { "epoch": 0.8433788270011066, "grad_norm": 3.449028491973877, "learning_rate": 1.4566206084426238e-06, "loss": 0.0829, "step": 5716 }, { "epoch": 0.8435263740317226, "grad_norm": 2.0021655559539795, "learning_rate": 1.453944677018503e-06, "loss": 0.0562, "step": 5717 }, { "epoch": 0.8436739210623386, "grad_norm": 2.076805830001831, "learning_rate": 1.4512710130989427e-06, "loss": 0.0621, "step": 5718 }, { "epoch": 0.8438214680929547, "grad_norm": 2.1435418128967285, "learning_rate": 1.448599617393337e-06, "loss": 0.1112, "step": 5719 }, { "epoch": 0.8439690151235706, "grad_norm": 1.225638747215271, "learning_rate": 1.4459304906104821e-06, "loss": 0.0269, "step": 5720 }, { "epoch": 0.8439690151235706, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9651741293532339, "eval_loss": 0.05757652968168259, "eval_precision": 0.9897959183673469, "eval_recall": 0.941747572815534, "eval_runtime": 49.2938, "eval_samples_per_second": 5.903, "eval_steps_per_second": 0.203, "step": 5720 }, { "epoch": 0.8441165621541866, "grad_norm": 1.08596670627594, "learning_rate": 1.4432636334585736e-06, "loss": 0.0421, "step": 5721 }, { "epoch": 0.8442641091848027, "grad_norm": 3.0772130489349365, "learning_rate": 1.440599046645198e-06, "loss": 0.0793, "step": 5722 }, { "epoch": 0.8444116562154187, "grad_norm": 2.5330147743225098, "learning_rate": 1.4379367308773483e-06, "loss": 0.07, "step": 5723 }, { "epoch": 0.8445592032460347, "grad_norm": 0.7319562435150146, "learning_rate": 1.4352766868614088e-06, "loss": 0.0085, "step": 5724 }, { "epoch": 0.8447067502766507, "grad_norm": 1.175978183746338, "learning_rate": 1.4326189153031577e-06, "loss": 0.0295, "step": 5725 }, { "epoch": 0.8448542973072667, "grad_norm": 2.1906511783599854, "learning_rate": 1.4299634169077836e-06, "loss": 0.035, "step": 5726 }, { "epoch": 0.8450018443378827, "grad_norm": 2.8638551235198975, "learning_rate": 1.4273101923798593e-06, "loss": 0.0444, "step": 5727 }, { "epoch": 0.8451493913684988, "grad_norm": 1.8354886770248413, "learning_rate": 1.4246592424233575e-06, "loss": 0.0679, "step": 5728 }, { "epoch": 0.8452969383991147, "grad_norm": 3.6023311614990234, "learning_rate": 1.4220105677416485e-06, "loss": 0.072, "step": 5729 }, { "epoch": 0.8454444854297307, "grad_norm": 1.198553204536438, "learning_rate": 1.4193641690375037e-06, "loss": 0.0114, "step": 5730 }, { "epoch": 0.8455920324603468, "grad_norm": 3.0524094104766846, "learning_rate": 1.4167200470130794e-06, "loss": 0.0847, "step": 5731 }, { "epoch": 0.8457395794909628, "grad_norm": 3.2249886989593506, "learning_rate": 1.4140782023699396e-06, "loss": 0.0754, "step": 5732 }, { "epoch": 0.8458871265215787, "grad_norm": 0.9703981876373291, "learning_rate": 1.4114386358090325e-06, "loss": 0.0109, "step": 5733 }, { "epoch": 0.8460346735521947, "grad_norm": 1.8067426681518555, "learning_rate": 1.4088013480307128e-06, "loss": 0.0368, "step": 5734 }, { "epoch": 0.8461822205828108, "grad_norm": 2.0494327545166016, "learning_rate": 1.406166339734727e-06, "loss": 0.0446, "step": 5735 }, { "epoch": 0.8463297676134268, "grad_norm": 3.375311851501465, "learning_rate": 1.4035336116202113e-06, "loss": 0.0711, "step": 5736 }, { "epoch": 0.8464773146440427, "grad_norm": 4.0707688331604, "learning_rate": 1.4009031643857041e-06, "loss": 0.0532, "step": 5737 }, { "epoch": 0.8466248616746588, "grad_norm": 6.29263162612915, "learning_rate": 1.3982749987291333e-06, "loss": 0.1401, "step": 5738 }, { "epoch": 0.8467724087052748, "grad_norm": 2.0079193115234375, "learning_rate": 1.3956491153478258e-06, "loss": 0.053, "step": 5739 }, { "epoch": 0.8469199557358909, "grad_norm": 1.6431809663772583, "learning_rate": 1.393025514938502e-06, "loss": 0.0441, "step": 5740 }, { "epoch": 0.8469199557358909, "eval_accuracy": 0.9811866859623734, "eval_f1": 0.967741935483871, "eval_loss": 0.05722363665699959, "eval_precision": 0.9898477157360406, "eval_recall": 0.9466019417475728, "eval_runtime": 50.1974, "eval_samples_per_second": 5.797, "eval_steps_per_second": 0.199, "step": 5740 }, { "epoch": 0.8470675027665068, "grad_norm": 1.8669764995574951, "learning_rate": 1.3904041981972715e-06, "loss": 0.0562, "step": 5741 }, { "epoch": 0.8472150497971228, "grad_norm": 3.058616876602173, "learning_rate": 1.3877851658196473e-06, "loss": 0.0533, "step": 5742 }, { "epoch": 0.8473625968277388, "grad_norm": 0.4952690303325653, "learning_rate": 1.3851684185005275e-06, "loss": 0.0046, "step": 5743 }, { "epoch": 0.8475101438583549, "grad_norm": 3.9359915256500244, "learning_rate": 1.382553956934205e-06, "loss": 0.0784, "step": 5744 }, { "epoch": 0.8476576908889708, "grad_norm": 2.8091506958007812, "learning_rate": 1.3799417818143758e-06, "loss": 0.0859, "step": 5745 }, { "epoch": 0.8478052379195868, "grad_norm": 3.728245735168457, "learning_rate": 1.3773318938341186e-06, "loss": 0.0574, "step": 5746 }, { "epoch": 0.8479527849502029, "grad_norm": 5.488831520080566, "learning_rate": 1.3747242936859062e-06, "loss": 0.1272, "step": 5747 }, { "epoch": 0.8481003319808189, "grad_norm": 2.9584414958953857, "learning_rate": 1.3721189820616098e-06, "loss": 0.089, "step": 5748 }, { "epoch": 0.848247879011435, "grad_norm": 3.1330299377441406, "learning_rate": 1.369515959652491e-06, "loss": 0.1249, "step": 5749 }, { "epoch": 0.8483954260420509, "grad_norm": 1.467090129852295, "learning_rate": 1.3669152271492015e-06, "loss": 0.0427, "step": 5750 }, { "epoch": 0.8485429730726669, "grad_norm": 3.071939468383789, "learning_rate": 1.3643167852417894e-06, "loss": 0.0431, "step": 5751 }, { "epoch": 0.8486905201032829, "grad_norm": 2.546959638595581, "learning_rate": 1.361720634619691e-06, "loss": 0.0879, "step": 5752 }, { "epoch": 0.848838067133899, "grad_norm": 2.3573038578033447, "learning_rate": 1.3591267759717374e-06, "loss": 0.0535, "step": 5753 }, { "epoch": 0.8489856141645149, "grad_norm": 2.136235475540161, "learning_rate": 1.3565352099861539e-06, "loss": 0.0416, "step": 5754 }, { "epoch": 0.8491331611951309, "grad_norm": 2.895474433898926, "learning_rate": 1.3539459373505503e-06, "loss": 0.0762, "step": 5755 }, { "epoch": 0.849280708225747, "grad_norm": 1.7000274658203125, "learning_rate": 1.351358958751936e-06, "loss": 0.0323, "step": 5756 }, { "epoch": 0.849428255256363, "grad_norm": 0.9407041072845459, "learning_rate": 1.3487742748767051e-06, "loss": 0.0127, "step": 5757 }, { "epoch": 0.8495758022869789, "grad_norm": 7.56793212890625, "learning_rate": 1.3461918864106438e-06, "loss": 0.0745, "step": 5758 }, { "epoch": 0.849723349317595, "grad_norm": 1.6602380275726318, "learning_rate": 1.343611794038937e-06, "loss": 0.0482, "step": 5759 }, { "epoch": 0.849870896348211, "grad_norm": 1.466604471206665, "learning_rate": 1.3410339984461518e-06, "loss": 0.0413, "step": 5760 }, { "epoch": 0.849870896348211, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05454903468489647, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9571, "eval_samples_per_second": 5.944, "eval_steps_per_second": 0.204, "step": 5760 }, { "epoch": 0.850018443378827, "grad_norm": 1.593299388885498, "learning_rate": 1.3384585003162455e-06, "loss": 0.0262, "step": 5761 }, { "epoch": 0.850165990409443, "grad_norm": 1.3665624856948853, "learning_rate": 1.3358853003325733e-06, "loss": 0.0251, "step": 5762 }, { "epoch": 0.850313537440059, "grad_norm": 0.8125731945037842, "learning_rate": 1.333314399177873e-06, "loss": 0.0128, "step": 5763 }, { "epoch": 0.850461084470675, "grad_norm": 1.1369338035583496, "learning_rate": 1.3307457975342764e-06, "loss": 0.0297, "step": 5764 }, { "epoch": 0.8506086315012911, "grad_norm": 1.9386367797851562, "learning_rate": 1.3281794960833072e-06, "loss": 0.0523, "step": 5765 }, { "epoch": 0.850756178531907, "grad_norm": 1.1021301746368408, "learning_rate": 1.325615495505872e-06, "loss": 0.0218, "step": 5766 }, { "epoch": 0.850903725562523, "grad_norm": 1.1594314575195312, "learning_rate": 1.3230537964822732e-06, "loss": 0.0364, "step": 5767 }, { "epoch": 0.8510512725931391, "grad_norm": 1.921069622039795, "learning_rate": 1.3204943996922025e-06, "loss": 0.03, "step": 5768 }, { "epoch": 0.8511988196237551, "grad_norm": 2.836876630783081, "learning_rate": 1.3179373058147337e-06, "loss": 0.0601, "step": 5769 }, { "epoch": 0.8513463666543711, "grad_norm": 2.206815719604492, "learning_rate": 1.3153825155283395e-06, "loss": 0.0651, "step": 5770 }, { "epoch": 0.8514939136849871, "grad_norm": 1.552211046218872, "learning_rate": 1.312830029510873e-06, "loss": 0.0603, "step": 5771 }, { "epoch": 0.8516414607156031, "grad_norm": 2.625312328338623, "learning_rate": 1.3102798484395761e-06, "loss": 0.0581, "step": 5772 }, { "epoch": 0.8517890077462191, "grad_norm": 2.256977081298828, "learning_rate": 1.307731972991091e-06, "loss": 0.0579, "step": 5773 }, { "epoch": 0.8519365547768352, "grad_norm": 2.2679216861724854, "learning_rate": 1.3051864038414342e-06, "loss": 0.0561, "step": 5774 }, { "epoch": 0.8520841018074511, "grad_norm": 1.9951554536819458, "learning_rate": 1.3026431416660135e-06, "loss": 0.0175, "step": 5775 }, { "epoch": 0.8522316488380671, "grad_norm": 1.83767569065094, "learning_rate": 1.3001021871396313e-06, "loss": 0.0652, "step": 5776 }, { "epoch": 0.8523791958686832, "grad_norm": 2.527323007583618, "learning_rate": 1.2975635409364684e-06, "loss": 0.1043, "step": 5777 }, { "epoch": 0.8525267428992992, "grad_norm": 3.0767338275909424, "learning_rate": 1.2950272037301015e-06, "loss": 0.1132, "step": 5778 }, { "epoch": 0.8526742899299151, "grad_norm": 6.410322189331055, "learning_rate": 1.29249317619349e-06, "loss": 0.0782, "step": 5779 }, { "epoch": 0.8528218369605312, "grad_norm": 2.966045618057251, "learning_rate": 1.2899614589989795e-06, "loss": 0.061, "step": 5780 }, { "epoch": 0.8528218369605312, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.054151300340890884, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0986, "eval_samples_per_second": 5.927, "eval_steps_per_second": 0.204, "step": 5780 }, { "epoch": 0.8529693839911472, "grad_norm": 3.7127346992492676, "learning_rate": 1.2874320528183081e-06, "loss": 0.0686, "step": 5781 }, { "epoch": 0.8531169310217632, "grad_norm": 0.8974189162254333, "learning_rate": 1.2849049583225937e-06, "loss": 0.0346, "step": 5782 }, { "epoch": 0.8532644780523791, "grad_norm": 1.8226568698883057, "learning_rate": 1.282380176182345e-06, "loss": 0.0364, "step": 5783 }, { "epoch": 0.8534120250829952, "grad_norm": 1.6044015884399414, "learning_rate": 1.279857707067459e-06, "loss": 0.0351, "step": 5784 }, { "epoch": 0.8535595721136112, "grad_norm": 1.8267277479171753, "learning_rate": 1.2773375516472153e-06, "loss": 0.0491, "step": 5785 }, { "epoch": 0.8537071191442273, "grad_norm": 2.8557074069976807, "learning_rate": 1.2748197105902748e-06, "loss": 0.063, "step": 5786 }, { "epoch": 0.8538546661748432, "grad_norm": 1.9956883192062378, "learning_rate": 1.2723041845647e-06, "loss": 0.0622, "step": 5787 }, { "epoch": 0.8540022132054592, "grad_norm": 7.793476581573486, "learning_rate": 1.2697909742379243e-06, "loss": 0.0918, "step": 5788 }, { "epoch": 0.8541497602360753, "grad_norm": 2.934429883956909, "learning_rate": 1.2672800802767715e-06, "loss": 0.1029, "step": 5789 }, { "epoch": 0.8542973072666913, "grad_norm": 3.2007949352264404, "learning_rate": 1.264771503347454e-06, "loss": 0.0534, "step": 5790 }, { "epoch": 0.8544448542973073, "grad_norm": 1.6562820672988892, "learning_rate": 1.2622652441155614e-06, "loss": 0.0265, "step": 5791 }, { "epoch": 0.8545924013279232, "grad_norm": 2.057903528213501, "learning_rate": 1.2597613032460776e-06, "loss": 0.0482, "step": 5792 }, { "epoch": 0.8547399483585393, "grad_norm": 4.989871978759766, "learning_rate": 1.257259681403369e-06, "loss": 0.044, "step": 5793 }, { "epoch": 0.8548874953891553, "grad_norm": 3.3362135887145996, "learning_rate": 1.2547603792511787e-06, "loss": 0.1074, "step": 5794 }, { "epoch": 0.8550350424197714, "grad_norm": 2.7018706798553467, "learning_rate": 1.2522633974526476e-06, "loss": 0.0851, "step": 5795 }, { "epoch": 0.8551825894503873, "grad_norm": 4.39471960067749, "learning_rate": 1.2497687366702883e-06, "loss": 0.1162, "step": 5796 }, { "epoch": 0.8553301364810033, "grad_norm": 2.1254372596740723, "learning_rate": 1.2472763975660051e-06, "loss": 0.1178, "step": 5797 }, { "epoch": 0.8554776835116193, "grad_norm": 1.8468786478042603, "learning_rate": 1.2447863808010874e-06, "loss": 0.0239, "step": 5798 }, { "epoch": 0.8556252305422354, "grad_norm": 1.604872703552246, "learning_rate": 1.242298687036203e-06, "loss": 0.0556, "step": 5799 }, { "epoch": 0.8557727775728513, "grad_norm": 2.3689584732055664, "learning_rate": 1.2398133169314031e-06, "loss": 0.0699, "step": 5800 }, { "epoch": 0.8557727775728513, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05439816415309906, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.645, "eval_samples_per_second": 5.982, "eval_steps_per_second": 0.206, "step": 5800 }, { "epoch": 0.8559203246034673, "grad_norm": 2.5868420600891113, "learning_rate": 1.2373302711461276e-06, "loss": 0.0567, "step": 5801 }, { "epoch": 0.8560678716340834, "grad_norm": 1.1004620790481567, "learning_rate": 1.2348495503392e-06, "loss": 0.0245, "step": 5802 }, { "epoch": 0.8562154186646994, "grad_norm": 3.2596724033355713, "learning_rate": 1.2323711551688189e-06, "loss": 0.0451, "step": 5803 }, { "epoch": 0.8563629656953153, "grad_norm": 2.8130266666412354, "learning_rate": 1.2298950862925752e-06, "loss": 0.0447, "step": 5804 }, { "epoch": 0.8565105127259314, "grad_norm": 3.4810211658477783, "learning_rate": 1.2274213443674342e-06, "loss": 0.0424, "step": 5805 }, { "epoch": 0.8566580597565474, "grad_norm": 2.2311394214630127, "learning_rate": 1.2249499300497502e-06, "loss": 0.0605, "step": 5806 }, { "epoch": 0.8568056067871634, "grad_norm": 1.9466489553451538, "learning_rate": 1.2224808439952595e-06, "loss": 0.0383, "step": 5807 }, { "epoch": 0.8569531538177794, "grad_norm": 2.809014320373535, "learning_rate": 1.2200140868590759e-06, "loss": 0.0635, "step": 5808 }, { "epoch": 0.8571007008483954, "grad_norm": 2.4443066120147705, "learning_rate": 1.2175496592957016e-06, "loss": 0.1043, "step": 5809 }, { "epoch": 0.8572482478790114, "grad_norm": 1.5663566589355469, "learning_rate": 1.215087561959013e-06, "loss": 0.0346, "step": 5810 }, { "epoch": 0.8573957949096275, "grad_norm": 1.8980169296264648, "learning_rate": 1.2126277955022736e-06, "loss": 0.0561, "step": 5811 }, { "epoch": 0.8575433419402434, "grad_norm": 0.9008997082710266, "learning_rate": 1.2101703605781312e-06, "loss": 0.032, "step": 5812 }, { "epoch": 0.8576908889708594, "grad_norm": 1.4687256813049316, "learning_rate": 1.2077152578386086e-06, "loss": 0.0296, "step": 5813 }, { "epoch": 0.8578384360014755, "grad_norm": 3.059095621109009, "learning_rate": 1.2052624879351105e-06, "loss": 0.1066, "step": 5814 }, { "epoch": 0.8579859830320915, "grad_norm": 3.6488006114959717, "learning_rate": 1.202812051518425e-06, "loss": 0.1657, "step": 5815 }, { "epoch": 0.8581335300627075, "grad_norm": 5.846051216125488, "learning_rate": 1.2003639492387254e-06, "loss": 0.0762, "step": 5816 }, { "epoch": 0.8582810770933235, "grad_norm": 3.844508409500122, "learning_rate": 1.1979181817455544e-06, "loss": 0.1101, "step": 5817 }, { "epoch": 0.8584286241239395, "grad_norm": 0.9341302514076233, "learning_rate": 1.195474749687846e-06, "loss": 0.0092, "step": 5818 }, { "epoch": 0.8585761711545555, "grad_norm": 3.447010040283203, "learning_rate": 1.1930336537139076e-06, "loss": 0.0568, "step": 5819 }, { "epoch": 0.8587237181851716, "grad_norm": 5.085091590881348, "learning_rate": 1.1905948944714295e-06, "loss": 0.0674, "step": 5820 }, { "epoch": 0.8587237181851716, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.054757051169872284, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1301, "eval_samples_per_second": 5.923, "eval_steps_per_second": 0.204, "step": 5820 }, { "epoch": 0.8588712652157875, "grad_norm": 1.6856117248535156, "learning_rate": 1.1881584726074857e-06, "loss": 0.0433, "step": 5821 }, { "epoch": 0.8590188122464035, "grad_norm": 1.9084707498550415, "learning_rate": 1.1857243887685222e-06, "loss": 0.0635, "step": 5822 }, { "epoch": 0.8591663592770196, "grad_norm": 3.1395390033721924, "learning_rate": 1.183292643600371e-06, "loss": 0.0949, "step": 5823 }, { "epoch": 0.8593139063076356, "grad_norm": 1.4562228918075562, "learning_rate": 1.1808632377482377e-06, "loss": 0.0345, "step": 5824 }, { "epoch": 0.8594614533382515, "grad_norm": 1.8482775688171387, "learning_rate": 1.178436171856714e-06, "loss": 0.034, "step": 5825 }, { "epoch": 0.8596090003688676, "grad_norm": 5.482147216796875, "learning_rate": 1.1760114465697693e-06, "loss": 0.0982, "step": 5826 }, { "epoch": 0.8597565473994836, "grad_norm": 5.184039115905762, "learning_rate": 1.1735890625307466e-06, "loss": 0.049, "step": 5827 }, { "epoch": 0.8599040944300996, "grad_norm": 5.377273082733154, "learning_rate": 1.1711690203823712e-06, "loss": 0.0903, "step": 5828 }, { "epoch": 0.8600516414607156, "grad_norm": 3.9669299125671387, "learning_rate": 1.168751320766749e-06, "loss": 0.0593, "step": 5829 }, { "epoch": 0.8601991884913316, "grad_norm": 2.927678108215332, "learning_rate": 1.1663359643253625e-06, "loss": 0.0876, "step": 5830 }, { "epoch": 0.8603467355219476, "grad_norm": 2.9616873264312744, "learning_rate": 1.1639229516990712e-06, "loss": 0.0243, "step": 5831 }, { "epoch": 0.8604942825525637, "grad_norm": 1.829149603843689, "learning_rate": 1.1615122835281157e-06, "loss": 0.0333, "step": 5832 }, { "epoch": 0.8606418295831796, "grad_norm": 2.9309916496276855, "learning_rate": 1.1591039604521104e-06, "loss": 0.0741, "step": 5833 }, { "epoch": 0.8607893766137956, "grad_norm": 1.516096591949463, "learning_rate": 1.156697983110051e-06, "loss": 0.0271, "step": 5834 }, { "epoch": 0.8609369236444117, "grad_norm": 2.9557275772094727, "learning_rate": 1.1542943521403116e-06, "loss": 0.0862, "step": 5835 }, { "epoch": 0.8610844706750277, "grad_norm": 4.701959133148193, "learning_rate": 1.1518930681806395e-06, "loss": 0.0465, "step": 5836 }, { "epoch": 0.8612320177056437, "grad_norm": 1.5266114473342896, "learning_rate": 1.149494131868164e-06, "loss": 0.036, "step": 5837 }, { "epoch": 0.8613795647362597, "grad_norm": 2.2609028816223145, "learning_rate": 1.1470975438393883e-06, "loss": 0.0528, "step": 5838 }, { "epoch": 0.8615271117668757, "grad_norm": 0.7571324110031128, "learning_rate": 1.144703304730188e-06, "loss": 0.0093, "step": 5839 }, { "epoch": 0.8616746587974917, "grad_norm": 1.8648948669433594, "learning_rate": 1.1423114151758297e-06, "loss": 0.0402, "step": 5840 }, { "epoch": 0.8616746587974917, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05714450031518936, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 48.7597, "eval_samples_per_second": 5.968, "eval_steps_per_second": 0.205, "step": 5840 }, { "epoch": 0.8618222058281078, "grad_norm": 1.357142686843872, "learning_rate": 1.1399218758109442e-06, "loss": 0.0338, "step": 5841 }, { "epoch": 0.8619697528587237, "grad_norm": 0.8647232055664062, "learning_rate": 1.1375346872695403e-06, "loss": 0.0108, "step": 5842 }, { "epoch": 0.8621172998893397, "grad_norm": 1.7541052103042603, "learning_rate": 1.1351498501850067e-06, "loss": 0.0615, "step": 5843 }, { "epoch": 0.8622648469199558, "grad_norm": 1.8767192363739014, "learning_rate": 1.1327673651901084e-06, "loss": 0.0401, "step": 5844 }, { "epoch": 0.8624123939505718, "grad_norm": 2.4952330589294434, "learning_rate": 1.1303872329169795e-06, "loss": 0.0745, "step": 5845 }, { "epoch": 0.8625599409811877, "grad_norm": 1.116189956665039, "learning_rate": 1.128009453997142e-06, "loss": 0.0143, "step": 5846 }, { "epoch": 0.8627074880118037, "grad_norm": 4.7948899269104, "learning_rate": 1.1256340290614787e-06, "loss": 0.0891, "step": 5847 }, { "epoch": 0.8628550350424198, "grad_norm": 2.2906928062438965, "learning_rate": 1.12326095874026e-06, "loss": 0.0344, "step": 5848 }, { "epoch": 0.8630025820730358, "grad_norm": 1.9315592050552368, "learning_rate": 1.1208902436631275e-06, "loss": 0.022, "step": 5849 }, { "epoch": 0.8631501291036517, "grad_norm": 4.874817848205566, "learning_rate": 1.118521884459094e-06, "loss": 0.0905, "step": 5850 }, { "epoch": 0.8632976761342678, "grad_norm": 1.6982738971710205, "learning_rate": 1.1161558817565543e-06, "loss": 0.0249, "step": 5851 }, { "epoch": 0.8634452231648838, "grad_norm": 2.982245683670044, "learning_rate": 1.1137922361832732e-06, "loss": 0.0442, "step": 5852 }, { "epoch": 0.8635927701954998, "grad_norm": 0.7714384198188782, "learning_rate": 1.1114309483663865e-06, "loss": 0.0103, "step": 5853 }, { "epoch": 0.8637403172261158, "grad_norm": 1.4380220174789429, "learning_rate": 1.1090720189324167e-06, "loss": 0.0404, "step": 5854 }, { "epoch": 0.8638878642567318, "grad_norm": 2.176177978515625, "learning_rate": 1.1067154485072506e-06, "loss": 0.0419, "step": 5855 }, { "epoch": 0.8640354112873478, "grad_norm": 2.0907082557678223, "learning_rate": 1.1043612377161472e-06, "loss": 0.0655, "step": 5856 }, { "epoch": 0.8641829583179639, "grad_norm": 3.851604461669922, "learning_rate": 1.1020093871837478e-06, "loss": 0.0977, "step": 5857 }, { "epoch": 0.8643305053485799, "grad_norm": 2.9414432048797607, "learning_rate": 1.0996598975340623e-06, "loss": 0.1312, "step": 5858 }, { "epoch": 0.8644780523791958, "grad_norm": 2.7025697231292725, "learning_rate": 1.0973127693904728e-06, "loss": 0.0523, "step": 5859 }, { "epoch": 0.8646255994098119, "grad_norm": 1.426220417022705, "learning_rate": 1.0949680033757425e-06, "loss": 0.0226, "step": 5860 }, { "epoch": 0.8646255994098119, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.057654667645692825, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 49.051, "eval_samples_per_second": 5.933, "eval_steps_per_second": 0.204, "step": 5860 }, { "epoch": 0.8647731464404279, "grad_norm": 2.250925064086914, "learning_rate": 1.0926256001119983e-06, "loss": 0.0812, "step": 5861 }, { "epoch": 0.8649206934710439, "grad_norm": 1.027513861656189, "learning_rate": 1.0902855602207451e-06, "loss": 0.0064, "step": 5862 }, { "epoch": 0.8650682405016599, "grad_norm": 2.2142293453216553, "learning_rate": 1.087947884322863e-06, "loss": 0.0702, "step": 5863 }, { "epoch": 0.8652157875322759, "grad_norm": 2.2209510803222656, "learning_rate": 1.085612573038598e-06, "loss": 0.0573, "step": 5864 }, { "epoch": 0.8653633345628919, "grad_norm": 1.4538800716400146, "learning_rate": 1.0832796269875757e-06, "loss": 0.0239, "step": 5865 }, { "epoch": 0.865510881593508, "grad_norm": 1.4328197240829468, "learning_rate": 1.0809490467887896e-06, "loss": 0.0457, "step": 5866 }, { "epoch": 0.8656584286241239, "grad_norm": 1.6774661540985107, "learning_rate": 1.0786208330606063e-06, "loss": 0.0387, "step": 5867 }, { "epoch": 0.8658059756547399, "grad_norm": 2.5413808822631836, "learning_rate": 1.0762949864207672e-06, "loss": 0.0632, "step": 5868 }, { "epoch": 0.865953522685356, "grad_norm": 1.5700201988220215, "learning_rate": 1.073971507486382e-06, "loss": 0.0361, "step": 5869 }, { "epoch": 0.866101069715972, "grad_norm": 2.1447982788085938, "learning_rate": 1.0716503968739367e-06, "loss": 0.0504, "step": 5870 }, { "epoch": 0.8662486167465879, "grad_norm": 1.6840871572494507, "learning_rate": 1.0693316551992838e-06, "loss": 0.0698, "step": 5871 }, { "epoch": 0.866396163777204, "grad_norm": 2.8754312992095947, "learning_rate": 1.067015283077646e-06, "loss": 0.0951, "step": 5872 }, { "epoch": 0.86654371080782, "grad_norm": 3.0255074501037598, "learning_rate": 1.0647012811236279e-06, "loss": 0.0815, "step": 5873 }, { "epoch": 0.866691257838436, "grad_norm": 1.687869668006897, "learning_rate": 1.0623896499511954e-06, "loss": 0.0389, "step": 5874 }, { "epoch": 0.866838804869052, "grad_norm": 2.293487787246704, "learning_rate": 1.0600803901736846e-06, "loss": 0.0784, "step": 5875 }, { "epoch": 0.866986351899668, "grad_norm": 4.182459354400635, "learning_rate": 1.0577735024038116e-06, "loss": 0.0379, "step": 5876 }, { "epoch": 0.867133898930284, "grad_norm": 2.875462293624878, "learning_rate": 1.0554689872536516e-06, "loss": 0.0294, "step": 5877 }, { "epoch": 0.8672814459609001, "grad_norm": 1.6233718395233154, "learning_rate": 1.05316684533466e-06, "loss": 0.0274, "step": 5878 }, { "epoch": 0.867428992991516, "grad_norm": 1.9789857864379883, "learning_rate": 1.050867077257659e-06, "loss": 0.0584, "step": 5879 }, { "epoch": 0.867576540022132, "grad_norm": 2.7687809467315674, "learning_rate": 1.0485696836328385e-06, "loss": 0.0419, "step": 5880 }, { "epoch": 0.867576540022132, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9627791563275434, "eval_loss": 0.05770847946405411, "eval_precision": 0.9847715736040609, "eval_recall": 0.941747572815534, "eval_runtime": 49.1485, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.203, "step": 5880 }, { "epoch": 0.8677240870527481, "grad_norm": 1.6345365047454834, "learning_rate": 1.0462746650697607e-06, "loss": 0.0448, "step": 5881 }, { "epoch": 0.8678716340833641, "grad_norm": 2.9569079875946045, "learning_rate": 1.04398202217736e-06, "loss": 0.0821, "step": 5882 }, { "epoch": 0.8680191811139801, "grad_norm": 1.1559429168701172, "learning_rate": 1.041691755563934e-06, "loss": 0.0316, "step": 5883 }, { "epoch": 0.8681667281445961, "grad_norm": 1.6933823823928833, "learning_rate": 1.0394038658371575e-06, "loss": 0.0705, "step": 5884 }, { "epoch": 0.8683142751752121, "grad_norm": 1.4633872509002686, "learning_rate": 1.0371183536040696e-06, "loss": 0.0383, "step": 5885 }, { "epoch": 0.8684618222058281, "grad_norm": 1.4230270385742188, "learning_rate": 1.0348352194710752e-06, "loss": 0.0235, "step": 5886 }, { "epoch": 0.8686093692364442, "grad_norm": 1.4916578531265259, "learning_rate": 1.032554464043961e-06, "loss": 0.0146, "step": 5887 }, { "epoch": 0.8687569162670601, "grad_norm": 0.9468370676040649, "learning_rate": 1.0302760879278707e-06, "loss": 0.0296, "step": 5888 }, { "epoch": 0.8689044632976761, "grad_norm": 9.44964599609375, "learning_rate": 1.0280000917273169e-06, "loss": 0.086, "step": 5889 }, { "epoch": 0.8690520103282922, "grad_norm": 1.7721306085586548, "learning_rate": 1.0257264760461905e-06, "loss": 0.0532, "step": 5890 }, { "epoch": 0.8691995573589082, "grad_norm": 1.6889609098434448, "learning_rate": 1.0234552414877385e-06, "loss": 0.0298, "step": 5891 }, { "epoch": 0.8693471043895241, "grad_norm": 2.219182252883911, "learning_rate": 1.0211863886545858e-06, "loss": 0.0371, "step": 5892 }, { "epoch": 0.8694946514201402, "grad_norm": 3.2385971546173096, "learning_rate": 1.018919918148723e-06, "loss": 0.0548, "step": 5893 }, { "epoch": 0.8696421984507562, "grad_norm": 3.4648733139038086, "learning_rate": 1.0166558305715035e-06, "loss": 0.0532, "step": 5894 }, { "epoch": 0.8697897454813722, "grad_norm": 1.6734100580215454, "learning_rate": 1.0143941265236568e-06, "loss": 0.0468, "step": 5895 }, { "epoch": 0.8699372925119881, "grad_norm": 2.0575058460235596, "learning_rate": 1.0121348066052705e-06, "loss": 0.0594, "step": 5896 }, { "epoch": 0.8700848395426042, "grad_norm": 3.5154271125793457, "learning_rate": 1.0098778714158063e-06, "loss": 0.0338, "step": 5897 }, { "epoch": 0.8702323865732202, "grad_norm": 3.4978115558624268, "learning_rate": 1.0076233215540943e-06, "loss": 0.0871, "step": 5898 }, { "epoch": 0.8703799336038363, "grad_norm": 2.0322773456573486, "learning_rate": 1.0053711576183267e-06, "loss": 0.045, "step": 5899 }, { "epoch": 0.8705274806344522, "grad_norm": 3.7609992027282715, "learning_rate": 1.0031213802060613e-06, "loss": 0.089, "step": 5900 }, { "epoch": 0.8705274806344522, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.056723564863204956, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0912, "eval_samples_per_second": 5.928, "eval_steps_per_second": 0.204, "step": 5900 }, { "epoch": 0.8706750276650682, "grad_norm": 4.275539398193359, "learning_rate": 1.000873989914234e-06, "loss": 0.0684, "step": 5901 }, { "epoch": 0.8708225746956842, "grad_norm": 3.274839162826538, "learning_rate": 9.98628987339134e-07, "loss": 0.0949, "step": 5902 }, { "epoch": 0.8709701217263003, "grad_norm": 2.307234764099121, "learning_rate": 9.963863730764222e-07, "loss": 0.0624, "step": 5903 }, { "epoch": 0.8711176687569163, "grad_norm": 2.237243413925171, "learning_rate": 9.941461477211301e-07, "loss": 0.086, "step": 5904 }, { "epoch": 0.8712652157875322, "grad_norm": 2.7823374271392822, "learning_rate": 9.919083118676465e-07, "loss": 0.0639, "step": 5905 }, { "epoch": 0.8714127628181483, "grad_norm": 3.3530755043029785, "learning_rate": 9.896728661097332e-07, "loss": 0.0129, "step": 5906 }, { "epoch": 0.8715603098487643, "grad_norm": 1.7607320547103882, "learning_rate": 9.874398110405182e-07, "loss": 0.044, "step": 5907 }, { "epoch": 0.8717078568793803, "grad_norm": 1.0484280586242676, "learning_rate": 9.852091472524882e-07, "loss": 0.0245, "step": 5908 }, { "epoch": 0.8718554039099963, "grad_norm": 6.2049055099487305, "learning_rate": 9.829808753375046e-07, "loss": 0.1017, "step": 5909 }, { "epoch": 0.8720029509406123, "grad_norm": 2.4204776287078857, "learning_rate": 9.807549958867856e-07, "loss": 0.0652, "step": 5910 }, { "epoch": 0.8721504979712283, "grad_norm": 3.092439651489258, "learning_rate": 9.785315094909188e-07, "loss": 0.1103, "step": 5911 }, { "epoch": 0.8722980450018444, "grad_norm": 3.280195474624634, "learning_rate": 9.763104167398608e-07, "loss": 0.0362, "step": 5912 }, { "epoch": 0.8724455920324603, "grad_norm": 1.6988095045089722, "learning_rate": 9.740917182229248e-07, "loss": 0.046, "step": 5913 }, { "epoch": 0.8725931390630763, "grad_norm": 0.9181917905807495, "learning_rate": 9.718754145287922e-07, "loss": 0.0319, "step": 5914 }, { "epoch": 0.8727406860936924, "grad_norm": 1.576512336730957, "learning_rate": 9.696615062455118e-07, "loss": 0.0274, "step": 5915 }, { "epoch": 0.8728882331243084, "grad_norm": 1.6942555904388428, "learning_rate": 9.674499939604964e-07, "loss": 0.0168, "step": 5916 }, { "epoch": 0.8730357801549243, "grad_norm": 1.1324032545089722, "learning_rate": 9.652408782605161e-07, "loss": 0.0333, "step": 5917 }, { "epoch": 0.8731833271855404, "grad_norm": 1.4638354778289795, "learning_rate": 9.63034159731715e-07, "loss": 0.0123, "step": 5918 }, { "epoch": 0.8733308742161564, "grad_norm": 3.813880681991577, "learning_rate": 9.608298389595926e-07, "loss": 0.0558, "step": 5919 }, { "epoch": 0.8734784212467724, "grad_norm": 2.4182288646698, "learning_rate": 9.586279165290192e-07, "loss": 0.1096, "step": 5920 }, { "epoch": 0.8734784212467724, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05622292309999466, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8826, "eval_samples_per_second": 5.834, "eval_steps_per_second": 0.2, "step": 5920 }, { "epoch": 0.8736259682773884, "grad_norm": 1.3478705883026123, "learning_rate": 9.564283930242258e-07, "loss": 0.033, "step": 5921 }, { "epoch": 0.8737735153080044, "grad_norm": 2.0680789947509766, "learning_rate": 9.542312690288035e-07, "loss": 0.0784, "step": 5922 }, { "epoch": 0.8739210623386204, "grad_norm": 3.976668357849121, "learning_rate": 9.52036545125714e-07, "loss": 0.1268, "step": 5923 }, { "epoch": 0.8740686093692365, "grad_norm": 2.448589563369751, "learning_rate": 9.498442218972748e-07, "loss": 0.0588, "step": 5924 }, { "epoch": 0.8742161563998525, "grad_norm": 1.7691428661346436, "learning_rate": 9.476542999251714e-07, "loss": 0.0443, "step": 5925 }, { "epoch": 0.8743637034304684, "grad_norm": 2.7442705631256104, "learning_rate": 9.454667797904515e-07, "loss": 0.0751, "step": 5926 }, { "epoch": 0.8745112504610845, "grad_norm": 3.767246723175049, "learning_rate": 9.432816620735242e-07, "loss": 0.0461, "step": 5927 }, { "epoch": 0.8746587974917005, "grad_norm": 2.4902091026306152, "learning_rate": 9.410989473541587e-07, "loss": 0.0497, "step": 5928 }, { "epoch": 0.8748063445223165, "grad_norm": 4.763408184051514, "learning_rate": 9.389186362114921e-07, "loss": 0.0801, "step": 5929 }, { "epoch": 0.8749538915529325, "grad_norm": 2.5694327354431152, "learning_rate": 9.367407292240228e-07, "loss": 0.0466, "step": 5930 }, { "epoch": 0.8751014385835485, "grad_norm": 1.3149958848953247, "learning_rate": 9.345652269696059e-07, "loss": 0.0415, "step": 5931 }, { "epoch": 0.8752489856141645, "grad_norm": 2.9025168418884277, "learning_rate": 9.323921300254657e-07, "loss": 0.0622, "step": 5932 }, { "epoch": 0.8753965326447806, "grad_norm": 0.823527455329895, "learning_rate": 9.302214389681807e-07, "loss": 0.0141, "step": 5933 }, { "epoch": 0.8755440796753965, "grad_norm": 2.63572359085083, "learning_rate": 9.280531543736982e-07, "loss": 0.0447, "step": 5934 }, { "epoch": 0.8756916267060125, "grad_norm": 2.119584560394287, "learning_rate": 9.258872768173255e-07, "loss": 0.0279, "step": 5935 }, { "epoch": 0.8758391737366286, "grad_norm": 2.0648109912872314, "learning_rate": 9.237238068737265e-07, "loss": 0.0517, "step": 5936 }, { "epoch": 0.8759867207672446, "grad_norm": 1.330884337425232, "learning_rate": 9.215627451169318e-07, "loss": 0.0191, "step": 5937 }, { "epoch": 0.8761342677978605, "grad_norm": 6.870659351348877, "learning_rate": 9.194040921203284e-07, "loss": 0.106, "step": 5938 }, { "epoch": 0.8762818148284766, "grad_norm": 3.1247828006744385, "learning_rate": 9.172478484566671e-07, "loss": 0.0726, "step": 5939 }, { "epoch": 0.8764293618590926, "grad_norm": 3.2152442932128906, "learning_rate": 9.150940146980624e-07, "loss": 0.0933, "step": 5940 }, { "epoch": 0.8764293618590926, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05513066053390503, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8287, "eval_samples_per_second": 5.84, "eval_steps_per_second": 0.201, "step": 5940 }, { "epoch": 0.8765769088897086, "grad_norm": 3.2563045024871826, "learning_rate": 9.129425914159839e-07, "loss": 0.0574, "step": 5941 }, { "epoch": 0.8767244559203246, "grad_norm": 2.5582735538482666, "learning_rate": 9.107935791812605e-07, "loss": 0.0449, "step": 5942 }, { "epoch": 0.8768720029509406, "grad_norm": 1.2111361026763916, "learning_rate": 9.086469785640862e-07, "loss": 0.0268, "step": 5943 }, { "epoch": 0.8770195499815566, "grad_norm": 2.597418785095215, "learning_rate": 9.065027901340173e-07, "loss": 0.067, "step": 5944 }, { "epoch": 0.8771670970121727, "grad_norm": 1.3513870239257812, "learning_rate": 9.043610144599612e-07, "loss": 0.0342, "step": 5945 }, { "epoch": 0.8773146440427887, "grad_norm": 1.4286096096038818, "learning_rate": 9.022216521101934e-07, "loss": 0.0356, "step": 5946 }, { "epoch": 0.8774621910734046, "grad_norm": 2.183363437652588, "learning_rate": 9.00084703652343e-07, "loss": 0.0446, "step": 5947 }, { "epoch": 0.8776097381040207, "grad_norm": 3.5890183448791504, "learning_rate": 8.979501696534032e-07, "loss": 0.0908, "step": 5948 }, { "epoch": 0.8777572851346367, "grad_norm": 1.664736270904541, "learning_rate": 8.958180506797265e-07, "loss": 0.0466, "step": 5949 }, { "epoch": 0.8779048321652527, "grad_norm": 3.184309244155884, "learning_rate": 8.936883472970193e-07, "loss": 0.0774, "step": 5950 }, { "epoch": 0.8780523791958686, "grad_norm": 2.4639813899993896, "learning_rate": 8.915610600703539e-07, "loss": 0.0793, "step": 5951 }, { "epoch": 0.8781999262264847, "grad_norm": 2.775432825088501, "learning_rate": 8.894361895641568e-07, "loss": 0.0637, "step": 5952 }, { "epoch": 0.8783474732571007, "grad_norm": 3.227356195449829, "learning_rate": 8.873137363422125e-07, "loss": 0.0733, "step": 5953 }, { "epoch": 0.8784950202877168, "grad_norm": 1.4808876514434814, "learning_rate": 8.851937009676714e-07, "loss": 0.0535, "step": 5954 }, { "epoch": 0.8786425673183327, "grad_norm": 2.2464683055877686, "learning_rate": 8.830760840030361e-07, "loss": 0.049, "step": 5955 }, { "epoch": 0.8787901143489487, "grad_norm": 1.7445260286331177, "learning_rate": 8.80960886010166e-07, "loss": 0.0515, "step": 5956 }, { "epoch": 0.8789376613795647, "grad_norm": 8.007856369018555, "learning_rate": 8.788481075502831e-07, "loss": 0.0436, "step": 5957 }, { "epoch": 0.8790852084101808, "grad_norm": 1.341110110282898, "learning_rate": 8.76737749183968e-07, "loss": 0.0147, "step": 5958 }, { "epoch": 0.8792327554407967, "grad_norm": 1.3692198991775513, "learning_rate": 8.746298114711538e-07, "loss": 0.0286, "step": 5959 }, { "epoch": 0.8793803024714127, "grad_norm": 2.7240824699401855, "learning_rate": 8.725242949711376e-07, "loss": 0.0482, "step": 5960 }, { "epoch": 0.8793803024714127, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055228136479854584, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.0963, "eval_samples_per_second": 5.809, "eval_steps_per_second": 0.2, "step": 5960 }, { "epoch": 0.8795278495020288, "grad_norm": 1.6086735725402832, "learning_rate": 8.704212002425683e-07, "loss": 0.051, "step": 5961 }, { "epoch": 0.8796753965326448, "grad_norm": 2.4951272010803223, "learning_rate": 8.683205278434559e-07, "loss": 0.0779, "step": 5962 }, { "epoch": 0.8798229435632607, "grad_norm": 2.1152498722076416, "learning_rate": 8.662222783311691e-07, "loss": 0.0203, "step": 5963 }, { "epoch": 0.8799704905938768, "grad_norm": 2.3825652599334717, "learning_rate": 8.641264522624282e-07, "loss": 0.0648, "step": 5964 }, { "epoch": 0.8801180376244928, "grad_norm": 1.6257972717285156, "learning_rate": 8.620330501933161e-07, "loss": 0.0628, "step": 5965 }, { "epoch": 0.8802655846551088, "grad_norm": 0.8832866549491882, "learning_rate": 8.599420726792696e-07, "loss": 0.0181, "step": 5966 }, { "epoch": 0.8804131316857248, "grad_norm": 3.3614399433135986, "learning_rate": 8.578535202750793e-07, "loss": 0.0355, "step": 5967 }, { "epoch": 0.8805606787163408, "grad_norm": 1.1095460653305054, "learning_rate": 8.557673935349021e-07, "loss": 0.0147, "step": 5968 }, { "epoch": 0.8807082257469568, "grad_norm": 2.085298538208008, "learning_rate": 8.536836930122416e-07, "loss": 0.0692, "step": 5969 }, { "epoch": 0.8808557727775729, "grad_norm": 1.3290832042694092, "learning_rate": 8.516024192599604e-07, "loss": 0.0471, "step": 5970 }, { "epoch": 0.8810033198081889, "grad_norm": 1.9308030605316162, "learning_rate": 8.495235728302809e-07, "loss": 0.0326, "step": 5971 }, { "epoch": 0.8811508668388048, "grad_norm": 3.052764654159546, "learning_rate": 8.474471542747742e-07, "loss": 0.0581, "step": 5972 }, { "epoch": 0.8812984138694209, "grad_norm": 1.6666488647460938, "learning_rate": 8.453731641443741e-07, "loss": 0.0506, "step": 5973 }, { "epoch": 0.8814459609000369, "grad_norm": 3.605884075164795, "learning_rate": 8.433016029893692e-07, "loss": 0.0608, "step": 5974 }, { "epoch": 0.8815935079306529, "grad_norm": 2.5897908210754395, "learning_rate": 8.412324713593978e-07, "loss": 0.0588, "step": 5975 }, { "epoch": 0.8817410549612689, "grad_norm": 0.7357593774795532, "learning_rate": 8.391657698034616e-07, "loss": 0.0121, "step": 5976 }, { "epoch": 0.8818886019918849, "grad_norm": 1.547512173652649, "learning_rate": 8.3710149886991e-07, "loss": 0.0598, "step": 5977 }, { "epoch": 0.8820361490225009, "grad_norm": 1.7373154163360596, "learning_rate": 8.350396591064535e-07, "loss": 0.0567, "step": 5978 }, { "epoch": 0.882183696053117, "grad_norm": 2.9452950954437256, "learning_rate": 8.329802510601559e-07, "loss": 0.0536, "step": 5979 }, { "epoch": 0.8823312430837329, "grad_norm": 4.783194065093994, "learning_rate": 8.309232752774343e-07, "loss": 0.1723, "step": 5980 }, { "epoch": 0.8823312430837329, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055726367980241776, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.9561, "eval_samples_per_second": 5.711, "eval_steps_per_second": 0.196, "step": 5980 }, { "epoch": 0.8824787901143489, "grad_norm": 2.6260926723480225, "learning_rate": 8.288687323040568e-07, "loss": 0.0891, "step": 5981 }, { "epoch": 0.882626337144965, "grad_norm": 2.6471948623657227, "learning_rate": 8.26816622685157e-07, "loss": 0.0756, "step": 5982 }, { "epoch": 0.882773884175581, "grad_norm": 3.824842691421509, "learning_rate": 8.247669469652142e-07, "loss": 0.1009, "step": 5983 }, { "epoch": 0.8829214312061969, "grad_norm": 1.891882061958313, "learning_rate": 8.227197056880609e-07, "loss": 0.0769, "step": 5984 }, { "epoch": 0.883068978236813, "grad_norm": 2.1029231548309326, "learning_rate": 8.206748993968916e-07, "loss": 0.0392, "step": 5985 }, { "epoch": 0.883216525267429, "grad_norm": 1.9693273305892944, "learning_rate": 8.186325286342456e-07, "loss": 0.055, "step": 5986 }, { "epoch": 0.883364072298045, "grad_norm": 1.6374262571334839, "learning_rate": 8.165925939420227e-07, "loss": 0.0462, "step": 5987 }, { "epoch": 0.883511619328661, "grad_norm": 2.6680667400360107, "learning_rate": 8.145550958614745e-07, "loss": 0.086, "step": 5988 }, { "epoch": 0.883659166359277, "grad_norm": 2.1669082641601562, "learning_rate": 8.12520034933203e-07, "loss": 0.0585, "step": 5989 }, { "epoch": 0.883806713389893, "grad_norm": 1.926206350326538, "learning_rate": 8.104874116971683e-07, "loss": 0.0491, "step": 5990 }, { "epoch": 0.8839542604205091, "grad_norm": 1.9720089435577393, "learning_rate": 8.084572266926805e-07, "loss": 0.0514, "step": 5991 }, { "epoch": 0.8841018074511251, "grad_norm": 0.9564663767814636, "learning_rate": 8.064294804584027e-07, "loss": 0.041, "step": 5992 }, { "epoch": 0.884249354481741, "grad_norm": 2.2758212089538574, "learning_rate": 8.044041735323549e-07, "loss": 0.0473, "step": 5993 }, { "epoch": 0.8843969015123571, "grad_norm": 2.2937676906585693, "learning_rate": 8.023813064519037e-07, "loss": 0.0378, "step": 5994 }, { "epoch": 0.8845444485429731, "grad_norm": 4.157854080200195, "learning_rate": 8.003608797537754e-07, "loss": 0.0982, "step": 5995 }, { "epoch": 0.8846919955735891, "grad_norm": 3.3299248218536377, "learning_rate": 7.983428939740412e-07, "loss": 0.0517, "step": 5996 }, { "epoch": 0.884839542604205, "grad_norm": 1.161108374595642, "learning_rate": 7.963273496481294e-07, "loss": 0.0139, "step": 5997 }, { "epoch": 0.8849870896348211, "grad_norm": 2.8756136894226074, "learning_rate": 7.943142473108234e-07, "loss": 0.1444, "step": 5998 }, { "epoch": 0.8851346366654371, "grad_norm": 0.6647000908851624, "learning_rate": 7.923035874962504e-07, "loss": 0.0129, "step": 5999 }, { "epoch": 0.8852821836960532, "grad_norm": 1.4540106058120728, "learning_rate": 7.902953707378925e-07, "loss": 0.0259, "step": 6000 }, { "epoch": 0.8852821836960532, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05572304502129555, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.6282, "eval_samples_per_second": 5.864, "eval_steps_per_second": 0.201, "step": 6000 }, { "epoch": 0.8854297307266691, "grad_norm": 1.9520748853683472, "learning_rate": 7.882895975685912e-07, "loss": 0.039, "step": 6001 }, { "epoch": 0.8855772777572851, "grad_norm": 1.3660613298416138, "learning_rate": 7.862862685205296e-07, "loss": 0.0214, "step": 6002 }, { "epoch": 0.8857248247879012, "grad_norm": 3.9320688247680664, "learning_rate": 7.842853841252463e-07, "loss": 0.0278, "step": 6003 }, { "epoch": 0.8858723718185172, "grad_norm": 1.4639867544174194, "learning_rate": 7.822869449136328e-07, "loss": 0.0421, "step": 6004 }, { "epoch": 0.8860199188491331, "grad_norm": 3.7693629264831543, "learning_rate": 7.802909514159285e-07, "loss": 0.091, "step": 6005 }, { "epoch": 0.8861674658797491, "grad_norm": 1.5250922441482544, "learning_rate": 7.782974041617253e-07, "loss": 0.0195, "step": 6006 }, { "epoch": 0.8863150129103652, "grad_norm": 3.0657765865325928, "learning_rate": 7.763063036799701e-07, "loss": 0.0338, "step": 6007 }, { "epoch": 0.8864625599409812, "grad_norm": 1.9614242315292358, "learning_rate": 7.743176504989513e-07, "loss": 0.0447, "step": 6008 }, { "epoch": 0.8866101069715971, "grad_norm": 3.2453866004943848, "learning_rate": 7.723314451463193e-07, "loss": 0.137, "step": 6009 }, { "epoch": 0.8867576540022132, "grad_norm": 2.51401424407959, "learning_rate": 7.703476881490634e-07, "loss": 0.098, "step": 6010 }, { "epoch": 0.8869052010328292, "grad_norm": 3.9559733867645264, "learning_rate": 7.683663800335328e-07, "loss": 0.0941, "step": 6011 }, { "epoch": 0.8870527480634453, "grad_norm": 1.9767736196517944, "learning_rate": 7.663875213254246e-07, "loss": 0.0472, "step": 6012 }, { "epoch": 0.8872002950940613, "grad_norm": 1.6465672254562378, "learning_rate": 7.644111125497822e-07, "loss": 0.0145, "step": 6013 }, { "epoch": 0.8873478421246772, "grad_norm": 1.3866339921951294, "learning_rate": 7.624371542310005e-07, "loss": 0.0386, "step": 6014 }, { "epoch": 0.8874953891552932, "grad_norm": 2.0481443405151367, "learning_rate": 7.604656468928262e-07, "loss": 0.0383, "step": 6015 }, { "epoch": 0.8876429361859093, "grad_norm": 3.9279582500457764, "learning_rate": 7.584965910583564e-07, "loss": 0.0489, "step": 6016 }, { "epoch": 0.8877904832165253, "grad_norm": 1.7083287239074707, "learning_rate": 7.565299872500331e-07, "loss": 0.0244, "step": 6017 }, { "epoch": 0.8879380302471412, "grad_norm": 1.2823542356491089, "learning_rate": 7.545658359896547e-07, "loss": 0.0164, "step": 6018 }, { "epoch": 0.8880855772777573, "grad_norm": 0.9202921390533447, "learning_rate": 7.526041377983596e-07, "loss": 0.0263, "step": 6019 }, { "epoch": 0.8882331243083733, "grad_norm": 1.0891423225402832, "learning_rate": 7.506448931966436e-07, "loss": 0.0291, "step": 6020 }, { "epoch": 0.8882331243083733, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05567174404859543, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.4257, "eval_samples_per_second": 6.009, "eval_steps_per_second": 0.207, "step": 6020 }, { "epoch": 0.8883806713389893, "grad_norm": 1.6009353399276733, "learning_rate": 7.486881027043491e-07, "loss": 0.0527, "step": 6021 }, { "epoch": 0.8885282183696053, "grad_norm": 6.035427570343018, "learning_rate": 7.467337668406638e-07, "loss": 0.0817, "step": 6022 }, { "epoch": 0.8886757654002213, "grad_norm": 1.1803969144821167, "learning_rate": 7.447818861241308e-07, "loss": 0.0216, "step": 6023 }, { "epoch": 0.8888233124308373, "grad_norm": 2.7183499336242676, "learning_rate": 7.428324610726345e-07, "loss": 0.0815, "step": 6024 }, { "epoch": 0.8889708594614534, "grad_norm": 1.5589689016342163, "learning_rate": 7.408854922034126e-07, "loss": 0.0311, "step": 6025 }, { "epoch": 0.8891184064920693, "grad_norm": 3.315922498703003, "learning_rate": 7.389409800330516e-07, "loss": 0.0731, "step": 6026 }, { "epoch": 0.8892659535226853, "grad_norm": 1.9708974361419678, "learning_rate": 7.369989250774812e-07, "loss": 0.0703, "step": 6027 }, { "epoch": 0.8894135005533014, "grad_norm": 1.7425222396850586, "learning_rate": 7.350593278519824e-07, "loss": 0.0496, "step": 6028 }, { "epoch": 0.8895610475839174, "grad_norm": 1.3247371912002563, "learning_rate": 7.331221888711859e-07, "loss": 0.0324, "step": 6029 }, { "epoch": 0.8897085946145333, "grad_norm": 1.3036816120147705, "learning_rate": 7.311875086490683e-07, "loss": 0.033, "step": 6030 }, { "epoch": 0.8898561416451494, "grad_norm": 2.608323097229004, "learning_rate": 7.292552876989511e-07, "loss": 0.0983, "step": 6031 }, { "epoch": 0.8900036886757654, "grad_norm": 2.148296356201172, "learning_rate": 7.273255265335088e-07, "loss": 0.0705, "step": 6032 }, { "epoch": 0.8901512357063814, "grad_norm": 2.8998477458953857, "learning_rate": 7.253982256647574e-07, "loss": 0.0512, "step": 6033 }, { "epoch": 0.8902987827369974, "grad_norm": 3.0811049938201904, "learning_rate": 7.234733856040654e-07, "loss": 0.1346, "step": 6034 }, { "epoch": 0.8904463297676134, "grad_norm": 2.189905881881714, "learning_rate": 7.215510068621467e-07, "loss": 0.0503, "step": 6035 }, { "epoch": 0.8905938767982294, "grad_norm": 3.884209394454956, "learning_rate": 7.196310899490577e-07, "loss": 0.0547, "step": 6036 }, { "epoch": 0.8907414238288455, "grad_norm": 1.5360444784164429, "learning_rate": 7.177136353742098e-07, "loss": 0.0523, "step": 6037 }, { "epoch": 0.8908889708594615, "grad_norm": 1.0694087743759155, "learning_rate": 7.157986436463537e-07, "loss": 0.022, "step": 6038 }, { "epoch": 0.8910365178900774, "grad_norm": 1.7816051244735718, "learning_rate": 7.138861152735898e-07, "loss": 0.0618, "step": 6039 }, { "epoch": 0.8911840649206935, "grad_norm": 2.7336528301239014, "learning_rate": 7.119760507633678e-07, "loss": 0.0872, "step": 6040 }, { "epoch": 0.8911840649206935, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05596858263015747, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9314, "eval_samples_per_second": 5.947, "eval_steps_per_second": 0.204, "step": 6040 }, { "epoch": 0.8913316119513095, "grad_norm": 1.510573148727417, "learning_rate": 7.100684506224775e-07, "loss": 0.0494, "step": 6041 }, { "epoch": 0.8914791589819255, "grad_norm": 4.796574115753174, "learning_rate": 7.081633153570577e-07, "loss": 0.0196, "step": 6042 }, { "epoch": 0.8916267060125415, "grad_norm": 1.6497973203659058, "learning_rate": 7.06260645472594e-07, "loss": 0.0334, "step": 6043 }, { "epoch": 0.8917742530431575, "grad_norm": 1.3166993856430054, "learning_rate": 7.043604414739203e-07, "loss": 0.0214, "step": 6044 }, { "epoch": 0.8919218000737735, "grad_norm": 3.4348347187042236, "learning_rate": 7.024627038652077e-07, "loss": 0.0721, "step": 6045 }, { "epoch": 0.8920693471043896, "grad_norm": 2.7059199810028076, "learning_rate": 7.005674331499834e-07, "loss": 0.0711, "step": 6046 }, { "epoch": 0.8922168941350055, "grad_norm": 2.4796156883239746, "learning_rate": 6.98674629831112e-07, "loss": 0.0405, "step": 6047 }, { "epoch": 0.8923644411656215, "grad_norm": 5.228332042694092, "learning_rate": 6.96784294410806e-07, "loss": 0.1015, "step": 6048 }, { "epoch": 0.8925119881962376, "grad_norm": 2.4152002334594727, "learning_rate": 6.948964273906278e-07, "loss": 0.0533, "step": 6049 }, { "epoch": 0.8926595352268536, "grad_norm": 3.5302391052246094, "learning_rate": 6.930110292714765e-07, "loss": 0.0874, "step": 6050 }, { "epoch": 0.8928070822574695, "grad_norm": 1.5268058776855469, "learning_rate": 6.911281005536031e-07, "loss": 0.037, "step": 6051 }, { "epoch": 0.8929546292880856, "grad_norm": 2.8932971954345703, "learning_rate": 6.892476417365989e-07, "loss": 0.0532, "step": 6052 }, { "epoch": 0.8931021763187016, "grad_norm": 2.512969732284546, "learning_rate": 6.873696533193996e-07, "loss": 0.0757, "step": 6053 }, { "epoch": 0.8932497233493176, "grad_norm": 3.8057785034179688, "learning_rate": 6.85494135800292e-07, "loss": 0.036, "step": 6054 }, { "epoch": 0.8933972703799336, "grad_norm": 3.723708391189575, "learning_rate": 6.836210896769014e-07, "loss": 0.0765, "step": 6055 }, { "epoch": 0.8935448174105496, "grad_norm": 2.7616844177246094, "learning_rate": 6.817505154461956e-07, "loss": 0.0504, "step": 6056 }, { "epoch": 0.8936923644411656, "grad_norm": 2.264693260192871, "learning_rate": 6.798824136044913e-07, "loss": 0.0518, "step": 6057 }, { "epoch": 0.8938399114717817, "grad_norm": 4.492088317871094, "learning_rate": 6.780167846474506e-07, "loss": 0.1116, "step": 6058 }, { "epoch": 0.8939874585023977, "grad_norm": 5.52939510345459, "learning_rate": 6.761536290700721e-07, "loss": 0.0688, "step": 6059 }, { "epoch": 0.8941350055330136, "grad_norm": 2.8091180324554443, "learning_rate": 6.742929473667048e-07, "loss": 0.0819, "step": 6060 }, { "epoch": 0.8941350055330136, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.0556659959256649, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.6299, "eval_samples_per_second": 5.984, "eval_steps_per_second": 0.206, "step": 6060 }, { "epoch": 0.8942825525636297, "grad_norm": 2.870699167251587, "learning_rate": 6.724347400310371e-07, "loss": 0.0833, "step": 6061 }, { "epoch": 0.8944300995942457, "grad_norm": 2.0958855152130127, "learning_rate": 6.705790075561047e-07, "loss": 0.0436, "step": 6062 }, { "epoch": 0.8945776466248617, "grad_norm": 3.7520968914031982, "learning_rate": 6.687257504342848e-07, "loss": 0.0666, "step": 6063 }, { "epoch": 0.8947251936554776, "grad_norm": 1.8063980340957642, "learning_rate": 6.668749691572951e-07, "loss": 0.0345, "step": 6064 }, { "epoch": 0.8948727406860937, "grad_norm": 4.453982830047607, "learning_rate": 6.650266642162029e-07, "loss": 0.1076, "step": 6065 }, { "epoch": 0.8950202877167097, "grad_norm": 2.0510618686676025, "learning_rate": 6.631808361014113e-07, "loss": 0.0227, "step": 6066 }, { "epoch": 0.8951678347473258, "grad_norm": 2.578852415084839, "learning_rate": 6.613374853026689e-07, "loss": 0.04, "step": 6067 }, { "epoch": 0.8953153817779417, "grad_norm": 2.0627481937408447, "learning_rate": 6.594966123090718e-07, "loss": 0.0538, "step": 6068 }, { "epoch": 0.8954629288085577, "grad_norm": 5.8762617111206055, "learning_rate": 6.576582176090518e-07, "loss": 0.0398, "step": 6069 }, { "epoch": 0.8956104758391737, "grad_norm": 2.373622179031372, "learning_rate": 6.558223016903842e-07, "loss": 0.0969, "step": 6070 }, { "epoch": 0.8957580228697898, "grad_norm": 1.838910698890686, "learning_rate": 6.539888650401916e-07, "loss": 0.0363, "step": 6071 }, { "epoch": 0.8959055699004057, "grad_norm": 10.581929206848145, "learning_rate": 6.521579081449325e-07, "loss": 0.0918, "step": 6072 }, { "epoch": 0.8960531169310217, "grad_norm": 3.3708910942077637, "learning_rate": 6.503294314904108e-07, "loss": 0.059, "step": 6073 }, { "epoch": 0.8962006639616378, "grad_norm": 2.5104010105133057, "learning_rate": 6.485034355617748e-07, "loss": 0.0224, "step": 6074 }, { "epoch": 0.8963482109922538, "grad_norm": 1.8458657264709473, "learning_rate": 6.466799208435081e-07, "loss": 0.0375, "step": 6075 }, { "epoch": 0.8964957580228697, "grad_norm": 4.009340763092041, "learning_rate": 6.448588878194406e-07, "loss": 0.0681, "step": 6076 }, { "epoch": 0.8966433050534858, "grad_norm": 1.2859127521514893, "learning_rate": 6.430403369727445e-07, "loss": 0.0283, "step": 6077 }, { "epoch": 0.8967908520841018, "grad_norm": 1.2199759483337402, "learning_rate": 6.412242687859294e-07, "loss": 0.0296, "step": 6078 }, { "epoch": 0.8969383991147178, "grad_norm": 2.5927979946136475, "learning_rate": 6.394106837408487e-07, "loss": 0.0468, "step": 6079 }, { "epoch": 0.8970859461453339, "grad_norm": 7.891746997833252, "learning_rate": 6.375995823186987e-07, "loss": 0.0912, "step": 6080 }, { "epoch": 0.8970859461453339, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05581069737672806, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.7869, "eval_samples_per_second": 5.965, "eval_steps_per_second": 0.205, "step": 6080 }, { "epoch": 0.8972334931759498, "grad_norm": 2.4385836124420166, "learning_rate": 6.357909650000094e-07, "loss": 0.0744, "step": 6081 }, { "epoch": 0.8973810402065658, "grad_norm": 1.913123607635498, "learning_rate": 6.339848322646625e-07, "loss": 0.0558, "step": 6082 }, { "epoch": 0.8975285872371819, "grad_norm": 2.230882167816162, "learning_rate": 6.321811845918735e-07, "loss": 0.0325, "step": 6083 }, { "epoch": 0.8976761342677979, "grad_norm": 1.8755987882614136, "learning_rate": 6.303800224601986e-07, "loss": 0.0615, "step": 6084 }, { "epoch": 0.8978236812984138, "grad_norm": 3.3818376064300537, "learning_rate": 6.285813463475366e-07, "loss": 0.0615, "step": 6085 }, { "epoch": 0.8979712283290299, "grad_norm": 1.8080580234527588, "learning_rate": 6.267851567311256e-07, "loss": 0.0506, "step": 6086 }, { "epoch": 0.8981187753596459, "grad_norm": 3.3100953102111816, "learning_rate": 6.249914540875445e-07, "loss": 0.0438, "step": 6087 }, { "epoch": 0.8982663223902619, "grad_norm": 2.4367289543151855, "learning_rate": 6.232002388927127e-07, "loss": 0.0651, "step": 6088 }, { "epoch": 0.8984138694208779, "grad_norm": 3.0320327281951904, "learning_rate": 6.214115116218877e-07, "loss": 0.0466, "step": 6089 }, { "epoch": 0.8985614164514939, "grad_norm": 1.3278833627700806, "learning_rate": 6.19625272749671e-07, "loss": 0.0228, "step": 6090 }, { "epoch": 0.8987089634821099, "grad_norm": 2.3165955543518066, "learning_rate": 6.178415227499979e-07, "loss": 0.0507, "step": 6091 }, { "epoch": 0.898856510512726, "grad_norm": 1.7083970308303833, "learning_rate": 6.160602620961487e-07, "loss": 0.043, "step": 6092 }, { "epoch": 0.8990040575433419, "grad_norm": 2.6195430755615234, "learning_rate": 6.142814912607409e-07, "loss": 0.0706, "step": 6093 }, { "epoch": 0.8991516045739579, "grad_norm": 2.1232919692993164, "learning_rate": 6.125052107157304e-07, "loss": 0.0561, "step": 6094 }, { "epoch": 0.899299151604574, "grad_norm": 3.1567232608795166, "learning_rate": 6.107314209324123e-07, "loss": 0.0495, "step": 6095 }, { "epoch": 0.89944669863519, "grad_norm": 1.2741518020629883, "learning_rate": 6.089601223814268e-07, "loss": 0.0388, "step": 6096 }, { "epoch": 0.8995942456658059, "grad_norm": 2.4637041091918945, "learning_rate": 6.071913155327447e-07, "loss": 0.0538, "step": 6097 }, { "epoch": 0.899741792696422, "grad_norm": 2.406578302383423, "learning_rate": 6.054250008556783e-07, "loss": 0.0796, "step": 6098 }, { "epoch": 0.899889339727038, "grad_norm": 3.432889223098755, "learning_rate": 6.036611788188829e-07, "loss": 0.0655, "step": 6099 }, { "epoch": 0.900036886757654, "grad_norm": 3.666919708251953, "learning_rate": 6.018998498903472e-07, "loss": 0.0307, "step": 6100 }, { "epoch": 0.900036886757654, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05625491961836815, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.337, "eval_samples_per_second": 5.898, "eval_steps_per_second": 0.203, "step": 6100 }, { "epoch": 0.9001844337882701, "grad_norm": 1.5873862504959106, "learning_rate": 6.001410145373998e-07, "loss": 0.0303, "step": 6101 }, { "epoch": 0.900331980818886, "grad_norm": 2.3204448223114014, "learning_rate": 5.983846732267118e-07, "loss": 0.0738, "step": 6102 }, { "epoch": 0.900479527849502, "grad_norm": 3.2990872859954834, "learning_rate": 5.966308264242837e-07, "loss": 0.0545, "step": 6103 }, { "epoch": 0.9006270748801181, "grad_norm": 2.091925859451294, "learning_rate": 5.948794745954655e-07, "loss": 0.0608, "step": 6104 }, { "epoch": 0.9007746219107341, "grad_norm": 2.3603947162628174, "learning_rate": 5.931306182049335e-07, "loss": 0.0288, "step": 6105 }, { "epoch": 0.90092216894135, "grad_norm": 3.0553927421569824, "learning_rate": 5.913842577167117e-07, "loss": 0.0304, "step": 6106 }, { "epoch": 0.9010697159719661, "grad_norm": 1.18839693069458, "learning_rate": 5.896403935941564e-07, "loss": 0.0255, "step": 6107 }, { "epoch": 0.9012172630025821, "grad_norm": 0.8999655842781067, "learning_rate": 5.878990262999628e-07, "loss": 0.0247, "step": 6108 }, { "epoch": 0.9013648100331981, "grad_norm": 1.7418278455734253, "learning_rate": 5.861601562961639e-07, "loss": 0.0533, "step": 6109 }, { "epoch": 0.901512357063814, "grad_norm": 1.063058853149414, "learning_rate": 5.844237840441291e-07, "loss": 0.0156, "step": 6110 }, { "epoch": 0.9016599040944301, "grad_norm": 4.956036567687988, "learning_rate": 5.826899100045669e-07, "loss": 0.1087, "step": 6111 }, { "epoch": 0.9018074511250461, "grad_norm": 0.6496356725692749, "learning_rate": 5.809585346375235e-07, "loss": 0.0248, "step": 6112 }, { "epoch": 0.9019549981556622, "grad_norm": 4.678279876708984, "learning_rate": 5.792296584023782e-07, "loss": 0.0501, "step": 6113 }, { "epoch": 0.9021025451862781, "grad_norm": 2.635258674621582, "learning_rate": 5.775032817578486e-07, "loss": 0.0709, "step": 6114 }, { "epoch": 0.9022500922168941, "grad_norm": 1.1114413738250732, "learning_rate": 5.757794051619936e-07, "loss": 0.0233, "step": 6115 }, { "epoch": 0.9023976392475102, "grad_norm": 2.946363925933838, "learning_rate": 5.740580290722042e-07, "loss": 0.0664, "step": 6116 }, { "epoch": 0.9025451862781262, "grad_norm": 3.6926958560943604, "learning_rate": 5.723391539452061e-07, "loss": 0.049, "step": 6117 }, { "epoch": 0.9026927333087421, "grad_norm": 3.990233898162842, "learning_rate": 5.70622780237069e-07, "loss": 0.1301, "step": 6118 }, { "epoch": 0.9028402803393581, "grad_norm": 1.862289547920227, "learning_rate": 5.689089084031896e-07, "loss": 0.0485, "step": 6119 }, { "epoch": 0.9029878273699742, "grad_norm": 1.8196097612380981, "learning_rate": 5.671975388983086e-07, "loss": 0.0271, "step": 6120 }, { "epoch": 0.9029878273699742, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05541698634624481, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.5394, "eval_samples_per_second": 5.995, "eval_steps_per_second": 0.206, "step": 6120 }, { "epoch": 0.9031353744005902, "grad_norm": 0.9414036273956299, "learning_rate": 5.654886721764997e-07, "loss": 0.0153, "step": 6121 }, { "epoch": 0.9032829214312061, "grad_norm": 1.2081336975097656, "learning_rate": 5.637823086911698e-07, "loss": 0.0251, "step": 6122 }, { "epoch": 0.9034304684618222, "grad_norm": 2.001443386077881, "learning_rate": 5.620784488950681e-07, "loss": 0.0558, "step": 6123 }, { "epoch": 0.9035780154924382, "grad_norm": 1.7640726566314697, "learning_rate": 5.603770932402719e-07, "loss": 0.0503, "step": 6124 }, { "epoch": 0.9037255625230542, "grad_norm": 5.103485107421875, "learning_rate": 5.586782421781989e-07, "loss": 0.1603, "step": 6125 }, { "epoch": 0.9038731095536703, "grad_norm": 1.4745298624038696, "learning_rate": 5.569818961596041e-07, "loss": 0.0552, "step": 6126 }, { "epoch": 0.9040206565842862, "grad_norm": 1.9164541959762573, "learning_rate": 5.552880556345719e-07, "loss": 0.0409, "step": 6127 }, { "epoch": 0.9041682036149022, "grad_norm": 3.4288718700408936, "learning_rate": 5.535967210525239e-07, "loss": 0.0773, "step": 6128 }, { "epoch": 0.9043157506455183, "grad_norm": 1.621910572052002, "learning_rate": 5.519078928622212e-07, "loss": 0.0666, "step": 6129 }, { "epoch": 0.9044632976761343, "grad_norm": 1.874854564666748, "learning_rate": 5.502215715117553e-07, "loss": 0.0426, "step": 6130 }, { "epoch": 0.9046108447067502, "grad_norm": 3.9866039752960205, "learning_rate": 5.485377574485528e-07, "loss": 0.0918, "step": 6131 }, { "epoch": 0.9047583917373663, "grad_norm": 2.3686044216156006, "learning_rate": 5.468564511193786e-07, "loss": 0.0882, "step": 6132 }, { "epoch": 0.9049059387679823, "grad_norm": 2.9734747409820557, "learning_rate": 5.451776529703256e-07, "loss": 0.1109, "step": 6133 }, { "epoch": 0.9050534857985983, "grad_norm": 2.4147839546203613, "learning_rate": 5.435013634468289e-07, "loss": 0.0677, "step": 6134 }, { "epoch": 0.9052010328292143, "grad_norm": 4.090640068054199, "learning_rate": 5.418275829936537e-07, "loss": 0.0467, "step": 6135 }, { "epoch": 0.9053485798598303, "grad_norm": 7.092474460601807, "learning_rate": 5.401563120548991e-07, "loss": 0.0388, "step": 6136 }, { "epoch": 0.9054961268904463, "grad_norm": 2.077030897140503, "learning_rate": 5.384875510740007e-07, "loss": 0.0505, "step": 6137 }, { "epoch": 0.9056436739210624, "grad_norm": 2.185776710510254, "learning_rate": 5.368213004937262e-07, "loss": 0.0583, "step": 6138 }, { "epoch": 0.9057912209516783, "grad_norm": 2.0845303535461426, "learning_rate": 5.351575607561766e-07, "loss": 0.0499, "step": 6139 }, { "epoch": 0.9059387679822943, "grad_norm": 1.5722860097885132, "learning_rate": 5.334963323027919e-07, "loss": 0.0405, "step": 6140 }, { "epoch": 0.9059387679822943, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05552350729703903, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1374, "eval_samples_per_second": 5.922, "eval_steps_per_second": 0.204, "step": 6140 }, { "epoch": 0.9060863150129104, "grad_norm": 4.133938789367676, "learning_rate": 5.318376155743387e-07, "loss": 0.0862, "step": 6141 }, { "epoch": 0.9062338620435264, "grad_norm": 1.9547992944717407, "learning_rate": 5.301814110109205e-07, "loss": 0.0653, "step": 6142 }, { "epoch": 0.9063814090741423, "grad_norm": 3.051151990890503, "learning_rate": 5.285277190519744e-07, "loss": 0.0737, "step": 6143 }, { "epoch": 0.9065289561047584, "grad_norm": 1.4388315677642822, "learning_rate": 5.268765401362718e-07, "loss": 0.0179, "step": 6144 }, { "epoch": 0.9066765031353744, "grad_norm": 1.3435120582580566, "learning_rate": 5.252278747019146e-07, "loss": 0.0307, "step": 6145 }, { "epoch": 0.9068240501659904, "grad_norm": 2.0128400325775146, "learning_rate": 5.235817231863405e-07, "loss": 0.0618, "step": 6146 }, { "epoch": 0.9069715971966065, "grad_norm": 1.971063256263733, "learning_rate": 5.219380860263168e-07, "loss": 0.0684, "step": 6147 }, { "epoch": 0.9071191442272224, "grad_norm": 1.8959208726882935, "learning_rate": 5.20296963657948e-07, "loss": 0.0111, "step": 6148 }, { "epoch": 0.9072666912578384, "grad_norm": 4.560550212860107, "learning_rate": 5.186583565166692e-07, "loss": 0.1139, "step": 6149 }, { "epoch": 0.9074142382884545, "grad_norm": 2.025960922241211, "learning_rate": 5.17022265037247e-07, "loss": 0.051, "step": 6150 }, { "epoch": 0.9075617853190705, "grad_norm": 2.0765039920806885, "learning_rate": 5.153886896537829e-07, "loss": 0.0616, "step": 6151 }, { "epoch": 0.9077093323496864, "grad_norm": 1.6943057775497437, "learning_rate": 5.137576307997083e-07, "loss": 0.0418, "step": 6152 }, { "epoch": 0.9078568793803025, "grad_norm": 3.1802217960357666, "learning_rate": 5.121290889077879e-07, "loss": 0.0782, "step": 6153 }, { "epoch": 0.9080044264109185, "grad_norm": 5.109692573547363, "learning_rate": 5.105030644101206e-07, "loss": 0.0681, "step": 6154 }, { "epoch": 0.9081519734415345, "grad_norm": 2.058464527130127, "learning_rate": 5.088795577381356e-07, "loss": 0.0458, "step": 6155 }, { "epoch": 0.9082995204721505, "grad_norm": 2.1350038051605225, "learning_rate": 5.072585693225918e-07, "loss": 0.0465, "step": 6156 }, { "epoch": 0.9084470675027665, "grad_norm": 0.7591288685798645, "learning_rate": 5.056400995935829e-07, "loss": 0.018, "step": 6157 }, { "epoch": 0.9085946145333825, "grad_norm": 4.6449761390686035, "learning_rate": 5.040241489805365e-07, "loss": 0.1426, "step": 6158 }, { "epoch": 0.9087421615639986, "grad_norm": 6.209170341491699, "learning_rate": 5.024107179122051e-07, "loss": 0.164, "step": 6159 }, { "epoch": 0.9088897085946145, "grad_norm": 2.090540647506714, "learning_rate": 5.007998068166786e-07, "loss": 0.0175, "step": 6160 }, { "epoch": 0.9088897085946145, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05504719540476799, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3101, "eval_samples_per_second": 5.901, "eval_steps_per_second": 0.203, "step": 6160 }, { "epoch": 0.9090372556252305, "grad_norm": 3.121851682662964, "learning_rate": 4.991914161213751e-07, "loss": 0.032, "step": 6161 }, { "epoch": 0.9091848026558466, "grad_norm": 0.8654899597167969, "learning_rate": 4.975855462530465e-07, "loss": 0.0127, "step": 6162 }, { "epoch": 0.9093323496864626, "grad_norm": 1.8461565971374512, "learning_rate": 4.959821976377743e-07, "loss": 0.0472, "step": 6163 }, { "epoch": 0.9094798967170785, "grad_norm": 1.1268733739852905, "learning_rate": 4.943813707009693e-07, "loss": 0.0343, "step": 6164 }, { "epoch": 0.9096274437476946, "grad_norm": 2.749328374862671, "learning_rate": 4.927830658673771e-07, "loss": 0.1282, "step": 6165 }, { "epoch": 0.9097749907783106, "grad_norm": 1.882821798324585, "learning_rate": 4.911872835610721e-07, "loss": 0.0469, "step": 6166 }, { "epoch": 0.9099225378089266, "grad_norm": 2.297895669937134, "learning_rate": 4.895940242054564e-07, "loss": 0.0466, "step": 6167 }, { "epoch": 0.9100700848395427, "grad_norm": 2.072247266769409, "learning_rate": 4.880032882232699e-07, "loss": 0.0569, "step": 6168 }, { "epoch": 0.9102176318701586, "grad_norm": 3.2294955253601074, "learning_rate": 4.864150760365771e-07, "loss": 0.1044, "step": 6169 }, { "epoch": 0.9103651789007746, "grad_norm": 1.4208635091781616, "learning_rate": 4.848293880667732e-07, "loss": 0.0203, "step": 6170 }, { "epoch": 0.9105127259313907, "grad_norm": 3.2182838916778564, "learning_rate": 4.83246224734587e-07, "loss": 0.0434, "step": 6171 }, { "epoch": 0.9106602729620067, "grad_norm": 3.255988359451294, "learning_rate": 4.81665586460075e-07, "loss": 0.0959, "step": 6172 }, { "epoch": 0.9108078199926226, "grad_norm": 4.523223876953125, "learning_rate": 4.800874736626226e-07, "loss": 0.0774, "step": 6173 }, { "epoch": 0.9109553670232386, "grad_norm": 1.8783808946609497, "learning_rate": 4.785118867609507e-07, "loss": 0.0606, "step": 6174 }, { "epoch": 0.9111029140538547, "grad_norm": 2.5650320053100586, "learning_rate": 4.769388261731012e-07, "loss": 0.0939, "step": 6175 }, { "epoch": 0.9112504610844707, "grad_norm": 5.402647495269775, "learning_rate": 4.7536829231645156e-07, "loss": 0.0886, "step": 6176 }, { "epoch": 0.9113980081150866, "grad_norm": 3.0229949951171875, "learning_rate": 4.738002856077117e-07, "loss": 0.0827, "step": 6177 }, { "epoch": 0.9115455551457027, "grad_norm": 1.8020105361938477, "learning_rate": 4.722348064629123e-07, "loss": 0.0423, "step": 6178 }, { "epoch": 0.9116931021763187, "grad_norm": 1.6765297651290894, "learning_rate": 4.706718552974221e-07, "loss": 0.0494, "step": 6179 }, { "epoch": 0.9118406492069348, "grad_norm": 1.5524864196777344, "learning_rate": 4.691114325259327e-07, "loss": 0.0459, "step": 6180 }, { "epoch": 0.9118406492069348, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05488729849457741, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 51.2558, "eval_samples_per_second": 5.677, "eval_steps_per_second": 0.195, "step": 6180 }, { "epoch": 0.9119881962375507, "grad_norm": 1.8164972066879272, "learning_rate": 4.6755353856246635e-07, "loss": 0.0531, "step": 6181 }, { "epoch": 0.9121357432681667, "grad_norm": 2.300924301147461, "learning_rate": 4.6599817382037895e-07, "loss": 0.0215, "step": 6182 }, { "epoch": 0.9122832902987827, "grad_norm": 1.35698664188385, "learning_rate": 4.644453387123504e-07, "loss": 0.0215, "step": 6183 }, { "epoch": 0.9124308373293988, "grad_norm": 2.6017749309539795, "learning_rate": 4.6289503365038904e-07, "loss": 0.0511, "step": 6184 }, { "epoch": 0.9125783843600147, "grad_norm": 0.7743318676948547, "learning_rate": 4.6134725904583565e-07, "loss": 0.0137, "step": 6185 }, { "epoch": 0.9127259313906307, "grad_norm": 2.4677441120147705, "learning_rate": 4.598020153093552e-07, "loss": 0.0779, "step": 6186 }, { "epoch": 0.9128734784212468, "grad_norm": 2.9131996631622314, "learning_rate": 4.582593028509452e-07, "loss": 0.062, "step": 6187 }, { "epoch": 0.9130210254518628, "grad_norm": 1.517683982849121, "learning_rate": 4.567191220799305e-07, "loss": 0.0688, "step": 6188 }, { "epoch": 0.9131685724824787, "grad_norm": 3.0885980129241943, "learning_rate": 4.55181473404962e-07, "loss": 0.0986, "step": 6189 }, { "epoch": 0.9133161195130948, "grad_norm": 1.978442907333374, "learning_rate": 4.536463572340222e-07, "loss": 0.0489, "step": 6190 }, { "epoch": 0.9134636665437108, "grad_norm": 1.8119323253631592, "learning_rate": 4.5211377397441857e-07, "loss": 0.0523, "step": 6191 }, { "epoch": 0.9136112135743268, "grad_norm": 0.8473356366157532, "learning_rate": 4.505837240327882e-07, "loss": 0.0098, "step": 6192 }, { "epoch": 0.9137587606049429, "grad_norm": 0.7812674045562744, "learning_rate": 4.490562078150962e-07, "loss": 0.0172, "step": 6193 }, { "epoch": 0.9139063076355588, "grad_norm": 2.3568999767303467, "learning_rate": 4.4753122572663397e-07, "loss": 0.0394, "step": 6194 }, { "epoch": 0.9140538546661748, "grad_norm": 4.555817604064941, "learning_rate": 4.460087781720179e-07, "loss": 0.089, "step": 6195 }, { "epoch": 0.9142014016967909, "grad_norm": 1.3016505241394043, "learning_rate": 4.4448886555520266e-07, "loss": 0.0215, "step": 6196 }, { "epoch": 0.9143489487274069, "grad_norm": 1.2897732257843018, "learning_rate": 4.4297148827946e-07, "loss": 0.0215, "step": 6197 }, { "epoch": 0.9144964957580228, "grad_norm": 2.542720079421997, "learning_rate": 4.414566467473891e-07, "loss": 0.026, "step": 6198 }, { "epoch": 0.9146440427886389, "grad_norm": 1.3125649690628052, "learning_rate": 4.399443413609228e-07, "loss": 0.038, "step": 6199 }, { "epoch": 0.9147915898192549, "grad_norm": 2.4134104251861572, "learning_rate": 4.384345725213157e-07, "loss": 0.0631, "step": 6200 }, { "epoch": 0.9147915898192549, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055400192737579346, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8115, "eval_samples_per_second": 5.842, "eval_steps_per_second": 0.201, "step": 6200 }, { "epoch": 0.9149391368498709, "grad_norm": 1.5670089721679688, "learning_rate": 4.369273406291519e-07, "loss": 0.0568, "step": 6201 }, { "epoch": 0.9150866838804869, "grad_norm": 1.30291748046875, "learning_rate": 4.354226460843414e-07, "loss": 0.0265, "step": 6202 }, { "epoch": 0.9152342309111029, "grad_norm": 1.8413887023925781, "learning_rate": 4.339204892861215e-07, "loss": 0.0398, "step": 6203 }, { "epoch": 0.9153817779417189, "grad_norm": 2.272320032119751, "learning_rate": 4.3242087063305684e-07, "loss": 0.1022, "step": 6204 }, { "epoch": 0.915529324972335, "grad_norm": 2.3131356239318848, "learning_rate": 4.3092379052303457e-07, "loss": 0.0353, "step": 6205 }, { "epoch": 0.9156768720029509, "grad_norm": 4.904222011566162, "learning_rate": 4.294292493532737e-07, "loss": 0.0836, "step": 6206 }, { "epoch": 0.9158244190335669, "grad_norm": 1.8545290231704712, "learning_rate": 4.2793724752031807e-07, "loss": 0.0445, "step": 6207 }, { "epoch": 0.915971966064183, "grad_norm": 4.446885585784912, "learning_rate": 4.264477854200366e-07, "loss": 0.065, "step": 6208 }, { "epoch": 0.916119513094799, "grad_norm": 0.9898476004600525, "learning_rate": 4.24960863447621e-07, "loss": 0.0215, "step": 6209 }, { "epoch": 0.9162670601254149, "grad_norm": 2.3899457454681396, "learning_rate": 4.2347648199759784e-07, "loss": 0.0282, "step": 6210 }, { "epoch": 0.916414607156031, "grad_norm": 1.5149897336959839, "learning_rate": 4.219946414638132e-07, "loss": 0.0236, "step": 6211 }, { "epoch": 0.916562154186647, "grad_norm": 2.0021674633026123, "learning_rate": 4.205153422394381e-07, "loss": 0.0429, "step": 6212 }, { "epoch": 0.916709701217263, "grad_norm": 3.0172886848449707, "learning_rate": 4.1903858471697424e-07, "loss": 0.0445, "step": 6213 }, { "epoch": 0.9168572482478791, "grad_norm": 4.3301310539245605, "learning_rate": 4.175643692882436e-07, "loss": 0.1072, "step": 6214 }, { "epoch": 0.917004795278495, "grad_norm": 3.8663575649261475, "learning_rate": 4.160926963443979e-07, "loss": 0.0675, "step": 6215 }, { "epoch": 0.917152342309111, "grad_norm": 1.8558573722839355, "learning_rate": 4.1462356627591236e-07, "loss": 0.0268, "step": 6216 }, { "epoch": 0.9172998893397271, "grad_norm": 1.4661530256271362, "learning_rate": 4.131569794725876e-07, "loss": 0.0256, "step": 6217 }, { "epoch": 0.9174474363703431, "grad_norm": 1.4858981370925903, "learning_rate": 4.1169293632355114e-07, "loss": 0.0414, "step": 6218 }, { "epoch": 0.917594983400959, "grad_norm": 4.485099792480469, "learning_rate": 4.1023143721725e-07, "loss": 0.0801, "step": 6219 }, { "epoch": 0.917742530431575, "grad_norm": 3.052419662475586, "learning_rate": 4.0877248254146404e-07, "loss": 0.0737, "step": 6220 }, { "epoch": 0.917742530431575, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05517810955643654, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 50.2014, "eval_samples_per_second": 5.797, "eval_steps_per_second": 0.199, "step": 6220 }, { "epoch": 0.9178900774621911, "grad_norm": 2.319796562194824, "learning_rate": 4.0731607268329477e-07, "loss": 0.0639, "step": 6221 }, { "epoch": 0.9180376244928071, "grad_norm": 2.6878068447113037, "learning_rate": 4.058622080291652e-07, "loss": 0.0571, "step": 6222 }, { "epoch": 0.918185171523423, "grad_norm": 2.0226361751556396, "learning_rate": 4.0441088896482574e-07, "loss": 0.0681, "step": 6223 }, { "epoch": 0.9183327185540391, "grad_norm": 1.9138096570968628, "learning_rate": 4.029621158753538e-07, "loss": 0.0566, "step": 6224 }, { "epoch": 0.9184802655846551, "grad_norm": 2.777580976486206, "learning_rate": 4.015158891451476e-07, "loss": 0.0417, "step": 6225 }, { "epoch": 0.9186278126152712, "grad_norm": 1.0402213335037231, "learning_rate": 4.000722091579301e-07, "loss": 0.0174, "step": 6226 }, { "epoch": 0.9187753596458871, "grad_norm": 3.61226749420166, "learning_rate": 3.986310762967527e-07, "loss": 0.0224, "step": 6227 }, { "epoch": 0.9189229066765031, "grad_norm": 1.522729516029358, "learning_rate": 3.971924909439828e-07, "loss": 0.052, "step": 6228 }, { "epoch": 0.9190704537071192, "grad_norm": 3.099111557006836, "learning_rate": 3.9575645348132074e-07, "loss": 0.1204, "step": 6229 }, { "epoch": 0.9192180007377352, "grad_norm": 1.7007861137390137, "learning_rate": 3.943229642897861e-07, "loss": 0.0405, "step": 6230 }, { "epoch": 0.9193655477683511, "grad_norm": 5.425076484680176, "learning_rate": 3.9289202374972247e-07, "loss": 0.1331, "step": 6231 }, { "epoch": 0.9195130947989671, "grad_norm": 3.1894729137420654, "learning_rate": 3.9146363224079943e-07, "loss": 0.1074, "step": 6232 }, { "epoch": 0.9196606418295832, "grad_norm": 4.616338729858398, "learning_rate": 3.90037790142006e-07, "loss": 0.0738, "step": 6233 }, { "epoch": 0.9198081888601992, "grad_norm": 2.9994616508483887, "learning_rate": 3.886144978316586e-07, "loss": 0.1025, "step": 6234 }, { "epoch": 0.9199557358908153, "grad_norm": 3.527212142944336, "learning_rate": 3.8719375568739834e-07, "loss": 0.0572, "step": 6235 }, { "epoch": 0.9201032829214312, "grad_norm": 1.8691866397857666, "learning_rate": 3.8577556408618487e-07, "loss": 0.0501, "step": 6236 }, { "epoch": 0.9202508299520472, "grad_norm": 3.163731813430786, "learning_rate": 3.8435992340430383e-07, "loss": 0.0944, "step": 6237 }, { "epoch": 0.9203983769826632, "grad_norm": 2.161836862564087, "learning_rate": 3.829468340173637e-07, "loss": 0.0457, "step": 6238 }, { "epoch": 0.9205459240132793, "grad_norm": 3.0532407760620117, "learning_rate": 3.8153629630029666e-07, "loss": 0.0415, "step": 6239 }, { "epoch": 0.9206934710438952, "grad_norm": 2.5379703044891357, "learning_rate": 3.80128310627359e-07, "loss": 0.042, "step": 6240 }, { "epoch": 0.9206934710438952, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.0559084378182888, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.6899, "eval_samples_per_second": 5.856, "eval_steps_per_second": 0.201, "step": 6240 }, { "epoch": 0.9208410180745112, "grad_norm": 2.550798177719116, "learning_rate": 3.787228773721252e-07, "loss": 0.1024, "step": 6241 }, { "epoch": 0.9209885651051273, "grad_norm": 5.740802764892578, "learning_rate": 3.773199969074959e-07, "loss": 0.0571, "step": 6242 }, { "epoch": 0.9211361121357433, "grad_norm": 3.259659767150879, "learning_rate": 3.759196696056955e-07, "loss": 0.0458, "step": 6243 }, { "epoch": 0.9212836591663592, "grad_norm": 1.5455894470214844, "learning_rate": 3.7452189583827017e-07, "loss": 0.0435, "step": 6244 }, { "epoch": 0.9214312061969753, "grad_norm": 3.3945140838623047, "learning_rate": 3.731266759760854e-07, "loss": 0.1067, "step": 6245 }, { "epoch": 0.9215787532275913, "grad_norm": 2.3547747135162354, "learning_rate": 3.717340103893341e-07, "loss": 0.0584, "step": 6246 }, { "epoch": 0.9217263002582073, "grad_norm": 1.9721163511276245, "learning_rate": 3.7034389944752613e-07, "loss": 0.0601, "step": 6247 }, { "epoch": 0.9218738472888233, "grad_norm": 4.440569877624512, "learning_rate": 3.689563435194976e-07, "loss": 0.1317, "step": 6248 }, { "epoch": 0.9220213943194393, "grad_norm": 1.7474677562713623, "learning_rate": 3.6757134297340735e-07, "loss": 0.044, "step": 6249 }, { "epoch": 0.9221689413500553, "grad_norm": 1.8465862274169922, "learning_rate": 3.661888981767314e-07, "loss": 0.0436, "step": 6250 }, { "epoch": 0.9223164883806714, "grad_norm": 1.5237339735031128, "learning_rate": 3.6480900949627306e-07, "loss": 0.0412, "step": 6251 }, { "epoch": 0.9224640354112873, "grad_norm": 4.066259384155273, "learning_rate": 3.6343167729815164e-07, "loss": 0.0675, "step": 6252 }, { "epoch": 0.9226115824419033, "grad_norm": 1.155721664428711, "learning_rate": 3.6205690194781487e-07, "loss": 0.0208, "step": 6253 }, { "epoch": 0.9227591294725194, "grad_norm": 2.956277370452881, "learning_rate": 3.606846838100264e-07, "loss": 0.0557, "step": 6254 }, { "epoch": 0.9229066765031354, "grad_norm": 2.8474464416503906, "learning_rate": 3.5931502324887624e-07, "loss": 0.1175, "step": 6255 }, { "epoch": 0.9230542235337513, "grad_norm": 0.9943166971206665, "learning_rate": 3.579479206277692e-07, "loss": 0.0091, "step": 6256 }, { "epoch": 0.9232017705643674, "grad_norm": 2.0411195755004883, "learning_rate": 3.565833763094373e-07, "loss": 0.0226, "step": 6257 }, { "epoch": 0.9233493175949834, "grad_norm": 3.972092390060425, "learning_rate": 3.552213906559343e-07, "loss": 0.0739, "step": 6258 }, { "epoch": 0.9234968646255994, "grad_norm": 2.7468929290771484, "learning_rate": 3.538619640286278e-07, "loss": 0.1084, "step": 6259 }, { "epoch": 0.9236444116562155, "grad_norm": 3.2310478687286377, "learning_rate": 3.52505096788216e-07, "loss": 0.0505, "step": 6260 }, { "epoch": 0.9236444116562155, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05572787672281265, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 51.4965, "eval_samples_per_second": 5.651, "eval_steps_per_second": 0.194, "step": 6260 }, { "epoch": 0.9237919586868314, "grad_norm": 2.4628522396087646, "learning_rate": 3.5115078929470856e-07, "loss": 0.1245, "step": 6261 }, { "epoch": 0.9239395057174474, "grad_norm": 2.1519012451171875, "learning_rate": 3.4979904190744486e-07, "loss": 0.0936, "step": 6262 }, { "epoch": 0.9240870527480635, "grad_norm": 1.2964609861373901, "learning_rate": 3.48449854985079e-07, "loss": 0.0132, "step": 6263 }, { "epoch": 0.9242345997786795, "grad_norm": 1.963150143623352, "learning_rate": 3.471032288855869e-07, "loss": 0.0431, "step": 6264 }, { "epoch": 0.9243821468092954, "grad_norm": 2.247939109802246, "learning_rate": 3.457591639662672e-07, "loss": 0.0297, "step": 6265 }, { "epoch": 0.9245296938399115, "grad_norm": 1.91328763961792, "learning_rate": 3.444176605837368e-07, "loss": 0.0626, "step": 6266 }, { "epoch": 0.9246772408705275, "grad_norm": 2.5009827613830566, "learning_rate": 3.430787190939322e-07, "loss": 0.0545, "step": 6267 }, { "epoch": 0.9248247879011435, "grad_norm": 0.9863361120223999, "learning_rate": 3.4174233985211467e-07, "loss": 0.0193, "step": 6268 }, { "epoch": 0.9249723349317595, "grad_norm": 1.2787401676177979, "learning_rate": 3.4040852321285954e-07, "loss": 0.0104, "step": 6269 }, { "epoch": 0.9251198819623755, "grad_norm": 2.8463070392608643, "learning_rate": 3.39077269530067e-07, "loss": 0.0565, "step": 6270 }, { "epoch": 0.9252674289929915, "grad_norm": 2.008657217025757, "learning_rate": 3.3774857915695346e-07, "loss": 0.0718, "step": 6271 }, { "epoch": 0.9254149760236076, "grad_norm": 0.9314476251602173, "learning_rate": 3.364224524460602e-07, "loss": 0.0236, "step": 6272 }, { "epoch": 0.9255625230542235, "grad_norm": 2.7881734371185303, "learning_rate": 3.3509888974924243e-07, "loss": 0.0565, "step": 6273 }, { "epoch": 0.9257100700848395, "grad_norm": 3.7052223682403564, "learning_rate": 3.3377789141768035e-07, "loss": 0.0528, "step": 6274 }, { "epoch": 0.9258576171154556, "grad_norm": 1.0346524715423584, "learning_rate": 3.324594578018681e-07, "loss": 0.0221, "step": 6275 }, { "epoch": 0.9260051641460716, "grad_norm": 1.8550926446914673, "learning_rate": 3.3114358925162573e-07, "loss": 0.0368, "step": 6276 }, { "epoch": 0.9261527111766875, "grad_norm": 4.289306163787842, "learning_rate": 3.298302861160885e-07, "loss": 0.0772, "step": 6277 }, { "epoch": 0.9263002582073036, "grad_norm": 3.7169032096862793, "learning_rate": 3.2851954874371095e-07, "loss": 0.116, "step": 6278 }, { "epoch": 0.9264478052379196, "grad_norm": 1.6580818891525269, "learning_rate": 3.272113774822694e-07, "loss": 0.0495, "step": 6279 }, { "epoch": 0.9265953522685356, "grad_norm": 2.004760980606079, "learning_rate": 3.2590577267885726e-07, "loss": 0.0424, "step": 6280 }, { "epoch": 0.9265953522685356, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05522174760699272, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 52.1889, "eval_samples_per_second": 5.576, "eval_steps_per_second": 0.192, "step": 6280 }, { "epoch": 0.9267428992991517, "grad_norm": 2.8334689140319824, "learning_rate": 3.2460273467988635e-07, "loss": 0.0398, "step": 6281 }, { "epoch": 0.9268904463297676, "grad_norm": 0.9800840616226196, "learning_rate": 3.233022638310901e-07, "loss": 0.0199, "step": 6282 }, { "epoch": 0.9270379933603836, "grad_norm": 1.883017897605896, "learning_rate": 3.2200436047752026e-07, "loss": 0.0243, "step": 6283 }, { "epoch": 0.9271855403909997, "grad_norm": 3.470026731491089, "learning_rate": 3.207090249635436e-07, "loss": 0.0418, "step": 6284 }, { "epoch": 0.9273330874216157, "grad_norm": 3.9536657333374023, "learning_rate": 3.194162576328508e-07, "loss": 0.1209, "step": 6285 }, { "epoch": 0.9274806344522316, "grad_norm": 1.0981996059417725, "learning_rate": 3.181260588284485e-07, "loss": 0.0188, "step": 6286 }, { "epoch": 0.9276281814828476, "grad_norm": 13.924962997436523, "learning_rate": 3.168384288926596e-07, "loss": 0.1309, "step": 6287 }, { "epoch": 0.9277757285134637, "grad_norm": 1.7328006029129028, "learning_rate": 3.155533681671319e-07, "loss": 0.0376, "step": 6288 }, { "epoch": 0.9279232755440797, "grad_norm": 7.636415481567383, "learning_rate": 3.1427087699282375e-07, "loss": 0.0774, "step": 6289 }, { "epoch": 0.9280708225746956, "grad_norm": 2.2514214515686035, "learning_rate": 3.1299095571001745e-07, "loss": 0.1002, "step": 6290 }, { "epoch": 0.9282183696053117, "grad_norm": 2.580007314682007, "learning_rate": 3.1171360465831245e-07, "loss": 0.066, "step": 6291 }, { "epoch": 0.9283659166359277, "grad_norm": 4.111058712005615, "learning_rate": 3.104388241766232e-07, "loss": 0.1297, "step": 6292 }, { "epoch": 0.9285134636665437, "grad_norm": 2.8205816745758057, "learning_rate": 3.091666146031858e-07, "loss": 0.0468, "step": 6293 }, { "epoch": 0.9286610106971597, "grad_norm": 1.6066216230392456, "learning_rate": 3.0789697627555124e-07, "loss": 0.0386, "step": 6294 }, { "epoch": 0.9288085577277757, "grad_norm": 2.5085225105285645, "learning_rate": 3.0662990953058803e-07, "loss": 0.0928, "step": 6295 }, { "epoch": 0.9289561047583917, "grad_norm": 1.2958418130874634, "learning_rate": 3.0536541470448824e-07, "loss": 0.0261, "step": 6296 }, { "epoch": 0.9291036517890078, "grad_norm": 1.3905576467514038, "learning_rate": 3.041034921327557e-07, "loss": 0.0206, "step": 6297 }, { "epoch": 0.9292511988196237, "grad_norm": 2.7028305530548096, "learning_rate": 3.028441421502115e-07, "loss": 0.0687, "step": 6298 }, { "epoch": 0.9293987458502397, "grad_norm": 2.6002049446105957, "learning_rate": 3.015873650909984e-07, "loss": 0.0873, "step": 6299 }, { "epoch": 0.9295462928808558, "grad_norm": 2.208272695541382, "learning_rate": 3.003331612885718e-07, "loss": 0.0506, "step": 6300 }, { "epoch": 0.9295462928808558, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05601061135530472, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.1847, "eval_samples_per_second": 5.799, "eval_steps_per_second": 0.199, "step": 6300 }, { "epoch": 0.9296938399114718, "grad_norm": 3.9481570720672607, "learning_rate": 2.990815310757078e-07, "loss": 0.0607, "step": 6301 }, { "epoch": 0.9298413869420878, "grad_norm": 3.901336908340454, "learning_rate": 2.978324747844996e-07, "loss": 0.0626, "step": 6302 }, { "epoch": 0.9299889339727038, "grad_norm": 1.755456566810608, "learning_rate": 2.9658599274635435e-07, "loss": 0.0784, "step": 6303 }, { "epoch": 0.9301364810033198, "grad_norm": 1.976046085357666, "learning_rate": 2.953420852919997e-07, "loss": 0.0357, "step": 6304 }, { "epoch": 0.9302840280339358, "grad_norm": 1.2929192781448364, "learning_rate": 2.941007527514772e-07, "loss": 0.0359, "step": 6305 }, { "epoch": 0.9304315750645519, "grad_norm": 4.491754531860352, "learning_rate": 2.9286199545414675e-07, "loss": 0.0641, "step": 6306 }, { "epoch": 0.9305791220951678, "grad_norm": 2.579371690750122, "learning_rate": 2.916258137286876e-07, "loss": 0.0456, "step": 6307 }, { "epoch": 0.9307266691257838, "grad_norm": 3.1688661575317383, "learning_rate": 2.9039220790308965e-07, "loss": 0.0636, "step": 6308 }, { "epoch": 0.9308742161563999, "grad_norm": 1.6486138105392456, "learning_rate": 2.8916117830466215e-07, "loss": 0.0534, "step": 6309 }, { "epoch": 0.9310217631870159, "grad_norm": 1.5931568145751953, "learning_rate": 2.8793272526003504e-07, "loss": 0.0462, "step": 6310 }, { "epoch": 0.9311693102176318, "grad_norm": 2.7471938133239746, "learning_rate": 2.8670684909514854e-07, "loss": 0.0801, "step": 6311 }, { "epoch": 0.9313168572482479, "grad_norm": 2.7132720947265625, "learning_rate": 2.854835501352615e-07, "loss": 0.0797, "step": 6312 }, { "epoch": 0.9314644042788639, "grad_norm": 2.3825271129608154, "learning_rate": 2.842628287049498e-07, "loss": 0.0573, "step": 6313 }, { "epoch": 0.9316119513094799, "grad_norm": 4.8826680183410645, "learning_rate": 2.830446851281021e-07, "loss": 0.0617, "step": 6314 }, { "epoch": 0.9317594983400959, "grad_norm": 1.6326963901519775, "learning_rate": 2.818291197279277e-07, "loss": 0.0433, "step": 6315 }, { "epoch": 0.9319070453707119, "grad_norm": 4.223081588745117, "learning_rate": 2.806161328269508e-07, "loss": 0.0342, "step": 6316 }, { "epoch": 0.9320545924013279, "grad_norm": 1.6812433004379272, "learning_rate": 2.7940572474700724e-07, "loss": 0.0363, "step": 6317 }, { "epoch": 0.932202139431944, "grad_norm": 1.4430614709854126, "learning_rate": 2.781978958092535e-07, "loss": 0.0376, "step": 6318 }, { "epoch": 0.9323496864625599, "grad_norm": 1.3855177164077759, "learning_rate": 2.769926463341599e-07, "loss": 0.0322, "step": 6319 }, { "epoch": 0.9324972334931759, "grad_norm": 0.8598216772079468, "learning_rate": 2.7578997664151176e-07, "loss": 0.0238, "step": 6320 }, { "epoch": 0.9324972334931759, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05572595074772835, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1318, "eval_samples_per_second": 5.923, "eval_steps_per_second": 0.204, "step": 6320 }, { "epoch": 0.932644780523792, "grad_norm": 2.7048840522766113, "learning_rate": 2.745898870504116e-07, "loss": 0.1049, "step": 6321 }, { "epoch": 0.932792327554408, "grad_norm": 3.241443634033203, "learning_rate": 2.733923778792769e-07, "loss": 0.119, "step": 6322 }, { "epoch": 0.932939874585024, "grad_norm": 3.209383249282837, "learning_rate": 2.721974494458368e-07, "loss": 0.0777, "step": 6323 }, { "epoch": 0.93308742161564, "grad_norm": 3.865638494491577, "learning_rate": 2.7100510206714225e-07, "loss": 0.0896, "step": 6324 }, { "epoch": 0.933234968646256, "grad_norm": 3.3464627265930176, "learning_rate": 2.6981533605955455e-07, "loss": 0.046, "step": 6325 }, { "epoch": 0.933382515676872, "grad_norm": 1.673051118850708, "learning_rate": 2.686281517387501e-07, "loss": 0.058, "step": 6326 }, { "epoch": 0.9335300627074881, "grad_norm": 1.3582005500793457, "learning_rate": 2.674435494197247e-07, "loss": 0.023, "step": 6327 }, { "epoch": 0.933677609738104, "grad_norm": 1.89664626121521, "learning_rate": 2.662615294167836e-07, "loss": 0.0617, "step": 6328 }, { "epoch": 0.93382515676872, "grad_norm": 1.3908201456069946, "learning_rate": 2.650820920435493e-07, "loss": 0.0196, "step": 6329 }, { "epoch": 0.9339727037993361, "grad_norm": 1.5495145320892334, "learning_rate": 2.639052376129614e-07, "loss": 0.0203, "step": 6330 }, { "epoch": 0.9341202508299521, "grad_norm": 4.128423690795898, "learning_rate": 2.6273096643727015e-07, "loss": 0.0518, "step": 6331 }, { "epoch": 0.934267797860568, "grad_norm": 1.1344972848892212, "learning_rate": 2.61559278828043e-07, "loss": 0.0158, "step": 6332 }, { "epoch": 0.934415344891184, "grad_norm": 3.8028311729431152, "learning_rate": 2.603901750961602e-07, "loss": 0.0839, "step": 6333 }, { "epoch": 0.9345628919218001, "grad_norm": 1.9522721767425537, "learning_rate": 2.5922365555181686e-07, "loss": 0.0402, "step": 6334 }, { "epoch": 0.9347104389524161, "grad_norm": 2.5471351146698, "learning_rate": 2.5805972050452434e-07, "loss": 0.0524, "step": 6335 }, { "epoch": 0.934857985983032, "grad_norm": 6.653663158416748, "learning_rate": 2.568983702631067e-07, "loss": 0.1402, "step": 6336 }, { "epoch": 0.9350055330136481, "grad_norm": 0.4973243176937103, "learning_rate": 2.5573960513570085e-07, "loss": 0.0042, "step": 6337 }, { "epoch": 0.9351530800442641, "grad_norm": 2.1447861194610596, "learning_rate": 2.5458342542975855e-07, "loss": 0.0523, "step": 6338 }, { "epoch": 0.9353006270748802, "grad_norm": 2.394932985305786, "learning_rate": 2.5342983145205003e-07, "loss": 0.0951, "step": 6339 }, { "epoch": 0.9354481741054961, "grad_norm": 3.6239054203033447, "learning_rate": 2.5227882350865154e-07, "loss": 0.0703, "step": 6340 }, { "epoch": 0.9354481741054961, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05495457723736763, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.8024, "eval_samples_per_second": 5.963, "eval_steps_per_second": 0.205, "step": 6340 }, { "epoch": 0.9355957211361121, "grad_norm": 1.973219394683838, "learning_rate": 2.5113040190495986e-07, "loss": 0.0354, "step": 6341 }, { "epoch": 0.9357432681667281, "grad_norm": 4.711592674255371, "learning_rate": 2.4998456694568016e-07, "loss": 0.0819, "step": 6342 }, { "epoch": 0.9358908151973442, "grad_norm": 1.5042883157730103, "learning_rate": 2.488413189348371e-07, "loss": 0.0336, "step": 6343 }, { "epoch": 0.9360383622279601, "grad_norm": 3.2596170902252197, "learning_rate": 2.477006581757657e-07, "loss": 0.0782, "step": 6344 }, { "epoch": 0.9361859092585761, "grad_norm": 3.104275703430176, "learning_rate": 2.4656258497111285e-07, "loss": 0.0566, "step": 6345 }, { "epoch": 0.9363334562891922, "grad_norm": 1.263534426689148, "learning_rate": 2.454270996228425e-07, "loss": 0.0165, "step": 6346 }, { "epoch": 0.9364810033198082, "grad_norm": 1.779310941696167, "learning_rate": 2.4429420243222924e-07, "loss": 0.0441, "step": 6347 }, { "epoch": 0.9366285503504242, "grad_norm": 2.7920477390289307, "learning_rate": 2.431638936998615e-07, "loss": 0.0906, "step": 6348 }, { "epoch": 0.9367760973810402, "grad_norm": 3.6723668575286865, "learning_rate": 2.420361737256438e-07, "loss": 0.0574, "step": 6349 }, { "epoch": 0.9369236444116562, "grad_norm": 1.9342572689056396, "learning_rate": 2.4091104280878906e-07, "loss": 0.0695, "step": 6350 }, { "epoch": 0.9370711914422722, "grad_norm": 1.4174875020980835, "learning_rate": 2.3978850124782736e-07, "loss": 0.023, "step": 6351 }, { "epoch": 0.9372187384728883, "grad_norm": 4.9079389572143555, "learning_rate": 2.3866854934059823e-07, "loss": 0.0569, "step": 6352 }, { "epoch": 0.9373662855035042, "grad_norm": 1.6540197134017944, "learning_rate": 2.375511873842562e-07, "loss": 0.0381, "step": 6353 }, { "epoch": 0.9375138325341202, "grad_norm": 4.948291301727295, "learning_rate": 2.3643641567526966e-07, "loss": 0.0529, "step": 6354 }, { "epoch": 0.9376613795647363, "grad_norm": 2.5287394523620605, "learning_rate": 2.3532423450941755e-07, "loss": 0.1042, "step": 6355 }, { "epoch": 0.9378089265953523, "grad_norm": 2.72228741645813, "learning_rate": 2.3421464418179163e-07, "loss": 0.0786, "step": 6356 }, { "epoch": 0.9379564736259682, "grad_norm": 0.9982196092605591, "learning_rate": 2.331076449867975e-07, "loss": 0.0208, "step": 6357 }, { "epoch": 0.9381040206565843, "grad_norm": 2.173036575317383, "learning_rate": 2.3200323721815244e-07, "loss": 0.0312, "step": 6358 }, { "epoch": 0.9382515676872003, "grad_norm": 2.706411123275757, "learning_rate": 2.309014211688865e-07, "loss": 0.0505, "step": 6359 }, { "epoch": 0.9383991147178163, "grad_norm": 2.960676908493042, "learning_rate": 2.2980219713134133e-07, "loss": 0.0772, "step": 6360 }, { "epoch": 0.9383991147178163, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055578552186489105, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.2114, "eval_samples_per_second": 5.913, "eval_steps_per_second": 0.203, "step": 6360 }, { "epoch": 0.9385466617484323, "grad_norm": 2.327997922897339, "learning_rate": 2.2870556539717258e-07, "loss": 0.0591, "step": 6361 }, { "epoch": 0.9386942087790483, "grad_norm": 1.6461297273635864, "learning_rate": 2.2761152625734527e-07, "loss": 0.0382, "step": 6362 }, { "epoch": 0.9388417558096643, "grad_norm": 1.021716594696045, "learning_rate": 2.2652008000214055e-07, "loss": 0.0326, "step": 6363 }, { "epoch": 0.9389893028402804, "grad_norm": 1.242136001586914, "learning_rate": 2.2543122692114672e-07, "loss": 0.0328, "step": 6364 }, { "epoch": 0.9391368498708963, "grad_norm": 1.448472499847412, "learning_rate": 2.2434496730326937e-07, "loss": 0.0381, "step": 6365 }, { "epoch": 0.9392843969015123, "grad_norm": 3.8739380836486816, "learning_rate": 2.2326130143671908e-07, "loss": 0.046, "step": 6366 }, { "epoch": 0.9394319439321284, "grad_norm": 2.4928221702575684, "learning_rate": 2.2218022960902696e-07, "loss": 0.0498, "step": 6367 }, { "epoch": 0.9395794909627444, "grad_norm": 2.8968076705932617, "learning_rate": 2.21101752107028e-07, "loss": 0.0644, "step": 6368 }, { "epoch": 0.9397270379933604, "grad_norm": 3.011594295501709, "learning_rate": 2.200258692168744e-07, "loss": 0.1531, "step": 6369 }, { "epoch": 0.9398745850239764, "grad_norm": 1.3657293319702148, "learning_rate": 2.1895258122402563e-07, "loss": 0.0265, "step": 6370 }, { "epoch": 0.9400221320545924, "grad_norm": 1.2389910221099854, "learning_rate": 2.1788188841325497e-07, "loss": 0.0218, "step": 6371 }, { "epoch": 0.9401696790852084, "grad_norm": 8.222478866577148, "learning_rate": 2.1681379106864853e-07, "loss": 0.0657, "step": 6372 }, { "epoch": 0.9403172261158245, "grad_norm": 2.385483980178833, "learning_rate": 2.157482894735996e-07, "loss": 0.016, "step": 6373 }, { "epoch": 0.9404647731464404, "grad_norm": 2.611680507659912, "learning_rate": 2.146853839108165e-07, "loss": 0.0888, "step": 6374 }, { "epoch": 0.9406123201770564, "grad_norm": 3.23009991645813, "learning_rate": 2.1362507466231808e-07, "loss": 0.0801, "step": 6375 }, { "epoch": 0.9407598672076725, "grad_norm": 0.6852890849113464, "learning_rate": 2.1256736200943152e-07, "loss": 0.0077, "step": 6376 }, { "epoch": 0.9409074142382885, "grad_norm": 0.5792398452758789, "learning_rate": 2.1151224623280008e-07, "loss": 0.0139, "step": 6377 }, { "epoch": 0.9410549612689044, "grad_norm": 1.1180408000946045, "learning_rate": 2.104597276123721e-07, "loss": 0.0198, "step": 6378 }, { "epoch": 0.9412025082995205, "grad_norm": 2.6714653968811035, "learning_rate": 2.0940980642741304e-07, "loss": 0.0531, "step": 6379 }, { "epoch": 0.9413500553301365, "grad_norm": 4.037075519561768, "learning_rate": 2.0836248295649342e-07, "loss": 0.0697, "step": 6380 }, { "epoch": 0.9413500553301365, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05569841340184212, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.1432, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.203, "step": 6380 }, { "epoch": 0.9414976023607525, "grad_norm": 1.3099905252456665, "learning_rate": 2.0731775747749761e-07, "loss": 0.0283, "step": 6381 }, { "epoch": 0.9416451493913685, "grad_norm": 3.0591821670532227, "learning_rate": 2.0627563026762053e-07, "loss": 0.0455, "step": 6382 }, { "epoch": 0.9417926964219845, "grad_norm": 1.2957801818847656, "learning_rate": 2.0523610160336883e-07, "loss": 0.0367, "step": 6383 }, { "epoch": 0.9419402434526005, "grad_norm": 4.634521007537842, "learning_rate": 2.0419917176055514e-07, "loss": 0.05, "step": 6384 }, { "epoch": 0.9420877904832166, "grad_norm": 2.9300975799560547, "learning_rate": 2.0316484101430722e-07, "loss": 0.0338, "step": 6385 }, { "epoch": 0.9422353375138325, "grad_norm": 2.606213092803955, "learning_rate": 2.021331096390622e-07, "loss": 0.0643, "step": 6386 }, { "epoch": 0.9423828845444485, "grad_norm": 0.7693409323692322, "learning_rate": 2.0110397790856552e-07, "loss": 0.0224, "step": 6387 }, { "epoch": 0.9425304315750646, "grad_norm": 3.6793739795684814, "learning_rate": 2.0007744609587542e-07, "loss": 0.0514, "step": 6388 }, { "epoch": 0.9426779786056806, "grad_norm": 1.294873595237732, "learning_rate": 1.9905351447335742e-07, "loss": 0.0233, "step": 6389 }, { "epoch": 0.9428255256362966, "grad_norm": 1.8262622356414795, "learning_rate": 1.980321833126908e-07, "loss": 0.075, "step": 6390 }, { "epoch": 0.9429730726669125, "grad_norm": 2.2138702869415283, "learning_rate": 1.9701345288486328e-07, "loss": 0.0569, "step": 6391 }, { "epoch": 0.9431206196975286, "grad_norm": 1.8159193992614746, "learning_rate": 1.9599732346016974e-07, "loss": 0.0345, "step": 6392 }, { "epoch": 0.9432681667281446, "grad_norm": 3.349161386489868, "learning_rate": 1.949837953082201e-07, "loss": 0.1248, "step": 6393 }, { "epoch": 0.9434157137587607, "grad_norm": 1.3875066041946411, "learning_rate": 1.939728686979292e-07, "loss": 0.012, "step": 6394 }, { "epoch": 0.9435632607893766, "grad_norm": 1.954710602760315, "learning_rate": 1.9296454389752362e-07, "loss": 0.0378, "step": 6395 }, { "epoch": 0.9437108078199926, "grad_norm": 3.135584592819214, "learning_rate": 1.9195882117454267e-07, "loss": 0.1147, "step": 6396 }, { "epoch": 0.9438583548506086, "grad_norm": 2.1549274921417236, "learning_rate": 1.909557007958307e-07, "loss": 0.0458, "step": 6397 }, { "epoch": 0.9440059018812247, "grad_norm": 3.548346519470215, "learning_rate": 1.8995518302754145e-07, "loss": 0.0478, "step": 6398 }, { "epoch": 0.9441534489118406, "grad_norm": 1.0740330219268799, "learning_rate": 1.8895726813514258e-07, "loss": 0.0244, "step": 6399 }, { "epoch": 0.9443009959424566, "grad_norm": 3.005735397338867, "learning_rate": 1.8796195638340676e-07, "loss": 0.0446, "step": 6400 }, { "epoch": 0.9443009959424566, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05581614002585411, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0804, "eval_samples_per_second": 5.929, "eval_steps_per_second": 0.204, "step": 6400 }, { "epoch": 0.9444485429730727, "grad_norm": 5.365920543670654, "learning_rate": 1.8696924803641825e-07, "loss": 0.0539, "step": 6401 }, { "epoch": 0.9445960900036887, "grad_norm": 1.1807736158370972, "learning_rate": 1.8597914335757085e-07, "loss": 0.0485, "step": 6402 }, { "epoch": 0.9447436370343046, "grad_norm": 1.9345782995224, "learning_rate": 1.8499164260956548e-07, "loss": 0.025, "step": 6403 }, { "epoch": 0.9448911840649207, "grad_norm": 2.943807363510132, "learning_rate": 1.840067460544137e-07, "loss": 0.0545, "step": 6404 }, { "epoch": 0.9450387310955367, "grad_norm": 1.9922869205474854, "learning_rate": 1.830244539534376e-07, "loss": 0.0358, "step": 6405 }, { "epoch": 0.9451862781261527, "grad_norm": 2.3645715713500977, "learning_rate": 1.820447665672631e-07, "loss": 0.0791, "step": 6406 }, { "epoch": 0.9453338251567687, "grad_norm": 2.1656434535980225, "learning_rate": 1.810676841558301e-07, "loss": 0.0472, "step": 6407 }, { "epoch": 0.9454813721873847, "grad_norm": 1.5175650119781494, "learning_rate": 1.8009320697838672e-07, "loss": 0.024, "step": 6408 }, { "epoch": 0.9456289192180007, "grad_norm": 1.9532819986343384, "learning_rate": 1.791213352934851e-07, "loss": 0.045, "step": 6409 }, { "epoch": 0.9457764662486168, "grad_norm": 1.9861067533493042, "learning_rate": 1.7815206935899332e-07, "loss": 0.0689, "step": 6410 }, { "epoch": 0.9459240132792327, "grad_norm": 5.2447967529296875, "learning_rate": 1.771854094320835e-07, "loss": 0.0425, "step": 6411 }, { "epoch": 0.9460715603098487, "grad_norm": 2.146310567855835, "learning_rate": 1.7622135576923495e-07, "loss": 0.0601, "step": 6412 }, { "epoch": 0.9462191073404648, "grad_norm": 2.063035726547241, "learning_rate": 1.7525990862624188e-07, "loss": 0.0776, "step": 6413 }, { "epoch": 0.9463666543710808, "grad_norm": 2.2232484817504883, "learning_rate": 1.7430106825819804e-07, "loss": 0.0306, "step": 6414 }, { "epoch": 0.9465142014016968, "grad_norm": 1.885646939277649, "learning_rate": 1.7334483491951327e-07, "loss": 0.0365, "step": 6415 }, { "epoch": 0.9466617484323128, "grad_norm": 2.823607921600342, "learning_rate": 1.7239120886390347e-07, "loss": 0.0833, "step": 6416 }, { "epoch": 0.9468092954629288, "grad_norm": 3.014352798461914, "learning_rate": 1.7144019034438851e-07, "loss": 0.0913, "step": 6417 }, { "epoch": 0.9469568424935448, "grad_norm": 2.5719082355499268, "learning_rate": 1.7049177961330432e-07, "loss": 0.0422, "step": 6418 }, { "epoch": 0.9471043895241609, "grad_norm": 2.7025134563446045, "learning_rate": 1.6954597692228626e-07, "loss": 0.1112, "step": 6419 }, { "epoch": 0.9472519365547768, "grad_norm": 1.737754464149475, "learning_rate": 1.6860278252228358e-07, "loss": 0.0323, "step": 6420 }, { "epoch": 0.9472519365547768, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05500521510839462, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.669, "eval_samples_per_second": 5.979, "eval_steps_per_second": 0.205, "step": 6420 }, { "epoch": 0.9473994835853928, "grad_norm": 2.6450870037078857, "learning_rate": 1.6766219666355278e-07, "loss": 0.0535, "step": 6421 }, { "epoch": 0.9475470306160089, "grad_norm": 2.7876486778259277, "learning_rate": 1.6672421959565755e-07, "loss": 0.0498, "step": 6422 }, { "epoch": 0.9476945776466249, "grad_norm": 4.724228858947754, "learning_rate": 1.6578885156746548e-07, "loss": 0.0836, "step": 6423 }, { "epoch": 0.9478421246772408, "grad_norm": 1.4518014192581177, "learning_rate": 1.648560928271592e-07, "loss": 0.0291, "step": 6424 }, { "epoch": 0.9479896717078569, "grad_norm": 2.3440370559692383, "learning_rate": 1.6392594362222515e-07, "loss": 0.0662, "step": 6425 }, { "epoch": 0.9481372187384729, "grad_norm": 1.8037214279174805, "learning_rate": 1.629984041994559e-07, "loss": 0.0443, "step": 6426 }, { "epoch": 0.9482847657690889, "grad_norm": 0.7664615511894226, "learning_rate": 1.6207347480495462e-07, "loss": 0.0098, "step": 6427 }, { "epoch": 0.9484323127997049, "grad_norm": 3.4400036334991455, "learning_rate": 1.6115115568412942e-07, "loss": 0.084, "step": 6428 }, { "epoch": 0.9485798598303209, "grad_norm": 2.720244884490967, "learning_rate": 1.602314470816968e-07, "loss": 0.0446, "step": 6429 }, { "epoch": 0.9487274068609369, "grad_norm": 1.642297625541687, "learning_rate": 1.5931434924168377e-07, "loss": 0.0146, "step": 6430 }, { "epoch": 0.948874953891553, "grad_norm": 2.7700729370117188, "learning_rate": 1.583998624074179e-07, "loss": 0.0629, "step": 6431 }, { "epoch": 0.9490225009221689, "grad_norm": 4.600953578948975, "learning_rate": 1.5748798682154177e-07, "loss": 0.1125, "step": 6432 }, { "epoch": 0.9491700479527849, "grad_norm": 1.555990219116211, "learning_rate": 1.5657872272599738e-07, "loss": 0.0382, "step": 6433 }, { "epoch": 0.949317594983401, "grad_norm": 2.383833408355713, "learning_rate": 1.5567207036203957e-07, "loss": 0.0967, "step": 6434 }, { "epoch": 0.949465142014017, "grad_norm": 3.316439390182495, "learning_rate": 1.5476802997022812e-07, "loss": 0.1172, "step": 6435 }, { "epoch": 0.949612689044633, "grad_norm": 4.898162841796875, "learning_rate": 1.538666017904311e-07, "loss": 0.1416, "step": 6436 }, { "epoch": 0.949760236075249, "grad_norm": 2.7083256244659424, "learning_rate": 1.5296778606181839e-07, "loss": 0.057, "step": 6437 }, { "epoch": 0.949907783105865, "grad_norm": 2.0090880393981934, "learning_rate": 1.5207158302287472e-07, "loss": 0.0365, "step": 6438 }, { "epoch": 0.950055330136481, "grad_norm": 2.323190689086914, "learning_rate": 1.5117799291138657e-07, "loss": 0.0455, "step": 6439 }, { "epoch": 0.9502028771670971, "grad_norm": 2.2493746280670166, "learning_rate": 1.502870159644465e-07, "loss": 0.0496, "step": 6440 }, { "epoch": 0.9502028771670971, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055333010852336884, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.0651, "eval_samples_per_second": 5.931, "eval_steps_per_second": 0.204, "step": 6440 }, { "epoch": 0.950350424197713, "grad_norm": 9.028923034667969, "learning_rate": 1.4939865241845652e-07, "loss": 0.1114, "step": 6441 }, { "epoch": 0.950497971228329, "grad_norm": 3.4927148818969727, "learning_rate": 1.4851290250912365e-07, "loss": 0.0462, "step": 6442 }, { "epoch": 0.950645518258945, "grad_norm": 1.9196674823760986, "learning_rate": 1.476297664714621e-07, "loss": 0.0741, "step": 6443 }, { "epoch": 0.9507930652895611, "grad_norm": 0.9236531257629395, "learning_rate": 1.4674924453979223e-07, "loss": 0.0087, "step": 6444 }, { "epoch": 0.950940612320177, "grad_norm": 1.8691339492797852, "learning_rate": 1.4587133694774048e-07, "loss": 0.0449, "step": 6445 }, { "epoch": 0.951088159350793, "grad_norm": 3.3348920345306396, "learning_rate": 1.4499604392824052e-07, "loss": 0.0528, "step": 6446 }, { "epoch": 0.9512357063814091, "grad_norm": 3.1573545932769775, "learning_rate": 1.4412336571353103e-07, "loss": 0.1077, "step": 6447 }, { "epoch": 0.9513832534120251, "grad_norm": 1.6667567491531372, "learning_rate": 1.4325330253515902e-07, "loss": 0.0289, "step": 6448 }, { "epoch": 0.951530800442641, "grad_norm": 2.5860416889190674, "learning_rate": 1.4238585462397536e-07, "loss": 0.076, "step": 6449 }, { "epoch": 0.9516783474732571, "grad_norm": 2.118489980697632, "learning_rate": 1.4152102221013708e-07, "loss": 0.0561, "step": 6450 }, { "epoch": 0.9518258945038731, "grad_norm": 1.936941385269165, "learning_rate": 1.4065880552310952e-07, "loss": 0.0525, "step": 6451 }, { "epoch": 0.9519734415344892, "grad_norm": 1.3135408163070679, "learning_rate": 1.3979920479166187e-07, "loss": 0.0513, "step": 6452 }, { "epoch": 0.9521209885651051, "grad_norm": 2.240351915359497, "learning_rate": 1.389422202438706e-07, "loss": 0.0541, "step": 6453 }, { "epoch": 0.9522685355957211, "grad_norm": 4.377660751342773, "learning_rate": 1.3808785210711606e-07, "loss": 0.0642, "step": 6454 }, { "epoch": 0.9524160826263371, "grad_norm": 4.16725492477417, "learning_rate": 1.3723610060808801e-07, "loss": 0.0707, "step": 6455 }, { "epoch": 0.9525636296569532, "grad_norm": 1.5783486366271973, "learning_rate": 1.3638696597277678e-07, "loss": 0.0347, "step": 6456 }, { "epoch": 0.9527111766875692, "grad_norm": 0.9976248741149902, "learning_rate": 1.3554044842648217e-07, "loss": 0.0198, "step": 6457 }, { "epoch": 0.9528587237181851, "grad_norm": 2.819964647293091, "learning_rate": 1.3469654819381118e-07, "loss": 0.0753, "step": 6458 }, { "epoch": 0.9530062707488012, "grad_norm": 0.9899864196777344, "learning_rate": 1.3385526549867022e-07, "loss": 0.013, "step": 6459 }, { "epoch": 0.9531538177794172, "grad_norm": 2.6590030193328857, "learning_rate": 1.3301660056427745e-07, "loss": 0.0809, "step": 6460 }, { "epoch": 0.9531538177794172, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05532016232609749, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.7081, "eval_samples_per_second": 5.974, "eval_steps_per_second": 0.205, "step": 6460 }, { "epoch": 0.9533013648100332, "grad_norm": 2.229666233062744, "learning_rate": 1.3218055361315262e-07, "loss": 0.0684, "step": 6461 }, { "epoch": 0.9534489118406492, "grad_norm": 2.2530157566070557, "learning_rate": 1.3134712486712165e-07, "loss": 0.0549, "step": 6462 }, { "epoch": 0.9535964588712652, "grad_norm": 2.005972146987915, "learning_rate": 1.3051631454731873e-07, "loss": 0.0462, "step": 6463 }, { "epoch": 0.9537440059018812, "grad_norm": 3.3792154788970947, "learning_rate": 1.2968812287417753e-07, "loss": 0.1276, "step": 6464 }, { "epoch": 0.9538915529324973, "grad_norm": 3.6405813694000244, "learning_rate": 1.288625500674412e-07, "loss": 0.0681, "step": 6465 }, { "epoch": 0.9540390999631132, "grad_norm": 1.7894163131713867, "learning_rate": 1.2803959634615782e-07, "loss": 0.0496, "step": 6466 }, { "epoch": 0.9541866469937292, "grad_norm": 0.9998230934143066, "learning_rate": 1.272192619286805e-07, "loss": 0.0204, "step": 6467 }, { "epoch": 0.9543341940243453, "grad_norm": 2.581663131713867, "learning_rate": 1.2640154703266405e-07, "loss": 0.0972, "step": 6468 }, { "epoch": 0.9544817410549613, "grad_norm": 1.5313342809677124, "learning_rate": 1.2558645187507267e-07, "loss": 0.0412, "step": 6469 }, { "epoch": 0.9546292880855772, "grad_norm": 2.740792751312256, "learning_rate": 1.247739766721734e-07, "loss": 0.0527, "step": 6470 }, { "epoch": 0.9547768351161933, "grad_norm": 1.9213531017303467, "learning_rate": 1.2396412163953709e-07, "loss": 0.0462, "step": 6471 }, { "epoch": 0.9549243821468093, "grad_norm": 2.4002788066864014, "learning_rate": 1.2315688699204298e-07, "loss": 0.0869, "step": 6472 }, { "epoch": 0.9550719291774253, "grad_norm": 4.3059186935424805, "learning_rate": 1.2235227294387085e-07, "loss": 0.0794, "step": 6473 }, { "epoch": 0.9552194762080413, "grad_norm": 2.3202598094940186, "learning_rate": 1.2155027970850776e-07, "loss": 0.0262, "step": 6474 }, { "epoch": 0.9553670232386573, "grad_norm": 2.168534517288208, "learning_rate": 1.2075090749874451e-07, "loss": 0.0299, "step": 6475 }, { "epoch": 0.9555145702692733, "grad_norm": 4.975533485412598, "learning_rate": 1.1995415652667598e-07, "loss": 0.1115, "step": 6476 }, { "epoch": 0.9556621172998894, "grad_norm": 2.08109450340271, "learning_rate": 1.1916002700370411e-07, "loss": 0.0311, "step": 6477 }, { "epoch": 0.9558096643305053, "grad_norm": 3.003537654876709, "learning_rate": 1.183685191405315e-07, "loss": 0.033, "step": 6478 }, { "epoch": 0.9559572113611213, "grad_norm": 1.9444817304611206, "learning_rate": 1.1757963314716791e-07, "loss": 0.051, "step": 6479 }, { "epoch": 0.9561047583917374, "grad_norm": 2.8040788173675537, "learning_rate": 1.1679336923292594e-07, "loss": 0.0706, "step": 6480 }, { "epoch": 0.9561047583917374, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05492851138114929, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 48.3996, "eval_samples_per_second": 6.012, "eval_steps_per_second": 0.207, "step": 6480 }, { "epoch": 0.9562523054223534, "grad_norm": 5.251060485839844, "learning_rate": 1.1600972760642426e-07, "loss": 0.0943, "step": 6481 }, { "epoch": 0.9563998524529694, "grad_norm": 2.8643219470977783, "learning_rate": 1.1522870847558432e-07, "loss": 0.0305, "step": 6482 }, { "epoch": 0.9565473994835854, "grad_norm": 3.2723355293273926, "learning_rate": 1.1445031204763146e-07, "loss": 0.0637, "step": 6483 }, { "epoch": 0.9566949465142014, "grad_norm": 3.418752908706665, "learning_rate": 1.1367453852909493e-07, "loss": 0.0994, "step": 6484 }, { "epoch": 0.9568424935448174, "grad_norm": 0.9884876608848572, "learning_rate": 1.1290138812581009e-07, "loss": 0.0218, "step": 6485 }, { "epoch": 0.9569900405754335, "grad_norm": 2.2045938968658447, "learning_rate": 1.1213086104291615e-07, "loss": 0.032, "step": 6486 }, { "epoch": 0.9571375876060494, "grad_norm": 2.34030818939209, "learning_rate": 1.1136295748485293e-07, "loss": 0.0321, "step": 6487 }, { "epoch": 0.9572851346366654, "grad_norm": 2.480587959289551, "learning_rate": 1.1059767765536856e-07, "loss": 0.0331, "step": 6488 }, { "epoch": 0.9574326816672815, "grad_norm": 2.711378574371338, "learning_rate": 1.098350217575117e-07, "loss": 0.0592, "step": 6489 }, { "epoch": 0.9575802286978975, "grad_norm": 1.1604716777801514, "learning_rate": 1.0907498999363609e-07, "loss": 0.015, "step": 6490 }, { "epoch": 0.9577277757285134, "grad_norm": 1.9541465044021606, "learning_rate": 1.0831758256539925e-07, "loss": 0.043, "step": 6491 }, { "epoch": 0.9578753227591295, "grad_norm": 6.780413627624512, "learning_rate": 1.075627996737627e-07, "loss": 0.0828, "step": 6492 }, { "epoch": 0.9580228697897455, "grad_norm": 0.8017694354057312, "learning_rate": 1.0681064151899068e-07, "loss": 0.0084, "step": 6493 }, { "epoch": 0.9581704168203615, "grad_norm": 1.1130073070526123, "learning_rate": 1.0606110830065131e-07, "loss": 0.011, "step": 6494 }, { "epoch": 0.9583179638509775, "grad_norm": 2.2371890544891357, "learning_rate": 1.0531420021761662e-07, "loss": 0.0579, "step": 6495 }, { "epoch": 0.9584655108815935, "grad_norm": 2.2667269706726074, "learning_rate": 1.0456991746806366e-07, "loss": 0.03, "step": 6496 }, { "epoch": 0.9586130579122095, "grad_norm": 0.9488065242767334, "learning_rate": 1.0382826024946891e-07, "loss": 0.0154, "step": 6497 }, { "epoch": 0.9587606049428256, "grad_norm": 2.35026478767395, "learning_rate": 1.0308922875861493e-07, "loss": 0.0377, "step": 6498 }, { "epoch": 0.9589081519734415, "grad_norm": 1.9522687196731567, "learning_rate": 1.0235282319158823e-07, "loss": 0.0769, "step": 6499 }, { "epoch": 0.9590556990040575, "grad_norm": 4.399130344390869, "learning_rate": 1.0161904374377696e-07, "loss": 0.0716, "step": 6500 }, { "epoch": 0.9590556990040575, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05545896664261818, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.2134, "eval_samples_per_second": 5.795, "eval_steps_per_second": 0.199, "step": 6500 }, { "epoch": 0.9592032460346736, "grad_norm": 1.29558265209198, "learning_rate": 1.0088789060987203e-07, "loss": 0.0439, "step": 6501 }, { "epoch": 0.9593507930652896, "grad_norm": 2.059356689453125, "learning_rate": 1.001593639838705e-07, "loss": 0.0357, "step": 6502 }, { "epoch": 0.9594983400959056, "grad_norm": 2.024535894393921, "learning_rate": 9.943346405906995e-08, "loss": 0.0594, "step": 6503 }, { "epoch": 0.9596458871265215, "grad_norm": 4.6369194984436035, "learning_rate": 9.871019102807078e-08, "loss": 0.032, "step": 6504 }, { "epoch": 0.9597934341571376, "grad_norm": 2.1632497310638428, "learning_rate": 9.798954508277836e-08, "loss": 0.0678, "step": 6505 }, { "epoch": 0.9599409811877536, "grad_norm": 1.632851004600525, "learning_rate": 9.727152641439863e-08, "loss": 0.0408, "step": 6506 }, { "epoch": 0.9600885282183697, "grad_norm": 2.7389848232269287, "learning_rate": 9.655613521344364e-08, "loss": 0.069, "step": 6507 }, { "epoch": 0.9602360752489856, "grad_norm": 1.2255512475967407, "learning_rate": 9.584337166972602e-08, "loss": 0.0368, "step": 6508 }, { "epoch": 0.9603836222796016, "grad_norm": 1.3319123983383179, "learning_rate": 9.513323597235891e-08, "loss": 0.0386, "step": 6509 }, { "epoch": 0.9605311693102176, "grad_norm": 1.6762546300888062, "learning_rate": 9.442572830976604e-08, "loss": 0.0321, "step": 6510 }, { "epoch": 0.9606787163408337, "grad_norm": 2.6606414318084717, "learning_rate": 9.372084886966392e-08, "loss": 0.0583, "step": 6511 }, { "epoch": 0.9608262633714496, "grad_norm": 3.4116148948669434, "learning_rate": 9.301859783907852e-08, "loss": 0.0465, "step": 6512 }, { "epoch": 0.9609738104020656, "grad_norm": 3.545358657836914, "learning_rate": 9.231897540433743e-08, "loss": 0.0457, "step": 6513 }, { "epoch": 0.9611213574326817, "grad_norm": 4.891258239746094, "learning_rate": 9.162198175106774e-08, "loss": 0.0335, "step": 6514 }, { "epoch": 0.9612689044632977, "grad_norm": 1.9941608905792236, "learning_rate": 9.092761706420261e-08, "loss": 0.0461, "step": 6515 }, { "epoch": 0.9614164514939136, "grad_norm": 2.0398828983306885, "learning_rate": 9.02358815279758e-08, "loss": 0.0509, "step": 6516 }, { "epoch": 0.9615639985245297, "grad_norm": 2.310847759246826, "learning_rate": 8.95467753259227e-08, "loss": 0.0463, "step": 6517 }, { "epoch": 0.9617115455551457, "grad_norm": 1.7646315097808838, "learning_rate": 8.886029864088375e-08, "loss": 0.0301, "step": 6518 }, { "epoch": 0.9618590925857617, "grad_norm": 2.1775879859924316, "learning_rate": 8.8176451655001e-08, "loss": 0.0298, "step": 6519 }, { "epoch": 0.9620066396163777, "grad_norm": 2.04054856300354, "learning_rate": 8.749523454971487e-08, "loss": 0.067, "step": 6520 }, { "epoch": 0.9620066396163777, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05519821122288704, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 51.0031, "eval_samples_per_second": 5.706, "eval_steps_per_second": 0.196, "step": 6520 }, { "epoch": 0.9621541866469937, "grad_norm": 1.858306884765625, "learning_rate": 8.681664750577413e-08, "loss": 0.0475, "step": 6521 }, { "epoch": 0.9623017336776097, "grad_norm": 1.6353979110717773, "learning_rate": 8.614069070322473e-08, "loss": 0.0516, "step": 6522 }, { "epoch": 0.9624492807082258, "grad_norm": 2.2677900791168213, "learning_rate": 8.546736432141656e-08, "loss": 0.0316, "step": 6523 }, { "epoch": 0.9625968277388418, "grad_norm": 2.1024563312530518, "learning_rate": 8.479666853900448e-08, "loss": 0.0266, "step": 6524 }, { "epoch": 0.9627443747694577, "grad_norm": 1.6655795574188232, "learning_rate": 8.412860353393947e-08, "loss": 0.0227, "step": 6525 }, { "epoch": 0.9628919218000738, "grad_norm": 3.0254976749420166, "learning_rate": 8.346316948347865e-08, "loss": 0.0635, "step": 6526 }, { "epoch": 0.9630394688306898, "grad_norm": 1.5075454711914062, "learning_rate": 8.280036656418078e-08, "loss": 0.0412, "step": 6527 }, { "epoch": 0.9631870158613058, "grad_norm": 1.4006476402282715, "learning_rate": 8.214019495190407e-08, "loss": 0.0334, "step": 6528 }, { "epoch": 0.9633345628919218, "grad_norm": 2.3075265884399414, "learning_rate": 8.148265482181173e-08, "loss": 0.0269, "step": 6529 }, { "epoch": 0.9634821099225378, "grad_norm": 1.0943922996520996, "learning_rate": 8.082774634836754e-08, "loss": 0.0353, "step": 6530 }, { "epoch": 0.9636296569531538, "grad_norm": 2.751830577850342, "learning_rate": 8.017546970533585e-08, "loss": 0.0331, "step": 6531 }, { "epoch": 0.9637772039837699, "grad_norm": 1.7253575325012207, "learning_rate": 7.952582506578487e-08, "loss": 0.0539, "step": 6532 }, { "epoch": 0.9639247510143858, "grad_norm": 2.3146719932556152, "learning_rate": 7.88788126020823e-08, "loss": 0.0372, "step": 6533 }, { "epoch": 0.9640722980450018, "grad_norm": 3.143972396850586, "learning_rate": 7.823443248589746e-08, "loss": 0.0955, "step": 6534 }, { "epoch": 0.9642198450756179, "grad_norm": 1.9397855997085571, "learning_rate": 7.759268488820471e-08, "loss": 0.0694, "step": 6535 }, { "epoch": 0.9643673921062339, "grad_norm": 2.692070484161377, "learning_rate": 7.695356997927561e-08, "loss": 0.0671, "step": 6536 }, { "epoch": 0.9645149391368498, "grad_norm": 5.281672954559326, "learning_rate": 7.631708792868453e-08, "loss": 0.0625, "step": 6537 }, { "epoch": 0.9646624861674659, "grad_norm": 2.2048943042755127, "learning_rate": 7.568323890530971e-08, "loss": 0.0905, "step": 6538 }, { "epoch": 0.9648100331980819, "grad_norm": 1.996284008026123, "learning_rate": 7.505202307732774e-08, "loss": 0.0426, "step": 6539 }, { "epoch": 0.9649575802286979, "grad_norm": 2.583498001098633, "learning_rate": 7.442344061221684e-08, "loss": 0.0626, "step": 6540 }, { "epoch": 0.9649575802286979, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05510440468788147, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.8894, "eval_samples_per_second": 5.833, "eval_steps_per_second": 0.2, "step": 6540 }, { "epoch": 0.9651051272593139, "grad_norm": 1.578550934791565, "learning_rate": 7.37974916767581e-08, "loss": 0.0493, "step": 6541 }, { "epoch": 0.9652526742899299, "grad_norm": 4.190537452697754, "learning_rate": 7.317417643703417e-08, "loss": 0.0503, "step": 6542 }, { "epoch": 0.9654002213205459, "grad_norm": 2.9617536067962646, "learning_rate": 7.255349505842502e-08, "loss": 0.1048, "step": 6543 }, { "epoch": 0.965547768351162, "grad_norm": 2.5437979698181152, "learning_rate": 7.193544770561777e-08, "loss": 0.0317, "step": 6544 }, { "epoch": 0.965695315381778, "grad_norm": 1.3040310144424438, "learning_rate": 7.132003454259461e-08, "loss": 0.0471, "step": 6545 }, { "epoch": 0.9658428624123939, "grad_norm": 2.7270450592041016, "learning_rate": 7.07072557326438e-08, "loss": 0.0799, "step": 6546 }, { "epoch": 0.96599040944301, "grad_norm": 2.9314723014831543, "learning_rate": 7.009711143835197e-08, "loss": 0.0726, "step": 6547 }, { "epoch": 0.966137956473626, "grad_norm": 1.3996440172195435, "learning_rate": 6.948960182160624e-08, "loss": 0.0272, "step": 6548 }, { "epoch": 0.966285503504242, "grad_norm": 3.6665632724761963, "learning_rate": 6.888472704359661e-08, "loss": 0.0339, "step": 6549 }, { "epoch": 0.966433050534858, "grad_norm": 5.509788513183594, "learning_rate": 6.828248726481357e-08, "loss": 0.0541, "step": 6550 }, { "epoch": 0.966580597565474, "grad_norm": 1.3570910692214966, "learning_rate": 6.768288264504597e-08, "loss": 0.0237, "step": 6551 }, { "epoch": 0.96672814459609, "grad_norm": 4.248430252075195, "learning_rate": 6.708591334338655e-08, "loss": 0.093, "step": 6552 }, { "epoch": 0.9668756916267061, "grad_norm": 2.7404448986053467, "learning_rate": 6.649157951822859e-08, "loss": 0.1065, "step": 6553 }, { "epoch": 0.967023238657322, "grad_norm": 3.5840132236480713, "learning_rate": 6.589988132726488e-08, "loss": 0.069, "step": 6554 }, { "epoch": 0.967170785687938, "grad_norm": 1.6288326978683472, "learning_rate": 6.53108189274887e-08, "loss": 0.0352, "step": 6555 }, { "epoch": 0.967318332718554, "grad_norm": 2.5016865730285645, "learning_rate": 6.472439247519502e-08, "loss": 0.043, "step": 6556 }, { "epoch": 0.9674658797491701, "grad_norm": 3.6651065349578857, "learning_rate": 6.414060212597939e-08, "loss": 0.1105, "step": 6557 }, { "epoch": 0.967613426779786, "grad_norm": 3.1000683307647705, "learning_rate": 6.35594480347368e-08, "loss": 0.0607, "step": 6558 }, { "epoch": 0.967760973810402, "grad_norm": 2.166266441345215, "learning_rate": 6.298093035566278e-08, "loss": 0.057, "step": 6559 }, { "epoch": 0.9679085208410181, "grad_norm": 1.484336495399475, "learning_rate": 6.240504924225566e-08, "loss": 0.0275, "step": 6560 }, { "epoch": 0.9679085208410181, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05525950714945793, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 51.4712, "eval_samples_per_second": 5.654, "eval_steps_per_second": 0.194, "step": 6560 }, { "epoch": 0.9680560678716341, "grad_norm": 3.557420015335083, "learning_rate": 6.183180484731211e-08, "loss": 0.0435, "step": 6561 }, { "epoch": 0.96820361490225, "grad_norm": 1.5809874534606934, "learning_rate": 6.126119732292935e-08, "loss": 0.0234, "step": 6562 }, { "epoch": 0.9683511619328661, "grad_norm": 1.9705631732940674, "learning_rate": 6.069322682050516e-08, "loss": 0.0528, "step": 6563 }, { "epoch": 0.9684987089634821, "grad_norm": 1.1486409902572632, "learning_rate": 6.0127893490739e-08, "loss": 0.0193, "step": 6564 }, { "epoch": 0.9686462559940981, "grad_norm": 2.3699090480804443, "learning_rate": 5.956519748362755e-08, "loss": 0.1015, "step": 6565 }, { "epoch": 0.9687938030247141, "grad_norm": 1.836517095565796, "learning_rate": 5.900513894847027e-08, "loss": 0.0327, "step": 6566 }, { "epoch": 0.9689413500553301, "grad_norm": 6.853978633880615, "learning_rate": 5.8447718033868286e-08, "loss": 0.0787, "step": 6567 }, { "epoch": 0.9690888970859461, "grad_norm": 5.050530910491943, "learning_rate": 5.7892934887717746e-08, "loss": 0.1619, "step": 6568 }, { "epoch": 0.9692364441165622, "grad_norm": 1.7064549922943115, "learning_rate": 5.734078965721867e-08, "loss": 0.0448, "step": 6569 }, { "epoch": 0.9693839911471782, "grad_norm": 2.607844114303589, "learning_rate": 5.679128248887167e-08, "loss": 0.0592, "step": 6570 }, { "epoch": 0.9695315381777941, "grad_norm": 1.541534423828125, "learning_rate": 5.624441352847565e-08, "loss": 0.0363, "step": 6571 }, { "epoch": 0.9696790852084102, "grad_norm": 4.141221046447754, "learning_rate": 5.5700182921128995e-08, "loss": 0.0448, "step": 6572 }, { "epoch": 0.9698266322390262, "grad_norm": 2.9067554473876953, "learning_rate": 5.515859081123287e-08, "loss": 0.1044, "step": 6573 }, { "epoch": 0.9699741792696422, "grad_norm": 4.378636360168457, "learning_rate": 5.461963734248565e-08, "loss": 0.0584, "step": 6574 }, { "epoch": 0.9701217263002582, "grad_norm": 2.6484222412109375, "learning_rate": 5.4083322657886293e-08, "loss": 0.0497, "step": 6575 }, { "epoch": 0.9702692733308742, "grad_norm": 1.6235097646713257, "learning_rate": 5.3549646899733186e-08, "loss": 0.0712, "step": 6576 }, { "epoch": 0.9704168203614902, "grad_norm": 5.442991733551025, "learning_rate": 5.301861020962751e-08, "loss": 0.0672, "step": 6577 }, { "epoch": 0.9705643673921063, "grad_norm": 1.869346022605896, "learning_rate": 5.249021272846766e-08, "loss": 0.0376, "step": 6578 }, { "epoch": 0.9707119144227222, "grad_norm": 1.3902775049209595, "learning_rate": 5.1964454596450387e-08, "loss": 0.0533, "step": 6579 }, { "epoch": 0.9708594614533382, "grad_norm": 1.305187463760376, "learning_rate": 5.14413359530741e-08, "loss": 0.0196, "step": 6580 }, { "epoch": 0.9708594614533382, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.0551002100110054, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.6556, "eval_samples_per_second": 5.745, "eval_steps_per_second": 0.197, "step": 6580 }, { "epoch": 0.9710070084839543, "grad_norm": 1.7406283617019653, "learning_rate": 5.0920856937137775e-08, "loss": 0.0479, "step": 6581 }, { "epoch": 0.9711545555145703, "grad_norm": 1.1636487245559692, "learning_rate": 5.040301768673761e-08, "loss": 0.0181, "step": 6582 }, { "epoch": 0.9713021025451862, "grad_norm": 3.7341973781585693, "learning_rate": 4.9887818339272586e-08, "loss": 0.0836, "step": 6583 }, { "epoch": 0.9714496495758023, "grad_norm": 2.6138193607330322, "learning_rate": 4.93752590314367e-08, "loss": 0.0552, "step": 6584 }, { "epoch": 0.9715971966064183, "grad_norm": 9.698671340942383, "learning_rate": 4.886533989922781e-08, "loss": 0.0285, "step": 6585 }, { "epoch": 0.9717447436370343, "grad_norm": 2.1348674297332764, "learning_rate": 4.8358061077942163e-08, "loss": 0.0314, "step": 6586 }, { "epoch": 0.9718922906676503, "grad_norm": 1.8118903636932373, "learning_rate": 4.785342270217319e-08, "loss": 0.0358, "step": 6587 }, { "epoch": 0.9720398376982663, "grad_norm": 1.8762260675430298, "learning_rate": 4.735142490581601e-08, "loss": 0.0485, "step": 6588 }, { "epoch": 0.9721873847288823, "grad_norm": 2.3938488960266113, "learning_rate": 4.6852067822065195e-08, "loss": 0.0594, "step": 6589 }, { "epoch": 0.9723349317594984, "grad_norm": 2.4276046752929688, "learning_rate": 4.6355351583412534e-08, "loss": 0.0807, "step": 6590 }, { "epoch": 0.9724824787901144, "grad_norm": 1.4129104614257812, "learning_rate": 4.5861276321651495e-08, "loss": 0.0256, "step": 6591 }, { "epoch": 0.9726300258207303, "grad_norm": 2.190324544906616, "learning_rate": 4.5369842167874986e-08, "loss": 0.033, "step": 6592 }, { "epoch": 0.9727775728513464, "grad_norm": 2.735649347305298, "learning_rate": 4.4881049252472056e-08, "loss": 0.055, "step": 6593 }, { "epoch": 0.9729251198819624, "grad_norm": 1.2812883853912354, "learning_rate": 4.439489770513339e-08, "loss": 0.0378, "step": 6594 }, { "epoch": 0.9730726669125784, "grad_norm": 1.0837053060531616, "learning_rate": 4.391138765484915e-08, "loss": 0.0207, "step": 6595 }, { "epoch": 0.9732202139431944, "grad_norm": 2.2297704219818115, "learning_rate": 4.343051922990782e-08, "loss": 0.0474, "step": 6596 }, { "epoch": 0.9733677609738104, "grad_norm": 4.039227485656738, "learning_rate": 4.295229255789623e-08, "loss": 0.058, "step": 6597 }, { "epoch": 0.9735153080044264, "grad_norm": 3.033900737762451, "learning_rate": 4.247670776570178e-08, "loss": 0.0861, "step": 6598 }, { "epoch": 0.9736628550350425, "grad_norm": 2.549617052078247, "learning_rate": 4.200376497951131e-08, "loss": 0.1002, "step": 6599 }, { "epoch": 0.9738104020656584, "grad_norm": 2.654305934906006, "learning_rate": 4.153346432480776e-08, "loss": 0.0986, "step": 6600 }, { "epoch": 0.9738104020656584, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05549389496445656, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.8482, "eval_samples_per_second": 5.723, "eval_steps_per_second": 0.197, "step": 6600 }, { "epoch": 0.9739579490962744, "grad_norm": 4.089635372161865, "learning_rate": 4.106580592637577e-08, "loss": 0.0861, "step": 6601 }, { "epoch": 0.9741054961268905, "grad_norm": 3.2651960849761963, "learning_rate": 4.060078990829719e-08, "loss": 0.0572, "step": 6602 }, { "epoch": 0.9742530431575065, "grad_norm": 1.9765956401824951, "learning_rate": 4.0138416393955545e-08, "loss": 0.0234, "step": 6603 }, { "epoch": 0.9744005901881224, "grad_norm": 3.121824264526367, "learning_rate": 3.967868550602827e-08, "loss": 0.0691, "step": 6604 }, { "epoch": 0.9745481372187385, "grad_norm": 2.891749620437622, "learning_rate": 3.922159736649889e-08, "loss": 0.0814, "step": 6605 }, { "epoch": 0.9746956842493545, "grad_norm": 3.361797332763672, "learning_rate": 3.8767152096641504e-08, "loss": 0.0982, "step": 6606 }, { "epoch": 0.9748432312799705, "grad_norm": 2.2206366062164307, "learning_rate": 3.831534981703522e-08, "loss": 0.0196, "step": 6607 }, { "epoch": 0.9749907783105864, "grad_norm": 1.349016785621643, "learning_rate": 3.7866190647554145e-08, "loss": 0.0523, "step": 6608 }, { "epoch": 0.9751383253412025, "grad_norm": 2.72078537940979, "learning_rate": 3.7419674707374064e-08, "loss": 0.089, "step": 6609 }, { "epoch": 0.9752858723718185, "grad_norm": 1.52629816532135, "learning_rate": 3.697580211496798e-08, "loss": 0.0607, "step": 6610 }, { "epoch": 0.9754334194024346, "grad_norm": 1.2776182889938354, "learning_rate": 3.6534572988106144e-08, "loss": 0.0283, "step": 6611 }, { "epoch": 0.9755809664330506, "grad_norm": 1.8487846851348877, "learning_rate": 3.6095987443860445e-08, "loss": 0.0396, "step": 6612 }, { "epoch": 0.9757285134636665, "grad_norm": 1.9192994832992554, "learning_rate": 3.5660045598597814e-08, "loss": 0.0598, "step": 6613 }, { "epoch": 0.9758760604942825, "grad_norm": 1.6062878370285034, "learning_rate": 3.522674756798794e-08, "loss": 0.0665, "step": 6614 }, { "epoch": 0.9760236075248986, "grad_norm": 1.8371132612228394, "learning_rate": 3.479609346699553e-08, "loss": 0.0509, "step": 6615 }, { "epoch": 0.9761711545555146, "grad_norm": 2.38232159614563, "learning_rate": 3.4368083409885844e-08, "loss": 0.0964, "step": 6616 }, { "epoch": 0.9763187015861305, "grad_norm": 2.736990451812744, "learning_rate": 3.394271751021916e-08, "loss": 0.141, "step": 6617 }, { "epoch": 0.9764662486167466, "grad_norm": 6.989748954772949, "learning_rate": 3.351999588085963e-08, "loss": 0.0612, "step": 6618 }, { "epoch": 0.9766137956473626, "grad_norm": 1.7669501304626465, "learning_rate": 3.309991863396644e-08, "loss": 0.0488, "step": 6619 }, { "epoch": 0.9767613426779787, "grad_norm": 1.4926352500915527, "learning_rate": 3.2682485880997096e-08, "loss": 0.0275, "step": 6620 }, { "epoch": 0.9767613426779787, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.055932920426130295, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.2316, "eval_samples_per_second": 5.911, "eval_steps_per_second": 0.203, "step": 6620 }, { "epoch": 0.9769088897085946, "grad_norm": 3.745084047317505, "learning_rate": 3.226769773270855e-08, "loss": 0.0572, "step": 6621 }, { "epoch": 0.9770564367392106, "grad_norm": 1.6596072912216187, "learning_rate": 3.1855554299156096e-08, "loss": 0.0188, "step": 6622 }, { "epoch": 0.9772039837698266, "grad_norm": 2.8496854305267334, "learning_rate": 3.1446055689690056e-08, "loss": 0.0525, "step": 6623 }, { "epoch": 0.9773515308004427, "grad_norm": 2.801842451095581, "learning_rate": 3.103920201296462e-08, "loss": 0.0328, "step": 6624 }, { "epoch": 0.9774990778310586, "grad_norm": 2.546543598175049, "learning_rate": 3.063499337692788e-08, "loss": 0.0721, "step": 6625 }, { "epoch": 0.9776466248616746, "grad_norm": 2.072725534439087, "learning_rate": 3.023342988882849e-08, "loss": 0.037, "step": 6626 }, { "epoch": 0.9777941718922907, "grad_norm": 2.3779847621917725, "learning_rate": 2.983451165521123e-08, "loss": 0.0952, "step": 6627 }, { "epoch": 0.9779417189229067, "grad_norm": 2.976325035095215, "learning_rate": 2.9438238781921424e-08, "loss": 0.071, "step": 6628 }, { "epoch": 0.9780892659535226, "grad_norm": 3.863071918487549, "learning_rate": 2.9044611374099418e-08, "loss": 0.1468, "step": 6629 }, { "epoch": 0.9782368129841387, "grad_norm": 4.173577785491943, "learning_rate": 2.8653629536187222e-08, "loss": 0.0564, "step": 6630 }, { "epoch": 0.9783843600147547, "grad_norm": 3.290264844894409, "learning_rate": 2.8265293371922965e-08, "loss": 0.0889, "step": 6631 }, { "epoch": 0.9785319070453707, "grad_norm": 1.5761719942092896, "learning_rate": 2.7879602984342002e-08, "loss": 0.0203, "step": 6632 }, { "epoch": 0.9786794540759867, "grad_norm": 3.7511749267578125, "learning_rate": 2.7496558475778035e-08, "loss": 0.0736, "step": 6633 }, { "epoch": 0.9788270011066027, "grad_norm": 4.070005893707275, "learning_rate": 2.7116159947865318e-08, "loss": 0.0997, "step": 6634 }, { "epoch": 0.9789745481372187, "grad_norm": 2.2428393363952637, "learning_rate": 2.6738407501533113e-08, "loss": 0.064, "step": 6635 }, { "epoch": 0.9791220951678348, "grad_norm": 1.4023271799087524, "learning_rate": 2.636330123701014e-08, "loss": 0.0415, "step": 6636 }, { "epoch": 0.9792696421984508, "grad_norm": 1.616129755973816, "learning_rate": 2.599084125382123e-08, "loss": 0.0531, "step": 6637 }, { "epoch": 0.9794171892290667, "grad_norm": 4.515521049499512, "learning_rate": 2.5621027650790664e-08, "loss": 0.2382, "step": 6638 }, { "epoch": 0.9795647362596828, "grad_norm": 2.131122589111328, "learning_rate": 2.5253860526042173e-08, "loss": 0.0389, "step": 6639 }, { "epoch": 0.9797122832902988, "grad_norm": 1.6050862073898315, "learning_rate": 2.4889339976992277e-08, "loss": 0.0358, "step": 6640 }, { "epoch": 0.9797122832902988, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05516430363059044, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.5399, "eval_samples_per_second": 5.874, "eval_steps_per_second": 0.202, "step": 6640 }, { "epoch": 0.9798598303209148, "grad_norm": 1.8880443572998047, "learning_rate": 2.4527466100360277e-08, "loss": 0.0747, "step": 6641 }, { "epoch": 0.9800073773515308, "grad_norm": 5.500354290008545, "learning_rate": 2.4168238992160477e-08, "loss": 0.0474, "step": 6642 }, { "epoch": 0.9801549243821468, "grad_norm": 2.404766321182251, "learning_rate": 2.3811658747705525e-08, "loss": 0.0494, "step": 6643 }, { "epoch": 0.9803024714127628, "grad_norm": 2.824960947036743, "learning_rate": 2.3457725461607518e-08, "loss": 0.074, "step": 6644 }, { "epoch": 0.9804500184433789, "grad_norm": 1.472124457359314, "learning_rate": 2.3106439227773558e-08, "loss": 0.0277, "step": 6645 }, { "epoch": 0.9805975654739948, "grad_norm": 0.9315122365951538, "learning_rate": 2.27578001394102e-08, "loss": 0.0097, "step": 6646 }, { "epoch": 0.9807451125046108, "grad_norm": 2.713543176651001, "learning_rate": 2.241180828902012e-08, "loss": 0.0622, "step": 6647 }, { "epoch": 0.9808926595352269, "grad_norm": 5.194150447845459, "learning_rate": 2.2068463768405435e-08, "loss": 0.0851, "step": 6648 }, { "epoch": 0.9810402065658429, "grad_norm": 5.96819543838501, "learning_rate": 2.1727766668664385e-08, "loss": 0.0849, "step": 6649 }, { "epoch": 0.9811877535964588, "grad_norm": 1.1071208715438843, "learning_rate": 2.138971708019355e-08, "loss": 0.0268, "step": 6650 }, { "epoch": 0.9813353006270749, "grad_norm": 2.806211471557617, "learning_rate": 2.105431509268563e-08, "loss": 0.0916, "step": 6651 }, { "epoch": 0.9814828476576909, "grad_norm": 3.1690165996551514, "learning_rate": 2.0721560795133876e-08, "loss": 0.0993, "step": 6652 }, { "epoch": 0.9816303946883069, "grad_norm": 2.2227795124053955, "learning_rate": 2.0391454275827673e-08, "loss": 0.0388, "step": 6653 }, { "epoch": 0.9817779417189229, "grad_norm": 0.5616309642791748, "learning_rate": 2.0063995622350287e-08, "loss": 0.0045, "step": 6654 }, { "epoch": 0.9819254887495389, "grad_norm": 2.450514316558838, "learning_rate": 1.9739184921588885e-08, "loss": 0.0688, "step": 6655 }, { "epoch": 0.9820730357801549, "grad_norm": 2.0356853008270264, "learning_rate": 1.9417022259723418e-08, "loss": 0.0511, "step": 6656 }, { "epoch": 0.982220582810771, "grad_norm": 2.293266773223877, "learning_rate": 1.9097507722231068e-08, "loss": 0.0289, "step": 6657 }, { "epoch": 0.982368129841387, "grad_norm": 2.306947708129883, "learning_rate": 1.8780641393890685e-08, "loss": 0.0559, "step": 6658 }, { "epoch": 0.9825156768720029, "grad_norm": 0.8441616296768188, "learning_rate": 1.84664233587728e-08, "loss": 0.0107, "step": 6659 }, { "epoch": 0.982663223902619, "grad_norm": 1.6219745874404907, "learning_rate": 1.815485370025072e-08, "loss": 0.0266, "step": 6660 }, { "epoch": 0.982663223902619, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05498597025871277, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.5621, "eval_samples_per_second": 5.871, "eval_steps_per_second": 0.202, "step": 6660 }, { "epoch": 0.982810770933235, "grad_norm": 1.976530909538269, "learning_rate": 1.784593250099054e-08, "loss": 0.0253, "step": 6661 }, { "epoch": 0.982958317963851, "grad_norm": 2.217996120452881, "learning_rate": 1.7539659842957803e-08, "loss": 0.0555, "step": 6662 }, { "epoch": 0.983105864994467, "grad_norm": 2.010887861251831, "learning_rate": 1.7236035807416397e-08, "loss": 0.0421, "step": 6663 }, { "epoch": 0.983253412025083, "grad_norm": 0.6405054926872253, "learning_rate": 1.6935060474926323e-08, "loss": 0.0071, "step": 6664 }, { "epoch": 0.983400959055699, "grad_norm": 2.444506883621216, "learning_rate": 1.6636733925342595e-08, "loss": 0.033, "step": 6665 }, { "epoch": 0.983548506086315, "grad_norm": 1.0735312700271606, "learning_rate": 1.6341056237820784e-08, "loss": 0.0151, "step": 6666 }, { "epoch": 0.983696053116931, "grad_norm": 2.435049533843994, "learning_rate": 1.6048027490812577e-08, "loss": 0.0543, "step": 6667 }, { "epoch": 0.983843600147547, "grad_norm": 2.4513931274414062, "learning_rate": 1.5757647762065786e-08, "loss": 0.0621, "step": 6668 }, { "epoch": 0.983991147178163, "grad_norm": 1.8004716634750366, "learning_rate": 1.5469917128626554e-08, "loss": 0.025, "step": 6669 }, { "epoch": 0.9841386942087791, "grad_norm": 1.50918710231781, "learning_rate": 1.518483566683826e-08, "loss": 0.0401, "step": 6670 }, { "epoch": 0.984286241239395, "grad_norm": 2.1539971828460693, "learning_rate": 1.4902403452339287e-08, "loss": 0.0664, "step": 6671 }, { "epoch": 0.984433788270011, "grad_norm": 7.559150218963623, "learning_rate": 1.4622620560069688e-08, "loss": 0.0937, "step": 6672 }, { "epoch": 0.9845813353006271, "grad_norm": 1.530104637145996, "learning_rate": 1.4345487064260089e-08, "loss": 0.0648, "step": 6673 }, { "epoch": 0.9847288823312431, "grad_norm": 1.3213176727294922, "learning_rate": 1.4071003038443887e-08, "loss": 0.0344, "step": 6674 }, { "epoch": 0.984876429361859, "grad_norm": 1.8271011114120483, "learning_rate": 1.3799168555449494e-08, "loss": 0.0243, "step": 6675 }, { "epoch": 0.9850239763924751, "grad_norm": 1.226176142692566, "learning_rate": 1.3529983687400328e-08, "loss": 0.0178, "step": 6676 }, { "epoch": 0.9851715234230911, "grad_norm": 0.6308827996253967, "learning_rate": 1.3263448505720366e-08, "loss": 0.007, "step": 6677 }, { "epoch": 0.9853190704537071, "grad_norm": 2.996870517730713, "learning_rate": 1.2999563081127486e-08, "loss": 0.0786, "step": 6678 }, { "epoch": 0.9854666174843232, "grad_norm": 2.7150681018829346, "learning_rate": 1.2738327483639013e-08, "loss": 0.0394, "step": 6679 }, { "epoch": 0.9856141645149391, "grad_norm": 2.043134927749634, "learning_rate": 1.2479741782566168e-08, "loss": 0.0759, "step": 6680 }, { "epoch": 0.9856141645149391, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05593600869178772, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 49.7146, "eval_samples_per_second": 5.853, "eval_steps_per_second": 0.201, "step": 6680 }, { "epoch": 0.9857617115455551, "grad_norm": 1.8694920539855957, "learning_rate": 1.2223806046520737e-08, "loss": 0.0362, "step": 6681 }, { "epoch": 0.9859092585761712, "grad_norm": 2.6727139949798584, "learning_rate": 1.1970520343408398e-08, "loss": 0.073, "step": 6682 }, { "epoch": 0.9860568056067872, "grad_norm": 0.7778927683830261, "learning_rate": 1.1719884740433174e-08, "loss": 0.0056, "step": 6683 }, { "epoch": 0.9862043526374031, "grad_norm": 2.3464653491973877, "learning_rate": 1.1471899304095202e-08, "loss": 0.0314, "step": 6684 }, { "epoch": 0.9863518996680192, "grad_norm": 0.8709948658943176, "learning_rate": 1.122656410019296e-08, "loss": 0.0199, "step": 6685 }, { "epoch": 0.9864994466986352, "grad_norm": 6.606779098510742, "learning_rate": 1.0983879193819936e-08, "loss": 0.108, "step": 6686 }, { "epoch": 0.9866469937292512, "grad_norm": 4.287250995635986, "learning_rate": 1.074384464936684e-08, "loss": 0.0716, "step": 6687 }, { "epoch": 0.9867945407598672, "grad_norm": 0.7073714733123779, "learning_rate": 1.0506460530521622e-08, "loss": 0.0188, "step": 6688 }, { "epoch": 0.9869420877904832, "grad_norm": 4.2220563888549805, "learning_rate": 1.0271726900269452e-08, "loss": 0.0769, "step": 6689 }, { "epoch": 0.9870896348210992, "grad_norm": 1.6127564907073975, "learning_rate": 1.003964382089162e-08, "loss": 0.0457, "step": 6690 }, { "epoch": 0.9872371818517153, "grad_norm": 2.2320802211761475, "learning_rate": 9.810211353965537e-09, "loss": 0.047, "step": 6691 }, { "epoch": 0.9873847288823312, "grad_norm": 3.913719654083252, "learning_rate": 9.583429560365843e-09, "loss": 0.0715, "step": 6692 }, { "epoch": 0.9875322759129472, "grad_norm": 2.9218332767486572, "learning_rate": 9.359298500264402e-09, "loss": 0.0513, "step": 6693 }, { "epoch": 0.9876798229435633, "grad_norm": 1.7875134944915771, "learning_rate": 9.137818233129203e-09, "loss": 0.0406, "step": 6694 }, { "epoch": 0.9878273699741793, "grad_norm": 2.7455263137817383, "learning_rate": 8.91898881772657e-09, "loss": 0.0704, "step": 6695 }, { "epoch": 0.9879749170047952, "grad_norm": 0.6625596880912781, "learning_rate": 8.702810312115618e-09, "loss": 0.006, "step": 6696 }, { "epoch": 0.9881224640354113, "grad_norm": 1.6851662397384644, "learning_rate": 8.489282773656016e-09, "loss": 0.0527, "step": 6697 }, { "epoch": 0.9882700110660273, "grad_norm": 2.4347875118255615, "learning_rate": 8.278406259001337e-09, "loss": 0.0673, "step": 6698 }, { "epoch": 0.9884175580966433, "grad_norm": 5.950766563415527, "learning_rate": 8.07018082410349e-09, "loss": 0.091, "step": 6699 }, { "epoch": 0.9885651051272594, "grad_norm": 1.7334251403808594, "learning_rate": 7.864606524211616e-09, "loss": 0.0396, "step": 6700 }, { "epoch": 0.9885651051272594, "eval_accuracy": 0.9797395079594791, "eval_f1": 0.9653465346534653, "eval_loss": 0.05492059141397476, "eval_precision": 0.9848484848484849, "eval_recall": 0.9466019417475728, "eval_runtime": 49.3051, "eval_samples_per_second": 5.902, "eval_steps_per_second": 0.203, "step": 6700 }, { "epoch": 0.9887126521578753, "grad_norm": 0.30346840620040894, "learning_rate": 7.661683413868748e-09, "loss": 0.0029, "step": 6701 }, { "epoch": 0.9888601991884913, "grad_norm": 3.611311674118042, "learning_rate": 7.461411546916264e-09, "loss": 0.0608, "step": 6702 }, { "epoch": 0.9890077462191074, "grad_norm": 3.273043632507324, "learning_rate": 7.263790976492769e-09, "loss": 0.0737, "step": 6703 }, { "epoch": 0.9891552932497234, "grad_norm": 1.5908385515213013, "learning_rate": 7.068821755030763e-09, "loss": 0.0437, "step": 6704 }, { "epoch": 0.9893028402803393, "grad_norm": 2.5678610801696777, "learning_rate": 6.876503934262202e-09, "loss": 0.1046, "step": 6705 }, { "epoch": 0.9894503873109554, "grad_norm": 1.9680798053741455, "learning_rate": 6.686837565215154e-09, "loss": 0.0491, "step": 6706 }, { "epoch": 0.9895979343415714, "grad_norm": 3.603398084640503, "learning_rate": 6.499822698210478e-09, "loss": 0.0613, "step": 6707 }, { "epoch": 0.9897454813721874, "grad_norm": 2.4477665424346924, "learning_rate": 6.315459382871813e-09, "loss": 0.0667, "step": 6708 }, { "epoch": 0.9898930284028034, "grad_norm": 1.95452082157135, "learning_rate": 6.1337476681122556e-09, "loss": 0.0635, "step": 6709 }, { "epoch": 0.9900405754334194, "grad_norm": 0.929094672203064, "learning_rate": 5.95468760214879e-09, "loss": 0.0181, "step": 6710 }, { "epoch": 0.9901881224640354, "grad_norm": 1.7649720907211304, "learning_rate": 5.77827923248786e-09, "loss": 0.0337, "step": 6711 }, { "epoch": 0.9903356694946515, "grad_norm": 2.4020333290100098, "learning_rate": 5.604522605937579e-09, "loss": 0.0745, "step": 6712 }, { "epoch": 0.9904832165252674, "grad_norm": 1.8048148155212402, "learning_rate": 5.433417768598847e-09, "loss": 0.0588, "step": 6713 }, { "epoch": 0.9906307635558834, "grad_norm": 1.9580947160720825, "learning_rate": 5.264964765870906e-09, "loss": 0.0477, "step": 6714 }, { "epoch": 0.9907783105864995, "grad_norm": 1.246323823928833, "learning_rate": 5.099163642449112e-09, "loss": 0.0336, "step": 6715 }, { "epoch": 0.9909258576171155, "grad_norm": 1.8688416481018066, "learning_rate": 4.9360144423260535e-09, "loss": 0.0259, "step": 6716 }, { "epoch": 0.9910734046477314, "grad_norm": 1.5381361246109009, "learning_rate": 4.775517208788217e-09, "loss": 0.0298, "step": 6717 }, { "epoch": 0.9912209516783475, "grad_norm": 1.709076166152954, "learning_rate": 4.617671984420425e-09, "loss": 0.0542, "step": 6718 }, { "epoch": 0.9913684987089635, "grad_norm": 1.3890794515609741, "learning_rate": 4.462478811104731e-09, "loss": 0.0558, "step": 6719 }, { "epoch": 0.9915160457395795, "grad_norm": 0.8841899633407593, "learning_rate": 4.309937730015978e-09, "loss": 0.0172, "step": 6720 }, { "epoch": 0.9915160457395795, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055379100143909454, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 50.3029, "eval_samples_per_second": 5.785, "eval_steps_per_second": 0.199, "step": 6720 }, { "epoch": 0.9916635927701954, "grad_norm": 1.6927621364593506, "learning_rate": 4.160048781628456e-09, "loss": 0.0152, "step": 6721 }, { "epoch": 0.9918111398008115, "grad_norm": 0.853327214717865, "learning_rate": 4.012812005712574e-09, "loss": 0.0098, "step": 6722 }, { "epoch": 0.9919586868314275, "grad_norm": 3.5236921310424805, "learning_rate": 3.8682274413337495e-09, "loss": 0.0527, "step": 6723 }, { "epoch": 0.9921062338620436, "grad_norm": 2.9586477279663086, "learning_rate": 3.726295126853519e-09, "loss": 0.0441, "step": 6724 }, { "epoch": 0.9922537808926596, "grad_norm": 0.3246397376060486, "learning_rate": 3.5870150999317567e-09, "loss": 0.0029, "step": 6725 }, { "epoch": 0.9924013279232755, "grad_norm": 1.3314762115478516, "learning_rate": 3.450387397522237e-09, "loss": 0.025, "step": 6726 }, { "epoch": 0.9925488749538915, "grad_norm": 1.7466126680374146, "learning_rate": 3.3164120558759614e-09, "loss": 0.0336, "step": 6727 }, { "epoch": 0.9926964219845076, "grad_norm": 0.8817312121391296, "learning_rate": 3.185089110542272e-09, "loss": 0.0208, "step": 6728 }, { "epoch": 0.9928439690151236, "grad_norm": 1.9117987155914307, "learning_rate": 3.056418596362187e-09, "loss": 0.0431, "step": 6729 }, { "epoch": 0.9929915160457395, "grad_norm": 5.536546230316162, "learning_rate": 2.9304005474761755e-09, "loss": 0.1609, "step": 6730 }, { "epoch": 0.9931390630763556, "grad_norm": 4.430239677429199, "learning_rate": 2.8070349973219336e-09, "loss": 0.1037, "step": 6731 }, { "epoch": 0.9932866101069716, "grad_norm": 1.1493936777114868, "learning_rate": 2.6863219786299466e-09, "loss": 0.0348, "step": 6732 }, { "epoch": 0.9934341571375876, "grad_norm": 1.1461900472640991, "learning_rate": 2.568261523430149e-09, "loss": 0.0286, "step": 6733 }, { "epoch": 0.9935817041682036, "grad_norm": 1.5313574075698853, "learning_rate": 2.452853663046373e-09, "loss": 0.0368, "step": 6734 }, { "epoch": 0.9937292511988196, "grad_norm": 1.981681227684021, "learning_rate": 2.3400984280985695e-09, "loss": 0.0701, "step": 6735 }, { "epoch": 0.9938767982294356, "grad_norm": 2.576758623123169, "learning_rate": 2.229995848506139e-09, "loss": 0.0525, "step": 6736 }, { "epoch": 0.9940243452600517, "grad_norm": 2.1853508949279785, "learning_rate": 2.1225459534801596e-09, "loss": 0.0543, "step": 6737 }, { "epoch": 0.9941718922906676, "grad_norm": 3.452421188354492, "learning_rate": 2.017748771531158e-09, "loss": 0.0628, "step": 6738 }, { "epoch": 0.9943194393212836, "grad_norm": 1.7515523433685303, "learning_rate": 1.915604330464671e-09, "loss": 0.0685, "step": 6739 }, { "epoch": 0.9944669863518997, "grad_norm": 2.203991413116455, "learning_rate": 1.8161126573823517e-09, "loss": 0.0444, "step": 6740 }, { "epoch": 0.9944669863518997, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.055171407759189606, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.6957, "eval_samples_per_second": 5.976, "eval_steps_per_second": 0.205, "step": 6740 }, { "epoch": 0.9946145333825157, "grad_norm": 2.4897584915161133, "learning_rate": 1.7192737786819736e-09, "loss": 0.0722, "step": 6741 }, { "epoch": 0.9947620804131316, "grad_norm": 1.9944440126419067, "learning_rate": 1.6250877200574278e-09, "loss": 0.0995, "step": 6742 }, { "epoch": 0.9949096274437477, "grad_norm": 2.042222738265991, "learning_rate": 1.5335545064998346e-09, "loss": 0.0642, "step": 6743 }, { "epoch": 0.9950571744743637, "grad_norm": 1.745491623878479, "learning_rate": 1.4446741622942128e-09, "loss": 0.0506, "step": 6744 }, { "epoch": 0.9952047215049797, "grad_norm": 0.9310182929039001, "learning_rate": 1.3584467110228095e-09, "loss": 0.0122, "step": 6745 }, { "epoch": 0.9953522685355958, "grad_norm": 1.8398277759552002, "learning_rate": 1.2748721755651005e-09, "loss": 0.0261, "step": 6746 }, { "epoch": 0.9954998155662117, "grad_norm": 1.677306890487671, "learning_rate": 1.1939505780966809e-09, "loss": 0.0358, "step": 6747 }, { "epoch": 0.9956473625968277, "grad_norm": 1.2638682126998901, "learning_rate": 1.115681940085933e-09, "loss": 0.0283, "step": 6748 }, { "epoch": 0.9957949096274438, "grad_norm": 3.8664028644561768, "learning_rate": 1.040066282300689e-09, "loss": 0.0514, "step": 6749 }, { "epoch": 0.9959424566580598, "grad_norm": 1.0908302068710327, "learning_rate": 9.671036248048993e-10, "loss": 0.0171, "step": 6750 }, { "epoch": 0.9960900036886757, "grad_norm": 4.517603874206543, "learning_rate": 8.967939869553022e-10, "loss": 0.0963, "step": 6751 }, { "epoch": 0.9962375507192918, "grad_norm": 9.87070369720459, "learning_rate": 8.291373874091957e-10, "loss": 0.0511, "step": 6752 }, { "epoch": 0.9963850977499078, "grad_norm": 1.7819442749023438, "learning_rate": 7.641338441166657e-10, "loss": 0.0142, "step": 6753 }, { "epoch": 0.9965326447805238, "grad_norm": 1.9164355993270874, "learning_rate": 7.017833743261371e-10, "loss": 0.0333, "step": 6754 }, { "epoch": 0.9966801918111398, "grad_norm": 1.178529143333435, "learning_rate": 6.420859945788227e-10, "loss": 0.0175, "step": 6755 }, { "epoch": 0.9968277388417558, "grad_norm": 3.305959701538086, "learning_rate": 5.850417207153847e-10, "loss": 0.0576, "step": 6756 }, { "epoch": 0.9969752858723718, "grad_norm": 3.1544477939605713, "learning_rate": 5.306505678714934e-10, "loss": 0.0754, "step": 6757 }, { "epoch": 0.9971228329029879, "grad_norm": 1.3226299285888672, "learning_rate": 4.789125504778281e-10, "loss": 0.0261, "step": 6758 }, { "epoch": 0.9972703799336038, "grad_norm": 3.1813669204711914, "learning_rate": 4.2982768226229597e-10, "loss": 0.1108, "step": 6759 }, { "epoch": 0.9974179269642198, "grad_norm": 2.4142541885375977, "learning_rate": 3.8339597624892366e-10, "loss": 0.0481, "step": 6760 }, { "epoch": 0.9974179269642198, "eval_accuracy": 0.9782923299565847, "eval_f1": 0.9629629629629629, "eval_loss": 0.05494352802634239, "eval_precision": 0.9798994974874372, "eval_recall": 0.9466019417475728, "eval_runtime": 48.9599, "eval_samples_per_second": 5.944, "eval_steps_per_second": 0.204, "step": 6760 }, { "epoch": 0.9975654739948359, "grad_norm": 1.254651665687561, "learning_rate": 3.396174447556355e-10, "loss": 0.0174, "step": 6761 }, { "epoch": 0.9977130210254519, "grad_norm": 2.606041669845581, "learning_rate": 2.984920994009155e-10, "loss": 0.0746, "step": 6762 }, { "epoch": 0.9978605680560678, "grad_norm": 6.167627334594727, "learning_rate": 2.600199510938151e-10, "loss": 0.1954, "step": 6763 }, { "epoch": 0.9980081150866839, "grad_norm": 2.2177047729492188, "learning_rate": 2.2420101004394512e-10, "loss": 0.0676, "step": 6764 }, { "epoch": 0.9981556621172999, "grad_norm": 1.7990946769714355, "learning_rate": 1.9103528575370457e-10, "loss": 0.0404, "step": 6765 }, { "epoch": 0.9983032091479159, "grad_norm": 1.5802024602890015, "learning_rate": 1.6052278702272107e-10, "loss": 0.0352, "step": 6766 }, { "epoch": 0.998450756178532, "grad_norm": 1.7018808126449585, "learning_rate": 1.3266352194785114e-10, "loss": 0.0254, "step": 6767 }, { "epoch": 0.9985983032091479, "grad_norm": 2.102145195007324, "learning_rate": 1.0745749792095971e-10, "loss": 0.0427, "step": 6768 }, { "epoch": 0.9987458502397639, "grad_norm": 2.0960288047790527, "learning_rate": 8.490472163003027e-11, "loss": 0.0562, "step": 6769 }, { "epoch": 0.99889339727038, "grad_norm": 2.1226682662963867, "learning_rate": 6.500519905694446e-11, "loss": 0.0399, "step": 6770 }, { "epoch": 0.999040944300996, "grad_norm": 2.805297374725342, "learning_rate": 4.775893548414345e-11, "loss": 0.128, "step": 6771 }, { "epoch": 0.9991884913316119, "grad_norm": 3.2651734352111816, "learning_rate": 3.316593548574609e-11, "loss": 0.0469, "step": 6772 }, { "epoch": 0.999336038362228, "grad_norm": 1.4640666246414185, "learning_rate": 2.1226202935320518e-11, "loss": 0.033, "step": 6773 }, { "epoch": 0.999483585392844, "grad_norm": 1.624456524848938, "learning_rate": 1.1939740999222793e-11, "loss": 0.0241, "step": 6774 }, { "epoch": 0.99963113242346, "grad_norm": 1.3273454904556274, "learning_rate": 5.3065521421480356e-12, "loss": 0.034, "step": 6775 }, { "epoch": 0.999778679454076, "grad_norm": 0.9207583069801331, "learning_rate": 1.3266381237997393e-12, "loss": 0.026, "step": 6776 }, { "epoch": 0.999926226484692, "grad_norm": 1.3743399381637573, "learning_rate": 0.0, "loss": 0.0357, "step": 6777 } ], "logging_steps": 1, "max_steps": 6777, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0879468077357466e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }