{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9901989045834535,
"eval_steps": 115,
"global_step": 1145,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008648025367541078,
"grad_norm": null,
"learning_rate": 0.0,
"loss": 8.1889,
"step": 1
},
{
"epoch": 0.0017296050735082155,
"grad_norm": 25.332334518432617,
"learning_rate": 0.0,
"loss": 9.3962,
"step": 2
},
{
"epoch": 0.0025944076102623233,
"grad_norm": 21.9357967376709,
"learning_rate": 1.9193857965451055e-07,
"loss": 9.4132,
"step": 3
},
{
"epoch": 0.003459210147016431,
"grad_norm": 26.18132781982422,
"learning_rate": 3.838771593090211e-07,
"loss": 9.1807,
"step": 4
},
{
"epoch": 0.004324012683770539,
"grad_norm": 24.46787452697754,
"learning_rate": 5.758157389635317e-07,
"loss": 8.1559,
"step": 5
},
{
"epoch": 0.0051888152205246466,
"grad_norm": 25.57164764404297,
"learning_rate": 7.677543186180422e-07,
"loss": 9.9007,
"step": 6
},
{
"epoch": 0.006053617757278754,
"grad_norm": 19.92409324645996,
"learning_rate": 9.596928982725527e-07,
"loss": 9.634,
"step": 7
},
{
"epoch": 0.006918420294032862,
"grad_norm": 23.451889038085938,
"learning_rate": 1.1516314779270634e-06,
"loss": 10.8722,
"step": 8
},
{
"epoch": 0.007783222830786971,
"grad_norm": 22.753061294555664,
"learning_rate": 1.343570057581574e-06,
"loss": 8.6969,
"step": 9
},
{
"epoch": 0.008648025367541078,
"grad_norm": 23.012781143188477,
"learning_rate": 1.5355086372360844e-06,
"loss": 8.9552,
"step": 10
},
{
"epoch": 0.009512827904295185,
"grad_norm": 20.173051834106445,
"learning_rate": 1.7274472168905951e-06,
"loss": 7.5245,
"step": 11
},
{
"epoch": 0.010377630441049293,
"grad_norm": 24.979217529296875,
"learning_rate": 1.9193857965451054e-06,
"loss": 9.1573,
"step": 12
},
{
"epoch": 0.011242432977803401,
"grad_norm": 24.23455810546875,
"learning_rate": 2.1113243761996164e-06,
"loss": 9.2615,
"step": 13
},
{
"epoch": 0.012107235514557509,
"grad_norm": 25.147851943969727,
"learning_rate": 2.303262955854127e-06,
"loss": 9.1465,
"step": 14
},
{
"epoch": 0.012972038051311616,
"grad_norm": 21.937841415405273,
"learning_rate": 2.4952015355086374e-06,
"loss": 9.3845,
"step": 15
},
{
"epoch": 0.013836840588065724,
"grad_norm": 24.25821304321289,
"learning_rate": 2.687140115163148e-06,
"loss": 9.3638,
"step": 16
},
{
"epoch": 0.014701643124819834,
"grad_norm": 22.018434524536133,
"learning_rate": 2.879078694817659e-06,
"loss": 9.3365,
"step": 17
},
{
"epoch": 0.015566445661573941,
"grad_norm": 19.021236419677734,
"learning_rate": 3.071017274472169e-06,
"loss": 8.267,
"step": 18
},
{
"epoch": 0.016431248198328047,
"grad_norm": 24.68037986755371,
"learning_rate": 3.2629558541266794e-06,
"loss": 9.8727,
"step": 19
},
{
"epoch": 0.017296050735082155,
"grad_norm": 18.95473861694336,
"learning_rate": 3.4548944337811903e-06,
"loss": 9.6234,
"step": 20
},
{
"epoch": 0.018160853271836263,
"grad_norm": 20.910001754760742,
"learning_rate": 3.646833013435701e-06,
"loss": 9.0452,
"step": 21
},
{
"epoch": 0.01902565580859037,
"grad_norm": 23.27020835876465,
"learning_rate": 3.838771593090211e-06,
"loss": 8.5295,
"step": 22
},
{
"epoch": 0.01989045834534448,
"grad_norm": 20.173105239868164,
"learning_rate": 4.030710172744722e-06,
"loss": 7.8237,
"step": 23
},
{
"epoch": 0.020755260882098586,
"grad_norm": 21.863664627075195,
"learning_rate": 4.222648752399233e-06,
"loss": 8.5162,
"step": 24
},
{
"epoch": 0.021620063418852694,
"grad_norm": 19.267335891723633,
"learning_rate": 4.414587332053743e-06,
"loss": 8.312,
"step": 25
},
{
"epoch": 0.022484865955606802,
"grad_norm": 18.204317092895508,
"learning_rate": 4.606525911708254e-06,
"loss": 9.2816,
"step": 26
},
{
"epoch": 0.02334966849236091,
"grad_norm": 18.67437744140625,
"learning_rate": 4.798464491362764e-06,
"loss": 7.9128,
"step": 27
},
{
"epoch": 0.024214471029115017,
"grad_norm": 18.21224594116211,
"learning_rate": 4.990403071017275e-06,
"loss": 7.6773,
"step": 28
},
{
"epoch": 0.025079273565869125,
"grad_norm": 18.150562286376953,
"learning_rate": 5.182341650671786e-06,
"loss": 7.4793,
"step": 29
},
{
"epoch": 0.025944076102623233,
"grad_norm": 17.31494140625,
"learning_rate": 5.374280230326296e-06,
"loss": 8.2437,
"step": 30
},
{
"epoch": 0.02680887863937734,
"grad_norm": 16.86028289794922,
"learning_rate": 5.566218809980806e-06,
"loss": 8.6053,
"step": 31
},
{
"epoch": 0.02767368117613145,
"grad_norm": 15.361474990844727,
"learning_rate": 5.758157389635318e-06,
"loss": 7.4762,
"step": 32
},
{
"epoch": 0.02853848371288556,
"grad_norm": 18.730810165405273,
"learning_rate": 5.950095969289828e-06,
"loss": 10.5289,
"step": 33
},
{
"epoch": 0.029403286249639667,
"grad_norm": 15.356877326965332,
"learning_rate": 6.142034548944338e-06,
"loss": 7.8911,
"step": 34
},
{
"epoch": 0.030268088786393775,
"grad_norm": 17.622791290283203,
"learning_rate": 6.333973128598848e-06,
"loss": 7.9708,
"step": 35
},
{
"epoch": 0.031132891323147883,
"grad_norm": 17.50615882873535,
"learning_rate": 6.525911708253359e-06,
"loss": 7.2581,
"step": 36
},
{
"epoch": 0.03199769385990199,
"grad_norm": 16.068561553955078,
"learning_rate": 6.7178502879078705e-06,
"loss": 8.6747,
"step": 37
},
{
"epoch": 0.032862496396656095,
"grad_norm": 13.518677711486816,
"learning_rate": 6.909788867562381e-06,
"loss": 7.308,
"step": 38
},
{
"epoch": 0.033727298933410206,
"grad_norm": 18.713558197021484,
"learning_rate": 7.101727447216891e-06,
"loss": 8.6224,
"step": 39
},
{
"epoch": 0.03459210147016431,
"grad_norm": 20.201255798339844,
"learning_rate": 7.293666026871402e-06,
"loss": 12.3613,
"step": 40
},
{
"epoch": 0.03545690400691842,
"grad_norm": 13.44450855255127,
"learning_rate": 7.485604606525912e-06,
"loss": 7.2108,
"step": 41
},
{
"epoch": 0.036321706543672526,
"grad_norm": 15.1000394821167,
"learning_rate": 7.677543186180422e-06,
"loss": 7.831,
"step": 42
},
{
"epoch": 0.03718650908042664,
"grad_norm": 14.707894325256348,
"learning_rate": 7.869481765834934e-06,
"loss": 6.9282,
"step": 43
},
{
"epoch": 0.03805131161718074,
"grad_norm": 13.331870079040527,
"learning_rate": 8.061420345489444e-06,
"loss": 6.9092,
"step": 44
},
{
"epoch": 0.03891611415393485,
"grad_norm": 14.49152660369873,
"learning_rate": 8.253358925143954e-06,
"loss": 8.9053,
"step": 45
},
{
"epoch": 0.03978091669068896,
"grad_norm": 13.79437255859375,
"learning_rate": 8.445297504798465e-06,
"loss": 7.5276,
"step": 46
},
{
"epoch": 0.04064571922744307,
"grad_norm": 15.470795631408691,
"learning_rate": 8.637236084452976e-06,
"loss": 7.4793,
"step": 47
},
{
"epoch": 0.04151052176419717,
"grad_norm": 13.469670295715332,
"learning_rate": 8.829174664107486e-06,
"loss": 7.4401,
"step": 48
},
{
"epoch": 0.042375324300951284,
"grad_norm": 12.38973617553711,
"learning_rate": 9.021113243761997e-06,
"loss": 6.6742,
"step": 49
},
{
"epoch": 0.04324012683770539,
"grad_norm": 14.353404998779297,
"learning_rate": 9.213051823416507e-06,
"loss": 8.89,
"step": 50
},
{
"epoch": 0.0441049293744595,
"grad_norm": 12.149626731872559,
"learning_rate": 9.404990403071018e-06,
"loss": 8.6311,
"step": 51
},
{
"epoch": 0.044969731911213603,
"grad_norm": 12.504135131835938,
"learning_rate": 9.596928982725528e-06,
"loss": 6.9648,
"step": 52
},
{
"epoch": 0.045834534447967715,
"grad_norm": 12.439926147460938,
"learning_rate": 9.78886756238004e-06,
"loss": 7.0633,
"step": 53
},
{
"epoch": 0.04669933698472182,
"grad_norm": 13.445518493652344,
"learning_rate": 9.98080614203455e-06,
"loss": 8.1331,
"step": 54
},
{
"epoch": 0.04756413952147593,
"grad_norm": 12.668989181518555,
"learning_rate": 1.0172744721689061e-05,
"loss": 8.4931,
"step": 55
},
{
"epoch": 0.048428942058230035,
"grad_norm": 11.86841869354248,
"learning_rate": 1.0364683301343571e-05,
"loss": 6.9534,
"step": 56
},
{
"epoch": 0.049293744594984146,
"grad_norm": 12.336670875549316,
"learning_rate": 1.0556621880998081e-05,
"loss": 6.9585,
"step": 57
},
{
"epoch": 0.05015854713173825,
"grad_norm": 12.496221542358398,
"learning_rate": 1.0748560460652591e-05,
"loss": 7.6699,
"step": 58
},
{
"epoch": 0.05102334966849236,
"grad_norm": 11.765594482421875,
"learning_rate": 1.0940499040307102e-05,
"loss": 6.5076,
"step": 59
},
{
"epoch": 0.051888152205246466,
"grad_norm": 13.426615715026855,
"learning_rate": 1.1132437619961612e-05,
"loss": 9.5443,
"step": 60
},
{
"epoch": 0.05275295474200058,
"grad_norm": 12.127195358276367,
"learning_rate": 1.1324376199616123e-05,
"loss": 6.7481,
"step": 61
},
{
"epoch": 0.05361775727875468,
"grad_norm": 10.69729232788086,
"learning_rate": 1.1516314779270635e-05,
"loss": 6.4521,
"step": 62
},
{
"epoch": 0.05448255981550879,
"grad_norm": 12.042082786560059,
"learning_rate": 1.1708253358925145e-05,
"loss": 8.1839,
"step": 63
},
{
"epoch": 0.0553473623522629,
"grad_norm": 13.164307594299316,
"learning_rate": 1.1900191938579655e-05,
"loss": 7.1924,
"step": 64
},
{
"epoch": 0.05621216488901701,
"grad_norm": 10.799245834350586,
"learning_rate": 1.2092130518234165e-05,
"loss": 7.5767,
"step": 65
},
{
"epoch": 0.05707696742577112,
"grad_norm": 10.165273666381836,
"learning_rate": 1.2284069097888675e-05,
"loss": 7.2645,
"step": 66
},
{
"epoch": 0.05794176996252522,
"grad_norm": 12.342886924743652,
"learning_rate": 1.2476007677543186e-05,
"loss": 6.175,
"step": 67
},
{
"epoch": 0.058806572499279335,
"grad_norm": 10.652329444885254,
"learning_rate": 1.2667946257197696e-05,
"loss": 6.5491,
"step": 68
},
{
"epoch": 0.05967137503603344,
"grad_norm": 10.688251495361328,
"learning_rate": 1.2859884836852207e-05,
"loss": 6.7543,
"step": 69
},
{
"epoch": 0.06053617757278755,
"grad_norm": 11.341581344604492,
"learning_rate": 1.3051823416506717e-05,
"loss": 6.98,
"step": 70
},
{
"epoch": 0.061400980109541654,
"grad_norm": 10.539051055908203,
"learning_rate": 1.3243761996161231e-05,
"loss": 6.76,
"step": 71
},
{
"epoch": 0.062265782646295766,
"grad_norm": 10.746752738952637,
"learning_rate": 1.3435700575815741e-05,
"loss": 7.2167,
"step": 72
},
{
"epoch": 0.06313058518304987,
"grad_norm": 12.96174144744873,
"learning_rate": 1.3627639155470251e-05,
"loss": 9.219,
"step": 73
},
{
"epoch": 0.06399538771980398,
"grad_norm": 10.668299674987793,
"learning_rate": 1.3819577735124761e-05,
"loss": 7.3113,
"step": 74
},
{
"epoch": 0.06486019025655809,
"grad_norm": 10.878615379333496,
"learning_rate": 1.4011516314779271e-05,
"loss": 6.4098,
"step": 75
},
{
"epoch": 0.06572499279331219,
"grad_norm": 12.29603099822998,
"learning_rate": 1.4203454894433781e-05,
"loss": 8.3399,
"step": 76
},
{
"epoch": 0.0665897953300663,
"grad_norm": 13.01440143585205,
"learning_rate": 1.4395393474088293e-05,
"loss": 8.6991,
"step": 77
},
{
"epoch": 0.06745459786682041,
"grad_norm": 10.999458312988281,
"learning_rate": 1.4587332053742803e-05,
"loss": 9.5087,
"step": 78
},
{
"epoch": 0.06831940040357452,
"grad_norm": 11.303417205810547,
"learning_rate": 1.4779270633397313e-05,
"loss": 7.3491,
"step": 79
},
{
"epoch": 0.06918420294032862,
"grad_norm": 10.507055282592773,
"learning_rate": 1.4971209213051823e-05,
"loss": 6.8214,
"step": 80
},
{
"epoch": 0.07004900547708273,
"grad_norm": 11.467567443847656,
"learning_rate": 1.5163147792706333e-05,
"loss": 6.5489,
"step": 81
},
{
"epoch": 0.07091380801383684,
"grad_norm": 10.555798530578613,
"learning_rate": 1.5355086372360844e-05,
"loss": 6.7692,
"step": 82
},
{
"epoch": 0.07177861055059095,
"grad_norm": 12.266429901123047,
"learning_rate": 1.5547024952015357e-05,
"loss": 8.8059,
"step": 83
},
{
"epoch": 0.07264341308734505,
"grad_norm": 9.898346900939941,
"learning_rate": 1.5738963531669867e-05,
"loss": 6.4811,
"step": 84
},
{
"epoch": 0.07350821562409916,
"grad_norm": 11.04404067993164,
"learning_rate": 1.5930902111324377e-05,
"loss": 7.0495,
"step": 85
},
{
"epoch": 0.07437301816085327,
"grad_norm": 11.240497589111328,
"learning_rate": 1.6122840690978887e-05,
"loss": 5.8256,
"step": 86
},
{
"epoch": 0.07523782069760739,
"grad_norm": 10.409235000610352,
"learning_rate": 1.6314779270633397e-05,
"loss": 5.7203,
"step": 87
},
{
"epoch": 0.07610262323436148,
"grad_norm": 11.557363510131836,
"learning_rate": 1.6506717850287907e-05,
"loss": 6.5094,
"step": 88
},
{
"epoch": 0.0769674257711156,
"grad_norm": 9.760974884033203,
"learning_rate": 1.669865642994242e-05,
"loss": 5.7523,
"step": 89
},
{
"epoch": 0.0778322283078697,
"grad_norm": 9.31316089630127,
"learning_rate": 1.689059500959693e-05,
"loss": 6.0464,
"step": 90
},
{
"epoch": 0.07869703084462382,
"grad_norm": 11.943814277648926,
"learning_rate": 1.708253358925144e-05,
"loss": 6.5233,
"step": 91
},
{
"epoch": 0.07956183338137791,
"grad_norm": 9.126127243041992,
"learning_rate": 1.727447216890595e-05,
"loss": 6.8966,
"step": 92
},
{
"epoch": 0.08042663591813203,
"grad_norm": 9.386579513549805,
"learning_rate": 1.746641074856046e-05,
"loss": 6.3621,
"step": 93
},
{
"epoch": 0.08129143845488614,
"grad_norm": 10.63054370880127,
"learning_rate": 1.765834932821497e-05,
"loss": 6.0194,
"step": 94
},
{
"epoch": 0.08215624099164025,
"grad_norm": 10.119132995605469,
"learning_rate": 1.785028790786948e-05,
"loss": 6.6797,
"step": 95
},
{
"epoch": 0.08302104352839434,
"grad_norm": 10.746257781982422,
"learning_rate": 1.8042226487523995e-05,
"loss": 5.6214,
"step": 96
},
{
"epoch": 0.08388584606514846,
"grad_norm": 10.64887809753418,
"learning_rate": 1.8234165067178505e-05,
"loss": 6.4946,
"step": 97
},
{
"epoch": 0.08475064860190257,
"grad_norm": 11.115398406982422,
"learning_rate": 1.8426103646833015e-05,
"loss": 5.9069,
"step": 98
},
{
"epoch": 0.08561545113865668,
"grad_norm": 11.452004432678223,
"learning_rate": 1.8618042226487525e-05,
"loss": 6.8848,
"step": 99
},
{
"epoch": 0.08648025367541078,
"grad_norm": 12.722066879272461,
"learning_rate": 1.8809980806142035e-05,
"loss": 7.7248,
"step": 100
},
{
"epoch": 0.08734505621216489,
"grad_norm": 10.500570297241211,
"learning_rate": 1.9001919385796545e-05,
"loss": 6.9069,
"step": 101
},
{
"epoch": 0.088209858748919,
"grad_norm": 10.750312805175781,
"learning_rate": 1.9193857965451055e-05,
"loss": 6.3612,
"step": 102
},
{
"epoch": 0.08907466128567311,
"grad_norm": 12.96158218383789,
"learning_rate": 1.9385796545105565e-05,
"loss": 7.6664,
"step": 103
},
{
"epoch": 0.08993946382242721,
"grad_norm": 11.477307319641113,
"learning_rate": 1.957773512476008e-05,
"loss": 5.4654,
"step": 104
},
{
"epoch": 0.09080426635918132,
"grad_norm": 13.458792686462402,
"learning_rate": 1.976967370441459e-05,
"loss": 6.7583,
"step": 105
},
{
"epoch": 0.09166906889593543,
"grad_norm": 11.862403869628906,
"learning_rate": 1.99616122840691e-05,
"loss": 6.354,
"step": 106
},
{
"epoch": 0.09253387143268954,
"grad_norm": 15.43807601928711,
"learning_rate": 2.015355086372361e-05,
"loss": 5.0476,
"step": 107
},
{
"epoch": 0.09339867396944364,
"grad_norm": 15.703176498413086,
"learning_rate": 2.0345489443378122e-05,
"loss": 5.535,
"step": 108
},
{
"epoch": 0.09426347650619775,
"grad_norm": 15.830728530883789,
"learning_rate": 2.0537428023032633e-05,
"loss": 5.125,
"step": 109
},
{
"epoch": 0.09512827904295186,
"grad_norm": 18.535364151000977,
"learning_rate": 2.0729366602687143e-05,
"loss": 5.3941,
"step": 110
},
{
"epoch": 0.09599308157970597,
"grad_norm": 20.664087295532227,
"learning_rate": 2.0921305182341653e-05,
"loss": 7.6313,
"step": 111
},
{
"epoch": 0.09685788411646007,
"grad_norm": 26.702512741088867,
"learning_rate": 2.1113243761996163e-05,
"loss": 5.584,
"step": 112
},
{
"epoch": 0.09772268665321418,
"grad_norm": 24.893169403076172,
"learning_rate": 2.1305182341650673e-05,
"loss": 6.7148,
"step": 113
},
{
"epoch": 0.09858748918996829,
"grad_norm": 23.61020278930664,
"learning_rate": 2.1497120921305183e-05,
"loss": 4.3739,
"step": 114
},
{
"epoch": 0.0994522917267224,
"grad_norm": 30.567276000976562,
"learning_rate": 2.1689059500959693e-05,
"loss": 7.8202,
"step": 115
},
{
"epoch": 0.0994522917267224,
"eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.9675614833831787,
"eval_Qnli-dev-1024_cosine_ap": 0.688956658941829,
"eval_Qnli-dev-1024_cosine_f1": 0.6881720430107527,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.9675614833831787,
"eval_Qnli-dev-1024_cosine_mcc": 0.3966087176872613,
"eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666,
"eval_Qnli-dev-1024_cosine_recall": 0.7111111111111111,
"eval_Qnli-dev_cosine_accuracy": 0.7395833333333334,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8765031099319458,
"eval_Qnli-dev_cosine_ap": 0.760920950345153,
"eval_Qnli-dev_cosine_f1": 0.7272727272727272,
"eval_Qnli-dev_cosine_f1_threshold": 0.8635396957397461,
"eval_Qnli-dev_cosine_mcc": 0.4497120149145933,
"eval_Qnli-dev_cosine_precision": 0.6666666666666666,
"eval_Qnli-dev_cosine_recall": 0.8,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.8020833134651184,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 1.940317153930664,
"eval_global_dataset_runtime": 67.8871,
"eval_global_dataset_samples_per_second": 14.318,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.8020833134651184,
"eval_sts-test-1024_pearson_cosine": 0.6710405361187501,
"eval_sts-test-1024_spearman_cosine": 0.8309834676298848,
"eval_sts-test_pearson_cosine": 0.9054066453363472,
"eval_sts-test_spearman_cosine": 0.9155824166550393,
"step": 115
},
{
"epoch": 0.1003170942634765,
"grad_norm": 31.343202590942383,
"learning_rate": 2.1880998080614203e-05,
"loss": 4.42,
"step": 116
},
{
"epoch": 0.10118189680023061,
"grad_norm": 28.399757385253906,
"learning_rate": 2.2072936660268713e-05,
"loss": 4.5047,
"step": 117
},
{
"epoch": 0.10204669933698472,
"grad_norm": 32.25544357299805,
"learning_rate": 2.2264875239923223e-05,
"loss": 4.427,
"step": 118
},
{
"epoch": 0.10291150187373883,
"grad_norm": 27.07774543762207,
"learning_rate": 2.2456813819577733e-05,
"loss": 3.1201,
"step": 119
},
{
"epoch": 0.10377630441049293,
"grad_norm": 31.4462833404541,
"learning_rate": 2.2648752399232247e-05,
"loss": 4.3632,
"step": 120
},
{
"epoch": 0.10464110694724704,
"grad_norm": 27.67288589477539,
"learning_rate": 2.2840690978886757e-05,
"loss": 3.5101,
"step": 121
},
{
"epoch": 0.10550590948400115,
"grad_norm": 29.23362922668457,
"learning_rate": 2.303262955854127e-05,
"loss": 4.7499,
"step": 122
},
{
"epoch": 0.10637071202075526,
"grad_norm": 27.85274887084961,
"learning_rate": 2.322456813819578e-05,
"loss": 4.5242,
"step": 123
},
{
"epoch": 0.10723551455750936,
"grad_norm": 21.893939971923828,
"learning_rate": 2.341650671785029e-05,
"loss": 3.379,
"step": 124
},
{
"epoch": 0.10810031709426347,
"grad_norm": 18.63385772705078,
"learning_rate": 2.36084452975048e-05,
"loss": 2.8004,
"step": 125
},
{
"epoch": 0.10896511963101758,
"grad_norm": 16.17616844177246,
"learning_rate": 2.380038387715931e-05,
"loss": 2.8855,
"step": 126
},
{
"epoch": 0.1098299221677717,
"grad_norm": 17.123281478881836,
"learning_rate": 2.399232245681382e-05,
"loss": 3.937,
"step": 127
},
{
"epoch": 0.1106947247045258,
"grad_norm": 14.539612770080566,
"learning_rate": 2.418426103646833e-05,
"loss": 3.5914,
"step": 128
},
{
"epoch": 0.1115595272412799,
"grad_norm": 12.644956588745117,
"learning_rate": 2.437619961612284e-05,
"loss": 2.6459,
"step": 129
},
{
"epoch": 0.11242432977803402,
"grad_norm": 10.95170783996582,
"learning_rate": 2.456813819577735e-05,
"loss": 2.3887,
"step": 130
},
{
"epoch": 0.11328913231478813,
"grad_norm": 12.561387062072754,
"learning_rate": 2.476007677543186e-05,
"loss": 4.1043,
"step": 131
},
{
"epoch": 0.11415393485154224,
"grad_norm": 9.273588180541992,
"learning_rate": 2.495201535508637e-05,
"loss": 2.2758,
"step": 132
},
{
"epoch": 0.11501873738829634,
"grad_norm": 9.219544410705566,
"learning_rate": 2.514395393474088e-05,
"loss": 2.859,
"step": 133
},
{
"epoch": 0.11588353992505045,
"grad_norm": 8.443903923034668,
"learning_rate": 2.533589251439539e-05,
"loss": 2.0162,
"step": 134
},
{
"epoch": 0.11674834246180456,
"grad_norm": 9.522578239440918,
"learning_rate": 2.5527831094049905e-05,
"loss": 2.7069,
"step": 135
},
{
"epoch": 0.11761314499855867,
"grad_norm": 8.184837341308594,
"learning_rate": 2.5719769673704415e-05,
"loss": 1.9536,
"step": 136
},
{
"epoch": 0.11847794753531277,
"grad_norm": 9.079197883605957,
"learning_rate": 2.5911708253358925e-05,
"loss": 2.3063,
"step": 137
},
{
"epoch": 0.11934275007206688,
"grad_norm": 9.438823699951172,
"learning_rate": 2.6103646833013435e-05,
"loss": 3.3783,
"step": 138
},
{
"epoch": 0.12020755260882099,
"grad_norm": 8.003981590270996,
"learning_rate": 2.6295585412667952e-05,
"loss": 1.9538,
"step": 139
},
{
"epoch": 0.1210723551455751,
"grad_norm": 8.199268341064453,
"learning_rate": 2.6487523992322462e-05,
"loss": 2.8959,
"step": 140
},
{
"epoch": 0.1219371576823292,
"grad_norm": 9.071074485778809,
"learning_rate": 2.6679462571976972e-05,
"loss": 2.3064,
"step": 141
},
{
"epoch": 0.12280196021908331,
"grad_norm": 10.237217903137207,
"learning_rate": 2.6871401151631482e-05,
"loss": 2.4625,
"step": 142
},
{
"epoch": 0.12366676275583742,
"grad_norm": 7.96627950668335,
"learning_rate": 2.7063339731285992e-05,
"loss": 2.4083,
"step": 143
},
{
"epoch": 0.12453156529259153,
"grad_norm": 8.751070022583008,
"learning_rate": 2.7255278310940502e-05,
"loss": 1.5914,
"step": 144
},
{
"epoch": 0.12539636782934563,
"grad_norm": 6.843534469604492,
"learning_rate": 2.7447216890595012e-05,
"loss": 1.5798,
"step": 145
},
{
"epoch": 0.12626117036609974,
"grad_norm": 7.700779438018799,
"learning_rate": 2.7639155470249522e-05,
"loss": 1.5194,
"step": 146
},
{
"epoch": 0.12712597290285385,
"grad_norm": 8.954259872436523,
"learning_rate": 2.7831094049904032e-05,
"loss": 1.5924,
"step": 147
},
{
"epoch": 0.12799077543960796,
"grad_norm": 10.815597534179688,
"learning_rate": 2.8023032629558543e-05,
"loss": 3.1143,
"step": 148
},
{
"epoch": 0.12885557797636207,
"grad_norm": 9.539572715759277,
"learning_rate": 2.8214971209213053e-05,
"loss": 1.8632,
"step": 149
},
{
"epoch": 0.12972038051311618,
"grad_norm": 6.322872638702393,
"learning_rate": 2.8406909788867563e-05,
"loss": 2.0489,
"step": 150
},
{
"epoch": 0.13058518304987027,
"grad_norm": 6.538212776184082,
"learning_rate": 2.8598848368522073e-05,
"loss": 1.5573,
"step": 151
},
{
"epoch": 0.13144998558662438,
"grad_norm": 6.798872470855713,
"learning_rate": 2.8790786948176586e-05,
"loss": 2.8024,
"step": 152
},
{
"epoch": 0.1323147881233785,
"grad_norm": 8.393974304199219,
"learning_rate": 2.8982725527831096e-05,
"loss": 1.9423,
"step": 153
},
{
"epoch": 0.1331795906601326,
"grad_norm": 8.043729782104492,
"learning_rate": 2.9174664107485606e-05,
"loss": 3.1444,
"step": 154
},
{
"epoch": 0.1340443931968867,
"grad_norm": 9.158576965332031,
"learning_rate": 2.9366602687140116e-05,
"loss": 2.5482,
"step": 155
},
{
"epoch": 0.13490919573364082,
"grad_norm": 6.786825180053711,
"learning_rate": 2.9558541266794627e-05,
"loss": 1.0428,
"step": 156
},
{
"epoch": 0.13577399827039494,
"grad_norm": 12.157453536987305,
"learning_rate": 2.9750479846449137e-05,
"loss": 5.8267,
"step": 157
},
{
"epoch": 0.13663880080714905,
"grad_norm": 10.719176292419434,
"learning_rate": 2.9942418426103647e-05,
"loss": 1.9785,
"step": 158
},
{
"epoch": 0.13750360334390313,
"grad_norm": 8.25823974609375,
"learning_rate": 3.0134357005758157e-05,
"loss": 2.5306,
"step": 159
},
{
"epoch": 0.13836840588065724,
"grad_norm": 8.451217651367188,
"learning_rate": 3.0326295585412667e-05,
"loss": 1.8271,
"step": 160
},
{
"epoch": 0.13923320841741135,
"grad_norm": 9.387060165405273,
"learning_rate": 3.051823416506718e-05,
"loss": 2.6579,
"step": 161
},
{
"epoch": 0.14009801095416546,
"grad_norm": 8.968480110168457,
"learning_rate": 3.071017274472169e-05,
"loss": 3.0193,
"step": 162
},
{
"epoch": 0.14096281349091958,
"grad_norm": 8.816688537597656,
"learning_rate": 3.09021113243762e-05,
"loss": 1.5596,
"step": 163
},
{
"epoch": 0.1418276160276737,
"grad_norm": 5.402006149291992,
"learning_rate": 3.1094049904030714e-05,
"loss": 1.4505,
"step": 164
},
{
"epoch": 0.1426924185644278,
"grad_norm": 7.654393196105957,
"learning_rate": 3.128598848368523e-05,
"loss": 2.5331,
"step": 165
},
{
"epoch": 0.1435572211011819,
"grad_norm": 6.393066883087158,
"learning_rate": 3.1477927063339734e-05,
"loss": 1.384,
"step": 166
},
{
"epoch": 0.144422023637936,
"grad_norm": 8.975717544555664,
"learning_rate": 3.166986564299425e-05,
"loss": 3.3553,
"step": 167
},
{
"epoch": 0.1452868261746901,
"grad_norm": 8.812336921691895,
"learning_rate": 3.1861804222648754e-05,
"loss": 2.2541,
"step": 168
},
{
"epoch": 0.14615162871144421,
"grad_norm": 7.189652919769287,
"learning_rate": 3.205374280230327e-05,
"loss": 1.1827,
"step": 169
},
{
"epoch": 0.14701643124819833,
"grad_norm": 7.888529300689697,
"learning_rate": 3.2245681381957774e-05,
"loss": 1.3643,
"step": 170
},
{
"epoch": 0.14788123378495244,
"grad_norm": 6.611407279968262,
"learning_rate": 3.243761996161229e-05,
"loss": 1.9817,
"step": 171
},
{
"epoch": 0.14874603632170655,
"grad_norm": 6.734430313110352,
"learning_rate": 3.2629558541266795e-05,
"loss": 2.3332,
"step": 172
},
{
"epoch": 0.14961083885846066,
"grad_norm": 6.5995306968688965,
"learning_rate": 3.282149712092131e-05,
"loss": 1.4638,
"step": 173
},
{
"epoch": 0.15047564139521477,
"grad_norm": 7.57749605178833,
"learning_rate": 3.3013435700575815e-05,
"loss": 1.7929,
"step": 174
},
{
"epoch": 0.15134044393196885,
"grad_norm": 4.956903457641602,
"learning_rate": 3.320537428023033e-05,
"loss": 1.0457,
"step": 175
},
{
"epoch": 0.15220524646872297,
"grad_norm": 9.929686546325684,
"learning_rate": 3.339731285988484e-05,
"loss": 1.4866,
"step": 176
},
{
"epoch": 0.15307004900547708,
"grad_norm": 7.194726467132568,
"learning_rate": 3.358925143953935e-05,
"loss": 1.7834,
"step": 177
},
{
"epoch": 0.1539348515422312,
"grad_norm": 6.916417598724365,
"learning_rate": 3.378119001919386e-05,
"loss": 1.1396,
"step": 178
},
{
"epoch": 0.1547996540789853,
"grad_norm": 9.47856330871582,
"learning_rate": 3.397312859884837e-05,
"loss": 1.9811,
"step": 179
},
{
"epoch": 0.1556644566157394,
"grad_norm": 7.894885540008545,
"learning_rate": 3.416506717850288e-05,
"loss": 1.1859,
"step": 180
},
{
"epoch": 0.15652925915249352,
"grad_norm": 7.631194114685059,
"learning_rate": 3.435700575815739e-05,
"loss": 1.5481,
"step": 181
},
{
"epoch": 0.15739406168924763,
"grad_norm": 5.6157073974609375,
"learning_rate": 3.45489443378119e-05,
"loss": 1.5954,
"step": 182
},
{
"epoch": 0.15825886422600172,
"grad_norm": 9.201720237731934,
"learning_rate": 3.474088291746641e-05,
"loss": 2.2163,
"step": 183
},
{
"epoch": 0.15912366676275583,
"grad_norm": 5.702026844024658,
"learning_rate": 3.493282149712092e-05,
"loss": 1.475,
"step": 184
},
{
"epoch": 0.15998846929950994,
"grad_norm": 5.93116569519043,
"learning_rate": 3.512476007677543e-05,
"loss": 1.2394,
"step": 185
},
{
"epoch": 0.16085327183626405,
"grad_norm": 3.9884233474731445,
"learning_rate": 3.531669865642994e-05,
"loss": 1.2713,
"step": 186
},
{
"epoch": 0.16171807437301816,
"grad_norm": 7.569946765899658,
"learning_rate": 3.550863723608445e-05,
"loss": 1.435,
"step": 187
},
{
"epoch": 0.16258287690977227,
"grad_norm": 7.594637393951416,
"learning_rate": 3.570057581573896e-05,
"loss": 1.1762,
"step": 188
},
{
"epoch": 0.16344767944652638,
"grad_norm": 7.092876434326172,
"learning_rate": 3.5892514395393476e-05,
"loss": 2.3349,
"step": 189
},
{
"epoch": 0.1643124819832805,
"grad_norm": 6.997330188751221,
"learning_rate": 3.608445297504799e-05,
"loss": 1.1459,
"step": 190
},
{
"epoch": 0.16517728452003458,
"grad_norm": 9.205595016479492,
"learning_rate": 3.6276391554702496e-05,
"loss": 1.313,
"step": 191
},
{
"epoch": 0.1660420870567887,
"grad_norm": 6.776134014129639,
"learning_rate": 3.646833013435701e-05,
"loss": 1.1422,
"step": 192
},
{
"epoch": 0.1669068895935428,
"grad_norm": 9.902478218078613,
"learning_rate": 3.6660268714011516e-05,
"loss": 1.4937,
"step": 193
},
{
"epoch": 0.1677716921302969,
"grad_norm": 8.630653381347656,
"learning_rate": 3.685220729366603e-05,
"loss": 1.351,
"step": 194
},
{
"epoch": 0.16863649466705102,
"grad_norm": 8.957950592041016,
"learning_rate": 3.704414587332054e-05,
"loss": 1.1581,
"step": 195
},
{
"epoch": 0.16950129720380513,
"grad_norm": 8.303983688354492,
"learning_rate": 3.723608445297505e-05,
"loss": 2.1473,
"step": 196
},
{
"epoch": 0.17036609974055925,
"grad_norm": 8.272674560546875,
"learning_rate": 3.7428023032629563e-05,
"loss": 0.8801,
"step": 197
},
{
"epoch": 0.17123090227731336,
"grad_norm": 7.904557228088379,
"learning_rate": 3.761996161228407e-05,
"loss": 1.3985,
"step": 198
},
{
"epoch": 0.17209570481406747,
"grad_norm": 5.652804851531982,
"learning_rate": 3.7811900191938584e-05,
"loss": 0.8468,
"step": 199
},
{
"epoch": 0.17296050735082155,
"grad_norm": 5.771730422973633,
"learning_rate": 3.800383877159309e-05,
"loss": 1.0563,
"step": 200
},
{
"epoch": 0.17382530988757566,
"grad_norm": 6.634278297424316,
"learning_rate": 3.8195777351247604e-05,
"loss": 0.9612,
"step": 201
},
{
"epoch": 0.17469011242432977,
"grad_norm": 8.659712791442871,
"learning_rate": 3.838771593090211e-05,
"loss": 1.665,
"step": 202
},
{
"epoch": 0.17555491496108389,
"grad_norm": 6.617002487182617,
"learning_rate": 3.8579654510556624e-05,
"loss": 1.1505,
"step": 203
},
{
"epoch": 0.176419717497838,
"grad_norm": 10.3783597946167,
"learning_rate": 3.877159309021113e-05,
"loss": 1.7958,
"step": 204
},
{
"epoch": 0.1772845200345921,
"grad_norm": 9.473942756652832,
"learning_rate": 3.8963531669865644e-05,
"loss": 1.3115,
"step": 205
},
{
"epoch": 0.17814932257134622,
"grad_norm": 7.500204563140869,
"learning_rate": 3.915547024952016e-05,
"loss": 1.0855,
"step": 206
},
{
"epoch": 0.17901412510810033,
"grad_norm": 6.897130012512207,
"learning_rate": 3.9347408829174664e-05,
"loss": 1.1051,
"step": 207
},
{
"epoch": 0.17987892764485441,
"grad_norm": 9.034842491149902,
"learning_rate": 3.953934740882918e-05,
"loss": 2.5371,
"step": 208
},
{
"epoch": 0.18074373018160853,
"grad_norm": 9.812570571899414,
"learning_rate": 3.9731285988483684e-05,
"loss": 1.7992,
"step": 209
},
{
"epoch": 0.18160853271836264,
"grad_norm": 7.528004169464111,
"learning_rate": 3.99232245681382e-05,
"loss": 1.7798,
"step": 210
},
{
"epoch": 0.18247333525511675,
"grad_norm": 7.52139139175415,
"learning_rate": 4.0115163147792705e-05,
"loss": 0.7093,
"step": 211
},
{
"epoch": 0.18333813779187086,
"grad_norm": 9.2921142578125,
"learning_rate": 4.030710172744722e-05,
"loss": 1.2681,
"step": 212
},
{
"epoch": 0.18420294032862497,
"grad_norm": 4.883711814880371,
"learning_rate": 4.049904030710173e-05,
"loss": 0.911,
"step": 213
},
{
"epoch": 0.18506774286537908,
"grad_norm": 8.103593826293945,
"learning_rate": 4.0690978886756245e-05,
"loss": 1.1144,
"step": 214
},
{
"epoch": 0.1859325454021332,
"grad_norm": 6.5846381187438965,
"learning_rate": 4.088291746641075e-05,
"loss": 0.8362,
"step": 215
},
{
"epoch": 0.18679734793888728,
"grad_norm": 5.238864421844482,
"learning_rate": 4.1074856046065265e-05,
"loss": 0.838,
"step": 216
},
{
"epoch": 0.1876621504756414,
"grad_norm": 7.091164588928223,
"learning_rate": 4.126679462571977e-05,
"loss": 1.3143,
"step": 217
},
{
"epoch": 0.1885269530123955,
"grad_norm": 4.529580116271973,
"learning_rate": 4.1458733205374285e-05,
"loss": 0.8799,
"step": 218
},
{
"epoch": 0.1893917555491496,
"grad_norm": 5.912927627563477,
"learning_rate": 4.165067178502879e-05,
"loss": 0.7928,
"step": 219
},
{
"epoch": 0.19025655808590372,
"grad_norm": 7.802720069885254,
"learning_rate": 4.1842610364683305e-05,
"loss": 0.7077,
"step": 220
},
{
"epoch": 0.19112136062265783,
"grad_norm": 7.49670934677124,
"learning_rate": 4.203454894433781e-05,
"loss": 1.7815,
"step": 221
},
{
"epoch": 0.19198616315941194,
"grad_norm": 5.978695392608643,
"learning_rate": 4.2226487523992326e-05,
"loss": 1.3599,
"step": 222
},
{
"epoch": 0.19285096569616605,
"grad_norm": 8.289727210998535,
"learning_rate": 4.241842610364683e-05,
"loss": 0.7413,
"step": 223
},
{
"epoch": 0.19371576823292014,
"grad_norm": 7.663917541503906,
"learning_rate": 4.2610364683301346e-05,
"loss": 1.9959,
"step": 224
},
{
"epoch": 0.19458057076967425,
"grad_norm": 9.845619201660156,
"learning_rate": 4.280230326295586e-05,
"loss": 1.9112,
"step": 225
},
{
"epoch": 0.19544537330642836,
"grad_norm": 5.703056812286377,
"learning_rate": 4.2994241842610366e-05,
"loss": 0.5033,
"step": 226
},
{
"epoch": 0.19631017584318247,
"grad_norm": 9.209814071655273,
"learning_rate": 4.318618042226488e-05,
"loss": 1.1669,
"step": 227
},
{
"epoch": 0.19717497837993658,
"grad_norm": 8.577181816101074,
"learning_rate": 4.3378119001919386e-05,
"loss": 1.2109,
"step": 228
},
{
"epoch": 0.1980397809166907,
"grad_norm": 7.078784942626953,
"learning_rate": 4.35700575815739e-05,
"loss": 0.781,
"step": 229
},
{
"epoch": 0.1989045834534448,
"grad_norm": 9.162598609924316,
"learning_rate": 4.3761996161228406e-05,
"loss": 1.5895,
"step": 230
},
{
"epoch": 0.1989045834534448,
"eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.867725133895874,
"eval_Qnli-dev-1024_cosine_ap": 0.713229712410124,
"eval_Qnli-dev-1024_cosine_f1": 0.7291666666666667,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8537728786468506,
"eval_Qnli-dev-1024_cosine_mcc": 0.46405228758169936,
"eval_Qnli-dev-1024_cosine_precision": 0.6862745098039216,
"eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778,
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8137844204902649,
"eval_Qnli-dev_cosine_ap": 0.7513782450871136,
"eval_Qnli-dev_cosine_f1": 0.7222222222222222,
"eval_Qnli-dev_cosine_f1_threshold": 0.7686975002288818,
"eval_Qnli-dev_cosine_mcc": 0.41614558708189836,
"eval_Qnli-dev_cosine_precision": 0.6190476190476191,
"eval_Qnli-dev_cosine_recall": 0.8666666666666667,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 0.7282267808914185,
"eval_global_dataset_runtime": 67.7277,
"eval_global_dataset_samples_per_second": 14.352,
"eval_global_dataset_steps_per_second": 0.31,
"eval_sequential_score": 0.9270833134651184,
"eval_sts-test-1024_pearson_cosine": 0.8484219629681994,
"eval_sts-test-1024_spearman_cosine": 0.8984444397927454,
"eval_sts-test_pearson_cosine": 0.9066337545995211,
"eval_sts-test_spearman_cosine": 0.9170443296862147,
"step": 230
},
{
"epoch": 0.19976938599019892,
"grad_norm": 6.589021682739258,
"learning_rate": 4.395393474088292e-05,
"loss": 1.2439,
"step": 231
},
{
"epoch": 0.200634188526953,
"grad_norm": 4.9722089767456055,
"learning_rate": 4.4145873320537426e-05,
"loss": 0.5947,
"step": 232
},
{
"epoch": 0.2014989910637071,
"grad_norm": 6.424257278442383,
"learning_rate": 4.433781190019194e-05,
"loss": 1.1687,
"step": 233
},
{
"epoch": 0.20236379360046122,
"grad_norm": 10.21776008605957,
"learning_rate": 4.4529750479846447e-05,
"loss": 2.082,
"step": 234
},
{
"epoch": 0.20322859613721533,
"grad_norm": 6.3251633644104,
"learning_rate": 4.472168905950096e-05,
"loss": 0.521,
"step": 235
},
{
"epoch": 0.20409339867396945,
"grad_norm": 6.459076881408691,
"learning_rate": 4.491362763915547e-05,
"loss": 1.2406,
"step": 236
},
{
"epoch": 0.20495820121072356,
"grad_norm": 6.254432201385498,
"learning_rate": 4.510556621880998e-05,
"loss": 0.6586,
"step": 237
},
{
"epoch": 0.20582300374747767,
"grad_norm": 6.352238655090332,
"learning_rate": 4.5297504798464494e-05,
"loss": 0.6746,
"step": 238
},
{
"epoch": 0.20668780628423178,
"grad_norm": 4.247053146362305,
"learning_rate": 4.548944337811901e-05,
"loss": 0.3925,
"step": 239
},
{
"epoch": 0.20755260882098586,
"grad_norm": 6.61681604385376,
"learning_rate": 4.5681381957773514e-05,
"loss": 0.8654,
"step": 240
},
{
"epoch": 0.20841741135773997,
"grad_norm": 7.9061408042907715,
"learning_rate": 4.587332053742803e-05,
"loss": 0.6723,
"step": 241
},
{
"epoch": 0.20928221389449408,
"grad_norm": 3.9183671474456787,
"learning_rate": 4.606525911708254e-05,
"loss": 0.4345,
"step": 242
},
{
"epoch": 0.2101470164312482,
"grad_norm": 8.863993644714355,
"learning_rate": 4.625719769673705e-05,
"loss": 1.0822,
"step": 243
},
{
"epoch": 0.2110118189680023,
"grad_norm": 8.070558547973633,
"learning_rate": 4.644913627639156e-05,
"loss": 1.5697,
"step": 244
},
{
"epoch": 0.21187662150475642,
"grad_norm": 3.8370699882507324,
"learning_rate": 4.664107485604607e-05,
"loss": 0.3771,
"step": 245
},
{
"epoch": 0.21274142404151053,
"grad_norm": 4.8743486404418945,
"learning_rate": 4.683301343570058e-05,
"loss": 0.484,
"step": 246
},
{
"epoch": 0.21360622657826464,
"grad_norm": 6.827274322509766,
"learning_rate": 4.702495201535509e-05,
"loss": 1.0994,
"step": 247
},
{
"epoch": 0.21447102911501872,
"grad_norm": 6.400326251983643,
"learning_rate": 4.72168905950096e-05,
"loss": 1.0543,
"step": 248
},
{
"epoch": 0.21533583165177284,
"grad_norm": 9.760299682617188,
"learning_rate": 4.740882917466411e-05,
"loss": 1.1132,
"step": 249
},
{
"epoch": 0.21620063418852695,
"grad_norm": 9.413398742675781,
"learning_rate": 4.760076775431862e-05,
"loss": 1.7051,
"step": 250
},
{
"epoch": 0.21706543672528106,
"grad_norm": 6.986111164093018,
"learning_rate": 4.779270633397313e-05,
"loss": 0.5029,
"step": 251
},
{
"epoch": 0.21793023926203517,
"grad_norm": 11.26386547088623,
"learning_rate": 4.798464491362764e-05,
"loss": 1.5927,
"step": 252
},
{
"epoch": 0.21879504179878928,
"grad_norm": 5.758693695068359,
"learning_rate": 4.817658349328215e-05,
"loss": 0.9221,
"step": 253
},
{
"epoch": 0.2196598443355434,
"grad_norm": 6.061553478240967,
"learning_rate": 4.836852207293666e-05,
"loss": 0.989,
"step": 254
},
{
"epoch": 0.2205246468722975,
"grad_norm": 7.509443759918213,
"learning_rate": 4.8560460652591175e-05,
"loss": 1.9468,
"step": 255
},
{
"epoch": 0.2213894494090516,
"grad_norm": 7.857194900512695,
"learning_rate": 4.875239923224568e-05,
"loss": 0.7299,
"step": 256
},
{
"epoch": 0.2222542519458057,
"grad_norm": 9.96574592590332,
"learning_rate": 4.8944337811900195e-05,
"loss": 1.0199,
"step": 257
},
{
"epoch": 0.2231190544825598,
"grad_norm": 8.403667449951172,
"learning_rate": 4.91362763915547e-05,
"loss": 1.0238,
"step": 258
},
{
"epoch": 0.22398385701931392,
"grad_norm": 8.612835884094238,
"learning_rate": 4.9328214971209215e-05,
"loss": 1.8386,
"step": 259
},
{
"epoch": 0.22484865955606803,
"grad_norm": 7.690261363983154,
"learning_rate": 4.952015355086372e-05,
"loss": 0.7887,
"step": 260
},
{
"epoch": 0.22571346209282214,
"grad_norm": 9.24271011352539,
"learning_rate": 4.9712092130518236e-05,
"loss": 1.0248,
"step": 261
},
{
"epoch": 0.22657826462957625,
"grad_norm": 6.5738525390625,
"learning_rate": 4.990403071017274e-05,
"loss": 0.9891,
"step": 262
},
{
"epoch": 0.22744306716633036,
"grad_norm": 10.909134864807129,
"learning_rate": 5.009596928982726e-05,
"loss": 2.007,
"step": 263
},
{
"epoch": 0.22830786970308448,
"grad_norm": 7.512816905975342,
"learning_rate": 5.028790786948176e-05,
"loss": 1.6522,
"step": 264
},
{
"epoch": 0.22917267223983856,
"grad_norm": 4.3134446144104,
"learning_rate": 5.047984644913628e-05,
"loss": 0.8482,
"step": 265
},
{
"epoch": 0.23003747477659267,
"grad_norm": 6.679250240325928,
"learning_rate": 5.067178502879078e-05,
"loss": 0.7231,
"step": 266
},
{
"epoch": 0.23090227731334678,
"grad_norm": 8.060896873474121,
"learning_rate": 5.08637236084453e-05,
"loss": 0.9017,
"step": 267
},
{
"epoch": 0.2317670798501009,
"grad_norm": 10.473666191101074,
"learning_rate": 5.105566218809981e-05,
"loss": 1.2073,
"step": 268
},
{
"epoch": 0.232631882386855,
"grad_norm": 5.640207290649414,
"learning_rate": 5.124760076775432e-05,
"loss": 0.3825,
"step": 269
},
{
"epoch": 0.23349668492360912,
"grad_norm": 7.310571193695068,
"learning_rate": 5.143953934740883e-05,
"loss": 0.6634,
"step": 270
},
{
"epoch": 0.23436148746036323,
"grad_norm": 10.224222183227539,
"learning_rate": 5.163147792706334e-05,
"loss": 1.3564,
"step": 271
},
{
"epoch": 0.23522628999711734,
"grad_norm": 4.993323802947998,
"learning_rate": 5.182341650671785e-05,
"loss": 1.1294,
"step": 272
},
{
"epoch": 0.23609109253387142,
"grad_norm": 6.149577617645264,
"learning_rate": 5.201535508637236e-05,
"loss": 0.5599,
"step": 273
},
{
"epoch": 0.23695589507062553,
"grad_norm": 6.756112098693848,
"learning_rate": 5.220729366602687e-05,
"loss": 0.6844,
"step": 274
},
{
"epoch": 0.23782069760737964,
"grad_norm": 8.450921058654785,
"learning_rate": 5.2399232245681383e-05,
"loss": 0.7783,
"step": 275
},
{
"epoch": 0.23868550014413376,
"grad_norm": 7.2079267501831055,
"learning_rate": 5.2591170825335904e-05,
"loss": 1.101,
"step": 276
},
{
"epoch": 0.23955030268088787,
"grad_norm": 6.447202205657959,
"learning_rate": 5.2783109404990404e-05,
"loss": 0.9447,
"step": 277
},
{
"epoch": 0.24041510521764198,
"grad_norm": 10.80993366241455,
"learning_rate": 5.2975047984644924e-05,
"loss": 2.4452,
"step": 278
},
{
"epoch": 0.2412799077543961,
"grad_norm": 7.458428859710693,
"learning_rate": 5.3166986564299424e-05,
"loss": 1.2032,
"step": 279
},
{
"epoch": 0.2421447102911502,
"grad_norm": 11.762413024902344,
"learning_rate": 5.3358925143953944e-05,
"loss": 1.9775,
"step": 280
},
{
"epoch": 0.24300951282790428,
"grad_norm": 6.029952049255371,
"learning_rate": 5.3550863723608444e-05,
"loss": 0.523,
"step": 281
},
{
"epoch": 0.2438743153646584,
"grad_norm": 7.083131313323975,
"learning_rate": 5.3742802303262964e-05,
"loss": 0.6166,
"step": 282
},
{
"epoch": 0.2447391179014125,
"grad_norm": 8.343469619750977,
"learning_rate": 5.3934740882917464e-05,
"loss": 0.7902,
"step": 283
},
{
"epoch": 0.24560392043816662,
"grad_norm": 11.58956241607666,
"learning_rate": 5.4126679462571984e-05,
"loss": 1.1019,
"step": 284
},
{
"epoch": 0.24646872297492073,
"grad_norm": 6.451682090759277,
"learning_rate": 5.431861804222649e-05,
"loss": 1.1185,
"step": 285
},
{
"epoch": 0.24733352551167484,
"grad_norm": 8.293807983398438,
"learning_rate": 5.4510556621881004e-05,
"loss": 0.7051,
"step": 286
},
{
"epoch": 0.24819832804842895,
"grad_norm": 6.799464702606201,
"learning_rate": 5.470249520153551e-05,
"loss": 1.076,
"step": 287
},
{
"epoch": 0.24906313058518306,
"grad_norm": 6.457718849182129,
"learning_rate": 5.4894433781190025e-05,
"loss": 1.5065,
"step": 288
},
{
"epoch": 0.24992793312193715,
"grad_norm": 8.503544807434082,
"learning_rate": 5.508637236084453e-05,
"loss": 0.9986,
"step": 289
},
{
"epoch": 0.25079273565869126,
"grad_norm": 8.062347412109375,
"learning_rate": 5.5278310940499045e-05,
"loss": 1.1196,
"step": 290
},
{
"epoch": 0.2516575381954454,
"grad_norm": 5.3419508934021,
"learning_rate": 5.547024952015355e-05,
"loss": 0.7055,
"step": 291
},
{
"epoch": 0.2525223407321995,
"grad_norm": 3.2817585468292236,
"learning_rate": 5.5662188099808065e-05,
"loss": 0.2865,
"step": 292
},
{
"epoch": 0.25338714326895356,
"grad_norm": 8.452672004699707,
"learning_rate": 5.585412667946257e-05,
"loss": 0.6973,
"step": 293
},
{
"epoch": 0.2542519458057077,
"grad_norm": 9.172618865966797,
"learning_rate": 5.6046065259117085e-05,
"loss": 1.0347,
"step": 294
},
{
"epoch": 0.2551167483424618,
"grad_norm": 7.101957321166992,
"learning_rate": 5.623800383877159e-05,
"loss": 0.5065,
"step": 295
},
{
"epoch": 0.2559815508792159,
"grad_norm": 8.655692100524902,
"learning_rate": 5.6429942418426105e-05,
"loss": 0.7479,
"step": 296
},
{
"epoch": 0.25684635341597,
"grad_norm": 6.224137306213379,
"learning_rate": 5.662188099808061e-05,
"loss": 0.5214,
"step": 297
},
{
"epoch": 0.25771115595272415,
"grad_norm": 5.057961463928223,
"learning_rate": 5.6813819577735125e-05,
"loss": 0.4925,
"step": 298
},
{
"epoch": 0.25857595848947823,
"grad_norm": 5.989309787750244,
"learning_rate": 5.700575815738963e-05,
"loss": 0.9331,
"step": 299
},
{
"epoch": 0.25944076102623237,
"grad_norm": 5.4001336097717285,
"learning_rate": 5.7197696737044146e-05,
"loss": 0.4239,
"step": 300
},
{
"epoch": 0.26030556356298645,
"grad_norm": 8.392406463623047,
"learning_rate": 5.7389635316698666e-05,
"loss": 0.7426,
"step": 301
},
{
"epoch": 0.26117036609974054,
"grad_norm": 9.140869140625,
"learning_rate": 5.758157389635317e-05,
"loss": 1.292,
"step": 302
},
{
"epoch": 0.2620351686364947,
"grad_norm": 5.900636196136475,
"learning_rate": 5.7773512476007686e-05,
"loss": 1.1471,
"step": 303
},
{
"epoch": 0.26289997117324876,
"grad_norm": 2.76983904838562,
"learning_rate": 5.796545105566219e-05,
"loss": 0.5639,
"step": 304
},
{
"epoch": 0.2637647737100029,
"grad_norm": 8.212996482849121,
"learning_rate": 5.8157389635316706e-05,
"loss": 1.592,
"step": 305
},
{
"epoch": 0.264629576246757,
"grad_norm": 6.7358174324035645,
"learning_rate": 5.834932821497121e-05,
"loss": 0.6063,
"step": 306
},
{
"epoch": 0.2654943787835111,
"grad_norm": 9.422693252563477,
"learning_rate": 5.8541266794625726e-05,
"loss": 0.665,
"step": 307
},
{
"epoch": 0.2663591813202652,
"grad_norm": 10.346942901611328,
"learning_rate": 5.873320537428023e-05,
"loss": 0.7966,
"step": 308
},
{
"epoch": 0.2672239838570193,
"grad_norm": 8.950202941894531,
"learning_rate": 5.8925143953934746e-05,
"loss": 0.6255,
"step": 309
},
{
"epoch": 0.2680887863937734,
"grad_norm": 6.519852638244629,
"learning_rate": 5.911708253358925e-05,
"loss": 0.7197,
"step": 310
},
{
"epoch": 0.2689535889305275,
"grad_norm": 12.285760879516602,
"learning_rate": 5.9309021113243767e-05,
"loss": 2.22,
"step": 311
},
{
"epoch": 0.26981839146728165,
"grad_norm": 9.598986625671387,
"learning_rate": 5.950095969289827e-05,
"loss": 0.7472,
"step": 312
},
{
"epoch": 0.27068319400403573,
"grad_norm": 13.030138969421387,
"learning_rate": 5.969289827255279e-05,
"loss": 1.0278,
"step": 313
},
{
"epoch": 0.27154799654078987,
"grad_norm": 9.371500015258789,
"learning_rate": 5.9884836852207293e-05,
"loss": 0.6434,
"step": 314
},
{
"epoch": 0.27241279907754395,
"grad_norm": 7.387608528137207,
"learning_rate": 6.007677543186181e-05,
"loss": 0.4596,
"step": 315
},
{
"epoch": 0.2732776016142981,
"grad_norm": 6.994756698608398,
"learning_rate": 6.0268714011516314e-05,
"loss": 0.5547,
"step": 316
},
{
"epoch": 0.2741424041510522,
"grad_norm": 7.713170528411865,
"learning_rate": 6.046065259117083e-05,
"loss": 1.2906,
"step": 317
},
{
"epoch": 0.27500720668780626,
"grad_norm": 12.936992645263672,
"learning_rate": 6.0652591170825334e-05,
"loss": 2.2893,
"step": 318
},
{
"epoch": 0.2758720092245604,
"grad_norm": 12.210866928100586,
"learning_rate": 6.084452975047985e-05,
"loss": 2.0067,
"step": 319
},
{
"epoch": 0.2767368117613145,
"grad_norm": 9.767999649047852,
"learning_rate": 6.103646833013436e-05,
"loss": 1.0523,
"step": 320
},
{
"epoch": 0.2776016142980686,
"grad_norm": 10.349803924560547,
"learning_rate": 6.122840690978887e-05,
"loss": 1.425,
"step": 321
},
{
"epoch": 0.2784664168348227,
"grad_norm": 8.848223686218262,
"learning_rate": 6.142034548944337e-05,
"loss": 1.0846,
"step": 322
},
{
"epoch": 0.27933121937157684,
"grad_norm": 12.004369735717773,
"learning_rate": 6.16122840690979e-05,
"loss": 1.614,
"step": 323
},
{
"epoch": 0.28019602190833093,
"grad_norm": 4.841424465179443,
"learning_rate": 6.18042226487524e-05,
"loss": 0.831,
"step": 324
},
{
"epoch": 0.281060824445085,
"grad_norm": 10.002786636352539,
"learning_rate": 6.199616122840691e-05,
"loss": 0.8297,
"step": 325
},
{
"epoch": 0.28192562698183915,
"grad_norm": 6.301035404205322,
"learning_rate": 6.218809980806143e-05,
"loss": 0.5425,
"step": 326
},
{
"epoch": 0.28279042951859323,
"grad_norm": 5.8098626136779785,
"learning_rate": 6.238003838771593e-05,
"loss": 0.6583,
"step": 327
},
{
"epoch": 0.2836552320553474,
"grad_norm": 5.272045135498047,
"learning_rate": 6.257197696737045e-05,
"loss": 1.0148,
"step": 328
},
{
"epoch": 0.28452003459210146,
"grad_norm": 8.22673511505127,
"learning_rate": 6.276391554702495e-05,
"loss": 1.4798,
"step": 329
},
{
"epoch": 0.2853848371288556,
"grad_norm": 3.6933820247650146,
"learning_rate": 6.295585412667947e-05,
"loss": 0.3907,
"step": 330
},
{
"epoch": 0.2862496396656097,
"grad_norm": 9.97194766998291,
"learning_rate": 6.314779270633397e-05,
"loss": 1.2206,
"step": 331
},
{
"epoch": 0.2871144422023638,
"grad_norm": 3.41243577003479,
"learning_rate": 6.33397312859885e-05,
"loss": 0.6509,
"step": 332
},
{
"epoch": 0.2879792447391179,
"grad_norm": 5.184510231018066,
"learning_rate": 6.3531669865643e-05,
"loss": 0.5982,
"step": 333
},
{
"epoch": 0.288844047275872,
"grad_norm": 6.894106864929199,
"learning_rate": 6.372360844529751e-05,
"loss": 1.066,
"step": 334
},
{
"epoch": 0.2897088498126261,
"grad_norm": 6.806879997253418,
"learning_rate": 6.391554702495202e-05,
"loss": 0.6874,
"step": 335
},
{
"epoch": 0.2905736523493802,
"grad_norm": 4.7376933097839355,
"learning_rate": 6.410748560460654e-05,
"loss": 0.2232,
"step": 336
},
{
"epoch": 0.29143845488613435,
"grad_norm": 7.3895745277404785,
"learning_rate": 6.429942418426104e-05,
"loss": 0.8978,
"step": 337
},
{
"epoch": 0.29230325742288843,
"grad_norm": 4.52320671081543,
"learning_rate": 6.449136276391555e-05,
"loss": 0.5689,
"step": 338
},
{
"epoch": 0.29316805995964257,
"grad_norm": 10.309342384338379,
"learning_rate": 6.468330134357006e-05,
"loss": 1.1131,
"step": 339
},
{
"epoch": 0.29403286249639665,
"grad_norm": 7.698537826538086,
"learning_rate": 6.487523992322458e-05,
"loss": 0.4493,
"step": 340
},
{
"epoch": 0.2948976650331508,
"grad_norm": 9.31425952911377,
"learning_rate": 6.506717850287908e-05,
"loss": 0.5409,
"step": 341
},
{
"epoch": 0.2957624675699049,
"grad_norm": 3.6749117374420166,
"learning_rate": 6.525911708253359e-05,
"loss": 0.5921,
"step": 342
},
{
"epoch": 0.29662727010665896,
"grad_norm": 8.300640106201172,
"learning_rate": 6.54510556621881e-05,
"loss": 0.6657,
"step": 343
},
{
"epoch": 0.2974920726434131,
"grad_norm": 7.509027481079102,
"learning_rate": 6.564299424184262e-05,
"loss": 0.8345,
"step": 344
},
{
"epoch": 0.2983568751801672,
"grad_norm": 6.161888122558594,
"learning_rate": 6.583493282149712e-05,
"loss": 0.9418,
"step": 345
},
{
"epoch": 0.2983568751801672,
"eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8395562767982483,
"eval_Qnli-dev-1024_cosine_ap": 0.753054394869091,
"eval_Qnli-dev-1024_cosine_f1": 0.7216494845360825,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8208398818969727,
"eval_Qnli-dev-1024_cosine_mcc": 0.44512380090846426,
"eval_Qnli-dev-1024_cosine_precision": 0.6730769230769231,
"eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778,
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7856455445289612,
"eval_Qnli-dev_cosine_ap": 0.7529763141762885,
"eval_Qnli-dev_cosine_f1": 0.7169811320754719,
"eval_Qnli-dev_cosine_f1_threshold": 0.7426920533180237,
"eval_Qnli-dev_cosine_mcc": 0.4079411028893153,
"eval_Qnli-dev_cosine_precision": 0.6229508196721312,
"eval_Qnli-dev_cosine_recall": 0.8444444444444444,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 0.6650346517562866,
"eval_global_dataset_runtime": 67.8938,
"eval_global_dataset_samples_per_second": 14.316,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.9270833134651184,
"eval_sts-test-1024_pearson_cosine": 0.8520971782224942,
"eval_sts-test-1024_spearman_cosine": 0.894236476710775,
"eval_sts-test_pearson_cosine": 0.9080363785366253,
"eval_sts-test_spearman_cosine": 0.9193020252854658,
"step": 345
},
{
"epoch": 0.2992216777169213,
"grad_norm": 3.2981910705566406,
"learning_rate": 6.602687140115163e-05,
"loss": 0.3752,
"step": 346
},
{
"epoch": 0.3000864802536754,
"grad_norm": 5.650154113769531,
"learning_rate": 6.621880998080614e-05,
"loss": 0.362,
"step": 347
},
{
"epoch": 0.30095128279042954,
"grad_norm": 7.166718482971191,
"learning_rate": 6.641074856046066e-05,
"loss": 0.99,
"step": 348
},
{
"epoch": 0.3018160853271836,
"grad_norm": 5.6671295166015625,
"learning_rate": 6.660268714011516e-05,
"loss": 0.4309,
"step": 349
},
{
"epoch": 0.3026808878639377,
"grad_norm": 7.15688943862915,
"learning_rate": 6.679462571976968e-05,
"loss": 0.4101,
"step": 350
},
{
"epoch": 0.30354569040069185,
"grad_norm": 4.845415115356445,
"learning_rate": 6.698656429942419e-05,
"loss": 0.357,
"step": 351
},
{
"epoch": 0.30441049293744593,
"grad_norm": 6.766101360321045,
"learning_rate": 6.71785028790787e-05,
"loss": 0.4257,
"step": 352
},
{
"epoch": 0.30527529547420007,
"grad_norm": 9.900660514831543,
"learning_rate": 6.737044145873322e-05,
"loss": 0.6665,
"step": 353
},
{
"epoch": 0.30614009801095415,
"grad_norm": 4.632408142089844,
"learning_rate": 6.756238003838772e-05,
"loss": 0.4523,
"step": 354
},
{
"epoch": 0.3070049005477083,
"grad_norm": 9.697669982910156,
"learning_rate": 6.775431861804223e-05,
"loss": 1.4959,
"step": 355
},
{
"epoch": 0.3078697030844624,
"grad_norm": 9.970297813415527,
"learning_rate": 6.794625719769674e-05,
"loss": 0.833,
"step": 356
},
{
"epoch": 0.3087345056212165,
"grad_norm": 9.964993476867676,
"learning_rate": 6.813819577735126e-05,
"loss": 0.7928,
"step": 357
},
{
"epoch": 0.3095993081579706,
"grad_norm": 3.866377353668213,
"learning_rate": 6.833013435700576e-05,
"loss": 0.3277,
"step": 358
},
{
"epoch": 0.3104641106947247,
"grad_norm": 6.179882526397705,
"learning_rate": 6.852207293666027e-05,
"loss": 0.5336,
"step": 359
},
{
"epoch": 0.3113289132314788,
"grad_norm": 5.517486095428467,
"learning_rate": 6.871401151631478e-05,
"loss": 0.4663,
"step": 360
},
{
"epoch": 0.3121937157682329,
"grad_norm": 6.7118306159973145,
"learning_rate": 6.89059500959693e-05,
"loss": 0.5869,
"step": 361
},
{
"epoch": 0.31305851830498704,
"grad_norm": 8.203336715698242,
"learning_rate": 6.90978886756238e-05,
"loss": 0.6056,
"step": 362
},
{
"epoch": 0.3139233208417411,
"grad_norm": 4.762539863586426,
"learning_rate": 6.928982725527831e-05,
"loss": 0.5402,
"step": 363
},
{
"epoch": 0.31478812337849527,
"grad_norm": 5.39819860458374,
"learning_rate": 6.948176583493282e-05,
"loss": 1.046,
"step": 364
},
{
"epoch": 0.31565292591524935,
"grad_norm": 4.130873680114746,
"learning_rate": 6.967370441458734e-05,
"loss": 0.3102,
"step": 365
},
{
"epoch": 0.31651772845200343,
"grad_norm": 7.361220359802246,
"learning_rate": 6.986564299424184e-05,
"loss": 0.412,
"step": 366
},
{
"epoch": 0.31738253098875757,
"grad_norm": 7.686898708343506,
"learning_rate": 7.005758157389636e-05,
"loss": 0.5703,
"step": 367
},
{
"epoch": 0.31824733352551166,
"grad_norm": 10.829538345336914,
"learning_rate": 7.024952015355086e-05,
"loss": 1.6531,
"step": 368
},
{
"epoch": 0.3191121360622658,
"grad_norm": 5.71692419052124,
"learning_rate": 7.044145873320538e-05,
"loss": 0.4314,
"step": 369
},
{
"epoch": 0.3199769385990199,
"grad_norm": 8.669037818908691,
"learning_rate": 7.063339731285988e-05,
"loss": 0.7062,
"step": 370
},
{
"epoch": 0.320841741135774,
"grad_norm": 5.996104717254639,
"learning_rate": 7.08253358925144e-05,
"loss": 0.5788,
"step": 371
},
{
"epoch": 0.3217065436725281,
"grad_norm": 12.612412452697754,
"learning_rate": 7.10172744721689e-05,
"loss": 1.8529,
"step": 372
},
{
"epoch": 0.32257134620928224,
"grad_norm": 8.934858322143555,
"learning_rate": 7.120921305182342e-05,
"loss": 0.6606,
"step": 373
},
{
"epoch": 0.3234361487460363,
"grad_norm": 10.218025207519531,
"learning_rate": 7.140115163147793e-05,
"loss": 0.8089,
"step": 374
},
{
"epoch": 0.3243009512827904,
"grad_norm": 5.20566987991333,
"learning_rate": 7.159309021113245e-05,
"loss": 0.3905,
"step": 375
},
{
"epoch": 0.32516575381954455,
"grad_norm": 10.471417427062988,
"learning_rate": 7.178502879078695e-05,
"loss": 1.2417,
"step": 376
},
{
"epoch": 0.32603055635629863,
"grad_norm": 7.703388690948486,
"learning_rate": 7.197696737044146e-05,
"loss": 0.8738,
"step": 377
},
{
"epoch": 0.32689535889305277,
"grad_norm": 8.099038124084473,
"learning_rate": 7.216890595009598e-05,
"loss": 0.8544,
"step": 378
},
{
"epoch": 0.32776016142980685,
"grad_norm": 6.550043106079102,
"learning_rate": 7.236084452975049e-05,
"loss": 0.4667,
"step": 379
},
{
"epoch": 0.328624963966561,
"grad_norm": 10.672149658203125,
"learning_rate": 7.255278310940499e-05,
"loss": 0.8825,
"step": 380
},
{
"epoch": 0.3294897665033151,
"grad_norm": 7.584779262542725,
"learning_rate": 7.27447216890595e-05,
"loss": 0.6003,
"step": 381
},
{
"epoch": 0.33035456904006916,
"grad_norm": 5.818914890289307,
"learning_rate": 7.293666026871402e-05,
"loss": 0.4643,
"step": 382
},
{
"epoch": 0.3312193715768233,
"grad_norm": 6.871515274047852,
"learning_rate": 7.312859884836853e-05,
"loss": 0.5097,
"step": 383
},
{
"epoch": 0.3320841741135774,
"grad_norm": 3.9484200477600098,
"learning_rate": 7.332053742802303e-05,
"loss": 0.4679,
"step": 384
},
{
"epoch": 0.3329489766503315,
"grad_norm": 3.8606741428375244,
"learning_rate": 7.351247600767754e-05,
"loss": 0.3732,
"step": 385
},
{
"epoch": 0.3338137791870856,
"grad_norm": 10.65389347076416,
"learning_rate": 7.370441458733206e-05,
"loss": 0.9031,
"step": 386
},
{
"epoch": 0.33467858172383974,
"grad_norm": 10.56472396850586,
"learning_rate": 7.389635316698657e-05,
"loss": 0.6668,
"step": 387
},
{
"epoch": 0.3355433842605938,
"grad_norm": 9.798723220825195,
"learning_rate": 7.408829174664109e-05,
"loss": 0.7715,
"step": 388
},
{
"epoch": 0.33640818679734796,
"grad_norm": 8.35350227355957,
"learning_rate": 7.428023032629558e-05,
"loss": 0.8536,
"step": 389
},
{
"epoch": 0.33727298933410205,
"grad_norm": 7.99412727355957,
"learning_rate": 7.44721689059501e-05,
"loss": 0.9303,
"step": 390
},
{
"epoch": 0.33813779187085613,
"grad_norm": 8.098565101623535,
"learning_rate": 7.46641074856046e-05,
"loss": 0.3704,
"step": 391
},
{
"epoch": 0.33900259440761027,
"grad_norm": 7.83499002456665,
"learning_rate": 7.485604606525913e-05,
"loss": 0.3678,
"step": 392
},
{
"epoch": 0.33986739694436435,
"grad_norm": 9.846261978149414,
"learning_rate": 7.504798464491363e-05,
"loss": 1.6854,
"step": 393
},
{
"epoch": 0.3407321994811185,
"grad_norm": 10.261216163635254,
"learning_rate": 7.523992322456814e-05,
"loss": 0.7636,
"step": 394
},
{
"epoch": 0.3415970020178726,
"grad_norm": 5.547618389129639,
"learning_rate": 7.543186180422265e-05,
"loss": 0.3462,
"step": 395
},
{
"epoch": 0.3424618045546267,
"grad_norm": 6.500753402709961,
"learning_rate": 7.562380038387717e-05,
"loss": 0.644,
"step": 396
},
{
"epoch": 0.3433266070913808,
"grad_norm": 8.669839859008789,
"learning_rate": 7.581573896353167e-05,
"loss": 0.7317,
"step": 397
},
{
"epoch": 0.34419140962813494,
"grad_norm": 6.280559062957764,
"learning_rate": 7.600767754318618e-05,
"loss": 0.7023,
"step": 398
},
{
"epoch": 0.345056212164889,
"grad_norm": 7.725942611694336,
"learning_rate": 7.61996161228407e-05,
"loss": 0.7164,
"step": 399
},
{
"epoch": 0.3459210147016431,
"grad_norm": 7.478891849517822,
"learning_rate": 7.639155470249521e-05,
"loss": 0.4271,
"step": 400
},
{
"epoch": 0.34678581723839724,
"grad_norm": 4.877331256866455,
"learning_rate": 7.658349328214971e-05,
"loss": 0.7332,
"step": 401
},
{
"epoch": 0.3476506197751513,
"grad_norm": 8.025667190551758,
"learning_rate": 7.677543186180422e-05,
"loss": 0.3978,
"step": 402
},
{
"epoch": 0.34851542231190547,
"grad_norm": 7.804194450378418,
"learning_rate": 7.696737044145874e-05,
"loss": 0.5208,
"step": 403
},
{
"epoch": 0.34938022484865955,
"grad_norm": 5.8793230056762695,
"learning_rate": 7.715930902111325e-05,
"loss": 0.4889,
"step": 404
},
{
"epoch": 0.3502450273854137,
"grad_norm": 8.609319686889648,
"learning_rate": 7.735124760076777e-05,
"loss": 0.769,
"step": 405
},
{
"epoch": 0.35110982992216777,
"grad_norm": 6.56134033203125,
"learning_rate": 7.754318618042226e-05,
"loss": 0.3932,
"step": 406
},
{
"epoch": 0.35197463245892185,
"grad_norm": 8.588756561279297,
"learning_rate": 7.773512476007678e-05,
"loss": 0.5919,
"step": 407
},
{
"epoch": 0.352839434995676,
"grad_norm": 7.530106067657471,
"learning_rate": 7.792706333973129e-05,
"loss": 0.6037,
"step": 408
},
{
"epoch": 0.3537042375324301,
"grad_norm": 7.5281853675842285,
"learning_rate": 7.811900191938581e-05,
"loss": 0.4321,
"step": 409
},
{
"epoch": 0.3545690400691842,
"grad_norm": 8.16552448272705,
"learning_rate": 7.831094049904032e-05,
"loss": 1.1022,
"step": 410
},
{
"epoch": 0.3554338426059383,
"grad_norm": 8.752754211425781,
"learning_rate": 7.850287907869482e-05,
"loss": 0.5996,
"step": 411
},
{
"epoch": 0.35629864514269244,
"grad_norm": 7.659090995788574,
"learning_rate": 7.869481765834933e-05,
"loss": 0.5673,
"step": 412
},
{
"epoch": 0.3571634476794465,
"grad_norm": 6.884600639343262,
"learning_rate": 7.888675623800385e-05,
"loss": 0.3437,
"step": 413
},
{
"epoch": 0.35802825021620066,
"grad_norm": 5.328488349914551,
"learning_rate": 7.907869481765836e-05,
"loss": 0.519,
"step": 414
},
{
"epoch": 0.35889305275295474,
"grad_norm": 10.308977127075195,
"learning_rate": 7.927063339731286e-05,
"loss": 1.5373,
"step": 415
},
{
"epoch": 0.35975785528970883,
"grad_norm": 7.618837356567383,
"learning_rate": 7.946257197696737e-05,
"loss": 0.764,
"step": 416
},
{
"epoch": 0.36062265782646297,
"grad_norm": 8.787110328674316,
"learning_rate": 7.965451055662189e-05,
"loss": 0.6131,
"step": 417
},
{
"epoch": 0.36148746036321705,
"grad_norm": 6.432898998260498,
"learning_rate": 7.98464491362764e-05,
"loss": 0.6826,
"step": 418
},
{
"epoch": 0.3623522628999712,
"grad_norm": 8.762993812561035,
"learning_rate": 8.00383877159309e-05,
"loss": 0.9631,
"step": 419
},
{
"epoch": 0.3632170654367253,
"grad_norm": 5.939430236816406,
"learning_rate": 8.023032629558541e-05,
"loss": 0.4283,
"step": 420
},
{
"epoch": 0.3640818679734794,
"grad_norm": 8.092362403869629,
"learning_rate": 8.042226487523993e-05,
"loss": 1.2001,
"step": 421
},
{
"epoch": 0.3649466705102335,
"grad_norm": 7.594040870666504,
"learning_rate": 8.061420345489444e-05,
"loss": 0.4499,
"step": 422
},
{
"epoch": 0.3658114730469876,
"grad_norm": 12.614463806152344,
"learning_rate": 8.080614203454894e-05,
"loss": 1.4073,
"step": 423
},
{
"epoch": 0.3666762755837417,
"grad_norm": 6.807295322418213,
"learning_rate": 8.099808061420346e-05,
"loss": 0.8035,
"step": 424
},
{
"epoch": 0.3675410781204958,
"grad_norm": 3.6670141220092773,
"learning_rate": 8.119001919385797e-05,
"loss": 0.3207,
"step": 425
},
{
"epoch": 0.36840588065724994,
"grad_norm": 7.3801445960998535,
"learning_rate": 8.138195777351249e-05,
"loss": 0.4752,
"step": 426
},
{
"epoch": 0.369270683194004,
"grad_norm": 9.895638465881348,
"learning_rate": 8.157389635316698e-05,
"loss": 1.1256,
"step": 427
},
{
"epoch": 0.37013548573075816,
"grad_norm": 6.200985431671143,
"learning_rate": 8.17658349328215e-05,
"loss": 0.4226,
"step": 428
},
{
"epoch": 0.37100028826751225,
"grad_norm": 9.858406066894531,
"learning_rate": 8.195777351247601e-05,
"loss": 1.123,
"step": 429
},
{
"epoch": 0.3718650908042664,
"grad_norm": 7.274184703826904,
"learning_rate": 8.214971209213053e-05,
"loss": 0.4425,
"step": 430
},
{
"epoch": 0.37272989334102047,
"grad_norm": 4.712157249450684,
"learning_rate": 8.234165067178504e-05,
"loss": 0.4242,
"step": 431
},
{
"epoch": 0.37359469587777455,
"grad_norm": 7.515327453613281,
"learning_rate": 8.253358925143954e-05,
"loss": 1.0072,
"step": 432
},
{
"epoch": 0.3744594984145287,
"grad_norm": 3.97876238822937,
"learning_rate": 8.272552783109405e-05,
"loss": 0.241,
"step": 433
},
{
"epoch": 0.3753243009512828,
"grad_norm": 7.888240337371826,
"learning_rate": 8.291746641074857e-05,
"loss": 0.7359,
"step": 434
},
{
"epoch": 0.3761891034880369,
"grad_norm": 6.10671329498291,
"learning_rate": 8.310940499040308e-05,
"loss": 0.4583,
"step": 435
},
{
"epoch": 0.377053906024791,
"grad_norm": 6.102023601531982,
"learning_rate": 8.330134357005758e-05,
"loss": 0.9001,
"step": 436
},
{
"epoch": 0.37791870856154514,
"grad_norm": 7.122408390045166,
"learning_rate": 8.349328214971209e-05,
"loss": 0.4614,
"step": 437
},
{
"epoch": 0.3787835110982992,
"grad_norm": 9.432422637939453,
"learning_rate": 8.368522072936661e-05,
"loss": 1.238,
"step": 438
},
{
"epoch": 0.3796483136350533,
"grad_norm": 9.530061721801758,
"learning_rate": 8.387715930902112e-05,
"loss": 1.5289,
"step": 439
},
{
"epoch": 0.38051311617180744,
"grad_norm": 7.045010566711426,
"learning_rate": 8.406909788867562e-05,
"loss": 0.3283,
"step": 440
},
{
"epoch": 0.3813779187085615,
"grad_norm": 6.275206089019775,
"learning_rate": 8.426103646833013e-05,
"loss": 0.4147,
"step": 441
},
{
"epoch": 0.38224272124531566,
"grad_norm": 4.124218940734863,
"learning_rate": 8.445297504798465e-05,
"loss": 0.4956,
"step": 442
},
{
"epoch": 0.38310752378206975,
"grad_norm": 5.8184895515441895,
"learning_rate": 8.464491362763916e-05,
"loss": 0.5166,
"step": 443
},
{
"epoch": 0.3839723263188239,
"grad_norm": 2.6442999839782715,
"learning_rate": 8.483685220729366e-05,
"loss": 0.2486,
"step": 444
},
{
"epoch": 0.38483712885557797,
"grad_norm": 3.8425562381744385,
"learning_rate": 8.502879078694817e-05,
"loss": 0.4493,
"step": 445
},
{
"epoch": 0.3857019313923321,
"grad_norm": 9.125511169433594,
"learning_rate": 8.522072936660269e-05,
"loss": 1.0439,
"step": 446
},
{
"epoch": 0.3865667339290862,
"grad_norm": 9.67273998260498,
"learning_rate": 8.54126679462572e-05,
"loss": 1.249,
"step": 447
},
{
"epoch": 0.3874315364658403,
"grad_norm": 7.822050094604492,
"learning_rate": 8.560460652591172e-05,
"loss": 0.8329,
"step": 448
},
{
"epoch": 0.3882963390025944,
"grad_norm": 5.747166633605957,
"learning_rate": 8.579654510556623e-05,
"loss": 0.3256,
"step": 449
},
{
"epoch": 0.3891611415393485,
"grad_norm": 7.257145404815674,
"learning_rate": 8.598848368522073e-05,
"loss": 1.0333,
"step": 450
},
{
"epoch": 0.39002594407610264,
"grad_norm": 7.6516642570495605,
"learning_rate": 8.618042226487525e-05,
"loss": 0.3821,
"step": 451
},
{
"epoch": 0.3908907466128567,
"grad_norm": 6.943114757537842,
"learning_rate": 8.637236084452976e-05,
"loss": 0.4578,
"step": 452
},
{
"epoch": 0.39175554914961086,
"grad_norm": 6.90556526184082,
"learning_rate": 8.656429942418427e-05,
"loss": 0.6716,
"step": 453
},
{
"epoch": 0.39262035168636494,
"grad_norm": 5.005017280578613,
"learning_rate": 8.675623800383877e-05,
"loss": 0.2694,
"step": 454
},
{
"epoch": 0.393485154223119,
"grad_norm": 9.84821605682373,
"learning_rate": 8.694817658349329e-05,
"loss": 1.7739,
"step": 455
},
{
"epoch": 0.39434995675987317,
"grad_norm": 7.2032647132873535,
"learning_rate": 8.71401151631478e-05,
"loss": 0.7109,
"step": 456
},
{
"epoch": 0.39521475929662725,
"grad_norm": 10.030957221984863,
"learning_rate": 8.73320537428023e-05,
"loss": 0.5733,
"step": 457
},
{
"epoch": 0.3960795618333814,
"grad_norm": 3.6352131366729736,
"learning_rate": 8.752399232245681e-05,
"loss": 0.283,
"step": 458
},
{
"epoch": 0.39694436437013547,
"grad_norm": 3.4260525703430176,
"learning_rate": 8.771593090211133e-05,
"loss": 0.4214,
"step": 459
},
{
"epoch": 0.3978091669068896,
"grad_norm": 4.595706462860107,
"learning_rate": 8.790786948176584e-05,
"loss": 0.6332,
"step": 460
},
{
"epoch": 0.3978091669068896,
"eval_Qnli-dev-1024_cosine_accuracy": 0.75,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.879416823387146,
"eval_Qnli-dev-1024_cosine_ap": 0.781841863547347,
"eval_Qnli-dev-1024_cosine_f1": 0.7346938775510203,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8326764106750488,
"eval_Qnli-dev-1024_cosine_mcc": 0.4683019469005233,
"eval_Qnli-dev-1024_cosine_precision": 0.6792452830188679,
"eval_Qnli-dev-1024_cosine_recall": 0.8,
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8109143972396851,
"eval_Qnli-dev_cosine_ap": 0.7568700790587495,
"eval_Qnli-dev_cosine_f1": 0.7346938775510203,
"eval_Qnli-dev_cosine_f1_threshold": 0.742037296295166,
"eval_Qnli-dev_cosine_mcc": 0.4683019469005233,
"eval_Qnli-dev_cosine_precision": 0.6792452830188679,
"eval_Qnli-dev_cosine_recall": 0.8,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 0.7834931015968323,
"eval_global_dataset_runtime": 68.0114,
"eval_global_dataset_samples_per_second": 14.292,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.9375,
"eval_sts-test-1024_pearson_cosine": 0.8881975899886265,
"eval_sts-test-1024_spearman_cosine": 0.9114913957523785,
"eval_sts-test_pearson_cosine": 0.9096311897411768,
"eval_sts-test_spearman_cosine": 0.9200152476526354,
"step": 460
},
{
"epoch": 0.3986739694436437,
"grad_norm": 10.936285018920898,
"learning_rate": 8.809980806142035e-05,
"loss": 1.2458,
"step": 461
},
{
"epoch": 0.39953877198039783,
"grad_norm": 5.99333381652832,
"learning_rate": 8.829174664107485e-05,
"loss": 0.3104,
"step": 462
},
{
"epoch": 0.4004035745171519,
"grad_norm": 6.260789394378662,
"learning_rate": 8.848368522072937e-05,
"loss": 0.4634,
"step": 463
},
{
"epoch": 0.401268377053906,
"grad_norm": 4.397698879241943,
"learning_rate": 8.867562380038388e-05,
"loss": 0.5893,
"step": 464
},
{
"epoch": 0.40213317959066014,
"grad_norm": 4.650321960449219,
"learning_rate": 8.88675623800384e-05,
"loss": 0.377,
"step": 465
},
{
"epoch": 0.4029979821274142,
"grad_norm": 9.407013893127441,
"learning_rate": 8.905950095969289e-05,
"loss": 0.5403,
"step": 466
},
{
"epoch": 0.40386278466416836,
"grad_norm": 10.255672454833984,
"learning_rate": 8.925143953934741e-05,
"loss": 0.9454,
"step": 467
},
{
"epoch": 0.40472758720092245,
"grad_norm": 11.186202049255371,
"learning_rate": 8.944337811900192e-05,
"loss": 0.9292,
"step": 468
},
{
"epoch": 0.4055923897376766,
"grad_norm": 6.13421630859375,
"learning_rate": 8.963531669865644e-05,
"loss": 0.4132,
"step": 469
},
{
"epoch": 0.40645719227443067,
"grad_norm": 12.439327239990234,
"learning_rate": 8.982725527831093e-05,
"loss": 0.9805,
"step": 470
},
{
"epoch": 0.4073219948111848,
"grad_norm": 10.574874877929688,
"learning_rate": 9.001919385796545e-05,
"loss": 0.7784,
"step": 471
},
{
"epoch": 0.4081867973479389,
"grad_norm": 5.993617057800293,
"learning_rate": 9.021113243761996e-05,
"loss": 0.369,
"step": 472
},
{
"epoch": 0.409051599884693,
"grad_norm": 10.213888168334961,
"learning_rate": 9.040307101727448e-05,
"loss": 1.4911,
"step": 473
},
{
"epoch": 0.4099164024214471,
"grad_norm": 7.043622016906738,
"learning_rate": 9.059500959692899e-05,
"loss": 0.4223,
"step": 474
},
{
"epoch": 0.4107812049582012,
"grad_norm": 8.350674629211426,
"learning_rate": 9.07869481765835e-05,
"loss": 1.2959,
"step": 475
},
{
"epoch": 0.41164600749495533,
"grad_norm": 8.64110279083252,
"learning_rate": 9.097888675623801e-05,
"loss": 0.4928,
"step": 476
},
{
"epoch": 0.4125108100317094,
"grad_norm": 6.388255596160889,
"learning_rate": 9.117082533589252e-05,
"loss": 1.02,
"step": 477
},
{
"epoch": 0.41337561256846356,
"grad_norm": 4.513554096221924,
"learning_rate": 9.136276391554703e-05,
"loss": 0.5034,
"step": 478
},
{
"epoch": 0.41424041510521764,
"grad_norm": 10.509414672851562,
"learning_rate": 9.155470249520153e-05,
"loss": 1.6119,
"step": 479
},
{
"epoch": 0.4151052176419717,
"grad_norm": 5.085805416107178,
"learning_rate": 9.174664107485605e-05,
"loss": 0.59,
"step": 480
},
{
"epoch": 0.41597002017872586,
"grad_norm": 8.275995254516602,
"learning_rate": 9.193857965451056e-05,
"loss": 0.5589,
"step": 481
},
{
"epoch": 0.41683482271547995,
"grad_norm": 9.266075134277344,
"learning_rate": 9.213051823416508e-05,
"loss": 0.8402,
"step": 482
},
{
"epoch": 0.4176996252522341,
"grad_norm": 9.998162269592285,
"learning_rate": 9.232245681381957e-05,
"loss": 0.5293,
"step": 483
},
{
"epoch": 0.41856442778898817,
"grad_norm": 6.49897575378418,
"learning_rate": 9.25143953934741e-05,
"loss": 0.5911,
"step": 484
},
{
"epoch": 0.4194292303257423,
"grad_norm": 2.814267158508301,
"learning_rate": 9.27063339731286e-05,
"loss": 0.1819,
"step": 485
},
{
"epoch": 0.4202940328624964,
"grad_norm": 6.657732009887695,
"learning_rate": 9.289827255278312e-05,
"loss": 0.6963,
"step": 486
},
{
"epoch": 0.42115883539925053,
"grad_norm": 10.121885299682617,
"learning_rate": 9.309021113243761e-05,
"loss": 0.6004,
"step": 487
},
{
"epoch": 0.4220236379360046,
"grad_norm": 15.206952095031738,
"learning_rate": 9.328214971209214e-05,
"loss": 2.0825,
"step": 488
},
{
"epoch": 0.4228884404727587,
"grad_norm": 11.911534309387207,
"learning_rate": 9.347408829174664e-05,
"loss": 1.6674,
"step": 489
},
{
"epoch": 0.42375324300951284,
"grad_norm": 6.184067726135254,
"learning_rate": 9.366602687140116e-05,
"loss": 0.5685,
"step": 490
},
{
"epoch": 0.4246180455462669,
"grad_norm": 7.771515846252441,
"learning_rate": 9.385796545105567e-05,
"loss": 0.7835,
"step": 491
},
{
"epoch": 0.42548284808302106,
"grad_norm": 8.338656425476074,
"learning_rate": 9.404990403071018e-05,
"loss": 0.4613,
"step": 492
},
{
"epoch": 0.42634765061977514,
"grad_norm": 9.678628921508789,
"learning_rate": 9.424184261036468e-05,
"loss": 1.0492,
"step": 493
},
{
"epoch": 0.4272124531565293,
"grad_norm": 5.315983772277832,
"learning_rate": 9.44337811900192e-05,
"loss": 0.3512,
"step": 494
},
{
"epoch": 0.42807725569328337,
"grad_norm": 7.20918607711792,
"learning_rate": 9.462571976967371e-05,
"loss": 0.8227,
"step": 495
},
{
"epoch": 0.42894205823003745,
"grad_norm": 8.044875144958496,
"learning_rate": 9.481765834932822e-05,
"loss": 0.7849,
"step": 496
},
{
"epoch": 0.4298068607667916,
"grad_norm": 8.14607048034668,
"learning_rate": 9.500959692898272e-05,
"loss": 1.2823,
"step": 497
},
{
"epoch": 0.43067166330354567,
"grad_norm": 9.731268882751465,
"learning_rate": 9.520153550863724e-05,
"loss": 1.2799,
"step": 498
},
{
"epoch": 0.4315364658402998,
"grad_norm": 9.654071807861328,
"learning_rate": 9.539347408829176e-05,
"loss": 0.596,
"step": 499
},
{
"epoch": 0.4324012683770539,
"grad_norm": 9.026534080505371,
"learning_rate": 9.558541266794626e-05,
"loss": 0.6793,
"step": 500
},
{
"epoch": 0.43326607091380803,
"grad_norm": 7.325682163238525,
"learning_rate": 9.577735124760078e-05,
"loss": 0.575,
"step": 501
},
{
"epoch": 0.4341308734505621,
"grad_norm": 4.846238136291504,
"learning_rate": 9.596928982725528e-05,
"loss": 0.2631,
"step": 502
},
{
"epoch": 0.43499567598731625,
"grad_norm": 8.93980598449707,
"learning_rate": 9.61612284069098e-05,
"loss": 0.5173,
"step": 503
},
{
"epoch": 0.43586047852407034,
"grad_norm": 11.70151138305664,
"learning_rate": 9.63531669865643e-05,
"loss": 0.9963,
"step": 504
},
{
"epoch": 0.4367252810608244,
"grad_norm": 6.328804016113281,
"learning_rate": 9.654510556621882e-05,
"loss": 0.9315,
"step": 505
},
{
"epoch": 0.43759008359757856,
"grad_norm": 9.678471565246582,
"learning_rate": 9.673704414587332e-05,
"loss": 0.7878,
"step": 506
},
{
"epoch": 0.43845488613433264,
"grad_norm": 6.569301128387451,
"learning_rate": 9.692898272552784e-05,
"loss": 0.4346,
"step": 507
},
{
"epoch": 0.4393196886710868,
"grad_norm": 6.5204596519470215,
"learning_rate": 9.712092130518235e-05,
"loss": 0.7662,
"step": 508
},
{
"epoch": 0.44018449120784087,
"grad_norm": 8.459349632263184,
"learning_rate": 9.731285988483686e-05,
"loss": 0.5221,
"step": 509
},
{
"epoch": 0.441049293744595,
"grad_norm": 8.08749008178711,
"learning_rate": 9.750479846449136e-05,
"loss": 0.9803,
"step": 510
},
{
"epoch": 0.4419140962813491,
"grad_norm": 8.031821250915527,
"learning_rate": 9.769673704414588e-05,
"loss": 1.1605,
"step": 511
},
{
"epoch": 0.4427788988181032,
"grad_norm": 9.393692016601562,
"learning_rate": 9.788867562380039e-05,
"loss": 0.6801,
"step": 512
},
{
"epoch": 0.4436437013548573,
"grad_norm": 5.011040687561035,
"learning_rate": 9.80806142034549e-05,
"loss": 0.3935,
"step": 513
},
{
"epoch": 0.4445085038916114,
"grad_norm": 2.235301971435547,
"learning_rate": 9.82725527831094e-05,
"loss": 0.1377,
"step": 514
},
{
"epoch": 0.44537330642836553,
"grad_norm": 5.642356872558594,
"learning_rate": 9.846449136276392e-05,
"loss": 0.985,
"step": 515
},
{
"epoch": 0.4462381089651196,
"grad_norm": 9.386540412902832,
"learning_rate": 9.865642994241843e-05,
"loss": 0.7949,
"step": 516
},
{
"epoch": 0.44710291150187376,
"grad_norm": 5.782979965209961,
"learning_rate": 9.884836852207294e-05,
"loss": 0.5974,
"step": 517
},
{
"epoch": 0.44796771403862784,
"grad_norm": 5.323793888092041,
"learning_rate": 9.904030710172744e-05,
"loss": 0.6797,
"step": 518
},
{
"epoch": 0.448832516575382,
"grad_norm": 8.012255668640137,
"learning_rate": 9.923224568138196e-05,
"loss": 0.6953,
"step": 519
},
{
"epoch": 0.44969731911213606,
"grad_norm": 6.930400371551514,
"learning_rate": 9.942418426103647e-05,
"loss": 0.4729,
"step": 520
},
{
"epoch": 0.45056212164889015,
"grad_norm": 10.408514976501465,
"learning_rate": 9.961612284069098e-05,
"loss": 0.5509,
"step": 521
},
{
"epoch": 0.4514269241856443,
"grad_norm": 5.082659721374512,
"learning_rate": 9.980806142034548e-05,
"loss": 0.4549,
"step": 522
},
{
"epoch": 0.45229172672239837,
"grad_norm": 10.625167846679688,
"learning_rate": 0.0001,
"loss": 1.6151,
"step": 523
},
{
"epoch": 0.4531565292591525,
"grad_norm": 7.423165798187256,
"learning_rate": 9.999974430536151e-05,
"loss": 0.4466,
"step": 524
},
{
"epoch": 0.4540213317959066,
"grad_norm": 10.48806095123291,
"learning_rate": 9.999897722406126e-05,
"loss": 1.3489,
"step": 525
},
{
"epoch": 0.45488613433266073,
"grad_norm": 8.526479721069336,
"learning_rate": 9.999769876394478e-05,
"loss": 0.6699,
"step": 526
},
{
"epoch": 0.4557509368694148,
"grad_norm": 7.596718788146973,
"learning_rate": 9.999590893808788e-05,
"loss": 0.5189,
"step": 527
},
{
"epoch": 0.45661573940616895,
"grad_norm": 10.089831352233887,
"learning_rate": 9.999360776479651e-05,
"loss": 0.7617,
"step": 528
},
{
"epoch": 0.45748054194292304,
"grad_norm": 7.766354560852051,
"learning_rate": 9.999079526760659e-05,
"loss": 0.5148,
"step": 529
},
{
"epoch": 0.4583453444796771,
"grad_norm": 6.268951892852783,
"learning_rate": 9.998747147528374e-05,
"loss": 0.5564,
"step": 530
},
{
"epoch": 0.45921014701643126,
"grad_norm": 5.794777870178223,
"learning_rate": 9.9983636421823e-05,
"loss": 0.4038,
"step": 531
},
{
"epoch": 0.46007494955318534,
"grad_norm": 8.995209693908691,
"learning_rate": 9.997929014644845e-05,
"loss": 0.6968,
"step": 532
},
{
"epoch": 0.4609397520899395,
"grad_norm": 6.833916187286377,
"learning_rate": 9.997443269361289e-05,
"loss": 0.4393,
"step": 533
},
{
"epoch": 0.46180455462669356,
"grad_norm": 9.531277656555176,
"learning_rate": 9.996906411299726e-05,
"loss": 0.7228,
"step": 534
},
{
"epoch": 0.4626693571634477,
"grad_norm": 11.1766939163208,
"learning_rate": 9.996318445951032e-05,
"loss": 0.898,
"step": 535
},
{
"epoch": 0.4635341597002018,
"grad_norm": 4.982804298400879,
"learning_rate": 9.995679379328785e-05,
"loss": 0.3461,
"step": 536
},
{
"epoch": 0.46439896223695587,
"grad_norm": 3.0458362102508545,
"learning_rate": 9.994989217969224e-05,
"loss": 0.4753,
"step": 537
},
{
"epoch": 0.46526376477371,
"grad_norm": 7.552469253540039,
"learning_rate": 9.99424796893117e-05,
"loss": 0.4446,
"step": 538
},
{
"epoch": 0.4661285673104641,
"grad_norm": 10.52206039428711,
"learning_rate": 9.99345563979596e-05,
"loss": 0.8696,
"step": 539
},
{
"epoch": 0.46699336984721823,
"grad_norm": 9.044191360473633,
"learning_rate": 9.992612238667368e-05,
"loss": 1.0505,
"step": 540
},
{
"epoch": 0.4678581723839723,
"grad_norm": 7.528494834899902,
"learning_rate": 9.991717774171514e-05,
"loss": 0.3523,
"step": 541
},
{
"epoch": 0.46872297492072645,
"grad_norm": 8.00634765625,
"learning_rate": 9.990772255456797e-05,
"loss": 0.6452,
"step": 542
},
{
"epoch": 0.46958777745748054,
"grad_norm": 6.528989315032959,
"learning_rate": 9.989775692193773e-05,
"loss": 0.5005,
"step": 543
},
{
"epoch": 0.4704525799942347,
"grad_norm": 7.66871452331543,
"learning_rate": 9.988728094575082e-05,
"loss": 0.5364,
"step": 544
},
{
"epoch": 0.47131738253098876,
"grad_norm": 6.3178558349609375,
"learning_rate": 9.987629473315325e-05,
"loss": 0.6121,
"step": 545
},
{
"epoch": 0.47218218506774284,
"grad_norm": 3.713564872741699,
"learning_rate": 9.986479839650966e-05,
"loss": 0.2326,
"step": 546
},
{
"epoch": 0.473046987604497,
"grad_norm": 11.291918754577637,
"learning_rate": 9.98527920534021e-05,
"loss": 1.2339,
"step": 547
},
{
"epoch": 0.47391179014125107,
"grad_norm": 8.482532501220703,
"learning_rate": 9.984027582662892e-05,
"loss": 0.8196,
"step": 548
},
{
"epoch": 0.4747765926780052,
"grad_norm": 2.9724512100219727,
"learning_rate": 9.982724984420333e-05,
"loss": 0.2354,
"step": 549
},
{
"epoch": 0.4756413952147593,
"grad_norm": 9.461052894592285,
"learning_rate": 9.981371423935233e-05,
"loss": 0.6666,
"step": 550
},
{
"epoch": 0.4765061977515134,
"grad_norm": 5.076896667480469,
"learning_rate": 9.979966915051517e-05,
"loss": 0.3125,
"step": 551
},
{
"epoch": 0.4773710002882675,
"grad_norm": 8.995684623718262,
"learning_rate": 9.978511472134203e-05,
"loss": 0.7455,
"step": 552
},
{
"epoch": 0.4782358028250216,
"grad_norm": 2.971757173538208,
"learning_rate": 9.977005110069245e-05,
"loss": 0.32,
"step": 553
},
{
"epoch": 0.47910060536177573,
"grad_norm": 7.4964399337768555,
"learning_rate": 9.975447844263395e-05,
"loss": 0.9793,
"step": 554
},
{
"epoch": 0.4799654078985298,
"grad_norm": 6.13850736618042,
"learning_rate": 9.973839690644032e-05,
"loss": 0.7821,
"step": 555
},
{
"epoch": 0.48083021043528396,
"grad_norm": 8.951305389404297,
"learning_rate": 9.972180665659004e-05,
"loss": 0.6022,
"step": 556
},
{
"epoch": 0.48169501297203804,
"grad_norm": 6.228058338165283,
"learning_rate": 9.970470786276467e-05,
"loss": 0.8369,
"step": 557
},
{
"epoch": 0.4825598155087922,
"grad_norm": 10.346866607666016,
"learning_rate": 9.968710069984698e-05,
"loss": 0.8025,
"step": 558
},
{
"epoch": 0.48342461804554626,
"grad_norm": 2.9348461627960205,
"learning_rate": 9.966898534791926e-05,
"loss": 0.1631,
"step": 559
},
{
"epoch": 0.4842894205823004,
"grad_norm": 8.404128074645996,
"learning_rate": 9.965036199226147e-05,
"loss": 0.7858,
"step": 560
},
{
"epoch": 0.4851542231190545,
"grad_norm": 3.0906944274902344,
"learning_rate": 9.963123082334925e-05,
"loss": 0.3223,
"step": 561
},
{
"epoch": 0.48601902565580857,
"grad_norm": 4.46307373046875,
"learning_rate": 9.961159203685212e-05,
"loss": 0.2361,
"step": 562
},
{
"epoch": 0.4868838281925627,
"grad_norm": 7.367444038391113,
"learning_rate": 9.959144583363141e-05,
"loss": 1.2893,
"step": 563
},
{
"epoch": 0.4877486307293168,
"grad_norm": 4.720983505249023,
"learning_rate": 9.957079241973809e-05,
"loss": 0.5666,
"step": 564
},
{
"epoch": 0.48861343326607093,
"grad_norm": 5.1994829177856445,
"learning_rate": 9.95496320064109e-05,
"loss": 0.2794,
"step": 565
},
{
"epoch": 0.489478235802825,
"grad_norm": 8.899139404296875,
"learning_rate": 9.952796481007401e-05,
"loss": 0.6303,
"step": 566
},
{
"epoch": 0.49034303833957915,
"grad_norm": 4.118505477905273,
"learning_rate": 9.950579105233483e-05,
"loss": 0.1724,
"step": 567
},
{
"epoch": 0.49120784087633323,
"grad_norm": 6.728652477264404,
"learning_rate": 9.948311095998181e-05,
"loss": 0.662,
"step": 568
},
{
"epoch": 0.4920726434130873,
"grad_norm": 7.761811256408691,
"learning_rate": 9.945992476498209e-05,
"loss": 0.4051,
"step": 569
},
{
"epoch": 0.49293744594984146,
"grad_norm": 10.437024116516113,
"learning_rate": 9.943623270447909e-05,
"loss": 0.7596,
"step": 570
},
{
"epoch": 0.49380224848659554,
"grad_norm": 8.579437255859375,
"learning_rate": 9.94120350207901e-05,
"loss": 0.4666,
"step": 571
},
{
"epoch": 0.4946670510233497,
"grad_norm": 11.050808906555176,
"learning_rate": 9.938733196140386e-05,
"loss": 0.8923,
"step": 572
},
{
"epoch": 0.49553185356010376,
"grad_norm": 6.367518901824951,
"learning_rate": 9.936212377897798e-05,
"loss": 0.3065,
"step": 573
},
{
"epoch": 0.4963966560968579,
"grad_norm": 5.786684036254883,
"learning_rate": 9.933641073133631e-05,
"loss": 0.6386,
"step": 574
},
{
"epoch": 0.497261458633612,
"grad_norm": 3.814639091491699,
"learning_rate": 9.93101930814664e-05,
"loss": 0.2868,
"step": 575
},
{
"epoch": 0.497261458633612,
"eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8905854225158691,
"eval_Qnli-dev-1024_cosine_ap": 0.7246322873104885,
"eval_Qnli-dev-1024_cosine_f1": 0.6909090909090909,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.805380642414093,
"eval_Qnli-dev-1024_cosine_mcc": 0.33620907137955974,
"eval_Qnli-dev-1024_cosine_precision": 0.5846153846153846,
"eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444,
"eval_Qnli-dev_cosine_accuracy": 0.6979166666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8145653009414673,
"eval_Qnli-dev_cosine_ap": 0.7254668033788828,
"eval_Qnli-dev_cosine_f1": 0.7289719626168225,
"eval_Qnli-dev_cosine_f1_threshold": 0.7076575756072998,
"eval_Qnli-dev_cosine_mcc": 0.43373226132862797,
"eval_Qnli-dev_cosine_precision": 0.6290322580645161,
"eval_Qnli-dev_cosine_recall": 0.8666666666666667,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816,
"eval_allNLI-triplets_cosine_accuracy": 0.96875,
"eval_global_dataset_loss": 0.5409160852432251,
"eval_global_dataset_runtime": 68.0813,
"eval_global_dataset_samples_per_second": 14.277,
"eval_global_dataset_steps_per_second": 0.308,
"eval_sequential_score": 0.9479166865348816,
"eval_sts-test-1024_pearson_cosine": 0.8845045352861245,
"eval_sts-test-1024_spearman_cosine": 0.9123160743907711,
"eval_sts-test_pearson_cosine": 0.9122846955191348,
"eval_sts-test_spearman_cosine": 0.920479051307594,
"step": 575
},
{
"epoch": 0.4981262611703661,
"grad_norm": 9.594101905822754,
"learning_rate": 9.928347109751677e-05,
"loss": 0.6007,
"step": 576
},
{
"epoch": 0.4989910637071202,
"grad_norm": 5.121261119842529,
"learning_rate": 9.925624505279411e-05,
"loss": 0.2853,
"step": 577
},
{
"epoch": 0.4998558662438743,
"grad_norm": 4.0415215492248535,
"learning_rate": 9.922851522576058e-05,
"loss": 0.3982,
"step": 578
},
{
"epoch": 0.5007206687806284,
"grad_norm": 11.199448585510254,
"learning_rate": 9.92002819000309e-05,
"loss": 0.7686,
"step": 579
},
{
"epoch": 0.5015854713173825,
"grad_norm": 5.71658992767334,
"learning_rate": 9.917154536436948e-05,
"loss": 0.3809,
"step": 580
},
{
"epoch": 0.5024502738541367,
"grad_norm": 10.616915702819824,
"learning_rate": 9.914230591268743e-05,
"loss": 1.1228,
"step": 581
},
{
"epoch": 0.5033150763908908,
"grad_norm": 4.5049028396606445,
"learning_rate": 9.911256384403961e-05,
"loss": 0.3737,
"step": 582
},
{
"epoch": 0.5041798789276448,
"grad_norm": 5.022185325622559,
"learning_rate": 9.90823194626215e-05,
"loss": 0.2141,
"step": 583
},
{
"epoch": 0.505044681464399,
"grad_norm": 10.781139373779297,
"learning_rate": 9.905157307776616e-05,
"loss": 1.2942,
"step": 584
},
{
"epoch": 0.5059094840011531,
"grad_norm": 10.425268173217773,
"learning_rate": 9.902032500394103e-05,
"loss": 1.4177,
"step": 585
},
{
"epoch": 0.5067742865379071,
"grad_norm": 6.989367961883545,
"learning_rate": 9.898857556074468e-05,
"loss": 0.5832,
"step": 586
},
{
"epoch": 0.5076390890746613,
"grad_norm": 6.156850814819336,
"learning_rate": 9.895632507290362e-05,
"loss": 0.4419,
"step": 587
},
{
"epoch": 0.5085038916114154,
"grad_norm": 6.66822624206543,
"learning_rate": 9.892357387026892e-05,
"loss": 0.5903,
"step": 588
},
{
"epoch": 0.5093686941481695,
"grad_norm": 8.24500560760498,
"learning_rate": 9.889032228781285e-05,
"loss": 0.44,
"step": 589
},
{
"epoch": 0.5102334966849236,
"grad_norm": 6.062635898590088,
"learning_rate": 9.88565706656255e-05,
"loss": 0.3002,
"step": 590
},
{
"epoch": 0.5110982992216777,
"grad_norm": 8.822070121765137,
"learning_rate": 9.882231934891119e-05,
"loss": 0.6883,
"step": 591
},
{
"epoch": 0.5119631017584318,
"grad_norm": 6.581031322479248,
"learning_rate": 9.878756868798504e-05,
"loss": 0.7068,
"step": 592
},
{
"epoch": 0.512827904295186,
"grad_norm": 6.801186561584473,
"learning_rate": 9.875231903826936e-05,
"loss": 0.5245,
"step": 593
},
{
"epoch": 0.51369270683194,
"grad_norm": 8.146296501159668,
"learning_rate": 9.871657076029003e-05,
"loss": 0.7089,
"step": 594
},
{
"epoch": 0.5145575093686942,
"grad_norm": 12.6628999710083,
"learning_rate": 9.868032421967275e-05,
"loss": 1.8026,
"step": 595
},
{
"epoch": 0.5154223119054483,
"grad_norm": 3.0164332389831543,
"learning_rate": 9.864357978713936e-05,
"loss": 0.2736,
"step": 596
},
{
"epoch": 0.5162871144422023,
"grad_norm": 3.916259527206421,
"learning_rate": 9.860633783850406e-05,
"loss": 0.3196,
"step": 597
},
{
"epoch": 0.5171519169789565,
"grad_norm": 8.493870735168457,
"learning_rate": 9.856859875466948e-05,
"loss": 0.7005,
"step": 598
},
{
"epoch": 0.5180167195157106,
"grad_norm": 8.802308082580566,
"learning_rate": 9.853036292162291e-05,
"loss": 0.4239,
"step": 599
},
{
"epoch": 0.5188815220524647,
"grad_norm": 10.11483383178711,
"learning_rate": 9.849163073043223e-05,
"loss": 0.5686,
"step": 600
},
{
"epoch": 0.5197463245892188,
"grad_norm": 7.787915229797363,
"learning_rate": 9.845240257724198e-05,
"loss": 0.6015,
"step": 601
},
{
"epoch": 0.5206111271259729,
"grad_norm": 3.49916410446167,
"learning_rate": 9.841267886326932e-05,
"loss": 0.1611,
"step": 602
},
{
"epoch": 0.521475929662727,
"grad_norm": 8.411331176757812,
"learning_rate": 9.837245999479985e-05,
"loss": 0.6458,
"step": 603
},
{
"epoch": 0.5223407321994811,
"grad_norm": 7.405316352844238,
"learning_rate": 9.833174638318356e-05,
"loss": 0.7173,
"step": 604
},
{
"epoch": 0.5232055347362352,
"grad_norm": 8.42251968383789,
"learning_rate": 9.829053844483052e-05,
"loss": 0.8808,
"step": 605
},
{
"epoch": 0.5240703372729894,
"grad_norm": 6.8583269119262695,
"learning_rate": 9.824883660120667e-05,
"loss": 0.625,
"step": 606
},
{
"epoch": 0.5249351398097435,
"grad_norm": 6.834749698638916,
"learning_rate": 9.820664127882957e-05,
"loss": 0.4378,
"step": 607
},
{
"epoch": 0.5257999423464975,
"grad_norm": 5.739812850952148,
"learning_rate": 9.81639529092639e-05,
"loss": 0.7798,
"step": 608
},
{
"epoch": 0.5266647448832517,
"grad_norm": 7.9455084800720215,
"learning_rate": 9.812077192911713e-05,
"loss": 0.6586,
"step": 609
},
{
"epoch": 0.5275295474200058,
"grad_norm": 7.959743499755859,
"learning_rate": 9.80770987800351e-05,
"loss": 0.8475,
"step": 610
},
{
"epoch": 0.5283943499567598,
"grad_norm": 5.485658168792725,
"learning_rate": 9.803293390869739e-05,
"loss": 0.4095,
"step": 611
},
{
"epoch": 0.529259152493514,
"grad_norm": 7.284278392791748,
"learning_rate": 9.798827776681286e-05,
"loss": 0.4946,
"step": 612
},
{
"epoch": 0.5301239550302681,
"grad_norm": 8.508416175842285,
"learning_rate": 9.79431308111149e-05,
"loss": 0.3962,
"step": 613
},
{
"epoch": 0.5309887575670222,
"grad_norm": 5.56104850769043,
"learning_rate": 9.789749350335693e-05,
"loss": 0.7191,
"step": 614
},
{
"epoch": 0.5318535601037763,
"grad_norm": 11.444177627563477,
"learning_rate": 9.785136631030755e-05,
"loss": 0.6589,
"step": 615
},
{
"epoch": 0.5327183626405304,
"grad_norm": 8.934037208557129,
"learning_rate": 9.780474970374578e-05,
"loss": 0.5603,
"step": 616
},
{
"epoch": 0.5335831651772845,
"grad_norm": 12.182479858398438,
"learning_rate": 9.775764416045628e-05,
"loss": 1.3667,
"step": 617
},
{
"epoch": 0.5344479677140386,
"grad_norm": 6.506429195404053,
"learning_rate": 9.771005016222446e-05,
"loss": 0.5623,
"step": 618
},
{
"epoch": 0.5353127702507927,
"grad_norm": 8.439187049865723,
"learning_rate": 9.766196819583149e-05,
"loss": 0.6174,
"step": 619
},
{
"epoch": 0.5361775727875469,
"grad_norm": 9.493589401245117,
"learning_rate": 9.761339875304945e-05,
"loss": 0.6462,
"step": 620
},
{
"epoch": 0.537042375324301,
"grad_norm": 2.347870111465454,
"learning_rate": 9.756434233063616e-05,
"loss": 0.1693,
"step": 621
},
{
"epoch": 0.537907177861055,
"grad_norm": 8.565069198608398,
"learning_rate": 9.751479943033019e-05,
"loss": 0.4887,
"step": 622
},
{
"epoch": 0.5387719803978092,
"grad_norm": 8.762991905212402,
"learning_rate": 9.746477055884571e-05,
"loss": 0.9039,
"step": 623
},
{
"epoch": 0.5396367829345633,
"grad_norm": 5.132269382476807,
"learning_rate": 9.741425622786728e-05,
"loss": 0.3159,
"step": 624
},
{
"epoch": 0.5405015854713174,
"grad_norm": 6.715843677520752,
"learning_rate": 9.736325695404464e-05,
"loss": 0.6409,
"step": 625
},
{
"epoch": 0.5413663880080715,
"grad_norm": 2.351118803024292,
"learning_rate": 9.731177325898746e-05,
"loss": 0.1413,
"step": 626
},
{
"epoch": 0.5422311905448256,
"grad_norm": 5.473691940307617,
"learning_rate": 9.725980566925989e-05,
"loss": 0.3963,
"step": 627
},
{
"epoch": 0.5430959930815797,
"grad_norm": 6.525996685028076,
"learning_rate": 9.72073547163753e-05,
"loss": 0.4283,
"step": 628
},
{
"epoch": 0.5439607956183338,
"grad_norm": 9.671774864196777,
"learning_rate": 9.71544209367908e-05,
"loss": 0.8147,
"step": 629
},
{
"epoch": 0.5448255981550879,
"grad_norm": 7.720305919647217,
"learning_rate": 9.710100487190173e-05,
"loss": 0.7238,
"step": 630
},
{
"epoch": 0.545690400691842,
"grad_norm": 6.962470531463623,
"learning_rate": 9.704710706803613e-05,
"loss": 0.3583,
"step": 631
},
{
"epoch": 0.5465552032285962,
"grad_norm": 7.1871819496154785,
"learning_rate": 9.699272807644921e-05,
"loss": 0.5934,
"step": 632
},
{
"epoch": 0.5474200057653502,
"grad_norm": 8.43585205078125,
"learning_rate": 9.693786845331761e-05,
"loss": 0.3339,
"step": 633
},
{
"epoch": 0.5482848083021044,
"grad_norm": 8.839116096496582,
"learning_rate": 9.68825287597338e-05,
"loss": 0.5551,
"step": 634
},
{
"epoch": 0.5491496108388585,
"grad_norm": 7.399514675140381,
"learning_rate": 9.68267095617003e-05,
"loss": 0.7277,
"step": 635
},
{
"epoch": 0.5500144133756125,
"grad_norm": 3.7421650886535645,
"learning_rate": 9.677041143012391e-05,
"loss": 0.3276,
"step": 636
},
{
"epoch": 0.5508792159123667,
"grad_norm": 6.863941669464111,
"learning_rate": 9.67136349408098e-05,
"loss": 0.3983,
"step": 637
},
{
"epoch": 0.5517440184491208,
"grad_norm": 8.192028999328613,
"learning_rate": 9.665638067445577e-05,
"loss": 0.5536,
"step": 638
},
{
"epoch": 0.5526088209858749,
"grad_norm": 6.802035331726074,
"learning_rate": 9.659864921664617e-05,
"loss": 0.4256,
"step": 639
},
{
"epoch": 0.553473623522629,
"grad_norm": 8.902397155761719,
"learning_rate": 9.654044115784594e-05,
"loss": 0.6132,
"step": 640
},
{
"epoch": 0.5543384260593831,
"grad_norm": 3.023282289505005,
"learning_rate": 9.648175709339465e-05,
"loss": 0.1601,
"step": 641
},
{
"epoch": 0.5552032285961372,
"grad_norm": 6.913763523101807,
"learning_rate": 9.642259762350032e-05,
"loss": 0.8637,
"step": 642
},
{
"epoch": 0.5560680311328913,
"grad_norm": 5.186830043792725,
"learning_rate": 9.636296335323334e-05,
"loss": 0.2678,
"step": 643
},
{
"epoch": 0.5569328336696454,
"grad_norm": 8.123047828674316,
"learning_rate": 9.63028548925202e-05,
"loss": 0.4715,
"step": 644
},
{
"epoch": 0.5577976362063995,
"grad_norm": 8.248505592346191,
"learning_rate": 9.624227285613736e-05,
"loss": 0.4066,
"step": 645
},
{
"epoch": 0.5586624387431537,
"grad_norm": 7.174196243286133,
"learning_rate": 9.618121786370491e-05,
"loss": 0.2985,
"step": 646
},
{
"epoch": 0.5595272412799077,
"grad_norm": 9.055746078491211,
"learning_rate": 9.61196905396802e-05,
"loss": 0.4818,
"step": 647
},
{
"epoch": 0.5603920438166619,
"grad_norm": 5.331139087677002,
"learning_rate": 9.605769151335151e-05,
"loss": 0.3297,
"step": 648
},
{
"epoch": 0.561256846353416,
"grad_norm": 4.492726802825928,
"learning_rate": 9.59952214188316e-05,
"loss": 0.2309,
"step": 649
},
{
"epoch": 0.56212164889017,
"grad_norm": 7.451852798461914,
"learning_rate": 9.593228089505117e-05,
"loss": 0.3733,
"step": 650
},
{
"epoch": 0.5629864514269242,
"grad_norm": 9.455964088439941,
"learning_rate": 9.586887058575243e-05,
"loss": 0.471,
"step": 651
},
{
"epoch": 0.5638512539636783,
"grad_norm": 4.70458984375,
"learning_rate": 9.58049911394824e-05,
"loss": 0.1841,
"step": 652
},
{
"epoch": 0.5647160565004324,
"grad_norm": 3.027376413345337,
"learning_rate": 9.574064320958637e-05,
"loss": 0.1042,
"step": 653
},
{
"epoch": 0.5655808590371865,
"grad_norm": 13.047475814819336,
"learning_rate": 9.567582745420117e-05,
"loss": 1.7486,
"step": 654
},
{
"epoch": 0.5664456615739406,
"grad_norm": 5.038949489593506,
"learning_rate": 9.561054453624842e-05,
"loss": 0.7092,
"step": 655
},
{
"epoch": 0.5673104641106947,
"grad_norm": 6.817296981811523,
"learning_rate": 9.554479512342784e-05,
"loss": 0.4515,
"step": 656
},
{
"epoch": 0.5681752666474489,
"grad_norm": 6.715672969818115,
"learning_rate": 9.54785798882103e-05,
"loss": 0.5267,
"step": 657
},
{
"epoch": 0.5690400691842029,
"grad_norm": 12.338273048400879,
"learning_rate": 9.541189950783104e-05,
"loss": 0.8779,
"step": 658
},
{
"epoch": 0.569904871720957,
"grad_norm": 6.969177722930908,
"learning_rate": 9.534475466428267e-05,
"loss": 0.3105,
"step": 659
},
{
"epoch": 0.5707696742577112,
"grad_norm": 4.153381824493408,
"learning_rate": 9.527714604430827e-05,
"loss": 0.2972,
"step": 660
},
{
"epoch": 0.5716344767944652,
"grad_norm": 9.585479736328125,
"learning_rate": 9.52090743393943e-05,
"loss": 0.7349,
"step": 661
},
{
"epoch": 0.5724992793312194,
"grad_norm": 8.285649299621582,
"learning_rate": 9.514054024576356e-05,
"loss": 0.3054,
"step": 662
},
{
"epoch": 0.5733640818679735,
"grad_norm": 8.23316764831543,
"learning_rate": 9.507154446436805e-05,
"loss": 0.3722,
"step": 663
},
{
"epoch": 0.5742288844047276,
"grad_norm": 3.4087507724761963,
"learning_rate": 9.500208770088183e-05,
"loss": 0.3515,
"step": 664
},
{
"epoch": 0.5750936869414817,
"grad_norm": 11.583375930786133,
"learning_rate": 9.49321706656938e-05,
"loss": 1.0321,
"step": 665
},
{
"epoch": 0.5759584894782358,
"grad_norm": 9.680198669433594,
"learning_rate": 9.48617940739004e-05,
"loss": 0.6996,
"step": 666
},
{
"epoch": 0.5768232920149899,
"grad_norm": 5.860654354095459,
"learning_rate": 9.479095864529828e-05,
"loss": 0.584,
"step": 667
},
{
"epoch": 0.577688094551744,
"grad_norm": 8.714286804199219,
"learning_rate": 9.471966510437704e-05,
"loss": 0.8377,
"step": 668
},
{
"epoch": 0.5785528970884981,
"grad_norm": 5.863884925842285,
"learning_rate": 9.464791418031172e-05,
"loss": 0.3194,
"step": 669
},
{
"epoch": 0.5794176996252522,
"grad_norm": 3.8105716705322266,
"learning_rate": 9.457570660695541e-05,
"loss": 0.2197,
"step": 670
},
{
"epoch": 0.5802825021620064,
"grad_norm": 7.818668842315674,
"learning_rate": 9.450304312283164e-05,
"loss": 0.5296,
"step": 671
},
{
"epoch": 0.5811473046987604,
"grad_norm": 3.5748655796051025,
"learning_rate": 9.442992447112697e-05,
"loss": 0.2199,
"step": 672
},
{
"epoch": 0.5820121072355146,
"grad_norm": 9.74962043762207,
"learning_rate": 9.435635139968328e-05,
"loss": 0.7576,
"step": 673
},
{
"epoch": 0.5828769097722687,
"grad_norm": 5.957652568817139,
"learning_rate": 9.428232466099018e-05,
"loss": 0.4388,
"step": 674
},
{
"epoch": 0.5837417123090227,
"grad_norm": 1.4129705429077148,
"learning_rate": 9.420784501217726e-05,
"loss": 0.0997,
"step": 675
},
{
"epoch": 0.5846065148457769,
"grad_norm": 6.296298503875732,
"learning_rate": 9.41329132150064e-05,
"loss": 0.4806,
"step": 676
},
{
"epoch": 0.585471317382531,
"grad_norm": 8.789826393127441,
"learning_rate": 9.405753003586395e-05,
"loss": 0.7328,
"step": 677
},
{
"epoch": 0.5863361199192851,
"grad_norm": 9.228763580322266,
"learning_rate": 9.39816962457529e-05,
"loss": 0.4772,
"step": 678
},
{
"epoch": 0.5872009224560392,
"grad_norm": 5.72409725189209,
"learning_rate": 9.3905412620285e-05,
"loss": 0.3285,
"step": 679
},
{
"epoch": 0.5880657249927933,
"grad_norm": 10.633530616760254,
"learning_rate": 9.382867993967281e-05,
"loss": 0.9213,
"step": 680
},
{
"epoch": 0.5889305275295474,
"grad_norm": 10.06709098815918,
"learning_rate": 9.375149898872172e-05,
"loss": 0.5335,
"step": 681
},
{
"epoch": 0.5897953300663016,
"grad_norm": 5.641694068908691,
"learning_rate": 9.367387055682197e-05,
"loss": 0.3178,
"step": 682
},
{
"epoch": 0.5906601326030556,
"grad_norm": 8.637955665588379,
"learning_rate": 9.359579543794048e-05,
"loss": 0.7194,
"step": 683
},
{
"epoch": 0.5915249351398097,
"grad_norm": 5.672209739685059,
"learning_rate": 9.351727443061283e-05,
"loss": 0.5559,
"step": 684
},
{
"epoch": 0.5923897376765639,
"grad_norm": 6.293837547302246,
"learning_rate": 9.343830833793505e-05,
"loss": 0.489,
"step": 685
},
{
"epoch": 0.5932545402133179,
"grad_norm": 5.788215160369873,
"learning_rate": 9.335889796755541e-05,
"loss": 0.2563,
"step": 686
},
{
"epoch": 0.594119342750072,
"grad_norm": 8.539923667907715,
"learning_rate": 9.327904413166615e-05,
"loss": 0.8217,
"step": 687
},
{
"epoch": 0.5949841452868262,
"grad_norm": 4.539181709289551,
"learning_rate": 9.319874764699515e-05,
"loss": 0.371,
"step": 688
},
{
"epoch": 0.5958489478235803,
"grad_norm": 4.926830291748047,
"learning_rate": 9.311800933479764e-05,
"loss": 0.3217,
"step": 689
},
{
"epoch": 0.5967137503603344,
"grad_norm": 8.856836318969727,
"learning_rate": 9.30368300208478e-05,
"loss": 0.5505,
"step": 690
},
{
"epoch": 0.5967137503603344,
"eval_Qnli-dev-1024_cosine_accuracy": 0.71875,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.868374228477478,
"eval_Qnli-dev-1024_cosine_ap": 0.7082660274050915,
"eval_Qnli-dev-1024_cosine_f1": 0.6938775510204082,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8440404534339905,
"eval_Qnli-dev-1024_cosine_mcc": 0.3843486566998693,
"eval_Qnli-dev-1024_cosine_precision": 0.6415094339622641,
"eval_Qnli-dev-1024_cosine_recall": 0.7555555555555555,
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7942297458648682,
"eval_Qnli-dev_cosine_ap": 0.7468079642036429,
"eval_Qnli-dev_cosine_f1": 0.7222222222222222,
"eval_Qnli-dev_cosine_f1_threshold": 0.6964967250823975,
"eval_Qnli-dev_cosine_mcc": 0.41614558708189836,
"eval_Qnli-dev_cosine_precision": 0.6190476190476191,
"eval_Qnli-dev_cosine_recall": 0.8666666666666667,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
"eval_allNLI-triplets_cosine_accuracy": 0.96875,
"eval_global_dataset_loss": 0.6163961887359619,
"eval_global_dataset_runtime": 68.0763,
"eval_global_dataset_samples_per_second": 14.278,
"eval_global_dataset_steps_per_second": 0.308,
"eval_sequential_score": 0.9375,
"eval_sts-test-1024_pearson_cosine": 0.8687143218667199,
"eval_sts-test-1024_spearman_cosine": 0.9060527968336128,
"eval_sts-test_pearson_cosine": 0.9081393663808583,
"eval_sts-test_spearman_cosine": 0.920736019932914,
"step": 690
},
{
"epoch": 0.5975785528970885,
"grad_norm": 7.396856784820557,
"learning_rate": 9.295521053543019e-05,
"loss": 0.5105,
"step": 691
},
{
"epoch": 0.5984433554338426,
"grad_norm": 8.075766563415527,
"learning_rate": 9.287315171333144e-05,
"loss": 0.5877,
"step": 692
},
{
"epoch": 0.5993081579705967,
"grad_norm": 8.688477516174316,
"learning_rate": 9.279065439383157e-05,
"loss": 0.7346,
"step": 693
},
{
"epoch": 0.6001729605073508,
"grad_norm": 5.448639869689941,
"learning_rate": 9.27077194206955e-05,
"loss": 0.3644,
"step": 694
},
{
"epoch": 0.601037763044105,
"grad_norm": 5.420974254608154,
"learning_rate": 9.262434764216428e-05,
"loss": 0.2205,
"step": 695
},
{
"epoch": 0.6019025655808591,
"grad_norm": 6.542895793914795,
"learning_rate": 9.254053991094666e-05,
"loss": 0.2832,
"step": 696
},
{
"epoch": 0.6027673681176131,
"grad_norm": 3.4809961318969727,
"learning_rate": 9.245629708421008e-05,
"loss": 0.145,
"step": 697
},
{
"epoch": 0.6036321706543673,
"grad_norm": 10.80398941040039,
"learning_rate": 9.237162002357214e-05,
"loss": 0.8998,
"step": 698
},
{
"epoch": 0.6044969731911214,
"grad_norm": 2.6544158458709717,
"learning_rate": 9.228650959509166e-05,
"loss": 0.1194,
"step": 699
},
{
"epoch": 0.6053617757278754,
"grad_norm": 4.051424980163574,
"learning_rate": 9.220096666925982e-05,
"loss": 0.1845,
"step": 700
},
{
"epoch": 0.6062265782646296,
"grad_norm": 10.206416130065918,
"learning_rate": 9.211499212099135e-05,
"loss": 0.6004,
"step": 701
},
{
"epoch": 0.6070913808013837,
"grad_norm": 11.007821083068848,
"learning_rate": 9.202858682961545e-05,
"loss": 0.5262,
"step": 702
},
{
"epoch": 0.6079561833381378,
"grad_norm": 9.616263389587402,
"learning_rate": 9.194175167886698e-05,
"loss": 1.3073,
"step": 703
},
{
"epoch": 0.6088209858748919,
"grad_norm": 7.583075523376465,
"learning_rate": 9.185448755687717e-05,
"loss": 0.4977,
"step": 704
},
{
"epoch": 0.609685788411646,
"grad_norm": 3.0454254150390625,
"learning_rate": 9.176679535616477e-05,
"loss": 0.1434,
"step": 705
},
{
"epoch": 0.6105505909484001,
"grad_norm": 5.383974075317383,
"learning_rate": 9.167867597362682e-05,
"loss": 0.1923,
"step": 706
},
{
"epoch": 0.6114153934851542,
"grad_norm": 10.157812118530273,
"learning_rate": 9.159013031052943e-05,
"loss": 0.5597,
"step": 707
},
{
"epoch": 0.6122801960219083,
"grad_norm": 12.371292114257812,
"learning_rate": 9.150115927249869e-05,
"loss": 0.8295,
"step": 708
},
{
"epoch": 0.6131449985586624,
"grad_norm": 9.299467086791992,
"learning_rate": 9.141176376951128e-05,
"loss": 0.5907,
"step": 709
},
{
"epoch": 0.6140098010954166,
"grad_norm": 7.16170597076416,
"learning_rate": 9.132194471588522e-05,
"loss": 0.5436,
"step": 710
},
{
"epoch": 0.6148746036321706,
"grad_norm": 6.266456127166748,
"learning_rate": 9.123170303027055e-05,
"loss": 0.309,
"step": 711
},
{
"epoch": 0.6157394061689248,
"grad_norm": 10.73092269897461,
"learning_rate": 9.114103963563985e-05,
"loss": 0.7257,
"step": 712
},
{
"epoch": 0.6166042087056789,
"grad_norm": 8.290569305419922,
"learning_rate": 9.104995545927893e-05,
"loss": 0.6665,
"step": 713
},
{
"epoch": 0.617469011242433,
"grad_norm": 6.256021499633789,
"learning_rate": 9.095845143277714e-05,
"loss": 0.281,
"step": 714
},
{
"epoch": 0.6183338137791871,
"grad_norm": 3.134965419769287,
"learning_rate": 9.086652849201807e-05,
"loss": 0.2241,
"step": 715
},
{
"epoch": 0.6191986163159412,
"grad_norm": 8.62253475189209,
"learning_rate": 9.077418757716988e-05,
"loss": 0.7341,
"step": 716
},
{
"epoch": 0.6200634188526953,
"grad_norm": 8.627028465270996,
"learning_rate": 9.06814296326756e-05,
"loss": 0.3946,
"step": 717
},
{
"epoch": 0.6209282213894494,
"grad_norm": 5.673067092895508,
"learning_rate": 9.05882556072436e-05,
"loss": 0.4353,
"step": 718
},
{
"epoch": 0.6217930239262035,
"grad_norm": 5.314984321594238,
"learning_rate": 9.049466645383784e-05,
"loss": 0.3919,
"step": 719
},
{
"epoch": 0.6226578264629576,
"grad_norm": 8.689918518066406,
"learning_rate": 9.040066312966811e-05,
"loss": 0.5087,
"step": 720
},
{
"epoch": 0.6235226289997118,
"grad_norm": 5.046836853027344,
"learning_rate": 9.030624659618023e-05,
"loss": 0.2345,
"step": 721
},
{
"epoch": 0.6243874315364658,
"grad_norm": 12.160417556762695,
"learning_rate": 9.021141781904627e-05,
"loss": 0.8855,
"step": 722
},
{
"epoch": 0.62525223407322,
"grad_norm": 9.182302474975586,
"learning_rate": 9.011617776815464e-05,
"loss": 0.7187,
"step": 723
},
{
"epoch": 0.6261170366099741,
"grad_norm": 6.717326641082764,
"learning_rate": 9.002052741760015e-05,
"loss": 0.5225,
"step": 724
},
{
"epoch": 0.6269818391467281,
"grad_norm": 11.271307945251465,
"learning_rate": 8.992446774567405e-05,
"loss": 0.9725,
"step": 725
},
{
"epoch": 0.6278466416834823,
"grad_norm": 12.319371223449707,
"learning_rate": 8.982799973485407e-05,
"loss": 0.6209,
"step": 726
},
{
"epoch": 0.6287114442202364,
"grad_norm": 7.424941062927246,
"learning_rate": 8.973112437179436e-05,
"loss": 0.478,
"step": 727
},
{
"epoch": 0.6295762467569905,
"grad_norm": 6.208258628845215,
"learning_rate": 8.963384264731533e-05,
"loss": 0.2833,
"step": 728
},
{
"epoch": 0.6304410492937446,
"grad_norm": 4.718559265136719,
"learning_rate": 8.95361555563936e-05,
"loss": 0.2356,
"step": 729
},
{
"epoch": 0.6313058518304987,
"grad_norm": 9.238673210144043,
"learning_rate": 8.943806409815181e-05,
"loss": 0.6937,
"step": 730
},
{
"epoch": 0.6321706543672528,
"grad_norm": 11.935426712036133,
"learning_rate": 8.933956927584832e-05,
"loss": 0.8793,
"step": 731
},
{
"epoch": 0.6330354569040069,
"grad_norm": 8.183321952819824,
"learning_rate": 8.924067209686709e-05,
"loss": 0.6845,
"step": 732
},
{
"epoch": 0.633900259440761,
"grad_norm": 4.494237422943115,
"learning_rate": 8.914137357270723e-05,
"loss": 0.2744,
"step": 733
},
{
"epoch": 0.6347650619775151,
"grad_norm": 10.111383438110352,
"learning_rate": 8.904167471897274e-05,
"loss": 0.8681,
"step": 734
},
{
"epoch": 0.6356298645142693,
"grad_norm": 10.407071113586426,
"learning_rate": 8.894157655536216e-05,
"loss": 1.0385,
"step": 735
},
{
"epoch": 0.6364946670510233,
"grad_norm": 6.472255706787109,
"learning_rate": 8.884108010565797e-05,
"loss": 0.2331,
"step": 736
},
{
"epoch": 0.6373594695877774,
"grad_norm": 4.348916530609131,
"learning_rate": 8.874018639771637e-05,
"loss": 0.3183,
"step": 737
},
{
"epoch": 0.6382242721245316,
"grad_norm": 3.087089776992798,
"learning_rate": 8.863889646345653e-05,
"loss": 0.1691,
"step": 738
},
{
"epoch": 0.6390890746612857,
"grad_norm": 5.743144512176514,
"learning_rate": 8.85372113388502e-05,
"loss": 0.4625,
"step": 739
},
{
"epoch": 0.6399538771980398,
"grad_norm": 4.561880111694336,
"learning_rate": 8.843513206391101e-05,
"loss": 0.2338,
"step": 740
},
{
"epoch": 0.6408186797347939,
"grad_norm": 10.266475677490234,
"learning_rate": 8.83326596826839e-05,
"loss": 1.1701,
"step": 741
},
{
"epoch": 0.641683482271548,
"grad_norm": 8.521928787231445,
"learning_rate": 8.822979524323441e-05,
"loss": 0.7673,
"step": 742
},
{
"epoch": 0.6425482848083021,
"grad_norm": 8.54457950592041,
"learning_rate": 8.812653979763795e-05,
"loss": 0.5481,
"step": 743
},
{
"epoch": 0.6434130873450562,
"grad_norm": 5.748913288116455,
"learning_rate": 8.802289440196908e-05,
"loss": 0.3357,
"step": 744
},
{
"epoch": 0.6442778898818103,
"grad_norm": 4.804452896118164,
"learning_rate": 8.791886011629068e-05,
"loss": 0.263,
"step": 745
},
{
"epoch": 0.6451426924185645,
"grad_norm": 3.707672119140625,
"learning_rate": 8.781443800464316e-05,
"loss": 0.1461,
"step": 746
},
{
"epoch": 0.6460074949553185,
"grad_norm": 7.357616901397705,
"learning_rate": 8.77096291350334e-05,
"loss": 0.3193,
"step": 747
},
{
"epoch": 0.6468722974920726,
"grad_norm": 4.722273349761963,
"learning_rate": 8.760443457942408e-05,
"loss": 0.2647,
"step": 748
},
{
"epoch": 0.6477371000288268,
"grad_norm": 5.43215799331665,
"learning_rate": 8.749885541372257e-05,
"loss": 0.2494,
"step": 749
},
{
"epoch": 0.6486019025655808,
"grad_norm": 4.395086765289307,
"learning_rate": 8.739289271776991e-05,
"loss": 0.1905,
"step": 750
},
{
"epoch": 0.649466705102335,
"grad_norm": 6.617416858673096,
"learning_rate": 8.728654757532984e-05,
"loss": 0.6302,
"step": 751
},
{
"epoch": 0.6503315076390891,
"grad_norm": 3.7228050231933594,
"learning_rate": 8.717982107407768e-05,
"loss": 0.3397,
"step": 752
},
{
"epoch": 0.6511963101758432,
"grad_norm": 9.654953002929688,
"learning_rate": 8.707271430558919e-05,
"loss": 0.6679,
"step": 753
},
{
"epoch": 0.6520611127125973,
"grad_norm": 4.019669532775879,
"learning_rate": 8.69652283653294e-05,
"loss": 0.3372,
"step": 754
},
{
"epoch": 0.6529259152493514,
"grad_norm": 7.510921478271484,
"learning_rate": 8.68573643526415e-05,
"loss": 0.6676,
"step": 755
},
{
"epoch": 0.6537907177861055,
"grad_norm": 13.126535415649414,
"learning_rate": 8.674912337073544e-05,
"loss": 1.2867,
"step": 756
},
{
"epoch": 0.6546555203228596,
"grad_norm": 9.412704467773438,
"learning_rate": 8.66405065266768e-05,
"loss": 0.8248,
"step": 757
},
{
"epoch": 0.6555203228596137,
"grad_norm": 6.785587787628174,
"learning_rate": 8.653151493137536e-05,
"loss": 0.4971,
"step": 758
},
{
"epoch": 0.6563851253963678,
"grad_norm": 12.77095890045166,
"learning_rate": 8.642214969957376e-05,
"loss": 1.4049,
"step": 759
},
{
"epoch": 0.657249927933122,
"grad_norm": 6.501046180725098,
"learning_rate": 8.631241194983616e-05,
"loss": 0.3086,
"step": 760
},
{
"epoch": 0.658114730469876,
"grad_norm": 6.871536731719971,
"learning_rate": 8.620230280453673e-05,
"loss": 0.6796,
"step": 761
},
{
"epoch": 0.6589795330066301,
"grad_norm": 6.746383190155029,
"learning_rate": 8.609182338984818e-05,
"loss": 0.4314,
"step": 762
},
{
"epoch": 0.6598443355433843,
"grad_norm": 4.454339504241943,
"learning_rate": 8.598097483573029e-05,
"loss": 0.2843,
"step": 763
},
{
"epoch": 0.6607091380801383,
"grad_norm": 5.15504789352417,
"learning_rate": 8.586975827591825e-05,
"loss": 0.4569,
"step": 764
},
{
"epoch": 0.6615739406168925,
"grad_norm": 6.545773506164551,
"learning_rate": 8.575817484791127e-05,
"loss": 0.3931,
"step": 765
},
{
"epoch": 0.6624387431536466,
"grad_norm": 4.9794511795043945,
"learning_rate": 8.564622569296063e-05,
"loss": 0.2155,
"step": 766
},
{
"epoch": 0.6633035456904007,
"grad_norm": 8.013479232788086,
"learning_rate": 8.553391195605833e-05,
"loss": 0.3245,
"step": 767
},
{
"epoch": 0.6641683482271548,
"grad_norm": 9.687097549438477,
"learning_rate": 8.542123478592518e-05,
"loss": 0.7824,
"step": 768
},
{
"epoch": 0.6650331507639089,
"grad_norm": 5.516420364379883,
"learning_rate": 8.530819533499909e-05,
"loss": 0.3537,
"step": 769
},
{
"epoch": 0.665897953300663,
"grad_norm": 6.398399353027344,
"learning_rate": 8.519479475942334e-05,
"loss": 0.2212,
"step": 770
},
{
"epoch": 0.6667627558374172,
"grad_norm": 6.814426898956299,
"learning_rate": 8.508103421903468e-05,
"loss": 0.5911,
"step": 771
},
{
"epoch": 0.6676275583741712,
"grad_norm": 6.5453410148620605,
"learning_rate": 8.496691487735156e-05,
"loss": 0.4524,
"step": 772
},
{
"epoch": 0.6684923609109253,
"grad_norm": 3.5740625858306885,
"learning_rate": 8.485243790156208e-05,
"loss": 0.2604,
"step": 773
},
{
"epoch": 0.6693571634476795,
"grad_norm": 12.454208374023438,
"learning_rate": 8.473760446251221e-05,
"loss": 0.8186,
"step": 774
},
{
"epoch": 0.6702219659844335,
"grad_norm": 5.322040557861328,
"learning_rate": 8.462241573469379e-05,
"loss": 0.4612,
"step": 775
},
{
"epoch": 0.6710867685211876,
"grad_norm": 7.373685359954834,
"learning_rate": 8.450687289623235e-05,
"loss": 0.5306,
"step": 776
},
{
"epoch": 0.6719515710579418,
"grad_norm": 11.016031265258789,
"learning_rate": 8.439097712887531e-05,
"loss": 1.0424,
"step": 777
},
{
"epoch": 0.6728163735946959,
"grad_norm": 8.017274856567383,
"learning_rate": 8.427472961797971e-05,
"loss": 0.473,
"step": 778
},
{
"epoch": 0.67368117613145,
"grad_norm": 5.788976669311523,
"learning_rate": 8.415813155250017e-05,
"loss": 0.2846,
"step": 779
},
{
"epoch": 0.6745459786682041,
"grad_norm": 4.2314558029174805,
"learning_rate": 8.404118412497666e-05,
"loss": 0.4083,
"step": 780
},
{
"epoch": 0.6754107812049582,
"grad_norm": 3.476349115371704,
"learning_rate": 8.392388853152245e-05,
"loss": 0.236,
"step": 781
},
{
"epoch": 0.6762755837417123,
"grad_norm": 10.38036823272705,
"learning_rate": 8.380624597181165e-05,
"loss": 0.6732,
"step": 782
},
{
"epoch": 0.6771403862784664,
"grad_norm": 7.326548099517822,
"learning_rate": 8.368825764906716e-05,
"loss": 0.6798,
"step": 783
},
{
"epoch": 0.6780051888152205,
"grad_norm": 8.5910062789917,
"learning_rate": 8.356992477004828e-05,
"loss": 0.75,
"step": 784
},
{
"epoch": 0.6788699913519747,
"grad_norm": 4.450828552246094,
"learning_rate": 8.345124854503825e-05,
"loss": 0.2198,
"step": 785
},
{
"epoch": 0.6797347938887287,
"grad_norm": 3.15915584564209,
"learning_rate": 8.33322301878321e-05,
"loss": 0.1629,
"step": 786
},
{
"epoch": 0.6805995964254828,
"grad_norm": 3.2538440227508545,
"learning_rate": 8.321287091572403e-05,
"loss": 0.1949,
"step": 787
},
{
"epoch": 0.681464398962237,
"grad_norm": 8.031615257263184,
"learning_rate": 8.309317194949509e-05,
"loss": 0.3901,
"step": 788
},
{
"epoch": 0.682329201498991,
"grad_norm": 2.7871859073638916,
"learning_rate": 8.297313451340064e-05,
"loss": 0.2184,
"step": 789
},
{
"epoch": 0.6831940040357452,
"grad_norm": 6.6741204261779785,
"learning_rate": 8.285275983515783e-05,
"loss": 0.3516,
"step": 790
},
{
"epoch": 0.6840588065724993,
"grad_norm": 9.924346923828125,
"learning_rate": 8.273204914593304e-05,
"loss": 0.9001,
"step": 791
},
{
"epoch": 0.6849236091092534,
"grad_norm": 2.0380783081054688,
"learning_rate": 8.261100368032934e-05,
"loss": 0.0729,
"step": 792
},
{
"epoch": 0.6857884116460075,
"grad_norm": 4.190455913543701,
"learning_rate": 8.248962467637378e-05,
"loss": 0.1484,
"step": 793
},
{
"epoch": 0.6866532141827616,
"grad_norm": 10.513288497924805,
"learning_rate": 8.236791337550478e-05,
"loss": 0.8013,
"step": 794
},
{
"epoch": 0.6875180167195157,
"grad_norm": 5.367727279663086,
"learning_rate": 8.22458710225594e-05,
"loss": 0.2315,
"step": 795
},
{
"epoch": 0.6883828192562699,
"grad_norm": 4.737613201141357,
"learning_rate": 8.21234988657607e-05,
"loss": 0.2135,
"step": 796
},
{
"epoch": 0.6892476217930239,
"grad_norm": 7.230178356170654,
"learning_rate": 8.20007981567048e-05,
"loss": 0.6123,
"step": 797
},
{
"epoch": 0.690112424329778,
"grad_norm": 5.188995361328125,
"learning_rate": 8.18777701503483e-05,
"loss": 0.2533,
"step": 798
},
{
"epoch": 0.6909772268665322,
"grad_norm": 9.257750511169434,
"learning_rate": 8.175441610499522e-05,
"loss": 0.6212,
"step": 799
},
{
"epoch": 0.6918420294032862,
"grad_norm": 1.5883065462112427,
"learning_rate": 8.163073728228427e-05,
"loss": 0.0883,
"step": 800
},
{
"epoch": 0.6927068319400403,
"grad_norm": 8.530162811279297,
"learning_rate": 8.150673494717597e-05,
"loss": 0.3946,
"step": 801
},
{
"epoch": 0.6935716344767945,
"grad_norm": 7.668551445007324,
"learning_rate": 8.138241036793958e-05,
"loss": 0.4277,
"step": 802
},
{
"epoch": 0.6944364370135486,
"grad_norm": 8.265761375427246,
"learning_rate": 8.125776481614024e-05,
"loss": 0.5575,
"step": 803
},
{
"epoch": 0.6953012395503027,
"grad_norm": 7.973784446716309,
"learning_rate": 8.113279956662594e-05,
"loss": 0.4164,
"step": 804
},
{
"epoch": 0.6961660420870568,
"grad_norm": 4.912955284118652,
"learning_rate": 8.100751589751442e-05,
"loss": 0.1826,
"step": 805
},
{
"epoch": 0.6961660420870568,
"eval_Qnli-dev-1024_cosine_accuracy": 0.71875,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8731638193130493,
"eval_Qnli-dev-1024_cosine_ap": 0.724535579920194,
"eval_Qnli-dev-1024_cosine_f1": 0.7037037037037037,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.781539261341095,
"eval_Qnli-dev-1024_cosine_mcc": 0.3721962181491566,
"eval_Qnli-dev-1024_cosine_precision": 0.6031746031746031,
"eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444,
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.778366208076477,
"eval_Qnli-dev_cosine_ap": 0.7396468214578549,
"eval_Qnli-dev_cosine_f1": 0.7142857142857142,
"eval_Qnli-dev_cosine_f1_threshold": 0.7083452939987183,
"eval_Qnli-dev_cosine_mcc": 0.4263253018001963,
"eval_Qnli-dev_cosine_precision": 0.660377358490566,
"eval_Qnli-dev_cosine_recall": 0.7777777777777778,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 0.6504772901535034,
"eval_global_dataset_runtime": 67.87,
"eval_global_dataset_samples_per_second": 14.321,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.9583333134651184,
"eval_sts-test-1024_pearson_cosine": 0.8671751973788917,
"eval_sts-test-1024_spearman_cosine": 0.9100830925358703,
"eval_sts-test_pearson_cosine": 0.9084514358185803,
"eval_sts-test_spearman_cosine": 0.9218648677325396,
"step": 805
},
{
"epoch": 0.6970308446238109,
"grad_norm": 6.679697513580322,
"learning_rate": 8.08819150901802e-05,
"loss": 0.1903,
"step": 806
},
{
"epoch": 0.697895647160565,
"grad_norm": 5.046963214874268,
"learning_rate": 8.075599842924139e-05,
"loss": 0.2428,
"step": 807
},
{
"epoch": 0.6987604496973191,
"grad_norm": 2.3785789012908936,
"learning_rate": 8.06297672025466e-05,
"loss": 0.0856,
"step": 808
},
{
"epoch": 0.6996252522340732,
"grad_norm": 3.804054021835327,
"learning_rate": 8.050322270116174e-05,
"loss": 0.1363,
"step": 809
},
{
"epoch": 0.7004900547708274,
"grad_norm": 7.834051132202148,
"learning_rate": 8.037636621935685e-05,
"loss": 0.3135,
"step": 810
},
{
"epoch": 0.7013548573075814,
"grad_norm": 9.44914722442627,
"learning_rate": 8.02491990545928e-05,
"loss": 0.758,
"step": 811
},
{
"epoch": 0.7022196598443355,
"grad_norm": 4.096058368682861,
"learning_rate": 8.012172250750807e-05,
"loss": 0.3242,
"step": 812
},
{
"epoch": 0.7030844623810897,
"grad_norm": 9.295559883117676,
"learning_rate": 7.999393788190548e-05,
"loss": 0.5513,
"step": 813
},
{
"epoch": 0.7039492649178437,
"grad_norm": 8.359066009521484,
"learning_rate": 7.986584648473874e-05,
"loss": 0.8495,
"step": 814
},
{
"epoch": 0.7048140674545978,
"grad_norm": 11.649872779846191,
"learning_rate": 7.973744962609921e-05,
"loss": 0.6741,
"step": 815
},
{
"epoch": 0.705678869991352,
"grad_norm": 7.094292163848877,
"learning_rate": 7.960874861920242e-05,
"loss": 0.3723,
"step": 816
},
{
"epoch": 0.7065436725281061,
"grad_norm": 7.240077495574951,
"learning_rate": 7.947974478037468e-05,
"loss": 0.5434,
"step": 817
},
{
"epoch": 0.7074084750648602,
"grad_norm": 2.250290632247925,
"learning_rate": 7.935043942903955e-05,
"loss": 0.1101,
"step": 818
},
{
"epoch": 0.7082732776016143,
"grad_norm": 4.252150058746338,
"learning_rate": 7.922083388770447e-05,
"loss": 0.1888,
"step": 819
},
{
"epoch": 0.7091380801383684,
"grad_norm": 4.072676181793213,
"learning_rate": 7.90909294819471e-05,
"loss": 0.2415,
"step": 820
},
{
"epoch": 0.7100028826751225,
"grad_norm": 9.136191368103027,
"learning_rate": 7.896072754040186e-05,
"loss": 0.4492,
"step": 821
},
{
"epoch": 0.7108676852118766,
"grad_norm": 10.30457878112793,
"learning_rate": 7.883022939474626e-05,
"loss": 0.9007,
"step": 822
},
{
"epoch": 0.7117324877486307,
"grad_norm": 5.737984657287598,
"learning_rate": 7.869943637968738e-05,
"loss": 0.5574,
"step": 823
},
{
"epoch": 0.7125972902853849,
"grad_norm": 9.1240234375,
"learning_rate": 7.85683498329481e-05,
"loss": 0.9687,
"step": 824
},
{
"epoch": 0.7134620928221389,
"grad_norm": 8.149517059326172,
"learning_rate": 7.843697109525352e-05,
"loss": 0.8161,
"step": 825
},
{
"epoch": 0.714326895358893,
"grad_norm": 10.62049674987793,
"learning_rate": 7.830530151031719e-05,
"loss": 0.6275,
"step": 826
},
{
"epoch": 0.7151916978956472,
"grad_norm": 4.933554172515869,
"learning_rate": 7.817334242482738e-05,
"loss": 0.361,
"step": 827
},
{
"epoch": 0.7160565004324013,
"grad_norm": 4.892520427703857,
"learning_rate": 7.804109518843334e-05,
"loss": 0.2424,
"step": 828
},
{
"epoch": 0.7169213029691553,
"grad_norm": 8.320906639099121,
"learning_rate": 7.790856115373142e-05,
"loss": 0.7132,
"step": 829
},
{
"epoch": 0.7177861055059095,
"grad_norm": 8.682563781738281,
"learning_rate": 7.77757416762513e-05,
"loss": 0.5795,
"step": 830
},
{
"epoch": 0.7186509080426636,
"grad_norm": 1.6696056127548218,
"learning_rate": 7.764263811444215e-05,
"loss": 0.0414,
"step": 831
},
{
"epoch": 0.7195157105794177,
"grad_norm": 10.681838989257812,
"learning_rate": 7.75092518296586e-05,
"loss": 0.8467,
"step": 832
},
{
"epoch": 0.7203805131161718,
"grad_norm": 5.933515548706055,
"learning_rate": 7.737558418614699e-05,
"loss": 0.3639,
"step": 833
},
{
"epoch": 0.7212453156529259,
"grad_norm": 3.935758352279663,
"learning_rate": 7.724163655103131e-05,
"loss": 0.3737,
"step": 834
},
{
"epoch": 0.7221101181896801,
"grad_norm": 9.584526062011719,
"learning_rate": 7.710741029429926e-05,
"loss": 0.6802,
"step": 835
},
{
"epoch": 0.7229749207264341,
"grad_norm": 13.70799446105957,
"learning_rate": 7.697290678878819e-05,
"loss": 1.0565,
"step": 836
},
{
"epoch": 0.7238397232631882,
"grad_norm": 8.689953804016113,
"learning_rate": 7.683812741017112e-05,
"loss": 0.5841,
"step": 837
},
{
"epoch": 0.7247045257999424,
"grad_norm": 13.601666450500488,
"learning_rate": 7.670307353694262e-05,
"loss": 1.0172,
"step": 838
},
{
"epoch": 0.7255693283366964,
"grad_norm": 3.734889507293701,
"learning_rate": 7.656774655040472e-05,
"loss": 0.3109,
"step": 839
},
{
"epoch": 0.7264341308734505,
"grad_norm": 10.951227188110352,
"learning_rate": 7.643214783465286e-05,
"loss": 0.6902,
"step": 840
},
{
"epoch": 0.7272989334102047,
"grad_norm": 2.3853259086608887,
"learning_rate": 7.62962787765616e-05,
"loss": 0.1287,
"step": 841
},
{
"epoch": 0.7281637359469588,
"grad_norm": 10.43367862701416,
"learning_rate": 7.616014076577054e-05,
"loss": 0.6679,
"step": 842
},
{
"epoch": 0.7290285384837129,
"grad_norm": 5.164660453796387,
"learning_rate": 7.602373519467005e-05,
"loss": 0.3239,
"step": 843
},
{
"epoch": 0.729893341020467,
"grad_norm": 6.129587650299072,
"learning_rate": 7.588706345838705e-05,
"loss": 0.1646,
"step": 844
},
{
"epoch": 0.7307581435572211,
"grad_norm": 5.64245080947876,
"learning_rate": 7.575012695477076e-05,
"loss": 0.3594,
"step": 845
},
{
"epoch": 0.7316229460939752,
"grad_norm": 5.245384216308594,
"learning_rate": 7.561292708437838e-05,
"loss": 0.2795,
"step": 846
},
{
"epoch": 0.7324877486307293,
"grad_norm": 6.762210369110107,
"learning_rate": 7.547546525046073e-05,
"loss": 0.3268,
"step": 847
},
{
"epoch": 0.7333525511674834,
"grad_norm": 9.87009048461914,
"learning_rate": 7.533774285894798e-05,
"loss": 0.8067,
"step": 848
},
{
"epoch": 0.7342173537042376,
"grad_norm": 4.286474704742432,
"learning_rate": 7.519976131843522e-05,
"loss": 0.3708,
"step": 849
},
{
"epoch": 0.7350821562409916,
"grad_norm": 9.59669303894043,
"learning_rate": 7.506152204016807e-05,
"loss": 0.5467,
"step": 850
},
{
"epoch": 0.7359469587777457,
"grad_norm": 3.928433895111084,
"learning_rate": 7.492302643802821e-05,
"loss": 0.15,
"step": 851
},
{
"epoch": 0.7368117613144999,
"grad_norm": 7.317601203918457,
"learning_rate": 7.478427592851893e-05,
"loss": 0.4525,
"step": 852
},
{
"epoch": 0.737676563851254,
"grad_norm": 6.541726589202881,
"learning_rate": 7.464527193075073e-05,
"loss": 0.3871,
"step": 853
},
{
"epoch": 0.738541366388008,
"grad_norm": 12.070144653320312,
"learning_rate": 7.450601586642664e-05,
"loss": 0.8351,
"step": 854
},
{
"epoch": 0.7394061689247622,
"grad_norm": 8.084358215332031,
"learning_rate": 7.436650915982785e-05,
"loss": 0.3939,
"step": 855
},
{
"epoch": 0.7402709714615163,
"grad_norm": 6.941904067993164,
"learning_rate": 7.422675323779907e-05,
"loss": 0.4311,
"step": 856
},
{
"epoch": 0.7411357739982704,
"grad_norm": 8.018699645996094,
"learning_rate": 7.408674952973382e-05,
"loss": 0.4675,
"step": 857
},
{
"epoch": 0.7420005765350245,
"grad_norm": 7.949825763702393,
"learning_rate": 7.394649946756004e-05,
"loss": 0.5963,
"step": 858
},
{
"epoch": 0.7428653790717786,
"grad_norm": 6.355823040008545,
"learning_rate": 7.38060044857253e-05,
"loss": 0.3415,
"step": 859
},
{
"epoch": 0.7437301816085328,
"grad_norm": 7.31845760345459,
"learning_rate": 7.366526602118214e-05,
"loss": 0.3599,
"step": 860
},
{
"epoch": 0.7445949841452868,
"grad_norm": 4.008370876312256,
"learning_rate": 7.352428551337338e-05,
"loss": 0.3354,
"step": 861
},
{
"epoch": 0.7454597866820409,
"grad_norm": 6.440021991729736,
"learning_rate": 7.338306440421743e-05,
"loss": 0.2971,
"step": 862
},
{
"epoch": 0.7463245892187951,
"grad_norm": 11.389256477355957,
"learning_rate": 7.32416041380935e-05,
"loss": 0.6679,
"step": 863
},
{
"epoch": 0.7471893917555491,
"grad_norm": 2.519818067550659,
"learning_rate": 7.309990616182685e-05,
"loss": 0.1211,
"step": 864
},
{
"epoch": 0.7480541942923032,
"grad_norm": 7.607640743255615,
"learning_rate": 7.2957971924674e-05,
"loss": 0.2407,
"step": 865
},
{
"epoch": 0.7489189968290574,
"grad_norm": 7.118372917175293,
"learning_rate": 7.28158028783079e-05,
"loss": 0.3254,
"step": 866
},
{
"epoch": 0.7497837993658115,
"grad_norm": 2.883557081222534,
"learning_rate": 7.267340047680305e-05,
"loss": 0.1074,
"step": 867
},
{
"epoch": 0.7506486019025655,
"grad_norm": 4.721225738525391,
"learning_rate": 7.253076617662065e-05,
"loss": 0.1904,
"step": 868
},
{
"epoch": 0.7515134044393197,
"grad_norm": 2.654787302017212,
"learning_rate": 7.23879014365938e-05,
"loss": 0.182,
"step": 869
},
{
"epoch": 0.7523782069760738,
"grad_norm": 7.568452835083008,
"learning_rate": 7.224480771791235e-05,
"loss": 0.4094,
"step": 870
},
{
"epoch": 0.7532430095128279,
"grad_norm": 8.068111419677734,
"learning_rate": 7.210148648410821e-05,
"loss": 0.8455,
"step": 871
},
{
"epoch": 0.754107812049582,
"grad_norm": 6.598762512207031,
"learning_rate": 7.195793920104023e-05,
"loss": 0.4085,
"step": 872
},
{
"epoch": 0.7549726145863361,
"grad_norm": 6.5393829345703125,
"learning_rate": 7.18141673368792e-05,
"loss": 0.4978,
"step": 873
},
{
"epoch": 0.7558374171230903,
"grad_norm": 4.241705894470215,
"learning_rate": 7.167017236209292e-05,
"loss": 0.2777,
"step": 874
},
{
"epoch": 0.7567022196598443,
"grad_norm": 5.239429950714111,
"learning_rate": 7.152595574943113e-05,
"loss": 0.3822,
"step": 875
},
{
"epoch": 0.7575670221965984,
"grad_norm": 10.576812744140625,
"learning_rate": 7.138151897391041e-05,
"loss": 0.5127,
"step": 876
},
{
"epoch": 0.7584318247333526,
"grad_norm": 4.40622615814209,
"learning_rate": 7.123686351279914e-05,
"loss": 0.2795,
"step": 877
},
{
"epoch": 0.7592966272701066,
"grad_norm": 8.214874267578125,
"learning_rate": 7.10919908456023e-05,
"loss": 0.4,
"step": 878
},
{
"epoch": 0.7601614298068607,
"grad_norm": 5.674429893493652,
"learning_rate": 7.094690245404652e-05,
"loss": 0.3919,
"step": 879
},
{
"epoch": 0.7610262323436149,
"grad_norm": 7.315159797668457,
"learning_rate": 7.080159982206471e-05,
"loss": 0.3323,
"step": 880
},
{
"epoch": 0.761891034880369,
"grad_norm": 5.864488124847412,
"learning_rate": 7.065608443578105e-05,
"loss": 0.5407,
"step": 881
},
{
"epoch": 0.762755837417123,
"grad_norm": 9.524258613586426,
"learning_rate": 7.05103577834957e-05,
"loss": 0.8925,
"step": 882
},
{
"epoch": 0.7636206399538772,
"grad_norm": 2.4174962043762207,
"learning_rate": 7.036442135566961e-05,
"loss": 0.116,
"step": 883
},
{
"epoch": 0.7644854424906313,
"grad_norm": 5.054670810699463,
"learning_rate": 7.021827664490928e-05,
"loss": 0.382,
"step": 884
},
{
"epoch": 0.7653502450273855,
"grad_norm": 4.311699867248535,
"learning_rate": 7.007192514595141e-05,
"loss": 0.2573,
"step": 885
},
{
"epoch": 0.7662150475641395,
"grad_norm": 5.006008625030518,
"learning_rate": 6.992536835564782e-05,
"loss": 0.2442,
"step": 886
},
{
"epoch": 0.7670798501008936,
"grad_norm": 4.521592140197754,
"learning_rate": 6.977860777294988e-05,
"loss": 0.2122,
"step": 887
},
{
"epoch": 0.7679446526376478,
"grad_norm": 7.981561183929443,
"learning_rate": 6.963164489889337e-05,
"loss": 0.3405,
"step": 888
},
{
"epoch": 0.7688094551744018,
"grad_norm": 10.011691093444824,
"learning_rate": 6.948448123658308e-05,
"loss": 0.4895,
"step": 889
},
{
"epoch": 0.7696742577111559,
"grad_norm": 6.9324517250061035,
"learning_rate": 6.933711829117733e-05,
"loss": 0.4046,
"step": 890
},
{
"epoch": 0.7705390602479101,
"grad_norm": 5.044534683227539,
"learning_rate": 6.918955756987275e-05,
"loss": 0.3365,
"step": 891
},
{
"epoch": 0.7714038627846642,
"grad_norm": 6.062309265136719,
"learning_rate": 6.904180058188877e-05,
"loss": 0.3073,
"step": 892
},
{
"epoch": 0.7722686653214182,
"grad_norm": 9.762418746948242,
"learning_rate": 6.889384883845214e-05,
"loss": 0.7621,
"step": 893
},
{
"epoch": 0.7731334678581724,
"grad_norm": 8.496923446655273,
"learning_rate": 6.874570385278158e-05,
"loss": 0.4088,
"step": 894
},
{
"epoch": 0.7739982703949265,
"grad_norm": 9.173744201660156,
"learning_rate": 6.859736714007226e-05,
"loss": 0.6372,
"step": 895
},
{
"epoch": 0.7748630729316806,
"grad_norm": 8.595545768737793,
"learning_rate": 6.844884021748019e-05,
"loss": 0.7089,
"step": 896
},
{
"epoch": 0.7757278754684347,
"grad_norm": 7.156553268432617,
"learning_rate": 6.830012460410697e-05,
"loss": 0.5503,
"step": 897
},
{
"epoch": 0.7765926780051888,
"grad_norm": 5.894566059112549,
"learning_rate": 6.815122182098394e-05,
"loss": 0.5239,
"step": 898
},
{
"epoch": 0.777457480541943,
"grad_norm": 5.80053186416626,
"learning_rate": 6.800213339105683e-05,
"loss": 0.1838,
"step": 899
},
{
"epoch": 0.778322283078697,
"grad_norm": 2.8142247200012207,
"learning_rate": 6.785286083917017e-05,
"loss": 0.1141,
"step": 900
},
{
"epoch": 0.7791870856154511,
"grad_norm": 5.2369537353515625,
"learning_rate": 6.770340569205157e-05,
"loss": 0.4552,
"step": 901
},
{
"epoch": 0.7800518881522053,
"grad_norm": 7.276421070098877,
"learning_rate": 6.755376947829625e-05,
"loss": 0.4267,
"step": 902
},
{
"epoch": 0.7809166906889593,
"grad_norm": 10.988953590393066,
"learning_rate": 6.74039537283513e-05,
"loss": 1.0252,
"step": 903
},
{
"epoch": 0.7817814932257134,
"grad_norm": 10.337282180786133,
"learning_rate": 6.725395997450008e-05,
"loss": 0.6281,
"step": 904
},
{
"epoch": 0.7826462957624676,
"grad_norm": 10.337082862854004,
"learning_rate": 6.710378975084652e-05,
"loss": 0.6716,
"step": 905
},
{
"epoch": 0.7835110982992217,
"grad_norm": 3.361793279647827,
"learning_rate": 6.695344459329948e-05,
"loss": 0.1769,
"step": 906
},
{
"epoch": 0.7843759008359757,
"grad_norm": 8.392909049987793,
"learning_rate": 6.6802926039557e-05,
"loss": 0.428,
"step": 907
},
{
"epoch": 0.7852407033727299,
"grad_norm": 5.3866729736328125,
"learning_rate": 6.665223562909058e-05,
"loss": 0.335,
"step": 908
},
{
"epoch": 0.786105505909484,
"grad_norm": 8.97474479675293,
"learning_rate": 6.650137490312935e-05,
"loss": 0.6272,
"step": 909
},
{
"epoch": 0.786970308446238,
"grad_norm": 9.634217262268066,
"learning_rate": 6.635034540464456e-05,
"loss": 0.6253,
"step": 910
},
{
"epoch": 0.7878351109829922,
"grad_norm": 3.891382932662964,
"learning_rate": 6.619914867833343e-05,
"loss": 0.2603,
"step": 911
},
{
"epoch": 0.7886999135197463,
"grad_norm": 6.183927059173584,
"learning_rate": 6.60477862706037e-05,
"loss": 0.5737,
"step": 912
},
{
"epoch": 0.7895647160565005,
"grad_norm": 7.62052583694458,
"learning_rate": 6.589625972955764e-05,
"loss": 0.3792,
"step": 913
},
{
"epoch": 0.7904295185932545,
"grad_norm": 8.527345657348633,
"learning_rate": 6.574457060497618e-05,
"loss": 0.308,
"step": 914
},
{
"epoch": 0.7912943211300086,
"grad_norm": 4.892148494720459,
"learning_rate": 6.559272044830317e-05,
"loss": 0.2018,
"step": 915
},
{
"epoch": 0.7921591236667628,
"grad_norm": 3.214404582977295,
"learning_rate": 6.544071081262943e-05,
"loss": 0.1299,
"step": 916
},
{
"epoch": 0.7930239262035169,
"grad_norm": 7.314729690551758,
"learning_rate": 6.528854325267692e-05,
"loss": 0.4338,
"step": 917
},
{
"epoch": 0.7938887287402709,
"grad_norm": 6.503054618835449,
"learning_rate": 6.513621932478282e-05,
"loss": 0.2775,
"step": 918
},
{
"epoch": 0.7947535312770251,
"grad_norm": 3.8166730403900146,
"learning_rate": 6.498374058688359e-05,
"loss": 0.2077,
"step": 919
},
{
"epoch": 0.7956183338137792,
"grad_norm": 3.5877130031585693,
"learning_rate": 6.483110859849907e-05,
"loss": 0.2204,
"step": 920
},
{
"epoch": 0.7956183338137792,
"eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8248084783554077,
"eval_Qnli-dev-1024_cosine_ap": 0.7343586316206616,
"eval_Qnli-dev-1024_cosine_f1": 0.7628865979381444,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8248084783554077,
"eval_Qnli-dev-1024_cosine_mcc": 0.528911810491234,
"eval_Qnli-dev-1024_cosine_precision": 0.7115384615384616,
"eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222,
"eval_Qnli-dev_cosine_accuracy": 0.71875,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7927051782608032,
"eval_Qnli-dev_cosine_ap": 0.7346717053497452,
"eval_Qnli-dev_cosine_f1": 0.7254901960784313,
"eval_Qnli-dev_cosine_f1_threshold": 0.7089404463768005,
"eval_Qnli-dev_cosine_mcc": 0.43697448216965834,
"eval_Qnli-dev_cosine_precision": 0.6491228070175439,
"eval_Qnli-dev_cosine_recall": 0.8222222222222222,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
"eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184,
"eval_global_dataset_loss": 0.5776817798614502,
"eval_global_dataset_runtime": 67.9151,
"eval_global_dataset_samples_per_second": 14.312,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.9375,
"eval_sts-test-1024_pearson_cosine": 0.8700957313588291,
"eval_sts-test-1024_spearman_cosine": 0.9050692984403192,
"eval_sts-test_pearson_cosine": 0.9076253073025099,
"eval_sts-test_spearman_cosine": 0.9206752404165891,
"step": 920
},
{
"epoch": 0.7964831363505332,
"grad_norm": 8.318743705749512,
"learning_rate": 6.467832492071649e-05,
"loss": 0.6926,
"step": 921
},
{
"epoch": 0.7973479388872874,
"grad_norm": 3.8544118404388428,
"learning_rate": 6.452539111617453e-05,
"loss": 0.287,
"step": 922
},
{
"epoch": 0.7982127414240415,
"grad_norm": 5.1298441886901855,
"learning_rate": 6.437230874904739e-05,
"loss": 0.119,
"step": 923
},
{
"epoch": 0.7990775439607957,
"grad_norm": 6.161678791046143,
"learning_rate": 6.421907938502865e-05,
"loss": 0.2366,
"step": 924
},
{
"epoch": 0.7999423464975497,
"grad_norm": 6.632068634033203,
"learning_rate": 6.406570459131538e-05,
"loss": 0.2398,
"step": 925
},
{
"epoch": 0.8008071490343038,
"grad_norm": 7.237907886505127,
"learning_rate": 6.39121859365921e-05,
"loss": 0.5197,
"step": 926
},
{
"epoch": 0.801671951571058,
"grad_norm": 4.103116035461426,
"learning_rate": 6.375852499101467e-05,
"loss": 0.2894,
"step": 927
},
{
"epoch": 0.802536754107812,
"grad_norm": 4.985617160797119,
"learning_rate": 6.36047233261943e-05,
"loss": 0.3327,
"step": 928
},
{
"epoch": 0.8034015566445661,
"grad_norm": 3.32094144821167,
"learning_rate": 6.345078251518143e-05,
"loss": 0.1615,
"step": 929
},
{
"epoch": 0.8042663591813203,
"grad_norm": 2.774306297302246,
"learning_rate": 6.329670413244967e-05,
"loss": 0.181,
"step": 930
},
{
"epoch": 0.8051311617180744,
"grad_norm": 5.269750595092773,
"learning_rate": 6.314248975387965e-05,
"loss": 0.2791,
"step": 931
},
{
"epoch": 0.8059959642548284,
"grad_norm": 6.187272548675537,
"learning_rate": 6.298814095674297e-05,
"loss": 0.4581,
"step": 932
},
{
"epoch": 0.8068607667915826,
"grad_norm": 5.671879291534424,
"learning_rate": 6.283365931968603e-05,
"loss": 0.2483,
"step": 933
},
{
"epoch": 0.8077255693283367,
"grad_norm": 12.671977043151855,
"learning_rate": 6.26790464227139e-05,
"loss": 1.2848,
"step": 934
},
{
"epoch": 0.8085903718650908,
"grad_norm": 4.673594951629639,
"learning_rate": 6.252430384717412e-05,
"loss": 0.2594,
"step": 935
},
{
"epoch": 0.8094551744018449,
"grad_norm": 6.6575117111206055,
"learning_rate": 6.236943317574056e-05,
"loss": 0.3473,
"step": 936
},
{
"epoch": 0.810319976938599,
"grad_norm": 3.377204656600952,
"learning_rate": 6.221443599239721e-05,
"loss": 0.2606,
"step": 937
},
{
"epoch": 0.8111847794753532,
"grad_norm": 7.629633903503418,
"learning_rate": 6.205931388242207e-05,
"loss": 0.3551,
"step": 938
},
{
"epoch": 0.8120495820121072,
"grad_norm": 7.514203071594238,
"learning_rate": 6.190406843237078e-05,
"loss": 0.3383,
"step": 939
},
{
"epoch": 0.8129143845488613,
"grad_norm": 6.37880277633667,
"learning_rate": 6.174870123006051e-05,
"loss": 0.2142,
"step": 940
},
{
"epoch": 0.8137791870856155,
"grad_norm": 5.619572639465332,
"learning_rate": 6.159321386455372e-05,
"loss": 0.2147,
"step": 941
},
{
"epoch": 0.8146439896223696,
"grad_norm": 4.535106658935547,
"learning_rate": 6.143760792614179e-05,
"loss": 0.2273,
"step": 942
},
{
"epoch": 0.8155087921591236,
"grad_norm": 9.820999145507812,
"learning_rate": 6.128188500632892e-05,
"loss": 0.9269,
"step": 943
},
{
"epoch": 0.8163735946958778,
"grad_norm": 13.05849838256836,
"learning_rate": 6.112604669781572e-05,
"loss": 1.5161,
"step": 944
},
{
"epoch": 0.8172383972326319,
"grad_norm": 5.841894626617432,
"learning_rate": 6.0970094594483004e-05,
"loss": 0.1962,
"step": 945
},
{
"epoch": 0.818103199769386,
"grad_norm": 7.9914069175720215,
"learning_rate": 6.0814030291375424e-05,
"loss": 0.5516,
"step": 946
},
{
"epoch": 0.8189680023061401,
"grad_norm": 4.961643695831299,
"learning_rate": 6.0657855384685215e-05,
"loss": 0.3498,
"step": 947
},
{
"epoch": 0.8198328048428942,
"grad_norm": 5.379317283630371,
"learning_rate": 6.050157147173581e-05,
"loss": 0.3962,
"step": 948
},
{
"epoch": 0.8206976073796484,
"grad_norm": 4.794488430023193,
"learning_rate": 6.0345180150965576e-05,
"loss": 0.2953,
"step": 949
},
{
"epoch": 0.8215624099164024,
"grad_norm": 3.9415969848632812,
"learning_rate": 6.0188683021911396e-05,
"loss": 0.1737,
"step": 950
},
{
"epoch": 0.8224272124531565,
"grad_norm": 5.720635890960693,
"learning_rate": 6.003208168519233e-05,
"loss": 0.1876,
"step": 951
},
{
"epoch": 0.8232920149899107,
"grad_norm": 5.777576923370361,
"learning_rate": 5.9875377742493276e-05,
"loss": 0.2037,
"step": 952
},
{
"epoch": 0.8241568175266647,
"grad_norm": 7.2347798347473145,
"learning_rate": 5.971857279654854e-05,
"loss": 0.6859,
"step": 953
},
{
"epoch": 0.8250216200634188,
"grad_norm": 9.168425559997559,
"learning_rate": 5.956166845112552e-05,
"loss": 0.5139,
"step": 954
},
{
"epoch": 0.825886422600173,
"grad_norm": 8.480242729187012,
"learning_rate": 5.9404666311008175e-05,
"loss": 0.4557,
"step": 955
},
{
"epoch": 0.8267512251369271,
"grad_norm": 7.415064811706543,
"learning_rate": 5.924756798198075e-05,
"loss": 0.418,
"step": 956
},
{
"epoch": 0.8276160276736811,
"grad_norm": 5.769486427307129,
"learning_rate": 5.909037507081121e-05,
"loss": 0.3326,
"step": 957
},
{
"epoch": 0.8284808302104353,
"grad_norm": 9.98505687713623,
"learning_rate": 5.893308918523498e-05,
"loss": 0.6773,
"step": 958
},
{
"epoch": 0.8293456327471894,
"grad_norm": 6.003732681274414,
"learning_rate": 5.877571193393837e-05,
"loss": 0.1938,
"step": 959
},
{
"epoch": 0.8302104352839434,
"grad_norm": 6.989200115203857,
"learning_rate": 5.8618244926542156e-05,
"loss": 0.2502,
"step": 960
},
{
"epoch": 0.8310752378206976,
"grad_norm": 5.944050312042236,
"learning_rate": 5.84606897735851e-05,
"loss": 0.1686,
"step": 961
},
{
"epoch": 0.8319400403574517,
"grad_norm": 2.568422794342041,
"learning_rate": 5.830304808650753e-05,
"loss": 0.1174,
"step": 962
},
{
"epoch": 0.8328048428942059,
"grad_norm": 6.347965717315674,
"learning_rate": 5.814532147763478e-05,
"loss": 0.3688,
"step": 963
},
{
"epoch": 0.8336696454309599,
"grad_norm": 9.391959190368652,
"learning_rate": 5.798751156016085e-05,
"loss": 0.4529,
"step": 964
},
{
"epoch": 0.834534447967714,
"grad_norm": 7.86402702331543,
"learning_rate": 5.7829619948131654e-05,
"loss": 0.5973,
"step": 965
},
{
"epoch": 0.8353992505044682,
"grad_norm": 9.44655990600586,
"learning_rate": 5.767164825642879e-05,
"loss": 0.7635,
"step": 966
},
{
"epoch": 0.8362640530412222,
"grad_norm": 7.177609920501709,
"learning_rate": 5.751359810075284e-05,
"loss": 0.5631,
"step": 967
},
{
"epoch": 0.8371288555779763,
"grad_norm": 5.718000411987305,
"learning_rate": 5.735547109760686e-05,
"loss": 0.313,
"step": 968
},
{
"epoch": 0.8379936581147305,
"grad_norm": 6.908907413482666,
"learning_rate": 5.719726886427998e-05,
"loss": 0.4425,
"step": 969
},
{
"epoch": 0.8388584606514846,
"grad_norm": 6.510931491851807,
"learning_rate": 5.7038993018830675e-05,
"loss": 0.3581,
"step": 970
},
{
"epoch": 0.8397232631882386,
"grad_norm": 6.836475372314453,
"learning_rate": 5.688064518007036e-05,
"loss": 0.2542,
"step": 971
},
{
"epoch": 0.8405880657249928,
"grad_norm": 6.765063762664795,
"learning_rate": 5.6722226967546764e-05,
"loss": 0.2576,
"step": 972
},
{
"epoch": 0.8414528682617469,
"grad_norm": 2.587757110595703,
"learning_rate": 5.65637400015274e-05,
"loss": 0.1156,
"step": 973
},
{
"epoch": 0.8423176707985011,
"grad_norm": 11.528030395507812,
"learning_rate": 5.640518590298298e-05,
"loss": 0.8184,
"step": 974
},
{
"epoch": 0.8431824733352551,
"grad_norm": 7.4515790939331055,
"learning_rate": 5.624656629357081e-05,
"loss": 0.3536,
"step": 975
},
{
"epoch": 0.8440472758720092,
"grad_norm": 6.2617082595825195,
"learning_rate": 5.6087882795618216e-05,
"loss": 0.3023,
"step": 976
},
{
"epoch": 0.8449120784087634,
"grad_norm": 4.997031211853027,
"learning_rate": 5.5929137032106005e-05,
"loss": 0.418,
"step": 977
},
{
"epoch": 0.8457768809455174,
"grad_norm": 7.6783671379089355,
"learning_rate": 5.577033062665179e-05,
"loss": 0.3036,
"step": 978
},
{
"epoch": 0.8466416834822715,
"grad_norm": 7.0620436668396,
"learning_rate": 5.561146520349343e-05,
"loss": 0.55,
"step": 979
},
{
"epoch": 0.8475064860190257,
"grad_norm": 8.351699829101562,
"learning_rate": 5.5452542387472416e-05,
"loss": 0.6477,
"step": 980
},
{
"epoch": 0.8483712885557798,
"grad_norm": 7.685431480407715,
"learning_rate": 5.529356380401722e-05,
"loss": 0.3518,
"step": 981
},
{
"epoch": 0.8492360910925338,
"grad_norm": 9.351055145263672,
"learning_rate": 5.5134531079126704e-05,
"loss": 0.7033,
"step": 982
},
{
"epoch": 0.850100893629288,
"grad_norm": 9.499361038208008,
"learning_rate": 5.497544583935347e-05,
"loss": 0.6931,
"step": 983
},
{
"epoch": 0.8509656961660421,
"grad_norm": 10.090303421020508,
"learning_rate": 5.481630971178721e-05,
"loss": 0.9278,
"step": 984
},
{
"epoch": 0.8518304987027961,
"grad_norm": 4.208652019500732,
"learning_rate": 5.465712432403812e-05,
"loss": 0.3061,
"step": 985
},
{
"epoch": 0.8526953012395503,
"grad_norm": 9.341512680053711,
"learning_rate": 5.4497891304220225e-05,
"loss": 0.8352,
"step": 986
},
{
"epoch": 0.8535601037763044,
"grad_norm": 1.4906487464904785,
"learning_rate": 5.433861228093471e-05,
"loss": 0.125,
"step": 987
},
{
"epoch": 0.8544249063130586,
"grad_norm": 2.660661458969116,
"learning_rate": 5.417928888325324e-05,
"loss": 0.2284,
"step": 988
},
{
"epoch": 0.8552897088498126,
"grad_norm": 10.015325546264648,
"learning_rate": 5.401992274070136e-05,
"loss": 0.838,
"step": 989
},
{
"epoch": 0.8561545113865667,
"grad_norm": 8.29864501953125,
"learning_rate": 5.386051548324179e-05,
"loss": 0.5318,
"step": 990
},
{
"epoch": 0.8570193139233209,
"grad_norm": 4.587142467498779,
"learning_rate": 5.3701068741257796e-05,
"loss": 0.1618,
"step": 991
},
{
"epoch": 0.8578841164600749,
"grad_norm": 1.8213179111480713,
"learning_rate": 5.354158414553646e-05,
"loss": 0.0871,
"step": 992
},
{
"epoch": 0.858748918996829,
"grad_norm": 8.93700122833252,
"learning_rate": 5.3382063327252017e-05,
"loss": 0.6915,
"step": 993
},
{
"epoch": 0.8596137215335832,
"grad_norm": 4.793188095092773,
"learning_rate": 5.322250791794916e-05,
"loss": 0.3728,
"step": 994
},
{
"epoch": 0.8604785240703373,
"grad_norm": 4.624011516571045,
"learning_rate": 5.3062919549526436e-05,
"loss": 0.2403,
"step": 995
},
{
"epoch": 0.8613433266070913,
"grad_norm": 1.8955051898956299,
"learning_rate": 5.2903299854219435e-05,
"loss": 0.0651,
"step": 996
},
{
"epoch": 0.8622081291438455,
"grad_norm": 10.889961242675781,
"learning_rate": 5.274365046458416e-05,
"loss": 0.5783,
"step": 997
},
{
"epoch": 0.8630729316805996,
"grad_norm": 4.15156888961792,
"learning_rate": 5.258397301348035e-05,
"loss": 0.2061,
"step": 998
},
{
"epoch": 0.8639377342173538,
"grad_norm": 3.9485700130462646,
"learning_rate": 5.2424269134054694e-05,
"loss": 0.154,
"step": 999
},
{
"epoch": 0.8648025367541078,
"grad_norm": 9.996199607849121,
"learning_rate": 5.2264540459724276e-05,
"loss": 0.4689,
"step": 1000
},
{
"epoch": 0.8656673392908619,
"grad_norm": 7.154214382171631,
"learning_rate": 5.21047886241597e-05,
"loss": 0.2088,
"step": 1001
},
{
"epoch": 0.8665321418276161,
"grad_norm": 8.80577564239502,
"learning_rate": 5.194501526126842e-05,
"loss": 0.5299,
"step": 1002
},
{
"epoch": 0.8673969443643701,
"grad_norm": 5.227262020111084,
"learning_rate": 5.1785222005178224e-05,
"loss": 0.2689,
"step": 1003
},
{
"epoch": 0.8682617469011242,
"grad_norm": 6.6007843017578125,
"learning_rate": 5.162541049022019e-05,
"loss": 0.3098,
"step": 1004
},
{
"epoch": 0.8691265494378784,
"grad_norm": 6.239222526550293,
"learning_rate": 5.146558235091225e-05,
"loss": 0.3478,
"step": 1005
},
{
"epoch": 0.8699913519746325,
"grad_norm": 2.814821243286133,
"learning_rate": 5.1305739221942364e-05,
"loss": 0.1841,
"step": 1006
},
{
"epoch": 0.8708561545113865,
"grad_norm": 1.4831047058105469,
"learning_rate": 5.114588273815173e-05,
"loss": 0.0862,
"step": 1007
},
{
"epoch": 0.8717209570481407,
"grad_norm": 8.568103790283203,
"learning_rate": 5.09860145345182e-05,
"loss": 0.5991,
"step": 1008
},
{
"epoch": 0.8725857595848948,
"grad_norm": 9.560081481933594,
"learning_rate": 5.082613624613946e-05,
"loss": 0.4026,
"step": 1009
},
{
"epoch": 0.8734505621216488,
"grad_norm": 7.88618803024292,
"learning_rate": 5.066624950821637e-05,
"loss": 0.4991,
"step": 1010
},
{
"epoch": 0.874315364658403,
"grad_norm": 5.938468933105469,
"learning_rate": 5.05063559560362e-05,
"loss": 0.2786,
"step": 1011
},
{
"epoch": 0.8751801671951571,
"grad_norm": 9.075552940368652,
"learning_rate": 5.0346457224955903e-05,
"loss": 0.4708,
"step": 1012
},
{
"epoch": 0.8760449697319113,
"grad_norm": 8.848043441772461,
"learning_rate": 5.018655495038541e-05,
"loss": 0.4201,
"step": 1013
},
{
"epoch": 0.8769097722686653,
"grad_norm": 5.168188095092773,
"learning_rate": 5.002665076777091e-05,
"loss": 0.2089,
"step": 1014
},
{
"epoch": 0.8777745748054194,
"grad_norm": 4.413999557495117,
"learning_rate": 4.986674631257804e-05,
"loss": 0.3158,
"step": 1015
},
{
"epoch": 0.8786393773421736,
"grad_norm": 9.610701560974121,
"learning_rate": 4.970684322027534e-05,
"loss": 0.7363,
"step": 1016
},
{
"epoch": 0.8795041798789276,
"grad_norm": 6.793404579162598,
"learning_rate": 4.9546943126317274e-05,
"loss": 0.2885,
"step": 1017
},
{
"epoch": 0.8803689824156817,
"grad_norm": 9.434625625610352,
"learning_rate": 4.9387047666127786e-05,
"loss": 0.4937,
"step": 1018
},
{
"epoch": 0.8812337849524359,
"grad_norm": 6.130424499511719,
"learning_rate": 4.9227158475083304e-05,
"loss": 0.1684,
"step": 1019
},
{
"epoch": 0.88209858748919,
"grad_norm": 4.250467777252197,
"learning_rate": 4.9067277188496185e-05,
"loss": 0.2749,
"step": 1020
},
{
"epoch": 0.882963390025944,
"grad_norm": 3.2336244583129883,
"learning_rate": 4.890740544159796e-05,
"loss": 0.2789,
"step": 1021
},
{
"epoch": 0.8838281925626982,
"grad_norm": 7.7692084312438965,
"learning_rate": 4.874754486952255e-05,
"loss": 0.5868,
"step": 1022
},
{
"epoch": 0.8846929950994523,
"grad_norm": 7.071033954620361,
"learning_rate": 4.8587697107289626e-05,
"loss": 0.5894,
"step": 1023
},
{
"epoch": 0.8855577976362063,
"grad_norm": 6.448328971862793,
"learning_rate": 4.84278637897878e-05,
"loss": 0.1635,
"step": 1024
},
{
"epoch": 0.8864226001729605,
"grad_norm": 11.375746726989746,
"learning_rate": 4.826804655175795e-05,
"loss": 0.6829,
"step": 1025
},
{
"epoch": 0.8872874027097146,
"grad_norm": 1.4379364252090454,
"learning_rate": 4.8108247027776565e-05,
"loss": 0.1124,
"step": 1026
},
{
"epoch": 0.8881522052464688,
"grad_norm": 11.497692108154297,
"learning_rate": 4.794846685223886e-05,
"loss": 1.2642,
"step": 1027
},
{
"epoch": 0.8890170077832228,
"grad_norm": 6.77423620223999,
"learning_rate": 4.778870765934221e-05,
"loss": 0.6585,
"step": 1028
},
{
"epoch": 0.8898818103199769,
"grad_norm": 6.651241779327393,
"learning_rate": 4.762897108306939e-05,
"loss": 0.2622,
"step": 1029
},
{
"epoch": 0.8907466128567311,
"grad_norm": 7.476505279541016,
"learning_rate": 4.7469258757171854e-05,
"loss": 0.2797,
"step": 1030
},
{
"epoch": 0.8916114153934852,
"grad_norm": 7.375949382781982,
"learning_rate": 4.7309572315152976e-05,
"loss": 0.2747,
"step": 1031
},
{
"epoch": 0.8924762179302392,
"grad_norm": 4.907548427581787,
"learning_rate": 4.7149913390251494e-05,
"loss": 0.3748,
"step": 1032
},
{
"epoch": 0.8933410204669934,
"grad_norm": 7.232724189758301,
"learning_rate": 4.6990283615424605e-05,
"loss": 0.1792,
"step": 1033
},
{
"epoch": 0.8942058230037475,
"grad_norm": 6.12727165222168,
"learning_rate": 4.6830684623331446e-05,
"loss": 0.342,
"step": 1034
},
{
"epoch": 0.8950706255405015,
"grad_norm": 4.968775272369385,
"learning_rate": 4.667111804631626e-05,
"loss": 0.3287,
"step": 1035
},
{
"epoch": 0.8950706255405015,
"eval_Qnli-dev-1024_cosine_accuracy": 0.7083333333333334,
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8668882846832275,
"eval_Qnli-dev-1024_cosine_ap": 0.6999822477767415,
"eval_Qnli-dev-1024_cosine_f1": 0.7090909090909091,
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.7944933772087097,
"eval_Qnli-dev-1024_cosine_mcc": 0.3808509397785054,
"eval_Qnli-dev-1024_cosine_precision": 0.6,
"eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667,
"eval_Qnli-dev_cosine_accuracy": 0.71875,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7880457639694214,
"eval_Qnli-dev_cosine_ap": 0.7340095878922616,
"eval_Qnli-dev_cosine_f1": 0.7238095238095237,
"eval_Qnli-dev_cosine_f1_threshold": 0.703315019607544,
"eval_Qnli-dev_cosine_mcc": 0.42578476395267345,
"eval_Qnli-dev_cosine_precision": 0.6333333333333333,
"eval_Qnli-dev_cosine_recall": 0.8444444444444444,
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375,
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816,
"eval_global_dataset_loss": 0.5188027620315552,
"eval_global_dataset_runtime": 67.9093,
"eval_global_dataset_samples_per_second": 14.313,
"eval_global_dataset_steps_per_second": 0.309,
"eval_sequential_score": 0.9375,
"eval_sts-test-1024_pearson_cosine": 0.8797952712975998,
"eval_sts-test-1024_spearman_cosine": 0.9135158587840699,
"eval_sts-test_pearson_cosine": 0.9084511864603124,
"eval_sts-test_spearman_cosine": 0.9222536610997011,
"step": 1035
},
{
"epoch": 0.8959354280772557,
"grad_norm": 8.467510223388672,
"learning_rate": 4.651158551639177e-05,
"loss": 0.5348,
"step": 1036
},
{
"epoch": 0.8968002306140098,
"grad_norm": 6.454378604888916,
"learning_rate": 4.635208866522251e-05,
"loss": 0.6588,
"step": 1037
},
{
"epoch": 0.897665033150764,
"grad_norm": 6.776357650756836,
"learning_rate": 4.619262912410804e-05,
"loss": 0.5132,
"step": 1038
},
{
"epoch": 0.898529835687518,
"grad_norm": 10.85428237915039,
"learning_rate": 4.603320852396637e-05,
"loss": 1.1707,
"step": 1039
},
{
"epoch": 0.8993946382242721,
"grad_norm": 3.449406862258911,
"learning_rate": 4.587382849531717e-05,
"loss": 0.1442,
"step": 1040
},
{
"epoch": 0.9002594407610263,
"grad_norm": 2.9549670219421387,
"learning_rate": 4.5714490668265245e-05,
"loss": 0.2243,
"step": 1041
},
{
"epoch": 0.9011242432977803,
"grad_norm": 4.6617817878723145,
"learning_rate": 4.5555196672483685e-05,
"loss": 0.3099,
"step": 1042
},
{
"epoch": 0.9019890458345344,
"grad_norm": 6.141875267028809,
"learning_rate": 4.5395948137197296e-05,
"loss": 0.1839,
"step": 1043
},
{
"epoch": 0.9028538483712886,
"grad_norm": 12.232782363891602,
"learning_rate": 4.5236746691166e-05,
"loss": 0.6248,
"step": 1044
},
{
"epoch": 0.9037186509080427,
"grad_norm": 5.728059768676758,
"learning_rate": 4.507759396266802e-05,
"loss": 0.4605,
"step": 1045
},
{
"epoch": 0.9045834534447967,
"grad_norm": 8.688108444213867,
"learning_rate": 4.49184915794833e-05,
"loss": 0.4857,
"step": 1046
},
{
"epoch": 0.9054482559815509,
"grad_norm": 8.695257186889648,
"learning_rate": 4.475944116887695e-05,
"loss": 0.3966,
"step": 1047
},
{
"epoch": 0.906313058518305,
"grad_norm": 5.200995922088623,
"learning_rate": 4.460044435758241e-05,
"loss": 0.4439,
"step": 1048
},
{
"epoch": 0.907177861055059,
"grad_norm": 12.601680755615234,
"learning_rate": 4.4441502771785003e-05,
"loss": 0.6051,
"step": 1049
},
{
"epoch": 0.9080426635918132,
"grad_norm": 9.575990676879883,
"learning_rate": 4.428261803710516e-05,
"loss": 0.3982,
"step": 1050
},
{
"epoch": 0.9089074661285673,
"grad_norm": 2.344109058380127,
"learning_rate": 4.4123791778581865e-05,
"loss": 0.1718,
"step": 1051
},
{
"epoch": 0.9097722686653215,
"grad_norm": 7.567986488342285,
"learning_rate": 4.3965025620656065e-05,
"loss": 0.2641,
"step": 1052
},
{
"epoch": 0.9106370712020755,
"grad_norm": 8.634700775146484,
"learning_rate": 4.3806321187153934e-05,
"loss": 0.3788,
"step": 1053
},
{
"epoch": 0.9115018737388296,
"grad_norm": 8.53459644317627,
"learning_rate": 4.3647680101270416e-05,
"loss": 0.4456,
"step": 1054
},
{
"epoch": 0.9123666762755838,
"grad_norm": 10.249025344848633,
"learning_rate": 4.348910398555249e-05,
"loss": 1.0234,
"step": 1055
},
{
"epoch": 0.9132314788123379,
"grad_norm": 10.008344650268555,
"learning_rate": 4.333059446188269e-05,
"loss": 0.6228,
"step": 1056
},
{
"epoch": 0.9140962813490919,
"grad_norm": 8.067853927612305,
"learning_rate": 4.317215315146238e-05,
"loss": 0.4588,
"step": 1057
},
{
"epoch": 0.9149610838858461,
"grad_norm": 10.182132720947266,
"learning_rate": 4.301378167479532e-05,
"loss": 0.8651,
"step": 1058
},
{
"epoch": 0.9158258864226002,
"grad_norm": 11.363606452941895,
"learning_rate": 4.285548165167105e-05,
"loss": 0.8571,
"step": 1059
},
{
"epoch": 0.9166906889593542,
"grad_norm": 10.103208541870117,
"learning_rate": 4.2697254701148235e-05,
"loss": 0.6446,
"step": 1060
},
{
"epoch": 0.9175554914961084,
"grad_norm": 6.2334418296813965,
"learning_rate": 4.253910244153817e-05,
"loss": 0.2193,
"step": 1061
},
{
"epoch": 0.9184202940328625,
"grad_norm": 5.234436511993408,
"learning_rate": 4.2381026490388245e-05,
"loss": 0.258,
"step": 1062
},
{
"epoch": 0.9192850965696167,
"grad_norm": 8.499395370483398,
"learning_rate": 4.222302846446544e-05,
"loss": 0.4164,
"step": 1063
},
{
"epoch": 0.9201498991063707,
"grad_norm": 5.450392723083496,
"learning_rate": 4.206510997973963e-05,
"loss": 0.4783,
"step": 1064
},
{
"epoch": 0.9210147016431248,
"grad_norm": 5.65176248550415,
"learning_rate": 4.1907272651367226e-05,
"loss": 0.246,
"step": 1065
},
{
"epoch": 0.921879504179879,
"grad_norm": 8.317374229431152,
"learning_rate": 4.1749518093674566e-05,
"loss": 0.3821,
"step": 1066
},
{
"epoch": 0.922744306716633,
"grad_norm": 4.983073711395264,
"learning_rate": 4.159184792014145e-05,
"loss": 0.182,
"step": 1067
},
{
"epoch": 0.9236091092533871,
"grad_norm": 10.939299583435059,
"learning_rate": 4.143426374338459e-05,
"loss": 0.6648,
"step": 1068
},
{
"epoch": 0.9244739117901413,
"grad_norm": 5.333117485046387,
"learning_rate": 4.1276767175141125e-05,
"loss": 0.5405,
"step": 1069
},
{
"epoch": 0.9253387143268954,
"grad_norm": 6.263637542724609,
"learning_rate": 4.1119359826252226e-05,
"loss": 0.3681,
"step": 1070
},
{
"epoch": 0.9262035168636494,
"grad_norm": 5.16562032699585,
"learning_rate": 4.0962043306646455e-05,
"loss": 0.2323,
"step": 1071
},
{
"epoch": 0.9270683194004036,
"grad_norm": 6.132068634033203,
"learning_rate": 4.080481922532348e-05,
"loss": 0.4676,
"step": 1072
},
{
"epoch": 0.9279331219371577,
"grad_norm": 8.957972526550293,
"learning_rate": 4.064768919033746e-05,
"loss": 0.5141,
"step": 1073
},
{
"epoch": 0.9287979244739117,
"grad_norm": 7.958962440490723,
"learning_rate": 4.0490654808780685e-05,
"loss": 0.3067,
"step": 1074
},
{
"epoch": 0.9296627270106659,
"grad_norm": 6.653066158294678,
"learning_rate": 4.033371768676716e-05,
"loss": 0.4638,
"step": 1075
},
{
"epoch": 0.93052752954742,
"grad_norm": 5.897211074829102,
"learning_rate": 4.0176879429416086e-05,
"loss": 0.3082,
"step": 1076
},
{
"epoch": 0.9313923320841742,
"grad_norm": 8.102348327636719,
"learning_rate": 4.002014164083552e-05,
"loss": 0.4003,
"step": 1077
},
{
"epoch": 0.9322571346209282,
"grad_norm": 7.730281829833984,
"learning_rate": 3.9863505924105995e-05,
"loss": 0.3053,
"step": 1078
},
{
"epoch": 0.9331219371576823,
"grad_norm": 5.675047397613525,
"learning_rate": 3.970697388126397e-05,
"loss": 0.1876,
"step": 1079
},
{
"epoch": 0.9339867396944365,
"grad_norm": 9.553377151489258,
"learning_rate": 3.9550547113285665e-05,
"loss": 0.569,
"step": 1080
},
{
"epoch": 0.9348515422311905,
"grad_norm": 10.86451244354248,
"learning_rate": 3.9394227220070466e-05,
"loss": 0.8728,
"step": 1081
},
{
"epoch": 0.9357163447679446,
"grad_norm": 9.33718204498291,
"learning_rate": 3.923801580042476e-05,
"loss": 0.4347,
"step": 1082
},
{
"epoch": 0.9365811473046988,
"grad_norm": 8.696025848388672,
"learning_rate": 3.90819144520454e-05,
"loss": 0.8919,
"step": 1083
},
{
"epoch": 0.9374459498414529,
"grad_norm": 7.635885238647461,
"learning_rate": 3.892592477150352e-05,
"loss": 0.4828,
"step": 1084
},
{
"epoch": 0.9383107523782069,
"grad_norm": 7.686861038208008,
"learning_rate": 3.877004835422815e-05,
"loss": 0.4338,
"step": 1085
},
{
"epoch": 0.9391755549149611,
"grad_norm": 6.8635029792785645,
"learning_rate": 3.861428679448983e-05,
"loss": 0.359,
"step": 1086
},
{
"epoch": 0.9400403574517152,
"grad_norm": 4.335479736328125,
"learning_rate": 3.845864168538437e-05,
"loss": 0.3828,
"step": 1087
},
{
"epoch": 0.9409051599884694,
"grad_norm": 7.711667537689209,
"learning_rate": 3.8303114618816577e-05,
"loss": 0.5294,
"step": 1088
},
{
"epoch": 0.9417699625252234,
"grad_norm": 6.784587383270264,
"learning_rate": 3.814770718548396e-05,
"loss": 0.4212,
"step": 1089
},
{
"epoch": 0.9426347650619775,
"grad_norm": 8.687413215637207,
"learning_rate": 3.7992420974860384e-05,
"loss": 0.5723,
"step": 1090
},
{
"epoch": 0.9434995675987317,
"grad_norm": 3.785308361053467,
"learning_rate": 3.783725757517994e-05,
"loss": 0.2047,
"step": 1091
},
{
"epoch": 0.9443643701354857,
"grad_norm": 8.60908031463623,
"learning_rate": 3.7682218573420576e-05,
"loss": 0.4359,
"step": 1092
},
{
"epoch": 0.9452291726722398,
"grad_norm": 3.608921527862549,
"learning_rate": 3.7527305555287976e-05,
"loss": 0.2121,
"step": 1093
},
{
"epoch": 0.946093975208994,
"grad_norm": 7.160829544067383,
"learning_rate": 3.737252010519925e-05,
"loss": 0.461,
"step": 1094
},
{
"epoch": 0.9469587777457481,
"grad_norm": 2.5925629138946533,
"learning_rate": 3.721786380626675e-05,
"loss": 0.1127,
"step": 1095
},
{
"epoch": 0.9478235802825021,
"grad_norm": 9.759129524230957,
"learning_rate": 3.706333824028201e-05,
"loss": 0.5365,
"step": 1096
},
{
"epoch": 0.9486883828192563,
"grad_norm": 9.999465942382812,
"learning_rate": 3.690894498769933e-05,
"loss": 1.0112,
"step": 1097
},
{
"epoch": 0.9495531853560104,
"grad_norm": 9.034364700317383,
"learning_rate": 3.675468562761982e-05,
"loss": 0.6563,
"step": 1098
},
{
"epoch": 0.9504179878927644,
"grad_norm": 2.152198076248169,
"learning_rate": 3.6600561737775106e-05,
"loss": 0.0732,
"step": 1099
},
{
"epoch": 0.9512827904295186,
"grad_norm": 4.004874229431152,
"learning_rate": 3.6446574894511265e-05,
"loss": 0.1631,
"step": 1100
},
{
"epoch": 0.9521475929662727,
"grad_norm": 7.518155097961426,
"learning_rate": 3.629272667277274e-05,
"loss": 0.4512,
"step": 1101
},
{
"epoch": 0.9530123955030269,
"grad_norm": 5.864679336547852,
"learning_rate": 3.613901864608611e-05,
"loss": 0.2731,
"step": 1102
},
{
"epoch": 0.9538771980397809,
"grad_norm": 7.249544620513916,
"learning_rate": 3.598545238654416e-05,
"loss": 0.4866,
"step": 1103
},
{
"epoch": 0.954742000576535,
"grad_norm": 2.4601848125457764,
"learning_rate": 3.583202946478963e-05,
"loss": 0.2007,
"step": 1104
},
{
"epoch": 0.9556068031132892,
"grad_norm": 7.753067970275879,
"learning_rate": 3.567875144999925e-05,
"loss": 0.501,
"step": 1105
},
{
"epoch": 0.9564716056500432,
"grad_norm": 11.398188591003418,
"learning_rate": 3.5525619909867704e-05,
"loss": 0.8343,
"step": 1106
},
{
"epoch": 0.9573364081867973,
"grad_norm": 3.151561975479126,
"learning_rate": 3.537263641059152e-05,
"loss": 0.1781,
"step": 1107
},
{
"epoch": 0.9582012107235515,
"grad_norm": 5.797046184539795,
"learning_rate": 3.521980251685315e-05,
"loss": 0.3011,
"step": 1108
},
{
"epoch": 0.9590660132603056,
"grad_norm": 8.037071228027344,
"learning_rate": 3.506711979180485e-05,
"loss": 0.423,
"step": 1109
},
{
"epoch": 0.9599308157970596,
"grad_norm": 8.320140838623047,
"learning_rate": 3.49145897970528e-05,
"loss": 0.6317,
"step": 1110
},
{
"epoch": 0.9607956183338138,
"grad_norm": 7.24954080581665,
"learning_rate": 3.47622140926411e-05,
"loss": 0.3058,
"step": 1111
},
{
"epoch": 0.9616604208705679,
"grad_norm": 2.021778106689453,
"learning_rate": 3.4609994237035746e-05,
"loss": 0.1734,
"step": 1112
},
{
"epoch": 0.962525223407322,
"grad_norm": 4.613988876342773,
"learning_rate": 3.4457931787108774e-05,
"loss": 0.1935,
"step": 1113
},
{
"epoch": 0.9633900259440761,
"grad_norm": 4.552547454833984,
"learning_rate": 3.4306028298122316e-05,
"loss": 0.1533,
"step": 1114
},
{
"epoch": 0.9642548284808302,
"grad_norm": 10.501197814941406,
"learning_rate": 3.415428532371271e-05,
"loss": 0.9337,
"step": 1115
},
{
"epoch": 0.9651196310175844,
"grad_norm": 7.168083190917969,
"learning_rate": 3.40027044158745e-05,
"loss": 0.3695,
"step": 1116
},
{
"epoch": 0.9659844335543384,
"grad_norm": 10.598306655883789,
"learning_rate": 3.3851287124944756e-05,
"loss": 0.7095,
"step": 1117
},
{
"epoch": 0.9668492360910925,
"grad_norm": 5.203083038330078,
"learning_rate": 3.370003499958703e-05,
"loss": 0.4206,
"step": 1118
},
{
"epoch": 0.9677140386278467,
"grad_norm": 5.217127323150635,
"learning_rate": 3.3548949586775624e-05,
"loss": 0.235,
"step": 1119
},
{
"epoch": 0.9685788411646008,
"grad_norm": 4.155709266662598,
"learning_rate": 3.339803243177972e-05,
"loss": 0.1233,
"step": 1120
},
{
"epoch": 0.9694436437013548,
"grad_norm": 2.8669726848602295,
"learning_rate": 3.324728507814764e-05,
"loss": 0.1605,
"step": 1121
},
{
"epoch": 0.970308446238109,
"grad_norm": 3.5733962059020996,
"learning_rate": 3.3096709067691006e-05,
"loss": 0.1095,
"step": 1122
},
{
"epoch": 0.9711732487748631,
"grad_norm": 4.109647274017334,
"learning_rate": 3.294630594046892e-05,
"loss": 0.2737,
"step": 1123
},
{
"epoch": 0.9720380513116171,
"grad_norm": 7.015890121459961,
"learning_rate": 3.279607723477234e-05,
"loss": 0.3482,
"step": 1124
},
{
"epoch": 0.9729028538483713,
"grad_norm": 6.006662368774414,
"learning_rate": 3.2646024487108236e-05,
"loss": 0.4144,
"step": 1125
},
{
"epoch": 0.9737676563851254,
"grad_norm": 8.346697807312012,
"learning_rate": 3.249614923218391e-05,
"loss": 0.4055,
"step": 1126
},
{
"epoch": 0.9746324589218796,
"grad_norm": 6.663881778717041,
"learning_rate": 3.234645300289137e-05,
"loss": 0.5001,
"step": 1127
},
{
"epoch": 0.9754972614586336,
"grad_norm": 7.918451309204102,
"learning_rate": 3.21969373302915e-05,
"loss": 0.6129,
"step": 1128
},
{
"epoch": 0.9763620639953877,
"grad_norm": 5.889848709106445,
"learning_rate": 3.204760374359857e-05,
"loss": 0.3793,
"step": 1129
},
{
"epoch": 0.9772268665321419,
"grad_norm": 13.368314743041992,
"learning_rate": 3.189845377016448e-05,
"loss": 1.0901,
"step": 1130
},
{
"epoch": 0.9780916690688959,
"grad_norm": 2.977189064025879,
"learning_rate": 3.1749488935463145e-05,
"loss": 0.1219,
"step": 1131
},
{
"epoch": 0.97895647160565,
"grad_norm": 9.539501190185547,
"learning_rate": 3.160071076307497e-05,
"loss": 0.5126,
"step": 1132
},
{
"epoch": 0.9798212741424042,
"grad_norm": 2.8723487854003906,
"learning_rate": 3.145212077467118e-05,
"loss": 0.2261,
"step": 1133
},
{
"epoch": 0.9806860766791583,
"grad_norm": 4.65241813659668,
"learning_rate": 3.1303720489998326e-05,
"loss": 0.1636,
"step": 1134
},
{
"epoch": 0.9815508792159123,
"grad_norm": 6.660006999969482,
"learning_rate": 3.1155511426862654e-05,
"loss": 0.2711,
"step": 1135
},
{
"epoch": 0.9824156817526665,
"grad_norm": 4.477895259857178,
"learning_rate": 3.100749510111471e-05,
"loss": 0.3117,
"step": 1136
},
{
"epoch": 0.9832804842894206,
"grad_norm": 13.759649276733398,
"learning_rate": 3.085967302663375e-05,
"loss": 0.8633,
"step": 1137
},
{
"epoch": 0.9841452868261746,
"grad_norm": 5.8578948974609375,
"learning_rate": 3.071204671531221e-05,
"loss": 0.3619,
"step": 1138
},
{
"epoch": 0.9850100893629288,
"grad_norm": 2.4084582328796387,
"learning_rate": 3.056461767704037e-05,
"loss": 0.1079,
"step": 1139
},
{
"epoch": 0.9858748918996829,
"grad_norm": 7.170529842376709,
"learning_rate": 3.041738741969078e-05,
"loss": 0.4303,
"step": 1140
},
{
"epoch": 0.986739694436437,
"grad_norm": 4.021960735321045,
"learning_rate": 3.027035744910298e-05,
"loss": 0.1799,
"step": 1141
},
{
"epoch": 0.9876044969731911,
"grad_norm": 4.080975532531738,
"learning_rate": 3.012352926906794e-05,
"loss": 0.3902,
"step": 1142
},
{
"epoch": 0.9884692995099452,
"grad_norm": 2.9526562690734863,
"learning_rate": 2.9976904381312835e-05,
"loss": 0.1557,
"step": 1143
},
{
"epoch": 0.9893341020466994,
"grad_norm": 5.068524360656738,
"learning_rate": 2.9830484285485544e-05,
"loss": 0.1057,
"step": 1144
},
{
"epoch": 0.9901989045834535,
"grad_norm": 4.044787883758545,
"learning_rate": 2.968427047913942e-05,
"loss": 0.1142,
"step": 1145
}
],
"logging_steps": 1,
"max_steps": 3468,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1145,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}