|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9901989045834535, |
|
"eval_steps": 115, |
|
"global_step": 1145, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008648025367541078, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0, |
|
"loss": 8.1889, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0017296050735082155, |
|
"grad_norm": 25.332334518432617, |
|
"learning_rate": 0.0, |
|
"loss": 9.3962, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0025944076102623233, |
|
"grad_norm": 21.9357967376709, |
|
"learning_rate": 1.9193857965451055e-07, |
|
"loss": 9.4132, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003459210147016431, |
|
"grad_norm": 26.18132781982422, |
|
"learning_rate": 3.838771593090211e-07, |
|
"loss": 9.1807, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004324012683770539, |
|
"grad_norm": 24.46787452697754, |
|
"learning_rate": 5.758157389635317e-07, |
|
"loss": 8.1559, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0051888152205246466, |
|
"grad_norm": 25.57164764404297, |
|
"learning_rate": 7.677543186180422e-07, |
|
"loss": 9.9007, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006053617757278754, |
|
"grad_norm": 19.92409324645996, |
|
"learning_rate": 9.596928982725527e-07, |
|
"loss": 9.634, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.006918420294032862, |
|
"grad_norm": 23.451889038085938, |
|
"learning_rate": 1.1516314779270634e-06, |
|
"loss": 10.8722, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007783222830786971, |
|
"grad_norm": 22.753061294555664, |
|
"learning_rate": 1.343570057581574e-06, |
|
"loss": 8.6969, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.008648025367541078, |
|
"grad_norm": 23.012781143188477, |
|
"learning_rate": 1.5355086372360844e-06, |
|
"loss": 8.9552, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.009512827904295185, |
|
"grad_norm": 20.173051834106445, |
|
"learning_rate": 1.7274472168905951e-06, |
|
"loss": 7.5245, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.010377630441049293, |
|
"grad_norm": 24.979217529296875, |
|
"learning_rate": 1.9193857965451054e-06, |
|
"loss": 9.1573, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.011242432977803401, |
|
"grad_norm": 24.23455810546875, |
|
"learning_rate": 2.1113243761996164e-06, |
|
"loss": 9.2615, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.012107235514557509, |
|
"grad_norm": 25.147851943969727, |
|
"learning_rate": 2.303262955854127e-06, |
|
"loss": 9.1465, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.012972038051311616, |
|
"grad_norm": 21.937841415405273, |
|
"learning_rate": 2.4952015355086374e-06, |
|
"loss": 9.3845, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.013836840588065724, |
|
"grad_norm": 24.25821304321289, |
|
"learning_rate": 2.687140115163148e-06, |
|
"loss": 9.3638, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.014701643124819834, |
|
"grad_norm": 22.018434524536133, |
|
"learning_rate": 2.879078694817659e-06, |
|
"loss": 9.3365, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.015566445661573941, |
|
"grad_norm": 19.021236419677734, |
|
"learning_rate": 3.071017274472169e-06, |
|
"loss": 8.267, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.016431248198328047, |
|
"grad_norm": 24.68037986755371, |
|
"learning_rate": 3.2629558541266794e-06, |
|
"loss": 9.8727, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.017296050735082155, |
|
"grad_norm": 18.95473861694336, |
|
"learning_rate": 3.4548944337811903e-06, |
|
"loss": 9.6234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.018160853271836263, |
|
"grad_norm": 20.910001754760742, |
|
"learning_rate": 3.646833013435701e-06, |
|
"loss": 9.0452, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01902565580859037, |
|
"grad_norm": 23.27020835876465, |
|
"learning_rate": 3.838771593090211e-06, |
|
"loss": 8.5295, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01989045834534448, |
|
"grad_norm": 20.173105239868164, |
|
"learning_rate": 4.030710172744722e-06, |
|
"loss": 7.8237, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.020755260882098586, |
|
"grad_norm": 21.863664627075195, |
|
"learning_rate": 4.222648752399233e-06, |
|
"loss": 8.5162, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.021620063418852694, |
|
"grad_norm": 19.267335891723633, |
|
"learning_rate": 4.414587332053743e-06, |
|
"loss": 8.312, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.022484865955606802, |
|
"grad_norm": 18.204317092895508, |
|
"learning_rate": 4.606525911708254e-06, |
|
"loss": 9.2816, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02334966849236091, |
|
"grad_norm": 18.67437744140625, |
|
"learning_rate": 4.798464491362764e-06, |
|
"loss": 7.9128, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.024214471029115017, |
|
"grad_norm": 18.21224594116211, |
|
"learning_rate": 4.990403071017275e-06, |
|
"loss": 7.6773, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.025079273565869125, |
|
"grad_norm": 18.150562286376953, |
|
"learning_rate": 5.182341650671786e-06, |
|
"loss": 7.4793, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.025944076102623233, |
|
"grad_norm": 17.31494140625, |
|
"learning_rate": 5.374280230326296e-06, |
|
"loss": 8.2437, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02680887863937734, |
|
"grad_norm": 16.86028289794922, |
|
"learning_rate": 5.566218809980806e-06, |
|
"loss": 8.6053, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02767368117613145, |
|
"grad_norm": 15.361474990844727, |
|
"learning_rate": 5.758157389635318e-06, |
|
"loss": 7.4762, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02853848371288556, |
|
"grad_norm": 18.730810165405273, |
|
"learning_rate": 5.950095969289828e-06, |
|
"loss": 10.5289, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.029403286249639667, |
|
"grad_norm": 15.356877326965332, |
|
"learning_rate": 6.142034548944338e-06, |
|
"loss": 7.8911, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.030268088786393775, |
|
"grad_norm": 17.622791290283203, |
|
"learning_rate": 6.333973128598848e-06, |
|
"loss": 7.9708, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.031132891323147883, |
|
"grad_norm": 17.50615882873535, |
|
"learning_rate": 6.525911708253359e-06, |
|
"loss": 7.2581, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03199769385990199, |
|
"grad_norm": 16.068561553955078, |
|
"learning_rate": 6.7178502879078705e-06, |
|
"loss": 8.6747, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.032862496396656095, |
|
"grad_norm": 13.518677711486816, |
|
"learning_rate": 6.909788867562381e-06, |
|
"loss": 7.308, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.033727298933410206, |
|
"grad_norm": 18.713558197021484, |
|
"learning_rate": 7.101727447216891e-06, |
|
"loss": 8.6224, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03459210147016431, |
|
"grad_norm": 20.201255798339844, |
|
"learning_rate": 7.293666026871402e-06, |
|
"loss": 12.3613, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03545690400691842, |
|
"grad_norm": 13.44450855255127, |
|
"learning_rate": 7.485604606525912e-06, |
|
"loss": 7.2108, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.036321706543672526, |
|
"grad_norm": 15.1000394821167, |
|
"learning_rate": 7.677543186180422e-06, |
|
"loss": 7.831, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03718650908042664, |
|
"grad_norm": 14.707894325256348, |
|
"learning_rate": 7.869481765834934e-06, |
|
"loss": 6.9282, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03805131161718074, |
|
"grad_norm": 13.331870079040527, |
|
"learning_rate": 8.061420345489444e-06, |
|
"loss": 6.9092, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03891611415393485, |
|
"grad_norm": 14.49152660369873, |
|
"learning_rate": 8.253358925143954e-06, |
|
"loss": 8.9053, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03978091669068896, |
|
"grad_norm": 13.79437255859375, |
|
"learning_rate": 8.445297504798465e-06, |
|
"loss": 7.5276, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.04064571922744307, |
|
"grad_norm": 15.470795631408691, |
|
"learning_rate": 8.637236084452976e-06, |
|
"loss": 7.4793, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04151052176419717, |
|
"grad_norm": 13.469670295715332, |
|
"learning_rate": 8.829174664107486e-06, |
|
"loss": 7.4401, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.042375324300951284, |
|
"grad_norm": 12.38973617553711, |
|
"learning_rate": 9.021113243761997e-06, |
|
"loss": 6.6742, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04324012683770539, |
|
"grad_norm": 14.353404998779297, |
|
"learning_rate": 9.213051823416507e-06, |
|
"loss": 8.89, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0441049293744595, |
|
"grad_norm": 12.149626731872559, |
|
"learning_rate": 9.404990403071018e-06, |
|
"loss": 8.6311, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.044969731911213603, |
|
"grad_norm": 12.504135131835938, |
|
"learning_rate": 9.596928982725528e-06, |
|
"loss": 6.9648, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.045834534447967715, |
|
"grad_norm": 12.439926147460938, |
|
"learning_rate": 9.78886756238004e-06, |
|
"loss": 7.0633, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04669933698472182, |
|
"grad_norm": 13.445518493652344, |
|
"learning_rate": 9.98080614203455e-06, |
|
"loss": 8.1331, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04756413952147593, |
|
"grad_norm": 12.668989181518555, |
|
"learning_rate": 1.0172744721689061e-05, |
|
"loss": 8.4931, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.048428942058230035, |
|
"grad_norm": 11.86841869354248, |
|
"learning_rate": 1.0364683301343571e-05, |
|
"loss": 6.9534, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.049293744594984146, |
|
"grad_norm": 12.336670875549316, |
|
"learning_rate": 1.0556621880998081e-05, |
|
"loss": 6.9585, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.05015854713173825, |
|
"grad_norm": 12.496221542358398, |
|
"learning_rate": 1.0748560460652591e-05, |
|
"loss": 7.6699, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05102334966849236, |
|
"grad_norm": 11.765594482421875, |
|
"learning_rate": 1.0940499040307102e-05, |
|
"loss": 6.5076, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.051888152205246466, |
|
"grad_norm": 13.426615715026855, |
|
"learning_rate": 1.1132437619961612e-05, |
|
"loss": 9.5443, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05275295474200058, |
|
"grad_norm": 12.127195358276367, |
|
"learning_rate": 1.1324376199616123e-05, |
|
"loss": 6.7481, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05361775727875468, |
|
"grad_norm": 10.69729232788086, |
|
"learning_rate": 1.1516314779270635e-05, |
|
"loss": 6.4521, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05448255981550879, |
|
"grad_norm": 12.042082786560059, |
|
"learning_rate": 1.1708253358925145e-05, |
|
"loss": 8.1839, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0553473623522629, |
|
"grad_norm": 13.164307594299316, |
|
"learning_rate": 1.1900191938579655e-05, |
|
"loss": 7.1924, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05621216488901701, |
|
"grad_norm": 10.799245834350586, |
|
"learning_rate": 1.2092130518234165e-05, |
|
"loss": 7.5767, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05707696742577112, |
|
"grad_norm": 10.165273666381836, |
|
"learning_rate": 1.2284069097888675e-05, |
|
"loss": 7.2645, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05794176996252522, |
|
"grad_norm": 12.342886924743652, |
|
"learning_rate": 1.2476007677543186e-05, |
|
"loss": 6.175, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.058806572499279335, |
|
"grad_norm": 10.652329444885254, |
|
"learning_rate": 1.2667946257197696e-05, |
|
"loss": 6.5491, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05967137503603344, |
|
"grad_norm": 10.688251495361328, |
|
"learning_rate": 1.2859884836852207e-05, |
|
"loss": 6.7543, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06053617757278755, |
|
"grad_norm": 11.341581344604492, |
|
"learning_rate": 1.3051823416506717e-05, |
|
"loss": 6.98, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.061400980109541654, |
|
"grad_norm": 10.539051055908203, |
|
"learning_rate": 1.3243761996161231e-05, |
|
"loss": 6.76, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.062265782646295766, |
|
"grad_norm": 10.746752738952637, |
|
"learning_rate": 1.3435700575815741e-05, |
|
"loss": 7.2167, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06313058518304987, |
|
"grad_norm": 12.96174144744873, |
|
"learning_rate": 1.3627639155470251e-05, |
|
"loss": 9.219, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.06399538771980398, |
|
"grad_norm": 10.668299674987793, |
|
"learning_rate": 1.3819577735124761e-05, |
|
"loss": 7.3113, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.06486019025655809, |
|
"grad_norm": 10.878615379333496, |
|
"learning_rate": 1.4011516314779271e-05, |
|
"loss": 6.4098, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06572499279331219, |
|
"grad_norm": 12.29603099822998, |
|
"learning_rate": 1.4203454894433781e-05, |
|
"loss": 8.3399, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0665897953300663, |
|
"grad_norm": 13.01440143585205, |
|
"learning_rate": 1.4395393474088293e-05, |
|
"loss": 8.6991, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.06745459786682041, |
|
"grad_norm": 10.999458312988281, |
|
"learning_rate": 1.4587332053742803e-05, |
|
"loss": 9.5087, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.06831940040357452, |
|
"grad_norm": 11.303417205810547, |
|
"learning_rate": 1.4779270633397313e-05, |
|
"loss": 7.3491, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06918420294032862, |
|
"grad_norm": 10.507055282592773, |
|
"learning_rate": 1.4971209213051823e-05, |
|
"loss": 6.8214, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07004900547708273, |
|
"grad_norm": 11.467567443847656, |
|
"learning_rate": 1.5163147792706333e-05, |
|
"loss": 6.5489, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07091380801383684, |
|
"grad_norm": 10.555798530578613, |
|
"learning_rate": 1.5355086372360844e-05, |
|
"loss": 6.7692, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07177861055059095, |
|
"grad_norm": 12.266429901123047, |
|
"learning_rate": 1.5547024952015357e-05, |
|
"loss": 8.8059, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07264341308734505, |
|
"grad_norm": 9.898346900939941, |
|
"learning_rate": 1.5738963531669867e-05, |
|
"loss": 6.4811, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.07350821562409916, |
|
"grad_norm": 11.04404067993164, |
|
"learning_rate": 1.5930902111324377e-05, |
|
"loss": 7.0495, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07437301816085327, |
|
"grad_norm": 11.240497589111328, |
|
"learning_rate": 1.6122840690978887e-05, |
|
"loss": 5.8256, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.07523782069760739, |
|
"grad_norm": 10.409235000610352, |
|
"learning_rate": 1.6314779270633397e-05, |
|
"loss": 5.7203, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07610262323436148, |
|
"grad_norm": 11.557363510131836, |
|
"learning_rate": 1.6506717850287907e-05, |
|
"loss": 6.5094, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0769674257711156, |
|
"grad_norm": 9.760974884033203, |
|
"learning_rate": 1.669865642994242e-05, |
|
"loss": 5.7523, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0778322283078697, |
|
"grad_norm": 9.31316089630127, |
|
"learning_rate": 1.689059500959693e-05, |
|
"loss": 6.0464, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07869703084462382, |
|
"grad_norm": 11.943814277648926, |
|
"learning_rate": 1.708253358925144e-05, |
|
"loss": 6.5233, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07956183338137791, |
|
"grad_norm": 9.126127243041992, |
|
"learning_rate": 1.727447216890595e-05, |
|
"loss": 6.8966, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08042663591813203, |
|
"grad_norm": 9.386579513549805, |
|
"learning_rate": 1.746641074856046e-05, |
|
"loss": 6.3621, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08129143845488614, |
|
"grad_norm": 10.63054370880127, |
|
"learning_rate": 1.765834932821497e-05, |
|
"loss": 6.0194, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.08215624099164025, |
|
"grad_norm": 10.119132995605469, |
|
"learning_rate": 1.785028790786948e-05, |
|
"loss": 6.6797, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.08302104352839434, |
|
"grad_norm": 10.746257781982422, |
|
"learning_rate": 1.8042226487523995e-05, |
|
"loss": 5.6214, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.08388584606514846, |
|
"grad_norm": 10.64887809753418, |
|
"learning_rate": 1.8234165067178505e-05, |
|
"loss": 6.4946, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.08475064860190257, |
|
"grad_norm": 11.115398406982422, |
|
"learning_rate": 1.8426103646833015e-05, |
|
"loss": 5.9069, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.08561545113865668, |
|
"grad_norm": 11.452004432678223, |
|
"learning_rate": 1.8618042226487525e-05, |
|
"loss": 6.8848, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.08648025367541078, |
|
"grad_norm": 12.722066879272461, |
|
"learning_rate": 1.8809980806142035e-05, |
|
"loss": 7.7248, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08734505621216489, |
|
"grad_norm": 10.500570297241211, |
|
"learning_rate": 1.9001919385796545e-05, |
|
"loss": 6.9069, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.088209858748919, |
|
"grad_norm": 10.750312805175781, |
|
"learning_rate": 1.9193857965451055e-05, |
|
"loss": 6.3612, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08907466128567311, |
|
"grad_norm": 12.96158218383789, |
|
"learning_rate": 1.9385796545105565e-05, |
|
"loss": 7.6664, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08993946382242721, |
|
"grad_norm": 11.477307319641113, |
|
"learning_rate": 1.957773512476008e-05, |
|
"loss": 5.4654, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09080426635918132, |
|
"grad_norm": 13.458792686462402, |
|
"learning_rate": 1.976967370441459e-05, |
|
"loss": 6.7583, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09166906889593543, |
|
"grad_norm": 11.862403869628906, |
|
"learning_rate": 1.99616122840691e-05, |
|
"loss": 6.354, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.09253387143268954, |
|
"grad_norm": 15.43807601928711, |
|
"learning_rate": 2.015355086372361e-05, |
|
"loss": 5.0476, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.09339867396944364, |
|
"grad_norm": 15.703176498413086, |
|
"learning_rate": 2.0345489443378122e-05, |
|
"loss": 5.535, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.09426347650619775, |
|
"grad_norm": 15.830728530883789, |
|
"learning_rate": 2.0537428023032633e-05, |
|
"loss": 5.125, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.09512827904295186, |
|
"grad_norm": 18.535364151000977, |
|
"learning_rate": 2.0729366602687143e-05, |
|
"loss": 5.3941, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09599308157970597, |
|
"grad_norm": 20.664087295532227, |
|
"learning_rate": 2.0921305182341653e-05, |
|
"loss": 7.6313, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.09685788411646007, |
|
"grad_norm": 26.702512741088867, |
|
"learning_rate": 2.1113243761996163e-05, |
|
"loss": 5.584, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.09772268665321418, |
|
"grad_norm": 24.893169403076172, |
|
"learning_rate": 2.1305182341650673e-05, |
|
"loss": 6.7148, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.09858748918996829, |
|
"grad_norm": 23.61020278930664, |
|
"learning_rate": 2.1497120921305183e-05, |
|
"loss": 4.3739, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0994522917267224, |
|
"grad_norm": 30.567276000976562, |
|
"learning_rate": 2.1689059500959693e-05, |
|
"loss": 7.8202, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0994522917267224, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.9675614833831787, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.688956658941829, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.6881720430107527, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.9675614833831787, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.3966087176872613, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.7111111111111111, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8765031099319458, |
|
"eval_Qnli-dev_cosine_ap": 0.760920950345153, |
|
"eval_Qnli-dev_cosine_f1": 0.7272727272727272, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.8635396957397461, |
|
"eval_Qnli-dev_cosine_mcc": 0.4497120149145933, |
|
"eval_Qnli-dev_cosine_precision": 0.6666666666666666, |
|
"eval_Qnli-dev_cosine_recall": 0.8, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.8020833134651184, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 1.940317153930664, |
|
"eval_global_dataset_runtime": 67.8871, |
|
"eval_global_dataset_samples_per_second": 14.318, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.8020833134651184, |
|
"eval_sts-test-1024_pearson_cosine": 0.6710405361187501, |
|
"eval_sts-test-1024_spearman_cosine": 0.8309834676298848, |
|
"eval_sts-test_pearson_cosine": 0.9054066453363472, |
|
"eval_sts-test_spearman_cosine": 0.9155824166550393, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1003170942634765, |
|
"grad_norm": 31.343202590942383, |
|
"learning_rate": 2.1880998080614203e-05, |
|
"loss": 4.42, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.10118189680023061, |
|
"grad_norm": 28.399757385253906, |
|
"learning_rate": 2.2072936660268713e-05, |
|
"loss": 4.5047, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.10204669933698472, |
|
"grad_norm": 32.25544357299805, |
|
"learning_rate": 2.2264875239923223e-05, |
|
"loss": 4.427, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.10291150187373883, |
|
"grad_norm": 27.07774543762207, |
|
"learning_rate": 2.2456813819577733e-05, |
|
"loss": 3.1201, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.10377630441049293, |
|
"grad_norm": 31.4462833404541, |
|
"learning_rate": 2.2648752399232247e-05, |
|
"loss": 4.3632, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.10464110694724704, |
|
"grad_norm": 27.67288589477539, |
|
"learning_rate": 2.2840690978886757e-05, |
|
"loss": 3.5101, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.10550590948400115, |
|
"grad_norm": 29.23362922668457, |
|
"learning_rate": 2.303262955854127e-05, |
|
"loss": 4.7499, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.10637071202075526, |
|
"grad_norm": 27.85274887084961, |
|
"learning_rate": 2.322456813819578e-05, |
|
"loss": 4.5242, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.10723551455750936, |
|
"grad_norm": 21.893939971923828, |
|
"learning_rate": 2.341650671785029e-05, |
|
"loss": 3.379, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.10810031709426347, |
|
"grad_norm": 18.63385772705078, |
|
"learning_rate": 2.36084452975048e-05, |
|
"loss": 2.8004, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10896511963101758, |
|
"grad_norm": 16.17616844177246, |
|
"learning_rate": 2.380038387715931e-05, |
|
"loss": 2.8855, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1098299221677717, |
|
"grad_norm": 17.123281478881836, |
|
"learning_rate": 2.399232245681382e-05, |
|
"loss": 3.937, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.1106947247045258, |
|
"grad_norm": 14.539612770080566, |
|
"learning_rate": 2.418426103646833e-05, |
|
"loss": 3.5914, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.1115595272412799, |
|
"grad_norm": 12.644956588745117, |
|
"learning_rate": 2.437619961612284e-05, |
|
"loss": 2.6459, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.11242432977803402, |
|
"grad_norm": 10.95170783996582, |
|
"learning_rate": 2.456813819577735e-05, |
|
"loss": 2.3887, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.11328913231478813, |
|
"grad_norm": 12.561387062072754, |
|
"learning_rate": 2.476007677543186e-05, |
|
"loss": 4.1043, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.11415393485154224, |
|
"grad_norm": 9.273588180541992, |
|
"learning_rate": 2.495201535508637e-05, |
|
"loss": 2.2758, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.11501873738829634, |
|
"grad_norm": 9.219544410705566, |
|
"learning_rate": 2.514395393474088e-05, |
|
"loss": 2.859, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.11588353992505045, |
|
"grad_norm": 8.443903923034668, |
|
"learning_rate": 2.533589251439539e-05, |
|
"loss": 2.0162, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.11674834246180456, |
|
"grad_norm": 9.522578239440918, |
|
"learning_rate": 2.5527831094049905e-05, |
|
"loss": 2.7069, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.11761314499855867, |
|
"grad_norm": 8.184837341308594, |
|
"learning_rate": 2.5719769673704415e-05, |
|
"loss": 1.9536, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.11847794753531277, |
|
"grad_norm": 9.079197883605957, |
|
"learning_rate": 2.5911708253358925e-05, |
|
"loss": 2.3063, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.11934275007206688, |
|
"grad_norm": 9.438823699951172, |
|
"learning_rate": 2.6103646833013435e-05, |
|
"loss": 3.3783, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.12020755260882099, |
|
"grad_norm": 8.003981590270996, |
|
"learning_rate": 2.6295585412667952e-05, |
|
"loss": 1.9538, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1210723551455751, |
|
"grad_norm": 8.199268341064453, |
|
"learning_rate": 2.6487523992322462e-05, |
|
"loss": 2.8959, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1219371576823292, |
|
"grad_norm": 9.071074485778809, |
|
"learning_rate": 2.6679462571976972e-05, |
|
"loss": 2.3064, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.12280196021908331, |
|
"grad_norm": 10.237217903137207, |
|
"learning_rate": 2.6871401151631482e-05, |
|
"loss": 2.4625, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.12366676275583742, |
|
"grad_norm": 7.96627950668335, |
|
"learning_rate": 2.7063339731285992e-05, |
|
"loss": 2.4083, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.12453156529259153, |
|
"grad_norm": 8.751070022583008, |
|
"learning_rate": 2.7255278310940502e-05, |
|
"loss": 1.5914, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.12539636782934563, |
|
"grad_norm": 6.843534469604492, |
|
"learning_rate": 2.7447216890595012e-05, |
|
"loss": 1.5798, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.12626117036609974, |
|
"grad_norm": 7.700779438018799, |
|
"learning_rate": 2.7639155470249522e-05, |
|
"loss": 1.5194, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.12712597290285385, |
|
"grad_norm": 8.954259872436523, |
|
"learning_rate": 2.7831094049904032e-05, |
|
"loss": 1.5924, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.12799077543960796, |
|
"grad_norm": 10.815597534179688, |
|
"learning_rate": 2.8023032629558543e-05, |
|
"loss": 3.1143, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.12885557797636207, |
|
"grad_norm": 9.539572715759277, |
|
"learning_rate": 2.8214971209213053e-05, |
|
"loss": 1.8632, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.12972038051311618, |
|
"grad_norm": 6.322872638702393, |
|
"learning_rate": 2.8406909788867563e-05, |
|
"loss": 2.0489, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13058518304987027, |
|
"grad_norm": 6.538212776184082, |
|
"learning_rate": 2.8598848368522073e-05, |
|
"loss": 1.5573, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.13144998558662438, |
|
"grad_norm": 6.798872470855713, |
|
"learning_rate": 2.8790786948176586e-05, |
|
"loss": 2.8024, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.1323147881233785, |
|
"grad_norm": 8.393974304199219, |
|
"learning_rate": 2.8982725527831096e-05, |
|
"loss": 1.9423, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.1331795906601326, |
|
"grad_norm": 8.043729782104492, |
|
"learning_rate": 2.9174664107485606e-05, |
|
"loss": 3.1444, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1340443931968867, |
|
"grad_norm": 9.158576965332031, |
|
"learning_rate": 2.9366602687140116e-05, |
|
"loss": 2.5482, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.13490919573364082, |
|
"grad_norm": 6.786825180053711, |
|
"learning_rate": 2.9558541266794627e-05, |
|
"loss": 1.0428, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.13577399827039494, |
|
"grad_norm": 12.157453536987305, |
|
"learning_rate": 2.9750479846449137e-05, |
|
"loss": 5.8267, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.13663880080714905, |
|
"grad_norm": 10.719176292419434, |
|
"learning_rate": 2.9942418426103647e-05, |
|
"loss": 1.9785, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.13750360334390313, |
|
"grad_norm": 8.25823974609375, |
|
"learning_rate": 3.0134357005758157e-05, |
|
"loss": 2.5306, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.13836840588065724, |
|
"grad_norm": 8.451217651367188, |
|
"learning_rate": 3.0326295585412667e-05, |
|
"loss": 1.8271, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.13923320841741135, |
|
"grad_norm": 9.387060165405273, |
|
"learning_rate": 3.051823416506718e-05, |
|
"loss": 2.6579, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.14009801095416546, |
|
"grad_norm": 8.968480110168457, |
|
"learning_rate": 3.071017274472169e-05, |
|
"loss": 3.0193, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.14096281349091958, |
|
"grad_norm": 8.816688537597656, |
|
"learning_rate": 3.09021113243762e-05, |
|
"loss": 1.5596, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1418276160276737, |
|
"grad_norm": 5.402006149291992, |
|
"learning_rate": 3.1094049904030714e-05, |
|
"loss": 1.4505, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1426924185644278, |
|
"grad_norm": 7.654393196105957, |
|
"learning_rate": 3.128598848368523e-05, |
|
"loss": 2.5331, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1435572211011819, |
|
"grad_norm": 6.393066883087158, |
|
"learning_rate": 3.1477927063339734e-05, |
|
"loss": 1.384, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.144422023637936, |
|
"grad_norm": 8.975717544555664, |
|
"learning_rate": 3.166986564299425e-05, |
|
"loss": 3.3553, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.1452868261746901, |
|
"grad_norm": 8.812336921691895, |
|
"learning_rate": 3.1861804222648754e-05, |
|
"loss": 2.2541, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.14615162871144421, |
|
"grad_norm": 7.189652919769287, |
|
"learning_rate": 3.205374280230327e-05, |
|
"loss": 1.1827, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.14701643124819833, |
|
"grad_norm": 7.888529300689697, |
|
"learning_rate": 3.2245681381957774e-05, |
|
"loss": 1.3643, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.14788123378495244, |
|
"grad_norm": 6.611407279968262, |
|
"learning_rate": 3.243761996161229e-05, |
|
"loss": 1.9817, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.14874603632170655, |
|
"grad_norm": 6.734430313110352, |
|
"learning_rate": 3.2629558541266795e-05, |
|
"loss": 2.3332, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.14961083885846066, |
|
"grad_norm": 6.5995306968688965, |
|
"learning_rate": 3.282149712092131e-05, |
|
"loss": 1.4638, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.15047564139521477, |
|
"grad_norm": 7.57749605178833, |
|
"learning_rate": 3.3013435700575815e-05, |
|
"loss": 1.7929, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.15134044393196885, |
|
"grad_norm": 4.956903457641602, |
|
"learning_rate": 3.320537428023033e-05, |
|
"loss": 1.0457, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.15220524646872297, |
|
"grad_norm": 9.929686546325684, |
|
"learning_rate": 3.339731285988484e-05, |
|
"loss": 1.4866, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.15307004900547708, |
|
"grad_norm": 7.194726467132568, |
|
"learning_rate": 3.358925143953935e-05, |
|
"loss": 1.7834, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.1539348515422312, |
|
"grad_norm": 6.916417598724365, |
|
"learning_rate": 3.378119001919386e-05, |
|
"loss": 1.1396, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.1547996540789853, |
|
"grad_norm": 9.47856330871582, |
|
"learning_rate": 3.397312859884837e-05, |
|
"loss": 1.9811, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.1556644566157394, |
|
"grad_norm": 7.894885540008545, |
|
"learning_rate": 3.416506717850288e-05, |
|
"loss": 1.1859, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.15652925915249352, |
|
"grad_norm": 7.631194114685059, |
|
"learning_rate": 3.435700575815739e-05, |
|
"loss": 1.5481, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.15739406168924763, |
|
"grad_norm": 5.6157073974609375, |
|
"learning_rate": 3.45489443378119e-05, |
|
"loss": 1.5954, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.15825886422600172, |
|
"grad_norm": 9.201720237731934, |
|
"learning_rate": 3.474088291746641e-05, |
|
"loss": 2.2163, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.15912366676275583, |
|
"grad_norm": 5.702026844024658, |
|
"learning_rate": 3.493282149712092e-05, |
|
"loss": 1.475, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.15998846929950994, |
|
"grad_norm": 5.93116569519043, |
|
"learning_rate": 3.512476007677543e-05, |
|
"loss": 1.2394, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.16085327183626405, |
|
"grad_norm": 3.9884233474731445, |
|
"learning_rate": 3.531669865642994e-05, |
|
"loss": 1.2713, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.16171807437301816, |
|
"grad_norm": 7.569946765899658, |
|
"learning_rate": 3.550863723608445e-05, |
|
"loss": 1.435, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.16258287690977227, |
|
"grad_norm": 7.594637393951416, |
|
"learning_rate": 3.570057581573896e-05, |
|
"loss": 1.1762, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.16344767944652638, |
|
"grad_norm": 7.092876434326172, |
|
"learning_rate": 3.5892514395393476e-05, |
|
"loss": 2.3349, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1643124819832805, |
|
"grad_norm": 6.997330188751221, |
|
"learning_rate": 3.608445297504799e-05, |
|
"loss": 1.1459, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16517728452003458, |
|
"grad_norm": 9.205595016479492, |
|
"learning_rate": 3.6276391554702496e-05, |
|
"loss": 1.313, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1660420870567887, |
|
"grad_norm": 6.776134014129639, |
|
"learning_rate": 3.646833013435701e-05, |
|
"loss": 1.1422, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1669068895935428, |
|
"grad_norm": 9.902478218078613, |
|
"learning_rate": 3.6660268714011516e-05, |
|
"loss": 1.4937, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1677716921302969, |
|
"grad_norm": 8.630653381347656, |
|
"learning_rate": 3.685220729366603e-05, |
|
"loss": 1.351, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.16863649466705102, |
|
"grad_norm": 8.957950592041016, |
|
"learning_rate": 3.704414587332054e-05, |
|
"loss": 1.1581, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.16950129720380513, |
|
"grad_norm": 8.303983688354492, |
|
"learning_rate": 3.723608445297505e-05, |
|
"loss": 2.1473, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.17036609974055925, |
|
"grad_norm": 8.272674560546875, |
|
"learning_rate": 3.7428023032629563e-05, |
|
"loss": 0.8801, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.17123090227731336, |
|
"grad_norm": 7.904557228088379, |
|
"learning_rate": 3.761996161228407e-05, |
|
"loss": 1.3985, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.17209570481406747, |
|
"grad_norm": 5.652804851531982, |
|
"learning_rate": 3.7811900191938584e-05, |
|
"loss": 0.8468, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.17296050735082155, |
|
"grad_norm": 5.771730422973633, |
|
"learning_rate": 3.800383877159309e-05, |
|
"loss": 1.0563, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.17382530988757566, |
|
"grad_norm": 6.634278297424316, |
|
"learning_rate": 3.8195777351247604e-05, |
|
"loss": 0.9612, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.17469011242432977, |
|
"grad_norm": 8.659712791442871, |
|
"learning_rate": 3.838771593090211e-05, |
|
"loss": 1.665, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.17555491496108389, |
|
"grad_norm": 6.617002487182617, |
|
"learning_rate": 3.8579654510556624e-05, |
|
"loss": 1.1505, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.176419717497838, |
|
"grad_norm": 10.3783597946167, |
|
"learning_rate": 3.877159309021113e-05, |
|
"loss": 1.7958, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1772845200345921, |
|
"grad_norm": 9.473942756652832, |
|
"learning_rate": 3.8963531669865644e-05, |
|
"loss": 1.3115, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.17814932257134622, |
|
"grad_norm": 7.500204563140869, |
|
"learning_rate": 3.915547024952016e-05, |
|
"loss": 1.0855, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.17901412510810033, |
|
"grad_norm": 6.897130012512207, |
|
"learning_rate": 3.9347408829174664e-05, |
|
"loss": 1.1051, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.17987892764485441, |
|
"grad_norm": 9.034842491149902, |
|
"learning_rate": 3.953934740882918e-05, |
|
"loss": 2.5371, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.18074373018160853, |
|
"grad_norm": 9.812570571899414, |
|
"learning_rate": 3.9731285988483684e-05, |
|
"loss": 1.7992, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.18160853271836264, |
|
"grad_norm": 7.528004169464111, |
|
"learning_rate": 3.99232245681382e-05, |
|
"loss": 1.7798, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.18247333525511675, |
|
"grad_norm": 7.52139139175415, |
|
"learning_rate": 4.0115163147792705e-05, |
|
"loss": 0.7093, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.18333813779187086, |
|
"grad_norm": 9.2921142578125, |
|
"learning_rate": 4.030710172744722e-05, |
|
"loss": 1.2681, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.18420294032862497, |
|
"grad_norm": 4.883711814880371, |
|
"learning_rate": 4.049904030710173e-05, |
|
"loss": 0.911, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.18506774286537908, |
|
"grad_norm": 8.103593826293945, |
|
"learning_rate": 4.0690978886756245e-05, |
|
"loss": 1.1144, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.1859325454021332, |
|
"grad_norm": 6.5846381187438965, |
|
"learning_rate": 4.088291746641075e-05, |
|
"loss": 0.8362, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.18679734793888728, |
|
"grad_norm": 5.238864421844482, |
|
"learning_rate": 4.1074856046065265e-05, |
|
"loss": 0.838, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1876621504756414, |
|
"grad_norm": 7.091164588928223, |
|
"learning_rate": 4.126679462571977e-05, |
|
"loss": 1.3143, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1885269530123955, |
|
"grad_norm": 4.529580116271973, |
|
"learning_rate": 4.1458733205374285e-05, |
|
"loss": 0.8799, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.1893917555491496, |
|
"grad_norm": 5.912927627563477, |
|
"learning_rate": 4.165067178502879e-05, |
|
"loss": 0.7928, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.19025655808590372, |
|
"grad_norm": 7.802720069885254, |
|
"learning_rate": 4.1842610364683305e-05, |
|
"loss": 0.7077, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.19112136062265783, |
|
"grad_norm": 7.49670934677124, |
|
"learning_rate": 4.203454894433781e-05, |
|
"loss": 1.7815, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.19198616315941194, |
|
"grad_norm": 5.978695392608643, |
|
"learning_rate": 4.2226487523992326e-05, |
|
"loss": 1.3599, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.19285096569616605, |
|
"grad_norm": 8.289727210998535, |
|
"learning_rate": 4.241842610364683e-05, |
|
"loss": 0.7413, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.19371576823292014, |
|
"grad_norm": 7.663917541503906, |
|
"learning_rate": 4.2610364683301346e-05, |
|
"loss": 1.9959, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.19458057076967425, |
|
"grad_norm": 9.845619201660156, |
|
"learning_rate": 4.280230326295586e-05, |
|
"loss": 1.9112, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.19544537330642836, |
|
"grad_norm": 5.703056812286377, |
|
"learning_rate": 4.2994241842610366e-05, |
|
"loss": 0.5033, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.19631017584318247, |
|
"grad_norm": 9.209814071655273, |
|
"learning_rate": 4.318618042226488e-05, |
|
"loss": 1.1669, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.19717497837993658, |
|
"grad_norm": 8.577181816101074, |
|
"learning_rate": 4.3378119001919386e-05, |
|
"loss": 1.2109, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.1980397809166907, |
|
"grad_norm": 7.078784942626953, |
|
"learning_rate": 4.35700575815739e-05, |
|
"loss": 0.781, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.1989045834534448, |
|
"grad_norm": 9.162598609924316, |
|
"learning_rate": 4.3761996161228406e-05, |
|
"loss": 1.5895, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1989045834534448, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.867725133895874, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.713229712410124, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7291666666666667, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8537728786468506, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.46405228758169936, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6862745098039216, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8137844204902649, |
|
"eval_Qnli-dev_cosine_ap": 0.7513782450871136, |
|
"eval_Qnli-dev_cosine_f1": 0.7222222222222222, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7686975002288818, |
|
"eval_Qnli-dev_cosine_mcc": 0.41614558708189836, |
|
"eval_Qnli-dev_cosine_precision": 0.6190476190476191, |
|
"eval_Qnli-dev_cosine_recall": 0.8666666666666667, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 0.7282267808914185, |
|
"eval_global_dataset_runtime": 67.7277, |
|
"eval_global_dataset_samples_per_second": 14.352, |
|
"eval_global_dataset_steps_per_second": 0.31, |
|
"eval_sequential_score": 0.9270833134651184, |
|
"eval_sts-test-1024_pearson_cosine": 0.8484219629681994, |
|
"eval_sts-test-1024_spearman_cosine": 0.8984444397927454, |
|
"eval_sts-test_pearson_cosine": 0.9066337545995211, |
|
"eval_sts-test_spearman_cosine": 0.9170443296862147, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.19976938599019892, |
|
"grad_norm": 6.589021682739258, |
|
"learning_rate": 4.395393474088292e-05, |
|
"loss": 1.2439, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.200634188526953, |
|
"grad_norm": 4.9722089767456055, |
|
"learning_rate": 4.4145873320537426e-05, |
|
"loss": 0.5947, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.2014989910637071, |
|
"grad_norm": 6.424257278442383, |
|
"learning_rate": 4.433781190019194e-05, |
|
"loss": 1.1687, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.20236379360046122, |
|
"grad_norm": 10.21776008605957, |
|
"learning_rate": 4.4529750479846447e-05, |
|
"loss": 2.082, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.20322859613721533, |
|
"grad_norm": 6.3251633644104, |
|
"learning_rate": 4.472168905950096e-05, |
|
"loss": 0.521, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.20409339867396945, |
|
"grad_norm": 6.459076881408691, |
|
"learning_rate": 4.491362763915547e-05, |
|
"loss": 1.2406, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.20495820121072356, |
|
"grad_norm": 6.254432201385498, |
|
"learning_rate": 4.510556621880998e-05, |
|
"loss": 0.6586, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.20582300374747767, |
|
"grad_norm": 6.352238655090332, |
|
"learning_rate": 4.5297504798464494e-05, |
|
"loss": 0.6746, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.20668780628423178, |
|
"grad_norm": 4.247053146362305, |
|
"learning_rate": 4.548944337811901e-05, |
|
"loss": 0.3925, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.20755260882098586, |
|
"grad_norm": 6.61681604385376, |
|
"learning_rate": 4.5681381957773514e-05, |
|
"loss": 0.8654, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.20841741135773997, |
|
"grad_norm": 7.9061408042907715, |
|
"learning_rate": 4.587332053742803e-05, |
|
"loss": 0.6723, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.20928221389449408, |
|
"grad_norm": 3.9183671474456787, |
|
"learning_rate": 4.606525911708254e-05, |
|
"loss": 0.4345, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.2101470164312482, |
|
"grad_norm": 8.863993644714355, |
|
"learning_rate": 4.625719769673705e-05, |
|
"loss": 1.0822, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.2110118189680023, |
|
"grad_norm": 8.070558547973633, |
|
"learning_rate": 4.644913627639156e-05, |
|
"loss": 1.5697, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.21187662150475642, |
|
"grad_norm": 3.8370699882507324, |
|
"learning_rate": 4.664107485604607e-05, |
|
"loss": 0.3771, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.21274142404151053, |
|
"grad_norm": 4.8743486404418945, |
|
"learning_rate": 4.683301343570058e-05, |
|
"loss": 0.484, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.21360622657826464, |
|
"grad_norm": 6.827274322509766, |
|
"learning_rate": 4.702495201535509e-05, |
|
"loss": 1.0994, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.21447102911501872, |
|
"grad_norm": 6.400326251983643, |
|
"learning_rate": 4.72168905950096e-05, |
|
"loss": 1.0543, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.21533583165177284, |
|
"grad_norm": 9.760299682617188, |
|
"learning_rate": 4.740882917466411e-05, |
|
"loss": 1.1132, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.21620063418852695, |
|
"grad_norm": 9.413398742675781, |
|
"learning_rate": 4.760076775431862e-05, |
|
"loss": 1.7051, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.21706543672528106, |
|
"grad_norm": 6.986111164093018, |
|
"learning_rate": 4.779270633397313e-05, |
|
"loss": 0.5029, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.21793023926203517, |
|
"grad_norm": 11.26386547088623, |
|
"learning_rate": 4.798464491362764e-05, |
|
"loss": 1.5927, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.21879504179878928, |
|
"grad_norm": 5.758693695068359, |
|
"learning_rate": 4.817658349328215e-05, |
|
"loss": 0.9221, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2196598443355434, |
|
"grad_norm": 6.061553478240967, |
|
"learning_rate": 4.836852207293666e-05, |
|
"loss": 0.989, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.2205246468722975, |
|
"grad_norm": 7.509443759918213, |
|
"learning_rate": 4.8560460652591175e-05, |
|
"loss": 1.9468, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.2213894494090516, |
|
"grad_norm": 7.857194900512695, |
|
"learning_rate": 4.875239923224568e-05, |
|
"loss": 0.7299, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.2222542519458057, |
|
"grad_norm": 9.96574592590332, |
|
"learning_rate": 4.8944337811900195e-05, |
|
"loss": 1.0199, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.2231190544825598, |
|
"grad_norm": 8.403667449951172, |
|
"learning_rate": 4.91362763915547e-05, |
|
"loss": 1.0238, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.22398385701931392, |
|
"grad_norm": 8.612835884094238, |
|
"learning_rate": 4.9328214971209215e-05, |
|
"loss": 1.8386, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.22484865955606803, |
|
"grad_norm": 7.690261363983154, |
|
"learning_rate": 4.952015355086372e-05, |
|
"loss": 0.7887, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.22571346209282214, |
|
"grad_norm": 9.24271011352539, |
|
"learning_rate": 4.9712092130518236e-05, |
|
"loss": 1.0248, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.22657826462957625, |
|
"grad_norm": 6.5738525390625, |
|
"learning_rate": 4.990403071017274e-05, |
|
"loss": 0.9891, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.22744306716633036, |
|
"grad_norm": 10.909134864807129, |
|
"learning_rate": 5.009596928982726e-05, |
|
"loss": 2.007, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.22830786970308448, |
|
"grad_norm": 7.512816905975342, |
|
"learning_rate": 5.028790786948176e-05, |
|
"loss": 1.6522, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.22917267223983856, |
|
"grad_norm": 4.3134446144104, |
|
"learning_rate": 5.047984644913628e-05, |
|
"loss": 0.8482, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.23003747477659267, |
|
"grad_norm": 6.679250240325928, |
|
"learning_rate": 5.067178502879078e-05, |
|
"loss": 0.7231, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.23090227731334678, |
|
"grad_norm": 8.060896873474121, |
|
"learning_rate": 5.08637236084453e-05, |
|
"loss": 0.9017, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2317670798501009, |
|
"grad_norm": 10.473666191101074, |
|
"learning_rate": 5.105566218809981e-05, |
|
"loss": 1.2073, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.232631882386855, |
|
"grad_norm": 5.640207290649414, |
|
"learning_rate": 5.124760076775432e-05, |
|
"loss": 0.3825, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.23349668492360912, |
|
"grad_norm": 7.310571193695068, |
|
"learning_rate": 5.143953934740883e-05, |
|
"loss": 0.6634, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.23436148746036323, |
|
"grad_norm": 10.224222183227539, |
|
"learning_rate": 5.163147792706334e-05, |
|
"loss": 1.3564, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.23522628999711734, |
|
"grad_norm": 4.993323802947998, |
|
"learning_rate": 5.182341650671785e-05, |
|
"loss": 1.1294, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.23609109253387142, |
|
"grad_norm": 6.149577617645264, |
|
"learning_rate": 5.201535508637236e-05, |
|
"loss": 0.5599, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.23695589507062553, |
|
"grad_norm": 6.756112098693848, |
|
"learning_rate": 5.220729366602687e-05, |
|
"loss": 0.6844, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.23782069760737964, |
|
"grad_norm": 8.450921058654785, |
|
"learning_rate": 5.2399232245681383e-05, |
|
"loss": 0.7783, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.23868550014413376, |
|
"grad_norm": 7.2079267501831055, |
|
"learning_rate": 5.2591170825335904e-05, |
|
"loss": 1.101, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.23955030268088787, |
|
"grad_norm": 6.447202205657959, |
|
"learning_rate": 5.2783109404990404e-05, |
|
"loss": 0.9447, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.24041510521764198, |
|
"grad_norm": 10.80993366241455, |
|
"learning_rate": 5.2975047984644924e-05, |
|
"loss": 2.4452, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.2412799077543961, |
|
"grad_norm": 7.458428859710693, |
|
"learning_rate": 5.3166986564299424e-05, |
|
"loss": 1.2032, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.2421447102911502, |
|
"grad_norm": 11.762413024902344, |
|
"learning_rate": 5.3358925143953944e-05, |
|
"loss": 1.9775, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.24300951282790428, |
|
"grad_norm": 6.029952049255371, |
|
"learning_rate": 5.3550863723608444e-05, |
|
"loss": 0.523, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2438743153646584, |
|
"grad_norm": 7.083131313323975, |
|
"learning_rate": 5.3742802303262964e-05, |
|
"loss": 0.6166, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2447391179014125, |
|
"grad_norm": 8.343469619750977, |
|
"learning_rate": 5.3934740882917464e-05, |
|
"loss": 0.7902, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.24560392043816662, |
|
"grad_norm": 11.58956241607666, |
|
"learning_rate": 5.4126679462571984e-05, |
|
"loss": 1.1019, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.24646872297492073, |
|
"grad_norm": 6.451682090759277, |
|
"learning_rate": 5.431861804222649e-05, |
|
"loss": 1.1185, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.24733352551167484, |
|
"grad_norm": 8.293807983398438, |
|
"learning_rate": 5.4510556621881004e-05, |
|
"loss": 0.7051, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.24819832804842895, |
|
"grad_norm": 6.799464702606201, |
|
"learning_rate": 5.470249520153551e-05, |
|
"loss": 1.076, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.24906313058518306, |
|
"grad_norm": 6.457718849182129, |
|
"learning_rate": 5.4894433781190025e-05, |
|
"loss": 1.5065, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.24992793312193715, |
|
"grad_norm": 8.503544807434082, |
|
"learning_rate": 5.508637236084453e-05, |
|
"loss": 0.9986, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.25079273565869126, |
|
"grad_norm": 8.062347412109375, |
|
"learning_rate": 5.5278310940499045e-05, |
|
"loss": 1.1196, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2516575381954454, |
|
"grad_norm": 5.3419508934021, |
|
"learning_rate": 5.547024952015355e-05, |
|
"loss": 0.7055, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2525223407321995, |
|
"grad_norm": 3.2817585468292236, |
|
"learning_rate": 5.5662188099808065e-05, |
|
"loss": 0.2865, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.25338714326895356, |
|
"grad_norm": 8.452672004699707, |
|
"learning_rate": 5.585412667946257e-05, |
|
"loss": 0.6973, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2542519458057077, |
|
"grad_norm": 9.172618865966797, |
|
"learning_rate": 5.6046065259117085e-05, |
|
"loss": 1.0347, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2551167483424618, |
|
"grad_norm": 7.101957321166992, |
|
"learning_rate": 5.623800383877159e-05, |
|
"loss": 0.5065, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2559815508792159, |
|
"grad_norm": 8.655692100524902, |
|
"learning_rate": 5.6429942418426105e-05, |
|
"loss": 0.7479, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.25684635341597, |
|
"grad_norm": 6.224137306213379, |
|
"learning_rate": 5.662188099808061e-05, |
|
"loss": 0.5214, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.25771115595272415, |
|
"grad_norm": 5.057961463928223, |
|
"learning_rate": 5.6813819577735125e-05, |
|
"loss": 0.4925, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.25857595848947823, |
|
"grad_norm": 5.989309787750244, |
|
"learning_rate": 5.700575815738963e-05, |
|
"loss": 0.9331, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.25944076102623237, |
|
"grad_norm": 5.4001336097717285, |
|
"learning_rate": 5.7197696737044146e-05, |
|
"loss": 0.4239, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.26030556356298645, |
|
"grad_norm": 8.392406463623047, |
|
"learning_rate": 5.7389635316698666e-05, |
|
"loss": 0.7426, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.26117036609974054, |
|
"grad_norm": 9.140869140625, |
|
"learning_rate": 5.758157389635317e-05, |
|
"loss": 1.292, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.2620351686364947, |
|
"grad_norm": 5.900636196136475, |
|
"learning_rate": 5.7773512476007686e-05, |
|
"loss": 1.1471, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.26289997117324876, |
|
"grad_norm": 2.76983904838562, |
|
"learning_rate": 5.796545105566219e-05, |
|
"loss": 0.5639, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.2637647737100029, |
|
"grad_norm": 8.212996482849121, |
|
"learning_rate": 5.8157389635316706e-05, |
|
"loss": 1.592, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.264629576246757, |
|
"grad_norm": 6.7358174324035645, |
|
"learning_rate": 5.834932821497121e-05, |
|
"loss": 0.6063, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2654943787835111, |
|
"grad_norm": 9.422693252563477, |
|
"learning_rate": 5.8541266794625726e-05, |
|
"loss": 0.665, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.2663591813202652, |
|
"grad_norm": 10.346942901611328, |
|
"learning_rate": 5.873320537428023e-05, |
|
"loss": 0.7966, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2672239838570193, |
|
"grad_norm": 8.950202941894531, |
|
"learning_rate": 5.8925143953934746e-05, |
|
"loss": 0.6255, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2680887863937734, |
|
"grad_norm": 6.519852638244629, |
|
"learning_rate": 5.911708253358925e-05, |
|
"loss": 0.7197, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2689535889305275, |
|
"grad_norm": 12.285760879516602, |
|
"learning_rate": 5.9309021113243767e-05, |
|
"loss": 2.22, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.26981839146728165, |
|
"grad_norm": 9.598986625671387, |
|
"learning_rate": 5.950095969289827e-05, |
|
"loss": 0.7472, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.27068319400403573, |
|
"grad_norm": 13.030138969421387, |
|
"learning_rate": 5.969289827255279e-05, |
|
"loss": 1.0278, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.27154799654078987, |
|
"grad_norm": 9.371500015258789, |
|
"learning_rate": 5.9884836852207293e-05, |
|
"loss": 0.6434, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.27241279907754395, |
|
"grad_norm": 7.387608528137207, |
|
"learning_rate": 6.007677543186181e-05, |
|
"loss": 0.4596, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2732776016142981, |
|
"grad_norm": 6.994756698608398, |
|
"learning_rate": 6.0268714011516314e-05, |
|
"loss": 0.5547, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2741424041510522, |
|
"grad_norm": 7.713170528411865, |
|
"learning_rate": 6.046065259117083e-05, |
|
"loss": 1.2906, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.27500720668780626, |
|
"grad_norm": 12.936992645263672, |
|
"learning_rate": 6.0652591170825334e-05, |
|
"loss": 2.2893, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2758720092245604, |
|
"grad_norm": 12.210866928100586, |
|
"learning_rate": 6.084452975047985e-05, |
|
"loss": 2.0067, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2767368117613145, |
|
"grad_norm": 9.767999649047852, |
|
"learning_rate": 6.103646833013436e-05, |
|
"loss": 1.0523, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2776016142980686, |
|
"grad_norm": 10.349803924560547, |
|
"learning_rate": 6.122840690978887e-05, |
|
"loss": 1.425, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2784664168348227, |
|
"grad_norm": 8.848223686218262, |
|
"learning_rate": 6.142034548944337e-05, |
|
"loss": 1.0846, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.27933121937157684, |
|
"grad_norm": 12.004369735717773, |
|
"learning_rate": 6.16122840690979e-05, |
|
"loss": 1.614, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.28019602190833093, |
|
"grad_norm": 4.841424465179443, |
|
"learning_rate": 6.18042226487524e-05, |
|
"loss": 0.831, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.281060824445085, |
|
"grad_norm": 10.002786636352539, |
|
"learning_rate": 6.199616122840691e-05, |
|
"loss": 0.8297, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.28192562698183915, |
|
"grad_norm": 6.301035404205322, |
|
"learning_rate": 6.218809980806143e-05, |
|
"loss": 0.5425, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.28279042951859323, |
|
"grad_norm": 5.8098626136779785, |
|
"learning_rate": 6.238003838771593e-05, |
|
"loss": 0.6583, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2836552320553474, |
|
"grad_norm": 5.272045135498047, |
|
"learning_rate": 6.257197696737045e-05, |
|
"loss": 1.0148, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.28452003459210146, |
|
"grad_norm": 8.22673511505127, |
|
"learning_rate": 6.276391554702495e-05, |
|
"loss": 1.4798, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.2853848371288556, |
|
"grad_norm": 3.6933820247650146, |
|
"learning_rate": 6.295585412667947e-05, |
|
"loss": 0.3907, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2862496396656097, |
|
"grad_norm": 9.97194766998291, |
|
"learning_rate": 6.314779270633397e-05, |
|
"loss": 1.2206, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.2871144422023638, |
|
"grad_norm": 3.41243577003479, |
|
"learning_rate": 6.33397312859885e-05, |
|
"loss": 0.6509, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2879792447391179, |
|
"grad_norm": 5.184510231018066, |
|
"learning_rate": 6.3531669865643e-05, |
|
"loss": 0.5982, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.288844047275872, |
|
"grad_norm": 6.894106864929199, |
|
"learning_rate": 6.372360844529751e-05, |
|
"loss": 1.066, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2897088498126261, |
|
"grad_norm": 6.806879997253418, |
|
"learning_rate": 6.391554702495202e-05, |
|
"loss": 0.6874, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.2905736523493802, |
|
"grad_norm": 4.7376933097839355, |
|
"learning_rate": 6.410748560460654e-05, |
|
"loss": 0.2232, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.29143845488613435, |
|
"grad_norm": 7.3895745277404785, |
|
"learning_rate": 6.429942418426104e-05, |
|
"loss": 0.8978, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.29230325742288843, |
|
"grad_norm": 4.52320671081543, |
|
"learning_rate": 6.449136276391555e-05, |
|
"loss": 0.5689, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.29316805995964257, |
|
"grad_norm": 10.309342384338379, |
|
"learning_rate": 6.468330134357006e-05, |
|
"loss": 1.1131, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.29403286249639665, |
|
"grad_norm": 7.698537826538086, |
|
"learning_rate": 6.487523992322458e-05, |
|
"loss": 0.4493, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2948976650331508, |
|
"grad_norm": 9.31425952911377, |
|
"learning_rate": 6.506717850287908e-05, |
|
"loss": 0.5409, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2957624675699049, |
|
"grad_norm": 3.6749117374420166, |
|
"learning_rate": 6.525911708253359e-05, |
|
"loss": 0.5921, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.29662727010665896, |
|
"grad_norm": 8.300640106201172, |
|
"learning_rate": 6.54510556621881e-05, |
|
"loss": 0.6657, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2974920726434131, |
|
"grad_norm": 7.509027481079102, |
|
"learning_rate": 6.564299424184262e-05, |
|
"loss": 0.8345, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.2983568751801672, |
|
"grad_norm": 6.161888122558594, |
|
"learning_rate": 6.583493282149712e-05, |
|
"loss": 0.9418, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2983568751801672, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8395562767982483, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.753054394869091, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7216494845360825, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8208398818969727, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.44512380090846426, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6730769230769231, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7856455445289612, |
|
"eval_Qnli-dev_cosine_ap": 0.7529763141762885, |
|
"eval_Qnli-dev_cosine_f1": 0.7169811320754719, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7426920533180237, |
|
"eval_Qnli-dev_cosine_mcc": 0.4079411028893153, |
|
"eval_Qnli-dev_cosine_precision": 0.6229508196721312, |
|
"eval_Qnli-dev_cosine_recall": 0.8444444444444444, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 0.6650346517562866, |
|
"eval_global_dataset_runtime": 67.8938, |
|
"eval_global_dataset_samples_per_second": 14.316, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.9270833134651184, |
|
"eval_sts-test-1024_pearson_cosine": 0.8520971782224942, |
|
"eval_sts-test-1024_spearman_cosine": 0.894236476710775, |
|
"eval_sts-test_pearson_cosine": 0.9080363785366253, |
|
"eval_sts-test_spearman_cosine": 0.9193020252854658, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.2992216777169213, |
|
"grad_norm": 3.2981910705566406, |
|
"learning_rate": 6.602687140115163e-05, |
|
"loss": 0.3752, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.3000864802536754, |
|
"grad_norm": 5.650154113769531, |
|
"learning_rate": 6.621880998080614e-05, |
|
"loss": 0.362, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.30095128279042954, |
|
"grad_norm": 7.166718482971191, |
|
"learning_rate": 6.641074856046066e-05, |
|
"loss": 0.99, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.3018160853271836, |
|
"grad_norm": 5.6671295166015625, |
|
"learning_rate": 6.660268714011516e-05, |
|
"loss": 0.4309, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.3026808878639377, |
|
"grad_norm": 7.15688943862915, |
|
"learning_rate": 6.679462571976968e-05, |
|
"loss": 0.4101, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.30354569040069185, |
|
"grad_norm": 4.845415115356445, |
|
"learning_rate": 6.698656429942419e-05, |
|
"loss": 0.357, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.30441049293744593, |
|
"grad_norm": 6.766101360321045, |
|
"learning_rate": 6.71785028790787e-05, |
|
"loss": 0.4257, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.30527529547420007, |
|
"grad_norm": 9.900660514831543, |
|
"learning_rate": 6.737044145873322e-05, |
|
"loss": 0.6665, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.30614009801095415, |
|
"grad_norm": 4.632408142089844, |
|
"learning_rate": 6.756238003838772e-05, |
|
"loss": 0.4523, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.3070049005477083, |
|
"grad_norm": 9.697669982910156, |
|
"learning_rate": 6.775431861804223e-05, |
|
"loss": 1.4959, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.3078697030844624, |
|
"grad_norm": 9.970297813415527, |
|
"learning_rate": 6.794625719769674e-05, |
|
"loss": 0.833, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.3087345056212165, |
|
"grad_norm": 9.964993476867676, |
|
"learning_rate": 6.813819577735126e-05, |
|
"loss": 0.7928, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.3095993081579706, |
|
"grad_norm": 3.866377353668213, |
|
"learning_rate": 6.833013435700576e-05, |
|
"loss": 0.3277, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.3104641106947247, |
|
"grad_norm": 6.179882526397705, |
|
"learning_rate": 6.852207293666027e-05, |
|
"loss": 0.5336, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.3113289132314788, |
|
"grad_norm": 5.517486095428467, |
|
"learning_rate": 6.871401151631478e-05, |
|
"loss": 0.4663, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3121937157682329, |
|
"grad_norm": 6.7118306159973145, |
|
"learning_rate": 6.89059500959693e-05, |
|
"loss": 0.5869, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.31305851830498704, |
|
"grad_norm": 8.203336715698242, |
|
"learning_rate": 6.90978886756238e-05, |
|
"loss": 0.6056, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.3139233208417411, |
|
"grad_norm": 4.762539863586426, |
|
"learning_rate": 6.928982725527831e-05, |
|
"loss": 0.5402, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.31478812337849527, |
|
"grad_norm": 5.39819860458374, |
|
"learning_rate": 6.948176583493282e-05, |
|
"loss": 1.046, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.31565292591524935, |
|
"grad_norm": 4.130873680114746, |
|
"learning_rate": 6.967370441458734e-05, |
|
"loss": 0.3102, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.31651772845200343, |
|
"grad_norm": 7.361220359802246, |
|
"learning_rate": 6.986564299424184e-05, |
|
"loss": 0.412, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.31738253098875757, |
|
"grad_norm": 7.686898708343506, |
|
"learning_rate": 7.005758157389636e-05, |
|
"loss": 0.5703, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.31824733352551166, |
|
"grad_norm": 10.829538345336914, |
|
"learning_rate": 7.024952015355086e-05, |
|
"loss": 1.6531, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.3191121360622658, |
|
"grad_norm": 5.71692419052124, |
|
"learning_rate": 7.044145873320538e-05, |
|
"loss": 0.4314, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.3199769385990199, |
|
"grad_norm": 8.669037818908691, |
|
"learning_rate": 7.063339731285988e-05, |
|
"loss": 0.7062, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.320841741135774, |
|
"grad_norm": 5.996104717254639, |
|
"learning_rate": 7.08253358925144e-05, |
|
"loss": 0.5788, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.3217065436725281, |
|
"grad_norm": 12.612412452697754, |
|
"learning_rate": 7.10172744721689e-05, |
|
"loss": 1.8529, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.32257134620928224, |
|
"grad_norm": 8.934858322143555, |
|
"learning_rate": 7.120921305182342e-05, |
|
"loss": 0.6606, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.3234361487460363, |
|
"grad_norm": 10.218025207519531, |
|
"learning_rate": 7.140115163147793e-05, |
|
"loss": 0.8089, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3243009512827904, |
|
"grad_norm": 5.20566987991333, |
|
"learning_rate": 7.159309021113245e-05, |
|
"loss": 0.3905, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.32516575381954455, |
|
"grad_norm": 10.471417427062988, |
|
"learning_rate": 7.178502879078695e-05, |
|
"loss": 1.2417, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.32603055635629863, |
|
"grad_norm": 7.703388690948486, |
|
"learning_rate": 7.197696737044146e-05, |
|
"loss": 0.8738, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.32689535889305277, |
|
"grad_norm": 8.099038124084473, |
|
"learning_rate": 7.216890595009598e-05, |
|
"loss": 0.8544, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.32776016142980685, |
|
"grad_norm": 6.550043106079102, |
|
"learning_rate": 7.236084452975049e-05, |
|
"loss": 0.4667, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.328624963966561, |
|
"grad_norm": 10.672149658203125, |
|
"learning_rate": 7.255278310940499e-05, |
|
"loss": 0.8825, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3294897665033151, |
|
"grad_norm": 7.584779262542725, |
|
"learning_rate": 7.27447216890595e-05, |
|
"loss": 0.6003, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.33035456904006916, |
|
"grad_norm": 5.818914890289307, |
|
"learning_rate": 7.293666026871402e-05, |
|
"loss": 0.4643, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3312193715768233, |
|
"grad_norm": 6.871515274047852, |
|
"learning_rate": 7.312859884836853e-05, |
|
"loss": 0.5097, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.3320841741135774, |
|
"grad_norm": 3.9484200477600098, |
|
"learning_rate": 7.332053742802303e-05, |
|
"loss": 0.4679, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.3329489766503315, |
|
"grad_norm": 3.8606741428375244, |
|
"learning_rate": 7.351247600767754e-05, |
|
"loss": 0.3732, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3338137791870856, |
|
"grad_norm": 10.65389347076416, |
|
"learning_rate": 7.370441458733206e-05, |
|
"loss": 0.9031, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.33467858172383974, |
|
"grad_norm": 10.56472396850586, |
|
"learning_rate": 7.389635316698657e-05, |
|
"loss": 0.6668, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3355433842605938, |
|
"grad_norm": 9.798723220825195, |
|
"learning_rate": 7.408829174664109e-05, |
|
"loss": 0.7715, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.33640818679734796, |
|
"grad_norm": 8.35350227355957, |
|
"learning_rate": 7.428023032629558e-05, |
|
"loss": 0.8536, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.33727298933410205, |
|
"grad_norm": 7.99412727355957, |
|
"learning_rate": 7.44721689059501e-05, |
|
"loss": 0.9303, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.33813779187085613, |
|
"grad_norm": 8.098565101623535, |
|
"learning_rate": 7.46641074856046e-05, |
|
"loss": 0.3704, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.33900259440761027, |
|
"grad_norm": 7.83499002456665, |
|
"learning_rate": 7.485604606525913e-05, |
|
"loss": 0.3678, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.33986739694436435, |
|
"grad_norm": 9.846261978149414, |
|
"learning_rate": 7.504798464491363e-05, |
|
"loss": 1.6854, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3407321994811185, |
|
"grad_norm": 10.261216163635254, |
|
"learning_rate": 7.523992322456814e-05, |
|
"loss": 0.7636, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.3415970020178726, |
|
"grad_norm": 5.547618389129639, |
|
"learning_rate": 7.543186180422265e-05, |
|
"loss": 0.3462, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3424618045546267, |
|
"grad_norm": 6.500753402709961, |
|
"learning_rate": 7.562380038387717e-05, |
|
"loss": 0.644, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3433266070913808, |
|
"grad_norm": 8.669839859008789, |
|
"learning_rate": 7.581573896353167e-05, |
|
"loss": 0.7317, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.34419140962813494, |
|
"grad_norm": 6.280559062957764, |
|
"learning_rate": 7.600767754318618e-05, |
|
"loss": 0.7023, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.345056212164889, |
|
"grad_norm": 7.725942611694336, |
|
"learning_rate": 7.61996161228407e-05, |
|
"loss": 0.7164, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3459210147016431, |
|
"grad_norm": 7.478891849517822, |
|
"learning_rate": 7.639155470249521e-05, |
|
"loss": 0.4271, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.34678581723839724, |
|
"grad_norm": 4.877331256866455, |
|
"learning_rate": 7.658349328214971e-05, |
|
"loss": 0.7332, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3476506197751513, |
|
"grad_norm": 8.025667190551758, |
|
"learning_rate": 7.677543186180422e-05, |
|
"loss": 0.3978, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.34851542231190547, |
|
"grad_norm": 7.804194450378418, |
|
"learning_rate": 7.696737044145874e-05, |
|
"loss": 0.5208, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.34938022484865955, |
|
"grad_norm": 5.8793230056762695, |
|
"learning_rate": 7.715930902111325e-05, |
|
"loss": 0.4889, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3502450273854137, |
|
"grad_norm": 8.609319686889648, |
|
"learning_rate": 7.735124760076777e-05, |
|
"loss": 0.769, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.35110982992216777, |
|
"grad_norm": 6.56134033203125, |
|
"learning_rate": 7.754318618042226e-05, |
|
"loss": 0.3932, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.35197463245892185, |
|
"grad_norm": 8.588756561279297, |
|
"learning_rate": 7.773512476007678e-05, |
|
"loss": 0.5919, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.352839434995676, |
|
"grad_norm": 7.530106067657471, |
|
"learning_rate": 7.792706333973129e-05, |
|
"loss": 0.6037, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3537042375324301, |
|
"grad_norm": 7.5281853675842285, |
|
"learning_rate": 7.811900191938581e-05, |
|
"loss": 0.4321, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3545690400691842, |
|
"grad_norm": 8.16552448272705, |
|
"learning_rate": 7.831094049904032e-05, |
|
"loss": 1.1022, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3554338426059383, |
|
"grad_norm": 8.752754211425781, |
|
"learning_rate": 7.850287907869482e-05, |
|
"loss": 0.5996, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.35629864514269244, |
|
"grad_norm": 7.659090995788574, |
|
"learning_rate": 7.869481765834933e-05, |
|
"loss": 0.5673, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.3571634476794465, |
|
"grad_norm": 6.884600639343262, |
|
"learning_rate": 7.888675623800385e-05, |
|
"loss": 0.3437, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.35802825021620066, |
|
"grad_norm": 5.328488349914551, |
|
"learning_rate": 7.907869481765836e-05, |
|
"loss": 0.519, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.35889305275295474, |
|
"grad_norm": 10.308977127075195, |
|
"learning_rate": 7.927063339731286e-05, |
|
"loss": 1.5373, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.35975785528970883, |
|
"grad_norm": 7.618837356567383, |
|
"learning_rate": 7.946257197696737e-05, |
|
"loss": 0.764, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.36062265782646297, |
|
"grad_norm": 8.787110328674316, |
|
"learning_rate": 7.965451055662189e-05, |
|
"loss": 0.6131, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.36148746036321705, |
|
"grad_norm": 6.432898998260498, |
|
"learning_rate": 7.98464491362764e-05, |
|
"loss": 0.6826, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.3623522628999712, |
|
"grad_norm": 8.762993812561035, |
|
"learning_rate": 8.00383877159309e-05, |
|
"loss": 0.9631, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3632170654367253, |
|
"grad_norm": 5.939430236816406, |
|
"learning_rate": 8.023032629558541e-05, |
|
"loss": 0.4283, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3640818679734794, |
|
"grad_norm": 8.092362403869629, |
|
"learning_rate": 8.042226487523993e-05, |
|
"loss": 1.2001, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3649466705102335, |
|
"grad_norm": 7.594040870666504, |
|
"learning_rate": 8.061420345489444e-05, |
|
"loss": 0.4499, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3658114730469876, |
|
"grad_norm": 12.614463806152344, |
|
"learning_rate": 8.080614203454894e-05, |
|
"loss": 1.4073, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.3666762755837417, |
|
"grad_norm": 6.807295322418213, |
|
"learning_rate": 8.099808061420346e-05, |
|
"loss": 0.8035, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3675410781204958, |
|
"grad_norm": 3.6670141220092773, |
|
"learning_rate": 8.119001919385797e-05, |
|
"loss": 0.3207, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.36840588065724994, |
|
"grad_norm": 7.3801445960998535, |
|
"learning_rate": 8.138195777351249e-05, |
|
"loss": 0.4752, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.369270683194004, |
|
"grad_norm": 9.895638465881348, |
|
"learning_rate": 8.157389635316698e-05, |
|
"loss": 1.1256, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.37013548573075816, |
|
"grad_norm": 6.200985431671143, |
|
"learning_rate": 8.17658349328215e-05, |
|
"loss": 0.4226, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.37100028826751225, |
|
"grad_norm": 9.858406066894531, |
|
"learning_rate": 8.195777351247601e-05, |
|
"loss": 1.123, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3718650908042664, |
|
"grad_norm": 7.274184703826904, |
|
"learning_rate": 8.214971209213053e-05, |
|
"loss": 0.4425, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.37272989334102047, |
|
"grad_norm": 4.712157249450684, |
|
"learning_rate": 8.234165067178504e-05, |
|
"loss": 0.4242, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.37359469587777455, |
|
"grad_norm": 7.515327453613281, |
|
"learning_rate": 8.253358925143954e-05, |
|
"loss": 1.0072, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3744594984145287, |
|
"grad_norm": 3.97876238822937, |
|
"learning_rate": 8.272552783109405e-05, |
|
"loss": 0.241, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3753243009512828, |
|
"grad_norm": 7.888240337371826, |
|
"learning_rate": 8.291746641074857e-05, |
|
"loss": 0.7359, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3761891034880369, |
|
"grad_norm": 6.10671329498291, |
|
"learning_rate": 8.310940499040308e-05, |
|
"loss": 0.4583, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.377053906024791, |
|
"grad_norm": 6.102023601531982, |
|
"learning_rate": 8.330134357005758e-05, |
|
"loss": 0.9001, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.37791870856154514, |
|
"grad_norm": 7.122408390045166, |
|
"learning_rate": 8.349328214971209e-05, |
|
"loss": 0.4614, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3787835110982992, |
|
"grad_norm": 9.432422637939453, |
|
"learning_rate": 8.368522072936661e-05, |
|
"loss": 1.238, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3796483136350533, |
|
"grad_norm": 9.530061721801758, |
|
"learning_rate": 8.387715930902112e-05, |
|
"loss": 1.5289, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.38051311617180744, |
|
"grad_norm": 7.045010566711426, |
|
"learning_rate": 8.406909788867562e-05, |
|
"loss": 0.3283, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3813779187085615, |
|
"grad_norm": 6.275206089019775, |
|
"learning_rate": 8.426103646833013e-05, |
|
"loss": 0.4147, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.38224272124531566, |
|
"grad_norm": 4.124218940734863, |
|
"learning_rate": 8.445297504798465e-05, |
|
"loss": 0.4956, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.38310752378206975, |
|
"grad_norm": 5.8184895515441895, |
|
"learning_rate": 8.464491362763916e-05, |
|
"loss": 0.5166, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3839723263188239, |
|
"grad_norm": 2.6442999839782715, |
|
"learning_rate": 8.483685220729366e-05, |
|
"loss": 0.2486, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.38483712885557797, |
|
"grad_norm": 3.8425562381744385, |
|
"learning_rate": 8.502879078694817e-05, |
|
"loss": 0.4493, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3857019313923321, |
|
"grad_norm": 9.125511169433594, |
|
"learning_rate": 8.522072936660269e-05, |
|
"loss": 1.0439, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.3865667339290862, |
|
"grad_norm": 9.67273998260498, |
|
"learning_rate": 8.54126679462572e-05, |
|
"loss": 1.249, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3874315364658403, |
|
"grad_norm": 7.822050094604492, |
|
"learning_rate": 8.560460652591172e-05, |
|
"loss": 0.8329, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.3882963390025944, |
|
"grad_norm": 5.747166633605957, |
|
"learning_rate": 8.579654510556623e-05, |
|
"loss": 0.3256, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3891611415393485, |
|
"grad_norm": 7.257145404815674, |
|
"learning_rate": 8.598848368522073e-05, |
|
"loss": 1.0333, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.39002594407610264, |
|
"grad_norm": 7.6516642570495605, |
|
"learning_rate": 8.618042226487525e-05, |
|
"loss": 0.3821, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3908907466128567, |
|
"grad_norm": 6.943114757537842, |
|
"learning_rate": 8.637236084452976e-05, |
|
"loss": 0.4578, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.39175554914961086, |
|
"grad_norm": 6.90556526184082, |
|
"learning_rate": 8.656429942418427e-05, |
|
"loss": 0.6716, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.39262035168636494, |
|
"grad_norm": 5.005017280578613, |
|
"learning_rate": 8.675623800383877e-05, |
|
"loss": 0.2694, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.393485154223119, |
|
"grad_norm": 9.84821605682373, |
|
"learning_rate": 8.694817658349329e-05, |
|
"loss": 1.7739, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.39434995675987317, |
|
"grad_norm": 7.2032647132873535, |
|
"learning_rate": 8.71401151631478e-05, |
|
"loss": 0.7109, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.39521475929662725, |
|
"grad_norm": 10.030957221984863, |
|
"learning_rate": 8.73320537428023e-05, |
|
"loss": 0.5733, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3960795618333814, |
|
"grad_norm": 3.6352131366729736, |
|
"learning_rate": 8.752399232245681e-05, |
|
"loss": 0.283, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.39694436437013547, |
|
"grad_norm": 3.4260525703430176, |
|
"learning_rate": 8.771593090211133e-05, |
|
"loss": 0.4214, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3978091669068896, |
|
"grad_norm": 4.595706462860107, |
|
"learning_rate": 8.790786948176584e-05, |
|
"loss": 0.6332, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3978091669068896, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.75, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.879416823387146, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.781841863547347, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7346938775510203, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8326764106750488, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.4683019469005233, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6792452830188679, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.8, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8109143972396851, |
|
"eval_Qnli-dev_cosine_ap": 0.7568700790587495, |
|
"eval_Qnli-dev_cosine_f1": 0.7346938775510203, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.742037296295166, |
|
"eval_Qnli-dev_cosine_mcc": 0.4683019469005233, |
|
"eval_Qnli-dev_cosine_precision": 0.6792452830188679, |
|
"eval_Qnli-dev_cosine_recall": 0.8, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 0.7834931015968323, |
|
"eval_global_dataset_runtime": 68.0114, |
|
"eval_global_dataset_samples_per_second": 14.292, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.9375, |
|
"eval_sts-test-1024_pearson_cosine": 0.8881975899886265, |
|
"eval_sts-test-1024_spearman_cosine": 0.9114913957523785, |
|
"eval_sts-test_pearson_cosine": 0.9096311897411768, |
|
"eval_sts-test_spearman_cosine": 0.9200152476526354, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3986739694436437, |
|
"grad_norm": 10.936285018920898, |
|
"learning_rate": 8.809980806142035e-05, |
|
"loss": 1.2458, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.39953877198039783, |
|
"grad_norm": 5.99333381652832, |
|
"learning_rate": 8.829174664107485e-05, |
|
"loss": 0.3104, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.4004035745171519, |
|
"grad_norm": 6.260789394378662, |
|
"learning_rate": 8.848368522072937e-05, |
|
"loss": 0.4634, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.401268377053906, |
|
"grad_norm": 4.397698879241943, |
|
"learning_rate": 8.867562380038388e-05, |
|
"loss": 0.5893, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.40213317959066014, |
|
"grad_norm": 4.650321960449219, |
|
"learning_rate": 8.88675623800384e-05, |
|
"loss": 0.377, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4029979821274142, |
|
"grad_norm": 9.407013893127441, |
|
"learning_rate": 8.905950095969289e-05, |
|
"loss": 0.5403, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.40386278466416836, |
|
"grad_norm": 10.255672454833984, |
|
"learning_rate": 8.925143953934741e-05, |
|
"loss": 0.9454, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.40472758720092245, |
|
"grad_norm": 11.186202049255371, |
|
"learning_rate": 8.944337811900192e-05, |
|
"loss": 0.9292, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.4055923897376766, |
|
"grad_norm": 6.13421630859375, |
|
"learning_rate": 8.963531669865644e-05, |
|
"loss": 0.4132, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.40645719227443067, |
|
"grad_norm": 12.439327239990234, |
|
"learning_rate": 8.982725527831093e-05, |
|
"loss": 0.9805, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4073219948111848, |
|
"grad_norm": 10.574874877929688, |
|
"learning_rate": 9.001919385796545e-05, |
|
"loss": 0.7784, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.4081867973479389, |
|
"grad_norm": 5.993617057800293, |
|
"learning_rate": 9.021113243761996e-05, |
|
"loss": 0.369, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.409051599884693, |
|
"grad_norm": 10.213888168334961, |
|
"learning_rate": 9.040307101727448e-05, |
|
"loss": 1.4911, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.4099164024214471, |
|
"grad_norm": 7.043622016906738, |
|
"learning_rate": 9.059500959692899e-05, |
|
"loss": 0.4223, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.4107812049582012, |
|
"grad_norm": 8.350674629211426, |
|
"learning_rate": 9.07869481765835e-05, |
|
"loss": 1.2959, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.41164600749495533, |
|
"grad_norm": 8.64110279083252, |
|
"learning_rate": 9.097888675623801e-05, |
|
"loss": 0.4928, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.4125108100317094, |
|
"grad_norm": 6.388255596160889, |
|
"learning_rate": 9.117082533589252e-05, |
|
"loss": 1.02, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.41337561256846356, |
|
"grad_norm": 4.513554096221924, |
|
"learning_rate": 9.136276391554703e-05, |
|
"loss": 0.5034, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.41424041510521764, |
|
"grad_norm": 10.509414672851562, |
|
"learning_rate": 9.155470249520153e-05, |
|
"loss": 1.6119, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.4151052176419717, |
|
"grad_norm": 5.085805416107178, |
|
"learning_rate": 9.174664107485605e-05, |
|
"loss": 0.59, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.41597002017872586, |
|
"grad_norm": 8.275995254516602, |
|
"learning_rate": 9.193857965451056e-05, |
|
"loss": 0.5589, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.41683482271547995, |
|
"grad_norm": 9.266075134277344, |
|
"learning_rate": 9.213051823416508e-05, |
|
"loss": 0.8402, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.4176996252522341, |
|
"grad_norm": 9.998162269592285, |
|
"learning_rate": 9.232245681381957e-05, |
|
"loss": 0.5293, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.41856442778898817, |
|
"grad_norm": 6.49897575378418, |
|
"learning_rate": 9.25143953934741e-05, |
|
"loss": 0.5911, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.4194292303257423, |
|
"grad_norm": 2.814267158508301, |
|
"learning_rate": 9.27063339731286e-05, |
|
"loss": 0.1819, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4202940328624964, |
|
"grad_norm": 6.657732009887695, |
|
"learning_rate": 9.289827255278312e-05, |
|
"loss": 0.6963, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.42115883539925053, |
|
"grad_norm": 10.121885299682617, |
|
"learning_rate": 9.309021113243761e-05, |
|
"loss": 0.6004, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.4220236379360046, |
|
"grad_norm": 15.206952095031738, |
|
"learning_rate": 9.328214971209214e-05, |
|
"loss": 2.0825, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.4228884404727587, |
|
"grad_norm": 11.911534309387207, |
|
"learning_rate": 9.347408829174664e-05, |
|
"loss": 1.6674, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.42375324300951284, |
|
"grad_norm": 6.184067726135254, |
|
"learning_rate": 9.366602687140116e-05, |
|
"loss": 0.5685, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4246180455462669, |
|
"grad_norm": 7.771515846252441, |
|
"learning_rate": 9.385796545105567e-05, |
|
"loss": 0.7835, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.42548284808302106, |
|
"grad_norm": 8.338656425476074, |
|
"learning_rate": 9.404990403071018e-05, |
|
"loss": 0.4613, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.42634765061977514, |
|
"grad_norm": 9.678628921508789, |
|
"learning_rate": 9.424184261036468e-05, |
|
"loss": 1.0492, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4272124531565293, |
|
"grad_norm": 5.315983772277832, |
|
"learning_rate": 9.44337811900192e-05, |
|
"loss": 0.3512, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.42807725569328337, |
|
"grad_norm": 7.20918607711792, |
|
"learning_rate": 9.462571976967371e-05, |
|
"loss": 0.8227, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.42894205823003745, |
|
"grad_norm": 8.044875144958496, |
|
"learning_rate": 9.481765834932822e-05, |
|
"loss": 0.7849, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.4298068607667916, |
|
"grad_norm": 8.14607048034668, |
|
"learning_rate": 9.500959692898272e-05, |
|
"loss": 1.2823, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.43067166330354567, |
|
"grad_norm": 9.731268882751465, |
|
"learning_rate": 9.520153550863724e-05, |
|
"loss": 1.2799, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.4315364658402998, |
|
"grad_norm": 9.654071807861328, |
|
"learning_rate": 9.539347408829176e-05, |
|
"loss": 0.596, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.4324012683770539, |
|
"grad_norm": 9.026534080505371, |
|
"learning_rate": 9.558541266794626e-05, |
|
"loss": 0.6793, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.43326607091380803, |
|
"grad_norm": 7.325682163238525, |
|
"learning_rate": 9.577735124760078e-05, |
|
"loss": 0.575, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.4341308734505621, |
|
"grad_norm": 4.846238136291504, |
|
"learning_rate": 9.596928982725528e-05, |
|
"loss": 0.2631, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.43499567598731625, |
|
"grad_norm": 8.93980598449707, |
|
"learning_rate": 9.61612284069098e-05, |
|
"loss": 0.5173, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.43586047852407034, |
|
"grad_norm": 11.70151138305664, |
|
"learning_rate": 9.63531669865643e-05, |
|
"loss": 0.9963, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4367252810608244, |
|
"grad_norm": 6.328804016113281, |
|
"learning_rate": 9.654510556621882e-05, |
|
"loss": 0.9315, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.43759008359757856, |
|
"grad_norm": 9.678471565246582, |
|
"learning_rate": 9.673704414587332e-05, |
|
"loss": 0.7878, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.43845488613433264, |
|
"grad_norm": 6.569301128387451, |
|
"learning_rate": 9.692898272552784e-05, |
|
"loss": 0.4346, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4393196886710868, |
|
"grad_norm": 6.5204596519470215, |
|
"learning_rate": 9.712092130518235e-05, |
|
"loss": 0.7662, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.44018449120784087, |
|
"grad_norm": 8.459349632263184, |
|
"learning_rate": 9.731285988483686e-05, |
|
"loss": 0.5221, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.441049293744595, |
|
"grad_norm": 8.08749008178711, |
|
"learning_rate": 9.750479846449136e-05, |
|
"loss": 0.9803, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4419140962813491, |
|
"grad_norm": 8.031821250915527, |
|
"learning_rate": 9.769673704414588e-05, |
|
"loss": 1.1605, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4427788988181032, |
|
"grad_norm": 9.393692016601562, |
|
"learning_rate": 9.788867562380039e-05, |
|
"loss": 0.6801, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.4436437013548573, |
|
"grad_norm": 5.011040687561035, |
|
"learning_rate": 9.80806142034549e-05, |
|
"loss": 0.3935, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.4445085038916114, |
|
"grad_norm": 2.235301971435547, |
|
"learning_rate": 9.82725527831094e-05, |
|
"loss": 0.1377, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.44537330642836553, |
|
"grad_norm": 5.642356872558594, |
|
"learning_rate": 9.846449136276392e-05, |
|
"loss": 0.985, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4462381089651196, |
|
"grad_norm": 9.386540412902832, |
|
"learning_rate": 9.865642994241843e-05, |
|
"loss": 0.7949, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.44710291150187376, |
|
"grad_norm": 5.782979965209961, |
|
"learning_rate": 9.884836852207294e-05, |
|
"loss": 0.5974, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.44796771403862784, |
|
"grad_norm": 5.323793888092041, |
|
"learning_rate": 9.904030710172744e-05, |
|
"loss": 0.6797, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.448832516575382, |
|
"grad_norm": 8.012255668640137, |
|
"learning_rate": 9.923224568138196e-05, |
|
"loss": 0.6953, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.44969731911213606, |
|
"grad_norm": 6.930400371551514, |
|
"learning_rate": 9.942418426103647e-05, |
|
"loss": 0.4729, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.45056212164889015, |
|
"grad_norm": 10.408514976501465, |
|
"learning_rate": 9.961612284069098e-05, |
|
"loss": 0.5509, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4514269241856443, |
|
"grad_norm": 5.082659721374512, |
|
"learning_rate": 9.980806142034548e-05, |
|
"loss": 0.4549, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.45229172672239837, |
|
"grad_norm": 10.625167846679688, |
|
"learning_rate": 0.0001, |
|
"loss": 1.6151, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4531565292591525, |
|
"grad_norm": 7.423165798187256, |
|
"learning_rate": 9.999974430536151e-05, |
|
"loss": 0.4466, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4540213317959066, |
|
"grad_norm": 10.48806095123291, |
|
"learning_rate": 9.999897722406126e-05, |
|
"loss": 1.3489, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.45488613433266073, |
|
"grad_norm": 8.526479721069336, |
|
"learning_rate": 9.999769876394478e-05, |
|
"loss": 0.6699, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4557509368694148, |
|
"grad_norm": 7.596718788146973, |
|
"learning_rate": 9.999590893808788e-05, |
|
"loss": 0.5189, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.45661573940616895, |
|
"grad_norm": 10.089831352233887, |
|
"learning_rate": 9.999360776479651e-05, |
|
"loss": 0.7617, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.45748054194292304, |
|
"grad_norm": 7.766354560852051, |
|
"learning_rate": 9.999079526760659e-05, |
|
"loss": 0.5148, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4583453444796771, |
|
"grad_norm": 6.268951892852783, |
|
"learning_rate": 9.998747147528374e-05, |
|
"loss": 0.5564, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.45921014701643126, |
|
"grad_norm": 5.794777870178223, |
|
"learning_rate": 9.9983636421823e-05, |
|
"loss": 0.4038, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.46007494955318534, |
|
"grad_norm": 8.995209693908691, |
|
"learning_rate": 9.997929014644845e-05, |
|
"loss": 0.6968, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4609397520899395, |
|
"grad_norm": 6.833916187286377, |
|
"learning_rate": 9.997443269361289e-05, |
|
"loss": 0.4393, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.46180455462669356, |
|
"grad_norm": 9.531277656555176, |
|
"learning_rate": 9.996906411299726e-05, |
|
"loss": 0.7228, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4626693571634477, |
|
"grad_norm": 11.1766939163208, |
|
"learning_rate": 9.996318445951032e-05, |
|
"loss": 0.898, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4635341597002018, |
|
"grad_norm": 4.982804298400879, |
|
"learning_rate": 9.995679379328785e-05, |
|
"loss": 0.3461, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.46439896223695587, |
|
"grad_norm": 3.0458362102508545, |
|
"learning_rate": 9.994989217969224e-05, |
|
"loss": 0.4753, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.46526376477371, |
|
"grad_norm": 7.552469253540039, |
|
"learning_rate": 9.99424796893117e-05, |
|
"loss": 0.4446, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4661285673104641, |
|
"grad_norm": 10.52206039428711, |
|
"learning_rate": 9.99345563979596e-05, |
|
"loss": 0.8696, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.46699336984721823, |
|
"grad_norm": 9.044191360473633, |
|
"learning_rate": 9.992612238667368e-05, |
|
"loss": 1.0505, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4678581723839723, |
|
"grad_norm": 7.528494834899902, |
|
"learning_rate": 9.991717774171514e-05, |
|
"loss": 0.3523, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.46872297492072645, |
|
"grad_norm": 8.00634765625, |
|
"learning_rate": 9.990772255456797e-05, |
|
"loss": 0.6452, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.46958777745748054, |
|
"grad_norm": 6.528989315032959, |
|
"learning_rate": 9.989775692193773e-05, |
|
"loss": 0.5005, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4704525799942347, |
|
"grad_norm": 7.66871452331543, |
|
"learning_rate": 9.988728094575082e-05, |
|
"loss": 0.5364, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.47131738253098876, |
|
"grad_norm": 6.3178558349609375, |
|
"learning_rate": 9.987629473315325e-05, |
|
"loss": 0.6121, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.47218218506774284, |
|
"grad_norm": 3.713564872741699, |
|
"learning_rate": 9.986479839650966e-05, |
|
"loss": 0.2326, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.473046987604497, |
|
"grad_norm": 11.291918754577637, |
|
"learning_rate": 9.98527920534021e-05, |
|
"loss": 1.2339, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.47391179014125107, |
|
"grad_norm": 8.482532501220703, |
|
"learning_rate": 9.984027582662892e-05, |
|
"loss": 0.8196, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4747765926780052, |
|
"grad_norm": 2.9724512100219727, |
|
"learning_rate": 9.982724984420333e-05, |
|
"loss": 0.2354, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4756413952147593, |
|
"grad_norm": 9.461052894592285, |
|
"learning_rate": 9.981371423935233e-05, |
|
"loss": 0.6666, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4765061977515134, |
|
"grad_norm": 5.076896667480469, |
|
"learning_rate": 9.979966915051517e-05, |
|
"loss": 0.3125, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4773710002882675, |
|
"grad_norm": 8.995684623718262, |
|
"learning_rate": 9.978511472134203e-05, |
|
"loss": 0.7455, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4782358028250216, |
|
"grad_norm": 2.971757173538208, |
|
"learning_rate": 9.977005110069245e-05, |
|
"loss": 0.32, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.47910060536177573, |
|
"grad_norm": 7.4964399337768555, |
|
"learning_rate": 9.975447844263395e-05, |
|
"loss": 0.9793, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4799654078985298, |
|
"grad_norm": 6.13850736618042, |
|
"learning_rate": 9.973839690644032e-05, |
|
"loss": 0.7821, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.48083021043528396, |
|
"grad_norm": 8.951305389404297, |
|
"learning_rate": 9.972180665659004e-05, |
|
"loss": 0.6022, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.48169501297203804, |
|
"grad_norm": 6.228058338165283, |
|
"learning_rate": 9.970470786276467e-05, |
|
"loss": 0.8369, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.4825598155087922, |
|
"grad_norm": 10.346866607666016, |
|
"learning_rate": 9.968710069984698e-05, |
|
"loss": 0.8025, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.48342461804554626, |
|
"grad_norm": 2.9348461627960205, |
|
"learning_rate": 9.966898534791926e-05, |
|
"loss": 0.1631, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4842894205823004, |
|
"grad_norm": 8.404128074645996, |
|
"learning_rate": 9.965036199226147e-05, |
|
"loss": 0.7858, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4851542231190545, |
|
"grad_norm": 3.0906944274902344, |
|
"learning_rate": 9.963123082334925e-05, |
|
"loss": 0.3223, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.48601902565580857, |
|
"grad_norm": 4.46307373046875, |
|
"learning_rate": 9.961159203685212e-05, |
|
"loss": 0.2361, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4868838281925627, |
|
"grad_norm": 7.367444038391113, |
|
"learning_rate": 9.959144583363141e-05, |
|
"loss": 1.2893, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4877486307293168, |
|
"grad_norm": 4.720983505249023, |
|
"learning_rate": 9.957079241973809e-05, |
|
"loss": 0.5666, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.48861343326607093, |
|
"grad_norm": 5.1994829177856445, |
|
"learning_rate": 9.95496320064109e-05, |
|
"loss": 0.2794, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.489478235802825, |
|
"grad_norm": 8.899139404296875, |
|
"learning_rate": 9.952796481007401e-05, |
|
"loss": 0.6303, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.49034303833957915, |
|
"grad_norm": 4.118505477905273, |
|
"learning_rate": 9.950579105233483e-05, |
|
"loss": 0.1724, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.49120784087633323, |
|
"grad_norm": 6.728652477264404, |
|
"learning_rate": 9.948311095998181e-05, |
|
"loss": 0.662, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4920726434130873, |
|
"grad_norm": 7.761811256408691, |
|
"learning_rate": 9.945992476498209e-05, |
|
"loss": 0.4051, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.49293744594984146, |
|
"grad_norm": 10.437024116516113, |
|
"learning_rate": 9.943623270447909e-05, |
|
"loss": 0.7596, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.49380224848659554, |
|
"grad_norm": 8.579437255859375, |
|
"learning_rate": 9.94120350207901e-05, |
|
"loss": 0.4666, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4946670510233497, |
|
"grad_norm": 11.050808906555176, |
|
"learning_rate": 9.938733196140386e-05, |
|
"loss": 0.8923, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.49553185356010376, |
|
"grad_norm": 6.367518901824951, |
|
"learning_rate": 9.936212377897798e-05, |
|
"loss": 0.3065, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4963966560968579, |
|
"grad_norm": 5.786684036254883, |
|
"learning_rate": 9.933641073133631e-05, |
|
"loss": 0.6386, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.497261458633612, |
|
"grad_norm": 3.814639091491699, |
|
"learning_rate": 9.93101930814664e-05, |
|
"loss": 0.2868, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.497261458633612, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.6979166666666666, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8905854225158691, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.7246322873104885, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.6909090909090909, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.805380642414093, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.33620907137955974, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.5846153846153846, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444, |
|
"eval_Qnli-dev_cosine_accuracy": 0.6979166666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8145653009414673, |
|
"eval_Qnli-dev_cosine_ap": 0.7254668033788828, |
|
"eval_Qnli-dev_cosine_f1": 0.7289719626168225, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7076575756072998, |
|
"eval_Qnli-dev_cosine_mcc": 0.43373226132862797, |
|
"eval_Qnli-dev_cosine_precision": 0.6290322580645161, |
|
"eval_Qnli-dev_cosine_recall": 0.8666666666666667, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.96875, |
|
"eval_global_dataset_loss": 0.5409160852432251, |
|
"eval_global_dataset_runtime": 68.0813, |
|
"eval_global_dataset_samples_per_second": 14.277, |
|
"eval_global_dataset_steps_per_second": 0.308, |
|
"eval_sequential_score": 0.9479166865348816, |
|
"eval_sts-test-1024_pearson_cosine": 0.8845045352861245, |
|
"eval_sts-test-1024_spearman_cosine": 0.9123160743907711, |
|
"eval_sts-test_pearson_cosine": 0.9122846955191348, |
|
"eval_sts-test_spearman_cosine": 0.920479051307594, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4981262611703661, |
|
"grad_norm": 9.594101905822754, |
|
"learning_rate": 9.928347109751677e-05, |
|
"loss": 0.6007, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4989910637071202, |
|
"grad_norm": 5.121261119842529, |
|
"learning_rate": 9.925624505279411e-05, |
|
"loss": 0.2853, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4998558662438743, |
|
"grad_norm": 4.0415215492248535, |
|
"learning_rate": 9.922851522576058e-05, |
|
"loss": 0.3982, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5007206687806284, |
|
"grad_norm": 11.199448585510254, |
|
"learning_rate": 9.92002819000309e-05, |
|
"loss": 0.7686, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.5015854713173825, |
|
"grad_norm": 5.71658992767334, |
|
"learning_rate": 9.917154536436948e-05, |
|
"loss": 0.3809, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5024502738541367, |
|
"grad_norm": 10.616915702819824, |
|
"learning_rate": 9.914230591268743e-05, |
|
"loss": 1.1228, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5033150763908908, |
|
"grad_norm": 4.5049028396606445, |
|
"learning_rate": 9.911256384403961e-05, |
|
"loss": 0.3737, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5041798789276448, |
|
"grad_norm": 5.022185325622559, |
|
"learning_rate": 9.90823194626215e-05, |
|
"loss": 0.2141, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.505044681464399, |
|
"grad_norm": 10.781139373779297, |
|
"learning_rate": 9.905157307776616e-05, |
|
"loss": 1.2942, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.5059094840011531, |
|
"grad_norm": 10.425268173217773, |
|
"learning_rate": 9.902032500394103e-05, |
|
"loss": 1.4177, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5067742865379071, |
|
"grad_norm": 6.989367961883545, |
|
"learning_rate": 9.898857556074468e-05, |
|
"loss": 0.5832, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5076390890746613, |
|
"grad_norm": 6.156850814819336, |
|
"learning_rate": 9.895632507290362e-05, |
|
"loss": 0.4419, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.5085038916114154, |
|
"grad_norm": 6.66822624206543, |
|
"learning_rate": 9.892357387026892e-05, |
|
"loss": 0.5903, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5093686941481695, |
|
"grad_norm": 8.24500560760498, |
|
"learning_rate": 9.889032228781285e-05, |
|
"loss": 0.44, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.5102334966849236, |
|
"grad_norm": 6.062635898590088, |
|
"learning_rate": 9.88565706656255e-05, |
|
"loss": 0.3002, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5110982992216777, |
|
"grad_norm": 8.822070121765137, |
|
"learning_rate": 9.882231934891119e-05, |
|
"loss": 0.6883, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5119631017584318, |
|
"grad_norm": 6.581031322479248, |
|
"learning_rate": 9.878756868798504e-05, |
|
"loss": 0.7068, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.512827904295186, |
|
"grad_norm": 6.801186561584473, |
|
"learning_rate": 9.875231903826936e-05, |
|
"loss": 0.5245, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.51369270683194, |
|
"grad_norm": 8.146296501159668, |
|
"learning_rate": 9.871657076029003e-05, |
|
"loss": 0.7089, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5145575093686942, |
|
"grad_norm": 12.6628999710083, |
|
"learning_rate": 9.868032421967275e-05, |
|
"loss": 1.8026, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5154223119054483, |
|
"grad_norm": 3.0164332389831543, |
|
"learning_rate": 9.864357978713936e-05, |
|
"loss": 0.2736, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.5162871144422023, |
|
"grad_norm": 3.916259527206421, |
|
"learning_rate": 9.860633783850406e-05, |
|
"loss": 0.3196, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5171519169789565, |
|
"grad_norm": 8.493870735168457, |
|
"learning_rate": 9.856859875466948e-05, |
|
"loss": 0.7005, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5180167195157106, |
|
"grad_norm": 8.802308082580566, |
|
"learning_rate": 9.853036292162291e-05, |
|
"loss": 0.4239, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5188815220524647, |
|
"grad_norm": 10.11483383178711, |
|
"learning_rate": 9.849163073043223e-05, |
|
"loss": 0.5686, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5197463245892188, |
|
"grad_norm": 7.787915229797363, |
|
"learning_rate": 9.845240257724198e-05, |
|
"loss": 0.6015, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.5206111271259729, |
|
"grad_norm": 3.49916410446167, |
|
"learning_rate": 9.841267886326932e-05, |
|
"loss": 0.1611, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.521475929662727, |
|
"grad_norm": 8.411331176757812, |
|
"learning_rate": 9.837245999479985e-05, |
|
"loss": 0.6458, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5223407321994811, |
|
"grad_norm": 7.405316352844238, |
|
"learning_rate": 9.833174638318356e-05, |
|
"loss": 0.7173, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5232055347362352, |
|
"grad_norm": 8.42251968383789, |
|
"learning_rate": 9.829053844483052e-05, |
|
"loss": 0.8808, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5240703372729894, |
|
"grad_norm": 6.8583269119262695, |
|
"learning_rate": 9.824883660120667e-05, |
|
"loss": 0.625, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5249351398097435, |
|
"grad_norm": 6.834749698638916, |
|
"learning_rate": 9.820664127882957e-05, |
|
"loss": 0.4378, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5257999423464975, |
|
"grad_norm": 5.739812850952148, |
|
"learning_rate": 9.81639529092639e-05, |
|
"loss": 0.7798, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.5266647448832517, |
|
"grad_norm": 7.9455084800720215, |
|
"learning_rate": 9.812077192911713e-05, |
|
"loss": 0.6586, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5275295474200058, |
|
"grad_norm": 7.959743499755859, |
|
"learning_rate": 9.80770987800351e-05, |
|
"loss": 0.8475, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5283943499567598, |
|
"grad_norm": 5.485658168792725, |
|
"learning_rate": 9.803293390869739e-05, |
|
"loss": 0.4095, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.529259152493514, |
|
"grad_norm": 7.284278392791748, |
|
"learning_rate": 9.798827776681286e-05, |
|
"loss": 0.4946, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5301239550302681, |
|
"grad_norm": 8.508416175842285, |
|
"learning_rate": 9.79431308111149e-05, |
|
"loss": 0.3962, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5309887575670222, |
|
"grad_norm": 5.56104850769043, |
|
"learning_rate": 9.789749350335693e-05, |
|
"loss": 0.7191, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5318535601037763, |
|
"grad_norm": 11.444177627563477, |
|
"learning_rate": 9.785136631030755e-05, |
|
"loss": 0.6589, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.5327183626405304, |
|
"grad_norm": 8.934037208557129, |
|
"learning_rate": 9.780474970374578e-05, |
|
"loss": 0.5603, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5335831651772845, |
|
"grad_norm": 12.182479858398438, |
|
"learning_rate": 9.775764416045628e-05, |
|
"loss": 1.3667, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5344479677140386, |
|
"grad_norm": 6.506429195404053, |
|
"learning_rate": 9.771005016222446e-05, |
|
"loss": 0.5623, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5353127702507927, |
|
"grad_norm": 8.439187049865723, |
|
"learning_rate": 9.766196819583149e-05, |
|
"loss": 0.6174, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5361775727875469, |
|
"grad_norm": 9.493589401245117, |
|
"learning_rate": 9.761339875304945e-05, |
|
"loss": 0.6462, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.537042375324301, |
|
"grad_norm": 2.347870111465454, |
|
"learning_rate": 9.756434233063616e-05, |
|
"loss": 0.1693, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.537907177861055, |
|
"grad_norm": 8.565069198608398, |
|
"learning_rate": 9.751479943033019e-05, |
|
"loss": 0.4887, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5387719803978092, |
|
"grad_norm": 8.762991905212402, |
|
"learning_rate": 9.746477055884571e-05, |
|
"loss": 0.9039, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5396367829345633, |
|
"grad_norm": 5.132269382476807, |
|
"learning_rate": 9.741425622786728e-05, |
|
"loss": 0.3159, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5405015854713174, |
|
"grad_norm": 6.715843677520752, |
|
"learning_rate": 9.736325695404464e-05, |
|
"loss": 0.6409, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5413663880080715, |
|
"grad_norm": 2.351118803024292, |
|
"learning_rate": 9.731177325898746e-05, |
|
"loss": 0.1413, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5422311905448256, |
|
"grad_norm": 5.473691940307617, |
|
"learning_rate": 9.725980566925989e-05, |
|
"loss": 0.3963, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5430959930815797, |
|
"grad_norm": 6.525996685028076, |
|
"learning_rate": 9.72073547163753e-05, |
|
"loss": 0.4283, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5439607956183338, |
|
"grad_norm": 9.671774864196777, |
|
"learning_rate": 9.71544209367908e-05, |
|
"loss": 0.8147, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5448255981550879, |
|
"grad_norm": 7.720305919647217, |
|
"learning_rate": 9.710100487190173e-05, |
|
"loss": 0.7238, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.545690400691842, |
|
"grad_norm": 6.962470531463623, |
|
"learning_rate": 9.704710706803613e-05, |
|
"loss": 0.3583, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5465552032285962, |
|
"grad_norm": 7.1871819496154785, |
|
"learning_rate": 9.699272807644921e-05, |
|
"loss": 0.5934, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5474200057653502, |
|
"grad_norm": 8.43585205078125, |
|
"learning_rate": 9.693786845331761e-05, |
|
"loss": 0.3339, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5482848083021044, |
|
"grad_norm": 8.839116096496582, |
|
"learning_rate": 9.68825287597338e-05, |
|
"loss": 0.5551, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5491496108388585, |
|
"grad_norm": 7.399514675140381, |
|
"learning_rate": 9.68267095617003e-05, |
|
"loss": 0.7277, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5500144133756125, |
|
"grad_norm": 3.7421650886535645, |
|
"learning_rate": 9.677041143012391e-05, |
|
"loss": 0.3276, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5508792159123667, |
|
"grad_norm": 6.863941669464111, |
|
"learning_rate": 9.67136349408098e-05, |
|
"loss": 0.3983, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5517440184491208, |
|
"grad_norm": 8.192028999328613, |
|
"learning_rate": 9.665638067445577e-05, |
|
"loss": 0.5536, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5526088209858749, |
|
"grad_norm": 6.802035331726074, |
|
"learning_rate": 9.659864921664617e-05, |
|
"loss": 0.4256, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.553473623522629, |
|
"grad_norm": 8.902397155761719, |
|
"learning_rate": 9.654044115784594e-05, |
|
"loss": 0.6132, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5543384260593831, |
|
"grad_norm": 3.023282289505005, |
|
"learning_rate": 9.648175709339465e-05, |
|
"loss": 0.1601, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5552032285961372, |
|
"grad_norm": 6.913763523101807, |
|
"learning_rate": 9.642259762350032e-05, |
|
"loss": 0.8637, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5560680311328913, |
|
"grad_norm": 5.186830043792725, |
|
"learning_rate": 9.636296335323334e-05, |
|
"loss": 0.2678, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5569328336696454, |
|
"grad_norm": 8.123047828674316, |
|
"learning_rate": 9.63028548925202e-05, |
|
"loss": 0.4715, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5577976362063995, |
|
"grad_norm": 8.248505592346191, |
|
"learning_rate": 9.624227285613736e-05, |
|
"loss": 0.4066, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5586624387431537, |
|
"grad_norm": 7.174196243286133, |
|
"learning_rate": 9.618121786370491e-05, |
|
"loss": 0.2985, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5595272412799077, |
|
"grad_norm": 9.055746078491211, |
|
"learning_rate": 9.61196905396802e-05, |
|
"loss": 0.4818, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5603920438166619, |
|
"grad_norm": 5.331139087677002, |
|
"learning_rate": 9.605769151335151e-05, |
|
"loss": 0.3297, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.561256846353416, |
|
"grad_norm": 4.492726802825928, |
|
"learning_rate": 9.59952214188316e-05, |
|
"loss": 0.2309, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.56212164889017, |
|
"grad_norm": 7.451852798461914, |
|
"learning_rate": 9.593228089505117e-05, |
|
"loss": 0.3733, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5629864514269242, |
|
"grad_norm": 9.455964088439941, |
|
"learning_rate": 9.586887058575243e-05, |
|
"loss": 0.471, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5638512539636783, |
|
"grad_norm": 4.70458984375, |
|
"learning_rate": 9.58049911394824e-05, |
|
"loss": 0.1841, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5647160565004324, |
|
"grad_norm": 3.027376413345337, |
|
"learning_rate": 9.574064320958637e-05, |
|
"loss": 0.1042, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5655808590371865, |
|
"grad_norm": 13.047475814819336, |
|
"learning_rate": 9.567582745420117e-05, |
|
"loss": 1.7486, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5664456615739406, |
|
"grad_norm": 5.038949489593506, |
|
"learning_rate": 9.561054453624842e-05, |
|
"loss": 0.7092, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5673104641106947, |
|
"grad_norm": 6.817296981811523, |
|
"learning_rate": 9.554479512342784e-05, |
|
"loss": 0.4515, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5681752666474489, |
|
"grad_norm": 6.715672969818115, |
|
"learning_rate": 9.54785798882103e-05, |
|
"loss": 0.5267, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5690400691842029, |
|
"grad_norm": 12.338273048400879, |
|
"learning_rate": 9.541189950783104e-05, |
|
"loss": 0.8779, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.569904871720957, |
|
"grad_norm": 6.969177722930908, |
|
"learning_rate": 9.534475466428267e-05, |
|
"loss": 0.3105, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5707696742577112, |
|
"grad_norm": 4.153381824493408, |
|
"learning_rate": 9.527714604430827e-05, |
|
"loss": 0.2972, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5716344767944652, |
|
"grad_norm": 9.585479736328125, |
|
"learning_rate": 9.52090743393943e-05, |
|
"loss": 0.7349, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5724992793312194, |
|
"grad_norm": 8.285649299621582, |
|
"learning_rate": 9.514054024576356e-05, |
|
"loss": 0.3054, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5733640818679735, |
|
"grad_norm": 8.23316764831543, |
|
"learning_rate": 9.507154446436805e-05, |
|
"loss": 0.3722, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5742288844047276, |
|
"grad_norm": 3.4087507724761963, |
|
"learning_rate": 9.500208770088183e-05, |
|
"loss": 0.3515, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5750936869414817, |
|
"grad_norm": 11.583375930786133, |
|
"learning_rate": 9.49321706656938e-05, |
|
"loss": 1.0321, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5759584894782358, |
|
"grad_norm": 9.680198669433594, |
|
"learning_rate": 9.48617940739004e-05, |
|
"loss": 0.6996, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5768232920149899, |
|
"grad_norm": 5.860654354095459, |
|
"learning_rate": 9.479095864529828e-05, |
|
"loss": 0.584, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.577688094551744, |
|
"grad_norm": 8.714286804199219, |
|
"learning_rate": 9.471966510437704e-05, |
|
"loss": 0.8377, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5785528970884981, |
|
"grad_norm": 5.863884925842285, |
|
"learning_rate": 9.464791418031172e-05, |
|
"loss": 0.3194, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5794176996252522, |
|
"grad_norm": 3.8105716705322266, |
|
"learning_rate": 9.457570660695541e-05, |
|
"loss": 0.2197, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5802825021620064, |
|
"grad_norm": 7.818668842315674, |
|
"learning_rate": 9.450304312283164e-05, |
|
"loss": 0.5296, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5811473046987604, |
|
"grad_norm": 3.5748655796051025, |
|
"learning_rate": 9.442992447112697e-05, |
|
"loss": 0.2199, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5820121072355146, |
|
"grad_norm": 9.74962043762207, |
|
"learning_rate": 9.435635139968328e-05, |
|
"loss": 0.7576, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5828769097722687, |
|
"grad_norm": 5.957652568817139, |
|
"learning_rate": 9.428232466099018e-05, |
|
"loss": 0.4388, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5837417123090227, |
|
"grad_norm": 1.4129705429077148, |
|
"learning_rate": 9.420784501217726e-05, |
|
"loss": 0.0997, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5846065148457769, |
|
"grad_norm": 6.296298503875732, |
|
"learning_rate": 9.41329132150064e-05, |
|
"loss": 0.4806, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.585471317382531, |
|
"grad_norm": 8.789826393127441, |
|
"learning_rate": 9.405753003586395e-05, |
|
"loss": 0.7328, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5863361199192851, |
|
"grad_norm": 9.228763580322266, |
|
"learning_rate": 9.39816962457529e-05, |
|
"loss": 0.4772, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5872009224560392, |
|
"grad_norm": 5.72409725189209, |
|
"learning_rate": 9.3905412620285e-05, |
|
"loss": 0.3285, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5880657249927933, |
|
"grad_norm": 10.633530616760254, |
|
"learning_rate": 9.382867993967281e-05, |
|
"loss": 0.9213, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5889305275295474, |
|
"grad_norm": 10.06709098815918, |
|
"learning_rate": 9.375149898872172e-05, |
|
"loss": 0.5335, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5897953300663016, |
|
"grad_norm": 5.641694068908691, |
|
"learning_rate": 9.367387055682197e-05, |
|
"loss": 0.3178, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5906601326030556, |
|
"grad_norm": 8.637955665588379, |
|
"learning_rate": 9.359579543794048e-05, |
|
"loss": 0.7194, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5915249351398097, |
|
"grad_norm": 5.672209739685059, |
|
"learning_rate": 9.351727443061283e-05, |
|
"loss": 0.5559, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5923897376765639, |
|
"grad_norm": 6.293837547302246, |
|
"learning_rate": 9.343830833793505e-05, |
|
"loss": 0.489, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5932545402133179, |
|
"grad_norm": 5.788215160369873, |
|
"learning_rate": 9.335889796755541e-05, |
|
"loss": 0.2563, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.594119342750072, |
|
"grad_norm": 8.539923667907715, |
|
"learning_rate": 9.327904413166615e-05, |
|
"loss": 0.8217, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5949841452868262, |
|
"grad_norm": 4.539181709289551, |
|
"learning_rate": 9.319874764699515e-05, |
|
"loss": 0.371, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5958489478235803, |
|
"grad_norm": 4.926830291748047, |
|
"learning_rate": 9.311800933479764e-05, |
|
"loss": 0.3217, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5967137503603344, |
|
"grad_norm": 8.856836318969727, |
|
"learning_rate": 9.30368300208478e-05, |
|
"loss": 0.5505, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5967137503603344, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.71875, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.868374228477478, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.7082660274050915, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.6938775510204082, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8440404534339905, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.3843486566998693, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6415094339622641, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.7555555555555555, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7942297458648682, |
|
"eval_Qnli-dev_cosine_ap": 0.7468079642036429, |
|
"eval_Qnli-dev_cosine_f1": 0.7222222222222222, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.6964967250823975, |
|
"eval_Qnli-dev_cosine_mcc": 0.41614558708189836, |
|
"eval_Qnli-dev_cosine_precision": 0.6190476190476191, |
|
"eval_Qnli-dev_cosine_recall": 0.8666666666666667, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.96875, |
|
"eval_global_dataset_loss": 0.6163961887359619, |
|
"eval_global_dataset_runtime": 68.0763, |
|
"eval_global_dataset_samples_per_second": 14.278, |
|
"eval_global_dataset_steps_per_second": 0.308, |
|
"eval_sequential_score": 0.9375, |
|
"eval_sts-test-1024_pearson_cosine": 0.8687143218667199, |
|
"eval_sts-test-1024_spearman_cosine": 0.9060527968336128, |
|
"eval_sts-test_pearson_cosine": 0.9081393663808583, |
|
"eval_sts-test_spearman_cosine": 0.920736019932914, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5975785528970885, |
|
"grad_norm": 7.396856784820557, |
|
"learning_rate": 9.295521053543019e-05, |
|
"loss": 0.5105, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.5984433554338426, |
|
"grad_norm": 8.075766563415527, |
|
"learning_rate": 9.287315171333144e-05, |
|
"loss": 0.5877, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5993081579705967, |
|
"grad_norm": 8.688477516174316, |
|
"learning_rate": 9.279065439383157e-05, |
|
"loss": 0.7346, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.6001729605073508, |
|
"grad_norm": 5.448639869689941, |
|
"learning_rate": 9.27077194206955e-05, |
|
"loss": 0.3644, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.601037763044105, |
|
"grad_norm": 5.420974254608154, |
|
"learning_rate": 9.262434764216428e-05, |
|
"loss": 0.2205, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6019025655808591, |
|
"grad_norm": 6.542895793914795, |
|
"learning_rate": 9.254053991094666e-05, |
|
"loss": 0.2832, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.6027673681176131, |
|
"grad_norm": 3.4809961318969727, |
|
"learning_rate": 9.245629708421008e-05, |
|
"loss": 0.145, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.6036321706543673, |
|
"grad_norm": 10.80398941040039, |
|
"learning_rate": 9.237162002357214e-05, |
|
"loss": 0.8998, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.6044969731911214, |
|
"grad_norm": 2.6544158458709717, |
|
"learning_rate": 9.228650959509166e-05, |
|
"loss": 0.1194, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.6053617757278754, |
|
"grad_norm": 4.051424980163574, |
|
"learning_rate": 9.220096666925982e-05, |
|
"loss": 0.1845, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6062265782646296, |
|
"grad_norm": 10.206416130065918, |
|
"learning_rate": 9.211499212099135e-05, |
|
"loss": 0.6004, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.6070913808013837, |
|
"grad_norm": 11.007821083068848, |
|
"learning_rate": 9.202858682961545e-05, |
|
"loss": 0.5262, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.6079561833381378, |
|
"grad_norm": 9.616263389587402, |
|
"learning_rate": 9.194175167886698e-05, |
|
"loss": 1.3073, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.6088209858748919, |
|
"grad_norm": 7.583075523376465, |
|
"learning_rate": 9.185448755687717e-05, |
|
"loss": 0.4977, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.609685788411646, |
|
"grad_norm": 3.0454254150390625, |
|
"learning_rate": 9.176679535616477e-05, |
|
"loss": 0.1434, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6105505909484001, |
|
"grad_norm": 5.383974075317383, |
|
"learning_rate": 9.167867597362682e-05, |
|
"loss": 0.1923, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.6114153934851542, |
|
"grad_norm": 10.157812118530273, |
|
"learning_rate": 9.159013031052943e-05, |
|
"loss": 0.5597, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.6122801960219083, |
|
"grad_norm": 12.371292114257812, |
|
"learning_rate": 9.150115927249869e-05, |
|
"loss": 0.8295, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.6131449985586624, |
|
"grad_norm": 9.299467086791992, |
|
"learning_rate": 9.141176376951128e-05, |
|
"loss": 0.5907, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.6140098010954166, |
|
"grad_norm": 7.16170597076416, |
|
"learning_rate": 9.132194471588522e-05, |
|
"loss": 0.5436, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6148746036321706, |
|
"grad_norm": 6.266456127166748, |
|
"learning_rate": 9.123170303027055e-05, |
|
"loss": 0.309, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.6157394061689248, |
|
"grad_norm": 10.73092269897461, |
|
"learning_rate": 9.114103963563985e-05, |
|
"loss": 0.7257, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.6166042087056789, |
|
"grad_norm": 8.290569305419922, |
|
"learning_rate": 9.104995545927893e-05, |
|
"loss": 0.6665, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.617469011242433, |
|
"grad_norm": 6.256021499633789, |
|
"learning_rate": 9.095845143277714e-05, |
|
"loss": 0.281, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.6183338137791871, |
|
"grad_norm": 3.134965419769287, |
|
"learning_rate": 9.086652849201807e-05, |
|
"loss": 0.2241, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.6191986163159412, |
|
"grad_norm": 8.62253475189209, |
|
"learning_rate": 9.077418757716988e-05, |
|
"loss": 0.7341, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.6200634188526953, |
|
"grad_norm": 8.627028465270996, |
|
"learning_rate": 9.06814296326756e-05, |
|
"loss": 0.3946, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.6209282213894494, |
|
"grad_norm": 5.673067092895508, |
|
"learning_rate": 9.05882556072436e-05, |
|
"loss": 0.4353, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.6217930239262035, |
|
"grad_norm": 5.314984321594238, |
|
"learning_rate": 9.049466645383784e-05, |
|
"loss": 0.3919, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.6226578264629576, |
|
"grad_norm": 8.689918518066406, |
|
"learning_rate": 9.040066312966811e-05, |
|
"loss": 0.5087, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.6235226289997118, |
|
"grad_norm": 5.046836853027344, |
|
"learning_rate": 9.030624659618023e-05, |
|
"loss": 0.2345, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.6243874315364658, |
|
"grad_norm": 12.160417556762695, |
|
"learning_rate": 9.021141781904627e-05, |
|
"loss": 0.8855, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.62525223407322, |
|
"grad_norm": 9.182302474975586, |
|
"learning_rate": 9.011617776815464e-05, |
|
"loss": 0.7187, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.6261170366099741, |
|
"grad_norm": 6.717326641082764, |
|
"learning_rate": 9.002052741760015e-05, |
|
"loss": 0.5225, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.6269818391467281, |
|
"grad_norm": 11.271307945251465, |
|
"learning_rate": 8.992446774567405e-05, |
|
"loss": 0.9725, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6278466416834823, |
|
"grad_norm": 12.319371223449707, |
|
"learning_rate": 8.982799973485407e-05, |
|
"loss": 0.6209, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.6287114442202364, |
|
"grad_norm": 7.424941062927246, |
|
"learning_rate": 8.973112437179436e-05, |
|
"loss": 0.478, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.6295762467569905, |
|
"grad_norm": 6.208258628845215, |
|
"learning_rate": 8.963384264731533e-05, |
|
"loss": 0.2833, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.6304410492937446, |
|
"grad_norm": 4.718559265136719, |
|
"learning_rate": 8.95361555563936e-05, |
|
"loss": 0.2356, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.6313058518304987, |
|
"grad_norm": 9.238673210144043, |
|
"learning_rate": 8.943806409815181e-05, |
|
"loss": 0.6937, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.6321706543672528, |
|
"grad_norm": 11.935426712036133, |
|
"learning_rate": 8.933956927584832e-05, |
|
"loss": 0.8793, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.6330354569040069, |
|
"grad_norm": 8.183321952819824, |
|
"learning_rate": 8.924067209686709e-05, |
|
"loss": 0.6845, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.633900259440761, |
|
"grad_norm": 4.494237422943115, |
|
"learning_rate": 8.914137357270723e-05, |
|
"loss": 0.2744, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.6347650619775151, |
|
"grad_norm": 10.111383438110352, |
|
"learning_rate": 8.904167471897274e-05, |
|
"loss": 0.8681, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.6356298645142693, |
|
"grad_norm": 10.407071113586426, |
|
"learning_rate": 8.894157655536216e-05, |
|
"loss": 1.0385, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.6364946670510233, |
|
"grad_norm": 6.472255706787109, |
|
"learning_rate": 8.884108010565797e-05, |
|
"loss": 0.2331, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.6373594695877774, |
|
"grad_norm": 4.348916530609131, |
|
"learning_rate": 8.874018639771637e-05, |
|
"loss": 0.3183, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.6382242721245316, |
|
"grad_norm": 3.087089776992798, |
|
"learning_rate": 8.863889646345653e-05, |
|
"loss": 0.1691, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.6390890746612857, |
|
"grad_norm": 5.743144512176514, |
|
"learning_rate": 8.85372113388502e-05, |
|
"loss": 0.4625, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.6399538771980398, |
|
"grad_norm": 4.561880111694336, |
|
"learning_rate": 8.843513206391101e-05, |
|
"loss": 0.2338, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6408186797347939, |
|
"grad_norm": 10.266475677490234, |
|
"learning_rate": 8.83326596826839e-05, |
|
"loss": 1.1701, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.641683482271548, |
|
"grad_norm": 8.521928787231445, |
|
"learning_rate": 8.822979524323441e-05, |
|
"loss": 0.7673, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.6425482848083021, |
|
"grad_norm": 8.54457950592041, |
|
"learning_rate": 8.812653979763795e-05, |
|
"loss": 0.5481, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.6434130873450562, |
|
"grad_norm": 5.748913288116455, |
|
"learning_rate": 8.802289440196908e-05, |
|
"loss": 0.3357, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.6442778898818103, |
|
"grad_norm": 4.804452896118164, |
|
"learning_rate": 8.791886011629068e-05, |
|
"loss": 0.263, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.6451426924185645, |
|
"grad_norm": 3.707672119140625, |
|
"learning_rate": 8.781443800464316e-05, |
|
"loss": 0.1461, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.6460074949553185, |
|
"grad_norm": 7.357616901397705, |
|
"learning_rate": 8.77096291350334e-05, |
|
"loss": 0.3193, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.6468722974920726, |
|
"grad_norm": 4.722273349761963, |
|
"learning_rate": 8.760443457942408e-05, |
|
"loss": 0.2647, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.6477371000288268, |
|
"grad_norm": 5.43215799331665, |
|
"learning_rate": 8.749885541372257e-05, |
|
"loss": 0.2494, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.6486019025655808, |
|
"grad_norm": 4.395086765289307, |
|
"learning_rate": 8.739289271776991e-05, |
|
"loss": 0.1905, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.649466705102335, |
|
"grad_norm": 6.617416858673096, |
|
"learning_rate": 8.728654757532984e-05, |
|
"loss": 0.6302, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6503315076390891, |
|
"grad_norm": 3.7228050231933594, |
|
"learning_rate": 8.717982107407768e-05, |
|
"loss": 0.3397, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6511963101758432, |
|
"grad_norm": 9.654953002929688, |
|
"learning_rate": 8.707271430558919e-05, |
|
"loss": 0.6679, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6520611127125973, |
|
"grad_norm": 4.019669532775879, |
|
"learning_rate": 8.69652283653294e-05, |
|
"loss": 0.3372, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6529259152493514, |
|
"grad_norm": 7.510921478271484, |
|
"learning_rate": 8.68573643526415e-05, |
|
"loss": 0.6676, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6537907177861055, |
|
"grad_norm": 13.126535415649414, |
|
"learning_rate": 8.674912337073544e-05, |
|
"loss": 1.2867, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6546555203228596, |
|
"grad_norm": 9.412704467773438, |
|
"learning_rate": 8.66405065266768e-05, |
|
"loss": 0.8248, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6555203228596137, |
|
"grad_norm": 6.785587787628174, |
|
"learning_rate": 8.653151493137536e-05, |
|
"loss": 0.4971, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6563851253963678, |
|
"grad_norm": 12.77095890045166, |
|
"learning_rate": 8.642214969957376e-05, |
|
"loss": 1.4049, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.657249927933122, |
|
"grad_norm": 6.501046180725098, |
|
"learning_rate": 8.631241194983616e-05, |
|
"loss": 0.3086, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.658114730469876, |
|
"grad_norm": 6.871536731719971, |
|
"learning_rate": 8.620230280453673e-05, |
|
"loss": 0.6796, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6589795330066301, |
|
"grad_norm": 6.746383190155029, |
|
"learning_rate": 8.609182338984818e-05, |
|
"loss": 0.4314, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6598443355433843, |
|
"grad_norm": 4.454339504241943, |
|
"learning_rate": 8.598097483573029e-05, |
|
"loss": 0.2843, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6607091380801383, |
|
"grad_norm": 5.15504789352417, |
|
"learning_rate": 8.586975827591825e-05, |
|
"loss": 0.4569, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.6615739406168925, |
|
"grad_norm": 6.545773506164551, |
|
"learning_rate": 8.575817484791127e-05, |
|
"loss": 0.3931, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6624387431536466, |
|
"grad_norm": 4.9794511795043945, |
|
"learning_rate": 8.564622569296063e-05, |
|
"loss": 0.2155, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6633035456904007, |
|
"grad_norm": 8.013479232788086, |
|
"learning_rate": 8.553391195605833e-05, |
|
"loss": 0.3245, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6641683482271548, |
|
"grad_norm": 9.687097549438477, |
|
"learning_rate": 8.542123478592518e-05, |
|
"loss": 0.7824, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6650331507639089, |
|
"grad_norm": 5.516420364379883, |
|
"learning_rate": 8.530819533499909e-05, |
|
"loss": 0.3537, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.665897953300663, |
|
"grad_norm": 6.398399353027344, |
|
"learning_rate": 8.519479475942334e-05, |
|
"loss": 0.2212, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6667627558374172, |
|
"grad_norm": 6.814426898956299, |
|
"learning_rate": 8.508103421903468e-05, |
|
"loss": 0.5911, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6676275583741712, |
|
"grad_norm": 6.5453410148620605, |
|
"learning_rate": 8.496691487735156e-05, |
|
"loss": 0.4524, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6684923609109253, |
|
"grad_norm": 3.5740625858306885, |
|
"learning_rate": 8.485243790156208e-05, |
|
"loss": 0.2604, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6693571634476795, |
|
"grad_norm": 12.454208374023438, |
|
"learning_rate": 8.473760446251221e-05, |
|
"loss": 0.8186, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6702219659844335, |
|
"grad_norm": 5.322040557861328, |
|
"learning_rate": 8.462241573469379e-05, |
|
"loss": 0.4612, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6710867685211876, |
|
"grad_norm": 7.373685359954834, |
|
"learning_rate": 8.450687289623235e-05, |
|
"loss": 0.5306, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6719515710579418, |
|
"grad_norm": 11.016031265258789, |
|
"learning_rate": 8.439097712887531e-05, |
|
"loss": 1.0424, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6728163735946959, |
|
"grad_norm": 8.017274856567383, |
|
"learning_rate": 8.427472961797971e-05, |
|
"loss": 0.473, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.67368117613145, |
|
"grad_norm": 5.788976669311523, |
|
"learning_rate": 8.415813155250017e-05, |
|
"loss": 0.2846, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6745459786682041, |
|
"grad_norm": 4.2314558029174805, |
|
"learning_rate": 8.404118412497666e-05, |
|
"loss": 0.4083, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6754107812049582, |
|
"grad_norm": 3.476349115371704, |
|
"learning_rate": 8.392388853152245e-05, |
|
"loss": 0.236, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6762755837417123, |
|
"grad_norm": 10.38036823272705, |
|
"learning_rate": 8.380624597181165e-05, |
|
"loss": 0.6732, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6771403862784664, |
|
"grad_norm": 7.326548099517822, |
|
"learning_rate": 8.368825764906716e-05, |
|
"loss": 0.6798, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6780051888152205, |
|
"grad_norm": 8.5910062789917, |
|
"learning_rate": 8.356992477004828e-05, |
|
"loss": 0.75, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.6788699913519747, |
|
"grad_norm": 4.450828552246094, |
|
"learning_rate": 8.345124854503825e-05, |
|
"loss": 0.2198, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6797347938887287, |
|
"grad_norm": 3.15915584564209, |
|
"learning_rate": 8.33322301878321e-05, |
|
"loss": 0.1629, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6805995964254828, |
|
"grad_norm": 3.2538440227508545, |
|
"learning_rate": 8.321287091572403e-05, |
|
"loss": 0.1949, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.681464398962237, |
|
"grad_norm": 8.031615257263184, |
|
"learning_rate": 8.309317194949509e-05, |
|
"loss": 0.3901, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.682329201498991, |
|
"grad_norm": 2.7871859073638916, |
|
"learning_rate": 8.297313451340064e-05, |
|
"loss": 0.2184, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6831940040357452, |
|
"grad_norm": 6.6741204261779785, |
|
"learning_rate": 8.285275983515783e-05, |
|
"loss": 0.3516, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6840588065724993, |
|
"grad_norm": 9.924346923828125, |
|
"learning_rate": 8.273204914593304e-05, |
|
"loss": 0.9001, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6849236091092534, |
|
"grad_norm": 2.0380783081054688, |
|
"learning_rate": 8.261100368032934e-05, |
|
"loss": 0.0729, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6857884116460075, |
|
"grad_norm": 4.190455913543701, |
|
"learning_rate": 8.248962467637378e-05, |
|
"loss": 0.1484, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6866532141827616, |
|
"grad_norm": 10.513288497924805, |
|
"learning_rate": 8.236791337550478e-05, |
|
"loss": 0.8013, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6875180167195157, |
|
"grad_norm": 5.367727279663086, |
|
"learning_rate": 8.22458710225594e-05, |
|
"loss": 0.2315, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6883828192562699, |
|
"grad_norm": 4.737613201141357, |
|
"learning_rate": 8.21234988657607e-05, |
|
"loss": 0.2135, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6892476217930239, |
|
"grad_norm": 7.230178356170654, |
|
"learning_rate": 8.20007981567048e-05, |
|
"loss": 0.6123, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.690112424329778, |
|
"grad_norm": 5.188995361328125, |
|
"learning_rate": 8.18777701503483e-05, |
|
"loss": 0.2533, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6909772268665322, |
|
"grad_norm": 9.257750511169434, |
|
"learning_rate": 8.175441610499522e-05, |
|
"loss": 0.6212, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6918420294032862, |
|
"grad_norm": 1.5883065462112427, |
|
"learning_rate": 8.163073728228427e-05, |
|
"loss": 0.0883, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6927068319400403, |
|
"grad_norm": 8.530162811279297, |
|
"learning_rate": 8.150673494717597e-05, |
|
"loss": 0.3946, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6935716344767945, |
|
"grad_norm": 7.668551445007324, |
|
"learning_rate": 8.138241036793958e-05, |
|
"loss": 0.4277, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6944364370135486, |
|
"grad_norm": 8.265761375427246, |
|
"learning_rate": 8.125776481614024e-05, |
|
"loss": 0.5575, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6953012395503027, |
|
"grad_norm": 7.973784446716309, |
|
"learning_rate": 8.113279956662594e-05, |
|
"loss": 0.4164, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6961660420870568, |
|
"grad_norm": 4.912955284118652, |
|
"learning_rate": 8.100751589751442e-05, |
|
"loss": 0.1826, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6961660420870568, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.71875, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8731638193130493, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.724535579920194, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7037037037037037, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.781539261341095, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.3721962181491566, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6031746031746031, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444, |
|
"eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.778366208076477, |
|
"eval_Qnli-dev_cosine_ap": 0.7396468214578549, |
|
"eval_Qnli-dev_cosine_f1": 0.7142857142857142, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7083452939987183, |
|
"eval_Qnli-dev_cosine_mcc": 0.4263253018001963, |
|
"eval_Qnli-dev_cosine_precision": 0.660377358490566, |
|
"eval_Qnli-dev_cosine_recall": 0.7777777777777778, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 0.6504772901535034, |
|
"eval_global_dataset_runtime": 67.87, |
|
"eval_global_dataset_samples_per_second": 14.321, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.9583333134651184, |
|
"eval_sts-test-1024_pearson_cosine": 0.8671751973788917, |
|
"eval_sts-test-1024_spearman_cosine": 0.9100830925358703, |
|
"eval_sts-test_pearson_cosine": 0.9084514358185803, |
|
"eval_sts-test_spearman_cosine": 0.9218648677325396, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6970308446238109, |
|
"grad_norm": 6.679697513580322, |
|
"learning_rate": 8.08819150901802e-05, |
|
"loss": 0.1903, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.697895647160565, |
|
"grad_norm": 5.046963214874268, |
|
"learning_rate": 8.075599842924139e-05, |
|
"loss": 0.2428, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6987604496973191, |
|
"grad_norm": 2.3785789012908936, |
|
"learning_rate": 8.06297672025466e-05, |
|
"loss": 0.0856, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6996252522340732, |
|
"grad_norm": 3.804054021835327, |
|
"learning_rate": 8.050322270116174e-05, |
|
"loss": 0.1363, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.7004900547708274, |
|
"grad_norm": 7.834051132202148, |
|
"learning_rate": 8.037636621935685e-05, |
|
"loss": 0.3135, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7013548573075814, |
|
"grad_norm": 9.44914722442627, |
|
"learning_rate": 8.02491990545928e-05, |
|
"loss": 0.758, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.7022196598443355, |
|
"grad_norm": 4.096058368682861, |
|
"learning_rate": 8.012172250750807e-05, |
|
"loss": 0.3242, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.7030844623810897, |
|
"grad_norm": 9.295559883117676, |
|
"learning_rate": 7.999393788190548e-05, |
|
"loss": 0.5513, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.7039492649178437, |
|
"grad_norm": 8.359066009521484, |
|
"learning_rate": 7.986584648473874e-05, |
|
"loss": 0.8495, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.7048140674545978, |
|
"grad_norm": 11.649872779846191, |
|
"learning_rate": 7.973744962609921e-05, |
|
"loss": 0.6741, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.705678869991352, |
|
"grad_norm": 7.094292163848877, |
|
"learning_rate": 7.960874861920242e-05, |
|
"loss": 0.3723, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.7065436725281061, |
|
"grad_norm": 7.240077495574951, |
|
"learning_rate": 7.947974478037468e-05, |
|
"loss": 0.5434, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.7074084750648602, |
|
"grad_norm": 2.250290632247925, |
|
"learning_rate": 7.935043942903955e-05, |
|
"loss": 0.1101, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.7082732776016143, |
|
"grad_norm": 4.252150058746338, |
|
"learning_rate": 7.922083388770447e-05, |
|
"loss": 0.1888, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.7091380801383684, |
|
"grad_norm": 4.072676181793213, |
|
"learning_rate": 7.90909294819471e-05, |
|
"loss": 0.2415, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.7100028826751225, |
|
"grad_norm": 9.136191368103027, |
|
"learning_rate": 7.896072754040186e-05, |
|
"loss": 0.4492, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.7108676852118766, |
|
"grad_norm": 10.30457878112793, |
|
"learning_rate": 7.883022939474626e-05, |
|
"loss": 0.9007, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.7117324877486307, |
|
"grad_norm": 5.737984657287598, |
|
"learning_rate": 7.869943637968738e-05, |
|
"loss": 0.5574, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.7125972902853849, |
|
"grad_norm": 9.1240234375, |
|
"learning_rate": 7.85683498329481e-05, |
|
"loss": 0.9687, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.7134620928221389, |
|
"grad_norm": 8.149517059326172, |
|
"learning_rate": 7.843697109525352e-05, |
|
"loss": 0.8161, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.714326895358893, |
|
"grad_norm": 10.62049674987793, |
|
"learning_rate": 7.830530151031719e-05, |
|
"loss": 0.6275, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.7151916978956472, |
|
"grad_norm": 4.933554172515869, |
|
"learning_rate": 7.817334242482738e-05, |
|
"loss": 0.361, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.7160565004324013, |
|
"grad_norm": 4.892520427703857, |
|
"learning_rate": 7.804109518843334e-05, |
|
"loss": 0.2424, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.7169213029691553, |
|
"grad_norm": 8.320906639099121, |
|
"learning_rate": 7.790856115373142e-05, |
|
"loss": 0.7132, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.7177861055059095, |
|
"grad_norm": 8.682563781738281, |
|
"learning_rate": 7.77757416762513e-05, |
|
"loss": 0.5795, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.7186509080426636, |
|
"grad_norm": 1.6696056127548218, |
|
"learning_rate": 7.764263811444215e-05, |
|
"loss": 0.0414, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.7195157105794177, |
|
"grad_norm": 10.681838989257812, |
|
"learning_rate": 7.75092518296586e-05, |
|
"loss": 0.8467, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.7203805131161718, |
|
"grad_norm": 5.933515548706055, |
|
"learning_rate": 7.737558418614699e-05, |
|
"loss": 0.3639, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.7212453156529259, |
|
"grad_norm": 3.935758352279663, |
|
"learning_rate": 7.724163655103131e-05, |
|
"loss": 0.3737, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.7221101181896801, |
|
"grad_norm": 9.584526062011719, |
|
"learning_rate": 7.710741029429926e-05, |
|
"loss": 0.6802, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.7229749207264341, |
|
"grad_norm": 13.70799446105957, |
|
"learning_rate": 7.697290678878819e-05, |
|
"loss": 1.0565, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.7238397232631882, |
|
"grad_norm": 8.689953804016113, |
|
"learning_rate": 7.683812741017112e-05, |
|
"loss": 0.5841, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.7247045257999424, |
|
"grad_norm": 13.601666450500488, |
|
"learning_rate": 7.670307353694262e-05, |
|
"loss": 1.0172, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.7255693283366964, |
|
"grad_norm": 3.734889507293701, |
|
"learning_rate": 7.656774655040472e-05, |
|
"loss": 0.3109, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.7264341308734505, |
|
"grad_norm": 10.951227188110352, |
|
"learning_rate": 7.643214783465286e-05, |
|
"loss": 0.6902, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.7272989334102047, |
|
"grad_norm": 2.3853259086608887, |
|
"learning_rate": 7.62962787765616e-05, |
|
"loss": 0.1287, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.7281637359469588, |
|
"grad_norm": 10.43367862701416, |
|
"learning_rate": 7.616014076577054e-05, |
|
"loss": 0.6679, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.7290285384837129, |
|
"grad_norm": 5.164660453796387, |
|
"learning_rate": 7.602373519467005e-05, |
|
"loss": 0.3239, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.729893341020467, |
|
"grad_norm": 6.129587650299072, |
|
"learning_rate": 7.588706345838705e-05, |
|
"loss": 0.1646, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.7307581435572211, |
|
"grad_norm": 5.64245080947876, |
|
"learning_rate": 7.575012695477076e-05, |
|
"loss": 0.3594, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.7316229460939752, |
|
"grad_norm": 5.245384216308594, |
|
"learning_rate": 7.561292708437838e-05, |
|
"loss": 0.2795, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.7324877486307293, |
|
"grad_norm": 6.762210369110107, |
|
"learning_rate": 7.547546525046073e-05, |
|
"loss": 0.3268, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.7333525511674834, |
|
"grad_norm": 9.87009048461914, |
|
"learning_rate": 7.533774285894798e-05, |
|
"loss": 0.8067, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.7342173537042376, |
|
"grad_norm": 4.286474704742432, |
|
"learning_rate": 7.519976131843522e-05, |
|
"loss": 0.3708, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.7350821562409916, |
|
"grad_norm": 9.59669303894043, |
|
"learning_rate": 7.506152204016807e-05, |
|
"loss": 0.5467, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.7359469587777457, |
|
"grad_norm": 3.928433895111084, |
|
"learning_rate": 7.492302643802821e-05, |
|
"loss": 0.15, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.7368117613144999, |
|
"grad_norm": 7.317601203918457, |
|
"learning_rate": 7.478427592851893e-05, |
|
"loss": 0.4525, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.737676563851254, |
|
"grad_norm": 6.541726589202881, |
|
"learning_rate": 7.464527193075073e-05, |
|
"loss": 0.3871, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.738541366388008, |
|
"grad_norm": 12.070144653320312, |
|
"learning_rate": 7.450601586642664e-05, |
|
"loss": 0.8351, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.7394061689247622, |
|
"grad_norm": 8.084358215332031, |
|
"learning_rate": 7.436650915982785e-05, |
|
"loss": 0.3939, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.7402709714615163, |
|
"grad_norm": 6.941904067993164, |
|
"learning_rate": 7.422675323779907e-05, |
|
"loss": 0.4311, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.7411357739982704, |
|
"grad_norm": 8.018699645996094, |
|
"learning_rate": 7.408674952973382e-05, |
|
"loss": 0.4675, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.7420005765350245, |
|
"grad_norm": 7.949825763702393, |
|
"learning_rate": 7.394649946756004e-05, |
|
"loss": 0.5963, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.7428653790717786, |
|
"grad_norm": 6.355823040008545, |
|
"learning_rate": 7.38060044857253e-05, |
|
"loss": 0.3415, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.7437301816085328, |
|
"grad_norm": 7.31845760345459, |
|
"learning_rate": 7.366526602118214e-05, |
|
"loss": 0.3599, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.7445949841452868, |
|
"grad_norm": 4.008370876312256, |
|
"learning_rate": 7.352428551337338e-05, |
|
"loss": 0.3354, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.7454597866820409, |
|
"grad_norm": 6.440021991729736, |
|
"learning_rate": 7.338306440421743e-05, |
|
"loss": 0.2971, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.7463245892187951, |
|
"grad_norm": 11.389256477355957, |
|
"learning_rate": 7.32416041380935e-05, |
|
"loss": 0.6679, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.7471893917555491, |
|
"grad_norm": 2.519818067550659, |
|
"learning_rate": 7.309990616182685e-05, |
|
"loss": 0.1211, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.7480541942923032, |
|
"grad_norm": 7.607640743255615, |
|
"learning_rate": 7.2957971924674e-05, |
|
"loss": 0.2407, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.7489189968290574, |
|
"grad_norm": 7.118372917175293, |
|
"learning_rate": 7.28158028783079e-05, |
|
"loss": 0.3254, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.7497837993658115, |
|
"grad_norm": 2.883557081222534, |
|
"learning_rate": 7.267340047680305e-05, |
|
"loss": 0.1074, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.7506486019025655, |
|
"grad_norm": 4.721225738525391, |
|
"learning_rate": 7.253076617662065e-05, |
|
"loss": 0.1904, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.7515134044393197, |
|
"grad_norm": 2.654787302017212, |
|
"learning_rate": 7.23879014365938e-05, |
|
"loss": 0.182, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.7523782069760738, |
|
"grad_norm": 7.568452835083008, |
|
"learning_rate": 7.224480771791235e-05, |
|
"loss": 0.4094, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.7532430095128279, |
|
"grad_norm": 8.068111419677734, |
|
"learning_rate": 7.210148648410821e-05, |
|
"loss": 0.8455, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.754107812049582, |
|
"grad_norm": 6.598762512207031, |
|
"learning_rate": 7.195793920104023e-05, |
|
"loss": 0.4085, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.7549726145863361, |
|
"grad_norm": 6.5393829345703125, |
|
"learning_rate": 7.18141673368792e-05, |
|
"loss": 0.4978, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.7558374171230903, |
|
"grad_norm": 4.241705894470215, |
|
"learning_rate": 7.167017236209292e-05, |
|
"loss": 0.2777, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.7567022196598443, |
|
"grad_norm": 5.239429950714111, |
|
"learning_rate": 7.152595574943113e-05, |
|
"loss": 0.3822, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7575670221965984, |
|
"grad_norm": 10.576812744140625, |
|
"learning_rate": 7.138151897391041e-05, |
|
"loss": 0.5127, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7584318247333526, |
|
"grad_norm": 4.40622615814209, |
|
"learning_rate": 7.123686351279914e-05, |
|
"loss": 0.2795, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7592966272701066, |
|
"grad_norm": 8.214874267578125, |
|
"learning_rate": 7.10919908456023e-05, |
|
"loss": 0.4, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7601614298068607, |
|
"grad_norm": 5.674429893493652, |
|
"learning_rate": 7.094690245404652e-05, |
|
"loss": 0.3919, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7610262323436149, |
|
"grad_norm": 7.315159797668457, |
|
"learning_rate": 7.080159982206471e-05, |
|
"loss": 0.3323, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.761891034880369, |
|
"grad_norm": 5.864488124847412, |
|
"learning_rate": 7.065608443578105e-05, |
|
"loss": 0.5407, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.762755837417123, |
|
"grad_norm": 9.524258613586426, |
|
"learning_rate": 7.05103577834957e-05, |
|
"loss": 0.8925, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7636206399538772, |
|
"grad_norm": 2.4174962043762207, |
|
"learning_rate": 7.036442135566961e-05, |
|
"loss": 0.116, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7644854424906313, |
|
"grad_norm": 5.054670810699463, |
|
"learning_rate": 7.021827664490928e-05, |
|
"loss": 0.382, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7653502450273855, |
|
"grad_norm": 4.311699867248535, |
|
"learning_rate": 7.007192514595141e-05, |
|
"loss": 0.2573, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7662150475641395, |
|
"grad_norm": 5.006008625030518, |
|
"learning_rate": 6.992536835564782e-05, |
|
"loss": 0.2442, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7670798501008936, |
|
"grad_norm": 4.521592140197754, |
|
"learning_rate": 6.977860777294988e-05, |
|
"loss": 0.2122, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7679446526376478, |
|
"grad_norm": 7.981561183929443, |
|
"learning_rate": 6.963164489889337e-05, |
|
"loss": 0.3405, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7688094551744018, |
|
"grad_norm": 10.011691093444824, |
|
"learning_rate": 6.948448123658308e-05, |
|
"loss": 0.4895, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7696742577111559, |
|
"grad_norm": 6.9324517250061035, |
|
"learning_rate": 6.933711829117733e-05, |
|
"loss": 0.4046, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7705390602479101, |
|
"grad_norm": 5.044534683227539, |
|
"learning_rate": 6.918955756987275e-05, |
|
"loss": 0.3365, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7714038627846642, |
|
"grad_norm": 6.062309265136719, |
|
"learning_rate": 6.904180058188877e-05, |
|
"loss": 0.3073, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.7722686653214182, |
|
"grad_norm": 9.762418746948242, |
|
"learning_rate": 6.889384883845214e-05, |
|
"loss": 0.7621, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7731334678581724, |
|
"grad_norm": 8.496923446655273, |
|
"learning_rate": 6.874570385278158e-05, |
|
"loss": 0.4088, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7739982703949265, |
|
"grad_norm": 9.173744201660156, |
|
"learning_rate": 6.859736714007226e-05, |
|
"loss": 0.6372, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7748630729316806, |
|
"grad_norm": 8.595545768737793, |
|
"learning_rate": 6.844884021748019e-05, |
|
"loss": 0.7089, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7757278754684347, |
|
"grad_norm": 7.156553268432617, |
|
"learning_rate": 6.830012460410697e-05, |
|
"loss": 0.5503, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7765926780051888, |
|
"grad_norm": 5.894566059112549, |
|
"learning_rate": 6.815122182098394e-05, |
|
"loss": 0.5239, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.777457480541943, |
|
"grad_norm": 5.80053186416626, |
|
"learning_rate": 6.800213339105683e-05, |
|
"loss": 0.1838, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.778322283078697, |
|
"grad_norm": 2.8142247200012207, |
|
"learning_rate": 6.785286083917017e-05, |
|
"loss": 0.1141, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7791870856154511, |
|
"grad_norm": 5.2369537353515625, |
|
"learning_rate": 6.770340569205157e-05, |
|
"loss": 0.4552, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7800518881522053, |
|
"grad_norm": 7.276421070098877, |
|
"learning_rate": 6.755376947829625e-05, |
|
"loss": 0.4267, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7809166906889593, |
|
"grad_norm": 10.988953590393066, |
|
"learning_rate": 6.74039537283513e-05, |
|
"loss": 1.0252, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7817814932257134, |
|
"grad_norm": 10.337282180786133, |
|
"learning_rate": 6.725395997450008e-05, |
|
"loss": 0.6281, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7826462957624676, |
|
"grad_norm": 10.337082862854004, |
|
"learning_rate": 6.710378975084652e-05, |
|
"loss": 0.6716, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7835110982992217, |
|
"grad_norm": 3.361793279647827, |
|
"learning_rate": 6.695344459329948e-05, |
|
"loss": 0.1769, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.7843759008359757, |
|
"grad_norm": 8.392909049987793, |
|
"learning_rate": 6.6802926039557e-05, |
|
"loss": 0.428, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7852407033727299, |
|
"grad_norm": 5.3866729736328125, |
|
"learning_rate": 6.665223562909058e-05, |
|
"loss": 0.335, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.786105505909484, |
|
"grad_norm": 8.97474479675293, |
|
"learning_rate": 6.650137490312935e-05, |
|
"loss": 0.6272, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.786970308446238, |
|
"grad_norm": 9.634217262268066, |
|
"learning_rate": 6.635034540464456e-05, |
|
"loss": 0.6253, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7878351109829922, |
|
"grad_norm": 3.891382932662964, |
|
"learning_rate": 6.619914867833343e-05, |
|
"loss": 0.2603, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7886999135197463, |
|
"grad_norm": 6.183927059173584, |
|
"learning_rate": 6.60477862706037e-05, |
|
"loss": 0.5737, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.7895647160565005, |
|
"grad_norm": 7.62052583694458, |
|
"learning_rate": 6.589625972955764e-05, |
|
"loss": 0.3792, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7904295185932545, |
|
"grad_norm": 8.527345657348633, |
|
"learning_rate": 6.574457060497618e-05, |
|
"loss": 0.308, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.7912943211300086, |
|
"grad_norm": 4.892148494720459, |
|
"learning_rate": 6.559272044830317e-05, |
|
"loss": 0.2018, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7921591236667628, |
|
"grad_norm": 3.214404582977295, |
|
"learning_rate": 6.544071081262943e-05, |
|
"loss": 0.1299, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7930239262035169, |
|
"grad_norm": 7.314729690551758, |
|
"learning_rate": 6.528854325267692e-05, |
|
"loss": 0.4338, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.7938887287402709, |
|
"grad_norm": 6.503054618835449, |
|
"learning_rate": 6.513621932478282e-05, |
|
"loss": 0.2775, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7947535312770251, |
|
"grad_norm": 3.8166730403900146, |
|
"learning_rate": 6.498374058688359e-05, |
|
"loss": 0.2077, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.7956183338137792, |
|
"grad_norm": 3.5877130031585693, |
|
"learning_rate": 6.483110859849907e-05, |
|
"loss": 0.2204, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7956183338137792, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8248084783554077, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.7343586316206616, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7628865979381444, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.8248084783554077, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.528911810491234, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.7115384615384616, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222, |
|
"eval_Qnli-dev_cosine_accuracy": 0.71875, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7927051782608032, |
|
"eval_Qnli-dev_cosine_ap": 0.7346717053497452, |
|
"eval_Qnli-dev_cosine_f1": 0.7254901960784313, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.7089404463768005, |
|
"eval_Qnli-dev_cosine_mcc": 0.43697448216965834, |
|
"eval_Qnli-dev_cosine_precision": 0.6491228070175439, |
|
"eval_Qnli-dev_cosine_recall": 0.8222222222222222, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, |
|
"eval_global_dataset_loss": 0.5776817798614502, |
|
"eval_global_dataset_runtime": 67.9151, |
|
"eval_global_dataset_samples_per_second": 14.312, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.9375, |
|
"eval_sts-test-1024_pearson_cosine": 0.8700957313588291, |
|
"eval_sts-test-1024_spearman_cosine": 0.9050692984403192, |
|
"eval_sts-test_pearson_cosine": 0.9076253073025099, |
|
"eval_sts-test_spearman_cosine": 0.9206752404165891, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7964831363505332, |
|
"grad_norm": 8.318743705749512, |
|
"learning_rate": 6.467832492071649e-05, |
|
"loss": 0.6926, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7973479388872874, |
|
"grad_norm": 3.8544118404388428, |
|
"learning_rate": 6.452539111617453e-05, |
|
"loss": 0.287, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.7982127414240415, |
|
"grad_norm": 5.1298441886901855, |
|
"learning_rate": 6.437230874904739e-05, |
|
"loss": 0.119, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7990775439607957, |
|
"grad_norm": 6.161678791046143, |
|
"learning_rate": 6.421907938502865e-05, |
|
"loss": 0.2366, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.7999423464975497, |
|
"grad_norm": 6.632068634033203, |
|
"learning_rate": 6.406570459131538e-05, |
|
"loss": 0.2398, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.8008071490343038, |
|
"grad_norm": 7.237907886505127, |
|
"learning_rate": 6.39121859365921e-05, |
|
"loss": 0.5197, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.801671951571058, |
|
"grad_norm": 4.103116035461426, |
|
"learning_rate": 6.375852499101467e-05, |
|
"loss": 0.2894, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.802536754107812, |
|
"grad_norm": 4.985617160797119, |
|
"learning_rate": 6.36047233261943e-05, |
|
"loss": 0.3327, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.8034015566445661, |
|
"grad_norm": 3.32094144821167, |
|
"learning_rate": 6.345078251518143e-05, |
|
"loss": 0.1615, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.8042663591813203, |
|
"grad_norm": 2.774306297302246, |
|
"learning_rate": 6.329670413244967e-05, |
|
"loss": 0.181, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.8051311617180744, |
|
"grad_norm": 5.269750595092773, |
|
"learning_rate": 6.314248975387965e-05, |
|
"loss": 0.2791, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.8059959642548284, |
|
"grad_norm": 6.187272548675537, |
|
"learning_rate": 6.298814095674297e-05, |
|
"loss": 0.4581, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.8068607667915826, |
|
"grad_norm": 5.671879291534424, |
|
"learning_rate": 6.283365931968603e-05, |
|
"loss": 0.2483, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.8077255693283367, |
|
"grad_norm": 12.671977043151855, |
|
"learning_rate": 6.26790464227139e-05, |
|
"loss": 1.2848, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.8085903718650908, |
|
"grad_norm": 4.673594951629639, |
|
"learning_rate": 6.252430384717412e-05, |
|
"loss": 0.2594, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.8094551744018449, |
|
"grad_norm": 6.6575117111206055, |
|
"learning_rate": 6.236943317574056e-05, |
|
"loss": 0.3473, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.810319976938599, |
|
"grad_norm": 3.377204656600952, |
|
"learning_rate": 6.221443599239721e-05, |
|
"loss": 0.2606, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.8111847794753532, |
|
"grad_norm": 7.629633903503418, |
|
"learning_rate": 6.205931388242207e-05, |
|
"loss": 0.3551, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.8120495820121072, |
|
"grad_norm": 7.514203071594238, |
|
"learning_rate": 6.190406843237078e-05, |
|
"loss": 0.3383, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.8129143845488613, |
|
"grad_norm": 6.37880277633667, |
|
"learning_rate": 6.174870123006051e-05, |
|
"loss": 0.2142, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.8137791870856155, |
|
"grad_norm": 5.619572639465332, |
|
"learning_rate": 6.159321386455372e-05, |
|
"loss": 0.2147, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.8146439896223696, |
|
"grad_norm": 4.535106658935547, |
|
"learning_rate": 6.143760792614179e-05, |
|
"loss": 0.2273, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.8155087921591236, |
|
"grad_norm": 9.820999145507812, |
|
"learning_rate": 6.128188500632892e-05, |
|
"loss": 0.9269, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.8163735946958778, |
|
"grad_norm": 13.05849838256836, |
|
"learning_rate": 6.112604669781572e-05, |
|
"loss": 1.5161, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.8172383972326319, |
|
"grad_norm": 5.841894626617432, |
|
"learning_rate": 6.0970094594483004e-05, |
|
"loss": 0.1962, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.818103199769386, |
|
"grad_norm": 7.9914069175720215, |
|
"learning_rate": 6.0814030291375424e-05, |
|
"loss": 0.5516, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.8189680023061401, |
|
"grad_norm": 4.961643695831299, |
|
"learning_rate": 6.0657855384685215e-05, |
|
"loss": 0.3498, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.8198328048428942, |
|
"grad_norm": 5.379317283630371, |
|
"learning_rate": 6.050157147173581e-05, |
|
"loss": 0.3962, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.8206976073796484, |
|
"grad_norm": 4.794488430023193, |
|
"learning_rate": 6.0345180150965576e-05, |
|
"loss": 0.2953, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.8215624099164024, |
|
"grad_norm": 3.9415969848632812, |
|
"learning_rate": 6.0188683021911396e-05, |
|
"loss": 0.1737, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.8224272124531565, |
|
"grad_norm": 5.720635890960693, |
|
"learning_rate": 6.003208168519233e-05, |
|
"loss": 0.1876, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.8232920149899107, |
|
"grad_norm": 5.777576923370361, |
|
"learning_rate": 5.9875377742493276e-05, |
|
"loss": 0.2037, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.8241568175266647, |
|
"grad_norm": 7.2347798347473145, |
|
"learning_rate": 5.971857279654854e-05, |
|
"loss": 0.6859, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.8250216200634188, |
|
"grad_norm": 9.168425559997559, |
|
"learning_rate": 5.956166845112552e-05, |
|
"loss": 0.5139, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.825886422600173, |
|
"grad_norm": 8.480242729187012, |
|
"learning_rate": 5.9404666311008175e-05, |
|
"loss": 0.4557, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.8267512251369271, |
|
"grad_norm": 7.415064811706543, |
|
"learning_rate": 5.924756798198075e-05, |
|
"loss": 0.418, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.8276160276736811, |
|
"grad_norm": 5.769486427307129, |
|
"learning_rate": 5.909037507081121e-05, |
|
"loss": 0.3326, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.8284808302104353, |
|
"grad_norm": 9.98505687713623, |
|
"learning_rate": 5.893308918523498e-05, |
|
"loss": 0.6773, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.8293456327471894, |
|
"grad_norm": 6.003732681274414, |
|
"learning_rate": 5.877571193393837e-05, |
|
"loss": 0.1938, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.8302104352839434, |
|
"grad_norm": 6.989200115203857, |
|
"learning_rate": 5.8618244926542156e-05, |
|
"loss": 0.2502, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.8310752378206976, |
|
"grad_norm": 5.944050312042236, |
|
"learning_rate": 5.84606897735851e-05, |
|
"loss": 0.1686, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.8319400403574517, |
|
"grad_norm": 2.568422794342041, |
|
"learning_rate": 5.830304808650753e-05, |
|
"loss": 0.1174, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.8328048428942059, |
|
"grad_norm": 6.347965717315674, |
|
"learning_rate": 5.814532147763478e-05, |
|
"loss": 0.3688, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.8336696454309599, |
|
"grad_norm": 9.391959190368652, |
|
"learning_rate": 5.798751156016085e-05, |
|
"loss": 0.4529, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.834534447967714, |
|
"grad_norm": 7.86402702331543, |
|
"learning_rate": 5.7829619948131654e-05, |
|
"loss": 0.5973, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.8353992505044682, |
|
"grad_norm": 9.44655990600586, |
|
"learning_rate": 5.767164825642879e-05, |
|
"loss": 0.7635, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.8362640530412222, |
|
"grad_norm": 7.177609920501709, |
|
"learning_rate": 5.751359810075284e-05, |
|
"loss": 0.5631, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.8371288555779763, |
|
"grad_norm": 5.718000411987305, |
|
"learning_rate": 5.735547109760686e-05, |
|
"loss": 0.313, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.8379936581147305, |
|
"grad_norm": 6.908907413482666, |
|
"learning_rate": 5.719726886427998e-05, |
|
"loss": 0.4425, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.8388584606514846, |
|
"grad_norm": 6.510931491851807, |
|
"learning_rate": 5.7038993018830675e-05, |
|
"loss": 0.3581, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.8397232631882386, |
|
"grad_norm": 6.836475372314453, |
|
"learning_rate": 5.688064518007036e-05, |
|
"loss": 0.2542, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.8405880657249928, |
|
"grad_norm": 6.765063762664795, |
|
"learning_rate": 5.6722226967546764e-05, |
|
"loss": 0.2576, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.8414528682617469, |
|
"grad_norm": 2.587757110595703, |
|
"learning_rate": 5.65637400015274e-05, |
|
"loss": 0.1156, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.8423176707985011, |
|
"grad_norm": 11.528030395507812, |
|
"learning_rate": 5.640518590298298e-05, |
|
"loss": 0.8184, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.8431824733352551, |
|
"grad_norm": 7.4515790939331055, |
|
"learning_rate": 5.624656629357081e-05, |
|
"loss": 0.3536, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.8440472758720092, |
|
"grad_norm": 6.2617082595825195, |
|
"learning_rate": 5.6087882795618216e-05, |
|
"loss": 0.3023, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.8449120784087634, |
|
"grad_norm": 4.997031211853027, |
|
"learning_rate": 5.5929137032106005e-05, |
|
"loss": 0.418, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.8457768809455174, |
|
"grad_norm": 7.6783671379089355, |
|
"learning_rate": 5.577033062665179e-05, |
|
"loss": 0.3036, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.8466416834822715, |
|
"grad_norm": 7.0620436668396, |
|
"learning_rate": 5.561146520349343e-05, |
|
"loss": 0.55, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.8475064860190257, |
|
"grad_norm": 8.351699829101562, |
|
"learning_rate": 5.5452542387472416e-05, |
|
"loss": 0.6477, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.8483712885557798, |
|
"grad_norm": 7.685431480407715, |
|
"learning_rate": 5.529356380401722e-05, |
|
"loss": 0.3518, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.8492360910925338, |
|
"grad_norm": 9.351055145263672, |
|
"learning_rate": 5.5134531079126704e-05, |
|
"loss": 0.7033, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.850100893629288, |
|
"grad_norm": 9.499361038208008, |
|
"learning_rate": 5.497544583935347e-05, |
|
"loss": 0.6931, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.8509656961660421, |
|
"grad_norm": 10.090303421020508, |
|
"learning_rate": 5.481630971178721e-05, |
|
"loss": 0.9278, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.8518304987027961, |
|
"grad_norm": 4.208652019500732, |
|
"learning_rate": 5.465712432403812e-05, |
|
"loss": 0.3061, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.8526953012395503, |
|
"grad_norm": 9.341512680053711, |
|
"learning_rate": 5.4497891304220225e-05, |
|
"loss": 0.8352, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.8535601037763044, |
|
"grad_norm": 1.4906487464904785, |
|
"learning_rate": 5.433861228093471e-05, |
|
"loss": 0.125, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.8544249063130586, |
|
"grad_norm": 2.660661458969116, |
|
"learning_rate": 5.417928888325324e-05, |
|
"loss": 0.2284, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.8552897088498126, |
|
"grad_norm": 10.015325546264648, |
|
"learning_rate": 5.401992274070136e-05, |
|
"loss": 0.838, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.8561545113865667, |
|
"grad_norm": 8.29864501953125, |
|
"learning_rate": 5.386051548324179e-05, |
|
"loss": 0.5318, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8570193139233209, |
|
"grad_norm": 4.587142467498779, |
|
"learning_rate": 5.3701068741257796e-05, |
|
"loss": 0.1618, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.8578841164600749, |
|
"grad_norm": 1.8213179111480713, |
|
"learning_rate": 5.354158414553646e-05, |
|
"loss": 0.0871, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.858748918996829, |
|
"grad_norm": 8.93700122833252, |
|
"learning_rate": 5.3382063327252017e-05, |
|
"loss": 0.6915, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.8596137215335832, |
|
"grad_norm": 4.793188095092773, |
|
"learning_rate": 5.322250791794916e-05, |
|
"loss": 0.3728, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.8604785240703373, |
|
"grad_norm": 4.624011516571045, |
|
"learning_rate": 5.3062919549526436e-05, |
|
"loss": 0.2403, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.8613433266070913, |
|
"grad_norm": 1.8955051898956299, |
|
"learning_rate": 5.2903299854219435e-05, |
|
"loss": 0.0651, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.8622081291438455, |
|
"grad_norm": 10.889961242675781, |
|
"learning_rate": 5.274365046458416e-05, |
|
"loss": 0.5783, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.8630729316805996, |
|
"grad_norm": 4.15156888961792, |
|
"learning_rate": 5.258397301348035e-05, |
|
"loss": 0.2061, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.8639377342173538, |
|
"grad_norm": 3.9485700130462646, |
|
"learning_rate": 5.2424269134054694e-05, |
|
"loss": 0.154, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.8648025367541078, |
|
"grad_norm": 9.996199607849121, |
|
"learning_rate": 5.2264540459724276e-05, |
|
"loss": 0.4689, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8656673392908619, |
|
"grad_norm": 7.154214382171631, |
|
"learning_rate": 5.21047886241597e-05, |
|
"loss": 0.2088, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8665321418276161, |
|
"grad_norm": 8.80577564239502, |
|
"learning_rate": 5.194501526126842e-05, |
|
"loss": 0.5299, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8673969443643701, |
|
"grad_norm": 5.227262020111084, |
|
"learning_rate": 5.1785222005178224e-05, |
|
"loss": 0.2689, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.8682617469011242, |
|
"grad_norm": 6.6007843017578125, |
|
"learning_rate": 5.162541049022019e-05, |
|
"loss": 0.3098, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8691265494378784, |
|
"grad_norm": 6.239222526550293, |
|
"learning_rate": 5.146558235091225e-05, |
|
"loss": 0.3478, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8699913519746325, |
|
"grad_norm": 2.814821243286133, |
|
"learning_rate": 5.1305739221942364e-05, |
|
"loss": 0.1841, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8708561545113865, |
|
"grad_norm": 1.4831047058105469, |
|
"learning_rate": 5.114588273815173e-05, |
|
"loss": 0.0862, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.8717209570481407, |
|
"grad_norm": 8.568103790283203, |
|
"learning_rate": 5.09860145345182e-05, |
|
"loss": 0.5991, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8725857595848948, |
|
"grad_norm": 9.560081481933594, |
|
"learning_rate": 5.082613624613946e-05, |
|
"loss": 0.4026, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8734505621216488, |
|
"grad_norm": 7.88618803024292, |
|
"learning_rate": 5.066624950821637e-05, |
|
"loss": 0.4991, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.874315364658403, |
|
"grad_norm": 5.938468933105469, |
|
"learning_rate": 5.05063559560362e-05, |
|
"loss": 0.2786, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8751801671951571, |
|
"grad_norm": 9.075552940368652, |
|
"learning_rate": 5.0346457224955903e-05, |
|
"loss": 0.4708, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8760449697319113, |
|
"grad_norm": 8.848043441772461, |
|
"learning_rate": 5.018655495038541e-05, |
|
"loss": 0.4201, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8769097722686653, |
|
"grad_norm": 5.168188095092773, |
|
"learning_rate": 5.002665076777091e-05, |
|
"loss": 0.2089, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.8777745748054194, |
|
"grad_norm": 4.413999557495117, |
|
"learning_rate": 4.986674631257804e-05, |
|
"loss": 0.3158, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8786393773421736, |
|
"grad_norm": 9.610701560974121, |
|
"learning_rate": 4.970684322027534e-05, |
|
"loss": 0.7363, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8795041798789276, |
|
"grad_norm": 6.793404579162598, |
|
"learning_rate": 4.9546943126317274e-05, |
|
"loss": 0.2885, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8803689824156817, |
|
"grad_norm": 9.434625625610352, |
|
"learning_rate": 4.9387047666127786e-05, |
|
"loss": 0.4937, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8812337849524359, |
|
"grad_norm": 6.130424499511719, |
|
"learning_rate": 4.9227158475083304e-05, |
|
"loss": 0.1684, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.88209858748919, |
|
"grad_norm": 4.250467777252197, |
|
"learning_rate": 4.9067277188496185e-05, |
|
"loss": 0.2749, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.882963390025944, |
|
"grad_norm": 3.2336244583129883, |
|
"learning_rate": 4.890740544159796e-05, |
|
"loss": 0.2789, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8838281925626982, |
|
"grad_norm": 7.7692084312438965, |
|
"learning_rate": 4.874754486952255e-05, |
|
"loss": 0.5868, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8846929950994523, |
|
"grad_norm": 7.071033954620361, |
|
"learning_rate": 4.8587697107289626e-05, |
|
"loss": 0.5894, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.8855577976362063, |
|
"grad_norm": 6.448328971862793, |
|
"learning_rate": 4.84278637897878e-05, |
|
"loss": 0.1635, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8864226001729605, |
|
"grad_norm": 11.375746726989746, |
|
"learning_rate": 4.826804655175795e-05, |
|
"loss": 0.6829, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8872874027097146, |
|
"grad_norm": 1.4379364252090454, |
|
"learning_rate": 4.8108247027776565e-05, |
|
"loss": 0.1124, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8881522052464688, |
|
"grad_norm": 11.497692108154297, |
|
"learning_rate": 4.794846685223886e-05, |
|
"loss": 1.2642, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.8890170077832228, |
|
"grad_norm": 6.77423620223999, |
|
"learning_rate": 4.778870765934221e-05, |
|
"loss": 0.6585, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8898818103199769, |
|
"grad_norm": 6.651241779327393, |
|
"learning_rate": 4.762897108306939e-05, |
|
"loss": 0.2622, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.8907466128567311, |
|
"grad_norm": 7.476505279541016, |
|
"learning_rate": 4.7469258757171854e-05, |
|
"loss": 0.2797, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8916114153934852, |
|
"grad_norm": 7.375949382781982, |
|
"learning_rate": 4.7309572315152976e-05, |
|
"loss": 0.2747, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.8924762179302392, |
|
"grad_norm": 4.907548427581787, |
|
"learning_rate": 4.7149913390251494e-05, |
|
"loss": 0.3748, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8933410204669934, |
|
"grad_norm": 7.232724189758301, |
|
"learning_rate": 4.6990283615424605e-05, |
|
"loss": 0.1792, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8942058230037475, |
|
"grad_norm": 6.12727165222168, |
|
"learning_rate": 4.6830684623331446e-05, |
|
"loss": 0.342, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.8950706255405015, |
|
"grad_norm": 4.968775272369385, |
|
"learning_rate": 4.667111804631626e-05, |
|
"loss": 0.3287, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8950706255405015, |
|
"eval_Qnli-dev-1024_cosine_accuracy": 0.7083333333333334, |
|
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8668882846832275, |
|
"eval_Qnli-dev-1024_cosine_ap": 0.6999822477767415, |
|
"eval_Qnli-dev-1024_cosine_f1": 0.7090909090909091, |
|
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.7944933772087097, |
|
"eval_Qnli-dev-1024_cosine_mcc": 0.3808509397785054, |
|
"eval_Qnli-dev-1024_cosine_precision": 0.6, |
|
"eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667, |
|
"eval_Qnli-dev_cosine_accuracy": 0.71875, |
|
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7880457639694214, |
|
"eval_Qnli-dev_cosine_ap": 0.7340095878922616, |
|
"eval_Qnli-dev_cosine_f1": 0.7238095238095237, |
|
"eval_Qnli-dev_cosine_f1_threshold": 0.703315019607544, |
|
"eval_Qnli-dev_cosine_mcc": 0.42578476395267345, |
|
"eval_Qnli-dev_cosine_precision": 0.6333333333333333, |
|
"eval_Qnli-dev_cosine_recall": 0.8444444444444444, |
|
"eval_allNLI--triplets-1024_cosine_accuracy": 0.9375, |
|
"eval_allNLI-triplets_cosine_accuracy": 0.9791666865348816, |
|
"eval_global_dataset_loss": 0.5188027620315552, |
|
"eval_global_dataset_runtime": 67.9093, |
|
"eval_global_dataset_samples_per_second": 14.313, |
|
"eval_global_dataset_steps_per_second": 0.309, |
|
"eval_sequential_score": 0.9375, |
|
"eval_sts-test-1024_pearson_cosine": 0.8797952712975998, |
|
"eval_sts-test-1024_spearman_cosine": 0.9135158587840699, |
|
"eval_sts-test_pearson_cosine": 0.9084511864603124, |
|
"eval_sts-test_spearman_cosine": 0.9222536610997011, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8959354280772557, |
|
"grad_norm": 8.467510223388672, |
|
"learning_rate": 4.651158551639177e-05, |
|
"loss": 0.5348, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8968002306140098, |
|
"grad_norm": 6.454378604888916, |
|
"learning_rate": 4.635208866522251e-05, |
|
"loss": 0.6588, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.897665033150764, |
|
"grad_norm": 6.776357650756836, |
|
"learning_rate": 4.619262912410804e-05, |
|
"loss": 0.5132, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.898529835687518, |
|
"grad_norm": 10.85428237915039, |
|
"learning_rate": 4.603320852396637e-05, |
|
"loss": 1.1707, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.8993946382242721, |
|
"grad_norm": 3.449406862258911, |
|
"learning_rate": 4.587382849531717e-05, |
|
"loss": 0.1442, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.9002594407610263, |
|
"grad_norm": 2.9549670219421387, |
|
"learning_rate": 4.5714490668265245e-05, |
|
"loss": 0.2243, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.9011242432977803, |
|
"grad_norm": 4.6617817878723145, |
|
"learning_rate": 4.5555196672483685e-05, |
|
"loss": 0.3099, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.9019890458345344, |
|
"grad_norm": 6.141875267028809, |
|
"learning_rate": 4.5395948137197296e-05, |
|
"loss": 0.1839, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.9028538483712886, |
|
"grad_norm": 12.232782363891602, |
|
"learning_rate": 4.5236746691166e-05, |
|
"loss": 0.6248, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.9037186509080427, |
|
"grad_norm": 5.728059768676758, |
|
"learning_rate": 4.507759396266802e-05, |
|
"loss": 0.4605, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.9045834534447967, |
|
"grad_norm": 8.688108444213867, |
|
"learning_rate": 4.49184915794833e-05, |
|
"loss": 0.4857, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.9054482559815509, |
|
"grad_norm": 8.695257186889648, |
|
"learning_rate": 4.475944116887695e-05, |
|
"loss": 0.3966, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.906313058518305, |
|
"grad_norm": 5.200995922088623, |
|
"learning_rate": 4.460044435758241e-05, |
|
"loss": 0.4439, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.907177861055059, |
|
"grad_norm": 12.601680755615234, |
|
"learning_rate": 4.4441502771785003e-05, |
|
"loss": 0.6051, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.9080426635918132, |
|
"grad_norm": 9.575990676879883, |
|
"learning_rate": 4.428261803710516e-05, |
|
"loss": 0.3982, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9089074661285673, |
|
"grad_norm": 2.344109058380127, |
|
"learning_rate": 4.4123791778581865e-05, |
|
"loss": 0.1718, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.9097722686653215, |
|
"grad_norm": 7.567986488342285, |
|
"learning_rate": 4.3965025620656065e-05, |
|
"loss": 0.2641, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.9106370712020755, |
|
"grad_norm": 8.634700775146484, |
|
"learning_rate": 4.3806321187153934e-05, |
|
"loss": 0.3788, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.9115018737388296, |
|
"grad_norm": 8.53459644317627, |
|
"learning_rate": 4.3647680101270416e-05, |
|
"loss": 0.4456, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.9123666762755838, |
|
"grad_norm": 10.249025344848633, |
|
"learning_rate": 4.348910398555249e-05, |
|
"loss": 1.0234, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.9132314788123379, |
|
"grad_norm": 10.008344650268555, |
|
"learning_rate": 4.333059446188269e-05, |
|
"loss": 0.6228, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.9140962813490919, |
|
"grad_norm": 8.067853927612305, |
|
"learning_rate": 4.317215315146238e-05, |
|
"loss": 0.4588, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.9149610838858461, |
|
"grad_norm": 10.182132720947266, |
|
"learning_rate": 4.301378167479532e-05, |
|
"loss": 0.8651, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.9158258864226002, |
|
"grad_norm": 11.363606452941895, |
|
"learning_rate": 4.285548165167105e-05, |
|
"loss": 0.8571, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.9166906889593542, |
|
"grad_norm": 10.103208541870117, |
|
"learning_rate": 4.2697254701148235e-05, |
|
"loss": 0.6446, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.9175554914961084, |
|
"grad_norm": 6.2334418296813965, |
|
"learning_rate": 4.253910244153817e-05, |
|
"loss": 0.2193, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.9184202940328625, |
|
"grad_norm": 5.234436511993408, |
|
"learning_rate": 4.2381026490388245e-05, |
|
"loss": 0.258, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.9192850965696167, |
|
"grad_norm": 8.499395370483398, |
|
"learning_rate": 4.222302846446544e-05, |
|
"loss": 0.4164, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.9201498991063707, |
|
"grad_norm": 5.450392723083496, |
|
"learning_rate": 4.206510997973963e-05, |
|
"loss": 0.4783, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.9210147016431248, |
|
"grad_norm": 5.65176248550415, |
|
"learning_rate": 4.1907272651367226e-05, |
|
"loss": 0.246, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.921879504179879, |
|
"grad_norm": 8.317374229431152, |
|
"learning_rate": 4.1749518093674566e-05, |
|
"loss": 0.3821, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.922744306716633, |
|
"grad_norm": 4.983073711395264, |
|
"learning_rate": 4.159184792014145e-05, |
|
"loss": 0.182, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.9236091092533871, |
|
"grad_norm": 10.939299583435059, |
|
"learning_rate": 4.143426374338459e-05, |
|
"loss": 0.6648, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.9244739117901413, |
|
"grad_norm": 5.333117485046387, |
|
"learning_rate": 4.1276767175141125e-05, |
|
"loss": 0.5405, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.9253387143268954, |
|
"grad_norm": 6.263637542724609, |
|
"learning_rate": 4.1119359826252226e-05, |
|
"loss": 0.3681, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.9262035168636494, |
|
"grad_norm": 5.16562032699585, |
|
"learning_rate": 4.0962043306646455e-05, |
|
"loss": 0.2323, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.9270683194004036, |
|
"grad_norm": 6.132068634033203, |
|
"learning_rate": 4.080481922532348e-05, |
|
"loss": 0.4676, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.9279331219371577, |
|
"grad_norm": 8.957972526550293, |
|
"learning_rate": 4.064768919033746e-05, |
|
"loss": 0.5141, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.9287979244739117, |
|
"grad_norm": 7.958962440490723, |
|
"learning_rate": 4.0490654808780685e-05, |
|
"loss": 0.3067, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.9296627270106659, |
|
"grad_norm": 6.653066158294678, |
|
"learning_rate": 4.033371768676716e-05, |
|
"loss": 0.4638, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.93052752954742, |
|
"grad_norm": 5.897211074829102, |
|
"learning_rate": 4.0176879429416086e-05, |
|
"loss": 0.3082, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.9313923320841742, |
|
"grad_norm": 8.102348327636719, |
|
"learning_rate": 4.002014164083552e-05, |
|
"loss": 0.4003, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.9322571346209282, |
|
"grad_norm": 7.730281829833984, |
|
"learning_rate": 3.9863505924105995e-05, |
|
"loss": 0.3053, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.9331219371576823, |
|
"grad_norm": 5.675047397613525, |
|
"learning_rate": 3.970697388126397e-05, |
|
"loss": 0.1876, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.9339867396944365, |
|
"grad_norm": 9.553377151489258, |
|
"learning_rate": 3.9550547113285665e-05, |
|
"loss": 0.569, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9348515422311905, |
|
"grad_norm": 10.86451244354248, |
|
"learning_rate": 3.9394227220070466e-05, |
|
"loss": 0.8728, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.9357163447679446, |
|
"grad_norm": 9.33718204498291, |
|
"learning_rate": 3.923801580042476e-05, |
|
"loss": 0.4347, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.9365811473046988, |
|
"grad_norm": 8.696025848388672, |
|
"learning_rate": 3.90819144520454e-05, |
|
"loss": 0.8919, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.9374459498414529, |
|
"grad_norm": 7.635885238647461, |
|
"learning_rate": 3.892592477150352e-05, |
|
"loss": 0.4828, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.9383107523782069, |
|
"grad_norm": 7.686861038208008, |
|
"learning_rate": 3.877004835422815e-05, |
|
"loss": 0.4338, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.9391755549149611, |
|
"grad_norm": 6.8635029792785645, |
|
"learning_rate": 3.861428679448983e-05, |
|
"loss": 0.359, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.9400403574517152, |
|
"grad_norm": 4.335479736328125, |
|
"learning_rate": 3.845864168538437e-05, |
|
"loss": 0.3828, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.9409051599884694, |
|
"grad_norm": 7.711667537689209, |
|
"learning_rate": 3.8303114618816577e-05, |
|
"loss": 0.5294, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.9417699625252234, |
|
"grad_norm": 6.784587383270264, |
|
"learning_rate": 3.814770718548396e-05, |
|
"loss": 0.4212, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.9426347650619775, |
|
"grad_norm": 8.687413215637207, |
|
"learning_rate": 3.7992420974860384e-05, |
|
"loss": 0.5723, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9434995675987317, |
|
"grad_norm": 3.785308361053467, |
|
"learning_rate": 3.783725757517994e-05, |
|
"loss": 0.2047, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.9443643701354857, |
|
"grad_norm": 8.60908031463623, |
|
"learning_rate": 3.7682218573420576e-05, |
|
"loss": 0.4359, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.9452291726722398, |
|
"grad_norm": 3.608921527862549, |
|
"learning_rate": 3.7527305555287976e-05, |
|
"loss": 0.2121, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.946093975208994, |
|
"grad_norm": 7.160829544067383, |
|
"learning_rate": 3.737252010519925e-05, |
|
"loss": 0.461, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.9469587777457481, |
|
"grad_norm": 2.5925629138946533, |
|
"learning_rate": 3.721786380626675e-05, |
|
"loss": 0.1127, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.9478235802825021, |
|
"grad_norm": 9.759129524230957, |
|
"learning_rate": 3.706333824028201e-05, |
|
"loss": 0.5365, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.9486883828192563, |
|
"grad_norm": 9.999465942382812, |
|
"learning_rate": 3.690894498769933e-05, |
|
"loss": 1.0112, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.9495531853560104, |
|
"grad_norm": 9.034364700317383, |
|
"learning_rate": 3.675468562761982e-05, |
|
"loss": 0.6563, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.9504179878927644, |
|
"grad_norm": 2.152198076248169, |
|
"learning_rate": 3.6600561737775106e-05, |
|
"loss": 0.0732, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.9512827904295186, |
|
"grad_norm": 4.004874229431152, |
|
"learning_rate": 3.6446574894511265e-05, |
|
"loss": 0.1631, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9521475929662727, |
|
"grad_norm": 7.518155097961426, |
|
"learning_rate": 3.629272667277274e-05, |
|
"loss": 0.4512, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.9530123955030269, |
|
"grad_norm": 5.864679336547852, |
|
"learning_rate": 3.613901864608611e-05, |
|
"loss": 0.2731, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.9538771980397809, |
|
"grad_norm": 7.249544620513916, |
|
"learning_rate": 3.598545238654416e-05, |
|
"loss": 0.4866, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.954742000576535, |
|
"grad_norm": 2.4601848125457764, |
|
"learning_rate": 3.583202946478963e-05, |
|
"loss": 0.2007, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.9556068031132892, |
|
"grad_norm": 7.753067970275879, |
|
"learning_rate": 3.567875144999925e-05, |
|
"loss": 0.501, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9564716056500432, |
|
"grad_norm": 11.398188591003418, |
|
"learning_rate": 3.5525619909867704e-05, |
|
"loss": 0.8343, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.9573364081867973, |
|
"grad_norm": 3.151561975479126, |
|
"learning_rate": 3.537263641059152e-05, |
|
"loss": 0.1781, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.9582012107235515, |
|
"grad_norm": 5.797046184539795, |
|
"learning_rate": 3.521980251685315e-05, |
|
"loss": 0.3011, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.9590660132603056, |
|
"grad_norm": 8.037071228027344, |
|
"learning_rate": 3.506711979180485e-05, |
|
"loss": 0.423, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.9599308157970596, |
|
"grad_norm": 8.320140838623047, |
|
"learning_rate": 3.49145897970528e-05, |
|
"loss": 0.6317, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9607956183338138, |
|
"grad_norm": 7.24954080581665, |
|
"learning_rate": 3.47622140926411e-05, |
|
"loss": 0.3058, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.9616604208705679, |
|
"grad_norm": 2.021778106689453, |
|
"learning_rate": 3.4609994237035746e-05, |
|
"loss": 0.1734, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.962525223407322, |
|
"grad_norm": 4.613988876342773, |
|
"learning_rate": 3.4457931787108774e-05, |
|
"loss": 0.1935, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.9633900259440761, |
|
"grad_norm": 4.552547454833984, |
|
"learning_rate": 3.4306028298122316e-05, |
|
"loss": 0.1533, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.9642548284808302, |
|
"grad_norm": 10.501197814941406, |
|
"learning_rate": 3.415428532371271e-05, |
|
"loss": 0.9337, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.9651196310175844, |
|
"grad_norm": 7.168083190917969, |
|
"learning_rate": 3.40027044158745e-05, |
|
"loss": 0.3695, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.9659844335543384, |
|
"grad_norm": 10.598306655883789, |
|
"learning_rate": 3.3851287124944756e-05, |
|
"loss": 0.7095, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.9668492360910925, |
|
"grad_norm": 5.203083038330078, |
|
"learning_rate": 3.370003499958703e-05, |
|
"loss": 0.4206, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.9677140386278467, |
|
"grad_norm": 5.217127323150635, |
|
"learning_rate": 3.3548949586775624e-05, |
|
"loss": 0.235, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.9685788411646008, |
|
"grad_norm": 4.155709266662598, |
|
"learning_rate": 3.339803243177972e-05, |
|
"loss": 0.1233, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9694436437013548, |
|
"grad_norm": 2.8669726848602295, |
|
"learning_rate": 3.324728507814764e-05, |
|
"loss": 0.1605, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.970308446238109, |
|
"grad_norm": 3.5733962059020996, |
|
"learning_rate": 3.3096709067691006e-05, |
|
"loss": 0.1095, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.9711732487748631, |
|
"grad_norm": 4.109647274017334, |
|
"learning_rate": 3.294630594046892e-05, |
|
"loss": 0.2737, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.9720380513116171, |
|
"grad_norm": 7.015890121459961, |
|
"learning_rate": 3.279607723477234e-05, |
|
"loss": 0.3482, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.9729028538483713, |
|
"grad_norm": 6.006662368774414, |
|
"learning_rate": 3.2646024487108236e-05, |
|
"loss": 0.4144, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9737676563851254, |
|
"grad_norm": 8.346697807312012, |
|
"learning_rate": 3.249614923218391e-05, |
|
"loss": 0.4055, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9746324589218796, |
|
"grad_norm": 6.663881778717041, |
|
"learning_rate": 3.234645300289137e-05, |
|
"loss": 0.5001, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9754972614586336, |
|
"grad_norm": 7.918451309204102, |
|
"learning_rate": 3.21969373302915e-05, |
|
"loss": 0.6129, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9763620639953877, |
|
"grad_norm": 5.889848709106445, |
|
"learning_rate": 3.204760374359857e-05, |
|
"loss": 0.3793, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9772268665321419, |
|
"grad_norm": 13.368314743041992, |
|
"learning_rate": 3.189845377016448e-05, |
|
"loss": 1.0901, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9780916690688959, |
|
"grad_norm": 2.977189064025879, |
|
"learning_rate": 3.1749488935463145e-05, |
|
"loss": 0.1219, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.97895647160565, |
|
"grad_norm": 9.539501190185547, |
|
"learning_rate": 3.160071076307497e-05, |
|
"loss": 0.5126, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9798212741424042, |
|
"grad_norm": 2.8723487854003906, |
|
"learning_rate": 3.145212077467118e-05, |
|
"loss": 0.2261, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9806860766791583, |
|
"grad_norm": 4.65241813659668, |
|
"learning_rate": 3.1303720489998326e-05, |
|
"loss": 0.1636, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9815508792159123, |
|
"grad_norm": 6.660006999969482, |
|
"learning_rate": 3.1155511426862654e-05, |
|
"loss": 0.2711, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9824156817526665, |
|
"grad_norm": 4.477895259857178, |
|
"learning_rate": 3.100749510111471e-05, |
|
"loss": 0.3117, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9832804842894206, |
|
"grad_norm": 13.759649276733398, |
|
"learning_rate": 3.085967302663375e-05, |
|
"loss": 0.8633, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9841452868261746, |
|
"grad_norm": 5.8578948974609375, |
|
"learning_rate": 3.071204671531221e-05, |
|
"loss": 0.3619, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9850100893629288, |
|
"grad_norm": 2.4084582328796387, |
|
"learning_rate": 3.056461767704037e-05, |
|
"loss": 0.1079, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9858748918996829, |
|
"grad_norm": 7.170529842376709, |
|
"learning_rate": 3.041738741969078e-05, |
|
"loss": 0.4303, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.986739694436437, |
|
"grad_norm": 4.021960735321045, |
|
"learning_rate": 3.027035744910298e-05, |
|
"loss": 0.1799, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9876044969731911, |
|
"grad_norm": 4.080975532531738, |
|
"learning_rate": 3.012352926906794e-05, |
|
"loss": 0.3902, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9884692995099452, |
|
"grad_norm": 2.9526562690734863, |
|
"learning_rate": 2.9976904381312835e-05, |
|
"loss": 0.1557, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9893341020466994, |
|
"grad_norm": 5.068524360656738, |
|
"learning_rate": 2.9830484285485544e-05, |
|
"loss": 0.1057, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.9901989045834535, |
|
"grad_norm": 4.044787883758545, |
|
"learning_rate": 2.968427047913942e-05, |
|
"loss": 0.1142, |
|
"step": 1145 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 3468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1145, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|