{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9724770642201834, "eval_steps": 98, "global_step": 2916, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010193679918450561, "grad_norm": 24.08992576599121, "learning_rate": 0.0, "loss": 8.5798, "step": 1 }, { "epoch": 0.0020387359836901123, "grad_norm": 22.663619995117188, "learning_rate": 2.2624434389140275e-07, "loss": 8.4222, "step": 2 }, { "epoch": 0.0030581039755351682, "grad_norm": 25.377544403076172, "learning_rate": 4.524886877828055e-07, "loss": 9.392, "step": 3 }, { "epoch": 0.004077471967380225, "grad_norm": 22.122257232666016, "learning_rate": 6.787330316742082e-07, "loss": 8.4193, "step": 4 }, { "epoch": 0.0050968399592252805, "grad_norm": "Infinity", "learning_rate": 9.04977375565611e-07, "loss": 8.808, "step": 5 }, { "epoch": 0.0061162079510703364, "grad_norm": 22.832090377807617, "learning_rate": 9.04977375565611e-07, "loss": 10.5002, "step": 6 }, { "epoch": 0.007135575942915392, "grad_norm": 20.933177947998047, "learning_rate": 1.1312217194570136e-06, "loss": 9.5956, "step": 7 }, { "epoch": 0.00815494393476045, "grad_norm": 20.44132423400879, "learning_rate": 1.3574660633484164e-06, "loss": 8.5526, "step": 8 }, { "epoch": 0.009174311926605505, "grad_norm": 22.630067825317383, "learning_rate": 1.583710407239819e-06, "loss": 9.8255, "step": 9 }, { "epoch": 0.010193679918450561, "grad_norm": 22.625064849853516, "learning_rate": 1.809954751131222e-06, "loss": 9.2067, "step": 10 }, { "epoch": 0.011213047910295617, "grad_norm": 19.427106857299805, "learning_rate": 2.0361990950226245e-06, "loss": 8.667, "step": 11 }, { "epoch": 0.012232415902140673, "grad_norm": 24.354387283325195, "learning_rate": 2.2624434389140273e-06, "loss": 10.396, "step": 12 }, { "epoch": 0.013251783893985729, "grad_norm": 21.99860191345215, "learning_rate": 2.48868778280543e-06, "loss": 8.9036, "step": 13 }, { 
"epoch": 0.014271151885830785, "grad_norm": 21.275592803955078, "learning_rate": 2.7149321266968327e-06, "loss": 7.9307, "step": 14 }, { "epoch": 0.01529051987767584, "grad_norm": 20.020435333251953, "learning_rate": 2.9411764705882355e-06, "loss": 9.0655, "step": 15 }, { "epoch": 0.0163098878695209, "grad_norm": 20.713603973388672, "learning_rate": 3.167420814479638e-06, "loss": 8.6598, "step": 16 }, { "epoch": 0.017329255861365953, "grad_norm": 22.857194900512695, "learning_rate": 3.3936651583710405e-06, "loss": 9.7562, "step": 17 }, { "epoch": 0.01834862385321101, "grad_norm": 19.031551361083984, "learning_rate": 3.619909502262444e-06, "loss": 9.2297, "step": 18 }, { "epoch": 0.019367991845056064, "grad_norm": 19.30624008178711, "learning_rate": 3.846153846153847e-06, "loss": 8.6939, "step": 19 }, { "epoch": 0.020387359836901122, "grad_norm": 17.09296417236328, "learning_rate": 4.072398190045249e-06, "loss": 8.1317, "step": 20 }, { "epoch": 0.021406727828746176, "grad_norm": 19.199600219726562, "learning_rate": 4.298642533936651e-06, "loss": 8.3585, "step": 21 }, { "epoch": 0.022426095820591234, "grad_norm": 18.50484275817871, "learning_rate": 4.5248868778280546e-06, "loss": 8.4533, "step": 22 }, { "epoch": 0.023445463812436288, "grad_norm": 19.170618057250977, "learning_rate": 4.751131221719457e-06, "loss": 9.3014, "step": 23 }, { "epoch": 0.024464831804281346, "grad_norm": 17.692346572875977, "learning_rate": 4.97737556561086e-06, "loss": 8.18, "step": 24 }, { "epoch": 0.0254841997961264, "grad_norm": 18.87356185913086, "learning_rate": 5.203619909502263e-06, "loss": 7.8485, "step": 25 }, { "epoch": 0.026503567787971458, "grad_norm": 16.432092666625977, "learning_rate": 5.4298642533936655e-06, "loss": 8.9669, "step": 26 }, { "epoch": 0.027522935779816515, "grad_norm": 17.064382553100586, "learning_rate": 5.656108597285068e-06, "loss": 10.1397, "step": 27 }, { "epoch": 0.02854230377166157, "grad_norm": 17.96854591369629, "learning_rate": 5.882352941176471e-06, 
"loss": 10.5216, "step": 28 }, { "epoch": 0.029561671763506627, "grad_norm": 16.348352432250977, "learning_rate": 6.108597285067873e-06, "loss": 7.4782, "step": 29 }, { "epoch": 0.03058103975535168, "grad_norm": 15.834653854370117, "learning_rate": 6.334841628959276e-06, "loss": 8.0439, "step": 30 }, { "epoch": 0.03160040774719674, "grad_norm": 15.115158081054688, "learning_rate": 6.5610859728506795e-06, "loss": 7.1911, "step": 31 }, { "epoch": 0.0326197757390418, "grad_norm": 17.570573806762695, "learning_rate": 6.787330316742081e-06, "loss": 8.5735, "step": 32 }, { "epoch": 0.03363914373088685, "grad_norm": 15.224530220031738, "learning_rate": 7.013574660633485e-06, "loss": 8.3855, "step": 33 }, { "epoch": 0.034658511722731905, "grad_norm": 16.47282600402832, "learning_rate": 7.239819004524888e-06, "loss": 8.4305, "step": 34 }, { "epoch": 0.03567787971457696, "grad_norm": 16.739215850830078, "learning_rate": 7.46606334841629e-06, "loss": 9.4608, "step": 35 }, { "epoch": 0.03669724770642202, "grad_norm": 13.741637229919434, "learning_rate": 7.692307692307694e-06, "loss": 8.1572, "step": 36 }, { "epoch": 0.03771661569826707, "grad_norm": 14.70285701751709, "learning_rate": 7.918552036199094e-06, "loss": 8.1456, "step": 37 }, { "epoch": 0.03873598369011213, "grad_norm": 11.470185279846191, "learning_rate": 8.144796380090498e-06, "loss": 7.3833, "step": 38 }, { "epoch": 0.039755351681957186, "grad_norm": 13.029812812805176, "learning_rate": 8.3710407239819e-06, "loss": 8.8539, "step": 39 }, { "epoch": 0.040774719673802244, "grad_norm": 12.46716594696045, "learning_rate": 8.597285067873303e-06, "loss": 8.9349, "step": 40 }, { "epoch": 0.0417940876656473, "grad_norm": 12.875706672668457, "learning_rate": 8.823529411764707e-06, "loss": 8.1803, "step": 41 }, { "epoch": 0.04281345565749235, "grad_norm": 12.646770477294922, "learning_rate": 9.049773755656109e-06, "loss": 6.7532, "step": 42 }, { "epoch": 0.04383282364933741, "grad_norm": 13.792744636535645, "learning_rate": 
9.276018099547511e-06, "loss": 7.127, "step": 43 }, { "epoch": 0.04485219164118247, "grad_norm": 11.656695365905762, "learning_rate": 9.502262443438914e-06, "loss": 7.5565, "step": 44 }, { "epoch": 0.045871559633027525, "grad_norm": 11.562976837158203, "learning_rate": 9.728506787330318e-06, "loss": 7.6078, "step": 45 }, { "epoch": 0.046890927624872576, "grad_norm": 11.516715049743652, "learning_rate": 9.95475113122172e-06, "loss": 8.4153, "step": 46 }, { "epoch": 0.047910295616717634, "grad_norm": 11.569866180419922, "learning_rate": 1.0180995475113122e-05, "loss": 7.1062, "step": 47 }, { "epoch": 0.04892966360856269, "grad_norm": 11.088666915893555, "learning_rate": 1.0407239819004526e-05, "loss": 6.8482, "step": 48 }, { "epoch": 0.04994903160040775, "grad_norm": 11.396224021911621, "learning_rate": 1.0633484162895929e-05, "loss": 7.2262, "step": 49 }, { "epoch": 0.0509683995922528, "grad_norm": 11.868388175964355, "learning_rate": 1.0859728506787331e-05, "loss": 8.0207, "step": 50 }, { "epoch": 0.05198776758409786, "grad_norm": 10.022957801818848, "learning_rate": 1.1085972850678733e-05, "loss": 7.6895, "step": 51 }, { "epoch": 0.053007135575942915, "grad_norm": 11.007475852966309, "learning_rate": 1.1312217194570136e-05, "loss": 7.6185, "step": 52 }, { "epoch": 0.05402650356778797, "grad_norm": 10.026458740234375, "learning_rate": 1.153846153846154e-05, "loss": 8.8153, "step": 53 }, { "epoch": 0.05504587155963303, "grad_norm": 10.358866691589355, "learning_rate": 1.1764705882352942e-05, "loss": 7.7666, "step": 54 }, { "epoch": 0.05606523955147808, "grad_norm": 10.722491264343262, "learning_rate": 1.1990950226244344e-05, "loss": 7.1431, "step": 55 }, { "epoch": 0.05708460754332314, "grad_norm": 10.623186111450195, "learning_rate": 1.2217194570135746e-05, "loss": 6.3969, "step": 56 }, { "epoch": 0.0581039755351682, "grad_norm": 10.13591480255127, "learning_rate": 1.244343891402715e-05, "loss": 8.1643, "step": 57 }, { "epoch": 0.059123343527013254, "grad_norm": 
9.476139068603516, "learning_rate": 1.2669683257918553e-05, "loss": 7.1228, "step": 58 }, { "epoch": 0.060142711518858305, "grad_norm": 8.608465194702148, "learning_rate": 1.2895927601809957e-05, "loss": 6.9228, "step": 59 }, { "epoch": 0.06116207951070336, "grad_norm": 10.69497299194336, "learning_rate": 1.3122171945701359e-05, "loss": 10.2251, "step": 60 }, { "epoch": 0.06218144750254842, "grad_norm": 9.309306144714355, "learning_rate": 1.3348416289592761e-05, "loss": 7.1105, "step": 61 }, { "epoch": 0.06320081549439348, "grad_norm": 9.268863677978516, "learning_rate": 1.3574660633484162e-05, "loss": 7.1156, "step": 62 }, { "epoch": 0.06422018348623854, "grad_norm": 10.207130432128906, "learning_rate": 1.3800904977375568e-05, "loss": 6.5522, "step": 63 }, { "epoch": 0.0652395514780836, "grad_norm": 9.29359245300293, "learning_rate": 1.402714932126697e-05, "loss": 6.734, "step": 64 }, { "epoch": 0.06625891946992865, "grad_norm": 8.38429069519043, "learning_rate": 1.425339366515837e-05, "loss": 8.1303, "step": 65 }, { "epoch": 0.0672782874617737, "grad_norm": 9.689257621765137, "learning_rate": 1.4479638009049776e-05, "loss": 7.298, "step": 66 }, { "epoch": 0.06829765545361875, "grad_norm": 8.886714935302734, "learning_rate": 1.4705882352941177e-05, "loss": 6.1227, "step": 67 }, { "epoch": 0.06931702344546381, "grad_norm": 9.28791332244873, "learning_rate": 1.493212669683258e-05, "loss": 6.7938, "step": 68 }, { "epoch": 0.07033639143730887, "grad_norm": 9.196669578552246, "learning_rate": 1.5158371040723981e-05, "loss": 6.4562, "step": 69 }, { "epoch": 0.07135575942915393, "grad_norm": 10.716215133666992, "learning_rate": 1.5384615384615387e-05, "loss": 8.0389, "step": 70 }, { "epoch": 0.07237512742099898, "grad_norm": 9.852572441101074, "learning_rate": 1.5610859728506788e-05, "loss": 8.7218, "step": 71 }, { "epoch": 0.07339449541284404, "grad_norm": 8.59492301940918, "learning_rate": 1.583710407239819e-05, "loss": 6.1906, "step": 72 }, { "epoch": 
0.0744138634046891, "grad_norm": 9.830521583557129, "learning_rate": 1.6063348416289596e-05, "loss": 6.7222, "step": 73 }, { "epoch": 0.07543323139653414, "grad_norm": 9.12816047668457, "learning_rate": 1.6289592760180996e-05, "loss": 7.0611, "step": 74 }, { "epoch": 0.0764525993883792, "grad_norm": 10.391504287719727, "learning_rate": 1.6515837104072397e-05, "loss": 7.8241, "step": 75 }, { "epoch": 0.07747196738022426, "grad_norm": 9.0382719039917, "learning_rate": 1.67420814479638e-05, "loss": 6.3791, "step": 76 }, { "epoch": 0.07849133537206932, "grad_norm": 11.495955467224121, "learning_rate": 1.6968325791855205e-05, "loss": 6.8864, "step": 77 }, { "epoch": 0.07951070336391437, "grad_norm": 9.282613754272461, "learning_rate": 1.7194570135746606e-05, "loss": 6.8356, "step": 78 }, { "epoch": 0.08053007135575943, "grad_norm": 9.06067180633545, "learning_rate": 1.742081447963801e-05, "loss": 6.168, "step": 79 }, { "epoch": 0.08154943934760449, "grad_norm": 10.343846321105957, "learning_rate": 1.7647058823529414e-05, "loss": 8.6845, "step": 80 }, { "epoch": 0.08256880733944955, "grad_norm": 10.185526847839355, "learning_rate": 1.7873303167420814e-05, "loss": 5.9739, "step": 81 }, { "epoch": 0.0835881753312946, "grad_norm": 12.164653778076172, "learning_rate": 1.8099547511312218e-05, "loss": 6.2423, "step": 82 }, { "epoch": 0.08460754332313965, "grad_norm": 10.543149948120117, "learning_rate": 1.832579185520362e-05, "loss": 7.6247, "step": 83 }, { "epoch": 0.0856269113149847, "grad_norm": 10.210731506347656, "learning_rate": 1.8552036199095023e-05, "loss": 5.8418, "step": 84 }, { "epoch": 0.08664627930682976, "grad_norm": 11.613642692565918, "learning_rate": 1.8778280542986427e-05, "loss": 7.0948, "step": 85 }, { "epoch": 0.08766564729867482, "grad_norm": 12.590648651123047, "learning_rate": 1.9004524886877827e-05, "loss": 6.7457, "step": 86 }, { "epoch": 0.08868501529051988, "grad_norm": 12.547815322875977, "learning_rate": 1.923076923076923e-05, "loss": 5.6837, 
"step": 87 }, { "epoch": 0.08970438328236494, "grad_norm": 14.212437629699707, "learning_rate": 1.9457013574660635e-05, "loss": 6.0757, "step": 88 }, { "epoch": 0.09072375127421, "grad_norm": 14.821358680725098, "learning_rate": 1.9683257918552036e-05, "loss": 6.523, "step": 89 }, { "epoch": 0.09174311926605505, "grad_norm": 14.133096694946289, "learning_rate": 1.990950226244344e-05, "loss": 6.2917, "step": 90 }, { "epoch": 0.09276248725790011, "grad_norm": 14.283154487609863, "learning_rate": 2.0135746606334844e-05, "loss": 6.0509, "step": 91 }, { "epoch": 0.09378185524974515, "grad_norm": 15.914741516113281, "learning_rate": 2.0361990950226245e-05, "loss": 6.8298, "step": 92 }, { "epoch": 0.09480122324159021, "grad_norm": 18.067726135253906, "learning_rate": 2.058823529411765e-05, "loss": 7.855, "step": 93 }, { "epoch": 0.09582059123343527, "grad_norm": 17.288843154907227, "learning_rate": 2.0814479638009053e-05, "loss": 6.6372, "step": 94 }, { "epoch": 0.09683995922528033, "grad_norm": 22.13617515563965, "learning_rate": 2.1040723981900453e-05, "loss": 5.7468, "step": 95 }, { "epoch": 0.09785932721712538, "grad_norm": 22.20960235595703, "learning_rate": 2.1266968325791857e-05, "loss": 7.5522, "step": 96 }, { "epoch": 0.09887869520897044, "grad_norm": 23.28131103515625, "learning_rate": 2.149321266968326e-05, "loss": 7.7825, "step": 97 }, { "epoch": 0.0998980632008155, "grad_norm": 29.695850372314453, "learning_rate": 2.1719457013574662e-05, "loss": 8.7452, "step": 98 }, { "epoch": 0.0998980632008155, "eval_Qnli-dev-1024_cosine_accuracy": 0.6458333333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.995652437210083, "eval_Qnli-dev-1024_cosine_ap": 0.6274798374964984, "eval_Qnli-dev-1024_cosine_f1": 0.6518518518518519, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.9515509605407715, "eval_Qnli-dev-1024_cosine_mcc": 0.1563007361345257, "eval_Qnli-dev-1024_cosine_precision": 0.4888888888888889, "eval_Qnli-dev-1024_cosine_recall": 0.9777777777777777, 
"eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8860945701599121, "eval_Qnli-dev_cosine_ap": 0.7645314494110582, "eval_Qnli-dev_cosine_f1": 0.7500000000000001, "eval_Qnli-dev_cosine_f1_threshold": 0.8442017436027527, "eval_Qnli-dev_cosine_mcc": 0.48653004754089046, "eval_Qnli-dev_cosine_precision": 0.6610169491525424, "eval_Qnli-dev_cosine_recall": 0.8666666666666667, "eval_allNLI--triplets-1024_cosine_accuracy": 0.7291666865348816, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 2.297825574874878, "eval_global_dataset_runtime": 104.2196, "eval_global_dataset_samples_per_second": 7.705, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.7291666865348816, "eval_sts-test-1024_pearson_cosine": 0.470983874633109, "eval_sts-test-1024_spearman_cosine": 0.7146928621162676, "eval_sts-test_pearson_cosine": 0.904138891044396, "eval_sts-test_spearman_cosine": 0.9172742489825538, "step": 98 }, { "epoch": 0.10091743119266056, "grad_norm": 21.82425880432129, "learning_rate": 2.1945701357466062e-05, "loss": 6.2322, "step": 99 }, { "epoch": 0.1019367991845056, "grad_norm": 25.734025955200195, "learning_rate": 2.2171945701357466e-05, "loss": 4.8433, "step": 100 }, { "epoch": 0.10295616717635066, "grad_norm": 28.17144775390625, "learning_rate": 2.239819004524887e-05, "loss": 4.8335, "step": 101 }, { "epoch": 0.10397553516819572, "grad_norm": 27.875871658325195, "learning_rate": 2.262443438914027e-05, "loss": 5.138, "step": 102 }, { "epoch": 0.10499490316004077, "grad_norm": 31.503034591674805, "learning_rate": 2.2850678733031675e-05, "loss": 4.8609, "step": 103 }, { "epoch": 0.10601427115188583, "grad_norm": 26.674440383911133, "learning_rate": 2.307692307692308e-05, "loss": 4.6204, "step": 104 }, { "epoch": 0.10703363914373089, "grad_norm": 25.039222717285156, "learning_rate": 2.330316742081448e-05, "loss": 4.3809, "step": 105 }, { "epoch": 0.10805300713557595, "grad_norm": 
26.333913803100586, "learning_rate": 2.3529411764705884e-05, "loss": 5.6703, "step": 106 }, { "epoch": 0.109072375127421, "grad_norm": 23.51517105102539, "learning_rate": 2.3755656108597284e-05, "loss": 5.0237, "step": 107 }, { "epoch": 0.11009174311926606, "grad_norm": 18.25855255126953, "learning_rate": 2.3981900452488688e-05, "loss": 4.002, "step": 108 }, { "epoch": 0.1111111111111111, "grad_norm": 19.852886199951172, "learning_rate": 2.4208144796380092e-05, "loss": 5.2532, "step": 109 }, { "epoch": 0.11213047910295616, "grad_norm": 17.45444107055664, "learning_rate": 2.4434389140271493e-05, "loss": 4.1033, "step": 110 }, { "epoch": 0.11314984709480122, "grad_norm": 14.521421432495117, "learning_rate": 2.4660633484162897e-05, "loss": 4.0818, "step": 111 }, { "epoch": 0.11416921508664628, "grad_norm": 12.525910377502441, "learning_rate": 2.48868778280543e-05, "loss": 3.458, "step": 112 }, { "epoch": 0.11518858307849134, "grad_norm": 14.503193855285645, "learning_rate": 2.51131221719457e-05, "loss": 4.3372, "step": 113 }, { "epoch": 0.1162079510703364, "grad_norm": 14.2279634475708, "learning_rate": 2.5339366515837106e-05, "loss": 4.9513, "step": 114 }, { "epoch": 0.11722731906218145, "grad_norm": 15.238719940185547, "learning_rate": 2.5565610859728506e-05, "loss": 5.2602, "step": 115 }, { "epoch": 0.11824668705402651, "grad_norm": 11.11528491973877, "learning_rate": 2.5791855203619913e-05, "loss": 3.1741, "step": 116 }, { "epoch": 0.11926605504587157, "grad_norm": 12.077157974243164, "learning_rate": 2.6018099547511314e-05, "loss": 4.1914, "step": 117 }, { "epoch": 0.12028542303771661, "grad_norm": 11.872669219970703, "learning_rate": 2.6244343891402718e-05, "loss": 2.8383, "step": 118 }, { "epoch": 0.12130479102956167, "grad_norm": 9.008302688598633, "learning_rate": 2.647058823529412e-05, "loss": 3.4165, "step": 119 }, { "epoch": 0.12232415902140673, "grad_norm": 10.702130317687988, "learning_rate": 2.6696832579185523e-05, "loss": 3.5085, "step": 120 }, { 
"epoch": 0.12334352701325178, "grad_norm": 10.306276321411133, "learning_rate": 2.6923076923076923e-05, "loss": 2.3992, "step": 121 }, { "epoch": 0.12436289500509684, "grad_norm": 9.035378456115723, "learning_rate": 2.7149321266968324e-05, "loss": 2.4849, "step": 122 }, { "epoch": 0.12538226299694188, "grad_norm": 8.996299743652344, "learning_rate": 2.737556561085973e-05, "loss": 2.2839, "step": 123 }, { "epoch": 0.12640163098878696, "grad_norm": 8.635661125183105, "learning_rate": 2.7601809954751135e-05, "loss": 2.567, "step": 124 }, { "epoch": 0.127420998980632, "grad_norm": 10.015826225280762, "learning_rate": 2.7828054298642536e-05, "loss": 4.5119, "step": 125 }, { "epoch": 0.12844036697247707, "grad_norm": 8.679932594299316, "learning_rate": 2.805429864253394e-05, "loss": 2.767, "step": 126 }, { "epoch": 0.12945973496432212, "grad_norm": 10.05739688873291, "learning_rate": 2.828054298642534e-05, "loss": 4.0225, "step": 127 }, { "epoch": 0.1304791029561672, "grad_norm": 9.361485481262207, "learning_rate": 2.850678733031674e-05, "loss": 1.8294, "step": 128 }, { "epoch": 0.13149847094801223, "grad_norm": 9.865928649902344, "learning_rate": 2.8733031674208145e-05, "loss": 4.4174, "step": 129 }, { "epoch": 0.1325178389398573, "grad_norm": 10.055468559265137, "learning_rate": 2.8959276018099553e-05, "loss": 2.0112, "step": 130 }, { "epoch": 0.13353720693170235, "grad_norm": 9.528116226196289, "learning_rate": 2.9185520361990953e-05, "loss": 1.7772, "step": 131 }, { "epoch": 0.1345565749235474, "grad_norm": 9.870166778564453, "learning_rate": 2.9411764705882354e-05, "loss": 3.1912, "step": 132 }, { "epoch": 0.13557594291539246, "grad_norm": 10.1703462600708, "learning_rate": 2.9638009049773758e-05, "loss": 2.4527, "step": 133 }, { "epoch": 0.1365953109072375, "grad_norm": 7.443604469299316, "learning_rate": 2.986425339366516e-05, "loss": 1.6424, "step": 134 }, { "epoch": 0.13761467889908258, "grad_norm": 10.003544807434082, "learning_rate": 3.0090497737556562e-05, 
"loss": 2.6143, "step": 135 }, { "epoch": 0.13863404689092762, "grad_norm": 9.352860450744629, "learning_rate": 3.0316742081447963e-05, "loss": 2.0498, "step": 136 }, { "epoch": 0.1396534148827727, "grad_norm": 7.393095970153809, "learning_rate": 3.0542986425339374e-05, "loss": 1.962, "step": 137 }, { "epoch": 0.14067278287461774, "grad_norm": 8.278059959411621, "learning_rate": 3.0769230769230774e-05, "loss": 1.789, "step": 138 }, { "epoch": 0.14169215086646278, "grad_norm": 6.577699184417725, "learning_rate": 3.0995475113122175e-05, "loss": 1.459, "step": 139 }, { "epoch": 0.14271151885830785, "grad_norm": 8.23404312133789, "learning_rate": 3.1221719457013576e-05, "loss": 1.2479, "step": 140 }, { "epoch": 0.1437308868501529, "grad_norm": 9.47106647491455, "learning_rate": 3.1447963800904976e-05, "loss": 2.5413, "step": 141 }, { "epoch": 0.14475025484199797, "grad_norm": 7.330000400543213, "learning_rate": 3.167420814479638e-05, "loss": 1.4077, "step": 142 }, { "epoch": 0.145769622833843, "grad_norm": 9.64534854888916, "learning_rate": 3.1900452488687784e-05, "loss": 2.6988, "step": 143 }, { "epoch": 0.14678899082568808, "grad_norm": 8.404465675354004, "learning_rate": 3.212669683257919e-05, "loss": 2.9772, "step": 144 }, { "epoch": 0.14780835881753313, "grad_norm": 8.019698143005371, "learning_rate": 3.235294117647059e-05, "loss": 1.6265, "step": 145 }, { "epoch": 0.1488277268093782, "grad_norm": 7.635079860687256, "learning_rate": 3.257918552036199e-05, "loss": 1.9404, "step": 146 }, { "epoch": 0.14984709480122324, "grad_norm": 7.929011821746826, "learning_rate": 3.2805429864253393e-05, "loss": 1.4251, "step": 147 }, { "epoch": 0.15086646279306828, "grad_norm": 7.869425296783447, "learning_rate": 3.3031674208144794e-05, "loss": 2.6657, "step": 148 }, { "epoch": 0.15188583078491336, "grad_norm": 8.369176864624023, "learning_rate": 3.32579185520362e-05, "loss": 2.1576, "step": 149 }, { "epoch": 0.1529051987767584, "grad_norm": 9.128487586975098, "learning_rate": 
3.34841628959276e-05, "loss": 1.53, "step": 150 }, { "epoch": 0.15392456676860347, "grad_norm": 7.673459529876709, "learning_rate": 3.371040723981901e-05, "loss": 1.2642, "step": 151 }, { "epoch": 0.15494393476044852, "grad_norm": 9.104422569274902, "learning_rate": 3.393665158371041e-05, "loss": 2.4846, "step": 152 }, { "epoch": 0.1559633027522936, "grad_norm": 8.658594131469727, "learning_rate": 3.416289592760181e-05, "loss": 1.4979, "step": 153 }, { "epoch": 0.15698267074413863, "grad_norm": 9.34330940246582, "learning_rate": 3.438914027149321e-05, "loss": 1.8149, "step": 154 }, { "epoch": 0.1580020387359837, "grad_norm": 9.401769638061523, "learning_rate": 3.461538461538462e-05, "loss": 1.4693, "step": 155 }, { "epoch": 0.15902140672782875, "grad_norm": 10.389461517333984, "learning_rate": 3.484162895927602e-05, "loss": 2.0114, "step": 156 }, { "epoch": 0.1600407747196738, "grad_norm": 9.321866989135742, "learning_rate": 3.506787330316742e-05, "loss": 1.5511, "step": 157 }, { "epoch": 0.16106014271151886, "grad_norm": 10.052262306213379, "learning_rate": 3.529411764705883e-05, "loss": 1.621, "step": 158 }, { "epoch": 0.1620795107033639, "grad_norm": 7.535787105560303, "learning_rate": 3.552036199095023e-05, "loss": 2.1122, "step": 159 }, { "epoch": 0.16309887869520898, "grad_norm": 9.70533275604248, "learning_rate": 3.574660633484163e-05, "loss": 1.8148, "step": 160 }, { "epoch": 0.16411824668705402, "grad_norm": 7.81204080581665, "learning_rate": 3.5972850678733036e-05, "loss": 1.9861, "step": 161 }, { "epoch": 0.1651376146788991, "grad_norm": 7.583981513977051, "learning_rate": 3.6199095022624436e-05, "loss": 1.3943, "step": 162 }, { "epoch": 0.16615698267074414, "grad_norm": 8.344895362854004, "learning_rate": 3.642533936651584e-05, "loss": 1.7317, "step": 163 }, { "epoch": 0.1671763506625892, "grad_norm": 7.1097331047058105, "learning_rate": 3.665158371040724e-05, "loss": 1.6, "step": 164 }, { "epoch": 0.16819571865443425, "grad_norm": 7.911113739013672, 
"learning_rate": 3.6877828054298645e-05, "loss": 1.2222, "step": 165 }, { "epoch": 0.1692150866462793, "grad_norm": 9.282394409179688, "learning_rate": 3.7104072398190046e-05, "loss": 1.6152, "step": 166 }, { "epoch": 0.17023445463812437, "grad_norm": 7.449146270751953, "learning_rate": 3.733031674208145e-05, "loss": 1.0374, "step": 167 }, { "epoch": 0.1712538226299694, "grad_norm": 9.164731979370117, "learning_rate": 3.7556561085972854e-05, "loss": 1.2844, "step": 168 }, { "epoch": 0.17227319062181448, "grad_norm": 6.987304210662842, "learning_rate": 3.7782805429864254e-05, "loss": 1.8805, "step": 169 }, { "epoch": 0.17329255861365953, "grad_norm": 7.447988033294678, "learning_rate": 3.8009049773755655e-05, "loss": 1.0972, "step": 170 }, { "epoch": 0.1743119266055046, "grad_norm": 7.7849321365356445, "learning_rate": 3.8235294117647055e-05, "loss": 1.7012, "step": 171 }, { "epoch": 0.17533129459734964, "grad_norm": 7.341614246368408, "learning_rate": 3.846153846153846e-05, "loss": 1.4182, "step": 172 }, { "epoch": 0.1763506625891947, "grad_norm": 8.514887809753418, "learning_rate": 3.868778280542987e-05, "loss": 2.6053, "step": 173 }, { "epoch": 0.17737003058103976, "grad_norm": 7.384711265563965, "learning_rate": 3.891402714932127e-05, "loss": 1.4193, "step": 174 }, { "epoch": 0.1783893985728848, "grad_norm": 8.553336143493652, "learning_rate": 3.914027149321267e-05, "loss": 2.251, "step": 175 }, { "epoch": 0.17940876656472987, "grad_norm": 8.517749786376953, "learning_rate": 3.936651583710407e-05, "loss": 1.9057, "step": 176 }, { "epoch": 0.18042813455657492, "grad_norm": 8.444558143615723, "learning_rate": 3.959276018099547e-05, "loss": 1.1228, "step": 177 }, { "epoch": 0.18144750254842, "grad_norm": 12.253990173339844, "learning_rate": 3.981900452488688e-05, "loss": 4.0905, "step": 178 }, { "epoch": 0.18246687054026503, "grad_norm": 5.70052433013916, "learning_rate": 4.004524886877829e-05, "loss": 0.9007, "step": 179 }, { "epoch": 0.1834862385321101, 
"grad_norm": 9.525473594665527, "learning_rate": 4.027149321266969e-05, "loss": 2.0665, "step": 180 }, { "epoch": 0.18450560652395515, "grad_norm": 6.146080493927002, "learning_rate": 4.049773755656109e-05, "loss": 1.0946, "step": 181 }, { "epoch": 0.18552497451580022, "grad_norm": 7.736543655395508, "learning_rate": 4.072398190045249e-05, "loss": 1.7479, "step": 182 }, { "epoch": 0.18654434250764526, "grad_norm": 8.404258728027344, "learning_rate": 4.095022624434389e-05, "loss": 2.0877, "step": 183 }, { "epoch": 0.1875637104994903, "grad_norm": 5.705750942230225, "learning_rate": 4.11764705882353e-05, "loss": 0.9239, "step": 184 }, { "epoch": 0.18858307849133538, "grad_norm": 7.753995895385742, "learning_rate": 4.14027149321267e-05, "loss": 1.7865, "step": 185 }, { "epoch": 0.18960244648318042, "grad_norm": 9.15240478515625, "learning_rate": 4.1628959276018105e-05, "loss": 2.1053, "step": 186 }, { "epoch": 0.1906218144750255, "grad_norm": 7.2251129150390625, "learning_rate": 4.1855203619909506e-05, "loss": 1.5273, "step": 187 }, { "epoch": 0.19164118246687054, "grad_norm": 6.803040981292725, "learning_rate": 4.2081447963800907e-05, "loss": 1.8726, "step": 188 }, { "epoch": 0.1926605504587156, "grad_norm": 5.646162509918213, "learning_rate": 4.230769230769231e-05, "loss": 1.4663, "step": 189 }, { "epoch": 0.19367991845056065, "grad_norm": 7.599930286407471, "learning_rate": 4.2533936651583714e-05, "loss": 1.0136, "step": 190 }, { "epoch": 0.1946992864424057, "grad_norm": 7.882979393005371, "learning_rate": 4.2760180995475115e-05, "loss": 1.121, "step": 191 }, { "epoch": 0.19571865443425077, "grad_norm": 8.919268608093262, "learning_rate": 4.298642533936652e-05, "loss": 1.6074, "step": 192 }, { "epoch": 0.1967380224260958, "grad_norm": 8.914848327636719, "learning_rate": 4.321266968325792e-05, "loss": 2.1956, "step": 193 }, { "epoch": 0.19775739041794088, "grad_norm": 8.603778839111328, "learning_rate": 4.3438914027149324e-05, "loss": 1.5425, "step": 194 }, { 
"epoch": 0.19877675840978593, "grad_norm": 8.500616073608398, "learning_rate": 4.3665158371040724e-05, "loss": 1.4552, "step": 195 }, { "epoch": 0.199796126401631, "grad_norm": 7.815979957580566, "learning_rate": 4.3891402714932125e-05, "loss": 1.2635, "step": 196 }, { "epoch": 0.199796126401631, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8570283651351929, "eval_Qnli-dev-1024_cosine_ap": 0.7434694144471753, "eval_Qnli-dev-1024_cosine_f1": 0.7207207207207208, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7906914353370667, "eval_Qnli-dev-1024_cosine_mcc": 0.4081269865567241, "eval_Qnli-dev-1024_cosine_precision": 0.6060606060606061, "eval_Qnli-dev-1024_cosine_recall": 0.8888888888888888, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8287814855575562, "eval_Qnli-dev_cosine_ap": 0.7646453733471359, "eval_Qnli-dev_cosine_f1": 0.7378640776699029, "eval_Qnli-dev_cosine_f1_threshold": 0.7745069265365601, "eval_Qnli-dev_cosine_mcc": 0.46153029495329345, "eval_Qnli-dev_cosine_precision": 0.6551724137931034, "eval_Qnli-dev_cosine_recall": 0.8444444444444444, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9166666865348816, "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, "eval_global_dataset_loss": 0.5179261565208435, "eval_global_dataset_runtime": 104.2216, "eval_global_dataset_samples_per_second": 7.705, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9166666865348816, "eval_sts-test-1024_pearson_cosine": 0.8476975008591285, "eval_sts-test-1024_spearman_cosine": 0.8973182534732806, "eval_sts-test_pearson_cosine": 0.9039400681490469, "eval_sts-test_spearman_cosine": 0.9185431775441114, "step": 196 }, { "epoch": 0.20081549439347604, "grad_norm": 9.34125804901123, "learning_rate": 4.411764705882353e-05, "loss": 1.7222, "step": 197 }, { "epoch": 0.2018348623853211, "grad_norm": 10.679852485656738, "learning_rate": 4.434389140271493e-05, 
"loss": 2.377, "step": 198 }, { "epoch": 0.20285423037716616, "grad_norm": 7.775190830230713, "learning_rate": 4.457013574660634e-05, "loss": 1.5317, "step": 199 }, { "epoch": 0.2038735983690112, "grad_norm": 6.390950679779053, "learning_rate": 4.479638009049774e-05, "loss": 1.0494, "step": 200 }, { "epoch": 0.20489296636085627, "grad_norm": 9.170794486999512, "learning_rate": 4.502262443438914e-05, "loss": 1.7392, "step": 201 }, { "epoch": 0.20591233435270132, "grad_norm": 7.37787389755249, "learning_rate": 4.524886877828054e-05, "loss": 1.2924, "step": 202 }, { "epoch": 0.2069317023445464, "grad_norm": 6.836249828338623, "learning_rate": 4.547511312217195e-05, "loss": 0.9413, "step": 203 }, { "epoch": 0.20795107033639143, "grad_norm": 9.543895721435547, "learning_rate": 4.570135746606335e-05, "loss": 2.1448, "step": 204 }, { "epoch": 0.2089704383282365, "grad_norm": 7.8430495262146, "learning_rate": 4.592760180995475e-05, "loss": 1.0357, "step": 205 }, { "epoch": 0.20998980632008155, "grad_norm": 9.558221817016602, "learning_rate": 4.615384615384616e-05, "loss": 1.3534, "step": 206 }, { "epoch": 0.21100917431192662, "grad_norm": 5.715826034545898, "learning_rate": 4.638009049773756e-05, "loss": 1.0564, "step": 207 }, { "epoch": 0.21202854230377166, "grad_norm": 8.720932960510254, "learning_rate": 4.660633484162896e-05, "loss": 0.9259, "step": 208 }, { "epoch": 0.2130479102956167, "grad_norm": 9.008890151977539, "learning_rate": 4.683257918552037e-05, "loss": 1.3813, "step": 209 }, { "epoch": 0.21406727828746178, "grad_norm": 7.1262006759643555, "learning_rate": 4.705882352941177e-05, "loss": 0.8828, "step": 210 }, { "epoch": 0.21508664627930682, "grad_norm": 12.986166000366211, "learning_rate": 4.728506787330317e-05, "loss": 2.9147, "step": 211 }, { "epoch": 0.2161060142711519, "grad_norm": 6.804072380065918, "learning_rate": 4.751131221719457e-05, "loss": 0.6539, "step": 212 }, { "epoch": 0.21712538226299694, "grad_norm": 9.138653755187988, "learning_rate": 
4.7737556561085976e-05, "loss": 1.3092, "step": 213 }, { "epoch": 0.218144750254842, "grad_norm": 7.303668975830078, "learning_rate": 4.7963800904977377e-05, "loss": 1.1562, "step": 214 }, { "epoch": 0.21916411824668705, "grad_norm": 7.368769645690918, "learning_rate": 4.8190045248868784e-05, "loss": 0.9509, "step": 215 }, { "epoch": 0.22018348623853212, "grad_norm": 5.067785263061523, "learning_rate": 4.8416289592760185e-05, "loss": 0.6664, "step": 216 }, { "epoch": 0.22120285423037717, "grad_norm": 5.643320083618164, "learning_rate": 4.8642533936651585e-05, "loss": 1.2315, "step": 217 }, { "epoch": 0.2222222222222222, "grad_norm": 6.596173286437988, "learning_rate": 4.8868778280542986e-05, "loss": 0.9855, "step": 218 }, { "epoch": 0.22324159021406728, "grad_norm": 6.5434770584106445, "learning_rate": 4.9095022624434386e-05, "loss": 0.9258, "step": 219 }, { "epoch": 0.22426095820591233, "grad_norm": 11.537922859191895, "learning_rate": 4.9321266968325794e-05, "loss": 1.6578, "step": 220 }, { "epoch": 0.2252803261977574, "grad_norm": 7.364137172698975, "learning_rate": 4.95475113122172e-05, "loss": 0.9666, "step": 221 }, { "epoch": 0.22629969418960244, "grad_norm": 8.102925300598145, "learning_rate": 4.97737556561086e-05, "loss": 0.9808, "step": 222 }, { "epoch": 0.2273190621814475, "grad_norm": 10.013775825500488, "learning_rate": 5e-05, "loss": 2.4156, "step": 223 }, { "epoch": 0.22833843017329256, "grad_norm": 7.974793434143066, "learning_rate": 5.02262443438914e-05, "loss": 1.1297, "step": 224 }, { "epoch": 0.22935779816513763, "grad_norm": 7.710846424102783, "learning_rate": 5.0452488687782804e-05, "loss": 1.3063, "step": 225 }, { "epoch": 0.23037716615698267, "grad_norm": 5.633566856384277, "learning_rate": 5.067873303167421e-05, "loss": 0.5567, "step": 226 }, { "epoch": 0.23139653414882771, "grad_norm": 9.50987720489502, "learning_rate": 5.090497737556561e-05, "loss": 1.3551, "step": 227 }, { "epoch": 0.2324159021406728, "grad_norm": 10.309268951416016, 
"learning_rate": 5.113122171945701e-05, "loss": 1.4079, "step": 228 }, { "epoch": 0.23343527013251783, "grad_norm": 7.812633037567139, "learning_rate": 5.135746606334841e-05, "loss": 0.948, "step": 229 }, { "epoch": 0.2344546381243629, "grad_norm": 8.013436317443848, "learning_rate": 5.158371040723983e-05, "loss": 0.9288, "step": 230 }, { "epoch": 0.23547400611620795, "grad_norm": 7.550686359405518, "learning_rate": 5.180995475113123e-05, "loss": 1.0077, "step": 231 }, { "epoch": 0.23649337410805302, "grad_norm": 7.249583721160889, "learning_rate": 5.203619909502263e-05, "loss": 1.0674, "step": 232 }, { "epoch": 0.23751274209989806, "grad_norm": 7.766678810119629, "learning_rate": 5.2262443438914036e-05, "loss": 1.3354, "step": 233 }, { "epoch": 0.23853211009174313, "grad_norm": 7.417704105377197, "learning_rate": 5.2488687782805436e-05, "loss": 1.0076, "step": 234 }, { "epoch": 0.23955147808358818, "grad_norm": 8.414839744567871, "learning_rate": 5.271493212669684e-05, "loss": 0.8814, "step": 235 }, { "epoch": 0.24057084607543322, "grad_norm": 9.537981986999512, "learning_rate": 5.294117647058824e-05, "loss": 1.7839, "step": 236 }, { "epoch": 0.2415902140672783, "grad_norm": 6.3290886878967285, "learning_rate": 5.316742081447964e-05, "loss": 1.2614, "step": 237 }, { "epoch": 0.24260958205912334, "grad_norm": 8.181835174560547, "learning_rate": 5.3393665158371045e-05, "loss": 0.8655, "step": 238 }, { "epoch": 0.2436289500509684, "grad_norm": 8.01684856414795, "learning_rate": 5.3619909502262446e-05, "loss": 1.8418, "step": 239 }, { "epoch": 0.24464831804281345, "grad_norm": 7.891118049621582, "learning_rate": 5.384615384615385e-05, "loss": 1.4465, "step": 240 }, { "epoch": 0.24566768603465852, "grad_norm": 8.080881118774414, "learning_rate": 5.407239819004525e-05, "loss": 0.8695, "step": 241 }, { "epoch": 0.24668705402650357, "grad_norm": 6.881638050079346, "learning_rate": 5.429864253393665e-05, "loss": 0.9695, "step": 242 }, { "epoch": 0.24770642201834864, 
"grad_norm": 10.03598690032959, "learning_rate": 5.4524886877828055e-05, "loss": 1.3374, "step": 243 }, { "epoch": 0.24872579001019368, "grad_norm": 7.844127178192139, "learning_rate": 5.475113122171946e-05, "loss": 0.6716, "step": 244 }, { "epoch": 0.24974515800203873, "grad_norm": 8.654071807861328, "learning_rate": 5.497737556561087e-05, "loss": 1.032, "step": 245 }, { "epoch": 0.25076452599388377, "grad_norm": 6.731460094451904, "learning_rate": 5.520361990950227e-05, "loss": 0.8033, "step": 246 }, { "epoch": 0.25178389398572887, "grad_norm": 9.436687469482422, "learning_rate": 5.542986425339367e-05, "loss": 0.9257, "step": 247 }, { "epoch": 0.2528032619775739, "grad_norm": 7.817379474639893, "learning_rate": 5.565610859728507e-05, "loss": 0.8311, "step": 248 }, { "epoch": 0.25382262996941896, "grad_norm": 6.328183650970459, "learning_rate": 5.588235294117647e-05, "loss": 0.5609, "step": 249 }, { "epoch": 0.254841997961264, "grad_norm": 8.576601028442383, "learning_rate": 5.610859728506788e-05, "loss": 1.5985, "step": 250 }, { "epoch": 0.2558613659531091, "grad_norm": 9.092324256896973, "learning_rate": 5.633484162895928e-05, "loss": 1.0025, "step": 251 }, { "epoch": 0.25688073394495414, "grad_norm": 11.906094551086426, "learning_rate": 5.656108597285068e-05, "loss": 2.0499, "step": 252 }, { "epoch": 0.2579001019367992, "grad_norm": 7.968968868255615, "learning_rate": 5.678733031674208e-05, "loss": 1.3116, "step": 253 }, { "epoch": 0.25891946992864423, "grad_norm": 5.355049133300781, "learning_rate": 5.701357466063348e-05, "loss": 0.5969, "step": 254 }, { "epoch": 0.2599388379204893, "grad_norm": 8.151896476745605, "learning_rate": 5.723981900452488e-05, "loss": 1.1107, "step": 255 }, { "epoch": 0.2609582059123344, "grad_norm": 9.651622772216797, "learning_rate": 5.746606334841629e-05, "loss": 1.8581, "step": 256 }, { "epoch": 0.2619775739041794, "grad_norm": 7.1527533531188965, "learning_rate": 5.769230769230769e-05, "loss": 0.572, "step": 257 }, { "epoch": 
0.26299694189602446, "grad_norm": 6.141374111175537, "learning_rate": 5.7918552036199105e-05, "loss": 0.9267, "step": 258 }, { "epoch": 0.2640163098878695, "grad_norm": 7.274891376495361, "learning_rate": 5.8144796380090506e-05, "loss": 0.6255, "step": 259 }, { "epoch": 0.2650356778797146, "grad_norm": 5.81080436706543, "learning_rate": 5.8371040723981906e-05, "loss": 0.7615, "step": 260 }, { "epoch": 0.26605504587155965, "grad_norm": 6.9981279373168945, "learning_rate": 5.859728506787331e-05, "loss": 0.6026, "step": 261 }, { "epoch": 0.2670744138634047, "grad_norm": 5.718660831451416, "learning_rate": 5.882352941176471e-05, "loss": 0.7263, "step": 262 }, { "epoch": 0.26809378185524974, "grad_norm": 5.391998767852783, "learning_rate": 5.9049773755656115e-05, "loss": 0.4643, "step": 263 }, { "epoch": 0.2691131498470948, "grad_norm": 6.843007564544678, "learning_rate": 5.9276018099547516e-05, "loss": 0.5101, "step": 264 }, { "epoch": 0.2701325178389399, "grad_norm": 5.087254047393799, "learning_rate": 5.9502262443438916e-05, "loss": 0.5562, "step": 265 }, { "epoch": 0.2711518858307849, "grad_norm": 7.482615947723389, "learning_rate": 5.972850678733032e-05, "loss": 1.256, "step": 266 }, { "epoch": 0.27217125382262997, "grad_norm": 6.911371231079102, "learning_rate": 5.995475113122172e-05, "loss": 0.6543, "step": 267 }, { "epoch": 0.273190621814475, "grad_norm": 7.643139839172363, "learning_rate": 6.0180995475113125e-05, "loss": 0.6698, "step": 268 }, { "epoch": 0.2742099898063201, "grad_norm": 9.08658504486084, "learning_rate": 6.0407239819004525e-05, "loss": 1.3843, "step": 269 }, { "epoch": 0.27522935779816515, "grad_norm": 8.890534400939941, "learning_rate": 6.0633484162895926e-05, "loss": 1.1421, "step": 270 }, { "epoch": 0.2762487257900102, "grad_norm": 9.855698585510254, "learning_rate": 6.0859728506787327e-05, "loss": 1.1558, "step": 271 }, { "epoch": 0.27726809378185524, "grad_norm": 8.32972526550293, "learning_rate": 6.108597285067875e-05, "loss": 1.603, 
"step": 272 }, { "epoch": 0.2782874617737003, "grad_norm": 8.393510818481445, "learning_rate": 6.131221719457015e-05, "loss": 0.7985, "step": 273 }, { "epoch": 0.2793068297655454, "grad_norm": 7.992040157318115, "learning_rate": 6.153846153846155e-05, "loss": 1.3884, "step": 274 }, { "epoch": 0.2803261977573904, "grad_norm": 8.646651268005371, "learning_rate": 6.176470588235295e-05, "loss": 1.0337, "step": 275 }, { "epoch": 0.28134556574923547, "grad_norm": 7.3104329109191895, "learning_rate": 6.199095022624435e-05, "loss": 1.0917, "step": 276 }, { "epoch": 0.2823649337410805, "grad_norm": 12.030378341674805, "learning_rate": 6.221719457013575e-05, "loss": 2.4149, "step": 277 }, { "epoch": 0.28338430173292556, "grad_norm": 4.781021595001221, "learning_rate": 6.244343891402715e-05, "loss": 0.4301, "step": 278 }, { "epoch": 0.28440366972477066, "grad_norm": 4.352090358734131, "learning_rate": 6.266968325791855e-05, "loss": 0.5084, "step": 279 }, { "epoch": 0.2854230377166157, "grad_norm": 5.88839864730835, "learning_rate": 6.289592760180995e-05, "loss": 0.7202, "step": 280 }, { "epoch": 0.28644240570846075, "grad_norm": 11.228419303894043, "learning_rate": 6.312217194570135e-05, "loss": 1.7983, "step": 281 }, { "epoch": 0.2874617737003058, "grad_norm": 6.119421005249023, "learning_rate": 6.334841628959275e-05, "loss": 0.673, "step": 282 }, { "epoch": 0.2884811416921509, "grad_norm": 6.405134677886963, "learning_rate": 6.357466063348417e-05, "loss": 0.6655, "step": 283 }, { "epoch": 0.28950050968399593, "grad_norm": 6.735506534576416, "learning_rate": 6.380090497737557e-05, "loss": 0.9121, "step": 284 }, { "epoch": 0.290519877675841, "grad_norm": 11.012415885925293, "learning_rate": 6.402714932126697e-05, "loss": 1.5978, "step": 285 }, { "epoch": 0.291539245667686, "grad_norm": 13.007187843322754, "learning_rate": 6.425339366515838e-05, "loss": 1.8536, "step": 286 }, { "epoch": 0.29255861365953106, "grad_norm": 12.273601531982422, "learning_rate": 
6.447963800904978e-05, "loss": 1.6397, "step": 287 }, { "epoch": 0.29357798165137616, "grad_norm": 9.6339750289917, "learning_rate": 6.470588235294118e-05, "loss": 0.8275, "step": 288 }, { "epoch": 0.2945973496432212, "grad_norm": 6.717658996582031, "learning_rate": 6.493212669683258e-05, "loss": 0.6003, "step": 289 }, { "epoch": 0.29561671763506625, "grad_norm": 8.443256378173828, "learning_rate": 6.515837104072399e-05, "loss": 0.9834, "step": 290 }, { "epoch": 0.2966360856269113, "grad_norm": 8.823105812072754, "learning_rate": 6.538461538461539e-05, "loss": 0.603, "step": 291 }, { "epoch": 0.2976554536187564, "grad_norm": 6.8099141120910645, "learning_rate": 6.561085972850679e-05, "loss": 0.6597, "step": 292 }, { "epoch": 0.29867482161060144, "grad_norm": 6.705087661743164, "learning_rate": 6.583710407239819e-05, "loss": 0.739, "step": 293 }, { "epoch": 0.2996941896024465, "grad_norm": 7.209024906158447, "learning_rate": 6.606334841628959e-05, "loss": 1.2564, "step": 294 }, { "epoch": 0.2996941896024465, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8565528392791748, "eval_Qnli-dev-1024_cosine_ap": 0.7531377591671699, "eval_Qnli-dev-1024_cosine_f1": 0.7254901960784313, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8200148344039917, "eval_Qnli-dev-1024_cosine_mcc": 0.43697448216965834, "eval_Qnli-dev-1024_cosine_precision": 0.6491228070175439, "eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7719540596008301, "eval_Qnli-dev_cosine_ap": 0.7588639733888536, "eval_Qnli-dev_cosine_f1": 0.7454545454545455, "eval_Qnli-dev_cosine_f1_threshold": 0.7090869545936584, "eval_Qnli-dev_cosine_mcc": 0.47013467657639685, "eval_Qnli-dev_cosine_precision": 0.6307692307692307, "eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.8854166865348816, 
"eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, "eval_global_dataset_loss": 0.4855804145336151, "eval_global_dataset_runtime": 104.3189, "eval_global_dataset_samples_per_second": 7.698, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.8854166865348816, "eval_sts-test-1024_pearson_cosine": 0.8681461030339531, "eval_sts-test-1024_spearman_cosine": 0.9053809631987397, "eval_sts-test_pearson_cosine": 0.9051731986667259, "eval_sts-test_spearman_cosine": 0.920630429781229, "step": 294 }, { "epoch": 0.3007135575942915, "grad_norm": 7.649487495422363, "learning_rate": 6.6289592760181e-05, "loss": 1.4442, "step": 295 }, { "epoch": 0.30173292558613657, "grad_norm": 7.740142822265625, "learning_rate": 6.65158371040724e-05, "loss": 1.4064, "step": 296 }, { "epoch": 0.30275229357798167, "grad_norm": 8.009271621704102, "learning_rate": 6.67420814479638e-05, "loss": 0.8456, "step": 297 }, { "epoch": 0.3037716615698267, "grad_norm": 5.718809604644775, "learning_rate": 6.69683257918552e-05, "loss": 0.7772, "step": 298 }, { "epoch": 0.30479102956167176, "grad_norm": 7.34658145904541, "learning_rate": 6.719457013574662e-05, "loss": 0.7619, "step": 299 }, { "epoch": 0.3058103975535168, "grad_norm": 8.556058883666992, "learning_rate": 6.742081447963802e-05, "loss": 1.002, "step": 300 }, { "epoch": 0.3068297655453619, "grad_norm": 8.995348930358887, "learning_rate": 6.764705882352942e-05, "loss": 1.4774, "step": 301 }, { "epoch": 0.30784913353720694, "grad_norm": 8.271109580993652, "learning_rate": 6.787330316742082e-05, "loss": 0.6814, "step": 302 }, { "epoch": 0.308868501529052, "grad_norm": 8.896450996398926, "learning_rate": 6.809954751131222e-05, "loss": 1.2286, "step": 303 }, { "epoch": 0.30988786952089703, "grad_norm": 9.076520919799805, "learning_rate": 6.832579185520362e-05, "loss": 1.8546, "step": 304 }, { "epoch": 0.3109072375127421, "grad_norm": 6.780123233795166, "learning_rate": 6.855203619909502e-05, "loss": 0.7547, "step": 305 }, { 
"epoch": 0.3119266055045872, "grad_norm": 7.728740692138672, "learning_rate": 6.877828054298642e-05, "loss": 1.3638, "step": 306 }, { "epoch": 0.3129459734964322, "grad_norm": 8.941544532775879, "learning_rate": 6.900452488687784e-05, "loss": 1.2604, "step": 307 }, { "epoch": 0.31396534148827726, "grad_norm": 6.70719575881958, "learning_rate": 6.923076923076924e-05, "loss": 0.5111, "step": 308 }, { "epoch": 0.3149847094801223, "grad_norm": 7.599255561828613, "learning_rate": 6.945701357466064e-05, "loss": 0.7153, "step": 309 }, { "epoch": 0.3160040774719674, "grad_norm": 7.323727607727051, "learning_rate": 6.968325791855204e-05, "loss": 0.8367, "step": 310 }, { "epoch": 0.31702344546381245, "grad_norm": 7.314160346984863, "learning_rate": 6.990950226244344e-05, "loss": 0.6643, "step": 311 }, { "epoch": 0.3180428134556575, "grad_norm": 8.265671730041504, "learning_rate": 7.013574660633484e-05, "loss": 1.0404, "step": 312 }, { "epoch": 0.31906218144750254, "grad_norm": 10.820046424865723, "learning_rate": 7.036199095022625e-05, "loss": 1.122, "step": 313 }, { "epoch": 0.3200815494393476, "grad_norm": 7.194378852844238, "learning_rate": 7.058823529411765e-05, "loss": 0.7091, "step": 314 }, { "epoch": 0.3211009174311927, "grad_norm": 7.764474868774414, "learning_rate": 7.081447963800906e-05, "loss": 1.07, "step": 315 }, { "epoch": 0.3221202854230377, "grad_norm": 7.757960796356201, "learning_rate": 7.104072398190046e-05, "loss": 0.7246, "step": 316 }, { "epoch": 0.32313965341488277, "grad_norm": 9.631681442260742, "learning_rate": 7.126696832579186e-05, "loss": 0.821, "step": 317 }, { "epoch": 0.3241590214067278, "grad_norm": 6.478396892547607, "learning_rate": 7.149321266968326e-05, "loss": 0.9556, "step": 318 }, { "epoch": 0.3251783893985729, "grad_norm": 8.858171463012695, "learning_rate": 7.171945701357467e-05, "loss": 1.2634, "step": 319 }, { "epoch": 0.32619775739041795, "grad_norm": 8.02340030670166, "learning_rate": 7.194570135746607e-05, "loss": 0.8621, 
"step": 320 }, { "epoch": 0.327217125382263, "grad_norm": 8.634239196777344, "learning_rate": 7.217194570135747e-05, "loss": 1.4215, "step": 321 }, { "epoch": 0.32823649337410804, "grad_norm": 8.96740436553955, "learning_rate": 7.239819004524887e-05, "loss": 0.8894, "step": 322 }, { "epoch": 0.3292558613659531, "grad_norm": 4.730165958404541, "learning_rate": 7.262443438914027e-05, "loss": 0.4134, "step": 323 }, { "epoch": 0.3302752293577982, "grad_norm": 6.1243181228637695, "learning_rate": 7.285067873303167e-05, "loss": 0.4147, "step": 324 }, { "epoch": 0.3312945973496432, "grad_norm": 7.8853607177734375, "learning_rate": 7.307692307692307e-05, "loss": 0.5721, "step": 325 }, { "epoch": 0.33231396534148827, "grad_norm": 9.193514823913574, "learning_rate": 7.330316742081448e-05, "loss": 0.8541, "step": 326 }, { "epoch": 0.3333333333333333, "grad_norm": 12.314509391784668, "learning_rate": 7.352941176470589e-05, "loss": 2.2959, "step": 327 }, { "epoch": 0.3343527013251784, "grad_norm": 4.384552955627441, "learning_rate": 7.375565610859729e-05, "loss": 0.4452, "step": 328 }, { "epoch": 0.33537206931702346, "grad_norm": 5.677075386047363, "learning_rate": 7.398190045248869e-05, "loss": 0.5008, "step": 329 }, { "epoch": 0.3363914373088685, "grad_norm": 6.752626419067383, "learning_rate": 7.420814479638009e-05, "loss": 0.4106, "step": 330 }, { "epoch": 0.33741080530071355, "grad_norm": 10.971478462219238, "learning_rate": 7.44343891402715e-05, "loss": 0.9237, "step": 331 }, { "epoch": 0.3384301732925586, "grad_norm": 7.574080944061279, "learning_rate": 7.46606334841629e-05, "loss": 0.6275, "step": 332 }, { "epoch": 0.3394495412844037, "grad_norm": 9.538507461547852, "learning_rate": 7.48868778280543e-05, "loss": 1.5184, "step": 333 }, { "epoch": 0.34046890927624873, "grad_norm": 9.139626502990723, "learning_rate": 7.511312217194571e-05, "loss": 1.7865, "step": 334 }, { "epoch": 0.3414882772680938, "grad_norm": 9.275596618652344, "learning_rate": 7.533936651583711e-05, 
"loss": 1.5947, "step": 335 }, { "epoch": 0.3425076452599388, "grad_norm": 9.375283241271973, "learning_rate": 7.556561085972851e-05, "loss": 1.0249, "step": 336 }, { "epoch": 0.3435270132517839, "grad_norm": 7.951083660125732, "learning_rate": 7.579185520361991e-05, "loss": 1.0227, "step": 337 }, { "epoch": 0.34454638124362896, "grad_norm": 9.579297065734863, "learning_rate": 7.601809954751131e-05, "loss": 1.28, "step": 338 }, { "epoch": 0.345565749235474, "grad_norm": 5.935997486114502, "learning_rate": 7.624434389140271e-05, "loss": 0.798, "step": 339 }, { "epoch": 0.34658511722731905, "grad_norm": 7.16936731338501, "learning_rate": 7.647058823529411e-05, "loss": 1.0408, "step": 340 }, { "epoch": 0.3476044852191641, "grad_norm": 9.448662757873535, "learning_rate": 7.669683257918553e-05, "loss": 0.9732, "step": 341 }, { "epoch": 0.3486238532110092, "grad_norm": 7.747692584991455, "learning_rate": 7.692307692307693e-05, "loss": 0.7588, "step": 342 }, { "epoch": 0.34964322120285424, "grad_norm": 10.198869705200195, "learning_rate": 7.714932126696833e-05, "loss": 0.9615, "step": 343 }, { "epoch": 0.3506625891946993, "grad_norm": 8.069470405578613, "learning_rate": 7.737556561085974e-05, "loss": 0.9895, "step": 344 }, { "epoch": 0.3516819571865443, "grad_norm": 10.662049293518066, "learning_rate": 7.760180995475114e-05, "loss": 1.923, "step": 345 }, { "epoch": 0.3527013251783894, "grad_norm": 6.53238582611084, "learning_rate": 7.782805429864254e-05, "loss": 0.615, "step": 346 }, { "epoch": 0.35372069317023447, "grad_norm": 11.10132122039795, "learning_rate": 7.805429864253394e-05, "loss": 1.4572, "step": 347 }, { "epoch": 0.3547400611620795, "grad_norm": 7.372711181640625, "learning_rate": 7.828054298642534e-05, "loss": 1.0083, "step": 348 }, { "epoch": 0.35575942915392456, "grad_norm": 7.358077526092529, "learning_rate": 7.850678733031674e-05, "loss": 0.922, "step": 349 }, { "epoch": 0.3567787971457696, "grad_norm": 8.45017147064209, "learning_rate": 
7.873303167420814e-05, "loss": 1.3767, "step": 350 }, { "epoch": 0.3577981651376147, "grad_norm": 4.858506679534912, "learning_rate": 7.895927601809954e-05, "loss": 0.6378, "step": 351 }, { "epoch": 0.35881753312945974, "grad_norm": 5.764273643493652, "learning_rate": 7.918552036199095e-05, "loss": 0.4063, "step": 352 }, { "epoch": 0.3598369011213048, "grad_norm": 8.656686782836914, "learning_rate": 7.941176470588235e-05, "loss": 1.0834, "step": 353 }, { "epoch": 0.36085626911314983, "grad_norm": 5.824944496154785, "learning_rate": 7.963800904977376e-05, "loss": 0.807, "step": 354 }, { "epoch": 0.36187563710499493, "grad_norm": 6.73368501663208, "learning_rate": 7.986425339366516e-05, "loss": 1.0293, "step": 355 }, { "epoch": 0.36289500509684, "grad_norm": 5.860096454620361, "learning_rate": 8.009049773755657e-05, "loss": 0.4371, "step": 356 }, { "epoch": 0.363914373088685, "grad_norm": 5.65436315536499, "learning_rate": 8.031674208144798e-05, "loss": 0.4334, "step": 357 }, { "epoch": 0.36493374108053006, "grad_norm": 7.566843509674072, "learning_rate": 8.054298642533938e-05, "loss": 0.949, "step": 358 }, { "epoch": 0.3659531090723751, "grad_norm": 6.286118984222412, "learning_rate": 8.076923076923078e-05, "loss": 0.5788, "step": 359 }, { "epoch": 0.3669724770642202, "grad_norm": 10.212640762329102, "learning_rate": 8.099547511312218e-05, "loss": 0.8535, "step": 360 }, { "epoch": 0.36799184505606525, "grad_norm": 9.267760276794434, "learning_rate": 8.122171945701358e-05, "loss": 1.2529, "step": 361 }, { "epoch": 0.3690112130479103, "grad_norm": 8.794651985168457, "learning_rate": 8.144796380090498e-05, "loss": 0.8974, "step": 362 }, { "epoch": 0.37003058103975534, "grad_norm": 13.3441162109375, "learning_rate": 8.167420814479638e-05, "loss": 1.9105, "step": 363 }, { "epoch": 0.37104994903160043, "grad_norm": 9.258030891418457, "learning_rate": 8.190045248868778e-05, "loss": 0.7717, "step": 364 }, { "epoch": 0.3720693170234455, "grad_norm": 6.051854610443115, 
"learning_rate": 8.212669683257918e-05, "loss": 1.1052, "step": 365 }, { "epoch": 0.3730886850152905, "grad_norm": 9.53382682800293, "learning_rate": 8.23529411764706e-05, "loss": 0.7298, "step": 366 }, { "epoch": 0.37410805300713557, "grad_norm": 6.723752498626709, "learning_rate": 8.2579185520362e-05, "loss": 0.7039, "step": 367 }, { "epoch": 0.3751274209989806, "grad_norm": 6.844725608825684, "learning_rate": 8.28054298642534e-05, "loss": 0.8536, "step": 368 }, { "epoch": 0.3761467889908257, "grad_norm": 5.233691692352295, "learning_rate": 8.303167420814481e-05, "loss": 0.4774, "step": 369 }, { "epoch": 0.37716615698267075, "grad_norm": 4.231795787811279, "learning_rate": 8.325791855203621e-05, "loss": 0.3297, "step": 370 }, { "epoch": 0.3781855249745158, "grad_norm": 11.760458946228027, "learning_rate": 8.348416289592761e-05, "loss": 1.693, "step": 371 }, { "epoch": 0.37920489296636084, "grad_norm": 10.05996036529541, "learning_rate": 8.371040723981901e-05, "loss": 0.853, "step": 372 }, { "epoch": 0.38022426095820594, "grad_norm": 8.649154663085938, "learning_rate": 8.393665158371041e-05, "loss": 0.7242, "step": 373 }, { "epoch": 0.381243628950051, "grad_norm": 6.6194748878479, "learning_rate": 8.416289592760181e-05, "loss": 0.5019, "step": 374 }, { "epoch": 0.382262996941896, "grad_norm": 8.058365821838379, "learning_rate": 8.438914027149321e-05, "loss": 0.6206, "step": 375 }, { "epoch": 0.38328236493374107, "grad_norm": 6.66504430770874, "learning_rate": 8.461538461538461e-05, "loss": 0.4872, "step": 376 }, { "epoch": 0.3843017329255861, "grad_norm": 5.8679518699646, "learning_rate": 8.484162895927601e-05, "loss": 0.4515, "step": 377 }, { "epoch": 0.3853211009174312, "grad_norm": 9.830297470092773, "learning_rate": 8.506787330316743e-05, "loss": 1.4657, "step": 378 }, { "epoch": 0.38634046890927626, "grad_norm": 8.260361671447754, "learning_rate": 8.529411764705883e-05, "loss": 0.8411, "step": 379 }, { "epoch": 0.3873598369011213, "grad_norm": 
8.48035717010498, "learning_rate": 8.552036199095023e-05, "loss": 0.7654, "step": 380 }, { "epoch": 0.38837920489296635, "grad_norm": 7.481667518615723, "learning_rate": 8.574660633484163e-05, "loss": 0.5413, "step": 381 }, { "epoch": 0.3893985728848114, "grad_norm": 5.923032760620117, "learning_rate": 8.597285067873304e-05, "loss": 0.4594, "step": 382 }, { "epoch": 0.3904179408766565, "grad_norm": 11.383003234863281, "learning_rate": 8.619909502262445e-05, "loss": 1.2656, "step": 383 }, { "epoch": 0.39143730886850153, "grad_norm": 9.154252052307129, "learning_rate": 8.642533936651585e-05, "loss": 0.6881, "step": 384 }, { "epoch": 0.3924566768603466, "grad_norm": 8.656584739685059, "learning_rate": 8.665158371040725e-05, "loss": 0.8169, "step": 385 }, { "epoch": 0.3934760448521916, "grad_norm": 9.6775541305542, "learning_rate": 8.687782805429865e-05, "loss": 0.937, "step": 386 }, { "epoch": 0.3944954128440367, "grad_norm": 12.836816787719727, "learning_rate": 8.710407239819005e-05, "loss": 2.1343, "step": 387 }, { "epoch": 0.39551478083588176, "grad_norm": 6.1532487869262695, "learning_rate": 8.733031674208145e-05, "loss": 0.3644, "step": 388 }, { "epoch": 0.3965341488277268, "grad_norm": 6.3952555656433105, "learning_rate": 8.755656108597285e-05, "loss": 0.4406, "step": 389 }, { "epoch": 0.39755351681957185, "grad_norm": 7.005934238433838, "learning_rate": 8.778280542986425e-05, "loss": 0.5444, "step": 390 }, { "epoch": 0.3985728848114169, "grad_norm": 8.97732925415039, "learning_rate": 8.800904977375566e-05, "loss": 1.3891, "step": 391 }, { "epoch": 0.399592252803262, "grad_norm": 6.8778181076049805, "learning_rate": 8.823529411764706e-05, "loss": 0.6287, "step": 392 }, { "epoch": 0.399592252803262, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8465664982795715, "eval_Qnli-dev-1024_cosine_ap": 0.7683064400770494, "eval_Qnli-dev-1024_cosine_f1": 0.6976744186046511, 
"eval_Qnli-dev-1024_cosine_f1_threshold": 0.834477424621582, "eval_Qnli-dev-1024_cosine_mcc": 0.45496263625850347, "eval_Qnli-dev-1024_cosine_precision": 0.7317073170731707, "eval_Qnli-dev-1024_cosine_recall": 0.6666666666666666, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.74493807554245, "eval_Qnli-dev_cosine_ap": 0.7575725381948821, "eval_Qnli-dev_cosine_f1": 0.7476635514018692, "eval_Qnli-dev_cosine_f1_threshold": 0.7015562057495117, "eval_Qnli-dev_cosine_mcc": 0.47737827504723207, "eval_Qnli-dev_cosine_precision": 0.6451612903225806, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9479166865348816, "eval_global_dataset_loss": 0.3703947365283966, "eval_global_dataset_runtime": 104.3143, "eval_global_dataset_samples_per_second": 7.698, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9270833134651184, "eval_sts-test-1024_pearson_cosine": 0.8782079507952609, "eval_sts-test-1024_spearman_cosine": 0.9080003485202497, "eval_sts-test_pearson_cosine": 0.9052799671643099, "eval_sts-test_spearman_cosine": 0.9200953636370672, "step": 392 }, { "epoch": 0.40061162079510704, "grad_norm": 7.236085414886475, "learning_rate": 8.846153846153847e-05, "loss": 1.066, "step": 393 }, { "epoch": 0.4016309887869521, "grad_norm": 7.638827323913574, "learning_rate": 8.868778280542987e-05, "loss": 1.0406, "step": 394 }, { "epoch": 0.4026503567787971, "grad_norm": 6.2278876304626465, "learning_rate": 8.891402714932127e-05, "loss": 0.819, "step": 395 }, { "epoch": 0.4036697247706422, "grad_norm": 7.04884147644043, "learning_rate": 8.914027149321268e-05, "loss": 0.5826, "step": 396 }, { "epoch": 0.40468909276248727, "grad_norm": 8.24869441986084, "learning_rate": 8.936651583710408e-05, "loss": 0.6355, "step": 397 }, { "epoch": 0.4057084607543323, "grad_norm": 9.9276704788208, "learning_rate": 8.959276018099548e-05, 
"loss": 0.7566, "step": 398 }, { "epoch": 0.40672782874617736, "grad_norm": 8.717905044555664, "learning_rate": 8.981900452488688e-05, "loss": 0.8174, "step": 399 }, { "epoch": 0.4077471967380224, "grad_norm": 8.515538215637207, "learning_rate": 9.004524886877828e-05, "loss": 0.8905, "step": 400 }, { "epoch": 0.4087665647298675, "grad_norm": 6.506967067718506, "learning_rate": 9.027149321266968e-05, "loss": 0.6646, "step": 401 }, { "epoch": 0.40978593272171254, "grad_norm": 9.33711051940918, "learning_rate": 9.049773755656108e-05, "loss": 0.9056, "step": 402 }, { "epoch": 0.4108053007135576, "grad_norm": 5.124199867248535, "learning_rate": 9.07239819004525e-05, "loss": 0.3689, "step": 403 }, { "epoch": 0.41182466870540263, "grad_norm": 5.597712516784668, "learning_rate": 9.09502262443439e-05, "loss": 0.3709, "step": 404 }, { "epoch": 0.41284403669724773, "grad_norm": 7.897356033325195, "learning_rate": 9.11764705882353e-05, "loss": 0.6708, "step": 405 }, { "epoch": 0.4138634046890928, "grad_norm": 8.37096881866455, "learning_rate": 9.14027149321267e-05, "loss": 1.0531, "step": 406 }, { "epoch": 0.4148827726809378, "grad_norm": 7.530358791351318, "learning_rate": 9.16289592760181e-05, "loss": 1.1355, "step": 407 }, { "epoch": 0.41590214067278286, "grad_norm": 10.304217338562012, "learning_rate": 9.18552036199095e-05, "loss": 0.8042, "step": 408 }, { "epoch": 0.4169215086646279, "grad_norm": 7.292766094207764, "learning_rate": 9.20814479638009e-05, "loss": 0.3915, "step": 409 }, { "epoch": 0.417940876656473, "grad_norm": 10.453197479248047, "learning_rate": 9.230769230769232e-05, "loss": 1.9388, "step": 410 }, { "epoch": 0.41896024464831805, "grad_norm": 2.7471694946289062, "learning_rate": 9.253393665158372e-05, "loss": 0.3044, "step": 411 }, { "epoch": 0.4199796126401631, "grad_norm": 5.923367023468018, "learning_rate": 9.276018099547512e-05, "loss": 0.6153, "step": 412 }, { "epoch": 0.42099898063200814, "grad_norm": 8.176202774047852, "learning_rate": 
9.298642533936652e-05, "loss": 0.9407, "step": 413 }, { "epoch": 0.42201834862385323, "grad_norm": 8.41361141204834, "learning_rate": 9.321266968325792e-05, "loss": 0.6876, "step": 414 }, { "epoch": 0.4230377166156983, "grad_norm": 9.516852378845215, "learning_rate": 9.343891402714933e-05, "loss": 0.9694, "step": 415 }, { "epoch": 0.4240570846075433, "grad_norm": 7.201638698577881, "learning_rate": 9.366515837104073e-05, "loss": 0.7868, "step": 416 }, { "epoch": 0.42507645259938837, "grad_norm": 9.961840629577637, "learning_rate": 9.389140271493213e-05, "loss": 0.7735, "step": 417 }, { "epoch": 0.4260958205912334, "grad_norm": 10.842241287231445, "learning_rate": 9.411764705882353e-05, "loss": 1.1682, "step": 418 }, { "epoch": 0.4271151885830785, "grad_norm": 5.817572116851807, "learning_rate": 9.434389140271494e-05, "loss": 0.3465, "step": 419 }, { "epoch": 0.42813455657492355, "grad_norm": 6.870133399963379, "learning_rate": 9.457013574660634e-05, "loss": 0.5699, "step": 420 }, { "epoch": 0.4291539245667686, "grad_norm": 6.472342014312744, "learning_rate": 9.479638009049774e-05, "loss": 0.6128, "step": 421 }, { "epoch": 0.43017329255861364, "grad_norm": 6.5723795890808105, "learning_rate": 9.502262443438914e-05, "loss": 0.8886, "step": 422 }, { "epoch": 0.43119266055045874, "grad_norm": 6.1384429931640625, "learning_rate": 9.524886877828054e-05, "loss": 0.5124, "step": 423 }, { "epoch": 0.4322120285423038, "grad_norm": 6.241471290588379, "learning_rate": 9.547511312217195e-05, "loss": 0.4409, "step": 424 }, { "epoch": 0.4332313965341488, "grad_norm": 9.087861061096191, "learning_rate": 9.570135746606335e-05, "loss": 0.6368, "step": 425 }, { "epoch": 0.43425076452599387, "grad_norm": 9.653539657592773, "learning_rate": 9.592760180995475e-05, "loss": 0.9874, "step": 426 }, { "epoch": 0.4352701325178389, "grad_norm": 13.366517066955566, "learning_rate": 9.615384615384617e-05, "loss": 1.6544, "step": 427 }, { "epoch": 0.436289500509684, "grad_norm": 
6.302597522735596, "learning_rate": 9.638009049773757e-05, "loss": 0.4561, "step": 428 }, { "epoch": 0.43730886850152906, "grad_norm": 7.133030891418457, "learning_rate": 9.660633484162897e-05, "loss": 0.5443, "step": 429 }, { "epoch": 0.4383282364933741, "grad_norm": 6.341556072235107, "learning_rate": 9.683257918552037e-05, "loss": 0.5183, "step": 430 }, { "epoch": 0.43934760448521915, "grad_norm": 10.657116889953613, "learning_rate": 9.705882352941177e-05, "loss": 1.1585, "step": 431 }, { "epoch": 0.44036697247706424, "grad_norm": 7.707142353057861, "learning_rate": 9.728506787330317e-05, "loss": 1.4285, "step": 432 }, { "epoch": 0.4413863404689093, "grad_norm": 8.27905559539795, "learning_rate": 9.751131221719457e-05, "loss": 1.0638, "step": 433 }, { "epoch": 0.44240570846075433, "grad_norm": 5.601058483123779, "learning_rate": 9.773755656108597e-05, "loss": 0.553, "step": 434 }, { "epoch": 0.4434250764525994, "grad_norm": 9.084299087524414, "learning_rate": 9.796380090497737e-05, "loss": 1.0009, "step": 435 }, { "epoch": 0.4444444444444444, "grad_norm": 5.231532573699951, "learning_rate": 9.819004524886877e-05, "loss": 0.5211, "step": 436 }, { "epoch": 0.4454638124362895, "grad_norm": 7.0478715896606445, "learning_rate": 9.841628959276019e-05, "loss": 0.6483, "step": 437 }, { "epoch": 0.44648318042813456, "grad_norm": 8.44166088104248, "learning_rate": 9.864253393665159e-05, "loss": 1.2634, "step": 438 }, { "epoch": 0.4475025484199796, "grad_norm": 7.2984771728515625, "learning_rate": 9.8868778280543e-05, "loss": 0.5242, "step": 439 }, { "epoch": 0.44852191641182465, "grad_norm": 9.091867446899414, "learning_rate": 9.90950226244344e-05, "loss": 1.3739, "step": 440 }, { "epoch": 0.44954128440366975, "grad_norm": 8.1068115234375, "learning_rate": 9.93212669683258e-05, "loss": 1.0153, "step": 441 }, { "epoch": 0.4505606523955148, "grad_norm": 7.902680397033691, "learning_rate": 9.95475113122172e-05, "loss": 0.7174, "step": 442 }, { "epoch": 0.45158002038735984, 
"grad_norm": 8.784537315368652, "learning_rate": 9.97737556561086e-05, "loss": 0.8631, "step": 443 }, { "epoch": 0.4525993883792049, "grad_norm": 8.205148696899414, "learning_rate": 0.0001, "loss": 1.0721, "step": 444 }, { "epoch": 0.4536187563710499, "grad_norm": 4.789169788360596, "learning_rate": 9.999964497873585e-05, "loss": 0.3682, "step": 445 }, { "epoch": 0.454638124362895, "grad_norm": 11.335341453552246, "learning_rate": 9.999857991998499e-05, "loss": 1.3278, "step": 446 }, { "epoch": 0.45565749235474007, "grad_norm": 8.901962280273438, "learning_rate": 9.999680483887217e-05, "loss": 0.665, "step": 447 }, { "epoch": 0.4566768603465851, "grad_norm": 6.525248050689697, "learning_rate": 9.999431976060504e-05, "loss": 0.77, "step": 448 }, { "epoch": 0.45769622833843016, "grad_norm": 7.658937931060791, "learning_rate": 9.999112472047386e-05, "loss": 0.9903, "step": 449 }, { "epoch": 0.45871559633027525, "grad_norm": 5.406915664672852, "learning_rate": 9.998721976385087e-05, "loss": 0.3372, "step": 450 }, { "epoch": 0.4597349643221203, "grad_norm": 5.920129299163818, "learning_rate": 9.998260494618979e-05, "loss": 0.6911, "step": 451 }, { "epoch": 0.46075433231396534, "grad_norm": 7.490262985229492, "learning_rate": 9.997728033302496e-05, "loss": 0.505, "step": 452 }, { "epoch": 0.4617737003058104, "grad_norm": 8.21649169921875, "learning_rate": 9.997124599997043e-05, "loss": 1.3397, "step": 453 }, { "epoch": 0.46279306829765543, "grad_norm": 5.116532802581787, "learning_rate": 9.996450203271886e-05, "loss": 0.2853, "step": 454 }, { "epoch": 0.46381243628950053, "grad_norm": 7.29067325592041, "learning_rate": 9.995704852704029e-05, "loss": 1.198, "step": 455 }, { "epoch": 0.4648318042813456, "grad_norm": 10.033268928527832, "learning_rate": 9.994888558878086e-05, "loss": 1.7965, "step": 456 }, { "epoch": 0.4658511722731906, "grad_norm": 5.4102606773376465, "learning_rate": 9.994001333386125e-05, "loss": 0.2987, "step": 457 }, { "epoch": 0.46687054026503566, 
"grad_norm": 8.109895706176758, "learning_rate": 9.993043188827501e-05, "loss": 0.6864, "step": 458 }, { "epoch": 0.46788990825688076, "grad_norm": 9.893292427062988, "learning_rate": 9.992014138808682e-05, "loss": 0.9016, "step": 459 }, { "epoch": 0.4689092762487258, "grad_norm": 7.73169469833374, "learning_rate": 9.990914197943053e-05, "loss": 0.7314, "step": 460 }, { "epoch": 0.46992864424057085, "grad_norm": 8.335735321044922, "learning_rate": 9.989743381850711e-05, "loss": 0.6633, "step": 461 }, { "epoch": 0.4709480122324159, "grad_norm": 8.655631065368652, "learning_rate": 9.988501707158243e-05, "loss": 0.9783, "step": 462 }, { "epoch": 0.47196738022426094, "grad_norm": 9.166102409362793, "learning_rate": 9.987189191498479e-05, "loss": 1.1307, "step": 463 }, { "epoch": 0.47298674821610603, "grad_norm": 10.597552299499512, "learning_rate": 9.985805853510262e-05, "loss": 1.4662, "step": 464 }, { "epoch": 0.4740061162079511, "grad_norm": 10.318975448608398, "learning_rate": 9.984351712838167e-05, "loss": 1.4666, "step": 465 }, { "epoch": 0.4750254841997961, "grad_norm": 7.259106636047363, "learning_rate": 9.98282679013223e-05, "loss": 0.5918, "step": 466 }, { "epoch": 0.47604485219164117, "grad_norm": 10.192667961120605, "learning_rate": 9.981231107047648e-05, "loss": 1.5836, "step": 467 }, { "epoch": 0.47706422018348627, "grad_norm": 6.506603717803955, "learning_rate": 9.97956468624448e-05, "loss": 0.5698, "step": 468 }, { "epoch": 0.4780835881753313, "grad_norm": 5.789127349853516, "learning_rate": 9.977827551387318e-05, "loss": 0.4654, "step": 469 }, { "epoch": 0.47910295616717635, "grad_norm": 8.615316390991211, "learning_rate": 9.976019727144956e-05, "loss": 0.9522, "step": 470 }, { "epoch": 0.4801223241590214, "grad_norm": 4.792436599731445, "learning_rate": 9.974141239190034e-05, "loss": 0.4748, "step": 471 }, { "epoch": 0.48114169215086644, "grad_norm": 9.958406448364258, "learning_rate": 9.972192114198677e-05, "loss": 2.0766, "step": 472 }, { "epoch": 
0.48216106014271154, "grad_norm": 4.340735912322998, "learning_rate": 9.970172379850122e-05, "loss": 0.7071, "step": 473 }, { "epoch": 0.4831804281345566, "grad_norm": 7.170680999755859, "learning_rate": 9.968082064826314e-05, "loss": 0.435, "step": 474 }, { "epoch": 0.4841997961264016, "grad_norm": 3.927189350128174, "learning_rate": 9.965921198811501e-05, "loss": 0.4551, "step": 475 }, { "epoch": 0.48521916411824667, "grad_norm": 10.183062553405762, "learning_rate": 9.96368981249182e-05, "loss": 1.1758, "step": 476 }, { "epoch": 0.48623853211009177, "grad_norm": 9.819293022155762, "learning_rate": 9.961387937554857e-05, "loss": 0.9995, "step": 477 }, { "epoch": 0.4872579001019368, "grad_norm": 11.188612937927246, "learning_rate": 9.95901560668919e-05, "loss": 1.6207, "step": 478 }, { "epoch": 0.48827726809378186, "grad_norm": 7.268994331359863, "learning_rate": 9.95657285358394e-05, "loss": 0.6978, "step": 479 }, { "epoch": 0.4892966360856269, "grad_norm": 5.575627326965332, "learning_rate": 9.954059712928275e-05, "loss": 0.4236, "step": 480 }, { "epoch": 0.49031600407747195, "grad_norm": 9.621591567993164, "learning_rate": 9.951476220410929e-05, "loss": 1.8218, "step": 481 }, { "epoch": 0.49133537206931704, "grad_norm": 7.322023391723633, "learning_rate": 9.948822412719697e-05, "loss": 0.8749, "step": 482 }, { "epoch": 0.4923547400611621, "grad_norm": 8.407424926757812, "learning_rate": 9.946098327540902e-05, "loss": 1.1704, "step": 483 }, { "epoch": 0.49337410805300713, "grad_norm": 4.8855438232421875, "learning_rate": 9.943304003558873e-05, "loss": 0.5327, "step": 484 }, { "epoch": 0.4943934760448522, "grad_norm": 8.738515853881836, "learning_rate": 9.940439480455386e-05, "loss": 1.2009, "step": 485 }, { "epoch": 0.4954128440366973, "grad_norm": 7.554356575012207, "learning_rate": 9.937504798909106e-05, "loss": 0.5427, "step": 486 }, { "epoch": 0.4964322120285423, "grad_norm": 8.203272819519043, "learning_rate": 9.934500000595008e-05, "loss": 0.5893, "step": 
487 }, { "epoch": 0.49745158002038736, "grad_norm": 8.477286338806152, "learning_rate": 9.931425128183782e-05, "loss": 1.061, "step": 488 }, { "epoch": 0.4984709480122324, "grad_norm": 7.389923095703125, "learning_rate": 9.928280225341232e-05, "loss": 0.5465, "step": 489 }, { "epoch": 0.49949031600407745, "grad_norm": 10.051106452941895, "learning_rate": 9.925065336727654e-05, "loss": 0.7035, "step": 490 }, { "epoch": 0.49949031600407745, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.817620038986206, "eval_Qnli-dev-1024_cosine_ap": 0.7443202788050278, "eval_Qnli-dev-1024_cosine_f1": 0.7291666666666667, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.817620038986206, "eval_Qnli-dev-1024_cosine_mcc": 0.46405228758169936, "eval_Qnli-dev-1024_cosine_precision": 0.6862745098039216, "eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7795548439025879, "eval_Qnli-dev_cosine_ap": 0.7446338608862075, "eval_Qnli-dev_cosine_f1": 0.7378640776699029, "eval_Qnli-dev_cosine_f1_threshold": 0.6985307335853577, "eval_Qnli-dev_cosine_mcc": 0.46153029495329345, "eval_Qnli-dev_cosine_precision": 0.6551724137931034, "eval_Qnli-dev_cosine_recall": 0.8444444444444444, "eval_allNLI--triplets-1024_cosine_accuracy": 0.90625, "eval_allNLI-triplets_cosine_accuracy": 0.9375, "eval_global_dataset_loss": 0.34814590215682983, "eval_global_dataset_runtime": 104.2751, "eval_global_dataset_samples_per_second": 7.701, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.90625, "eval_sts-test-1024_pearson_cosine": 0.8574057933500303, "eval_sts-test-1024_spearman_cosine": 0.8986116241995802, "eval_sts-test_pearson_cosine": 0.9019111579722014, "eval_sts-test_spearman_cosine": 0.9181479205822737, "step": 490 }, { "epoch": 0.5005096839959225, "grad_norm": 10.88537311553955, "learning_rate": 9.921780507997202e-05, "loss": 1.6596, 
"step": 491 }, { "epoch": 0.5015290519877675, "grad_norm": 5.0818891525268555, "learning_rate": 9.918425785797235e-05, "loss": 0.4475, "step": 492 }, { "epoch": 0.5025484199796126, "grad_norm": 12.540839195251465, "learning_rate": 9.915001217767663e-05, "loss": 2.0803, "step": 493 }, { "epoch": 0.5035677879714577, "grad_norm": 6.171934604644775, "learning_rate": 9.911506852540267e-05, "loss": 0.4296, "step": 494 }, { "epoch": 0.5045871559633027, "grad_norm": 9.624109268188477, "learning_rate": 9.907942739738001e-05, "loss": 1.5435, "step": 495 }, { "epoch": 0.5056065239551478, "grad_norm": 6.020090579986572, "learning_rate": 9.904308929974302e-05, "loss": 0.6073, "step": 496 }, { "epoch": 0.5066258919469928, "grad_norm": 8.587658882141113, "learning_rate": 9.900605474852358e-05, "loss": 1.1774, "step": 497 }, { "epoch": 0.5076452599388379, "grad_norm": 6.535181999206543, "learning_rate": 9.896832426964382e-05, "loss": 0.8951, "step": 498 }, { "epoch": 0.508664627930683, "grad_norm": 5.945138454437256, "learning_rate": 9.892989839890863e-05, "loss": 0.3775, "step": 499 }, { "epoch": 0.509683995922528, "grad_norm": 7.641120433807373, "learning_rate": 9.889077768199806e-05, "loss": 0.8086, "step": 500 }, { "epoch": 0.5107033639143731, "grad_norm": 8.10549545288086, "learning_rate": 9.885096267445957e-05, "loss": 0.3864, "step": 501 }, { "epoch": 0.5117227319062182, "grad_norm": 7.266530990600586, "learning_rate": 9.881045394170012e-05, "loss": 0.8865, "step": 502 }, { "epoch": 0.5127420998980632, "grad_norm": 9.056779861450195, "learning_rate": 9.876925205897818e-05, "loss": 0.567, "step": 503 }, { "epoch": 0.5137614678899083, "grad_norm": 7.140566349029541, "learning_rate": 9.872735761139554e-05, "loss": 0.9304, "step": 504 }, { "epoch": 0.5147808358817533, "grad_norm": 11.422016143798828, "learning_rate": 9.868477119388896e-05, "loss": 0.6977, "step": 505 }, { "epoch": 0.5158002038735984, "grad_norm": 11.155719757080078, "learning_rate": 9.864149341122181e-05, 
"loss": 1.3174, "step": 506 }, { "epoch": 0.5168195718654435, "grad_norm": 8.781103134155273, "learning_rate": 9.859752487797542e-05, "loss": 1.2481, "step": 507 }, { "epoch": 0.5178389398572885, "grad_norm": 5.503263473510742, "learning_rate": 9.855286621854034e-05, "loss": 0.4894, "step": 508 }, { "epoch": 0.5188583078491336, "grad_norm": 7.503839015960693, "learning_rate": 9.850751806710753e-05, "loss": 0.8095, "step": 509 }, { "epoch": 0.5198776758409785, "grad_norm": 5.623706817626953, "learning_rate": 9.846148106765933e-05, "loss": 0.388, "step": 510 }, { "epoch": 0.5208970438328236, "grad_norm": 5.178555965423584, "learning_rate": 9.841475587396028e-05, "loss": 0.6725, "step": 511 }, { "epoch": 0.5219164118246687, "grad_norm": 7.296833038330078, "learning_rate": 9.836734314954785e-05, "loss": 0.4804, "step": 512 }, { "epoch": 0.5229357798165137, "grad_norm": 8.692532539367676, "learning_rate": 9.831924356772308e-05, "loss": 1.2414, "step": 513 }, { "epoch": 0.5239551478083588, "grad_norm": 9.865914344787598, "learning_rate": 9.827045781154093e-05, "loss": 1.0319, "step": 514 }, { "epoch": 0.5249745158002038, "grad_norm": 14.857895851135254, "learning_rate": 9.822098657380065e-05, "loss": 2.0732, "step": 515 }, { "epoch": 0.5259938837920489, "grad_norm": 6.85409688949585, "learning_rate": 9.817083055703587e-05, "loss": 1.1168, "step": 516 }, { "epoch": 0.527013251783894, "grad_norm": 10.668725967407227, "learning_rate": 9.811999047350471e-05, "loss": 0.8056, "step": 517 }, { "epoch": 0.528032619775739, "grad_norm": 6.76224946975708, "learning_rate": 9.806846704517957e-05, "loss": 0.5322, "step": 518 }, { "epoch": 0.5290519877675841, "grad_norm": 4.4465789794921875, "learning_rate": 9.801626100373699e-05, "loss": 0.4348, "step": 519 }, { "epoch": 0.5300713557594292, "grad_norm": 8.388195991516113, "learning_rate": 9.796337309054717e-05, "loss": 0.6316, "step": 520 }, { "epoch": 0.5310907237512742, "grad_norm": 5.859539031982422, "learning_rate": 
9.790980405666344e-05, "loss": 0.3212, "step": 521 }, { "epoch": 0.5321100917431193, "grad_norm": 6.299170017242432, "learning_rate": 9.785555466281162e-05, "loss": 0.4739, "step": 522 }, { "epoch": 0.5331294597349643, "grad_norm": 9.609426498413086, "learning_rate": 9.780062567937928e-05, "loss": 1.1692, "step": 523 }, { "epoch": 0.5341488277268094, "grad_norm": 9.116230964660645, "learning_rate": 9.774501788640471e-05, "loss": 0.941, "step": 524 }, { "epoch": 0.5351681957186545, "grad_norm": 5.024673938751221, "learning_rate": 9.768873207356586e-05, "loss": 0.3767, "step": 525 }, { "epoch": 0.5361875637104995, "grad_norm": 7.534763336181641, "learning_rate": 9.763176904016913e-05, "loss": 0.5264, "step": 526 }, { "epoch": 0.5372069317023446, "grad_norm": 7.897163391113281, "learning_rate": 9.757412959513807e-05, "loss": 0.4345, "step": 527 }, { "epoch": 0.5382262996941896, "grad_norm": 8.391239166259766, "learning_rate": 9.751581455700181e-05, "loss": 1.0352, "step": 528 }, { "epoch": 0.5392456676860347, "grad_norm": 6.951046466827393, "learning_rate": 9.745682475388348e-05, "loss": 1.1014, "step": 529 }, { "epoch": 0.5402650356778798, "grad_norm": 6.4283671379089355, "learning_rate": 9.73971610234885e-05, "loss": 0.7368, "step": 530 }, { "epoch": 0.5412844036697247, "grad_norm": 7.643414497375488, "learning_rate": 9.733682421309256e-05, "loss": 0.5324, "step": 531 }, { "epoch": 0.5423037716615698, "grad_norm": 7.95609188079834, "learning_rate": 9.727581517952969e-05, "loss": 0.5351, "step": 532 }, { "epoch": 0.5433231396534148, "grad_norm": 11.28146743774414, "learning_rate": 9.721413478918007e-05, "loss": 1.6815, "step": 533 }, { "epoch": 0.5443425076452599, "grad_norm": 8.346885681152344, "learning_rate": 9.715178391795769e-05, "loss": 0.8125, "step": 534 }, { "epoch": 0.545361875637105, "grad_norm": 8.147517204284668, "learning_rate": 9.708876345129797e-05, "loss": 0.8629, "step": 535 }, { "epoch": 0.54638124362895, "grad_norm": 10.061439514160156, 
"learning_rate": 9.702507428414513e-05, "loss": 1.3161, "step": 536 }, { "epoch": 0.5474006116207951, "grad_norm": 8.882964134216309, "learning_rate": 9.696071732093952e-05, "loss": 1.0465, "step": 537 }, { "epoch": 0.5484199796126402, "grad_norm": 5.954410076141357, "learning_rate": 9.689569347560475e-05, "loss": 0.4531, "step": 538 }, { "epoch": 0.5494393476044852, "grad_norm": 10.33085823059082, "learning_rate": 9.683000367153474e-05, "loss": 0.5567, "step": 539 }, { "epoch": 0.5504587155963303, "grad_norm": 5.265343189239502, "learning_rate": 9.676364884158058e-05, "loss": 0.7093, "step": 540 }, { "epoch": 0.5514780835881753, "grad_norm": 10.214452743530273, "learning_rate": 9.66966299280373e-05, "loss": 1.9339, "step": 541 }, { "epoch": 0.5524974515800204, "grad_norm": 7.001688480377197, "learning_rate": 9.662894788263044e-05, "loss": 0.3659, "step": 542 }, { "epoch": 0.5535168195718655, "grad_norm": 6.640339374542236, "learning_rate": 9.656060366650267e-05, "loss": 1.0505, "step": 543 }, { "epoch": 0.5545361875637105, "grad_norm": 9.303877830505371, "learning_rate": 9.649159825019996e-05, "loss": 0.8766, "step": 544 }, { "epoch": 0.5555555555555556, "grad_norm": 8.21275806427002, "learning_rate": 9.642193261365791e-05, "loss": 0.6526, "step": 545 }, { "epoch": 0.5565749235474006, "grad_norm": 6.97646427154541, "learning_rate": 9.635160774618782e-05, "loss": 0.5529, "step": 546 }, { "epoch": 0.5575942915392457, "grad_norm": 6.77686071395874, "learning_rate": 9.628062464646264e-05, "loss": 0.4817, "step": 547 }, { "epoch": 0.5586136595310908, "grad_norm": 3.5217092037200928, "learning_rate": 9.620898432250272e-05, "loss": 0.4804, "step": 548 }, { "epoch": 0.5596330275229358, "grad_norm": 5.6369476318359375, "learning_rate": 9.613668779166165e-05, "loss": 0.4508, "step": 549 }, { "epoch": 0.5606523955147809, "grad_norm": 5.534257888793945, "learning_rate": 9.606373608061162e-05, "loss": 0.4339, "step": 550 }, { "epoch": 0.5616717635066258, "grad_norm": 
10.922380447387695, "learning_rate": 9.5990130225329e-05, "loss": 0.712, "step": 551 }, { "epoch": 0.5626911314984709, "grad_norm": 6.2288360595703125, "learning_rate": 9.59158712710795e-05, "loss": 0.3974, "step": 552 }, { "epoch": 0.563710499490316, "grad_norm": 11.958196640014648, "learning_rate": 9.58409602724035e-05, "loss": 1.0016, "step": 553 }, { "epoch": 0.564729867482161, "grad_norm": 8.267114639282227, "learning_rate": 9.576539829310085e-05, "loss": 0.5751, "step": 554 }, { "epoch": 0.5657492354740061, "grad_norm": 11.533574104309082, "learning_rate": 9.568918640621594e-05, "loss": 1.111, "step": 555 }, { "epoch": 0.5667686034658511, "grad_norm": 6.519062519073486, "learning_rate": 9.561232569402239e-05, "loss": 0.4202, "step": 556 }, { "epoch": 0.5677879714576962, "grad_norm": 9.009593963623047, "learning_rate": 9.553481724800768e-05, "loss": 0.7822, "step": 557 }, { "epoch": 0.5688073394495413, "grad_norm": 12.121257781982422, "learning_rate": 9.545666216885767e-05, "loss": 1.3844, "step": 558 }, { "epoch": 0.5698267074413863, "grad_norm": 5.953427314758301, "learning_rate": 9.537786156644097e-05, "loss": 0.3881, "step": 559 }, { "epoch": 0.5708460754332314, "grad_norm": 7.334780216217041, "learning_rate": 9.529841655979315e-05, "loss": 0.6317, "step": 560 }, { "epoch": 0.5718654434250765, "grad_norm": 5.987368583679199, "learning_rate": 9.521832827710088e-05, "loss": 0.4976, "step": 561 }, { "epoch": 0.5728848114169215, "grad_norm": 3.9462735652923584, "learning_rate": 9.51375978556859e-05, "loss": 0.2741, "step": 562 }, { "epoch": 0.5739041794087666, "grad_norm": 6.374652862548828, "learning_rate": 9.505622644198885e-05, "loss": 0.6232, "step": 563 }, { "epoch": 0.5749235474006116, "grad_norm": 3.525486707687378, "learning_rate": 9.497421519155303e-05, "loss": 0.2083, "step": 564 }, { "epoch": 0.5759429153924567, "grad_norm": 9.60029125213623, "learning_rate": 9.489156526900795e-05, "loss": 1.0605, "step": 565 }, { "epoch": 0.5769622833843018, 
"grad_norm": 12.22358226776123, "learning_rate": 9.480827784805278e-05, "loss": 1.2086, "step": 566 }, { "epoch": 0.5779816513761468, "grad_norm": 4.388841152191162, "learning_rate": 9.472435411143978e-05, "loss": 0.2217, "step": 567 }, { "epoch": 0.5790010193679919, "grad_norm": 5.581283092498779, "learning_rate": 9.463979525095738e-05, "loss": 0.4215, "step": 568 }, { "epoch": 0.5800203873598369, "grad_norm": 7.996876239776611, "learning_rate": 9.455460246741331e-05, "loss": 0.663, "step": 569 }, { "epoch": 0.581039755351682, "grad_norm": 9.21956729888916, "learning_rate": 9.446877697061757e-05, "loss": 0.653, "step": 570 }, { "epoch": 0.582059123343527, "grad_norm": 8.46827220916748, "learning_rate": 9.43823199793652e-05, "loss": 0.6895, "step": 571 }, { "epoch": 0.583078491335372, "grad_norm": 9.72203540802002, "learning_rate": 9.429523272141903e-05, "loss": 1.1101, "step": 572 }, { "epoch": 0.5840978593272171, "grad_norm": 8.79525089263916, "learning_rate": 9.420751643349219e-05, "loss": 1.2991, "step": 573 }, { "epoch": 0.5851172273190621, "grad_norm": 6.719937801361084, "learning_rate": 9.411917236123059e-05, "loss": 0.4072, "step": 574 }, { "epoch": 0.5861365953109072, "grad_norm": 8.360040664672852, "learning_rate": 9.403020175919517e-05, "loss": 1.169, "step": 575 }, { "epoch": 0.5871559633027523, "grad_norm": 5.402820587158203, "learning_rate": 9.394060589084417e-05, "loss": 0.3374, "step": 576 }, { "epoch": 0.5881753312945973, "grad_norm": 9.037818908691406, "learning_rate": 9.385038602851515e-05, "loss": 0.6785, "step": 577 }, { "epoch": 0.5891946992864424, "grad_norm": 9.151761054992676, "learning_rate": 9.375954345340685e-05, "loss": 1.2757, "step": 578 }, { "epoch": 0.5902140672782875, "grad_norm": 5.834461212158203, "learning_rate": 9.366807945556113e-05, "loss": 0.5899, "step": 579 }, { "epoch": 0.5912334352701325, "grad_norm": 5.722581386566162, "learning_rate": 9.357599533384453e-05, "loss": 0.3389, "step": 580 }, { "epoch": 0.5922528032619776, 
"grad_norm": 10.132628440856934, "learning_rate": 9.348329239592995e-05, "loss": 1.631, "step": 581 }, { "epoch": 0.5932721712538226, "grad_norm": 9.922087669372559, "learning_rate": 9.338997195827792e-05, "loss": 1.3975, "step": 582 }, { "epoch": 0.5942915392456677, "grad_norm": 8.382550239562988, "learning_rate": 9.329603534611806e-05, "loss": 0.4654, "step": 583 }, { "epoch": 0.5953109072375128, "grad_norm": 8.080007553100586, "learning_rate": 9.32014838934301e-05, "loss": 0.56, "step": 584 }, { "epoch": 0.5963302752293578, "grad_norm": 5.616114616394043, "learning_rate": 9.310631894292518e-05, "loss": 0.2282, "step": 585 }, { "epoch": 0.5973496432212029, "grad_norm": 10.813580513000488, "learning_rate": 9.301054184602647e-05, "loss": 1.0754, "step": 586 }, { "epoch": 0.5983690112130479, "grad_norm": 8.062788963317871, "learning_rate": 9.291415396285024e-05, "loss": 0.4411, "step": 587 }, { "epoch": 0.599388379204893, "grad_norm": 8.6395845413208, "learning_rate": 9.281715666218643e-05, "loss": 0.9243, "step": 588 }, { "epoch": 0.599388379204893, "eval_Qnli-dev-1024_cosine_accuracy": 0.71875, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8436912298202515, "eval_Qnli-dev-1024_cosine_ap": 0.7587494204458187, "eval_Qnli-dev-1024_cosine_f1": 0.6875, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8028630018234253, "eval_Qnli-dev-1024_cosine_mcc": 0.3803921568627451, "eval_Qnli-dev-1024_cosine_precision": 0.6470588235294118, "eval_Qnli-dev-1024_cosine_recall": 0.7333333333333333, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8078321218490601, "eval_Qnli-dev_cosine_ap": 0.7321739553695406, "eval_Qnli-dev_cosine_f1": 0.7339449541284404, "eval_Qnli-dev_cosine_f1_threshold": 0.6781572699546814, "eval_Qnli-dev_cosine_mcc": 0.4428074427700477, "eval_Qnli-dev_cosine_precision": 0.625, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.90625, "eval_allNLI-triplets_cosine_accuracy": 
0.9375, "eval_global_dataset_loss": 0.36118289828300476, "eval_global_dataset_runtime": 104.3983, "eval_global_dataset_samples_per_second": 7.692, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.90625, "eval_sts-test-1024_pearson_cosine": 0.8631921152381832, "eval_sts-test-1024_spearman_cosine": 0.9009700758334896, "eval_sts-test_pearson_cosine": 0.9009355736320144, "eval_sts-test_spearman_cosine": 0.9171725695772274, "step": 588 }, { "epoch": 0.6004077471967381, "grad_norm": 6.184821128845215, "learning_rate": 9.271955132147916e-05, "loss": 0.3572, "step": 589 }, { "epoch": 0.601427115188583, "grad_norm": 8.318941116333008, "learning_rate": 9.262133932680733e-05, "loss": 0.6761, "step": 590 }, { "epoch": 0.6024464831804281, "grad_norm": 7.54533052444458, "learning_rate": 9.252252207286479e-05, "loss": 0.5754, "step": 591 }, { "epoch": 0.6034658511722731, "grad_norm": 4.341547012329102, "learning_rate": 9.24231009629406e-05, "loss": 0.3664, "step": 592 }, { "epoch": 0.6044852191641182, "grad_norm": 7.616749286651611, "learning_rate": 9.232307740889909e-05, "loss": 0.5391, "step": 593 }, { "epoch": 0.6055045871559633, "grad_norm": 4.843873977661133, "learning_rate": 9.222245283115979e-05, "loss": 0.518, "step": 594 }, { "epoch": 0.6065239551478083, "grad_norm": 8.295080184936523, "learning_rate": 9.21212286586773e-05, "loss": 0.5263, "step": 595 }, { "epoch": 0.6075433231396534, "grad_norm": 3.873260736465454, "learning_rate": 9.201940632892096e-05, "loss": 0.2995, "step": 596 }, { "epoch": 0.6085626911314985, "grad_norm": 4.403683185577393, "learning_rate": 9.191698728785448e-05, "loss": 0.4181, "step": 597 }, { "epoch": 0.6095820591233435, "grad_norm": 7.282264709472656, "learning_rate": 9.181397298991532e-05, "loss": 0.5087, "step": 598 }, { "epoch": 0.6106014271151886, "grad_norm": 5.132986068725586, "learning_rate": 9.171036489799416e-05, "loss": 0.4344, "step": 599 }, { "epoch": 0.6116207951070336, "grad_norm": 11.096871376037598, 
"learning_rate": 9.160616448341403e-05, "loss": 1.6529, "step": 600 }, { "epoch": 0.6126401630988787, "grad_norm": 4.306335926055908, "learning_rate": 9.150137322590944e-05, "loss": 0.4079, "step": 601 }, { "epoch": 0.6136595310907238, "grad_norm": 5.622674942016602, "learning_rate": 9.139599261360537e-05, "loss": 0.3123, "step": 602 }, { "epoch": 0.6146788990825688, "grad_norm": 10.172139167785645, "learning_rate": 9.129002414299617e-05, "loss": 1.4398, "step": 603 }, { "epoch": 0.6156982670744139, "grad_norm": 10.175543785095215, "learning_rate": 9.118346931892423e-05, "loss": 1.5553, "step": 604 }, { "epoch": 0.6167176350662589, "grad_norm": 7.616044521331787, "learning_rate": 9.10763296545587e-05, "loss": 0.7958, "step": 605 }, { "epoch": 0.617737003058104, "grad_norm": 5.390756607055664, "learning_rate": 9.096860667137394e-05, "loss": 0.3815, "step": 606 }, { "epoch": 0.6187563710499491, "grad_norm": 6.750911235809326, "learning_rate": 9.086030189912794e-05, "loss": 0.787, "step": 607 }, { "epoch": 0.6197757390417941, "grad_norm": 5.695408344268799, "learning_rate": 9.075141687584057e-05, "loss": 0.2352, "step": 608 }, { "epoch": 0.6207951070336392, "grad_norm": 12.017024040222168, "learning_rate": 9.06419531477718e-05, "loss": 0.6469, "step": 609 }, { "epoch": 0.6218144750254841, "grad_norm": 9.70870304107666, "learning_rate": 9.053191226939965e-05, "loss": 1.0997, "step": 610 }, { "epoch": 0.6228338430173292, "grad_norm": 8.183333396911621, "learning_rate": 9.042129580339822e-05, "loss": 0.8762, "step": 611 }, { "epoch": 0.6238532110091743, "grad_norm": 8.237792015075684, "learning_rate": 9.031010532061538e-05, "loss": 0.6259, "step": 612 }, { "epoch": 0.6248725790010193, "grad_norm": 7.553733825683594, "learning_rate": 9.019834240005058e-05, "loss": 0.8133, "step": 613 }, { "epoch": 0.6258919469928644, "grad_norm": 8.876506805419922, "learning_rate": 9.008600862883235e-05, "loss": 1.2119, "step": 614 }, { "epoch": 0.6269113149847095, "grad_norm": 
6.738461971282959, "learning_rate": 8.997310560219578e-05, "loss": 0.5068, "step": 615 }, { "epoch": 0.6279306829765545, "grad_norm": 9.400090217590332, "learning_rate": 8.985963492345991e-05, "loss": 0.7723, "step": 616 }, { "epoch": 0.6289500509683996, "grad_norm": 8.690120697021484, "learning_rate": 8.974559820400486e-05, "loss": 1.0791, "step": 617 }, { "epoch": 0.6299694189602446, "grad_norm": 5.441365718841553, "learning_rate": 8.963099706324904e-05, "loss": 0.348, "step": 618 }, { "epoch": 0.6309887869520897, "grad_norm": 8.373964309692383, "learning_rate": 8.951583312862616e-05, "loss": 0.6523, "step": 619 }, { "epoch": 0.6320081549439348, "grad_norm": 8.361169815063477, "learning_rate": 8.9400108035562e-05, "loss": 0.6241, "step": 620 }, { "epoch": 0.6330275229357798, "grad_norm": 8.095520973205566, "learning_rate": 8.928382342745137e-05, "loss": 0.5039, "step": 621 }, { "epoch": 0.6340468909276249, "grad_norm": 9.879805564880371, "learning_rate": 8.916698095563453e-05, "loss": 1.0113, "step": 622 }, { "epoch": 0.6350662589194699, "grad_norm": 11.630424499511719, "learning_rate": 8.904958227937406e-05, "loss": 1.0527, "step": 623 }, { "epoch": 0.636085626911315, "grad_norm": 9.939377784729004, "learning_rate": 8.893162906583094e-05, "loss": 1.3893, "step": 624 }, { "epoch": 0.6371049949031601, "grad_norm": 7.852113723754883, "learning_rate": 8.881312299004117e-05, "loss": 0.7191, "step": 625 }, { "epoch": 0.6381243628950051, "grad_norm": 7.134123802185059, "learning_rate": 8.86940657348918e-05, "loss": 0.3591, "step": 626 }, { "epoch": 0.6391437308868502, "grad_norm": 5.795046806335449, "learning_rate": 8.857445899109715e-05, "loss": 0.9856, "step": 627 }, { "epoch": 0.6401630988786952, "grad_norm": 7.4533610343933105, "learning_rate": 8.845430445717469e-05, "loss": 0.7603, "step": 628 }, { "epoch": 0.6411824668705403, "grad_norm": 9.926379203796387, "learning_rate": 8.8333603839421e-05, "loss": 1.1553, "step": 629 }, { "epoch": 0.6422018348623854, 
"grad_norm": 7.032261371612549, "learning_rate": 8.821235885188754e-05, "loss": 0.5608, "step": 630 }, { "epoch": 0.6432212028542303, "grad_norm": 6.283802509307861, "learning_rate": 8.809057121635624e-05, "loss": 0.4338, "step": 631 }, { "epoch": 0.6442405708460754, "grad_norm": 2.8640384674072266, "learning_rate": 8.796824266231511e-05, "loss": 0.1376, "step": 632 }, { "epoch": 0.6452599388379205, "grad_norm": 7.722833633422852, "learning_rate": 8.784537492693368e-05, "loss": 0.6539, "step": 633 }, { "epoch": 0.6462793068297655, "grad_norm": 7.714670658111572, "learning_rate": 8.772196975503828e-05, "loss": 0.5017, "step": 634 }, { "epoch": 0.6472986748216106, "grad_norm": 4.0773091316223145, "learning_rate": 8.759802889908733e-05, "loss": 0.1888, "step": 635 }, { "epoch": 0.6483180428134556, "grad_norm": 12.99943733215332, "learning_rate": 8.747355411914642e-05, "loss": 1.6077, "step": 636 }, { "epoch": 0.6493374108053007, "grad_norm": 10.86596393585205, "learning_rate": 8.734854718286327e-05, "loss": 0.9635, "step": 637 }, { "epoch": 0.6503567787971458, "grad_norm": 9.243484497070312, "learning_rate": 8.722300986544272e-05, "loss": 0.9786, "step": 638 }, { "epoch": 0.6513761467889908, "grad_norm": 10.92319393157959, "learning_rate": 8.709694394962142e-05, "loss": 0.6728, "step": 639 }, { "epoch": 0.6523955147808359, "grad_norm": 11.628253936767578, "learning_rate": 8.697035122564266e-05, "loss": 0.8592, "step": 640 }, { "epoch": 0.6534148827726809, "grad_norm": 5.602497100830078, "learning_rate": 8.684323349123075e-05, "loss": 0.3945, "step": 641 }, { "epoch": 0.654434250764526, "grad_norm": 7.681665420532227, "learning_rate": 8.671559255156567e-05, "loss": 0.7486, "step": 642 }, { "epoch": 0.6554536187563711, "grad_norm": 9.017338752746582, "learning_rate": 8.658743021925733e-05, "loss": 0.7793, "step": 643 }, { "epoch": 0.6564729867482161, "grad_norm": 5.24987268447876, "learning_rate": 8.645874831431982e-05, "loss": 0.4401, "step": 644 }, { "epoch": 
0.6574923547400612, "grad_norm": 10.270877838134766, "learning_rate": 8.632954866414567e-05, "loss": 0.6189, "step": 645 }, { "epoch": 0.6585117227319062, "grad_norm": 8.378297805786133, "learning_rate": 8.619983310347982e-05, "loss": 0.7339, "step": 646 }, { "epoch": 0.6595310907237513, "grad_norm": 6.045844554901123, "learning_rate": 8.606960347439355e-05, "loss": 0.4089, "step": 647 }, { "epoch": 0.6605504587155964, "grad_norm": 10.432483673095703, "learning_rate": 8.593886162625835e-05, "loss": 1.1412, "step": 648 }, { "epoch": 0.6615698267074414, "grad_norm": 5.939512729644775, "learning_rate": 8.580760941571967e-05, "loss": 0.798, "step": 649 }, { "epoch": 0.6625891946992865, "grad_norm": 12.093332290649414, "learning_rate": 8.567584870667056e-05, "loss": 1.0588, "step": 650 }, { "epoch": 0.6636085626911316, "grad_norm": 8.624043464660645, "learning_rate": 8.554358137022513e-05, "loss": 0.9044, "step": 651 }, { "epoch": 0.6646279306829765, "grad_norm": 7.735975742340088, "learning_rate": 8.54108092846921e-05, "loss": 0.4464, "step": 652 }, { "epoch": 0.6656472986748216, "grad_norm": 3.8205575942993164, "learning_rate": 8.527753433554797e-05, "loss": 0.2756, "step": 653 }, { "epoch": 0.6666666666666666, "grad_norm": 10.537273406982422, "learning_rate": 8.51437584154104e-05, "loss": 1.123, "step": 654 }, { "epoch": 0.6676860346585117, "grad_norm": 6.052632808685303, "learning_rate": 8.500948342401124e-05, "loss": 0.5377, "step": 655 }, { "epoch": 0.6687054026503568, "grad_norm": 7.787528991699219, "learning_rate": 8.48747112681696e-05, "loss": 0.5164, "step": 656 }, { "epoch": 0.6697247706422018, "grad_norm": 10.115964889526367, "learning_rate": 8.473944386176469e-05, "loss": 0.7155, "step": 657 }, { "epoch": 0.6707441386340469, "grad_norm": 6.880122184753418, "learning_rate": 8.460368312570873e-05, "loss": 0.4512, "step": 658 }, { "epoch": 0.6717635066258919, "grad_norm": 8.106338500976562, "learning_rate": 8.446743098791969e-05, "loss": 0.6199, "step": 659 }, 
{ "epoch": 0.672782874617737, "grad_norm": 11.035154342651367, "learning_rate": 8.433068938329376e-05, "loss": 0.6673, "step": 660 }, { "epoch": 0.6738022426095821, "grad_norm": 4.484703540802002, "learning_rate": 8.419346025367809e-05, "loss": 0.5934, "step": 661 }, { "epoch": 0.6748216106014271, "grad_norm": 6.977105140686035, "learning_rate": 8.4055745547843e-05, "loss": 0.5034, "step": 662 }, { "epoch": 0.6758409785932722, "grad_norm": 5.447470664978027, "learning_rate": 8.391754722145449e-05, "loss": 0.4161, "step": 663 }, { "epoch": 0.6768603465851172, "grad_norm": 13.200489044189453, "learning_rate": 8.37788672370463e-05, "loss": 0.9848, "step": 664 }, { "epoch": 0.6778797145769623, "grad_norm": 6.03376579284668, "learning_rate": 8.36397075639922e-05, "loss": 0.356, "step": 665 }, { "epoch": 0.6788990825688074, "grad_norm": 6.075347900390625, "learning_rate": 8.350007017847788e-05, "loss": 0.3031, "step": 666 }, { "epoch": 0.6799184505606524, "grad_norm": 5.790109157562256, "learning_rate": 8.335995706347299e-05, "loss": 0.254, "step": 667 }, { "epoch": 0.6809378185524975, "grad_norm": 11.979147911071777, "learning_rate": 8.321937020870296e-05, "loss": 0.8646, "step": 668 }, { "epoch": 0.6819571865443425, "grad_norm": 9.445723533630371, "learning_rate": 8.30783116106207e-05, "loss": 0.7303, "step": 669 }, { "epoch": 0.6829765545361876, "grad_norm": 8.001054763793945, "learning_rate": 8.293678327237827e-05, "loss": 0.4105, "step": 670 }, { "epoch": 0.6839959225280327, "grad_norm": 4.437264919281006, "learning_rate": 8.279478720379845e-05, "loss": 0.2874, "step": 671 }, { "epoch": 0.6850152905198776, "grad_norm": 4.547714710235596, "learning_rate": 8.265232542134622e-05, "loss": 0.2112, "step": 672 }, { "epoch": 0.6860346585117227, "grad_norm": 7.875749588012695, "learning_rate": 8.250939994810003e-05, "loss": 1.0919, "step": 673 }, { "epoch": 0.6870540265035678, "grad_norm": 5.349310874938965, "learning_rate": 8.236601281372319e-05, "loss": 0.5927, "step": 
674 }, { "epoch": 0.6880733944954128, "grad_norm": 11.490046501159668, "learning_rate": 8.222216605443496e-05, "loss": 1.011, "step": 675 }, { "epoch": 0.6890927624872579, "grad_norm": 7.11298942565918, "learning_rate": 8.207786171298166e-05, "loss": 0.5656, "step": 676 }, { "epoch": 0.6901121304791029, "grad_norm": 10.48589038848877, "learning_rate": 8.193310183860771e-05, "loss": 0.7199, "step": 677 }, { "epoch": 0.691131498470948, "grad_norm": 9.364179611206055, "learning_rate": 8.178788848702643e-05, "loss": 0.7506, "step": 678 }, { "epoch": 0.6921508664627931, "grad_norm": 6.678390026092529, "learning_rate": 8.164222372039092e-05, "loss": 0.5386, "step": 679 }, { "epoch": 0.6931702344546381, "grad_norm": 6.151979446411133, "learning_rate": 8.149610960726479e-05, "loss": 0.6678, "step": 680 }, { "epoch": 0.6941896024464832, "grad_norm": 6.415065765380859, "learning_rate": 8.134954822259271e-05, "loss": 0.4834, "step": 681 }, { "epoch": 0.6952089704383282, "grad_norm": 4.4640326499938965, "learning_rate": 8.120254164767101e-05, "loss": 0.2411, "step": 682 }, { "epoch": 0.6962283384301733, "grad_norm": 6.626987457275391, "learning_rate": 8.105509197011807e-05, "loss": 0.5011, "step": 683 }, { "epoch": 0.6972477064220184, "grad_norm": 7.628388404846191, "learning_rate": 8.090720128384475e-05, "loss": 0.6573, "step": 684 }, { "epoch": 0.6982670744138634, "grad_norm": 3.4043076038360596, "learning_rate": 8.075887168902459e-05, "loss": 0.2798, "step": 685 }, { "epoch": 0.6992864424057085, "grad_norm": 5.682481288909912, "learning_rate": 8.061010529206398e-05, "loss": 0.5887, "step": 686 }, { "epoch": 0.6992864424057085, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8446075320243835, "eval_Qnli-dev-1024_cosine_ap": 0.7501532568375827, "eval_Qnli-dev-1024_cosine_f1": 0.7207207207207208, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.756614089012146, "eval_Qnli-dev-1024_cosine_mcc": 0.4081269865567241, 
"eval_Qnli-dev-1024_cosine_precision": 0.6060606060606061, "eval_Qnli-dev-1024_cosine_recall": 0.8888888888888888, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7564685344696045, "eval_Qnli-dev_cosine_ap": 0.731843650475666, "eval_Qnli-dev_cosine_f1": 0.7378640776699029, "eval_Qnli-dev_cosine_f1_threshold": 0.6987220048904419, "eval_Qnli-dev_cosine_mcc": 0.46153029495329345, "eval_Qnli-dev_cosine_precision": 0.6551724137931034, "eval_Qnli-dev_cosine_recall": 0.8444444444444444, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9270833134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, "eval_global_dataset_loss": 0.29955434799194336, "eval_global_dataset_runtime": 104.3655, "eval_global_dataset_samples_per_second": 7.694, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9270833134651184, "eval_sts-test-1024_pearson_cosine": 0.8628190908797548, "eval_sts-test-1024_spearman_cosine": 0.9062196010289961, "eval_sts-test_pearson_cosine": 0.9012940791829644, "eval_sts-test_spearman_cosine": 0.9179642241352577, "step": 686 }, { "epoch": 0.7003058103975535, "grad_norm": 5.198816299438477, "learning_rate": 8.046090420557231e-05, "loss": 0.6328, "step": 687 }, { "epoch": 0.7013251783893986, "grad_norm": 7.044593811035156, "learning_rate": 8.031127054833189e-05, "loss": 0.4322, "step": 688 }, { "epoch": 0.7023445463812437, "grad_norm": 13.891091346740723, "learning_rate": 8.016120644526797e-05, "loss": 1.1473, "step": 689 }, { "epoch": 0.7033639143730887, "grad_norm": 9.329078674316406, "learning_rate": 8.001071402741842e-05, "loss": 1.0135, "step": 690 }, { "epoch": 0.7043832823649337, "grad_norm": 4.1149210929870605, "learning_rate": 7.985979543190358e-05, "loss": 0.2655, "step": 691 }, { "epoch": 0.7054026503567788, "grad_norm": 7.722234725952148, "learning_rate": 7.970845280189586e-05, "loss": 0.5053, "step": 692 }, { "epoch": 0.7064220183486238, "grad_norm": 6.9180216789245605, 
"learning_rate": 7.955668828658937e-05, "loss": 0.8647, "step": 693 }, { "epoch": 0.7074413863404689, "grad_norm": 5.709589004516602, "learning_rate": 7.940450404116928e-05, "loss": 0.4423, "step": 694 }, { "epoch": 0.7084607543323139, "grad_norm": 4.812499523162842, "learning_rate": 7.925190222678133e-05, "loss": 0.3673, "step": 695 }, { "epoch": 0.709480122324159, "grad_norm": 11.944628715515137, "learning_rate": 7.909888501050109e-05, "loss": 1.1714, "step": 696 }, { "epoch": 0.7104994903160041, "grad_norm": 7.61957311630249, "learning_rate": 7.894545456530316e-05, "loss": 0.8142, "step": 697 }, { "epoch": 0.7115188583078491, "grad_norm": 9.580735206604004, "learning_rate": 7.879161307003038e-05, "loss": 0.8027, "step": 698 }, { "epoch": 0.7125382262996942, "grad_norm": 7.831961154937744, "learning_rate": 7.863736270936284e-05, "loss": 0.4514, "step": 699 }, { "epoch": 0.7135575942915392, "grad_norm": 9.805893898010254, "learning_rate": 7.848270567378686e-05, "loss": 0.8798, "step": 700 }, { "epoch": 0.7145769622833843, "grad_norm": 8.573545455932617, "learning_rate": 7.832764415956389e-05, "loss": 0.7718, "step": 701 }, { "epoch": 0.7155963302752294, "grad_norm": 6.185779571533203, "learning_rate": 7.817218036869932e-05, "loss": 0.4094, "step": 702 }, { "epoch": 0.7166156982670744, "grad_norm": 9.415246963500977, "learning_rate": 7.80163165089112e-05, "loss": 0.5358, "step": 703 }, { "epoch": 0.7176350662589195, "grad_norm": 5.7925543785095215, "learning_rate": 7.78600547935989e-05, "loss": 0.5728, "step": 704 }, { "epoch": 0.7186544342507645, "grad_norm": 8.365612983703613, "learning_rate": 7.770339744181175e-05, "loss": 0.4349, "step": 705 }, { "epoch": 0.7196738022426096, "grad_norm": 11.040353775024414, "learning_rate": 7.754634667821734e-05, "loss": 1.0107, "step": 706 }, { "epoch": 0.7206931702344547, "grad_norm": 10.400522232055664, "learning_rate": 7.73889047330701e-05, "loss": 1.3393, "step": 707 }, { "epoch": 0.7217125382262997, "grad_norm": 
6.314993381500244, "learning_rate": 7.723107384217958e-05, "loss": 0.5175, "step": 708 }, { "epoch": 0.7227319062181448, "grad_norm": 7.7337541580200195, "learning_rate": 7.70728562468787e-05, "loss": 0.3906, "step": 709 }, { "epoch": 0.7237512742099899, "grad_norm": 8.559732437133789, "learning_rate": 7.691425419399183e-05, "loss": 0.726, "step": 710 }, { "epoch": 0.7247706422018348, "grad_norm": 5.824985504150391, "learning_rate": 7.675526993580306e-05, "loss": 0.4299, "step": 711 }, { "epoch": 0.72579001019368, "grad_norm": 9.804418563842773, "learning_rate": 7.659590573002407e-05, "loss": 0.7486, "step": 712 }, { "epoch": 0.7268093781855249, "grad_norm": 5.5835957527160645, "learning_rate": 7.643616383976214e-05, "loss": 0.3316, "step": 713 }, { "epoch": 0.72782874617737, "grad_norm": 8.719099044799805, "learning_rate": 7.627604653348796e-05, "loss": 0.5444, "step": 714 }, { "epoch": 0.7288481141692151, "grad_norm": 7.16873025894165, "learning_rate": 7.611555608500351e-05, "loss": 0.4717, "step": 715 }, { "epoch": 0.7298674821610601, "grad_norm": 8.529095649719238, "learning_rate": 7.595469477340965e-05, "loss": 0.5413, "step": 716 }, { "epoch": 0.7308868501529052, "grad_norm": 4.7856245040893555, "learning_rate": 7.579346488307379e-05, "loss": 0.2207, "step": 717 }, { "epoch": 0.7319062181447502, "grad_norm": 8.381448745727539, "learning_rate": 7.563186870359758e-05, "loss": 0.8042, "step": 718 }, { "epoch": 0.7329255861365953, "grad_norm": 6.099252700805664, "learning_rate": 7.546990852978415e-05, "loss": 0.3666, "step": 719 }, { "epoch": 0.7339449541284404, "grad_norm": 6.979067802429199, "learning_rate": 7.530758666160577e-05, "loss": 0.4511, "step": 720 }, { "epoch": 0.7349643221202854, "grad_norm": 8.355476379394531, "learning_rate": 7.514490540417103e-05, "loss": 0.782, "step": 721 }, { "epoch": 0.7359836901121305, "grad_norm": 3.4271693229675293, "learning_rate": 7.498186706769213e-05, "loss": 0.2947, "step": 722 }, { "epoch": 0.7370030581039755, 
"grad_norm": 10.753888130187988, "learning_rate": 7.481847396745215e-05, "loss": 1.6206, "step": 723 }, { "epoch": 0.7380224260958206, "grad_norm": 10.323583602905273, "learning_rate": 7.465472842377206e-05, "loss": 0.6216, "step": 724 }, { "epoch": 0.7390417940876657, "grad_norm": 5.268289566040039, "learning_rate": 7.449063276197789e-05, "loss": 0.2819, "step": 725 }, { "epoch": 0.7400611620795107, "grad_norm": 8.948394775390625, "learning_rate": 7.432618931236759e-05, "loss": 0.4797, "step": 726 }, { "epoch": 0.7410805300713558, "grad_norm": 4.78109884262085, "learning_rate": 7.416140041017802e-05, "loss": 0.3875, "step": 727 }, { "epoch": 0.7420998980632009, "grad_norm": 7.643434047698975, "learning_rate": 7.399626839555176e-05, "loss": 0.6888, "step": 728 }, { "epoch": 0.7431192660550459, "grad_norm": 4.128391742706299, "learning_rate": 7.383079561350386e-05, "loss": 0.3023, "step": 729 }, { "epoch": 0.744138634046891, "grad_norm": 8.254578590393066, "learning_rate": 7.36649844138886e-05, "loss": 0.6654, "step": 730 }, { "epoch": 0.745158002038736, "grad_norm": 10.747797966003418, "learning_rate": 7.3498837151366e-05, "loss": 0.6517, "step": 731 }, { "epoch": 0.746177370030581, "grad_norm": 6.274332046508789, "learning_rate": 7.333235618536856e-05, "loss": 0.4537, "step": 732 }, { "epoch": 0.7471967380224261, "grad_norm": 8.256685256958008, "learning_rate": 7.316554388006756e-05, "loss": 0.7224, "step": 733 }, { "epoch": 0.7482161060142711, "grad_norm": 7.657110214233398, "learning_rate": 7.299840260433965e-05, "loss": 0.4447, "step": 734 }, { "epoch": 0.7492354740061162, "grad_norm": 6.170997142791748, "learning_rate": 7.283093473173307e-05, "loss": 0.4127, "step": 735 }, { "epoch": 0.7502548419979612, "grad_norm": 5.84876823425293, "learning_rate": 7.26631426404341e-05, "loss": 0.3297, "step": 736 }, { "epoch": 0.7512742099898063, "grad_norm": 5.986436367034912, "learning_rate": 7.249502871323314e-05, "loss": 0.3664, "step": 737 }, { "epoch": 
0.7522935779816514, "grad_norm": 9.613632202148438, "learning_rate": 7.232659533749092e-05, "loss": 0.7934, "step": 738 }, { "epoch": 0.7533129459734964, "grad_norm": 5.5741286277771, "learning_rate": 7.215784490510468e-05, "loss": 0.4214, "step": 739 }, { "epoch": 0.7543323139653415, "grad_norm": 8.343430519104004, "learning_rate": 7.198877981247406e-05, "loss": 0.6174, "step": 740 }, { "epoch": 0.7553516819571865, "grad_norm": 11.505045890808105, "learning_rate": 7.18194024604672e-05, "loss": 0.7011, "step": 741 }, { "epoch": 0.7563710499490316, "grad_norm": 9.192388534545898, "learning_rate": 7.164971525438657e-05, "loss": 0.6472, "step": 742 }, { "epoch": 0.7573904179408767, "grad_norm": 10.685009002685547, "learning_rate": 7.147972060393478e-05, "loss": 0.9555, "step": 743 }, { "epoch": 0.7584097859327217, "grad_norm": 9.81982421875, "learning_rate": 7.130942092318051e-05, "loss": 1.1771, "step": 744 }, { "epoch": 0.7594291539245668, "grad_norm": 7.654698848724365, "learning_rate": 7.113881863052407e-05, "loss": 0.6876, "step": 745 }, { "epoch": 0.7604485219164119, "grad_norm": 10.608144760131836, "learning_rate": 7.096791614866309e-05, "loss": 0.6737, "step": 746 }, { "epoch": 0.7614678899082569, "grad_norm": 8.949767112731934, "learning_rate": 7.079671590455821e-05, "loss": 0.9648, "step": 747 }, { "epoch": 0.762487257900102, "grad_norm": 5.873875141143799, "learning_rate": 7.06252203293985e-05, "loss": 0.3267, "step": 748 }, { "epoch": 0.763506625891947, "grad_norm": 3.814371347427368, "learning_rate": 7.045343185856701e-05, "loss": 0.2244, "step": 749 }, { "epoch": 0.764525993883792, "grad_norm": 5.834865570068359, "learning_rate": 7.028135293160611e-05, "loss": 0.305, "step": 750 }, { "epoch": 0.7655453618756372, "grad_norm": 8.765941619873047, "learning_rate": 7.010898599218296e-05, "loss": 0.5588, "step": 751 }, { "epoch": 0.7665647298674821, "grad_norm": 8.091228485107422, "learning_rate": 6.99363334880547e-05, "loss": 1.0974, "step": 752 }, { "epoch": 
0.7675840978593272, "grad_norm": 7.041286468505859, "learning_rate": 6.976339787103373e-05, "loss": 0.603, "step": 753 }, { "epoch": 0.7686034658511722, "grad_norm": 6.676450729370117, "learning_rate": 6.959018159695293e-05, "loss": 0.6972, "step": 754 }, { "epoch": 0.7696228338430173, "grad_norm": 9.935379981994629, "learning_rate": 6.94166871256307e-05, "loss": 0.958, "step": 755 }, { "epoch": 0.7706422018348624, "grad_norm": 6.536661624908447, "learning_rate": 6.92429169208361e-05, "loss": 0.2937, "step": 756 }, { "epoch": 0.7716615698267074, "grad_norm": 5.736427307128906, "learning_rate": 6.906887345025385e-05, "loss": 0.3384, "step": 757 }, { "epoch": 0.7726809378185525, "grad_norm": 5.628017425537109, "learning_rate": 6.88945591854493e-05, "loss": 0.3321, "step": 758 }, { "epoch": 0.7737003058103975, "grad_norm": 9.1480712890625, "learning_rate": 6.87199766018332e-05, "loss": 0.8029, "step": 759 }, { "epoch": 0.7747196738022426, "grad_norm": 7.8731770515441895, "learning_rate": 6.85451281786268e-05, "loss": 0.7043, "step": 760 }, { "epoch": 0.7757390417940877, "grad_norm": 13.733153343200684, "learning_rate": 6.837001639882641e-05, "loss": 1.6068, "step": 761 }, { "epoch": 0.7767584097859327, "grad_norm": 9.02813720703125, "learning_rate": 6.819464374916823e-05, "loss": 1.1273, "step": 762 }, { "epoch": 0.7777777777777778, "grad_norm": 8.211151123046875, "learning_rate": 6.801901272009307e-05, "loss": 0.5429, "step": 763 }, { "epoch": 0.7787971457696228, "grad_norm": 5.243499755859375, "learning_rate": 6.784312580571091e-05, "loss": 0.2976, "step": 764 }, { "epoch": 0.7798165137614679, "grad_norm": 11.219100952148438, "learning_rate": 6.766698550376556e-05, "loss": 0.9183, "step": 765 }, { "epoch": 0.780835881753313, "grad_norm": 7.10944938659668, "learning_rate": 6.749059431559913e-05, "loss": 0.4734, "step": 766 }, { "epoch": 0.781855249745158, "grad_norm": 7.810965061187744, "learning_rate": 6.731395474611649e-05, "loss": 0.5437, "step": 767 }, { "epoch": 
0.7828746177370031, "grad_norm": 6.063333034515381, "learning_rate": 6.71370693037498e-05, "loss": 0.3382, "step": 768 }, { "epoch": 0.7838939857288482, "grad_norm": 5.784426689147949, "learning_rate": 6.695994050042277e-05, "loss": 0.3925, "step": 769 }, { "epoch": 0.7849133537206932, "grad_norm": 7.640711784362793, "learning_rate": 6.678257085151509e-05, "loss": 0.4345, "step": 770 }, { "epoch": 0.7859327217125383, "grad_norm": 9.467418670654297, "learning_rate": 6.660496287582667e-05, "loss": 0.9237, "step": 771 }, { "epoch": 0.7869520897043832, "grad_norm": 4.449363708496094, "learning_rate": 6.642711909554174e-05, "loss": 0.3875, "step": 772 }, { "epoch": 0.7879714576962283, "grad_norm": 7.483307838439941, "learning_rate": 6.624904203619333e-05, "loss": 0.533, "step": 773 }, { "epoch": 0.7889908256880734, "grad_norm": 4.827091693878174, "learning_rate": 6.607073422662711e-05, "loss": 0.4211, "step": 774 }, { "epoch": 0.7900101936799184, "grad_norm": 6.135465621948242, "learning_rate": 6.589219819896565e-05, "loss": 0.5421, "step": 775 }, { "epoch": 0.7910295616717635, "grad_norm": 9.622929573059082, "learning_rate": 6.571343648857242e-05, "loss": 0.8904, "step": 776 }, { "epoch": 0.7920489296636085, "grad_norm": 5.664134502410889, "learning_rate": 6.553445163401571e-05, "loss": 0.4604, "step": 777 }, { "epoch": 0.7930682976554536, "grad_norm": 9.634468078613281, "learning_rate": 6.535524617703273e-05, "loss": 0.7431, "step": 778 }, { "epoch": 0.7940876656472987, "grad_norm": 10.855483055114746, "learning_rate": 6.517582266249336e-05, "loss": 1.0159, "step": 779 }, { "epoch": 0.7951070336391437, "grad_norm": 9.945262908935547, "learning_rate": 6.499618363836417e-05, "loss": 0.6554, "step": 780 }, { "epoch": 0.7961264016309888, "grad_norm": 7.224388599395752, "learning_rate": 6.481633165567207e-05, "loss": 0.8539, "step": 781 }, { "epoch": 0.7971457696228338, "grad_norm": 8.917383193969727, "learning_rate": 6.463626926846817e-05, "loss": 0.4543, "step": 782 }, { 
"epoch": 0.7981651376146789, "grad_norm": 4.411260604858398, "learning_rate": 6.445599903379154e-05, "loss": 0.2281, "step": 783 }, { "epoch": 0.799184505606524, "grad_norm": 8.85741138458252, "learning_rate": 6.427552351163286e-05, "loss": 1.0334, "step": 784 }, { "epoch": 0.799184505606524, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8376107215881348, "eval_Qnli-dev-1024_cosine_ap": 0.7815698422458957, "eval_Qnli-dev-1024_cosine_f1": 0.7222222222222222, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7544304132461548, "eval_Qnli-dev-1024_cosine_mcc": 0.41614558708189836, "eval_Qnli-dev-1024_cosine_precision": 0.6190476190476191, "eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7388297319412231, "eval_Qnli-dev_cosine_ap": 0.7636341718424307, "eval_Qnli-dev_cosine_f1": 0.7450980392156862, "eval_Qnli-dev_cosine_f1_threshold": 0.695953369140625, "eval_Qnli-dev_cosine_mcc": 0.4794765594627558, "eval_Qnli-dev_cosine_precision": 0.6666666666666666, "eval_Qnli-dev_cosine_recall": 0.8444444444444444, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, "eval_global_dataset_loss": 0.3045359253883362, "eval_global_dataset_runtime": 103.772, "eval_global_dataset_samples_per_second": 7.738, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8706480103495355, "eval_sts-test-1024_spearman_cosine": 0.9094148980677476, "eval_sts-test_pearson_cosine": 0.9036838203711135, "eval_sts-test_spearman_cosine": 0.9196077696084266, "step": 784 }, { "epoch": 0.800203873598369, "grad_norm": 10.137900352478027, "learning_rate": 6.409484526489805e-05, "loss": 0.9697, "step": 785 }, { "epoch": 0.8012232415902141, "grad_norm": 8.976780891418457, "learning_rate": 
6.391396685937186e-05, "loss": 0.7048, "step": 786 }, { "epoch": 0.8022426095820592, "grad_norm": 8.672534942626953, "learning_rate": 6.373289086368151e-05, "loss": 0.5263, "step": 787 }, { "epoch": 0.8032619775739042, "grad_norm": 9.115574836730957, "learning_rate": 6.355161984926019e-05, "loss": 0.5056, "step": 788 }, { "epoch": 0.8042813455657493, "grad_norm": 5.47214412689209, "learning_rate": 6.337015639031044e-05, "loss": 0.3826, "step": 789 }, { "epoch": 0.8053007135575942, "grad_norm": 4.726554870605469, "learning_rate": 6.318850306376777e-05, "loss": 0.3029, "step": 790 }, { "epoch": 0.8063200815494393, "grad_norm": 9.025796890258789, "learning_rate": 6.300666244926387e-05, "loss": 0.7712, "step": 791 }, { "epoch": 0.8073394495412844, "grad_norm": 8.51115894317627, "learning_rate": 6.282463712909018e-05, "loss": 0.5587, "step": 792 }, { "epoch": 0.8083588175331294, "grad_norm": 4.170646667480469, "learning_rate": 6.264242968816106e-05, "loss": 0.2386, "step": 793 }, { "epoch": 0.8093781855249745, "grad_norm": 7.225284576416016, "learning_rate": 6.246004271397713e-05, "loss": 0.5662, "step": 794 }, { "epoch": 0.8103975535168195, "grad_norm": 8.109657287597656, "learning_rate": 6.227747879658859e-05, "loss": 0.5322, "step": 795 }, { "epoch": 0.8114169215086646, "grad_norm": 8.729584693908691, "learning_rate": 6.20947405285583e-05, "loss": 0.5122, "step": 796 }, { "epoch": 0.8124362895005097, "grad_norm": 6.562040328979492, "learning_rate": 6.191183050492515e-05, "loss": 0.5094, "step": 797 }, { "epoch": 0.8134556574923547, "grad_norm": 8.552765846252441, "learning_rate": 6.172875132316703e-05, "loss": 0.8412, "step": 798 }, { "epoch": 0.8144750254841998, "grad_norm": 8.517980575561523, "learning_rate": 6.154550558316405e-05, "loss": 0.3771, "step": 799 }, { "epoch": 0.8154943934760448, "grad_norm": 9.862586975097656, "learning_rate": 6.136209588716155e-05, "loss": 0.626, "step": 800 }, { "epoch": 0.8165137614678899, "grad_norm": 11.597122192382812, 
"learning_rate": 6.117852483973325e-05, "loss": 0.8902, "step": 801 }, { "epoch": 0.817533129459735, "grad_norm": 4.268974781036377, "learning_rate": 6.0994795047744144e-05, "loss": 0.2301, "step": 802 }, { "epoch": 0.81855249745158, "grad_norm": 2.586038112640381, "learning_rate": 6.081090912031358e-05, "loss": 0.16, "step": 803 }, { "epoch": 0.8195718654434251, "grad_norm": 6.814731121063232, "learning_rate": 6.0626869668778085e-05, "loss": 0.4375, "step": 804 }, { "epoch": 0.8205912334352702, "grad_norm": 9.699979782104492, "learning_rate": 6.044267930665446e-05, "loss": 0.9554, "step": 805 }, { "epoch": 0.8216106014271152, "grad_norm": 7.751320838928223, "learning_rate": 6.025834064960247e-05, "loss": 0.4906, "step": 806 }, { "epoch": 0.8226299694189603, "grad_norm": 8.852093696594238, "learning_rate": 6.007385631538787e-05, "loss": 0.478, "step": 807 }, { "epoch": 0.8236493374108053, "grad_norm": 5.510447025299072, "learning_rate": 5.988922892384513e-05, "loss": 0.6057, "step": 808 }, { "epoch": 0.8246687054026504, "grad_norm": 6.745148658752441, "learning_rate": 5.9704461096840204e-05, "loss": 0.5003, "step": 809 }, { "epoch": 0.8256880733944955, "grad_norm": 11.509452819824219, "learning_rate": 5.9519555458233436e-05, "loss": 1.0844, "step": 810 }, { "epoch": 0.8267074413863404, "grad_norm": 9.71648120880127, "learning_rate": 5.933451463384213e-05, "loss": 1.0267, "step": 811 }, { "epoch": 0.8277268093781855, "grad_norm": 9.810832023620605, "learning_rate": 5.91493412514034e-05, "loss": 0.5415, "step": 812 }, { "epoch": 0.8287461773700305, "grad_norm": 5.600392818450928, "learning_rate": 5.896403794053679e-05, "loss": 0.3295, "step": 813 }, { "epoch": 0.8297655453618756, "grad_norm": 7.511580944061279, "learning_rate": 5.877860733270692e-05, "loss": 0.5511, "step": 814 }, { "epoch": 0.8307849133537207, "grad_norm": 5.374726295471191, "learning_rate": 5.8593052061186125e-05, "loss": 0.3234, "step": 815 }, { "epoch": 0.8318042813455657, "grad_norm": 
4.7778639793396, "learning_rate": 5.8407374761017105e-05, "loss": 0.2917, "step": 816 }, { "epoch": 0.8328236493374108, "grad_norm": 4.155742645263672, "learning_rate": 5.822157806897548e-05, "loss": 0.3865, "step": 817 }, { "epoch": 0.8338430173292558, "grad_norm": 5.087594032287598, "learning_rate": 5.803566462353225e-05, "loss": 0.2401, "step": 818 }, { "epoch": 0.8348623853211009, "grad_norm": 3.707869529724121, "learning_rate": 5.7849637064816496e-05, "loss": 0.1582, "step": 819 }, { "epoch": 0.835881753312946, "grad_norm": 7.63162899017334, "learning_rate": 5.76634980345778e-05, "loss": 0.5475, "step": 820 }, { "epoch": 0.836901121304791, "grad_norm": 5.092942237854004, "learning_rate": 5.747725017614869e-05, "loss": 0.3291, "step": 821 }, { "epoch": 0.8379204892966361, "grad_norm": 6.86021089553833, "learning_rate": 5.72908961344072e-05, "loss": 0.6867, "step": 822 }, { "epoch": 0.8389398572884812, "grad_norm": 9.336700439453125, "learning_rate": 5.710443855573919e-05, "loss": 0.9519, "step": 823 }, { "epoch": 0.8399592252803262, "grad_norm": 6.382976055145264, "learning_rate": 5.6917880088000894e-05, "loss": 0.4898, "step": 824 }, { "epoch": 0.8409785932721713, "grad_norm": 8.171992301940918, "learning_rate": 5.6731223380481257e-05, "loss": 0.3361, "step": 825 }, { "epoch": 0.8419979612640163, "grad_norm": 11.304964065551758, "learning_rate": 5.6544471083864245e-05, "loss": 1.0131, "step": 826 }, { "epoch": 0.8430173292558614, "grad_norm": 7.883802890777588, "learning_rate": 5.635762585019136e-05, "loss": 0.4988, "step": 827 }, { "epoch": 0.8440366972477065, "grad_norm": 5.304625988006592, "learning_rate": 5.61706903328238e-05, "loss": 0.2737, "step": 828 }, { "epoch": 0.8450560652395515, "grad_norm": 8.170361518859863, "learning_rate": 5.598366718640494e-05, "loss": 0.5214, "step": 829 }, { "epoch": 0.8460754332313966, "grad_norm": 7.193360805511475, "learning_rate": 5.579655906682255e-05, "loss": 0.5261, "step": 830 }, { "epoch": 0.8470948012232415, 
"grad_norm": 5.908787250518799, "learning_rate": 5.5609368631171035e-05, "loss": 0.4337, "step": 831 }, { "epoch": 0.8481141692150866, "grad_norm": 11.470138549804688, "learning_rate": 5.5422098537713815e-05, "loss": 1.0523, "step": 832 }, { "epoch": 0.8491335372069317, "grad_norm": 5.7633514404296875, "learning_rate": 5.52347514458455e-05, "loss": 0.59, "step": 833 }, { "epoch": 0.8501529051987767, "grad_norm": 9.171930313110352, "learning_rate": 5.5047330016054154e-05, "loss": 0.9984, "step": 834 }, { "epoch": 0.8511722731906218, "grad_norm": 7.584822177886963, "learning_rate": 5.48598369098835e-05, "loss": 1.0533, "step": 835 }, { "epoch": 0.8521916411824668, "grad_norm": 5.429177761077881, "learning_rate": 5.4672274789895104e-05, "loss": 0.266, "step": 836 }, { "epoch": 0.8532110091743119, "grad_norm": 7.292309284210205, "learning_rate": 5.4484646319630636e-05, "loss": 0.3497, "step": 837 }, { "epoch": 0.854230377166157, "grad_norm": 7.126836776733398, "learning_rate": 5.429695416357392e-05, "loss": 0.5161, "step": 838 }, { "epoch": 0.855249745158002, "grad_norm": 6.357126235961914, "learning_rate": 5.410920098711323e-05, "loss": 0.4256, "step": 839 }, { "epoch": 0.8562691131498471, "grad_norm": 6.682480335235596, "learning_rate": 5.392138945650339e-05, "loss": 0.3334, "step": 840 }, { "epoch": 0.8572884811416922, "grad_norm": 6.9180521965026855, "learning_rate": 5.373352223882787e-05, "loss": 0.5704, "step": 841 }, { "epoch": 0.8583078491335372, "grad_norm": 6.871384620666504, "learning_rate": 5.354560200196094e-05, "loss": 0.3803, "step": 842 }, { "epoch": 0.8593272171253823, "grad_norm": 9.186737060546875, "learning_rate": 5.335763141452982e-05, "loss": 0.7648, "step": 843 }, { "epoch": 0.8603465851172273, "grad_norm": 8.700101852416992, "learning_rate": 5.3169613145876714e-05, "loss": 0.7548, "step": 844 }, { "epoch": 0.8613659531090724, "grad_norm": 7.032200336456299, "learning_rate": 5.2981549866020975e-05, "loss": 0.7275, "step": 845 }, { "epoch": 
0.8623853211009175, "grad_norm": 13.48193359375, "learning_rate": 5.2793444245621146e-05, "loss": 1.1788, "step": 846 }, { "epoch": 0.8634046890927625, "grad_norm": 9.682479858398438, "learning_rate": 5.260529895593702e-05, "loss": 0.7809, "step": 847 }, { "epoch": 0.8644240570846076, "grad_norm": 8.730304718017578, "learning_rate": 5.241711666879172e-05, "loss": 0.6487, "step": 848 }, { "epoch": 0.8654434250764526, "grad_norm": 6.570590972900391, "learning_rate": 5.2228900056533836e-05, "loss": 0.561, "step": 849 }, { "epoch": 0.8664627930682977, "grad_norm": 8.695535659790039, "learning_rate": 5.204065179199931e-05, "loss": 0.5906, "step": 850 }, { "epoch": 0.8674821610601428, "grad_norm": 5.353935241699219, "learning_rate": 5.1852374548473614e-05, "loss": 0.5192, "step": 851 }, { "epoch": 0.8685015290519877, "grad_norm": 10.60522174835205, "learning_rate": 5.1664070999653766e-05, "loss": 0.8094, "step": 852 }, { "epoch": 0.8695208970438328, "grad_norm": 3.7188539505004883, "learning_rate": 5.147574381961032e-05, "loss": 0.2399, "step": 853 }, { "epoch": 0.8705402650356778, "grad_norm": 5.648993492126465, "learning_rate": 5.128739568274944e-05, "loss": 0.4103, "step": 854 }, { "epoch": 0.8715596330275229, "grad_norm": 6.711026668548584, "learning_rate": 5.109902926377482e-05, "loss": 0.4969, "step": 855 }, { "epoch": 0.872579001019368, "grad_norm": 5.686347961425781, "learning_rate": 5.091064723764987e-05, "loss": 0.37, "step": 856 }, { "epoch": 0.873598369011213, "grad_norm": 4.857931613922119, "learning_rate": 5.072225227955959e-05, "loss": 0.4109, "step": 857 }, { "epoch": 0.8746177370030581, "grad_norm": 8.75938606262207, "learning_rate": 5.053384706487261e-05, "loss": 0.525, "step": 858 }, { "epoch": 0.8756371049949032, "grad_norm": 5.874378204345703, "learning_rate": 5.034543426910324e-05, "loss": 0.5958, "step": 859 }, { "epoch": 0.8766564729867482, "grad_norm": 5.085257530212402, "learning_rate": 5.0157016567873424e-05, "loss": 0.4708, "step": 860 }, { 
"epoch": 0.8776758409785933, "grad_norm": 7.9917707443237305, "learning_rate": 4.996859663687479e-05, "loss": 0.6881, "step": 861 }, { "epoch": 0.8786952089704383, "grad_norm": 8.1506929397583, "learning_rate": 4.9780177151830634e-05, "loss": 0.5545, "step": 862 }, { "epoch": 0.8797145769622834, "grad_norm": 9.375650405883789, "learning_rate": 4.959176078845789e-05, "loss": 0.645, "step": 863 }, { "epoch": 0.8807339449541285, "grad_norm": 4.8143310546875, "learning_rate": 4.9403350222429184e-05, "loss": 0.4112, "step": 864 }, { "epoch": 0.8817533129459735, "grad_norm": 7.862481594085693, "learning_rate": 4.92149481293348e-05, "loss": 0.4178, "step": 865 }, { "epoch": 0.8827726809378186, "grad_norm": 5.252464771270752, "learning_rate": 4.902655718464473e-05, "loss": 0.2857, "step": 866 }, { "epoch": 0.8837920489296636, "grad_norm": 6.06905460357666, "learning_rate": 4.883818006367062e-05, "loss": 0.3374, "step": 867 }, { "epoch": 0.8848114169215087, "grad_norm": 6.810131072998047, "learning_rate": 4.86498194415278e-05, "loss": 0.5303, "step": 868 }, { "epoch": 0.8858307849133538, "grad_norm": 7.676322937011719, "learning_rate": 4.846147799309734e-05, "loss": 0.7438, "step": 869 }, { "epoch": 0.8868501529051988, "grad_norm": 11.570023536682129, "learning_rate": 4.8273158392987986e-05, "loss": 1.0872, "step": 870 }, { "epoch": 0.8878695208970439, "grad_norm": 6.312341213226318, "learning_rate": 4.8084863315498234e-05, "loss": 0.4497, "step": 871 }, { "epoch": 0.8888888888888888, "grad_norm": 7.389033794403076, "learning_rate": 4.7896595434578356e-05, "loss": 0.4171, "step": 872 }, { "epoch": 0.8899082568807339, "grad_norm": 8.600625038146973, "learning_rate": 4.770835742379239e-05, "loss": 0.4417, "step": 873 }, { "epoch": 0.890927624872579, "grad_norm": 7.350024223327637, "learning_rate": 4.7520151956280227e-05, "loss": 0.7023, "step": 874 }, { "epoch": 0.891946992864424, "grad_norm": 12.617684364318848, "learning_rate": 4.733198170471953e-05, "loss": 1.0547, "step": 
875 }, { "epoch": 0.8929663608562691, "grad_norm": 5.219171524047852, "learning_rate": 4.714384934128796e-05, "loss": 0.3526, "step": 876 }, { "epoch": 0.8939857288481141, "grad_norm": 10.923335075378418, "learning_rate": 4.6955757537625104e-05, "loss": 0.7315, "step": 877 }, { "epoch": 0.8950050968399592, "grad_norm": 4.7785325050354, "learning_rate": 4.6767708964794526e-05, "loss": 0.4082, "step": 878 }, { "epoch": 0.8960244648318043, "grad_norm": 7.037627696990967, "learning_rate": 4.6579706293245944e-05, "loss": 0.8155, "step": 879 }, { "epoch": 0.8970438328236493, "grad_norm": 7.149205207824707, "learning_rate": 4.6391752192777164e-05, "loss": 0.5083, "step": 880 }, { "epoch": 0.8980632008154944, "grad_norm": 5.331564426422119, "learning_rate": 4.620384933249631e-05, "loss": 0.655, "step": 881 }, { "epoch": 0.8990825688073395, "grad_norm": 10.019486427307129, "learning_rate": 4.6016000380783805e-05, "loss": 0.7207, "step": 882 }, { "epoch": 0.8990825688073395, "eval_Qnli-dev-1024_cosine_accuracy": 0.7708333333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8360349535942078, "eval_Qnli-dev-1024_cosine_ap": 0.8011558872452826, "eval_Qnli-dev-1024_cosine_f1": 0.7250000000000001, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.8360349535942078, "eval_Qnli-dev-1024_cosine_mcc": 0.5461802806126049, "eval_Qnli-dev-1024_cosine_precision": 0.8285714285714286, "eval_Qnli-dev-1024_cosine_recall": 0.6444444444444445, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7521146535873413, "eval_Qnli-dev_cosine_ap": 0.7712094779135136, "eval_Qnli-dev_cosine_f1": 0.7500000000000001, "eval_Qnli-dev_cosine_f1_threshold": 0.6768573522567749, "eval_Qnli-dev_cosine_mcc": 0.48653004754089046, "eval_Qnli-dev_cosine_precision": 0.6610169491525424, "eval_Qnli-dev_cosine_recall": 0.8666666666666667, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, 
"eval_global_dataset_loss": 0.26387155055999756, "eval_global_dataset_runtime": 103.9177, "eval_global_dataset_samples_per_second": 7.727, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9479166865348816, "eval_sts-test-1024_pearson_cosine": 0.8810824372715894, "eval_sts-test-1024_spearman_cosine": 0.9117642789427417, "eval_sts-test_pearson_cosine": 0.9044525796924666, "eval_sts-test_spearman_cosine": 0.9182572042166309, "step": 882 }, { "epoch": 0.9001019367991845, "grad_norm": 7.724752426147461, "learning_rate": 4.582820800525455e-05, "loss": 0.4898, "step": 883 }, { "epoch": 0.9011213047910296, "grad_norm": 9.442131042480469, "learning_rate": 4.564047487272001e-05, "loss": 0.5506, "step": 884 }, { "epoch": 0.9021406727828746, "grad_norm": 8.832263946533203, "learning_rate": 4.5452803649150324e-05, "loss": 0.6206, "step": 885 }, { "epoch": 0.9031600407747197, "grad_norm": 12.987079620361328, "learning_rate": 4.5265196999636535e-05, "loss": 1.9669, "step": 886 }, { "epoch": 0.9041794087665648, "grad_norm": 9.050309181213379, "learning_rate": 4.5077657588352595e-05, "loss": 0.8493, "step": 887 }, { "epoch": 0.9051987767584098, "grad_norm": 9.642857551574707, "learning_rate": 4.489018807851769e-05, "loss": 0.9698, "step": 888 }, { "epoch": 0.9062181447502549, "grad_norm": 7.444589614868164, "learning_rate": 4.4702791132358314e-05, "loss": 0.7322, "step": 889 }, { "epoch": 0.9072375127420998, "grad_norm": 9.99152946472168, "learning_rate": 4.451546941107046e-05, "loss": 0.484, "step": 890 }, { "epoch": 0.908256880733945, "grad_norm": 6.232360363006592, "learning_rate": 4.432822557478194e-05, "loss": 0.5604, "step": 891 }, { "epoch": 0.90927624872579, "grad_norm": 3.1541106700897217, "learning_rate": 4.414106228251446e-05, "loss": 0.2633, "step": 892 }, { "epoch": 0.910295616717635, "grad_norm": 5.661106109619141, "learning_rate": 4.3953982192146006e-05, "loss": 0.2417, "step": 893 }, { "epoch": 0.9113149847094801, "grad_norm": 
4.497067451477051, "learning_rate": 4.3766987960372956e-05, "loss": 0.4481, "step": 894 }, { "epoch": 0.9123343527013251, "grad_norm": 8.505694389343262, "learning_rate": 4.358008224267245e-05, "loss": 0.7402, "step": 895 }, { "epoch": 0.9133537206931702, "grad_norm": 5.820054054260254, "learning_rate": 4.3393267693264686e-05, "loss": 0.4897, "step": 896 }, { "epoch": 0.9143730886850153, "grad_norm": 7.943095684051514, "learning_rate": 4.320654696507511e-05, "loss": 0.5863, "step": 897 }, { "epoch": 0.9153924566768603, "grad_norm": 10.6437349319458, "learning_rate": 4.301992270969692e-05, "loss": 0.7101, "step": 898 }, { "epoch": 0.9164118246687054, "grad_norm": 3.8055593967437744, "learning_rate": 4.2833397577353284e-05, "loss": 0.2404, "step": 899 }, { "epoch": 0.9174311926605505, "grad_norm": 8.539854049682617, "learning_rate": 4.26469742168597e-05, "loss": 0.5594, "step": 900 }, { "epoch": 0.9184505606523955, "grad_norm": 5.611748218536377, "learning_rate": 4.2460655275586494e-05, "loss": 0.4047, "step": 901 }, { "epoch": 0.9194699286442406, "grad_norm": 4.898343086242676, "learning_rate": 4.227444339942107e-05, "loss": 0.4865, "step": 902 }, { "epoch": 0.9204892966360856, "grad_norm": 8.28711986541748, "learning_rate": 4.208834123273047e-05, "loss": 0.3909, "step": 903 }, { "epoch": 0.9215086646279307, "grad_norm": 6.98935604095459, "learning_rate": 4.190235141832375e-05, "loss": 0.2808, "step": 904 }, { "epoch": 0.9225280326197758, "grad_norm": 9.016980171203613, "learning_rate": 4.171647659741448e-05, "loss": 0.7509, "step": 905 }, { "epoch": 0.9235474006116208, "grad_norm": 5.859550476074219, "learning_rate": 4.153071940958321e-05, "loss": 0.325, "step": 906 }, { "epoch": 0.9245667686034659, "grad_norm": 7.970040321350098, "learning_rate": 4.134508249274002e-05, "loss": 0.5335, "step": 907 }, { "epoch": 0.9255861365953109, "grad_norm": 6.2324981689453125, "learning_rate": 4.1159568483087e-05, "loss": 0.6193, "step": 908 }, { "epoch": 0.926605504587156, 
"grad_norm": 5.227268218994141, "learning_rate": 4.0974180015080897e-05, "loss": 0.2974, "step": 909 }, { "epoch": 0.9276248725790011, "grad_norm": 9.293944358825684, "learning_rate": 4.078891972139564e-05, "loss": 0.6725, "step": 910 }, { "epoch": 0.928644240570846, "grad_norm": 10.003561019897461, "learning_rate": 4.060379023288495e-05, "loss": 0.8828, "step": 911 }, { "epoch": 0.9296636085626911, "grad_norm": 9.07729721069336, "learning_rate": 4.0418794178545076e-05, "loss": 0.8751, "step": 912 }, { "epoch": 0.9306829765545361, "grad_norm": 7.200821876525879, "learning_rate": 4.023393418547732e-05, "loss": 0.7019, "step": 913 }, { "epoch": 0.9317023445463812, "grad_norm": 10.154699325561523, "learning_rate": 4.0049212878850793e-05, "loss": 0.7131, "step": 914 }, { "epoch": 0.9327217125382263, "grad_norm": 7.271543025970459, "learning_rate": 3.98646328818652e-05, "loss": 0.2849, "step": 915 }, { "epoch": 0.9337410805300713, "grad_norm": 9.933566093444824, "learning_rate": 3.96801968157135e-05, "loss": 0.8097, "step": 916 }, { "epoch": 0.9347604485219164, "grad_norm": 5.370792865753174, "learning_rate": 3.949590729954467e-05, "loss": 0.3447, "step": 917 }, { "epoch": 0.9357798165137615, "grad_norm": 8.846680641174316, "learning_rate": 3.931176695042664e-05, "loss": 0.8601, "step": 918 }, { "epoch": 0.9367991845056065, "grad_norm": 5.936051368713379, "learning_rate": 3.912777838330893e-05, "loss": 0.4467, "step": 919 }, { "epoch": 0.9378185524974516, "grad_norm": 10.40077018737793, "learning_rate": 3.8943944210985735e-05, "loss": 0.8137, "step": 920 }, { "epoch": 0.9388379204892966, "grad_norm": 7.319591999053955, "learning_rate": 3.876026704405866e-05, "loss": 0.4527, "step": 921 }, { "epoch": 0.9398572884811417, "grad_norm": 8.947883605957031, "learning_rate": 3.8576749490899686e-05, "loss": 0.7656, "step": 922 }, { "epoch": 0.9408766564729868, "grad_norm": 10.776662826538086, "learning_rate": 3.839339415761416e-05, "loss": 1.1218, "step": 923 }, { "epoch": 
0.9418960244648318, "grad_norm": 2.9248359203338623, "learning_rate": 3.821020364800379e-05, "loss": 0.188, "step": 924 }, { "epoch": 0.9429153924566769, "grad_norm": 9.73752212524414, "learning_rate": 3.8027180563529616e-05, "loss": 0.8454, "step": 925 }, { "epoch": 0.9439347604485219, "grad_norm": 6.643280506134033, "learning_rate": 3.7844327503275136e-05, "loss": 0.5368, "step": 926 }, { "epoch": 0.944954128440367, "grad_norm": 9.299040794372559, "learning_rate": 3.7661647063909294e-05, "loss": 0.7602, "step": 927 }, { "epoch": 0.9459734964322121, "grad_norm": 6.660792827606201, "learning_rate": 3.747914183964974e-05, "loss": 0.4733, "step": 928 }, { "epoch": 0.9469928644240571, "grad_norm": 5.206737995147705, "learning_rate": 3.729681442222587e-05, "loss": 0.2305, "step": 929 }, { "epoch": 0.9480122324159022, "grad_norm": 9.746971130371094, "learning_rate": 3.711466740084211e-05, "loss": 0.7775, "step": 930 }, { "epoch": 0.9490316004077471, "grad_norm": 9.825338363647461, "learning_rate": 3.6932703362141084e-05, "loss": 0.8859, "step": 931 }, { "epoch": 0.9500509683995922, "grad_norm": 7.335731506347656, "learning_rate": 3.6750924890166914e-05, "loss": 0.3918, "step": 932 }, { "epoch": 0.9510703363914373, "grad_norm": 6.4724931716918945, "learning_rate": 3.656933456632853e-05, "loss": 0.3842, "step": 933 }, { "epoch": 0.9520897043832823, "grad_norm": 4.886312484741211, "learning_rate": 3.638793496936296e-05, "loss": 0.3719, "step": 934 }, { "epoch": 0.9531090723751274, "grad_norm": 8.522834777832031, "learning_rate": 3.620672867529878e-05, "loss": 0.8043, "step": 935 }, { "epoch": 0.9541284403669725, "grad_norm": 9.507696151733398, "learning_rate": 3.602571825741953e-05, "loss": 0.8282, "step": 936 }, { "epoch": 0.9551478083588175, "grad_norm": 4.895750045776367, "learning_rate": 3.584490628622705e-05, "loss": 0.4599, "step": 937 }, { "epoch": 0.9561671763506626, "grad_norm": 7.197470664978027, "learning_rate": 3.566429532940518e-05, "loss": 0.649, "step": 938 
}, { "epoch": 0.9571865443425076, "grad_norm": 6.60915470123291, "learning_rate": 3.548388795178307e-05, "loss": 0.4325, "step": 939 }, { "epoch": 0.9582059123343527, "grad_norm": 10.626359939575195, "learning_rate": 3.5303686715298955e-05, "loss": 1.3108, "step": 940 }, { "epoch": 0.9592252803261978, "grad_norm": 6.316555023193359, "learning_rate": 3.51236941789637e-05, "loss": 0.3018, "step": 941 }, { "epoch": 0.9602446483180428, "grad_norm": 7.12025785446167, "learning_rate": 3.494391289882435e-05, "loss": 0.6258, "step": 942 }, { "epoch": 0.9612640163098879, "grad_norm": 10.008544921875, "learning_rate": 3.476434542792805e-05, "loss": 1.2266, "step": 943 }, { "epoch": 0.9622833843017329, "grad_norm": 8.917716979980469, "learning_rate": 3.4584994316285604e-05, "loss": 0.6593, "step": 944 }, { "epoch": 0.963302752293578, "grad_norm": 5.837446689605713, "learning_rate": 3.4405862110835364e-05, "loss": 0.3096, "step": 945 }, { "epoch": 0.9643221202854231, "grad_norm": 4.312796115875244, "learning_rate": 3.422695135540697e-05, "loss": 0.3436, "step": 946 }, { "epoch": 0.9653414882772681, "grad_norm": 4.772927284240723, "learning_rate": 3.404826459068536e-05, "loss": 0.2497, "step": 947 }, { "epoch": 0.9663608562691132, "grad_norm": 3.3676137924194336, "learning_rate": 3.386980435417457e-05, "loss": 0.1653, "step": 948 }, { "epoch": 0.9673802242609582, "grad_norm": 6.203863143920898, "learning_rate": 3.369157318016176e-05, "loss": 0.469, "step": 949 }, { "epoch": 0.9683995922528033, "grad_norm": 7.628493309020996, "learning_rate": 3.351357359968117e-05, "loss": 0.4919, "step": 950 }, { "epoch": 0.9694189602446484, "grad_norm": 7.940287113189697, "learning_rate": 3.333580814047826e-05, "loss": 0.4788, "step": 951 }, { "epoch": 0.9704383282364933, "grad_norm": 6.046499729156494, "learning_rate": 3.3158279326973766e-05, "loss": 0.3041, "step": 952 }, { "epoch": 0.9714576962283384, "grad_norm": 4.314492225646973, "learning_rate": 3.298098968022782e-05, "loss": 0.3138, 
"step": 953 }, { "epoch": 0.9724770642201835, "grad_norm": 8.91407585144043, "learning_rate": 3.2803941717904216e-05, "loss": 0.7758, "step": 954 }, { "epoch": 0.9734964322120285, "grad_norm": 11.913896560668945, "learning_rate": 3.26271379542346e-05, "loss": 0.6974, "step": 955 }, { "epoch": 0.9745158002038736, "grad_norm": 4.831221580505371, "learning_rate": 3.2450580899982795e-05, "loss": 0.2964, "step": 956 }, { "epoch": 0.9755351681957186, "grad_norm": 6.116502285003662, "learning_rate": 3.2274273062409154e-05, "loss": 0.3473, "step": 957 }, { "epoch": 0.9765545361875637, "grad_norm": 11.75236988067627, "learning_rate": 3.2098216945234946e-05, "loss": 0.8905, "step": 958 }, { "epoch": 0.9775739041794088, "grad_norm": 3.468975067138672, "learning_rate": 3.192241504860675e-05, "loss": 0.2521, "step": 959 }, { "epoch": 0.9785932721712538, "grad_norm": 7.624709606170654, "learning_rate": 3.1746869869061063e-05, "loss": 0.4462, "step": 960 }, { "epoch": 0.9796126401630989, "grad_norm": 9.019265174865723, "learning_rate": 3.157158389948871e-05, "loss": 0.7842, "step": 961 }, { "epoch": 0.9806320081549439, "grad_norm": 4.77131986618042, "learning_rate": 3.1396559629099574e-05, "loss": 0.2973, "step": 962 }, { "epoch": 0.981651376146789, "grad_norm": 8.40596866607666, "learning_rate": 3.122179954338716e-05, "loss": 0.6026, "step": 963 }, { "epoch": 0.9826707441386341, "grad_norm": 6.705322265625, "learning_rate": 3.1047306124093335e-05, "loss": 0.4026, "step": 964 }, { "epoch": 0.9836901121304791, "grad_norm": 10.35732364654541, "learning_rate": 3.087308184917308e-05, "loss": 0.9181, "step": 965 }, { "epoch": 0.9847094801223242, "grad_norm": 6.806704998016357, "learning_rate": 3.069912919275926e-05, "loss": 0.473, "step": 966 }, { "epoch": 0.9857288481141692, "grad_norm": 10.28345012664795, "learning_rate": 3.0525450625127575e-05, "loss": 0.7152, "step": 967 }, { "epoch": 0.9867482161060143, "grad_norm": 11.785171508789062, "learning_rate": 3.0352048612661416e-05, 
"loss": 0.9519, "step": 968 }, { "epoch": 0.9877675840978594, "grad_norm": 8.55274772644043, "learning_rate": 3.017892561781682e-05, "loss": 0.5322, "step": 969 }, { "epoch": 0.9887869520897044, "grad_norm": 8.597644805908203, "learning_rate": 3.0006084099087595e-05, "loss": 0.8257, "step": 970 }, { "epoch": 0.9898063200815495, "grad_norm": 6.743808746337891, "learning_rate": 2.983352651097031e-05, "loss": 0.5648, "step": 971 }, { "epoch": 0.9908256880733946, "grad_norm": 10.981080055236816, "learning_rate": 2.9661255303929486e-05, "loss": 0.909, "step": 972 }, { "epoch": 0.9918450560652395, "grad_norm": 9.426305770874023, "learning_rate": 2.948927292436281e-05, "loss": 0.5531, "step": 973 }, { "epoch": 0.9928644240570846, "grad_norm": 5.883917331695557, "learning_rate": 2.9317581814566323e-05, "loss": 0.2709, "step": 974 }, { "epoch": 0.9938837920489296, "grad_norm": 7.118516445159912, "learning_rate": 2.9146184412699855e-05, "loss": 0.7348, "step": 975 }, { "epoch": 0.9949031600407747, "grad_norm": 5.449122428894043, "learning_rate": 2.8975083152752258e-05, "loss": 0.2765, "step": 976 }, { "epoch": 0.9959225280326198, "grad_norm": 7.071670055389404, "learning_rate": 2.880428046450699e-05, "loss": 0.4227, "step": 977 }, { "epoch": 0.9969418960244648, "grad_norm": 4.812356948852539, "learning_rate": 2.863377877350747e-05, "loss": 0.252, "step": 978 }, { "epoch": 0.9979612640163099, "grad_norm": 5.169205188751221, "learning_rate": 2.8463580501022748e-05, "loss": 0.3303, "step": 979 }, { "epoch": 0.9989806320081549, "grad_norm": 7.911106109619141, "learning_rate": 2.8293688064013062e-05, "loss": 0.4636, "step": 980 }, { "epoch": 0.9989806320081549, "eval_Qnli-dev-1024_cosine_accuracy": 0.75, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8305908441543579, "eval_Qnli-dev-1024_cosine_ap": 0.7944254663147428, "eval_Qnli-dev-1024_cosine_f1": 0.7474747474747475, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7933956980705261, "eval_Qnli-dev-1024_cosine_mcc": 
0.49179033209958445, "eval_Qnli-dev-1024_cosine_precision": 0.6851851851851852, "eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222, "eval_Qnli-dev_cosine_accuracy": 0.7604166666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7133668661117554, "eval_Qnli-dev_cosine_ap": 0.7661051082646182, "eval_Qnli-dev_cosine_f1": 0.7526881720430108, "eval_Qnli-dev_cosine_f1_threshold": 0.7133668661117554, "eval_Qnli-dev_cosine_mcc": 0.5218535759042912, "eval_Qnli-dev_cosine_precision": 0.7291666666666666, "eval_Qnli-dev_cosine_recall": 0.7777777777777778, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, "eval_allNLI-triplets_cosine_accuracy": 0.9479166865348816, "eval_global_dataset_loss": 0.3123040199279785, "eval_global_dataset_runtime": 103.8412, "eval_global_dataset_samples_per_second": 7.733, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9479166865348816, "eval_sts-test-1024_pearson_cosine": 0.8847345798228932, "eval_sts-test-1024_spearman_cosine": 0.9153180610643749, "eval_sts-test_pearson_cosine": 0.9064074468577172, "eval_sts-test_spearman_cosine": 0.9206390660127711, "step": 980 }, { "epoch": 1.0, "grad_norm": 12.788724899291992, "learning_rate": 2.8124103875095527e-05, "loss": 1.3317, "step": 981 }, { "epoch": 1.001019367991845, "grad_norm": 4.531240940093994, "learning_rate": 2.7954830342509875e-05, "loss": 0.2939, "step": 982 }, { "epoch": 1.0020387359836902, "grad_norm": 4.971615314483643, "learning_rate": 2.7785869870084213e-05, "loss": 0.3436, "step": 983 }, { "epoch": 1.003058103975535, "grad_norm": 6.3732805252075195, "learning_rate": 2.761722485720099e-05, "loss": 0.6107, "step": 984 }, { "epoch": 1.0040774719673802, "grad_norm": 6.102477550506592, "learning_rate": 2.744889769876282e-05, "loss": 0.4759, "step": 985 }, { "epoch": 1.0050968399592253, "grad_norm": 8.986217498779297, "learning_rate": 2.728089078515854e-05, "loss": 0.4255, "step": 986 }, { "epoch": 1.0061162079510704, "grad_norm": 
4.163675308227539, "learning_rate": 2.7113206502229232e-05, "loss": 0.274, "step": 987 }, { "epoch": 1.0071355759429155, "grad_norm": 8.744937896728516, "learning_rate": 2.69458472312343e-05, "loss": 0.7007, "step": 988 }, { "epoch": 1.0081549439347604, "grad_norm": 3.9354403018951416, "learning_rate": 2.6778815348817776e-05, "loss": 0.2389, "step": 989 }, { "epoch": 1.0091743119266054, "grad_norm": 8.15425968170166, "learning_rate": 2.6612113226974443e-05, "loss": 0.6767, "step": 990 }, { "epoch": 1.0101936799184505, "grad_norm": 2.782219648361206, "learning_rate": 2.6445743233016234e-05, "loss": 0.1771, "step": 991 }, { "epoch": 1.0112130479102956, "grad_norm": 7.419161319732666, "learning_rate": 2.6279707729538592e-05, "loss": 0.3375, "step": 992 }, { "epoch": 1.0122324159021407, "grad_norm": 4.217768669128418, "learning_rate": 2.6114009074386846e-05, "loss": 0.3619, "step": 993 }, { "epoch": 1.0132517838939856, "grad_norm": 1.7565678358078003, "learning_rate": 2.5948649620622868e-05, "loss": 0.1643, "step": 994 }, { "epoch": 1.0142711518858307, "grad_norm": 6.78450870513916, "learning_rate": 2.5783631716491553e-05, "loss": 0.5588, "step": 995 }, { "epoch": 1.0152905198776758, "grad_norm": 6.4786577224731445, "learning_rate": 2.5618957705387465e-05, "loss": 0.4759, "step": 996 }, { "epoch": 1.016309887869521, "grad_norm": 3.9980854988098145, "learning_rate": 2.5454629925821673e-05, "loss": 0.2266, "step": 997 }, { "epoch": 1.017329255861366, "grad_norm": 4.7267608642578125, "learning_rate": 2.5290650711388374e-05, "loss": 0.3953, "step": 998 }, { "epoch": 1.018348623853211, "grad_norm": 3.118359088897705, "learning_rate": 2.5127022390731913e-05, "loss": 0.2146, "step": 999 }, { "epoch": 1.019367991845056, "grad_norm": 7.00241231918335, "learning_rate": 2.4963747287513633e-05, "loss": 0.8455, "step": 1000 }, { "epoch": 1.020387359836901, "grad_norm": 4.788176536560059, "learning_rate": 2.4800827720378843e-05, "loss": 0.3804, "step": 1001 }, { "epoch": 
1.0214067278287462, "grad_norm": 6.446220397949219, "learning_rate": 2.4638266002923983e-05, "loss": 0.429, "step": 1002 }, { "epoch": 1.0224260958205913, "grad_norm": 8.392274856567383, "learning_rate": 2.4476064443663714e-05, "loss": 0.5209, "step": 1003 }, { "epoch": 1.0234454638124364, "grad_norm": 11.21457290649414, "learning_rate": 2.431422534599815e-05, "loss": 0.7556, "step": 1004 }, { "epoch": 1.0244648318042813, "grad_norm": 5.634324073791504, "learning_rate": 2.415275100818013e-05, "loss": 0.4399, "step": 1005 }, { "epoch": 1.0254841997961264, "grad_norm": 6.49767541885376, "learning_rate": 2.3991643723282576e-05, "loss": 0.3612, "step": 1006 }, { "epoch": 1.0265035677879715, "grad_norm": 3.142667531967163, "learning_rate": 2.3830905779165997e-05, "loss": 0.175, "step": 1007 }, { "epoch": 1.0275229357798166, "grad_norm": 2.9294066429138184, "learning_rate": 2.3670539458445883e-05, "loss": 0.1849, "step": 1008 }, { "epoch": 1.0285423037716617, "grad_norm": 10.674302101135254, "learning_rate": 2.3510547038460405e-05, "loss": 1.0634, "step": 1009 }, { "epoch": 1.0295616717635065, "grad_norm": 5.9518818855285645, "learning_rate": 2.3350930791238012e-05, "loss": 0.6881, "step": 1010 }, { "epoch": 1.0305810397553516, "grad_norm": 4.818733215332031, "learning_rate": 2.319169298346518e-05, "loss": 0.2433, "step": 1011 }, { "epoch": 1.0316004077471967, "grad_norm": 4.729939937591553, "learning_rate": 2.303283587645424e-05, "loss": 0.2113, "step": 1012 }, { "epoch": 1.0326197757390418, "grad_norm": 2.8469977378845215, "learning_rate": 2.2874361726111194e-05, "loss": 0.1412, "step": 1013 }, { "epoch": 1.033639143730887, "grad_norm": 6.6002655029296875, "learning_rate": 2.2716272782903806e-05, "loss": 0.4273, "step": 1014 }, { "epoch": 1.0346585117227318, "grad_norm": 5.73666524887085, "learning_rate": 2.25585712918295e-05, "loss": 0.2649, "step": 1015 }, { "epoch": 1.035677879714577, "grad_norm": 10.085776329040527, "learning_rate": 2.2401259492383593e-05, "loss": 
0.6387, "step": 1016 }, { "epoch": 1.036697247706422, "grad_norm": 6.065640449523926, "learning_rate": 2.224433961852747e-05, "loss": 0.2274, "step": 1017 }, { "epoch": 1.0377166156982671, "grad_norm": 4.713268280029297, "learning_rate": 2.2087813898656774e-05, "loss": 0.2802, "step": 1018 }, { "epoch": 1.0387359836901122, "grad_norm": 2.990431547164917, "learning_rate": 2.1931684555569877e-05, "loss": 0.1943, "step": 1019 }, { "epoch": 1.039755351681957, "grad_norm": 8.173871040344238, "learning_rate": 2.1775953806436265e-05, "loss": 0.4127, "step": 1020 }, { "epoch": 1.0407747196738022, "grad_norm": 3.7410459518432617, "learning_rate": 2.1620623862765006e-05, "loss": 0.1905, "step": 1021 }, { "epoch": 1.0417940876656473, "grad_norm": 6.727060794830322, "learning_rate": 2.146569693037343e-05, "loss": 0.3222, "step": 1022 }, { "epoch": 1.0428134556574924, "grad_norm": 4.430773735046387, "learning_rate": 2.1311175209355755e-05, "loss": 0.1848, "step": 1023 }, { "epoch": 1.0438328236493375, "grad_norm": 2.9469778537750244, "learning_rate": 2.115706089405185e-05, "loss": 0.177, "step": 1024 }, { "epoch": 1.0448521916411824, "grad_norm": 7.147342681884766, "learning_rate": 2.1003356173016098e-05, "loss": 0.2783, "step": 1025 }, { "epoch": 1.0458715596330275, "grad_norm": 8.615937232971191, "learning_rate": 2.0850063228986234e-05, "loss": 0.3611, "step": 1026 }, { "epoch": 1.0468909276248726, "grad_norm": 5.674496173858643, "learning_rate": 2.0697184238852468e-05, "loss": 0.2966, "step": 1027 }, { "epoch": 1.0479102956167177, "grad_norm": 6.515853404998779, "learning_rate": 2.054472137362649e-05, "loss": 0.2342, "step": 1028 }, { "epoch": 1.0489296636085628, "grad_norm": 2.8067984580993652, "learning_rate": 2.0392676798410677e-05, "loss": 0.1982, "step": 1029 }, { "epoch": 1.0499490316004076, "grad_norm": 5.038998126983643, "learning_rate": 2.0241052672367327e-05, "loss": 0.2343, "step": 1030 }, { "epoch": 1.0509683995922527, "grad_norm": 8.588050842285156, 
"learning_rate": 2.0089851148687965e-05, "loss": 0.5463, "step": 1031 }, { "epoch": 1.0519877675840978, "grad_norm": 6.627236843109131, "learning_rate": 1.993907437456285e-05, "loss": 0.5568, "step": 1032 }, { "epoch": 1.053007135575943, "grad_norm": 7.842597484588623, "learning_rate": 1.9788724491150423e-05, "loss": 0.5468, "step": 1033 }, { "epoch": 1.054026503567788, "grad_norm": 3.9404547214508057, "learning_rate": 1.9638803633546933e-05, "loss": 0.2003, "step": 1034 }, { "epoch": 1.0550458715596331, "grad_norm": 6.290144443511963, "learning_rate": 1.948931393075603e-05, "loss": 0.2297, "step": 1035 }, { "epoch": 1.056065239551478, "grad_norm": 6.990847110748291, "learning_rate": 1.9340257505658667e-05, "loss": 0.5335, "step": 1036 }, { "epoch": 1.0570846075433231, "grad_norm": 7.3486247062683105, "learning_rate": 1.9191636474982883e-05, "loss": 0.5115, "step": 1037 }, { "epoch": 1.0581039755351682, "grad_norm": 6.156608581542969, "learning_rate": 1.9043452949273687e-05, "loss": 0.4874, "step": 1038 }, { "epoch": 1.0591233435270133, "grad_norm": 8.564006805419922, "learning_rate": 1.889570903286322e-05, "loss": 0.8818, "step": 1039 }, { "epoch": 1.0601427115188584, "grad_norm": 3.6055350303649902, "learning_rate": 1.8748406823840726e-05, "loss": 0.1726, "step": 1040 }, { "epoch": 1.0611620795107033, "grad_norm": 5.525221824645996, "learning_rate": 1.860154841402288e-05, "loss": 0.3581, "step": 1041 }, { "epoch": 1.0621814475025484, "grad_norm": 3.264824390411377, "learning_rate": 1.8455135888924013e-05, "loss": 0.1545, "step": 1042 }, { "epoch": 1.0632008154943935, "grad_norm": 6.2633891105651855, "learning_rate": 1.8309171327726522e-05, "loss": 0.6471, "step": 1043 }, { "epoch": 1.0642201834862386, "grad_norm": 8.516809463500977, "learning_rate": 1.816365680325134e-05, "loss": 0.8047, "step": 1044 }, { "epoch": 1.0652395514780837, "grad_norm": 6.006096839904785, "learning_rate": 1.8018594381928444e-05, "loss": 0.3753, "step": 1045 }, { "epoch": 
1.0662589194699286, "grad_norm": 8.395868301391602, "learning_rate": 1.7873986123767648e-05, "loss": 0.4489, "step": 1046 }, { "epoch": 1.0672782874617737, "grad_norm": 2.2208871841430664, "learning_rate": 1.7729834082329184e-05, "loss": 0.133, "step": 1047 }, { "epoch": 1.0682976554536188, "grad_norm": 4.6480913162231445, "learning_rate": 1.7586140304694655e-05, "loss": 0.3512, "step": 1048 }, { "epoch": 1.0693170234454639, "grad_norm": 5.258199214935303, "learning_rate": 1.7442906831437927e-05, "loss": 0.2497, "step": 1049 }, { "epoch": 1.070336391437309, "grad_norm": 5.950115203857422, "learning_rate": 1.730013569659616e-05, "loss": 0.298, "step": 1050 }, { "epoch": 1.0713557594291538, "grad_norm": 5.7246994972229, "learning_rate": 1.715782892764092e-05, "loss": 0.2226, "step": 1051 }, { "epoch": 1.072375127420999, "grad_norm": 4.580132484436035, "learning_rate": 1.7015988545449318e-05, "loss": 0.3375, "step": 1052 }, { "epoch": 1.073394495412844, "grad_norm": 3.1227149963378906, "learning_rate": 1.6874616564275463e-05, "loss": 0.2353, "step": 1053 }, { "epoch": 1.0744138634046891, "grad_norm": 6.9805908203125, "learning_rate": 1.673371499172174e-05, "loss": 0.3775, "step": 1054 }, { "epoch": 1.0754332313965342, "grad_norm": 5.377047538757324, "learning_rate": 1.6593285828710298e-05, "loss": 0.4914, "step": 1055 }, { "epoch": 1.0764525993883791, "grad_norm": 5.249961853027344, "learning_rate": 1.6453331069454718e-05, "loss": 0.2293, "step": 1056 }, { "epoch": 1.0774719673802242, "grad_norm": 4.464038848876953, "learning_rate": 1.6313852701431597e-05, "loss": 0.3663, "step": 1057 }, { "epoch": 1.0784913353720693, "grad_norm": 3.9056015014648438, "learning_rate": 1.6174852705352418e-05, "loss": 0.4417, "step": 1058 }, { "epoch": 1.0795107033639144, "grad_norm": 2.9351861476898193, "learning_rate": 1.603633305513536e-05, "loss": 0.1897, "step": 1059 }, { "epoch": 1.0805300713557595, "grad_norm": 7.004980564117432, "learning_rate": 1.5898295717877255e-05, "loss": 
0.6545, "step": 1060 }, { "epoch": 1.0815494393476044, "grad_norm": 3.4823687076568604, "learning_rate": 1.5760742653825706e-05, "loss": 0.2848, "step": 1061 }, { "epoch": 1.0825688073394495, "grad_norm": 7.051894187927246, "learning_rate": 1.5623675816351224e-05, "loss": 0.4413, "step": 1062 }, { "epoch": 1.0835881753312946, "grad_norm": 4.416322708129883, "learning_rate": 1.5487097151919494e-05, "loss": 0.2769, "step": 1063 }, { "epoch": 1.0846075433231397, "grad_norm": 3.4366087913513184, "learning_rate": 1.5351008600063728e-05, "loss": 0.2127, "step": 1064 }, { "epoch": 1.0856269113149848, "grad_norm": 4.421938419342041, "learning_rate": 1.5215412093357084e-05, "loss": 0.3111, "step": 1065 }, { "epoch": 1.0866462793068297, "grad_norm": 3.5228912830352783, "learning_rate": 1.5080309557385303e-05, "loss": 0.2097, "step": 1066 }, { "epoch": 1.0876656472986748, "grad_norm": 4.430333614349365, "learning_rate": 1.4945702910719334e-05, "loss": 0.2142, "step": 1067 }, { "epoch": 1.0886850152905199, "grad_norm": 3.3618974685668945, "learning_rate": 1.4811594064888019e-05, "loss": 0.3016, "step": 1068 }, { "epoch": 1.089704383282365, "grad_norm": 6.342488765716553, "learning_rate": 1.467798492435104e-05, "loss": 0.1612, "step": 1069 }, { "epoch": 1.09072375127421, "grad_norm": 5.643674850463867, "learning_rate": 1.4544877386471856e-05, "loss": 0.2357, "step": 1070 }, { "epoch": 1.091743119266055, "grad_norm": 6.614534378051758, "learning_rate": 1.4412273341490706e-05, "loss": 0.6814, "step": 1071 }, { "epoch": 1.0927624872579, "grad_norm": 1.9248261451721191, "learning_rate": 1.4280174672497837e-05, "loss": 0.0978, "step": 1072 }, { "epoch": 1.0937818552497451, "grad_norm": 9.543230056762695, "learning_rate": 1.4148583255406684e-05, "loss": 0.8243, "step": 1073 }, { "epoch": 1.0948012232415902, "grad_norm": 3.6566359996795654, "learning_rate": 1.4017500958927298e-05, "loss": 0.1563, "step": 1074 }, { "epoch": 1.0958205912334353, "grad_norm": 7.203381538391113, 
"learning_rate": 1.3886929644539798e-05, "loss": 0.2596, "step": 1075 }, { "epoch": 1.0968399592252802, "grad_norm": 3.83259654045105, "learning_rate": 1.3756871166467894e-05, "loss": 0.1584, "step": 1076 }, { "epoch": 1.0978593272171253, "grad_norm": 6.46610164642334, "learning_rate": 1.3627327371652643e-05, "loss": 0.4703, "step": 1077 }, { "epoch": 1.0988786952089704, "grad_norm": 11.403766632080078, "learning_rate": 1.3498300099726042e-05, "loss": 1.0745, "step": 1078 }, { "epoch": 1.0988786952089704, "eval_Qnli-dev-1024_cosine_accuracy": 0.75, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8006778955459595, "eval_Qnli-dev-1024_cosine_ap": 0.7824068286827124, "eval_Qnli-dev-1024_cosine_f1": 0.7289719626168225, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7280842661857605, "eval_Qnli-dev-1024_cosine_mcc": 0.43373226132862797, "eval_Qnli-dev-1024_cosine_precision": 0.6290322580645161, "eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6900060772895813, "eval_Qnli-dev_cosine_ap": 0.7604401746827563, "eval_Qnli-dev_cosine_f1": 0.7422680412371134, "eval_Qnli-dev_cosine_f1_threshold": 0.6807612776756287, "eval_Qnli-dev_cosine_mcc": 0.48701780569984915, "eval_Qnli-dev_cosine_precision": 0.6923076923076923, "eval_Qnli-dev_cosine_recall": 0.8, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9479166865348816, "eval_global_dataset_loss": 0.3292054533958435, "eval_global_dataset_runtime": 103.8809, "eval_global_dataset_samples_per_second": 7.73, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8872344763338829, "eval_sts-test-1024_spearman_cosine": 0.913351335857396, "eval_sts-test_pearson_cosine": 0.9070699316238475, "eval_sts-test_spearman_cosine": 0.9203335932694187, "step": 1078 }, { "epoch": 1.0998980632008155, "grad_norm": 
5.55651330947876, "learning_rate": 1.3369791182985136e-05, "loss": 0.3291, "step": 1079 }, { "epoch": 1.1009174311926606, "grad_norm": 4.600091934204102, "learning_rate": 1.3241802446365853e-05, "loss": 0.2577, "step": 1080 }, { "epoch": 1.1019367991845055, "grad_norm": 3.14005446434021, "learning_rate": 1.3114335707417108e-05, "loss": 0.3184, "step": 1081 }, { "epoch": 1.1029561671763506, "grad_norm": 8.673876762390137, "learning_rate": 1.2987392776275025e-05, "loss": 0.5004, "step": 1082 }, { "epoch": 1.1039755351681957, "grad_norm": 3.660937547683716, "learning_rate": 1.286097545563718e-05, "loss": 0.1591, "step": 1083 }, { "epoch": 1.1049949031600408, "grad_norm": 6.43077278137207, "learning_rate": 1.2735085540737063e-05, "loss": 0.2775, "step": 1084 }, { "epoch": 1.1060142711518859, "grad_norm": 5.092467308044434, "learning_rate": 1.2609724819318542e-05, "loss": 0.4079, "step": 1085 }, { "epoch": 1.107033639143731, "grad_norm": 9.57333755493164, "learning_rate": 1.2484895071610486e-05, "loss": 0.7756, "step": 1086 }, { "epoch": 1.1080530071355759, "grad_norm": 8.876251220703125, "learning_rate": 1.236059807030151e-05, "loss": 0.7169, "step": 1087 }, { "epoch": 1.109072375127421, "grad_norm": 2.4624123573303223, "learning_rate": 1.2236835580514717e-05, "loss": 0.1473, "step": 1088 }, { "epoch": 1.110091743119266, "grad_norm": 6.119423866271973, "learning_rate": 1.2113609359782757e-05, "loss": 0.4862, "step": 1089 }, { "epoch": 1.1111111111111112, "grad_norm": 7.974935054779053, "learning_rate": 1.1990921158022766e-05, "loss": 0.6355, "step": 1090 }, { "epoch": 1.1121304791029563, "grad_norm": 4.422234535217285, "learning_rate": 1.1868772717511568e-05, "loss": 0.2046, "step": 1091 }, { "epoch": 1.1131498470948011, "grad_norm": 3.5344936847686768, "learning_rate": 1.1747165772860941e-05, "loss": 0.2609, "step": 1092 }, { "epoch": 1.1141692150866462, "grad_norm": 10.226187705993652, "learning_rate": 1.1626102050992877e-05, "loss": 0.6422, "step": 1093 }, { 
"epoch": 1.1151885830784913, "grad_norm": 4.9040961265563965, "learning_rate": 1.1505583271115228e-05, "loss": 0.2296, "step": 1094 }, { "epoch": 1.1162079510703364, "grad_norm": 3.2139878273010254, "learning_rate": 1.1385611144697178e-05, "loss": 0.2221, "step": 1095 }, { "epoch": 1.1172273190621815, "grad_norm": 2.8489925861358643, "learning_rate": 1.1266187375444964e-05, "loss": 0.142, "step": 1096 }, { "epoch": 1.1182466870540264, "grad_norm": 4.342340469360352, "learning_rate": 1.1147313659277715e-05, "loss": 0.4458, "step": 1097 }, { "epoch": 1.1192660550458715, "grad_norm": 7.662566661834717, "learning_rate": 1.1028991684303259e-05, "loss": 0.6697, "step": 1098 }, { "epoch": 1.1202854230377166, "grad_norm": 6.858183860778809, "learning_rate": 1.0911223130794307e-05, "loss": 0.3055, "step": 1099 }, { "epoch": 1.1213047910295617, "grad_norm": 7.914067268371582, "learning_rate": 1.0794009671164484e-05, "loss": 0.7791, "step": 1100 }, { "epoch": 1.1223241590214068, "grad_norm": 3.899747610092163, "learning_rate": 1.0677352969944627e-05, "loss": 0.2956, "step": 1101 }, { "epoch": 1.1233435270132517, "grad_norm": 6.076767444610596, "learning_rate": 1.0561254683759114e-05, "loss": 0.1481, "step": 1102 }, { "epoch": 1.1243628950050968, "grad_norm": 8.103065490722656, "learning_rate": 1.0445716461302347e-05, "loss": 0.6428, "step": 1103 }, { "epoch": 1.1253822629969419, "grad_norm": 5.3769636154174805, "learning_rate": 1.033073994331536e-05, "loss": 0.5238, "step": 1104 }, { "epoch": 1.126401630988787, "grad_norm": 7.144674777984619, "learning_rate": 1.0216326762562512e-05, "loss": 0.6283, "step": 1105 }, { "epoch": 1.127420998980632, "grad_norm": 8.02340030670166, "learning_rate": 1.010247854380828e-05, "loss": 0.442, "step": 1106 }, { "epoch": 1.1284403669724772, "grad_norm": 3.026409149169922, "learning_rate": 9.989196903794217e-06, "loss": 0.2222, "step": 1107 }, { "epoch": 1.129459734964322, "grad_norm": 6.199506759643555, "learning_rate": 9.876483451215945e-06, 
"loss": 0.3275, "step": 1108 }, { "epoch": 1.1304791029561672, "grad_norm": 3.136950969696045, "learning_rate": 9.764339786700372e-06, "loss": 0.1822, "step": 1109 }, { "epoch": 1.1314984709480123, "grad_norm": 7.81477689743042, "learning_rate": 9.652767502782916e-06, "loss": 0.4456, "step": 1110 }, { "epoch": 1.1325178389398574, "grad_norm": 4.547238349914551, "learning_rate": 9.541768183884913e-06, "loss": 0.3127, "step": 1111 }, { "epoch": 1.1335372069317025, "grad_norm": 4.855820655822754, "learning_rate": 9.431343406291115e-06, "loss": 0.2407, "step": 1112 }, { "epoch": 1.1345565749235473, "grad_norm": 4.511462211608887, "learning_rate": 9.321494738127257e-06, "loss": 0.2194, "step": 1113 }, { "epoch": 1.1355759429153924, "grad_norm": 12.011505126953125, "learning_rate": 9.212223739337883e-06, "loss": 0.9654, "step": 1114 }, { "epoch": 1.1365953109072375, "grad_norm": 8.190558433532715, "learning_rate": 9.103531961664118e-06, "loss": 0.6017, "step": 1115 }, { "epoch": 1.1376146788990826, "grad_norm": 7.281464576721191, "learning_rate": 8.99542094862164e-06, "loss": 0.2722, "step": 1116 }, { "epoch": 1.1386340468909277, "grad_norm": 2.0321619510650635, "learning_rate": 8.887892235478817e-06, "loss": 0.1289, "step": 1117 }, { "epoch": 1.1396534148827726, "grad_norm": 9.805882453918457, "learning_rate": 8.780947349234797e-06, "loss": 0.6156, "step": 1118 }, { "epoch": 1.1406727828746177, "grad_norm": 5.596035003662109, "learning_rate": 8.67458780859795e-06, "loss": 0.3163, "step": 1119 }, { "epoch": 1.1416921508664628, "grad_norm": 5.5199785232543945, "learning_rate": 8.568815123964225e-06, "loss": 0.3798, "step": 1120 }, { "epoch": 1.142711518858308, "grad_norm": 6.384104251861572, "learning_rate": 8.463630797395705e-06, "loss": 0.3098, "step": 1121 }, { "epoch": 1.143730886850153, "grad_norm": 3.7240169048309326, "learning_rate": 8.35903632259929e-06, "loss": 0.1738, "step": 1122 }, { "epoch": 1.1447502548419979, "grad_norm": 4.985114097595215, "learning_rate": 
8.255033184905481e-06, "loss": 0.3891, "step": 1123 }, { "epoch": 1.145769622833843, "grad_norm": 2.9914612770080566, "learning_rate": 8.151622861247304e-06, "loss": 0.2051, "step": 1124 }, { "epoch": 1.146788990825688, "grad_norm": 7.231305122375488, "learning_rate": 8.04880682013931e-06, "loss": 0.4286, "step": 1125 }, { "epoch": 1.1478083588175332, "grad_norm": 4.831548690795898, "learning_rate": 7.946586521656751e-06, "loss": 0.4055, "step": 1126 }, { "epoch": 1.1488277268093783, "grad_norm": 7.110738754272461, "learning_rate": 7.84496341741478e-06, "loss": 0.6177, "step": 1127 }, { "epoch": 1.1498470948012232, "grad_norm": 4.9150919914245605, "learning_rate": 7.743938950547925e-06, "loss": 0.2766, "step": 1128 }, { "epoch": 1.1508664627930683, "grad_norm": 3.1030569076538086, "learning_rate": 7.643514555689552e-06, "loss": 0.1609, "step": 1129 }, { "epoch": 1.1518858307849134, "grad_norm": 6.362361431121826, "learning_rate": 7.543691658951479e-06, "loss": 0.2926, "step": 1130 }, { "epoch": 1.1529051987767585, "grad_norm": 6.581395626068115, "learning_rate": 7.444471677903775e-06, "loss": 0.3756, "step": 1131 }, { "epoch": 1.1539245667686036, "grad_norm": 7.190741062164307, "learning_rate": 7.345856021554509e-06, "loss": 0.3885, "step": 1132 }, { "epoch": 1.1549439347604484, "grad_norm": 6.979515552520752, "learning_rate": 7.247846090329913e-06, "loss": 0.2404, "step": 1133 }, { "epoch": 1.1559633027522935, "grad_norm": 5.002467632293701, "learning_rate": 7.150443276054369e-06, "loss": 0.2946, "step": 1134 }, { "epoch": 1.1569826707441386, "grad_norm": 3.771885395050049, "learning_rate": 7.053648961930681e-06, "loss": 0.4079, "step": 1135 }, { "epoch": 1.1580020387359837, "grad_norm": 8.6467866897583, "learning_rate": 6.9574645225204735e-06, "loss": 0.3785, "step": 1136 }, { "epoch": 1.1590214067278288, "grad_norm": 6.50823974609375, "learning_rate": 6.861891323724551e-06, "loss": 0.2116, "step": 1137 }, { "epoch": 1.1600407747196737, "grad_norm": 
3.70011568069458, "learning_rate": 6.766930722763642e-06, "loss": 0.2715, "step": 1138 }, { "epoch": 1.1610601427115188, "grad_norm": 3.5144193172454834, "learning_rate": 6.672584068159055e-06, "loss": 0.1894, "step": 1139 }, { "epoch": 1.162079510703364, "grad_norm": 9.98049545288086, "learning_rate": 6.578852699713539e-06, "loss": 0.4822, "step": 1140 }, { "epoch": 1.163098878695209, "grad_norm": 5.186913967132568, "learning_rate": 6.4857379484922486e-06, "loss": 0.2893, "step": 1141 }, { "epoch": 1.164118246687054, "grad_norm": 11.172896385192871, "learning_rate": 6.3932411368038455e-06, "loss": 0.993, "step": 1142 }, { "epoch": 1.165137614678899, "grad_norm": 8.763799667358398, "learning_rate": 6.3013635781817234e-06, "loss": 0.5991, "step": 1143 }, { "epoch": 1.166156982670744, "grad_norm": 8.425676345825195, "learning_rate": 6.210106577365382e-06, "loss": 0.7188, "step": 1144 }, { "epoch": 1.1671763506625892, "grad_norm": 3.8981945514678955, "learning_rate": 6.119471430281837e-06, "loss": 0.1915, "step": 1145 }, { "epoch": 1.1681957186544343, "grad_norm": 9.766255378723145, "learning_rate": 6.0294594240272895e-06, "loss": 0.7058, "step": 1146 }, { "epoch": 1.1692150866462794, "grad_norm": 6.730464935302734, "learning_rate": 5.940071836848759e-06, "loss": 0.5334, "step": 1147 }, { "epoch": 1.1702344546381243, "grad_norm": 5.2849884033203125, "learning_rate": 5.851309938126031e-06, "loss": 0.4142, "step": 1148 }, { "epoch": 1.1712538226299694, "grad_norm": 9.674934387207031, "learning_rate": 5.763174988353565e-06, "loss": 0.4762, "step": 1149 }, { "epoch": 1.1722731906218145, "grad_norm": 8.052671432495117, "learning_rate": 5.675668239122606e-06, "loss": 0.4203, "step": 1150 }, { "epoch": 1.1732925586136596, "grad_norm": 5.496038436889648, "learning_rate": 5.588790933103444e-06, "loss": 0.2975, "step": 1151 }, { "epoch": 1.1743119266055047, "grad_norm": 6.982463359832764, "learning_rate": 5.502544304027701e-06, "loss": 0.3294, "step": 1152 }, { "epoch": 
1.1753312945973495, "grad_norm": 6.534660816192627, "learning_rate": 5.41692957667086e-06, "loss": 0.4958, "step": 1153 }, { "epoch": 1.1763506625891946, "grad_norm": 5.204538345336914, "learning_rate": 5.3319479668348775e-06, "loss": 0.2991, "step": 1154 }, { "epoch": 1.1773700305810397, "grad_norm": 6.391998767852783, "learning_rate": 5.247600681330905e-06, "loss": 0.3466, "step": 1155 }, { "epoch": 1.1783893985728848, "grad_norm": 5.729480266571045, "learning_rate": 5.16388891796214e-06, "loss": 0.3326, "step": 1156 }, { "epoch": 1.17940876656473, "grad_norm": 7.996901988983154, "learning_rate": 5.0808138655068115e-06, "loss": 0.4252, "step": 1157 }, { "epoch": 1.1804281345565748, "grad_norm": 1.6697102785110474, "learning_rate": 4.99837670370133e-06, "loss": 0.1011, "step": 1158 }, { "epoch": 1.18144750254842, "grad_norm": 6.206373691558838, "learning_rate": 4.916578603223515e-06, "loss": 0.3505, "step": 1159 }, { "epoch": 1.182466870540265, "grad_norm": 8.138467788696289, "learning_rate": 4.835420725675965e-06, "loss": 0.2831, "step": 1160 }, { "epoch": 1.18348623853211, "grad_norm": 4.128909587860107, "learning_rate": 4.754904223569584e-06, "loss": 0.2483, "step": 1161 }, { "epoch": 1.1845056065239552, "grad_norm": 5.585041522979736, "learning_rate": 4.67503024030716e-06, "loss": 0.2343, "step": 1162 }, { "epoch": 1.1855249745158003, "grad_norm": 9.036812782287598, "learning_rate": 4.5957999101672145e-06, "loss": 0.4812, "step": 1163 }, { "epoch": 1.1865443425076452, "grad_norm": 7.6726765632629395, "learning_rate": 4.517214358287825e-06, "loss": 0.6771, "step": 1164 }, { "epoch": 1.1875637104994903, "grad_norm": 9.784526824951172, "learning_rate": 4.439274700650659e-06, "loss": 0.6021, "step": 1165 }, { "epoch": 1.1885830784913354, "grad_norm": 4.780755996704102, "learning_rate": 4.361982044065166e-06, "loss": 0.3105, "step": 1166 }, { "epoch": 1.1896024464831805, "grad_norm": 5.239990711212158, "learning_rate": 4.2853374861527905e-06, "loss": 0.3328, 
"step": 1167 }, { "epoch": 1.1906218144750256, "grad_norm": 8.040908813476562, "learning_rate": 4.209342115331455e-06, "loss": 0.6799, "step": 1168 }, { "epoch": 1.1916411824668705, "grad_norm": 9.09835147857666, "learning_rate": 4.133997010800072e-06, "loss": 0.3449, "step": 1169 }, { "epoch": 1.1926605504587156, "grad_norm": 1.4003539085388184, "learning_rate": 4.0593032425231995e-06, "loss": 0.0486, "step": 1170 }, { "epoch": 1.1936799184505607, "grad_norm": 4.901974201202393, "learning_rate": 3.985261871215906e-06, "loss": 0.216, "step": 1171 }, { "epoch": 1.1946992864424058, "grad_norm": 4.057441711425781, "learning_rate": 3.9118739483285985e-06, "loss": 0.241, "step": 1172 }, { "epoch": 1.1957186544342508, "grad_norm": 3.083153247833252, "learning_rate": 3.83914051603223e-06, "loss": 0.1816, "step": 1173 }, { "epoch": 1.1967380224260957, "grad_norm": 4.460226535797119, "learning_rate": 3.767062607203392e-06, "loss": 0.1868, "step": 1174 }, { "epoch": 1.1977573904179408, "grad_norm": 4.221453666687012, "learning_rate": 3.695641245409709e-06, "loss": 0.2032, "step": 1175 }, { "epoch": 1.198776758409786, "grad_norm": 4.9512434005737305, "learning_rate": 3.624877444895275e-06, "loss": 0.3835, "step": 1176 }, { "epoch": 1.198776758409786, "eval_Qnli-dev-1024_cosine_accuracy": 0.75, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7976787090301514, "eval_Qnli-dev-1024_cosine_ap": 0.7822748522324331, "eval_Qnli-dev-1024_cosine_f1": 0.7238095238095237, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.728473961353302, "eval_Qnli-dev-1024_cosine_mcc": 0.42578476395267345, "eval_Qnli-dev-1024_cosine_precision": 0.6333333333333333, "eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6868179440498352, "eval_Qnli-dev_cosine_ap": 0.7613974691552876, "eval_Qnli-dev_cosine_f1": 0.7422680412371134, "eval_Qnli-dev_cosine_f1_threshold": 0.6748286485671997, "eval_Qnli-dev_cosine_mcc": 
0.48701780569984915, "eval_Qnli-dev_cosine_precision": 0.6923076923076923, "eval_Qnli-dev_cosine_recall": 0.8, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9479166865348816, "eval_global_dataset_loss": 0.31731289625167847, "eval_global_dataset_runtime": 103.9541, "eval_global_dataset_samples_per_second": 7.725, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8887238353668264, "eval_sts-test-1024_spearman_cosine": 0.9149437700482935, "eval_sts-test_pearson_cosine": 0.9069731992077504, "eval_sts-test_spearman_cosine": 0.9201751537259693, "step": 1176 }, { "epoch": 1.199796126401631, "grad_norm": 3.4483885765075684, "learning_rate": 3.554772210566221e-06, "loss": 0.1712, "step": 1177 }, { "epoch": 1.2008154943934761, "grad_norm": 2.9189963340759277, "learning_rate": 3.4853265379765133e-06, "loss": 0.2074, "step": 1178 }, { "epoch": 1.2018348623853212, "grad_norm": 7.722777366638184, "learning_rate": 3.4165414133137728e-06, "loss": 0.6232, "step": 1179 }, { "epoch": 1.202854230377166, "grad_norm": 5.288763046264648, "learning_rate": 3.348417813385274e-06, "loss": 0.4244, "step": 1180 }, { "epoch": 1.2038735983690112, "grad_norm": 9.635140419006348, "learning_rate": 3.2809567056040937e-06, "loss": 0.3264, "step": 1181 }, { "epoch": 1.2048929663608563, "grad_norm": 8.942253112792969, "learning_rate": 3.2141590479753236e-06, "loss": 0.608, "step": 1182 }, { "epoch": 1.2059123343527014, "grad_norm": 6.490620136260986, "learning_rate": 3.1480257890825205e-06, "loss": 0.305, "step": 1183 }, { "epoch": 1.2069317023445465, "grad_norm": 4.955615043640137, "learning_rate": 3.082557868074221e-06, "loss": 0.275, "step": 1184 }, { "epoch": 1.2079510703363914, "grad_norm": 7.741005897521973, "learning_rate": 3.0177562146505856e-06, "loss": 0.329, "step": 1185 }, { "epoch": 1.2089704383282365, "grad_norm": 5.985404968261719, 
"learning_rate": 2.953621749050206e-06, "loss": 0.7011, "step": 1186 }, { "epoch": 1.2099898063200816, "grad_norm": 7.798089027404785, "learning_rate": 2.8901553820370463e-06, "loss": 0.4506, "step": 1187 }, { "epoch": 1.2110091743119267, "grad_norm": 1.5254271030426025, "learning_rate": 2.827358014887499e-06, "loss": 0.1123, "step": 1188 }, { "epoch": 1.2120285423037718, "grad_norm": 9.104569435119629, "learning_rate": 2.7652305393775947e-06, "loss": 0.407, "step": 1189 }, { "epoch": 1.2130479102956166, "grad_norm": 2.4905483722686768, "learning_rate": 2.70377383777034e-06, "loss": 0.1889, "step": 1190 }, { "epoch": 1.2140672782874617, "grad_norm": 5.599071979522705, "learning_rate": 2.6429887828031407e-06, "loss": 0.4215, "step": 1191 }, { "epoch": 1.2150866462793068, "grad_norm": 3.8303184509277344, "learning_rate": 2.5828762376755024e-06, "loss": 0.21, "step": 1192 }, { "epoch": 1.216106014271152, "grad_norm": 8.759214401245117, "learning_rate": 2.523437056036687e-06, "loss": 0.3706, "step": 1193 }, { "epoch": 1.217125382262997, "grad_norm": 5.240026950836182, "learning_rate": 2.4646720819736344e-06, "loss": 0.3331, "step": 1194 }, { "epoch": 1.218144750254842, "grad_norm": 6.483129024505615, "learning_rate": 2.4065821499989647e-06, "loss": 0.5147, "step": 1195 }, { "epoch": 1.219164118246687, "grad_norm": 8.765278816223145, "learning_rate": 2.3491680850391105e-06, "loss": 0.4573, "step": 1196 }, { "epoch": 1.2201834862385321, "grad_norm": 6.686453342437744, "learning_rate": 2.2924307024226322e-06, "loss": 0.3008, "step": 1197 }, { "epoch": 1.2212028542303772, "grad_norm": 9.20877742767334, "learning_rate": 2.2363708078686263e-06, "loss": 0.9406, "step": 1198 }, { "epoch": 1.2222222222222223, "grad_norm": 1.597818374633789, "learning_rate": 2.1809891974752694e-06, "loss": 0.0947, "step": 1199 }, { "epoch": 1.2232415902140672, "grad_norm": 2.858272075653076, "learning_rate": 2.126286657708548e-06, "loss": 0.1345, "step": 1200 }, { "epoch": 1.2242609582059123, 
"grad_norm": 3.3939218521118164, "learning_rate": 2.072263965391047e-06, "loss": 0.2185, "step": 1201 }, { "epoch": 1.2252803261977574, "grad_norm": 3.4250073432922363, "learning_rate": 2.0189218876909444e-06, "loss": 0.289, "step": 1202 }, { "epoch": 1.2262996941896025, "grad_norm": 4.34546422958374, "learning_rate": 1.9662611821111122e-06, "loss": 0.3703, "step": 1203 }, { "epoch": 1.2273190621814476, "grad_norm": 8.702229499816895, "learning_rate": 1.914282596478373e-06, "loss": 0.4135, "step": 1204 }, { "epoch": 1.2283384301732925, "grad_norm": 5.491823673248291, "learning_rate": 1.8629868689328533e-06, "loss": 0.34, "step": 1205 }, { "epoch": 1.2293577981651376, "grad_norm": 6.028850078582764, "learning_rate": 1.8123747279174986e-06, "loss": 0.2804, "step": 1206 }, { "epoch": 1.2303771661569827, "grad_norm": 4.991639614105225, "learning_rate": 1.7624468921677738e-06, "loss": 0.1493, "step": 1207 }, { "epoch": 1.2313965341488278, "grad_norm": 7.220829963684082, "learning_rate": 1.713204070701413e-06, "loss": 0.6822, "step": 1208 }, { "epoch": 1.2324159021406729, "grad_norm": 4.651946544647217, "learning_rate": 1.6646469628083583e-06, "loss": 0.2489, "step": 1209 }, { "epoch": 1.2334352701325177, "grad_norm": 3.441234827041626, "learning_rate": 1.6167762580408585e-06, "loss": 0.2702, "step": 1210 }, { "epoch": 1.2344546381243628, "grad_norm": 6.1341776847839355, "learning_rate": 1.5695926362036205e-06, "loss": 0.22, "step": 1211 }, { "epoch": 1.235474006116208, "grad_norm": 5.035029888153076, "learning_rate": 1.5230967673442066e-06, "loss": 0.1727, "step": 1212 }, { "epoch": 1.236493374108053, "grad_norm": 7.472972393035889, "learning_rate": 1.4772893117435127e-06, "loss": 0.5975, "step": 1213 }, { "epoch": 1.2375127420998981, "grad_norm": 4.070760250091553, "learning_rate": 1.432170919906367e-06, "loss": 0.175, "step": 1214 }, { "epoch": 1.238532110091743, "grad_norm": 3.1894075870513916, "learning_rate": 1.3877422325523247e-06, "loss": 0.1245, "step": 1215 }, 
{ "epoch": 1.2395514780835881, "grad_norm": 7.860419273376465, "learning_rate": 1.3440038806065091e-06, "loss": 0.9057, "step": 1216 }, { "epoch": 1.2405708460754332, "grad_norm": 2.675111770629883, "learning_rate": 1.3009564851907297e-06, "loss": 0.1773, "step": 1217 }, { "epoch": 1.2415902140672783, "grad_norm": 7.003297805786133, "learning_rate": 1.258600657614617e-06, "loss": 0.6302, "step": 1218 }, { "epoch": 1.2426095820591234, "grad_norm": 5.752772331237793, "learning_rate": 1.2169369993669578e-06, "loss": 0.4958, "step": 1219 }, { "epoch": 1.2436289500509683, "grad_norm": 5.576023578643799, "learning_rate": 1.1759661021071288e-06, "loss": 0.3934, "step": 1220 }, { "epoch": 1.2446483180428134, "grad_norm": 7.377546787261963, "learning_rate": 1.1356885476567214e-06, "loss": 0.6317, "step": 1221 }, { "epoch": 1.2456676860346585, "grad_norm": 4.526902198791504, "learning_rate": 1.0961049079912633e-06, "loss": 0.2713, "step": 1222 }, { "epoch": 1.2466870540265036, "grad_norm": 5.726096153259277, "learning_rate": 1.0572157452321097e-06, "loss": 0.4534, "step": 1223 }, { "epoch": 1.2477064220183487, "grad_norm": 5.3342413902282715, "learning_rate": 1.0190216116384488e-06, "loss": 0.2045, "step": 1224 }, { "epoch": 1.2487257900101936, "grad_norm": 2.1736345291137695, "learning_rate": 9.8152304959947e-07, "loss": 0.2142, "step": 1225 }, { "epoch": 1.2497451580020387, "grad_norm": 8.47685718536377, "learning_rate": 9.447205916266411e-07, "loss": 0.6366, "step": 1226 }, { "epoch": 1.2507645259938838, "grad_norm": 7.194876670837402, "learning_rate": 9.086147603461714e-07, "loss": 0.5906, "step": 1227 }, { "epoch": 1.2517838939857289, "grad_norm": 4.089115142822266, "learning_rate": 8.732060684915721e-07, "loss": 0.1791, "step": 1228 }, { "epoch": 1.252803261977574, "grad_norm": 7.872777938842773, "learning_rate": 8.384950188963902e-07, "loss": 0.2664, "step": 1229 }, { "epoch": 1.2538226299694188, "grad_norm": 4.340756893157959, "learning_rate": 8.044821044870642e-07, 
"loss": 0.3745, "step": 1230 }, { "epoch": 1.254841997961264, "grad_norm": 3.608949899673462, "learning_rate": 7.711678082758855e-07, "loss": 0.3423, "step": 1231 }, { "epoch": 1.255861365953109, "grad_norm": 6.383975505828857, "learning_rate": 7.38552603354209e-07, "loss": 0.2945, "step": 1232 }, { "epoch": 1.2568807339449541, "grad_norm": 8.53671646118164, "learning_rate": 7.066369528856809e-07, "loss": 0.4634, "step": 1233 }, { "epoch": 1.2579001019367992, "grad_norm": 7.338963985443115, "learning_rate": 6.754213100996942e-07, "loss": 0.3154, "step": 1234 }, { "epoch": 1.2589194699286441, "grad_norm": 3.15653920173645, "learning_rate": 6.449061182849215e-07, "loss": 0.2901, "step": 1235 }, { "epoch": 1.2599388379204892, "grad_norm": 5.7759928703308105, "learning_rate": 6.150918107830361e-07, "loss": 0.5114, "step": 1236 }, { "epoch": 1.2609582059123343, "grad_norm": 4.276795387268066, "learning_rate": 5.859788109825793e-07, "loss": 0.1735, "step": 1237 }, { "epoch": 1.2619775739041794, "grad_norm": 6.433792591094971, "learning_rate": 5.575675323128915e-07, "loss": 0.3959, "step": 1238 }, { "epoch": 1.2629969418960245, "grad_norm": 4.392580986022949, "learning_rate": 5.298583782383071e-07, "loss": 0.3166, "step": 1239 }, { "epoch": 1.2640163098878694, "grad_norm": 6.252894878387451, "learning_rate": 5.028517422523749e-07, "loss": 0.3007, "step": 1240 }, { "epoch": 1.2650356778797147, "grad_norm": 2.0571022033691406, "learning_rate": 4.7654800787230723e-07, "loss": 0.0804, "step": 1241 }, { "epoch": 1.2660550458715596, "grad_norm": 9.38656234741211, "learning_rate": 4.509475486335013e-07, "loss": 0.3005, "step": 1242 }, { "epoch": 1.2670744138634047, "grad_norm": 7.827151775360107, "learning_rate": 4.260507280842485e-07, "loss": 0.3268, "step": 1243 }, { "epoch": 1.2680937818552498, "grad_norm": 4.051625728607178, "learning_rate": 4.0185789978057774e-07, "loss": 0.2535, "step": 1244 }, { "epoch": 1.2691131498470947, "grad_norm": 7.677860736846924, "learning_rate": 
3.7836940728123716e-07, "loss": 0.736, "step": 1245 }, { "epoch": 1.27013251783894, "grad_norm": 5.909502029418945, "learning_rate": 3.555855841427869e-07, "loss": 0.3688, "step": 1246 }, { "epoch": 1.2711518858307849, "grad_norm": 3.4724619388580322, "learning_rate": 3.335067539149084e-07, "loss": 0.2656, "step": 1247 }, { "epoch": 1.27217125382263, "grad_norm": 5.613596439361572, "learning_rate": 3.1213323013575825e-07, "loss": 0.2453, "step": 1248 }, { "epoch": 1.273190621814475, "grad_norm": 9.458756446838379, "learning_rate": 2.914653163275549e-07, "loss": 0.5073, "step": 1249 }, { "epoch": 1.2742099898063202, "grad_norm": 6.05524206161499, "learning_rate": 2.7150330599226e-07, "loss": 0.573, "step": 1250 }, { "epoch": 1.2752293577981653, "grad_norm": 3.4236321449279785, "learning_rate": 2.5224748260739284e-07, "loss": 0.2673, "step": 1251 }, { "epoch": 1.2762487257900101, "grad_norm": 8.313695907592773, "learning_rate": 2.3369811962203335e-07, "loss": 0.5646, "step": 1252 }, { "epoch": 1.2772680937818552, "grad_norm": 9.496676445007324, "learning_rate": 2.1585548045290337e-07, "loss": 0.3595, "step": 1253 }, { "epoch": 1.2782874617737003, "grad_norm": 8.075193405151367, "learning_rate": 1.987198184806638e-07, "loss": 0.3005, "step": 1254 }, { "epoch": 1.2793068297655454, "grad_norm": 5.755889892578125, "learning_rate": 1.8229137704627864e-07, "loss": 0.189, "step": 1255 }, { "epoch": 1.2803261977573905, "grad_norm": 3.0507397651672363, "learning_rate": 1.6657038944759563e-07, "loss": 0.2303, "step": 1256 }, { "epoch": 1.2813455657492354, "grad_norm": 7.6736860275268555, "learning_rate": 1.5155707893601546e-07, "loss": 0.3083, "step": 1257 }, { "epoch": 1.2823649337410805, "grad_norm": 4.9907050132751465, "learning_rate": 1.3725165871331103e-07, "loss": 0.4482, "step": 1258 }, { "epoch": 1.2833843017329256, "grad_norm": 9.572212219238281, "learning_rate": 1.236543319286243e-07, "loss": 0.6032, "step": 1259 }, { "epoch": 1.2844036697247707, "grad_norm": 
4.888943195343018, "learning_rate": 1.1076529167554639e-07, "loss": 0.2635, "step": 1260 }, { "epoch": 1.2854230377166158, "grad_norm": 6.444303512573242, "learning_rate": 9.858472098942528e-08, "loss": 0.3321, "step": 1261 }, { "epoch": 1.2864424057084607, "grad_norm": 6.6462626457214355, "learning_rate": 8.71127928447235e-08, "loss": 0.4375, "step": 1262 }, { "epoch": 1.2874617737003058, "grad_norm": 9.320449829101562, "learning_rate": 7.63496701525701e-08, "loss": 0.7222, "step": 1263 }, { "epoch": 1.2884811416921509, "grad_norm": 9.66598129272461, "learning_rate": 6.629550575847354e-08, "loss": 0.5028, "step": 1264 }, { "epoch": 1.289500509683996, "grad_norm": 8.09259033203125, "learning_rate": 5.695044244011238e-08, "loss": 0.5178, "step": 1265 }, { "epoch": 1.290519877675841, "grad_norm": 5.617757320404053, "learning_rate": 4.83146129053369e-08, "loss": 0.4406, "step": 1266 }, { "epoch": 1.291539245667686, "grad_norm": 2.1872336864471436, "learning_rate": 4.038813979027056e-08, "loss": 0.1136, "step": 1267 }, { "epoch": 1.292558613659531, "grad_norm": 6.6291608810424805, "learning_rate": 3.3171135657572575e-08, "loss": 0.2167, "step": 1268 }, { "epoch": 1.2935779816513762, "grad_norm": 5.140267848968506, "learning_rate": 2.6663702994844664e-08, "loss": 0.2206, "step": 1269 }, { "epoch": 1.2945973496432213, "grad_norm": 4.506137371063232, "learning_rate": 2.0865934213160078e-08, "loss": 0.2313, "step": 1270 }, { "epoch": 1.2956167176350664, "grad_norm": 3.7297956943511963, "learning_rate": 1.577791164577014e-08, "loss": 0.248, "step": 1271 }, { "epoch": 1.2966360856269112, "grad_norm": 4.870246887207031, "learning_rate": 1.1399707546921879e-08, "loss": 0.285, "step": 1272 }, { "epoch": 1.2976554536187563, "grad_norm": 2.5686280727386475, "learning_rate": 7.731384090842176e-09, "loss": 0.127, "step": 1273 }, { "epoch": 1.2986748216106014, "grad_norm": 11.340642929077148, "learning_rate": 4.772993370832923e-09, "loss": 0.5928, "step": 1274 }, { "epoch": 
1.2986748216106014, "eval_Qnli-dev-1024_cosine_accuracy": 0.75, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8024710416793823, "eval_Qnli-dev-1024_cosine_ap": 0.7863440662349561, "eval_Qnli-dev-1024_cosine_f1": 0.7222222222222222, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7177085280418396, "eval_Qnli-dev-1024_cosine_mcc": 0.41614558708189836, "eval_Qnli-dev-1024_cosine_precision": 0.6190476190476191, "eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6857460737228394, "eval_Qnli-dev_cosine_ap": 0.761525183076105, "eval_Qnli-dev_cosine_f1": 0.7422680412371134, "eval_Qnli-dev_cosine_f1_threshold": 0.6738643646240234, "eval_Qnli-dev_cosine_mcc": 0.48701780569984915, "eval_Qnli-dev_cosine_precision": 0.6923076923076923, "eval_Qnli-dev_cosine_recall": 0.8, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.9479166865348816, "eval_global_dataset_loss": 0.2720896899700165, "eval_global_dataset_runtime": 103.9022, "eval_global_dataset_samples_per_second": 7.728, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8890098652399769, "eval_sts-test-1024_spearman_cosine": 0.9150082430472904, "eval_sts-test_pearson_cosine": 0.906966954203853, "eval_sts-test_spearman_cosine": 0.9201041628163744, "step": 1274 }, { "epoch": 1.2996941896024465, "grad_norm": 5.426166534423828, "learning_rate": 2.5245773985715927e-09, "loss": 0.2689, "step": 1275 }, { "epoch": 1.3007135575942916, "grad_norm": 6.4207868576049805, "learning_rate": 9.861681034672997e-10, "loss": 0.3258, "step": 1276 }, { "epoch": 1.3017329255861365, "grad_norm": 4.865072727203369, "learning_rate": 1.5778733225002562e-10, "loss": 0.1809, "step": 1277 }, { "epoch": 1.3027522935779816, "grad_norm": 5.379539489746094, "learning_rate": 9.999996055315139e-05, "loss": 0.274, "step": 1278 }, { 
"epoch": 1.3037716615698267, "grad_norm": 5.370091915130615, "learning_rate": 9.999936885166688e-05, "loss": 0.4674, "step": 1279 }, { "epoch": 1.3047910295616718, "grad_norm": 7.655372619628906, "learning_rate": 9.999806711661691e-05, "loss": 0.2996, "step": 1280 }, { "epoch": 1.305810397553517, "grad_norm": 8.584575653076172, "learning_rate": 9.999605536648723e-05, "loss": 0.6664, "step": 1281 }, { "epoch": 1.3068297655453618, "grad_norm": 7.809893608093262, "learning_rate": 9.999333362984638e-05, "loss": 0.5189, "step": 1282 }, { "epoch": 1.3078491335372069, "grad_norm": 10.140406608581543, "learning_rate": 9.998990194534536e-05, "loss": 1.0369, "step": 1283 }, { "epoch": 1.308868501529052, "grad_norm": 4.876784801483154, "learning_rate": 9.998576036171699e-05, "loss": 0.3865, "step": 1284 }, { "epoch": 1.309887869520897, "grad_norm": 2.4785141944885254, "learning_rate": 9.99809089377753e-05, "loss": 0.2092, "step": 1285 }, { "epoch": 1.3109072375127422, "grad_norm": 3.415043592453003, "learning_rate": 9.997534774241461e-05, "loss": 0.1867, "step": 1286 }, { "epoch": 1.311926605504587, "grad_norm": 5.118708610534668, "learning_rate": 9.996907685460863e-05, "loss": 0.2461, "step": 1287 }, { "epoch": 1.3129459734964322, "grad_norm": 4.022343635559082, "learning_rate": 9.996209636340933e-05, "loss": 0.1819, "step": 1288 }, { "epoch": 1.3139653414882773, "grad_norm": 5.242366790771484, "learning_rate": 9.99544063679456e-05, "loss": 0.3707, "step": 1289 }, { "epoch": 1.3149847094801224, "grad_norm": 3.762380838394165, "learning_rate": 9.994600697742192e-05, "loss": 0.2654, "step": 1290 }, { "epoch": 1.3160040774719675, "grad_norm": 8.3553466796875, "learning_rate": 9.993689831111675e-05, "loss": 0.4869, "step": 1291 }, { "epoch": 1.3170234454638123, "grad_norm": 4.632368087768555, "learning_rate": 9.992708049838096e-05, "loss": 0.2443, "step": 1292 }, { "epoch": 1.3180428134556574, "grad_norm": 11.02981185913086, "learning_rate": 9.99165536786358e-05, "loss": 0.6129, 
"step": 1293 }, { "epoch": 1.3190621814475025, "grad_norm": 6.492003440856934, "learning_rate": 9.990531800137104e-05, "loss": 0.5258, "step": 1294 }, { "epoch": 1.3200815494393476, "grad_norm": 7.420266628265381, "learning_rate": 9.989337362614292e-05, "loss": 0.3551, "step": 1295 }, { "epoch": 1.3211009174311927, "grad_norm": 5.672893047332764, "learning_rate": 9.988072072257168e-05, "loss": 0.1857, "step": 1296 }, { "epoch": 1.3221202854230376, "grad_norm": 14.246613502502441, "learning_rate": 9.986735947033934e-05, "loss": 0.8164, "step": 1297 }, { "epoch": 1.3231396534148827, "grad_norm": 7.219099521636963, "learning_rate": 9.985329005918702e-05, "loss": 0.4561, "step": 1298 }, { "epoch": 1.3241590214067278, "grad_norm": 7.1640448570251465, "learning_rate": 9.983851268891235e-05, "loss": 0.3761, "step": 1299 }, { "epoch": 1.325178389398573, "grad_norm": 6.6731181144714355, "learning_rate": 9.982302756936654e-05, "loss": 0.4334, "step": 1300 }, { "epoch": 1.326197757390418, "grad_norm": 5.247565746307373, "learning_rate": 9.980683492045146e-05, "loss": 0.2473, "step": 1301 }, { "epoch": 1.3272171253822629, "grad_norm": 6.342030048370361, "learning_rate": 9.978993497211651e-05, "loss": 0.3142, "step": 1302 }, { "epoch": 1.328236493374108, "grad_norm": 9.803683280944824, "learning_rate": 9.977232796435532e-05, "loss": 0.8421, "step": 1303 }, { "epoch": 1.329255861365953, "grad_norm": 8.165091514587402, "learning_rate": 9.975401414720238e-05, "loss": 0.565, "step": 1304 }, { "epoch": 1.3302752293577982, "grad_norm": 3.9490790367126465, "learning_rate": 9.973499378072945e-05, "loss": 0.235, "step": 1305 }, { "epoch": 1.3312945973496433, "grad_norm": 3.1755430698394775, "learning_rate": 9.971526713504195e-05, "loss": 0.1707, "step": 1306 }, { "epoch": 1.3323139653414882, "grad_norm": 7.461353778839111, "learning_rate": 9.969483449027502e-05, "loss": 0.3428, "step": 1307 }, { "epoch": 1.3333333333333333, "grad_norm": 6.9499664306640625, "learning_rate": 
9.967369613658955e-05, "loss": 0.349, "step": 1308 }, { "epoch": 1.3343527013251784, "grad_norm": 5.956964492797852, "learning_rate": 9.965185237416821e-05, "loss": 0.3372, "step": 1309 }, { "epoch": 1.3353720693170235, "grad_norm": 10.764342308044434, "learning_rate": 9.962930351321095e-05, "loss": 0.8009, "step": 1310 }, { "epoch": 1.3363914373088686, "grad_norm": 6.670104503631592, "learning_rate": 9.960604987393081e-05, "loss": 0.5033, "step": 1311 }, { "epoch": 1.3374108053007134, "grad_norm": 5.372352600097656, "learning_rate": 9.958209178654921e-05, "loss": 0.352, "step": 1312 }, { "epoch": 1.3384301732925585, "grad_norm": 7.409302234649658, "learning_rate": 9.955742959129142e-05, "loss": 0.4625, "step": 1313 }, { "epoch": 1.3394495412844036, "grad_norm": 6.971982002258301, "learning_rate": 9.953206363838155e-05, "loss": 0.3854, "step": 1314 }, { "epoch": 1.3404689092762487, "grad_norm": 9.18579387664795, "learning_rate": 9.95059942880377e-05, "loss": 0.6175, "step": 1315 }, { "epoch": 1.3414882772680938, "grad_norm": 5.547621250152588, "learning_rate": 9.947922191046686e-05, "loss": 0.3714, "step": 1316 }, { "epoch": 1.3425076452599387, "grad_norm": 4.560646057128906, "learning_rate": 9.945174688585954e-05, "loss": 0.3143, "step": 1317 }, { "epoch": 1.343527013251784, "grad_norm": 4.778387069702148, "learning_rate": 9.942356960438447e-05, "loss": 0.31, "step": 1318 }, { "epoch": 1.344546381243629, "grad_norm": 7.907627105712891, "learning_rate": 9.939469046618297e-05, "loss": 0.9255, "step": 1319 }, { "epoch": 1.345565749235474, "grad_norm": 8.810420989990234, "learning_rate": 9.93651098813634e-05, "loss": 0.412, "step": 1320 }, { "epoch": 1.346585117227319, "grad_norm": 6.384575843811035, "learning_rate": 9.933482826999525e-05, "loss": 0.6548, "step": 1321 }, { "epoch": 1.347604485219164, "grad_norm": 3.368100881576538, "learning_rate": 9.930384606210312e-05, "loss": 0.2306, "step": 1322 }, { "epoch": 1.3486238532110093, "grad_norm": 6.657802581787109, 
"learning_rate": 9.927216369766071e-05, "loss": 0.3323, "step": 1323 }, { "epoch": 1.3496432212028542, "grad_norm": 6.96447229385376, "learning_rate": 9.923978162658459e-05, "loss": 0.6847, "step": 1324 }, { "epoch": 1.3506625891946993, "grad_norm": 5.063149452209473, "learning_rate": 9.920670030872765e-05, "loss": 0.1593, "step": 1325 }, { "epoch": 1.3516819571865444, "grad_norm": 8.810893058776855, "learning_rate": 9.917292021387277e-05, "loss": 0.9157, "step": 1326 }, { "epoch": 1.3527013251783895, "grad_norm": 7.74990177154541, "learning_rate": 9.913844182172604e-05, "loss": 0.3446, "step": 1327 }, { "epoch": 1.3537206931702346, "grad_norm": 4.324987888336182, "learning_rate": 9.910326562190997e-05, "loss": 0.4183, "step": 1328 }, { "epoch": 1.3547400611620795, "grad_norm": 5.610269546508789, "learning_rate": 9.906739211395648e-05, "loss": 0.3208, "step": 1329 }, { "epoch": 1.3557594291539246, "grad_norm": 4.970494270324707, "learning_rate": 9.90308218072999e-05, "loss": 0.2009, "step": 1330 }, { "epoch": 1.3567787971457697, "grad_norm": 5.053140640258789, "learning_rate": 9.89935552212697e-05, "loss": 0.3931, "step": 1331 }, { "epoch": 1.3577981651376148, "grad_norm": 4.1161932945251465, "learning_rate": 9.895559288508309e-05, "loss": 0.1704, "step": 1332 }, { "epoch": 1.3588175331294599, "grad_norm": 1.7614240646362305, "learning_rate": 9.891693533783756e-05, "loss": 0.1302, "step": 1333 }, { "epoch": 1.3598369011213047, "grad_norm": 5.90859842300415, "learning_rate": 9.887758312850312e-05, "loss": 0.3544, "step": 1334 }, { "epoch": 1.3608562691131498, "grad_norm": 12.01405143737793, "learning_rate": 9.883753681591467e-05, "loss": 1.0104, "step": 1335 }, { "epoch": 1.361875637104995, "grad_norm": 3.6148080825805664, "learning_rate": 9.879679696876384e-05, "loss": 0.1455, "step": 1336 }, { "epoch": 1.36289500509684, "grad_norm": 8.55483341217041, "learning_rate": 9.875536416559118e-05, "loss": 0.5414, "step": 1337 }, { "epoch": 1.3639143730886851, "grad_norm": 
9.5636568069458, "learning_rate": 9.871323899477769e-05, "loss": 0.5544, "step": 1338 }, { "epoch": 1.36493374108053, "grad_norm": 4.816516876220703, "learning_rate": 9.867042205453665e-05, "loss": 0.2696, "step": 1339 }, { "epoch": 1.365953109072375, "grad_norm": 5.621625900268555, "learning_rate": 9.862691395290502e-05, "loss": 0.2817, "step": 1340 }, { "epoch": 1.3669724770642202, "grad_norm": 11.75194263458252, "learning_rate": 9.858271530773486e-05, "loss": 0.8851, "step": 1341 }, { "epoch": 1.3679918450560653, "grad_norm": 6.325568675994873, "learning_rate": 9.85378267466845e-05, "loss": 0.3788, "step": 1342 }, { "epoch": 1.3690112130479104, "grad_norm": 6.309666156768799, "learning_rate": 9.849224890720972e-05, "loss": 0.4749, "step": 1343 }, { "epoch": 1.3700305810397553, "grad_norm": 7.9817423820495605, "learning_rate": 9.844598243655458e-05, "loss": 0.5644, "step": 1344 }, { "epoch": 1.3710499490316004, "grad_norm": 8.903715133666992, "learning_rate": 9.839902799174232e-05, "loss": 0.8262, "step": 1345 }, { "epoch": 1.3720693170234455, "grad_norm": 11.527419090270996, "learning_rate": 9.835138623956601e-05, "loss": 1.1799, "step": 1346 }, { "epoch": 1.3730886850152906, "grad_norm": 6.788682460784912, "learning_rate": 9.830305785657905e-05, "loss": 0.4572, "step": 1347 }, { "epoch": 1.3741080530071357, "grad_norm": 5.4351091384887695, "learning_rate": 9.82540435290856e-05, "loss": 0.4109, "step": 1348 }, { "epoch": 1.3751274209989806, "grad_norm": 4.732766628265381, "learning_rate": 9.820434395313076e-05, "loss": 0.2398, "step": 1349 }, { "epoch": 1.3761467889908257, "grad_norm": 5.804527282714844, "learning_rate": 9.81539598344908e-05, "loss": 0.4608, "step": 1350 }, { "epoch": 1.3771661569826708, "grad_norm": 10.281136512756348, "learning_rate": 9.810289188866307e-05, "loss": 0.8431, "step": 1351 }, { "epoch": 1.3781855249745159, "grad_norm": 5.0923991203308105, "learning_rate": 9.805114084085581e-05, "loss": 0.3761, "step": 1352 }, { "epoch": 
1.379204892966361, "grad_norm": 2.6065218448638916, "learning_rate": 9.799870742597796e-05, "loss": 0.1865, "step": 1353 }, { "epoch": 1.3802242609582058, "grad_norm": 7.639798164367676, "learning_rate": 9.794559238862857e-05, "loss": 0.4188, "step": 1354 }, { "epoch": 1.381243628950051, "grad_norm": 7.440917015075684, "learning_rate": 9.789179648308637e-05, "loss": 0.3582, "step": 1355 }, { "epoch": 1.382262996941896, "grad_norm": 8.206061363220215, "learning_rate": 9.783732047329897e-05, "loss": 0.4767, "step": 1356 }, { "epoch": 1.3832823649337411, "grad_norm": 6.895366191864014, "learning_rate": 9.778216513287204e-05, "loss": 0.5777, "step": 1357 }, { "epoch": 1.3843017329255862, "grad_norm": 6.850992202758789, "learning_rate": 9.772633124505834e-05, "loss": 0.2647, "step": 1358 }, { "epoch": 1.385321100917431, "grad_norm": 6.088956356048584, "learning_rate": 9.766981960274653e-05, "loss": 0.3234, "step": 1359 }, { "epoch": 1.3863404689092762, "grad_norm": 6.445374011993408, "learning_rate": 9.761263100845005e-05, "loss": 0.5274, "step": 1360 }, { "epoch": 1.3873598369011213, "grad_norm": 4.1013031005859375, "learning_rate": 9.755476627429554e-05, "loss": 0.3592, "step": 1361 }, { "epoch": 1.3883792048929664, "grad_norm": 5.010069847106934, "learning_rate": 9.749622622201149e-05, "loss": 0.3388, "step": 1362 }, { "epoch": 1.3893985728848115, "grad_norm": 7.896244049072266, "learning_rate": 9.743701168291638e-05, "loss": 0.5058, "step": 1363 }, { "epoch": 1.3904179408766564, "grad_norm": 3.5064926147460938, "learning_rate": 9.737712349790706e-05, "loss": 0.1924, "step": 1364 }, { "epoch": 1.3914373088685015, "grad_norm": 9.105462074279785, "learning_rate": 9.73165625174467e-05, "loss": 0.7132, "step": 1365 }, { "epoch": 1.3924566768603466, "grad_norm": 5.037233829498291, "learning_rate": 9.725532960155272e-05, "loss": 0.3511, "step": 1366 }, { "epoch": 1.3934760448521917, "grad_norm": 5.967740535736084, "learning_rate": 9.719342561978462e-05, "loss": 0.5524, 
"step": 1367 }, { "epoch": 1.3944954128440368, "grad_norm": 7.187025547027588, "learning_rate": 9.713085145123158e-05, "loss": 0.3859, "step": 1368 }, { "epoch": 1.3955147808358817, "grad_norm": 9.78358268737793, "learning_rate": 9.706760798450004e-05, "loss": 0.6256, "step": 1369 }, { "epoch": 1.3965341488277268, "grad_norm": 9.806771278381348, "learning_rate": 9.700369611770099e-05, "loss": 0.7257, "step": 1370 }, { "epoch": 1.3975535168195719, "grad_norm": 8.946002006530762, "learning_rate": 9.693911675843732e-05, "loss": 0.9092, "step": 1371 }, { "epoch": 1.398572884811417, "grad_norm": 4.351489067077637, "learning_rate": 9.687387082379085e-05, "loss": 0.1719, "step": 1372 }, { "epoch": 1.398572884811417, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7861621975898743, "eval_Qnli-dev-1024_cosine_ap": 0.7440149504607769, "eval_Qnli-dev-1024_cosine_f1": 0.7157894736842104, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.756857693195343, "eval_Qnli-dev-1024_cosine_mcc": 0.44134955399887316, "eval_Qnli-dev-1024_cosine_precision": 0.68, "eval_Qnli-dev-1024_cosine_recall": 0.7555555555555555, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7261146306991577, "eval_Qnli-dev_cosine_ap": 0.7580605820681848, "eval_Qnli-dev_cosine_f1": 0.7454545454545455, "eval_Qnli-dev_cosine_f1_threshold": 0.6269410848617554, "eval_Qnli-dev_cosine_mcc": 0.47013467657639685, "eval_Qnli-dev_cosine_precision": 0.6307692307692307, "eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9791666865348816, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.39148974418640137, "eval_global_dataset_runtime": 103.9064, "eval_global_dataset_samples_per_second": 7.728, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9791666865348816, "eval_sts-test-1024_pearson_cosine": 0.8610388110383311, 
"eval_sts-test-1024_spearman_cosine": 0.897175185335658, "eval_sts-test_pearson_cosine": 0.9050122455904925, "eval_sts-test_spearman_cosine": 0.9224667967762508, "step": 1372 }, { "epoch": 1.399592252803262, "grad_norm": 6.123342990875244, "learning_rate": 9.680795924030936e-05, "loss": 0.5039, "step": 1373 }, { "epoch": 1.400611620795107, "grad_norm": 6.790772438049316, "learning_rate": 9.67413829439934e-05, "loss": 0.4752, "step": 1374 }, { "epoch": 1.401630988786952, "grad_norm": 10.278209686279297, "learning_rate": 9.6674142880283e-05, "loss": 0.7859, "step": 1375 }, { "epoch": 1.4026503567787971, "grad_norm": 9.054181098937988, "learning_rate": 9.660624000404423e-05, "loss": 0.5784, "step": 1376 }, { "epoch": 1.4036697247706422, "grad_norm": 9.129789352416992, "learning_rate": 9.653767527955574e-05, "loss": 0.5278, "step": 1377 }, { "epoch": 1.4046890927624873, "grad_norm": 4.569271087646484, "learning_rate": 9.646844968049488e-05, "loss": 0.215, "step": 1378 }, { "epoch": 1.4057084607543322, "grad_norm": 12.985228538513184, "learning_rate": 9.639856418992409e-05, "loss": 0.7942, "step": 1379 }, { "epoch": 1.4067278287461773, "grad_norm": 8.284036636352539, "learning_rate": 9.632801980027672e-05, "loss": 0.4433, "step": 1380 }, { "epoch": 1.4077471967380224, "grad_norm": 6.351473331451416, "learning_rate": 9.625681751334319e-05, "loss": 0.3836, "step": 1381 }, { "epoch": 1.4087665647298675, "grad_norm": 8.75439167022705, "learning_rate": 9.618495834025646e-05, "loss": 0.4579, "step": 1382 }, { "epoch": 1.4097859327217126, "grad_norm": 5.620606422424316, "learning_rate": 9.611244330147793e-05, "loss": 0.3356, "step": 1383 }, { "epoch": 1.4108053007135575, "grad_norm": 5.859284400939941, "learning_rate": 9.603927342678285e-05, "loss": 0.3149, "step": 1384 }, { "epoch": 1.4118246687054026, "grad_norm": 4.390924453735352, "learning_rate": 9.596544975524565e-05, "loss": 0.209, "step": 1385 }, { "epoch": 1.4128440366972477, "grad_norm": 4.031999588012695, 
"learning_rate": 9.589097333522528e-05, "loss": 0.1533, "step": 1386 }, { "epoch": 1.4138634046890928, "grad_norm": 7.579880714416504, "learning_rate": 9.581584522435026e-05, "loss": 0.3178, "step": 1387 }, { "epoch": 1.4148827726809379, "grad_norm": 8.35764217376709, "learning_rate": 9.574006648950362e-05, "loss": 0.431, "step": 1388 }, { "epoch": 1.4159021406727827, "grad_norm": 5.463128566741943, "learning_rate": 9.566363820680787e-05, "loss": 0.3837, "step": 1389 }, { "epoch": 1.4169215086646278, "grad_norm": 10.94266128540039, "learning_rate": 9.558656146160964e-05, "loss": 0.6833, "step": 1390 }, { "epoch": 1.417940876656473, "grad_norm": 7.073869228363037, "learning_rate": 9.550883734846427e-05, "loss": 0.2991, "step": 1391 }, { "epoch": 1.418960244648318, "grad_norm": 5.472149848937988, "learning_rate": 9.54304669711203e-05, "loss": 0.4607, "step": 1392 }, { "epoch": 1.4199796126401631, "grad_norm": 6.09562349319458, "learning_rate": 9.53514514425037e-05, "loss": 0.4161, "step": 1393 }, { "epoch": 1.420998980632008, "grad_norm": 4.8968939781188965, "learning_rate": 9.527179188470222e-05, "loss": 0.3701, "step": 1394 }, { "epoch": 1.4220183486238533, "grad_norm": 5.902759075164795, "learning_rate": 9.51914894289493e-05, "loss": 0.2434, "step": 1395 }, { "epoch": 1.4230377166156982, "grad_norm": 7.825418949127197, "learning_rate": 9.511054521560816e-05, "loss": 0.471, "step": 1396 }, { "epoch": 1.4240570846075433, "grad_norm": 8.981621742248535, "learning_rate": 9.502896039415545e-05, "loss": 0.692, "step": 1397 }, { "epoch": 1.4250764525993884, "grad_norm": 4.354804992675781, "learning_rate": 9.494673612316505e-05, "loss": 0.3583, "step": 1398 }, { "epoch": 1.4260958205912333, "grad_norm": 3.310420513153076, "learning_rate": 9.486387357029148e-05, "loss": 0.2116, "step": 1399 }, { "epoch": 1.4271151885830786, "grad_norm": 5.200766563415527, "learning_rate": 9.478037391225356e-05, "loss": 0.2882, "step": 1400 }, { "epoch": 1.4281345565749235, "grad_norm": 
7.121079444885254, "learning_rate": 9.46962383348174e-05, "loss": 0.5388, "step": 1401 }, { "epoch": 1.4291539245667686, "grad_norm": 9.046875953674316, "learning_rate": 9.461146803277979e-05, "loss": 0.534, "step": 1402 }, { "epoch": 1.4301732925586137, "grad_norm": 4.394150733947754, "learning_rate": 9.45260642099511e-05, "loss": 0.189, "step": 1403 }, { "epoch": 1.4311926605504588, "grad_norm": 8.304125785827637, "learning_rate": 9.444002807913828e-05, "loss": 0.5505, "step": 1404 }, { "epoch": 1.432212028542304, "grad_norm": 8.639719009399414, "learning_rate": 9.435336086212753e-05, "loss": 0.6606, "step": 1405 }, { "epoch": 1.4332313965341488, "grad_norm": 9.554101943969727, "learning_rate": 9.426606378966707e-05, "loss": 0.9008, "step": 1406 }, { "epoch": 1.4342507645259939, "grad_norm": 4.584405422210693, "learning_rate": 9.417813810144962e-05, "loss": 0.3275, "step": 1407 }, { "epoch": 1.435270132517839, "grad_norm": 5.24059534072876, "learning_rate": 9.408958504609466e-05, "loss": 0.3699, "step": 1408 }, { "epoch": 1.436289500509684, "grad_norm": 6.843227863311768, "learning_rate": 9.400040588113095e-05, "loss": 0.5497, "step": 1409 }, { "epoch": 1.4373088685015292, "grad_norm": 6.056573390960693, "learning_rate": 9.391060187297846e-05, "loss": 0.2722, "step": 1410 }, { "epoch": 1.438328236493374, "grad_norm": 10.264248847961426, "learning_rate": 9.382017429693053e-05, "loss": 0.8038, "step": 1411 }, { "epoch": 1.4393476044852191, "grad_norm": 4.382177352905273, "learning_rate": 9.372912443713561e-05, "loss": 0.1399, "step": 1412 }, { "epoch": 1.4403669724770642, "grad_norm": 7.841924667358398, "learning_rate": 9.363745358657917e-05, "loss": 0.2747, "step": 1413 }, { "epoch": 1.4413863404689093, "grad_norm": 6.299066543579102, "learning_rate": 9.354516304706527e-05, "loss": 0.3525, "step": 1414 }, { "epoch": 1.4424057084607544, "grad_norm": 4.2002949714660645, "learning_rate": 9.345225412919803e-05, "loss": 0.353, "step": 1415 }, { "epoch": 
1.4434250764525993, "grad_norm": 5.748711109161377, "learning_rate": 9.335872815236315e-05, "loss": 0.3674, "step": 1416 }, { "epoch": 1.4444444444444444, "grad_norm": 10.88302230834961, "learning_rate": 9.326458644470907e-05, "loss": 0.826, "step": 1417 }, { "epoch": 1.4454638124362895, "grad_norm": 7.587636947631836, "learning_rate": 9.316983034312804e-05, "loss": 0.6642, "step": 1418 }, { "epoch": 1.4464831804281346, "grad_norm": 3.749462842941284, "learning_rate": 9.307446119323738e-05, "loss": 0.2859, "step": 1419 }, { "epoch": 1.4475025484199797, "grad_norm": 7.171744346618652, "learning_rate": 9.297848034936005e-05, "loss": 0.3904, "step": 1420 }, { "epoch": 1.4485219164118246, "grad_norm": 8.47977066040039, "learning_rate": 9.288188917450575e-05, "loss": 0.612, "step": 1421 }, { "epoch": 1.4495412844036697, "grad_norm": 2.8732564449310303, "learning_rate": 9.278468904035129e-05, "loss": 0.1087, "step": 1422 }, { "epoch": 1.4505606523955148, "grad_norm": 5.59121036529541, "learning_rate": 9.268688132722124e-05, "loss": 0.2336, "step": 1423 }, { "epoch": 1.45158002038736, "grad_norm": 8.162138938903809, "learning_rate": 9.258846742406833e-05, "loss": 0.2767, "step": 1424 }, { "epoch": 1.452599388379205, "grad_norm": 1.6772691011428833, "learning_rate": 9.248944872845369e-05, "loss": 0.1598, "step": 1425 }, { "epoch": 1.4536187563710499, "grad_norm": 6.238204479217529, "learning_rate": 9.238982664652701e-05, "loss": 0.3641, "step": 1426 }, { "epoch": 1.454638124362895, "grad_norm": 5.3337202072143555, "learning_rate": 9.228960259300662e-05, "loss": 0.5724, "step": 1427 }, { "epoch": 1.45565749235474, "grad_norm": 5.208616733551025, "learning_rate": 9.21887779911593e-05, "loss": 0.3569, "step": 1428 }, { "epoch": 1.4566768603465852, "grad_norm": 4.691967010498047, "learning_rate": 9.208735427278014e-05, "loss": 0.1306, "step": 1429 }, { "epoch": 1.4576962283384303, "grad_norm": 5.81736421585083, "learning_rate": 9.198533287817223e-05, "loss": 0.2527, "step": 
1430 }, { "epoch": 1.4587155963302751, "grad_norm": 7.3892340660095215, "learning_rate": 9.188271525612615e-05, "loss": 0.3424, "step": 1431 }, { "epoch": 1.4597349643221202, "grad_norm": 8.754351615905762, "learning_rate": 9.177950286389942e-05, "loss": 0.5734, "step": 1432 }, { "epoch": 1.4607543323139653, "grad_norm": 4.002055644989014, "learning_rate": 9.167569716719579e-05, "loss": 0.2381, "step": 1433 }, { "epoch": 1.4617737003058104, "grad_norm": 9.790334701538086, "learning_rate": 9.157129964014445e-05, "loss": 0.7411, "step": 1434 }, { "epoch": 1.4627930682976555, "grad_norm": 6.292593955993652, "learning_rate": 9.146631176527906e-05, "loss": 0.3854, "step": 1435 }, { "epoch": 1.4638124362895004, "grad_norm": 5.826279163360596, "learning_rate": 9.136073503351679e-05, "loss": 0.4475, "step": 1436 }, { "epoch": 1.4648318042813455, "grad_norm": 7.841248035430908, "learning_rate": 9.125457094413698e-05, "loss": 0.5616, "step": 1437 }, { "epoch": 1.4658511722731906, "grad_norm": 6.76909065246582, "learning_rate": 9.114782100476005e-05, "loss": 0.4432, "step": 1438 }, { "epoch": 1.4668705402650357, "grad_norm": 6.64108419418335, "learning_rate": 9.104048673132587e-05, "loss": 0.5708, "step": 1439 }, { "epoch": 1.4678899082568808, "grad_norm": 6.3219218254089355, "learning_rate": 9.093256964807249e-05, "loss": 0.3974, "step": 1440 }, { "epoch": 1.4689092762487257, "grad_norm": 5.191364765167236, "learning_rate": 9.082407128751423e-05, "loss": 0.2253, "step": 1441 }, { "epoch": 1.4699286442405708, "grad_norm": 2.702972412109375, "learning_rate": 9.071499319042011e-05, "loss": 0.1825, "step": 1442 }, { "epoch": 1.470948012232416, "grad_norm": 7.462799072265625, "learning_rate": 9.060533690579191e-05, "loss": 0.5323, "step": 1443 }, { "epoch": 1.471967380224261, "grad_norm": 6.826075553894043, "learning_rate": 9.049510399084211e-05, "loss": 0.2, "step": 1444 }, { "epoch": 1.472986748216106, "grad_norm": 2.9609243869781494, "learning_rate": 9.038429601097187e-05, 
"loss": 0.2048, "step": 1445 }, { "epoch": 1.474006116207951, "grad_norm": 8.720600128173828, "learning_rate": 9.027291453974877e-05, "loss": 0.964, "step": 1446 }, { "epoch": 1.475025484199796, "grad_norm": 5.362283229827881, "learning_rate": 9.016096115888443e-05, "loss": 0.2534, "step": 1447 }, { "epoch": 1.4760448521916412, "grad_norm": 6.537226676940918, "learning_rate": 9.004843745821207e-05, "loss": 0.3222, "step": 1448 }, { "epoch": 1.4770642201834863, "grad_norm": 8.83950424194336, "learning_rate": 8.993534503566397e-05, "loss": 0.4912, "step": 1449 }, { "epoch": 1.4780835881753314, "grad_norm": 7.760464191436768, "learning_rate": 8.982168549724869e-05, "loss": 0.7533, "step": 1450 }, { "epoch": 1.4791029561671762, "grad_norm": 2.361077308654785, "learning_rate": 8.970746045702841e-05, "loss": 0.161, "step": 1451 }, { "epoch": 1.4801223241590213, "grad_norm": 12.053560256958008, "learning_rate": 8.959267153709578e-05, "loss": 0.9155, "step": 1452 }, { "epoch": 1.4811416921508664, "grad_norm": 7.693728446960449, "learning_rate": 8.947732036755114e-05, "loss": 1.0607, "step": 1453 }, { "epoch": 1.4821610601427115, "grad_norm": 6.32127046585083, "learning_rate": 8.936140858647923e-05, "loss": 0.5694, "step": 1454 }, { "epoch": 1.4831804281345566, "grad_norm": 5.448962211608887, "learning_rate": 8.924493783992589e-05, "loss": 0.5814, "step": 1455 }, { "epoch": 1.4841997961264015, "grad_norm": 11.943927764892578, "learning_rate": 8.91279097818748e-05, "loss": 0.8297, "step": 1456 }, { "epoch": 1.4852191641182466, "grad_norm": 6.5847930908203125, "learning_rate": 8.901032607422397e-05, "loss": 0.4661, "step": 1457 }, { "epoch": 1.4862385321100917, "grad_norm": 10.038482666015625, "learning_rate": 8.889218838676198e-05, "loss": 1.035, "step": 1458 }, { "epoch": 1.4872579001019368, "grad_norm": 5.818447589874268, "learning_rate": 8.877349839714454e-05, "loss": 0.2972, "step": 1459 }, { "epoch": 1.488277268093782, "grad_norm": 8.625869750976562, "learning_rate": 
8.865425779087042e-05, "loss": 0.5676, "step": 1460 }, { "epoch": 1.4892966360856268, "grad_norm": 7.4677910804748535, "learning_rate": 8.85344682612577e-05, "loss": 0.3514, "step": 1461 }, { "epoch": 1.490316004077472, "grad_norm": 11.1100435256958, "learning_rate": 8.841413150941954e-05, "loss": 1.017, "step": 1462 }, { "epoch": 1.491335372069317, "grad_norm": 5.164977073669434, "learning_rate": 8.829324924424016e-05, "loss": 0.4169, "step": 1463 }, { "epoch": 1.492354740061162, "grad_norm": 5.428584098815918, "learning_rate": 8.817182318235059e-05, "loss": 0.4397, "step": 1464 }, { "epoch": 1.4933741080530072, "grad_norm": 5.453851222991943, "learning_rate": 8.804985504810416e-05, "loss": 0.3873, "step": 1465 }, { "epoch": 1.494393476044852, "grad_norm": 3.758465528488159, "learning_rate": 8.792734657355217e-05, "loss": 0.2653, "step": 1466 }, { "epoch": 1.4954128440366974, "grad_norm": 5.696004867553711, "learning_rate": 8.780429949841908e-05, "loss": 0.3229, "step": 1467 }, { "epoch": 1.4964322120285423, "grad_norm": 7.936735153198242, "learning_rate": 8.768071557007806e-05, "loss": 0.4697, "step": 1468 }, { "epoch": 1.4974515800203874, "grad_norm": 8.697381019592285, "learning_rate": 8.755659654352599e-05, "loss": 0.595, "step": 1469 }, { "epoch": 1.4984709480122325, "grad_norm": 5.7998528480529785, "learning_rate": 8.743194418135865e-05, "loss": 0.4291, "step": 1470 }, { "epoch": 1.4984709480122325, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8078973293304443, "eval_Qnli-dev-1024_cosine_ap": 0.7609733003079024, "eval_Qnli-dev-1024_cosine_f1": 0.7128712871287128, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7630533576011658, "eval_Qnli-dev-1024_cosine_mcc": 0.41281977673947123, "eval_Qnli-dev-1024_cosine_precision": 0.6428571428571429, "eval_Qnli-dev-1024_cosine_recall": 0.8, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.693989634513855, 
"eval_Qnli-dev_cosine_ap": 0.7635441957451561, "eval_Qnli-dev_cosine_f1": 0.7289719626168225, "eval_Qnli-dev_cosine_f1_threshold": 0.6386604905128479, "eval_Qnli-dev_cosine_mcc": 0.43373226132862797, "eval_Qnli-dev_cosine_precision": 0.6290322580645161, "eval_Qnli-dev_cosine_recall": 0.8666666666666667, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.3019393980503082, "eval_global_dataset_runtime": 103.8441, "eval_global_dataset_samples_per_second": 7.733, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.874585602380735, "eval_sts-test-1024_spearman_cosine": 0.9074874432721208, "eval_sts-test_pearson_cosine": 0.9044927301887464, "eval_sts-test_spearman_cosine": 0.9217758439369574, "step": 1470 }, { "epoch": 1.4994903160040773, "grad_norm": 6.363936901092529, "learning_rate": 8.73067602537456e-05, "loss": 0.472, "step": 1471 }, { "epoch": 1.5005096839959227, "grad_norm": 21.527421951293945, "learning_rate": 8.718104653840506e-05, "loss": 1.5565, "step": 1472 }, { "epoch": 1.5015290519877675, "grad_norm": 5.951153755187988, "learning_rate": 8.705480482057875e-05, "loss": 0.226, "step": 1473 }, { "epoch": 1.5025484199796126, "grad_norm": 3.3359270095825195, "learning_rate": 8.692803689300641e-05, "loss": 0.2818, "step": 1474 }, { "epoch": 1.5035677879714577, "grad_norm": 4.292994022369385, "learning_rate": 8.680074455590045e-05, "loss": 0.331, "step": 1475 }, { "epoch": 1.5045871559633026, "grad_norm": 8.305481910705566, "learning_rate": 8.667292961692035e-05, "loss": 0.5533, "step": 1476 }, { "epoch": 1.505606523955148, "grad_norm": 12.950459480285645, "learning_rate": 8.65445938911469e-05, "loss": 0.9666, "step": 1477 }, { "epoch": 1.5066258919469928, "grad_norm": 13.390387535095215, "learning_rate": 8.641573920105664e-05, "loss": 0.9593, "step": 1478 }, { "epoch": 1.507645259938838, 
"grad_norm": 5.181076526641846, "learning_rate": 8.628636737649569e-05, "loss": 0.2917, "step": 1479 }, { "epoch": 1.508664627930683, "grad_norm": 5.981019020080566, "learning_rate": 8.615648025465409e-05, "loss": 0.396, "step": 1480 }, { "epoch": 1.5096839959225279, "grad_norm": 9.361347198486328, "learning_rate": 8.602607968003935e-05, "loss": 0.7183, "step": 1481 }, { "epoch": 1.5107033639143732, "grad_norm": 6.163973331451416, "learning_rate": 8.589516750445061e-05, "loss": 0.3434, "step": 1482 }, { "epoch": 1.511722731906218, "grad_norm": 3.649142265319824, "learning_rate": 8.576374558695208e-05, "loss": 0.218, "step": 1483 }, { "epoch": 1.5127420998980632, "grad_norm": 3.3364171981811523, "learning_rate": 8.563181579384679e-05, "loss": 0.1735, "step": 1484 }, { "epoch": 1.5137614678899083, "grad_norm": 4.01456356048584, "learning_rate": 8.549937999865001e-05, "loss": 0.4277, "step": 1485 }, { "epoch": 1.5147808358817532, "grad_norm": 5.251349449157715, "learning_rate": 8.53664400820627e-05, "loss": 0.2979, "step": 1486 }, { "epoch": 1.5158002038735985, "grad_norm": 10.812429428100586, "learning_rate": 8.523299793194471e-05, "loss": 0.8903, "step": 1487 }, { "epoch": 1.5168195718654434, "grad_norm": 9.290162086486816, "learning_rate": 8.509905544328808e-05, "loss": 0.5525, "step": 1488 }, { "epoch": 1.5178389398572885, "grad_norm": 3.046337604522705, "learning_rate": 8.496461451819009e-05, "loss": 0.2197, "step": 1489 }, { "epoch": 1.5188583078491336, "grad_norm": 4.534480571746826, "learning_rate": 8.482967706582623e-05, "loss": 0.2638, "step": 1490 }, { "epoch": 1.5198776758409784, "grad_norm": 6.42506217956543, "learning_rate": 8.46942450024231e-05, "loss": 0.3562, "step": 1491 }, { "epoch": 1.5208970438328238, "grad_norm": 6.858558177947998, "learning_rate": 8.455832025123119e-05, "loss": 0.4361, "step": 1492 }, { "epoch": 1.5219164118246686, "grad_norm": 7.202121734619141, "learning_rate": 8.442190474249755e-05, "loss": 0.6902, "step": 1493 }, { "epoch": 
1.5229357798165137, "grad_norm": 5.493877410888672, "learning_rate": 8.428500041343847e-05, "loss": 0.2922, "step": 1494 }, { "epoch": 1.5239551478083588, "grad_norm": 9.43299674987793, "learning_rate": 8.414760920821185e-05, "loss": 0.9379, "step": 1495 }, { "epoch": 1.5249745158002037, "grad_norm": 10.610505104064941, "learning_rate": 8.400973307788968e-05, "loss": 0.7092, "step": 1496 }, { "epoch": 1.525993883792049, "grad_norm": 8.512781143188477, "learning_rate": 8.387137398043031e-05, "loss": 0.5948, "step": 1497 }, { "epoch": 1.527013251783894, "grad_norm": 8.651867866516113, "learning_rate": 8.37325338806505e-05, "loss": 0.4054, "step": 1498 }, { "epoch": 1.528032619775739, "grad_norm": 7.8862199783325195, "learning_rate": 8.35932147501979e-05, "loss": 0.5618, "step": 1499 }, { "epoch": 1.529051987767584, "grad_norm": 10.186121940612793, "learning_rate": 8.345341856752254e-05, "loss": 0.7368, "step": 1500 }, { "epoch": 1.5300713557594292, "grad_norm": 5.780384063720703, "learning_rate": 8.331314731784922e-05, "loss": 0.5013, "step": 1501 }, { "epoch": 1.5310907237512743, "grad_norm": 5.746453762054443, "learning_rate": 8.317240299314894e-05, "loss": 0.4127, "step": 1502 }, { "epoch": 1.5321100917431192, "grad_norm": 6.575455188751221, "learning_rate": 8.303118759211082e-05, "loss": 0.5177, "step": 1503 }, { "epoch": 1.5331294597349643, "grad_norm": 8.351615905761719, "learning_rate": 8.288950312011368e-05, "loss": 0.5595, "step": 1504 }, { "epoch": 1.5341488277268094, "grad_norm": 5.2061767578125, "learning_rate": 8.274735158919757e-05, "loss": 0.1897, "step": 1505 }, { "epoch": 1.5351681957186545, "grad_norm": 8.74786376953125, "learning_rate": 8.260473501803508e-05, "loss": 0.5909, "step": 1506 }, { "epoch": 1.5361875637104996, "grad_norm": 8.112961769104004, "learning_rate": 8.246165543190285e-05, "loss": 0.7854, "step": 1507 }, { "epoch": 1.5372069317023445, "grad_norm": 5.495329856872559, "learning_rate": 8.231811486265271e-05, "loss": 0.3179, "step": 
1508 }, { "epoch": 1.5382262996941896, "grad_norm": 5.7515668869018555, "learning_rate": 8.217411534868281e-05, "loss": 0.2756, "step": 1509 }, { "epoch": 1.5392456676860347, "grad_norm": 3.9375154972076416, "learning_rate": 8.202965893490878e-05, "loss": 0.2375, "step": 1510 }, { "epoch": 1.5402650356778798, "grad_norm": 4.8834757804870605, "learning_rate": 8.18847476727345e-05, "loss": 0.4164, "step": 1511 }, { "epoch": 1.5412844036697249, "grad_norm": 5.144208908081055, "learning_rate": 8.173938362002318e-05, "loss": 0.2501, "step": 1512 }, { "epoch": 1.5423037716615697, "grad_norm": 2.856844186782837, "learning_rate": 8.159356884106802e-05, "loss": 0.1618, "step": 1513 }, { "epoch": 1.5433231396534148, "grad_norm": 8.247689247131348, "learning_rate": 8.14473054065629e-05, "loss": 0.6395, "step": 1514 }, { "epoch": 1.54434250764526, "grad_norm": 7.794886112213135, "learning_rate": 8.130059539357297e-05, "loss": 0.5933, "step": 1515 }, { "epoch": 1.545361875637105, "grad_norm": 11.686348915100098, "learning_rate": 8.115344088550526e-05, "loss": 0.8926, "step": 1516 }, { "epoch": 1.5463812436289501, "grad_norm": 6.921617031097412, "learning_rate": 8.100584397207886e-05, "loss": 0.3411, "step": 1517 }, { "epoch": 1.547400611620795, "grad_norm": 3.175189256668091, "learning_rate": 8.08578067492956e-05, "loss": 0.1612, "step": 1518 }, { "epoch": 1.5484199796126403, "grad_norm": 5.199557304382324, "learning_rate": 8.070933131940982e-05, "loss": 0.2548, "step": 1519 }, { "epoch": 1.5494393476044852, "grad_norm": 9.249714851379395, "learning_rate": 8.056041979089905e-05, "loss": 0.892, "step": 1520 }, { "epoch": 1.5504587155963303, "grad_norm": 8.709851264953613, "learning_rate": 8.041107427843357e-05, "loss": 0.3798, "step": 1521 }, { "epoch": 1.5514780835881754, "grad_norm": 9.481815338134766, "learning_rate": 8.026129690284669e-05, "loss": 0.5753, "step": 1522 }, { "epoch": 1.5524974515800203, "grad_norm": 10.316771507263184, "learning_rate": 8.011108979110457e-05, 
"loss": 1.2305, "step": 1523 }, { "epoch": 1.5535168195718656, "grad_norm": 9.717813491821289, "learning_rate": 7.996045507627594e-05, "loss": 0.7169, "step": 1524 }, { "epoch": 1.5545361875637105, "grad_norm": 9.637219429016113, "learning_rate": 7.98093948975019e-05, "loss": 0.7199, "step": 1525 }, { "epoch": 1.5555555555555556, "grad_norm": 10.511667251586914, "learning_rate": 7.965791139996543e-05, "loss": 0.7569, "step": 1526 }, { "epoch": 1.5565749235474007, "grad_norm": 8.276548385620117, "learning_rate": 7.950600673486106e-05, "loss": 0.5522, "step": 1527 }, { "epoch": 1.5575942915392456, "grad_norm": 10.812773704528809, "learning_rate": 7.935368305936425e-05, "loss": 0.9574, "step": 1528 }, { "epoch": 1.5586136595310909, "grad_norm": 4.123507499694824, "learning_rate": 7.920094253660074e-05, "loss": 0.241, "step": 1529 }, { "epoch": 1.5596330275229358, "grad_norm": 7.355940818786621, "learning_rate": 7.904778733561591e-05, "loss": 0.3016, "step": 1530 }, { "epoch": 1.5606523955147809, "grad_norm": 7.6690802574157715, "learning_rate": 7.889421963134383e-05, "loss": 0.342, "step": 1531 }, { "epoch": 1.561671763506626, "grad_norm": 11.40833568572998, "learning_rate": 7.874024160457652e-05, "loss": 0.6452, "step": 1532 }, { "epoch": 1.5626911314984708, "grad_norm": 1.7198033332824707, "learning_rate": 7.858585544193297e-05, "loss": 0.0948, "step": 1533 }, { "epoch": 1.5637104994903162, "grad_norm": 7.055710315704346, "learning_rate": 7.843106333582796e-05, "loss": 0.4199, "step": 1534 }, { "epoch": 1.564729867482161, "grad_norm": 6.451409816741943, "learning_rate": 7.827586748444114e-05, "loss": 0.4689, "step": 1535 }, { "epoch": 1.5657492354740061, "grad_norm": 5.309058666229248, "learning_rate": 7.812027009168546e-05, "loss": 0.3849, "step": 1536 }, { "epoch": 1.5667686034658512, "grad_norm": 6.170285224914551, "learning_rate": 7.79642733671764e-05, "loss": 0.3263, "step": 1537 }, { "epoch": 1.567787971457696, "grad_norm": 7.963518142700195, "learning_rate": 
7.78078795262e-05, "loss": 0.2742, "step": 1538 }, { "epoch": 1.5688073394495414, "grad_norm": 5.102199554443359, "learning_rate": 7.765109078968193e-05, "loss": 0.2751, "step": 1539 }, { "epoch": 1.5698267074413863, "grad_norm": 9.289502143859863, "learning_rate": 7.749390938415556e-05, "loss": 0.4461, "step": 1540 }, { "epoch": 1.5708460754332314, "grad_norm": 7.5046210289001465, "learning_rate": 7.733633754173053e-05, "loss": 0.454, "step": 1541 }, { "epoch": 1.5718654434250765, "grad_norm": 4.898295879364014, "learning_rate": 7.717837750006105e-05, "loss": 0.3172, "step": 1542 }, { "epoch": 1.5728848114169214, "grad_norm": 5.772686004638672, "learning_rate": 7.702003150231407e-05, "loss": 0.3302, "step": 1543 }, { "epoch": 1.5739041794087667, "grad_norm": 6.3255791664123535, "learning_rate": 7.686130179713742e-05, "loss": 0.4316, "step": 1544 }, { "epoch": 1.5749235474006116, "grad_norm": 3.258190155029297, "learning_rate": 7.670219063862798e-05, "loss": 0.1803, "step": 1545 }, { "epoch": 1.5759429153924567, "grad_norm": 6.360843658447266, "learning_rate": 7.654270028629943e-05, "loss": 0.4801, "step": 1546 }, { "epoch": 1.5769622833843018, "grad_norm": 2.4267330169677734, "learning_rate": 7.638283300505052e-05, "loss": 0.0975, "step": 1547 }, { "epoch": 1.5779816513761467, "grad_norm": 8.417614936828613, "learning_rate": 7.622259106513259e-05, "loss": 0.6647, "step": 1548 }, { "epoch": 1.579001019367992, "grad_norm": 7.388288974761963, "learning_rate": 7.606197674211747e-05, "loss": 0.3962, "step": 1549 }, { "epoch": 1.5800203873598369, "grad_norm": 3.0918948650360107, "learning_rate": 7.590099231686524e-05, "loss": 0.1611, "step": 1550 }, { "epoch": 1.581039755351682, "grad_norm": 4.632137775421143, "learning_rate": 7.573964007549155e-05, "loss": 0.4832, "step": 1551 }, { "epoch": 1.582059123343527, "grad_norm": 3.659292697906494, "learning_rate": 7.557792230933552e-05, "loss": 0.2286, "step": 1552 }, { "epoch": 1.583078491335372, "grad_norm": 
7.091201305389404, "learning_rate": 7.541584131492701e-05, "loss": 0.3312, "step": 1553 }, { "epoch": 1.5840978593272173, "grad_norm": 10.971521377563477, "learning_rate": 7.525339939395394e-05, "loss": 0.5886, "step": 1554 }, { "epoch": 1.5851172273190621, "grad_norm": 9.640290260314941, "learning_rate": 7.50905988532298e-05, "loss": 0.3686, "step": 1555 }, { "epoch": 1.5861365953109072, "grad_norm": 3.686574697494507, "learning_rate": 7.492744200466075e-05, "loss": 0.2189, "step": 1556 }, { "epoch": 1.5871559633027523, "grad_norm": 5.4901018142700195, "learning_rate": 7.476393116521276e-05, "loss": 0.1875, "step": 1557 }, { "epoch": 1.5881753312945972, "grad_norm": 8.475384712219238, "learning_rate": 7.46000686568789e-05, "loss": 0.4654, "step": 1558 }, { "epoch": 1.5891946992864425, "grad_norm": 7.7796759605407715, "learning_rate": 7.443585680664607e-05, "loss": 0.4154, "step": 1559 }, { "epoch": 1.5902140672782874, "grad_norm": 9.805006980895996, "learning_rate": 7.427129794646234e-05, "loss": 0.5811, "step": 1560 }, { "epoch": 1.5912334352701325, "grad_norm": 10.294364929199219, "learning_rate": 7.410639441320339e-05, "loss": 0.8518, "step": 1561 }, { "epoch": 1.5922528032619776, "grad_norm": 2.761817693710327, "learning_rate": 7.39411485486397e-05, "loss": 0.1038, "step": 1562 }, { "epoch": 1.5932721712538225, "grad_norm": 8.296953201293945, "learning_rate": 7.37755626994031e-05, "loss": 0.6343, "step": 1563 }, { "epoch": 1.5942915392456678, "grad_norm": 4.766301155090332, "learning_rate": 7.360963921695344e-05, "loss": 0.1857, "step": 1564 }, { "epoch": 1.5953109072375127, "grad_norm": 5.613104343414307, "learning_rate": 7.34433804575454e-05, "loss": 0.4422, "step": 1565 }, { "epoch": 1.5963302752293578, "grad_norm": 7.4801459312438965, "learning_rate": 7.327678878219467e-05, "loss": 0.4847, "step": 1566 }, { "epoch": 1.5973496432212029, "grad_norm": 5.0312886238098145, "learning_rate": 7.31098665566448e-05, "loss": 0.2852, "step": 1567 }, { "epoch": 
1.5983690112130478, "grad_norm": 7.759483337402344, "learning_rate": 7.294261615133333e-05, "loss": 0.5726, "step": 1568 }, { "epoch": 1.5983690112130478, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8175796270370483, "eval_Qnli-dev-1024_cosine_ap": 0.7381325092579727, "eval_Qnli-dev-1024_cosine_f1": 0.6862745098039216, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.72412109375, "eval_Qnli-dev-1024_cosine_mcc": 0.3519703275834634, "eval_Qnli-dev-1024_cosine_precision": 0.6140350877192983, "eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7137888669967651, "eval_Qnli-dev_cosine_ap": 0.7705408016969871, "eval_Qnli-dev_cosine_f1": 0.7192982456140351, "eval_Qnli-dev_cosine_f1_threshold": 0.621550440788269, "eval_Qnli-dev_cosine_mcc": 0.401886346014753, "eval_Qnli-dev_cosine_precision": 0.5942028985507246, "eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.96875, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.35073724389076233, "eval_global_dataset_runtime": 104.0769, "eval_global_dataset_samples_per_second": 7.715, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.96875, "eval_sts-test-1024_pearson_cosine": 0.8847218700205163, "eval_sts-test-1024_spearman_cosine": 0.9125263313234735, "eval_sts-test_pearson_cosine": 0.9057930208914168, "eval_sts-test_spearman_cosine": 0.9222613739605754, "step": 1568 }, { "epoch": 1.599388379204893, "grad_norm": 5.494419574737549, "learning_rate": 7.277503994135835e-05, "loss": 0.2879, "step": 1569 }, { "epoch": 1.600407747196738, "grad_norm": 6.195016860961914, "learning_rate": 7.260714030644449e-05, "loss": 0.2818, "step": 1570 }, { "epoch": 1.601427115188583, "grad_norm": 7.059222221374512, "learning_rate": 7.24389196309094e-05, "loss": 0.7579, "step": 1571 }, { "epoch": 
1.6024464831804281, "grad_norm": 9.059467315673828, "learning_rate": 7.227038030362979e-05, "loss": 0.3801, "step": 1572 }, { "epoch": 1.603465851172273, "grad_norm": 6.267899990081787, "learning_rate": 7.210152471800741e-05, "loss": 0.3066, "step": 1573 }, { "epoch": 1.6044852191641183, "grad_norm": 6.997797966003418, "learning_rate": 7.193235527193523e-05, "loss": 0.2659, "step": 1574 }, { "epoch": 1.6055045871559632, "grad_norm": 5.461020469665527, "learning_rate": 7.176287436776333e-05, "loss": 0.3371, "step": 1575 }, { "epoch": 1.6065239551478083, "grad_norm": 2.6381993293762207, "learning_rate": 7.159308441226455e-05, "loss": 0.0823, "step": 1576 }, { "epoch": 1.6075433231396534, "grad_norm": 6.970646858215332, "learning_rate": 7.142298781660082e-05, "loss": 0.4826, "step": 1577 }, { "epoch": 1.6085626911314985, "grad_norm": 9.13263988494873, "learning_rate": 7.12525869962884e-05, "loss": 0.5215, "step": 1578 }, { "epoch": 1.6095820591233436, "grad_norm": 3.3016436100006104, "learning_rate": 7.108188437116394e-05, "loss": 0.2161, "step": 1579 }, { "epoch": 1.6106014271151885, "grad_norm": 9.76569938659668, "learning_rate": 7.091088236534985e-05, "loss": 0.6502, "step": 1580 }, { "epoch": 1.6116207951070336, "grad_norm": 5.767617702484131, "learning_rate": 7.073958340722008e-05, "loss": 0.4971, "step": 1581 }, { "epoch": 1.6126401630988787, "grad_norm": 6.001529216766357, "learning_rate": 7.056798992936555e-05, "loss": 0.4015, "step": 1582 }, { "epoch": 1.6136595310907238, "grad_norm": 6.167148590087891, "learning_rate": 7.039610436855957e-05, "loss": 0.2794, "step": 1583 }, { "epoch": 1.614678899082569, "grad_norm": 8.581472396850586, "learning_rate": 7.022392916572336e-05, "loss": 0.9008, "step": 1584 }, { "epoch": 1.6156982670744138, "grad_norm": 4.615275859832764, "learning_rate": 7.005146676589118e-05, "loss": 0.2382, "step": 1585 }, { "epoch": 1.6167176350662589, "grad_norm": 7.006135940551758, "learning_rate": 6.987871961817581e-05, "loss": 0.3115, 
"step": 1586 }, { "epoch": 1.617737003058104, "grad_norm": 10.424875259399414, "learning_rate": 6.970569017573371e-05, "loss": 0.7609, "step": 1587 }, { "epoch": 1.618756371049949, "grad_norm": 8.667938232421875, "learning_rate": 6.953238089573012e-05, "loss": 0.475, "step": 1588 }, { "epoch": 1.6197757390417942, "grad_norm": 8.349859237670898, "learning_rate": 6.935879423930426e-05, "loss": 0.714, "step": 1589 }, { "epoch": 1.620795107033639, "grad_norm": 3.482165575027466, "learning_rate": 6.918493267153424e-05, "loss": 0.2345, "step": 1590 }, { "epoch": 1.6218144750254841, "grad_norm": 5.844244003295898, "learning_rate": 6.901079866140222e-05, "loss": 0.3892, "step": 1591 }, { "epoch": 1.6228338430173292, "grad_norm": 6.793012619018555, "learning_rate": 6.883639468175927e-05, "loss": 0.3771, "step": 1592 }, { "epoch": 1.6238532110091743, "grad_norm": 5.695591449737549, "learning_rate": 6.866172320929022e-05, "loss": 0.4352, "step": 1593 }, { "epoch": 1.6248725790010194, "grad_norm": 8.875423431396484, "learning_rate": 6.848678672447863e-05, "loss": 1.0865, "step": 1594 }, { "epoch": 1.6258919469928643, "grad_norm": 4.769944190979004, "learning_rate": 6.831158771157124e-05, "loss": 0.3441, "step": 1595 }, { "epoch": 1.6269113149847096, "grad_norm": 9.363751411437988, "learning_rate": 6.81361286585432e-05, "loss": 0.8453, "step": 1596 }, { "epoch": 1.6279306829765545, "grad_norm": 5.906173229217529, "learning_rate": 6.796041205706216e-05, "loss": 0.2858, "step": 1597 }, { "epoch": 1.6289500509683996, "grad_norm": 6.232714653015137, "learning_rate": 6.778444040245345e-05, "loss": 0.5172, "step": 1598 }, { "epoch": 1.6299694189602447, "grad_norm": 6.986159801483154, "learning_rate": 6.760821619366415e-05, "loss": 0.2466, "step": 1599 }, { "epoch": 1.6309887869520896, "grad_norm": 4.346200942993164, "learning_rate": 6.743174193322796e-05, "loss": 0.2753, "step": 1600 }, { "epoch": 1.632008154943935, "grad_norm": 6.619315147399902, "learning_rate": 
6.725502012722948e-05, "loss": 0.6937, "step": 1601 }, { "epoch": 1.6330275229357798, "grad_norm": 5.2573723793029785, "learning_rate": 6.707805328526864e-05, "loss": 0.4227, "step": 1602 }, { "epoch": 1.634046890927625, "grad_norm": 10.570611000061035, "learning_rate": 6.690084392042513e-05, "loss": 0.7107, "step": 1603 }, { "epoch": 1.63506625891947, "grad_norm": 5.327670574188232, "learning_rate": 6.67233945492227e-05, "loss": 0.4683, "step": 1604 }, { "epoch": 1.6360856269113149, "grad_norm": 7.018125057220459, "learning_rate": 6.654570769159328e-05, "loss": 0.3031, "step": 1605 }, { "epoch": 1.6371049949031602, "grad_norm": 9.383675575256348, "learning_rate": 6.636778587084142e-05, "loss": 0.4622, "step": 1606 }, { "epoch": 1.638124362895005, "grad_norm": 3.507168769836426, "learning_rate": 6.618963161360832e-05, "loss": 0.145, "step": 1607 }, { "epoch": 1.6391437308868502, "grad_norm": 8.523704528808594, "learning_rate": 6.601124744983596e-05, "loss": 0.3465, "step": 1608 }, { "epoch": 1.6401630988786953, "grad_norm": 6.723085880279541, "learning_rate": 6.583263591273121e-05, "loss": 0.508, "step": 1609 }, { "epoch": 1.6411824668705401, "grad_norm": 5.034709453582764, "learning_rate": 6.565379953872977e-05, "loss": 0.205, "step": 1610 }, { "epoch": 1.6422018348623855, "grad_norm": 10.521743774414062, "learning_rate": 6.547474086746028e-05, "loss": 1.0824, "step": 1611 }, { "epoch": 1.6432212028542303, "grad_norm": 8.010995864868164, "learning_rate": 6.529546244170818e-05, "loss": 0.4191, "step": 1612 }, { "epoch": 1.6442405708460754, "grad_norm": 5.811416149139404, "learning_rate": 6.51159668073796e-05, "loss": 0.4591, "step": 1613 }, { "epoch": 1.6452599388379205, "grad_norm": 3.7815189361572266, "learning_rate": 6.493625651346523e-05, "loss": 0.3046, "step": 1614 }, { "epoch": 1.6462793068297654, "grad_norm": 3.966926097869873, "learning_rate": 6.475633411200414e-05, "loss": 0.2447, "step": 1615 }, { "epoch": 1.6472986748216107, "grad_norm": 
5.435057640075684, "learning_rate": 6.457620215804734e-05, "loss": 0.2797, "step": 1616 }, { "epoch": 1.6483180428134556, "grad_norm": 11.412252426147461, "learning_rate": 6.439586320962194e-05, "loss": 1.2793, "step": 1617 }, { "epoch": 1.6493374108053007, "grad_norm": 8.497121810913086, "learning_rate": 6.421531982769427e-05, "loss": 0.4026, "step": 1618 }, { "epoch": 1.6503567787971458, "grad_norm": 10.641962051391602, "learning_rate": 6.403457457613404e-05, "loss": 0.6297, "step": 1619 }, { "epoch": 1.6513761467889907, "grad_norm": 9.194568634033203, "learning_rate": 6.385363002167746e-05, "loss": 0.9285, "step": 1620 }, { "epoch": 1.652395514780836, "grad_norm": 4.320868968963623, "learning_rate": 6.367248873389115e-05, "loss": 0.327, "step": 1621 }, { "epoch": 1.653414882772681, "grad_norm": 5.681830883026123, "learning_rate": 6.349115328513545e-05, "loss": 0.3873, "step": 1622 }, { "epoch": 1.654434250764526, "grad_norm": 3.3009400367736816, "learning_rate": 6.330962625052798e-05, "loss": 0.2392, "step": 1623 }, { "epoch": 1.655453618756371, "grad_norm": 7.098931312561035, "learning_rate": 6.312791020790709e-05, "loss": 0.4528, "step": 1624 }, { "epoch": 1.656472986748216, "grad_norm": 3.843114137649536, "learning_rate": 6.294600773779504e-05, "loss": 0.1439, "step": 1625 }, { "epoch": 1.6574923547400613, "grad_norm": 7.354673862457275, "learning_rate": 6.276392142336168e-05, "loss": 0.4292, "step": 1626 }, { "epoch": 1.6585117227319062, "grad_norm": 5.355445861816406, "learning_rate": 6.258165385038755e-05, "loss": 0.1736, "step": 1627 }, { "epoch": 1.6595310907237513, "grad_norm": 5.2437849044799805, "learning_rate": 6.239920760722722e-05, "loss": 0.5714, "step": 1628 }, { "epoch": 1.6605504587155964, "grad_norm": 3.639320135116577, "learning_rate": 6.221658528477255e-05, "loss": 0.2066, "step": 1629 }, { "epoch": 1.6615698267074412, "grad_norm": 6.4522247314453125, "learning_rate": 6.203378947641581e-05, "loss": 0.3611, "step": 1630 }, { "epoch": 
1.6625891946992866, "grad_norm": 6.9241461753845215, "learning_rate": 6.185082277801294e-05, "loss": 0.3273, "step": 1631 }, { "epoch": 1.6636085626911314, "grad_norm": 5.387557029724121, "learning_rate": 6.166768778784673e-05, "loss": 0.1974, "step": 1632 }, { "epoch": 1.6646279306829765, "grad_norm": 8.088689804077148, "learning_rate": 6.148438710658978e-05, "loss": 0.3644, "step": 1633 }, { "epoch": 1.6656472986748216, "grad_norm": 9.129353523254395, "learning_rate": 6.130092333726773e-05, "loss": 0.4186, "step": 1634 }, { "epoch": 1.6666666666666665, "grad_norm": 6.403126239776611, "learning_rate": 6.111729908522203e-05, "loss": 0.4829, "step": 1635 }, { "epoch": 1.6676860346585118, "grad_norm": 7.876978874206543, "learning_rate": 6.093351695807339e-05, "loss": 0.3506, "step": 1636 }, { "epoch": 1.6687054026503567, "grad_norm": 5.055951118469238, "learning_rate": 6.074957956568421e-05, "loss": 0.2133, "step": 1637 }, { "epoch": 1.6697247706422018, "grad_norm": 8.396081924438477, "learning_rate": 6.056548952012204e-05, "loss": 0.4685, "step": 1638 }, { "epoch": 1.670744138634047, "grad_norm": 6.602619171142578, "learning_rate": 6.038124943562199e-05, "loss": 0.3484, "step": 1639 }, { "epoch": 1.6717635066258918, "grad_norm": 8.266304016113281, "learning_rate": 6.019686192855002e-05, "loss": 0.6875, "step": 1640 }, { "epoch": 1.6727828746177371, "grad_norm": 4.310380935668945, "learning_rate": 6.001232961736555e-05, "loss": 0.171, "step": 1641 }, { "epoch": 1.673802242609582, "grad_norm": 6.7401652336120605, "learning_rate": 5.982765512258437e-05, "loss": 0.4644, "step": 1642 }, { "epoch": 1.674821610601427, "grad_norm": 5.405831336975098, "learning_rate": 5.9642841066741415e-05, "loss": 0.2419, "step": 1643 }, { "epoch": 1.6758409785932722, "grad_norm": 5.0712714195251465, "learning_rate": 5.9457890074353404e-05, "loss": 0.2566, "step": 1644 }, { "epoch": 1.676860346585117, "grad_norm": 3.400216579437256, "learning_rate": 5.9272804771881776e-05, "loss": 0.125, 
"step": 1645 }, { "epoch": 1.6778797145769624, "grad_norm": 8.119034767150879, "learning_rate": 5.9087587787695244e-05, "loss": 0.5321, "step": 1646 }, { "epoch": 1.6788990825688073, "grad_norm": 5.170970916748047, "learning_rate": 5.8902241752032536e-05, "loss": 0.3017, "step": 1647 }, { "epoch": 1.6799184505606524, "grad_norm": 8.949460983276367, "learning_rate": 5.871676929696506e-05, "loss": 0.7024, "step": 1648 }, { "epoch": 1.6809378185524975, "grad_norm": 7.251186370849609, "learning_rate": 5.853117305635932e-05, "loss": 0.4489, "step": 1649 }, { "epoch": 1.6819571865443423, "grad_norm": 7.259670734405518, "learning_rate": 5.834545566583986e-05, "loss": 0.3247, "step": 1650 }, { "epoch": 1.6829765545361877, "grad_norm": 10.1055326461792, "learning_rate": 5.815961976275158e-05, "loss": 0.6301, "step": 1651 }, { "epoch": 1.6839959225280325, "grad_norm": 8.810796737670898, "learning_rate": 5.797366798612237e-05, "loss": 0.52, "step": 1652 }, { "epoch": 1.6850152905198776, "grad_norm": 7.598231315612793, "learning_rate": 5.778760297662567e-05, "loss": 0.7236, "step": 1653 }, { "epoch": 1.6860346585117227, "grad_norm": 3.1995418071746826, "learning_rate": 5.760142737654275e-05, "loss": 0.146, "step": 1654 }, { "epoch": 1.6870540265035678, "grad_norm": 10.069400787353516, "learning_rate": 5.7415143829725634e-05, "loss": 0.649, "step": 1655 }, { "epoch": 1.688073394495413, "grad_norm": 7.260120391845703, "learning_rate": 5.722875498155901e-05, "loss": 0.7242, "step": 1656 }, { "epoch": 1.6890927624872578, "grad_norm": 6.980922698974609, "learning_rate": 5.704226347892319e-05, "loss": 0.3496, "step": 1657 }, { "epoch": 1.690112130479103, "grad_norm": 8.837504386901855, "learning_rate": 5.6855671970156e-05, "loss": 0.9688, "step": 1658 }, { "epoch": 1.691131498470948, "grad_norm": 7.200884819030762, "learning_rate": 5.6668983105015635e-05, "loss": 0.7482, "step": 1659 }, { "epoch": 1.6921508664627931, "grad_norm": 6.389528274536133, "learning_rate": 
5.6482199534642775e-05, "loss": 0.2365, "step": 1660 }, { "epoch": 1.6931702344546382, "grad_norm": 1.9209339618682861, "learning_rate": 5.629532391152298e-05, "loss": 0.1193, "step": 1661 }, { "epoch": 1.694189602446483, "grad_norm": 11.277609825134277, "learning_rate": 5.6108358889449055e-05, "loss": 0.864, "step": 1662 }, { "epoch": 1.6952089704383282, "grad_norm": 8.00525188446045, "learning_rate": 5.5921307123483365e-05, "loss": 0.5214, "step": 1663 }, { "epoch": 1.6962283384301733, "grad_norm": 8.152433395385742, "learning_rate": 5.573417126992003e-05, "loss": 0.5691, "step": 1664 }, { "epoch": 1.6972477064220184, "grad_norm": 6.9097490310668945, "learning_rate": 5.5546953986247366e-05, "loss": 0.2645, "step": 1665 }, { "epoch": 1.6982670744138635, "grad_norm": 3.46396541595459, "learning_rate": 5.535965793111004e-05, "loss": 0.1646, "step": 1666 }, { "epoch": 1.6982670744138635, "eval_Qnli-dev-1024_cosine_accuracy": 0.71875, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7981840372085571, "eval_Qnli-dev-1024_cosine_ap": 0.7241242792005642, "eval_Qnli-dev-1024_cosine_f1": 0.7200000000000001, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7665559649467468, "eval_Qnli-dev-1024_cosine_mcc": 0.43122545523632066, "eval_Qnli-dev-1024_cosine_precision": 0.6545454545454545, "eval_Qnli-dev-1024_cosine_recall": 0.8, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6936626434326172, "eval_Qnli-dev_cosine_ap": 0.7608245383505858, "eval_Qnli-dev_cosine_f1": 0.7454545454545455, "eval_Qnli-dev_cosine_f1_threshold": 0.6196604371070862, "eval_Qnli-dev_cosine_mcc": 0.47013467657639685, "eval_Qnli-dev_cosine_precision": 0.6307692307692307, "eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.35253724455833435, "eval_global_dataset_runtime": 104.1307, 
"eval_global_dataset_samples_per_second": 7.711, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8808346155517819, "eval_sts-test-1024_spearman_cosine": 0.9112034670517186, "eval_sts-test_pearson_cosine": 0.9053504773307102, "eval_sts-test_spearman_cosine": 0.9205147911840388, "step": 1666 }, { "epoch": 1.6992864424057084, "grad_norm": 4.240916728973389, "learning_rate": 5.517228576427137e-05, "loss": 0.1995, "step": 1667 }, { "epoch": 1.7003058103975535, "grad_norm": 3.9136712551116943, "learning_rate": 5.49848401465755e-05, "loss": 0.2672, "step": 1668 }, { "epoch": 1.7013251783893986, "grad_norm": 7.760051727294922, "learning_rate": 5.479732373990958e-05, "loss": 0.3449, "step": 1669 }, { "epoch": 1.7023445463812437, "grad_norm": 5.199024677276611, "learning_rate": 5.46097392071661e-05, "loss": 0.4137, "step": 1670 }, { "epoch": 1.7033639143730888, "grad_norm": 5.0411906242370605, "learning_rate": 5.4422089212204994e-05, "loss": 0.2984, "step": 1671 }, { "epoch": 1.7043832823649336, "grad_norm": 2.470310688018799, "learning_rate": 5.4234376419815805e-05, "loss": 0.1743, "step": 1672 }, { "epoch": 1.705402650356779, "grad_norm": 5.406039714813232, "learning_rate": 5.4046603495679825e-05, "loss": 0.2739, "step": 1673 }, { "epoch": 1.7064220183486238, "grad_norm": 4.936440944671631, "learning_rate": 5.385877310633234e-05, "loss": 0.1755, "step": 1674 }, { "epoch": 1.707441386340469, "grad_norm": 8.654784202575684, "learning_rate": 5.367088791912454e-05, "loss": 0.5298, "step": 1675 }, { "epoch": 1.708460754332314, "grad_norm": 6.0031938552856445, "learning_rate": 5.348295060218603e-05, "loss": 0.3842, "step": 1676 }, { "epoch": 1.709480122324159, "grad_norm": 2.8984975814819336, "learning_rate": 5.329496382438641e-05, "loss": 0.1637, "step": 1677 }, { "epoch": 1.7104994903160042, "grad_norm": 7.161764144897461, "learning_rate": 5.310693025529797e-05, "loss": 0.6203, "step": 1678 }, { 
"epoch": 1.7115188583078491, "grad_norm": 3.71823787689209, "learning_rate": 5.2918852565157216e-05, "loss": 0.1549, "step": 1679 }, { "epoch": 1.7125382262996942, "grad_norm": 10.297740936279297, "learning_rate": 5.273073342482736e-05, "loss": 0.4458, "step": 1680 }, { "epoch": 1.7135575942915393, "grad_norm": 6.423152923583984, "learning_rate": 5.254257550576021e-05, "loss": 0.4041, "step": 1681 }, { "epoch": 1.7145769622833842, "grad_norm": 6.07492733001709, "learning_rate": 5.235438147995824e-05, "loss": 0.3155, "step": 1682 }, { "epoch": 1.7155963302752295, "grad_norm": 4.876982688903809, "learning_rate": 5.216615401993674e-05, "loss": 0.1794, "step": 1683 }, { "epoch": 1.7166156982670744, "grad_norm": 8.693746566772461, "learning_rate": 5.1977895798685664e-05, "loss": 0.6088, "step": 1684 }, { "epoch": 1.7176350662589195, "grad_norm": 8.894753456115723, "learning_rate": 5.178960948963191e-05, "loss": 0.3606, "step": 1685 }, { "epoch": 1.7186544342507646, "grad_norm": 3.4491775035858154, "learning_rate": 5.160129776660123e-05, "loss": 0.1251, "step": 1686 }, { "epoch": 1.7196738022426095, "grad_norm": 5.475042819976807, "learning_rate": 5.141296330378025e-05, "loss": 0.2667, "step": 1687 }, { "epoch": 1.7206931702344548, "grad_norm": 6.056704998016357, "learning_rate": 5.122460877567857e-05, "loss": 0.4352, "step": 1688 }, { "epoch": 1.7217125382262997, "grad_norm": 10.229000091552734, "learning_rate": 5.103623685709063e-05, "loss": 0.6197, "step": 1689 }, { "epoch": 1.7227319062181448, "grad_norm": 5.402790546417236, "learning_rate": 5.0847850223057936e-05, "loss": 0.1889, "step": 1690 }, { "epoch": 1.7237512742099899, "grad_norm": 2.1127982139587402, "learning_rate": 5.0659451548830917e-05, "loss": 0.1342, "step": 1691 }, { "epoch": 1.7247706422018347, "grad_norm": 4.332535743713379, "learning_rate": 5.0471043509831e-05, "loss": 0.2149, "step": 1692 }, { "epoch": 1.72579001019368, "grad_norm": 8.170999526977539, "learning_rate": 5.028262878161262e-05, 
"loss": 0.5404, "step": 1693 }, { "epoch": 1.726809378185525, "grad_norm": 3.3618481159210205, "learning_rate": 5.009421003982508e-05, "loss": 0.1522, "step": 1694 }, { "epoch": 1.72782874617737, "grad_norm": 5.414154529571533, "learning_rate": 4.990578996017493e-05, "loss": 0.2736, "step": 1695 }, { "epoch": 1.7288481141692151, "grad_norm": 4.799069881439209, "learning_rate": 4.971737121838739e-05, "loss": 0.3062, "step": 1696 }, { "epoch": 1.72986748216106, "grad_norm": 5.350613594055176, "learning_rate": 4.952895649016901e-05, "loss": 0.4809, "step": 1697 }, { "epoch": 1.7308868501529053, "grad_norm": 3.9708356857299805, "learning_rate": 4.934054845116906e-05, "loss": 0.3985, "step": 1698 }, { "epoch": 1.7319062181447502, "grad_norm": 6.353355407714844, "learning_rate": 4.9152149776942076e-05, "loss": 0.3152, "step": 1699 }, { "epoch": 1.7329255861365953, "grad_norm": 9.268322944641113, "learning_rate": 4.896376314290941e-05, "loss": 0.6119, "step": 1700 }, { "epoch": 1.7339449541284404, "grad_norm": 5.95634651184082, "learning_rate": 4.877539122432147e-05, "loss": 0.3004, "step": 1701 }, { "epoch": 1.7349643221202853, "grad_norm": 9.091329574584961, "learning_rate": 4.8587036696219754e-05, "loss": 0.7715, "step": 1702 }, { "epoch": 1.7359836901121306, "grad_norm": 7.045917987823486, "learning_rate": 4.839870223339878e-05, "loss": 0.3693, "step": 1703 }, { "epoch": 1.7370030581039755, "grad_norm": 6.63254976272583, "learning_rate": 4.82103905103681e-05, "loss": 0.546, "step": 1704 }, { "epoch": 1.7380224260958206, "grad_norm": 8.315082550048828, "learning_rate": 4.8022104201314354e-05, "loss": 0.6736, "step": 1705 }, { "epoch": 1.7390417940876657, "grad_norm": 5.275691986083984, "learning_rate": 4.783384598006327e-05, "loss": 0.2598, "step": 1706 }, { "epoch": 1.7400611620795106, "grad_norm": 3.820236921310425, "learning_rate": 4.764561852004173e-05, "loss": 0.1398, "step": 1707 }, { "epoch": 1.7410805300713559, "grad_norm": 7.040982723236084, "learning_rate": 
4.745742449423977e-05, "loss": 0.379, "step": 1708 }, { "epoch": 1.7420998980632008, "grad_norm": 6.6788482666015625, "learning_rate": 4.726926657517262e-05, "loss": 0.5142, "step": 1709 }, { "epoch": 1.7431192660550459, "grad_norm": 6.863526344299316, "learning_rate": 4.708114743484282e-05, "loss": 0.3975, "step": 1710 }, { "epoch": 1.744138634046891, "grad_norm": 8.351316452026367, "learning_rate": 4.6893069744702045e-05, "loss": 0.3153, "step": 1711 }, { "epoch": 1.7451580020387358, "grad_norm": 10.181187629699707, "learning_rate": 4.6705036175613606e-05, "loss": 0.8402, "step": 1712 }, { "epoch": 1.7461773700305812, "grad_norm": 6.891512393951416, "learning_rate": 4.651704939781398e-05, "loss": 0.4639, "step": 1713 }, { "epoch": 1.747196738022426, "grad_norm": 8.785947799682617, "learning_rate": 4.6329112080875474e-05, "loss": 0.5627, "step": 1714 }, { "epoch": 1.7482161060142711, "grad_norm": 7.0134196281433105, "learning_rate": 4.614122689366767e-05, "loss": 0.4902, "step": 1715 }, { "epoch": 1.7492354740061162, "grad_norm": 8.736035346984863, "learning_rate": 4.5953396504320186e-05, "loss": 0.6778, "step": 1716 }, { "epoch": 1.750254841997961, "grad_norm": 3.7078139781951904, "learning_rate": 4.576562358018418e-05, "loss": 0.4115, "step": 1717 }, { "epoch": 1.7512742099898064, "grad_norm": 2.6372509002685547, "learning_rate": 4.557791078779502e-05, "loss": 0.1016, "step": 1718 }, { "epoch": 1.7522935779816513, "grad_norm": 4.356749057769775, "learning_rate": 4.539026079283388e-05, "loss": 0.2225, "step": 1719 }, { "epoch": 1.7533129459734964, "grad_norm": 5.2822418212890625, "learning_rate": 4.520267626009047e-05, "loss": 0.3715, "step": 1720 }, { "epoch": 1.7543323139653415, "grad_norm": 9.365150451660156, "learning_rate": 4.5015159853424546e-05, "loss": 0.7597, "step": 1721 }, { "epoch": 1.7553516819571864, "grad_norm": 6.232312202453613, "learning_rate": 4.4827714235728635e-05, "loss": 0.2513, "step": 1722 }, { "epoch": 1.7563710499490317, "grad_norm": 
7.213202476501465, "learning_rate": 4.4640342068889964e-05, "loss": 0.6002, "step": 1723 }, { "epoch": 1.7573904179408766, "grad_norm": 4.853435039520264, "learning_rate": 4.445304601375264e-05, "loss": 0.1624, "step": 1724 }, { "epoch": 1.7584097859327217, "grad_norm": 4.7718892097473145, "learning_rate": 4.4265828730079987e-05, "loss": 0.286, "step": 1725 }, { "epoch": 1.7594291539245668, "grad_norm": 4.1447038650512695, "learning_rate": 4.407869287651664e-05, "loss": 0.206, "step": 1726 }, { "epoch": 1.7604485219164119, "grad_norm": 5.51598596572876, "learning_rate": 4.389164111055092e-05, "loss": 0.211, "step": 1727 }, { "epoch": 1.761467889908257, "grad_norm": 8.03819751739502, "learning_rate": 4.370467608847699e-05, "loss": 0.3425, "step": 1728 }, { "epoch": 1.7624872579001019, "grad_norm": 6.400393962860107, "learning_rate": 4.3517800465357264e-05, "loss": 0.5059, "step": 1729 }, { "epoch": 1.763506625891947, "grad_norm": 9.561805725097656, "learning_rate": 4.333101689498437e-05, "loss": 0.5656, "step": 1730 }, { "epoch": 1.764525993883792, "grad_norm": 8.619925498962402, "learning_rate": 4.314432802984406e-05, "loss": 0.4599, "step": 1731 }, { "epoch": 1.7655453618756372, "grad_norm": 3.827829122543335, "learning_rate": 4.295773652107683e-05, "loss": 0.2513, "step": 1732 }, { "epoch": 1.7665647298674823, "grad_norm": 8.728647232055664, "learning_rate": 4.2771245018441e-05, "loss": 0.5087, "step": 1733 }, { "epoch": 1.7675840978593271, "grad_norm": 6.656282424926758, "learning_rate": 4.258485617027437e-05, "loss": 0.4029, "step": 1734 }, { "epoch": 1.7686034658511722, "grad_norm": 3.345684051513672, "learning_rate": 4.239857262345726e-05, "loss": 0.1634, "step": 1735 }, { "epoch": 1.7696228338430173, "grad_norm": 4.737642765045166, "learning_rate": 4.221239702337434e-05, "loss": 0.3575, "step": 1736 }, { "epoch": 1.7706422018348624, "grad_norm": 5.47233772277832, "learning_rate": 4.2026332013877634e-05, "loss": 0.2304, "step": 1737 }, { "epoch": 
1.7716615698267075, "grad_norm": 4.983118534088135, "learning_rate": 4.18403802372484e-05, "loss": 0.2056, "step": 1738 }, { "epoch": 1.7726809378185524, "grad_norm": 7.349947452545166, "learning_rate": 4.165454433416018e-05, "loss": 0.5025, "step": 1739 }, { "epoch": 1.7737003058103975, "grad_norm": 5.841440200805664, "learning_rate": 4.1468826943640724e-05, "loss": 0.355, "step": 1740 }, { "epoch": 1.7747196738022426, "grad_norm": 3.5881755352020264, "learning_rate": 4.128323070303499e-05, "loss": 0.2275, "step": 1741 }, { "epoch": 1.7757390417940877, "grad_norm": 5.0039963722229, "learning_rate": 4.109775824796747e-05, "loss": 0.2257, "step": 1742 }, { "epoch": 1.7767584097859328, "grad_norm": 5.417962074279785, "learning_rate": 4.091241221230476e-05, "loss": 0.2303, "step": 1743 }, { "epoch": 1.7777777777777777, "grad_norm": 6.626123428344727, "learning_rate": 4.072719522811824e-05, "loss": 0.5247, "step": 1744 }, { "epoch": 1.7787971457696228, "grad_norm": 4.882989406585693, "learning_rate": 4.054210992564661e-05, "loss": 0.2173, "step": 1745 }, { "epoch": 1.7798165137614679, "grad_norm": 4.634632110595703, "learning_rate": 4.0357158933258596e-05, "loss": 0.4443, "step": 1746 }, { "epoch": 1.780835881753313, "grad_norm": 6.226490020751953, "learning_rate": 4.017234487741561e-05, "loss": 0.569, "step": 1747 }, { "epoch": 1.781855249745158, "grad_norm": 6.3392767906188965, "learning_rate": 3.998767038263442e-05, "loss": 0.3075, "step": 1748 }, { "epoch": 1.782874617737003, "grad_norm": 4.992177963256836, "learning_rate": 3.9803138071449996e-05, "loss": 0.3494, "step": 1749 }, { "epoch": 1.7838939857288483, "grad_norm": 7.793630599975586, "learning_rate": 3.9618750564378064e-05, "loss": 0.4004, "step": 1750 }, { "epoch": 1.7849133537206932, "grad_norm": 4.538532733917236, "learning_rate": 3.9434510479877975e-05, "loss": 0.1982, "step": 1751 }, { "epoch": 1.7859327217125383, "grad_norm": 7.877995491027832, "learning_rate": 3.9250420434315806e-05, "loss": 0.4556, 
"step": 1752 }, { "epoch": 1.7869520897043834, "grad_norm": 6.7634053230285645, "learning_rate": 3.9066483041926616e-05, "loss": 0.4251, "step": 1753 }, { "epoch": 1.7879714576962282, "grad_norm": 3.238656759262085, "learning_rate": 3.888270091477798e-05, "loss": 0.1181, "step": 1754 }, { "epoch": 1.7889908256880735, "grad_norm": 9.58579158782959, "learning_rate": 3.8699076662732284e-05, "loss": 0.5456, "step": 1755 }, { "epoch": 1.7900101936799184, "grad_norm": 6.472123622894287, "learning_rate": 3.851561289341023e-05, "loss": 0.379, "step": 1756 }, { "epoch": 1.7910295616717635, "grad_norm": 4.2044172286987305, "learning_rate": 3.833231221215325e-05, "loss": 0.2473, "step": 1757 }, { "epoch": 1.7920489296636086, "grad_norm": 3.803764581680298, "learning_rate": 3.814917722198707e-05, "loss": 0.2603, "step": 1758 }, { "epoch": 1.7930682976554535, "grad_norm": 7.514235973358154, "learning_rate": 3.7966210523584245e-05, "loss": 0.5655, "step": 1759 }, { "epoch": 1.7940876656472988, "grad_norm": 7.211087226867676, "learning_rate": 3.778341471522749e-05, "loss": 0.5536, "step": 1760 }, { "epoch": 1.7951070336391437, "grad_norm": 8.897974014282227, "learning_rate": 3.7600792392772795e-05, "loss": 0.645, "step": 1761 }, { "epoch": 1.7961264016309888, "grad_norm": 7.907827377319336, "learning_rate": 3.741834614961246e-05, "loss": 0.4652, "step": 1762 }, { "epoch": 1.797145769622834, "grad_norm": 5.5902791023254395, "learning_rate": 3.7236078576638334e-05, "loss": 0.4776, "step": 1763 }, { "epoch": 1.7981651376146788, "grad_norm": 3.502904176712036, "learning_rate": 3.705399226220497e-05, "loss": 0.1951, "step": 1764 }, { "epoch": 1.7981651376146788, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8075562715530396, "eval_Qnli-dev-1024_cosine_ap": 0.749513188759678, "eval_Qnli-dev-1024_cosine_f1": 0.7272727272727272, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7558927536010742, "eval_Qnli-dev-1024_cosine_mcc": 
0.4497120149145933, "eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666, "eval_Qnli-dev-1024_cosine_recall": 0.8, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6980013251304626, "eval_Qnli-dev_cosine_ap": 0.7628031195186381, "eval_Qnli-dev_cosine_f1": 0.7500000000000001, "eval_Qnli-dev_cosine_f1_threshold": 0.636489748954773, "eval_Qnli-dev_cosine_mcc": 0.48653004754089046, "eval_Qnli-dev_cosine_precision": 0.6610169491525424, "eval_Qnli-dev_cosine_recall": 0.8666666666666667, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.28949180245399475, "eval_global_dataset_runtime": 103.9623, "eval_global_dataset_samples_per_second": 7.724, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9479166865348816, "eval_sts-test-1024_pearson_cosine": 0.8750724336289886, "eval_sts-test-1024_spearman_cosine": 0.9060820187994599, "eval_sts-test_pearson_cosine": 0.9050891583995959, "eval_sts-test_spearman_cosine": 0.9208979900112706, "step": 1764 }, { "epoch": 1.799184505606524, "grad_norm": 5.523859977722168, "learning_rate": 3.6872089792092925e-05, "loss": 0.2023, "step": 1765 }, { "epoch": 1.800203873598369, "grad_norm": 7.787282943725586, "learning_rate": 3.669037374947199e-05, "loss": 0.6349, "step": 1766 }, { "epoch": 1.801223241590214, "grad_norm": 4.277161121368408, "learning_rate": 3.650884671486453e-05, "loss": 0.2479, "step": 1767 }, { "epoch": 1.8022426095820592, "grad_norm": 11.096944808959961, "learning_rate": 3.6327511266108835e-05, "loss": 0.7766, "step": 1768 }, { "epoch": 1.803261977573904, "grad_norm": 5.538233280181885, "learning_rate": 3.614636997832259e-05, "loss": 0.2044, "step": 1769 }, { "epoch": 1.8042813455657494, "grad_norm": 3.1658334732055664, "learning_rate": 3.596542542386597e-05, "loss": 0.2323, "step": 1770 }, { "epoch": 1.8053007135575942, "grad_norm": 4.729072570800781, 
"learning_rate": 3.578468017230575e-05, "loss": 0.242, "step": 1771 }, { "epoch": 1.8063200815494393, "grad_norm": 6.442706108093262, "learning_rate": 3.560413679037807e-05, "loss": 0.2719, "step": 1772 }, { "epoch": 1.8073394495412844, "grad_norm": 7.747926712036133, "learning_rate": 3.542379784195267e-05, "loss": 0.2921, "step": 1773 }, { "epoch": 1.8083588175331293, "grad_norm": 5.683032512664795, "learning_rate": 3.524366588799588e-05, "loss": 0.3284, "step": 1774 }, { "epoch": 1.8093781855249746, "grad_norm": 11.293792724609375, "learning_rate": 3.5063743486534775e-05, "loss": 1.0605, "step": 1775 }, { "epoch": 1.8103975535168195, "grad_norm": 5.910725116729736, "learning_rate": 3.488403319262037e-05, "loss": 0.291, "step": 1776 }, { "epoch": 1.8114169215086646, "grad_norm": 4.038328170776367, "learning_rate": 3.470453755829183e-05, "loss": 0.2033, "step": 1777 }, { "epoch": 1.8124362895005097, "grad_norm": 2.819408655166626, "learning_rate": 3.45252591325397e-05, "loss": 0.2328, "step": 1778 }, { "epoch": 1.8134556574923546, "grad_norm": 5.704439640045166, "learning_rate": 3.4346200461270284e-05, "loss": 0.4924, "step": 1779 }, { "epoch": 1.8144750254842, "grad_norm": 6.023610591888428, "learning_rate": 3.416736408726884e-05, "loss": 0.3924, "step": 1780 }, { "epoch": 1.8154943934760448, "grad_norm": 7.2247490882873535, "learning_rate": 3.398875255016405e-05, "loss": 0.6327, "step": 1781 }, { "epoch": 1.81651376146789, "grad_norm": 2.289973735809326, "learning_rate": 3.381036838639169e-05, "loss": 0.1877, "step": 1782 }, { "epoch": 1.817533129459735, "grad_norm": 1.9493658542633057, "learning_rate": 3.363221412915858e-05, "loss": 0.1135, "step": 1783 }, { "epoch": 1.8185524974515799, "grad_norm": 4.736155986785889, "learning_rate": 3.345429230840672e-05, "loss": 0.3404, "step": 1784 }, { "epoch": 1.8195718654434252, "grad_norm": 6.019724369049072, "learning_rate": 3.327660545077731e-05, "loss": 0.2669, "step": 1785 }, { "epoch": 1.82059123343527, "grad_norm": 
9.922126770019531, "learning_rate": 3.309915607957485e-05, "loss": 0.5143, "step": 1786 }, { "epoch": 1.8216106014271152, "grad_norm": 5.188144683837891, "learning_rate": 3.292194671473135e-05, "loss": 0.3719, "step": 1787 }, { "epoch": 1.8226299694189603, "grad_norm": 6.299932956695557, "learning_rate": 3.2744979872770506e-05, "loss": 0.3536, "step": 1788 }, { "epoch": 1.8236493374108051, "grad_norm": 4.642245769500732, "learning_rate": 3.256825806677205e-05, "loss": 0.2735, "step": 1789 }, { "epoch": 1.8246687054026505, "grad_norm": 4.521740913391113, "learning_rate": 3.2391783806335885e-05, "loss": 0.1971, "step": 1790 }, { "epoch": 1.8256880733944953, "grad_norm": 4.655755996704102, "learning_rate": 3.221555959754656e-05, "loss": 0.4326, "step": 1791 }, { "epoch": 1.8267074413863404, "grad_norm": 0.8639253377914429, "learning_rate": 3.2039587942937855e-05, "loss": 0.0688, "step": 1792 }, { "epoch": 1.8277268093781855, "grad_norm": 4.769573211669922, "learning_rate": 3.186387134145682e-05, "loss": 0.2317, "step": 1793 }, { "epoch": 1.8287461773700304, "grad_norm": 5.148104667663574, "learning_rate": 3.168841228842877e-05, "loss": 0.3941, "step": 1794 }, { "epoch": 1.8297655453618757, "grad_norm": 5.8588080406188965, "learning_rate": 3.1513213275521384e-05, "loss": 0.3326, "step": 1795 }, { "epoch": 1.8307849133537206, "grad_norm": 4.889102935791016, "learning_rate": 3.1338276790709775e-05, "loss": 0.2981, "step": 1796 }, { "epoch": 1.8318042813455657, "grad_norm": 10.12937068939209, "learning_rate": 3.1163605318240715e-05, "loss": 0.4302, "step": 1797 }, { "epoch": 1.8328236493374108, "grad_norm": 4.488791465759277, "learning_rate": 3.098920133859783e-05, "loss": 0.1922, "step": 1798 }, { "epoch": 1.8338430173292557, "grad_norm": 7.872842311859131, "learning_rate": 3.0815067328465816e-05, "loss": 0.6065, "step": 1799 }, { "epoch": 1.834862385321101, "grad_norm": 9.769292831420898, "learning_rate": 3.064120576069579e-05, "loss": 0.7834, "step": 1800 }, { "epoch": 
1.835881753312946, "grad_norm": 7.953698635101318, "learning_rate": 3.0467619104269896e-05, "loss": 0.521, "step": 1801 }, { "epoch": 1.836901121304791, "grad_norm": 5.160398960113525, "learning_rate": 3.0294309824266298e-05, "loss": 0.3919, "step": 1802 }, { "epoch": 1.837920489296636, "grad_norm": 7.545567989349365, "learning_rate": 3.012128038182419e-05, "loss": 0.303, "step": 1803 }, { "epoch": 1.8389398572884812, "grad_norm": 7.457415580749512, "learning_rate": 2.9948533234108834e-05, "loss": 0.4616, "step": 1804 }, { "epoch": 1.8399592252803263, "grad_norm": 7.828042030334473, "learning_rate": 2.9776070834276647e-05, "loss": 0.5165, "step": 1805 }, { "epoch": 1.8409785932721712, "grad_norm": 2.9092535972595215, "learning_rate": 2.9603895631440405e-05, "loss": 0.1552, "step": 1806 }, { "epoch": 1.8419979612640163, "grad_norm": 4.436439514160156, "learning_rate": 2.943201007063443e-05, "loss": 0.2466, "step": 1807 }, { "epoch": 1.8430173292558614, "grad_norm": 6.84224796295166, "learning_rate": 2.9260416592779934e-05, "loss": 0.5846, "step": 1808 }, { "epoch": 1.8440366972477065, "grad_norm": 6.275019645690918, "learning_rate": 2.9089117634650192e-05, "loss": 0.4507, "step": 1809 }, { "epoch": 1.8450560652395516, "grad_norm": 7.318490982055664, "learning_rate": 2.8918115628836062e-05, "loss": 0.2341, "step": 1810 }, { "epoch": 1.8460754332313964, "grad_norm": 4.57275915145874, "learning_rate": 2.8747413003711614e-05, "loss": 0.1863, "step": 1811 }, { "epoch": 1.8470948012232415, "grad_norm": 7.398787975311279, "learning_rate": 2.8577012183399164e-05, "loss": 0.2505, "step": 1812 }, { "epoch": 1.8481141692150866, "grad_norm": 6.0185370445251465, "learning_rate": 2.8406915587735466e-05, "loss": 0.2833, "step": 1813 }, { "epoch": 1.8491335372069317, "grad_norm": 9.213631629943848, "learning_rate": 2.8237125632236704e-05, "loss": 0.3884, "step": 1814 }, { "epoch": 1.8501529051987768, "grad_norm": 14.965542793273926, "learning_rate": 2.8067644728064767e-05, "loss": 
0.8776, "step": 1815 }, { "epoch": 1.8511722731906217, "grad_norm": 5.327033996582031, "learning_rate": 2.7898475281992575e-05, "loss": 0.4291, "step": 1816 }, { "epoch": 1.8521916411824668, "grad_norm": 4.308135986328125, "learning_rate": 2.7729619696370223e-05, "loss": 0.3203, "step": 1817 }, { "epoch": 1.853211009174312, "grad_norm": 6.840002059936523, "learning_rate": 2.756108036909064e-05, "loss": 0.3705, "step": 1818 }, { "epoch": 1.854230377166157, "grad_norm": 10.1884765625, "learning_rate": 2.7392859693555555e-05, "loss": 0.6378, "step": 1819 }, { "epoch": 1.8552497451580021, "grad_norm": 8.687849998474121, "learning_rate": 2.7224960058641692e-05, "loss": 1.005, "step": 1820 }, { "epoch": 1.856269113149847, "grad_norm": 8.345951080322266, "learning_rate": 2.7057383848666677e-05, "loss": 0.4776, "step": 1821 }, { "epoch": 1.8572884811416923, "grad_norm": 7.518494606018066, "learning_rate": 2.6890133443355224e-05, "loss": 0.4999, "step": 1822 }, { "epoch": 1.8583078491335372, "grad_norm": 7.342522144317627, "learning_rate": 2.6723211217805343e-05, "loss": 0.2922, "step": 1823 }, { "epoch": 1.8593272171253823, "grad_norm": 10.63000774383545, "learning_rate": 2.655661954245462e-05, "loss": 0.4653, "step": 1824 }, { "epoch": 1.8603465851172274, "grad_norm": 5.479205131530762, "learning_rate": 2.6390360783046535e-05, "loss": 0.3264, "step": 1825 }, { "epoch": 1.8613659531090723, "grad_norm": 6.947875499725342, "learning_rate": 2.6224437300596892e-05, "loss": 0.5453, "step": 1826 }, { "epoch": 1.8623853211009176, "grad_norm": 12.862199783325195, "learning_rate": 2.6058851451360278e-05, "loss": 1.0997, "step": 1827 }, { "epoch": 1.8634046890927625, "grad_norm": 6.396128177642822, "learning_rate": 2.589360558679664e-05, "loss": 0.5142, "step": 1828 }, { "epoch": 1.8644240570846076, "grad_norm": 3.3630640506744385, "learning_rate": 2.5728702053537668e-05, "loss": 0.1527, "step": 1829 }, { "epoch": 1.8654434250764527, "grad_norm": 3.5490384101867676, "learning_rate": 
2.5564143193353928e-05, "loss": 0.2184, "step": 1830 }, { "epoch": 1.8664627930682975, "grad_norm": 6.618597507476807, "learning_rate": 2.539993134312111e-05, "loss": 0.3838, "step": 1831 }, { "epoch": 1.8674821610601429, "grad_norm": 5.235430717468262, "learning_rate": 2.5236068834787263e-05, "loss": 0.4265, "step": 1832 }, { "epoch": 1.8685015290519877, "grad_norm": 10.94070053100586, "learning_rate": 2.507255799533925e-05, "loss": 0.5124, "step": 1833 }, { "epoch": 1.8695208970438328, "grad_norm": 2.8730661869049072, "learning_rate": 2.490940114677022e-05, "loss": 0.1508, "step": 1834 }, { "epoch": 1.870540265035678, "grad_norm": 5.6821980476379395, "learning_rate": 2.4746600606046037e-05, "loss": 0.2766, "step": 1835 }, { "epoch": 1.8715596330275228, "grad_norm": 8.820428848266602, "learning_rate": 2.4584158685073024e-05, "loss": 0.5235, "step": 1836 }, { "epoch": 1.8725790010193681, "grad_norm": 2.182717800140381, "learning_rate": 2.4422077690664446e-05, "loss": 0.2394, "step": 1837 }, { "epoch": 1.873598369011213, "grad_norm": 6.691490650177002, "learning_rate": 2.426035992450848e-05, "loss": 0.326, "step": 1838 }, { "epoch": 1.8746177370030581, "grad_norm": 4.4693217277526855, "learning_rate": 2.4099007683134796e-05, "loss": 0.3008, "step": 1839 }, { "epoch": 1.8756371049949032, "grad_norm": 3.999272108078003, "learning_rate": 2.3938023257882514e-05, "loss": 0.1525, "step": 1840 }, { "epoch": 1.876656472986748, "grad_norm": 9.120763778686523, "learning_rate": 2.3777408934867424e-05, "loss": 0.39, "step": 1841 }, { "epoch": 1.8776758409785934, "grad_norm": 1.8546042442321777, "learning_rate": 2.3617166994949493e-05, "loss": 0.0774, "step": 1842 }, { "epoch": 1.8786952089704383, "grad_norm": 4.0527825355529785, "learning_rate": 2.3457299713700577e-05, "loss": 0.2674, "step": 1843 }, { "epoch": 1.8797145769622834, "grad_norm": 3.430372714996338, "learning_rate": 2.329780936137205e-05, "loss": 0.1116, "step": 1844 }, { "epoch": 1.8807339449541285, "grad_norm": 
6.467163562774658, "learning_rate": 2.313869820286257e-05, "loss": 0.4234, "step": 1845 }, { "epoch": 1.8817533129459734, "grad_norm": 12.85617733001709, "learning_rate": 2.2979968497685924e-05, "loss": 0.8432, "step": 1846 }, { "epoch": 1.8827726809378187, "grad_norm": 5.897470474243164, "learning_rate": 2.2821622499938926e-05, "loss": 0.3522, "step": 1847 }, { "epoch": 1.8837920489296636, "grad_norm": 4.003026485443115, "learning_rate": 2.266366245826947e-05, "loss": 0.1858, "step": 1848 }, { "epoch": 1.8848114169215087, "grad_norm": 5.525468826293945, "learning_rate": 2.2506090615844477e-05, "loss": 0.2835, "step": 1849 }, { "epoch": 1.8858307849133538, "grad_norm": 9.481707572937012, "learning_rate": 2.2348909210318064e-05, "loss": 0.7738, "step": 1850 }, { "epoch": 1.8868501529051986, "grad_norm": 2.575413942337036, "learning_rate": 2.2192120473800014e-05, "loss": 0.1524, "step": 1851 }, { "epoch": 1.887869520897044, "grad_norm": 9.639274597167969, "learning_rate": 2.203572663282362e-05, "loss": 0.7539, "step": 1852 }, { "epoch": 1.8888888888888888, "grad_norm": 5.821088790893555, "learning_rate": 2.1879729908314544e-05, "loss": 0.5166, "step": 1853 }, { "epoch": 1.889908256880734, "grad_norm": 3.0853917598724365, "learning_rate": 2.1724132515558887e-05, "loss": 0.2346, "step": 1854 }, { "epoch": 1.890927624872579, "grad_norm": 5.704559803009033, "learning_rate": 2.156893666417204e-05, "loss": 0.2003, "step": 1855 }, { "epoch": 1.891946992864424, "grad_norm": 9.251178741455078, "learning_rate": 2.1414144558067023e-05, "loss": 0.7549, "step": 1856 }, { "epoch": 1.8929663608562692, "grad_norm": 3.4819564819335938, "learning_rate": 2.1259758395423512e-05, "loss": 0.1809, "step": 1857 }, { "epoch": 1.8939857288481141, "grad_norm": 9.680218696594238, "learning_rate": 2.1105780368656215e-05, "loss": 0.6533, "step": 1858 }, { "epoch": 1.8950050968399592, "grad_norm": 3.142969846725464, "learning_rate": 2.0952212664384124e-05, "loss": 0.1845, "step": 1859 }, { 
"epoch": 1.8960244648318043, "grad_norm": 5.323211193084717, "learning_rate": 2.079905746339927e-05, "loss": 0.3156, "step": 1860 }, { "epoch": 1.8970438328236492, "grad_norm": 9.717694282531738, "learning_rate": 2.0646316940635763e-05, "loss": 0.5856, "step": 1861 }, { "epoch": 1.8980632008154945, "grad_norm": 2.1464314460754395, "learning_rate": 2.049399326513895e-05, "loss": 0.1409, "step": 1862 }, { "epoch": 1.8980632008154945, "eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7837380170822144, "eval_Qnli-dev-1024_cosine_ap": 0.7655287400697098, "eval_Qnli-dev-1024_cosine_f1": 0.7358490566037736, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7243161797523499, "eval_Qnli-dev-1024_cosine_mcc": 0.45131025668485714, "eval_Qnli-dev-1024_cosine_precision": 0.639344262295082, "eval_Qnli-dev-1024_cosine_recall": 0.8666666666666667, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6975128650665283, "eval_Qnli-dev_cosine_ap": 0.7629854627366377, "eval_Qnli-dev_cosine_f1": 0.7476635514018692, "eval_Qnli-dev_cosine_f1_threshold": 0.6258813738822937, "eval_Qnli-dev_cosine_mcc": 0.47737827504723207, "eval_Qnli-dev_cosine_precision": 0.6451612903225806, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.3163749873638153, "eval_global_dataset_runtime": 103.9557, "eval_global_dataset_samples_per_second": 7.724, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9479166865348816, "eval_sts-test-1024_pearson_cosine": 0.8841306743942718, "eval_sts-test-1024_spearman_cosine": 0.9094778502209384, "eval_sts-test_pearson_cosine": 0.9062162835110004, "eval_sts-test_spearman_cosine": 0.9204680794914208, "step": 1862 }, { "epoch": 1.8990825688073394, "grad_norm": 7.201123237609863, "learning_rate": 2.0342088600034582e-05, "loss": 
0.3773, "step": 1863 }, { "epoch": 1.9001019367991845, "grad_norm": 8.974194526672363, "learning_rate": 2.0190605102498105e-05, "loss": 0.8936, "step": 1864 }, { "epoch": 1.9011213047910296, "grad_norm": 10.563526153564453, "learning_rate": 2.003954492372404e-05, "loss": 0.3592, "step": 1865 }, { "epoch": 1.9021406727828745, "grad_norm": 7.776918888092041, "learning_rate": 1.9888910208895407e-05, "loss": 0.2913, "step": 1866 }, { "epoch": 1.9031600407747198, "grad_norm": 7.598758220672607, "learning_rate": 1.9738703097153316e-05, "loss": 0.436, "step": 1867 }, { "epoch": 1.9041794087665647, "grad_norm": 8.523947715759277, "learning_rate": 1.958892572156648e-05, "loss": 0.4502, "step": 1868 }, { "epoch": 1.9051987767584098, "grad_norm": 4.650737762451172, "learning_rate": 1.943958020910096e-05, "loss": 0.2545, "step": 1869 }, { "epoch": 1.9062181447502549, "grad_norm": 3.7644124031066895, "learning_rate": 1.92906686805902e-05, "loss": 0.1539, "step": 1870 }, { "epoch": 1.9072375127420997, "grad_norm": 8.597049713134766, "learning_rate": 1.914219325070442e-05, "loss": 0.4734, "step": 1871 }, { "epoch": 1.908256880733945, "grad_norm": 3.616166830062866, "learning_rate": 1.8994156027921162e-05, "loss": 0.1828, "step": 1872 }, { "epoch": 1.90927624872579, "grad_norm": 6.930202960968018, "learning_rate": 1.8846559114494756e-05, "loss": 0.3154, "step": 1873 }, { "epoch": 1.910295616717635, "grad_norm": 6.539050102233887, "learning_rate": 1.8699404606427052e-05, "loss": 0.2951, "step": 1874 }, { "epoch": 1.9113149847094801, "grad_norm": 5.528707504272461, "learning_rate": 1.8552694593437097e-05, "loss": 0.3063, "step": 1875 }, { "epoch": 1.912334352701325, "grad_norm": 5.866353511810303, "learning_rate": 1.8406431158931996e-05, "loss": 0.4556, "step": 1876 }, { "epoch": 1.9133537206931703, "grad_norm": 8.167952537536621, "learning_rate": 1.826061637997685e-05, "loss": 0.4327, "step": 1877 }, { "epoch": 1.9143730886850152, "grad_norm": 11.24150276184082, "learning_rate": 
1.8115252327265543e-05, "loss": 0.5985, "step": 1878 }, { "epoch": 1.9153924566768603, "grad_norm": 5.483310222625732, "learning_rate": 1.797034106509127e-05, "loss": 0.1853, "step": 1879 }, { "epoch": 1.9164118246687054, "grad_norm": 5.693748474121094, "learning_rate": 1.7825884651317204e-05, "loss": 0.2905, "step": 1880 }, { "epoch": 1.9174311926605505, "grad_norm": 4.149360656738281, "learning_rate": 1.768188513734731e-05, "loss": 0.19, "step": 1881 }, { "epoch": 1.9184505606523956, "grad_norm": 3.8560631275177, "learning_rate": 1.753834456809716e-05, "loss": 0.1926, "step": 1882 }, { "epoch": 1.9194699286442405, "grad_norm": 7.31168270111084, "learning_rate": 1.7395264981964927e-05, "loss": 0.291, "step": 1883 }, { "epoch": 1.9204892966360856, "grad_norm": 6.7560715675354, "learning_rate": 1.7252648410802434e-05, "loss": 0.6064, "step": 1884 }, { "epoch": 1.9215086646279307, "grad_norm": 6.752120494842529, "learning_rate": 1.71104968798863e-05, "loss": 0.6472, "step": 1885 }, { "epoch": 1.9225280326197758, "grad_norm": 3.197389602661133, "learning_rate": 1.696881240788917e-05, "loss": 0.2575, "step": 1886 }, { "epoch": 1.9235474006116209, "grad_norm": 10.3342866897583, "learning_rate": 1.6827597006851104e-05, "loss": 0.4679, "step": 1887 }, { "epoch": 1.9245667686034658, "grad_norm": 3.474792003631592, "learning_rate": 1.6686852682150794e-05, "loss": 0.1591, "step": 1888 }, { "epoch": 1.9255861365953109, "grad_norm": 9.85100269317627, "learning_rate": 1.654658143247747e-05, "loss": 0.4278, "step": 1889 }, { "epoch": 1.926605504587156, "grad_norm": 2.223907947540283, "learning_rate": 1.640678524980212e-05, "loss": 0.1457, "step": 1890 }, { "epoch": 1.927624872579001, "grad_norm": 4.660088539123535, "learning_rate": 1.6267466119349507e-05, "loss": 0.3107, "step": 1891 }, { "epoch": 1.9286442405708462, "grad_norm": 7.299057483673096, "learning_rate": 1.6128626019569715e-05, "loss": 0.3369, "step": 1892 }, { "epoch": 1.929663608562691, "grad_norm": 
5.282012462615967, "learning_rate": 1.5990266922110324e-05, "loss": 0.4062, "step": 1893 }, { "epoch": 1.9306829765545361, "grad_norm": 4.812608242034912, "learning_rate": 1.5852390791788134e-05, "loss": 0.3265, "step": 1894 }, { "epoch": 1.9317023445463812, "grad_norm": 10.287457466125488, "learning_rate": 1.5714999586561536e-05, "loss": 0.6488, "step": 1895 }, { "epoch": 1.9327217125382263, "grad_norm": 8.709385871887207, "learning_rate": 1.5578095257502433e-05, "loss": 0.5861, "step": 1896 }, { "epoch": 1.9337410805300714, "grad_norm": 6.345649242401123, "learning_rate": 1.544167974876885e-05, "loss": 0.2017, "step": 1897 }, { "epoch": 1.9347604485219163, "grad_norm": 4.160301208496094, "learning_rate": 1.5305754997576922e-05, "loss": 0.275, "step": 1898 }, { "epoch": 1.9357798165137616, "grad_norm": 6.832029342651367, "learning_rate": 1.5170322934173775e-05, "loss": 0.2694, "step": 1899 }, { "epoch": 1.9367991845056065, "grad_norm": 4.3108296394348145, "learning_rate": 1.503538548180991e-05, "loss": 0.1678, "step": 1900 }, { "epoch": 1.9378185524974516, "grad_norm": 6.166708946228027, "learning_rate": 1.4900944556711927e-05, "loss": 0.2412, "step": 1901 }, { "epoch": 1.9388379204892967, "grad_norm": 7.308569431304932, "learning_rate": 1.4767002068055297e-05, "loss": 0.6276, "step": 1902 }, { "epoch": 1.9398572884811416, "grad_norm": 2.976868152618408, "learning_rate": 1.4633559917937306e-05, "loss": 0.1367, "step": 1903 }, { "epoch": 1.940876656472987, "grad_norm": 6.624225616455078, "learning_rate": 1.4500620001349968e-05, "loss": 0.551, "step": 1904 }, { "epoch": 1.9418960244648318, "grad_norm": 4.78963041305542, "learning_rate": 1.436818420615319e-05, "loss": 0.3431, "step": 1905 }, { "epoch": 1.9429153924566769, "grad_norm": 5.378144264221191, "learning_rate": 1.4236254413047896e-05, "loss": 0.2006, "step": 1906 }, { "epoch": 1.943934760448522, "grad_norm": 6.3299760818481445, "learning_rate": 1.4104832495549402e-05, "loss": 0.4514, "step": 1907 }, { 
"epoch": 1.9449541284403669, "grad_norm": 3.8007140159606934, "learning_rate": 1.3973920319960682e-05, "loss": 0.232, "step": 1908 }, { "epoch": 1.9459734964322122, "grad_norm": 8.437200546264648, "learning_rate": 1.3843519745345923e-05, "loss": 0.7087, "step": 1909 }, { "epoch": 1.946992864424057, "grad_norm": 9.044495582580566, "learning_rate": 1.3713632623504318e-05, "loss": 0.3232, "step": 1910 }, { "epoch": 1.9480122324159022, "grad_norm": 3.7584590911865234, "learning_rate": 1.358426079894336e-05, "loss": 0.142, "step": 1911 }, { "epoch": 1.9490316004077473, "grad_norm": 3.0307207107543945, "learning_rate": 1.3455406108853108e-05, "loss": 0.1597, "step": 1912 }, { "epoch": 1.9500509683995921, "grad_norm": 5.186105728149414, "learning_rate": 1.3327070383079649e-05, "loss": 0.2134, "step": 1913 }, { "epoch": 1.9510703363914375, "grad_norm": 11.696715354919434, "learning_rate": 1.3199255444099557e-05, "loss": 0.5582, "step": 1914 }, { "epoch": 1.9520897043832823, "grad_norm": 6.090602874755859, "learning_rate": 1.3071963106993573e-05, "loss": 0.184, "step": 1915 }, { "epoch": 1.9531090723751274, "grad_norm": 7.272611618041992, "learning_rate": 1.2945195179421266e-05, "loss": 0.5319, "step": 1916 }, { "epoch": 1.9541284403669725, "grad_norm": 6.290035724639893, "learning_rate": 1.2818953461594969e-05, "loss": 0.2426, "step": 1917 }, { "epoch": 1.9551478083588174, "grad_norm": 6.6716156005859375, "learning_rate": 1.2693239746254432e-05, "loss": 0.3149, "step": 1918 }, { "epoch": 1.9561671763506627, "grad_norm": 6.8174896240234375, "learning_rate": 1.2568055818641366e-05, "loss": 0.281, "step": 1919 }, { "epoch": 1.9571865443425076, "grad_norm": 5.842833518981934, "learning_rate": 1.2443403456474017e-05, "loss": 0.2779, "step": 1920 }, { "epoch": 1.9582059123343527, "grad_norm": 8.29177474975586, "learning_rate": 1.2319284429921957e-05, "loss": 0.3821, "step": 1921 }, { "epoch": 1.9592252803261978, "grad_norm": 7.320400714874268, "learning_rate": 
1.2195700501580937e-05, "loss": 0.5121, "step": 1922 }, { "epoch": 1.9602446483180427, "grad_norm": 6.3962321281433105, "learning_rate": 1.207265342644785e-05, "loss": 0.2637, "step": 1923 }, { "epoch": 1.961264016309888, "grad_norm": 9.778839111328125, "learning_rate": 1.1950144951895819e-05, "loss": 0.8175, "step": 1924 }, { "epoch": 1.9622833843017329, "grad_norm": 7.268181800842285, "learning_rate": 1.18281768176494e-05, "loss": 0.3914, "step": 1925 }, { "epoch": 1.963302752293578, "grad_norm": 3.256939172744751, "learning_rate": 1.1706750755759854e-05, "loss": 0.1444, "step": 1926 }, { "epoch": 1.964322120285423, "grad_norm": 3.6003193855285645, "learning_rate": 1.1585868490580503e-05, "loss": 0.141, "step": 1927 }, { "epoch": 1.965341488277268, "grad_norm": 7.784787654876709, "learning_rate": 1.146553173874232e-05, "loss": 0.291, "step": 1928 }, { "epoch": 1.9663608562691133, "grad_norm": 4.663620471954346, "learning_rate": 1.1345742209129589e-05, "loss": 0.211, "step": 1929 }, { "epoch": 1.9673802242609582, "grad_norm": 8.380531311035156, "learning_rate": 1.1226501602855466e-05, "loss": 0.3455, "step": 1930 }, { "epoch": 1.9683995922528033, "grad_norm": 3.0026469230651855, "learning_rate": 1.1107811613238034e-05, "loss": 0.1692, "step": 1931 }, { "epoch": 1.9694189602446484, "grad_norm": 8.605925559997559, "learning_rate": 1.0989673925776039e-05, "loss": 0.8896, "step": 1932 }, { "epoch": 1.9704383282364932, "grad_norm": 6.9157233238220215, "learning_rate": 1.0872090218125197e-05, "loss": 0.5577, "step": 1933 }, { "epoch": 1.9714576962283386, "grad_norm": 5.527960777282715, "learning_rate": 1.0755062160074103e-05, "loss": 0.4554, "step": 1934 }, { "epoch": 1.9724770642201834, "grad_norm": 4.29521369934082, "learning_rate": 1.0638591413520782e-05, "loss": 0.1784, "step": 1935 }, { "epoch": 1.9734964322120285, "grad_norm": 6.10489559173584, "learning_rate": 1.0522679632448879e-05, "loss": 0.2751, "step": 1936 }, { "epoch": 1.9745158002038736, "grad_norm": 
5.862469673156738, "learning_rate": 1.0407328462904247e-05, "loss": 0.3531, "step": 1937 }, { "epoch": 1.9755351681957185, "grad_norm": 9.201395988464355, "learning_rate": 1.0292539542971625e-05, "loss": 0.6773, "step": 1938 }, { "epoch": 1.9765545361875638, "grad_norm": 5.540637969970703, "learning_rate": 1.0178314502751312e-05, "loss": 0.2948, "step": 1939 }, { "epoch": 1.9775739041794087, "grad_norm": 8.971341133117676, "learning_rate": 1.006465496433604e-05, "loss": 0.4181, "step": 1940 }, { "epoch": 1.9785932721712538, "grad_norm": 8.356114387512207, "learning_rate": 9.951562541787929e-06, "loss": 0.7386, "step": 1941 }, { "epoch": 1.979612640163099, "grad_norm": 6.853330135345459, "learning_rate": 9.839038841115566e-06, "loss": 0.2926, "step": 1942 }, { "epoch": 1.9806320081549438, "grad_norm": 5.473418712615967, "learning_rate": 9.727085460251218e-06, "loss": 0.2158, "step": 1943 }, { "epoch": 1.981651376146789, "grad_norm": 7.808025360107422, "learning_rate": 9.615703989028112e-06, "loss": 0.6198, "step": 1944 }, { "epoch": 1.982670744138634, "grad_norm": 4.961716651916504, "learning_rate": 9.504896009157876e-06, "loss": 0.2425, "step": 1945 }, { "epoch": 1.983690112130479, "grad_norm": 8.417882919311523, "learning_rate": 9.394663094208128e-06, "loss": 0.4055, "step": 1946 }, { "epoch": 1.9847094801223242, "grad_norm": 5.619123935699463, "learning_rate": 9.285006809579888e-06, "loss": 0.4608, "step": 1947 }, { "epoch": 1.985728848114169, "grad_norm": 6.5537567138671875, "learning_rate": 9.175928712485798e-06, "loss": 0.3028, "step": 1948 }, { "epoch": 1.9867482161060144, "grad_norm": 6.92478084564209, "learning_rate": 9.067430351927513e-06, "loss": 0.7157, "step": 1949 }, { "epoch": 1.9877675840978593, "grad_norm": 6.300648212432861, "learning_rate": 8.959513268674141e-06, "loss": 0.3149, "step": 1950 }, { "epoch": 1.9887869520897044, "grad_norm": 3.5753180980682373, "learning_rate": 8.852178995239952e-06, "loss": 0.0783, "step": 1951 }, { "epoch": 
1.9898063200815495, "grad_norm": 2.463205337524414, "learning_rate": 8.745429055863024e-06, "loss": 0.1055, "step": 1952 }, { "epoch": 1.9908256880733946, "grad_norm": 7.284326553344727, "learning_rate": 8.639264966483196e-06, "loss": 0.4019, "step": 1953 }, { "epoch": 1.9918450560652396, "grad_norm": 7.7004852294921875, "learning_rate": 8.533688234720937e-06, "loss": 0.4939, "step": 1954 }, { "epoch": 1.9928644240570845, "grad_norm": 6.602788925170898, "learning_rate": 8.428700359855535e-06, "loss": 0.2458, "step": 1955 }, { "epoch": 1.9938837920489296, "grad_norm": 3.019469738006592, "learning_rate": 8.324302832804237e-06, "loss": 0.1383, "step": 1956 }, { "epoch": 1.9949031600407747, "grad_norm": 3.9522721767425537, "learning_rate": 8.220497136100602e-06, "loss": 0.2046, "step": 1957 }, { "epoch": 1.9959225280326198, "grad_norm": 8.255660057067871, "learning_rate": 8.117284743873859e-06, "loss": 0.254, "step": 1958 }, { "epoch": 1.996941896024465, "grad_norm": 3.8426706790924072, "learning_rate": 8.014667121827784e-06, "loss": 0.1302, "step": 1959 }, { "epoch": 1.9979612640163098, "grad_norm": 6.920229911804199, "learning_rate": 7.912645727219875e-06, "loss": 0.595, "step": 1960 }, { "epoch": 1.9979612640163098, "eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7934629917144775, "eval_Qnli-dev-1024_cosine_ap": 0.7691003178954985, "eval_Qnli-dev-1024_cosine_f1": 0.7450980392156862, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.734605610370636, "eval_Qnli-dev-1024_cosine_mcc": 0.4794765594627558, "eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666, "eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6731052994728088, "eval_Qnli-dev_cosine_ap": 0.7666441534536714, "eval_Qnli-dev_cosine_f1": 0.7476635514018692, "eval_Qnli-dev_cosine_f1_threshold": 0.6249356865882874, "eval_Qnli-dev_cosine_mcc": 0.47737827504723207, 
"eval_Qnli-dev_cosine_precision": 0.6451612903225806, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.3007451593875885, "eval_global_dataset_runtime": 103.9733, "eval_global_dataset_samples_per_second": 7.723, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8876934677640924, "eval_sts-test-1024_spearman_cosine": 0.9111361153088742, "eval_sts-test_pearson_cosine": 0.9070198458509664, "eval_sts-test_spearman_cosine": 0.9207956588148845, "step": 1960 }, { "epoch": 1.998980632008155, "grad_norm": 7.969030857086182, "learning_rate": 7.81122200884072e-06, "loss": 0.3089, "step": 1961 }, { "epoch": 2.0, "grad_norm": 4.521041393280029, "learning_rate": 7.710397406993387e-06, "loss": 0.2925, "step": 1962 }, { "epoch": 2.001019367991845, "grad_norm": 1.096656322479248, "learning_rate": 7.610173353472977e-06, "loss": 0.0737, "step": 1963 }, { "epoch": 2.00203873598369, "grad_norm": 5.494912624359131, "learning_rate": 7.510551271546301e-06, "loss": 0.3361, "step": 1964 }, { "epoch": 2.003058103975535, "grad_norm": 7.918971538543701, "learning_rate": 7.411532575931657e-06, "loss": 0.738, "step": 1965 }, { "epoch": 2.0040774719673804, "grad_norm": 4.213651657104492, "learning_rate": 7.313118672778768e-06, "loss": 0.2693, "step": 1966 }, { "epoch": 2.0050968399592253, "grad_norm": 10.979836463928223, "learning_rate": 7.21531095964873e-06, "loss": 0.9024, "step": 1967 }, { "epoch": 2.00611620795107, "grad_norm": 8.45730209350586, "learning_rate": 7.118110825494251e-06, "loss": 0.3686, "step": 1968 }, { "epoch": 2.0071355759429155, "grad_norm": 3.898918628692627, "learning_rate": 7.0215196506399515e-06, "loss": 0.1791, "step": 1969 }, { "epoch": 2.0081549439347604, "grad_norm": 5.748415946960449, "learning_rate": 6.925538806762638e-06, "loss": 0.341, 
"step": 1970 }, { "epoch": 2.0091743119266057, "grad_norm": 6.287943363189697, "learning_rate": 6.830169656871966e-06, "loss": 0.4936, "step": 1971 }, { "epoch": 2.0101936799184505, "grad_norm": 6.493780136108398, "learning_rate": 6.735413555290937e-06, "loss": 0.5617, "step": 1972 }, { "epoch": 2.0112130479102954, "grad_norm": 2.098747968673706, "learning_rate": 6.641271847636854e-06, "loss": 0.1672, "step": 1973 }, { "epoch": 2.0122324159021407, "grad_norm": 7.320080757141113, "learning_rate": 6.547745870801958e-06, "loss": 0.4374, "step": 1974 }, { "epoch": 2.0132517838939856, "grad_norm": 8.45764446258545, "learning_rate": 6.4548369529347566e-06, "loss": 0.5038, "step": 1975 }, { "epoch": 2.014271151885831, "grad_norm": 1.3194546699523926, "learning_rate": 6.36254641342085e-06, "loss": 0.0422, "step": 1976 }, { "epoch": 2.015290519877676, "grad_norm": 7.725487232208252, "learning_rate": 6.270875562864409e-06, "loss": 0.3417, "step": 1977 }, { "epoch": 2.0163098878695207, "grad_norm": 3.446214437484741, "learning_rate": 6.179825703069486e-06, "loss": 0.1576, "step": 1978 }, { "epoch": 2.017329255861366, "grad_norm": 2.145524024963379, "learning_rate": 6.089398127021534e-06, "loss": 0.0915, "step": 1979 }, { "epoch": 2.018348623853211, "grad_norm": 2.5441269874572754, "learning_rate": 5.999594118869051e-06, "loss": 0.1103, "step": 1980 }, { "epoch": 2.019367991845056, "grad_norm": 6.149587154388428, "learning_rate": 5.910414953905341e-06, "loss": 0.3266, "step": 1981 }, { "epoch": 2.020387359836901, "grad_norm": 6.195409297943115, "learning_rate": 5.82186189855039e-06, "loss": 0.1879, "step": 1982 }, { "epoch": 2.021406727828746, "grad_norm": 6.459590435028076, "learning_rate": 5.733936210332919e-06, "loss": 0.5804, "step": 1983 }, { "epoch": 2.0224260958205913, "grad_norm": 7.298165798187256, "learning_rate": 5.646639137872467e-06, "loss": 0.4159, "step": 1984 }, { "epoch": 2.023445463812436, "grad_norm": 3.1634249687194824, "learning_rate": 
5.559971920861734e-06, "loss": 0.1358, "step": 1985 }, { "epoch": 2.0244648318042815, "grad_norm": 2.3591601848602295, "learning_rate": 5.473935790048923e-06, "loss": 0.0971, "step": 1986 }, { "epoch": 2.0254841997961264, "grad_norm": 4.338249683380127, "learning_rate": 5.388531967220211e-06, "loss": 0.1704, "step": 1987 }, { "epoch": 2.0265035677879712, "grad_norm": 6.169326305389404, "learning_rate": 5.30376166518261e-06, "loss": 0.221, "step": 1988 }, { "epoch": 2.0275229357798166, "grad_norm": 3.112377882003784, "learning_rate": 5.219626087746432e-06, "loss": 0.1377, "step": 1989 }, { "epoch": 2.0285423037716614, "grad_norm": 4.449329853057861, "learning_rate": 5.136126429708521e-06, "loss": 0.2792, "step": 1990 }, { "epoch": 2.0295616717635068, "grad_norm": 7.014669895172119, "learning_rate": 5.053263876834957e-06, "loss": 0.5533, "step": 1991 }, { "epoch": 2.0305810397553516, "grad_norm": 5.11379861831665, "learning_rate": 4.971039605844558e-06, "loss": 0.243, "step": 1992 }, { "epoch": 2.0316004077471965, "grad_norm": 4.427427768707275, "learning_rate": 4.889454784391823e-06, "loss": 0.2642, "step": 1993 }, { "epoch": 2.032619775739042, "grad_norm": 2.5310215950012207, "learning_rate": 4.808510571050695e-06, "loss": 0.1542, "step": 1994 }, { "epoch": 2.0336391437308867, "grad_norm": 4.451014518737793, "learning_rate": 4.7282081152978056e-06, "loss": 0.3514, "step": 1995 }, { "epoch": 2.034658511722732, "grad_norm": 6.827959060668945, "learning_rate": 4.6485485574963125e-06, "loss": 0.192, "step": 1996 }, { "epoch": 2.035677879714577, "grad_norm": 4.101830005645752, "learning_rate": 4.569533028879719e-06, "loss": 0.1563, "step": 1997 }, { "epoch": 2.036697247706422, "grad_norm": 6.259415626525879, "learning_rate": 4.491162651535729e-06, "loss": 0.2977, "step": 1998 }, { "epoch": 2.037716615698267, "grad_norm": 4.608965873718262, "learning_rate": 4.413438538390363e-06, "loss": 0.3345, "step": 1999 }, { "epoch": 2.038735983690112, "grad_norm": 
3.5626466274261475, "learning_rate": 4.3363617931921396e-06, "loss": 0.2128, "step": 2000 }, { "epoch": 2.0397553516819573, "grad_norm": 3.7776856422424316, "learning_rate": 4.2599335104964e-06, "loss": 0.178, "step": 2001 }, { "epoch": 2.040774719673802, "grad_norm": 4.22892951965332, "learning_rate": 4.184154775649762e-06, "loss": 0.2355, "step": 2002 }, { "epoch": 2.0417940876656475, "grad_norm": 7.214908123016357, "learning_rate": 4.109026664774718e-06, "loss": 0.2723, "step": 2003 }, { "epoch": 2.0428134556574924, "grad_norm": 4.9243645668029785, "learning_rate": 4.034550244754337e-06, "loss": 0.3108, "step": 2004 }, { "epoch": 2.0438328236493373, "grad_norm": 2.550334930419922, "learning_rate": 3.960726573217171e-06, "loss": 0.1128, "step": 2005 }, { "epoch": 2.0448521916411826, "grad_norm": 6.916386604309082, "learning_rate": 3.887556698522071e-06, "loss": 0.4542, "step": 2006 }, { "epoch": 2.0458715596330275, "grad_norm": 11.782197952270508, "learning_rate": 3.815041659743556e-06, "loss": 1.0039, "step": 2007 }, { "epoch": 2.046890927624873, "grad_norm": 6.364333152770996, "learning_rate": 3.743182486656821e-06, "loss": 0.4065, "step": 2008 }, { "epoch": 2.0479102956167177, "grad_norm": 8.159380912780762, "learning_rate": 3.671980199723274e-06, "loss": 0.6474, "step": 2009 }, { "epoch": 2.0489296636085625, "grad_norm": 9.521140098571777, "learning_rate": 3.6014358100759204e-06, "loss": 0.6126, "step": 2010 }, { "epoch": 2.049949031600408, "grad_norm": 5.315720558166504, "learning_rate": 3.5315503195051337e-06, "loss": 0.2899, "step": 2011 }, { "epoch": 2.0509683995922527, "grad_norm": 3.33170747756958, "learning_rate": 3.462324720444271e-06, "loss": 0.2236, "step": 2012 }, { "epoch": 2.051987767584098, "grad_norm": 2.4479920864105225, "learning_rate": 3.393759995955781e-06, "loss": 0.1018, "step": 2013 }, { "epoch": 2.053007135575943, "grad_norm": 5.11319637298584, "learning_rate": 3.3258571197170017e-06, "loss": 0.4546, "step": 2014 }, { "epoch": 
2.054026503567788, "grad_norm": 5.450096607208252, "learning_rate": 3.2586170560066133e-06, "loss": 0.1877, "step": 2015 }, { "epoch": 2.055045871559633, "grad_norm": 3.155742645263672, "learning_rate": 3.1920407596906455e-06, "loss": 0.203, "step": 2016 }, { "epoch": 2.056065239551478, "grad_norm": 3.579530715942383, "learning_rate": 3.1261291762091527e-06, "loss": 0.2151, "step": 2017 }, { "epoch": 2.0570846075433233, "grad_norm": 6.1576032638549805, "learning_rate": 3.0608832415626898e-06, "loss": 0.4264, "step": 2018 }, { "epoch": 2.058103975535168, "grad_norm": 6.620378017425537, "learning_rate": 2.9963038822990174e-06, "loss": 0.305, "step": 2019 }, { "epoch": 2.059123343527013, "grad_norm": 3.6878819465637207, "learning_rate": 2.932392015499974e-06, "loss": 0.1407, "step": 2020 }, { "epoch": 2.0601427115188584, "grad_norm": 5.848405838012695, "learning_rate": 2.8691485487684246e-06, "loss": 0.1914, "step": 2021 }, { "epoch": 2.0611620795107033, "grad_norm": 3.685908317565918, "learning_rate": 2.8065743802153875e-06, "loss": 0.1305, "step": 2022 }, { "epoch": 2.0621814475025486, "grad_norm": 6.540226936340332, "learning_rate": 2.7446703984472797e-06, "loss": 0.3055, "step": 2023 }, { "epoch": 2.0632008154943935, "grad_norm": 2.917329788208008, "learning_rate": 2.6834374825533025e-06, "loss": 0.1309, "step": 2024 }, { "epoch": 2.0642201834862384, "grad_norm": 8.328691482543945, "learning_rate": 2.6228765020929415e-06, "loss": 0.8764, "step": 2025 }, { "epoch": 2.0652395514780837, "grad_norm": 3.314098358154297, "learning_rate": 2.5629883170836366e-06, "loss": 0.1458, "step": 2026 }, { "epoch": 2.0662589194699286, "grad_norm": 3.6642093658447266, "learning_rate": 2.503773777988522e-06, "loss": 0.1433, "step": 2027 }, { "epoch": 2.067278287461774, "grad_norm": 5.074317932128906, "learning_rate": 2.4452337257044656e-06, "loss": 0.3791, "step": 2028 }, { "epoch": 2.0682976554536188, "grad_norm": 2.789017677307129, "learning_rate": 2.387368991549954e-06, "loss": 
0.2152, "step": 2029 }, { "epoch": 2.0693170234454636, "grad_norm": 7.425163269042969, "learning_rate": 2.3301803972534785e-06, "loss": 0.4335, "step": 2030 }, { "epoch": 2.070336391437309, "grad_norm": 4.4709062576293945, "learning_rate": 2.273668754941677e-06, "loss": 0.2469, "step": 2031 }, { "epoch": 2.071355759429154, "grad_norm": 5.429105281829834, "learning_rate": 2.217834867127977e-06, "loss": 0.3997, "step": 2032 }, { "epoch": 2.072375127420999, "grad_norm": 9.30593204498291, "learning_rate": 2.1626795267010393e-06, "loss": 0.4345, "step": 2033 }, { "epoch": 2.073394495412844, "grad_norm": 5.841343402862549, "learning_rate": 2.1082035169136373e-06, "loss": 0.2812, "step": 2034 }, { "epoch": 2.074413863404689, "grad_norm": 8.149571418762207, "learning_rate": 2.054407611371445e-06, "loss": 0.3564, "step": 2035 }, { "epoch": 2.0754332313965342, "grad_norm": 4.423587799072266, "learning_rate": 2.0012925740220624e-06, "loss": 0.1496, "step": 2036 }, { "epoch": 2.076452599388379, "grad_norm": 2.966921806335449, "learning_rate": 1.9488591591441954e-06, "loss": 0.1768, "step": 2037 }, { "epoch": 2.0774719673802244, "grad_norm": 1.870186686515808, "learning_rate": 1.8971081113369481e-06, "loss": 0.1257, "step": 2038 }, { "epoch": 2.0784913353720693, "grad_norm": 5.432903289794922, "learning_rate": 1.8460401655092107e-06, "loss": 0.3168, "step": 2039 }, { "epoch": 2.079510703363914, "grad_norm": 2.7983829975128174, "learning_rate": 1.795656046869254e-06, "loss": 0.21, "step": 2040 }, { "epoch": 2.0805300713557595, "grad_norm": 1.5770361423492432, "learning_rate": 1.7459564709144116e-06, "loss": 0.0651, "step": 2041 }, { "epoch": 2.0815494393476044, "grad_norm": 2.986161947250366, "learning_rate": 1.6969421434209376e-06, "loss": 0.1315, "step": 2042 }, { "epoch": 2.0825688073394497, "grad_norm": 8.22683048248291, "learning_rate": 1.6486137604339758e-06, "loss": 0.7834, "step": 2043 }, { "epoch": 2.0835881753312946, "grad_norm": 1.8245917558670044, "learning_rate": 
1.6009720082576728e-06, "loss": 0.0943, "step": 2044 }, { "epoch": 2.0846075433231395, "grad_norm": 4.468945503234863, "learning_rate": 1.5540175634454368e-06, "loss": 0.1908, "step": 2045 }, { "epoch": 2.085626911314985, "grad_norm": 4.662402629852295, "learning_rate": 1.5077510927902938e-06, "loss": 0.1952, "step": 2046 }, { "epoch": 2.0866462793068297, "grad_norm": 6.890171051025391, "learning_rate": 1.4621732533155075e-06, "loss": 0.6385, "step": 2047 }, { "epoch": 2.087665647298675, "grad_norm": 2.867135524749756, "learning_rate": 1.4172846922651528e-06, "loss": 0.203, "step": 2048 }, { "epoch": 2.08868501529052, "grad_norm": 6.184851169586182, "learning_rate": 1.3730860470949902e-06, "loss": 0.1727, "step": 2049 }, { "epoch": 2.0897043832823647, "grad_norm": 3.2211556434631348, "learning_rate": 1.3295779454633451e-06, "loss": 0.1612, "step": 2050 }, { "epoch": 2.09072375127421, "grad_norm": 5.037843704223633, "learning_rate": 1.2867610052223144e-06, "loss": 0.3213, "step": 2051 }, { "epoch": 2.091743119266055, "grad_norm": 10.782960891723633, "learning_rate": 1.2446358344088193e-06, "loss": 0.4739, "step": 2052 }, { "epoch": 2.0927624872579003, "grad_norm": 9.444459915161133, "learning_rate": 1.2032030312361554e-06, "loss": 0.4755, "step": 2053 }, { "epoch": 2.093781855249745, "grad_norm": 6.151844024658203, "learning_rate": 1.1624631840853495e-06, "loss": 0.6616, "step": 2054 }, { "epoch": 2.09480122324159, "grad_norm": 6.036664009094238, "learning_rate": 1.1224168714968786e-06, "loss": 0.3557, "step": 2055 }, { "epoch": 2.0958205912334353, "grad_norm": 7.177772521972656, "learning_rate": 1.0830646621624529e-06, "loss": 0.4678, "step": 2056 }, { "epoch": 2.09683995922528, "grad_norm": 3.4542040824890137, "learning_rate": 1.0444071149169122e-06, "loss": 0.2447, "step": 2057 }, { "epoch": 2.0978593272171255, "grad_norm": 1.9717903137207031, "learning_rate": 1.0064447787303144e-06, "loss": 0.1077, "step": 2058 }, { "epoch": 2.0978593272171255, 
"eval_Qnli-dev-1024_cosine_accuracy": 0.7708333333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7858775854110718, "eval_Qnli-dev-1024_cosine_ap": 0.7717931866855599, "eval_Qnli-dev-1024_cosine_f1": 0.7450980392156862, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7231606245040894, "eval_Qnli-dev-1024_cosine_mcc": 0.4794765594627558, "eval_Qnli-dev-1024_cosine_precision": 0.6666666666666666, "eval_Qnli-dev-1024_cosine_recall": 0.8444444444444444, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6960161924362183, "eval_Qnli-dev_cosine_ap": 0.7668004254073291, "eval_Qnli-dev_cosine_f1": 0.7524752475247526, "eval_Qnli-dev_cosine_f1_threshold": 0.643132209777832, "eval_Qnli-dev_cosine_mcc": 0.4975007565834654, "eval_Qnli-dev_cosine_precision": 0.6785714285714286, "eval_Qnli-dev_cosine_recall": 0.8444444444444444, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.28185296058654785, "eval_global_dataset_runtime": 103.9195, "eval_global_dataset_samples_per_second": 7.727, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8880021668150686, "eval_sts-test-1024_spearman_cosine": 0.9116855751722728, "eval_sts-test_pearson_cosine": 0.9070605432857464, "eval_sts-test_spearman_cosine": 0.9208122251709873, "step": 2058 }, { "epoch": 2.0988786952089704, "grad_norm": 4.328063011169434, "learning_rate": 9.691781927001154e-07, "loss": 0.4307, "step": 2059 }, { "epoch": 2.0998980632008153, "grad_norm": 3.5777828693389893, "learning_rate": 9.326078860435349e-07, "loss": 0.1913, "step": 2060 }, { "epoch": 2.1009174311926606, "grad_norm": 7.239850044250488, "learning_rate": 8.967343780900361e-07, "loss": 0.4133, "step": 2061 }, { "epoch": 2.1019367991845055, "grad_norm": 2.4862873554229736, "learning_rate": 8.615581782739468e-07, "loss": 0.1214, "step": 2062 }, { "epoch": 
2.102956167176351, "grad_norm": 4.599758625030518, "learning_rate": 8.270797861272217e-07, "loss": 0.2374, "step": 2063 }, { "epoch": 2.1039755351681957, "grad_norm": 3.0807836055755615, "learning_rate": 7.932996912723644e-07, "loss": 0.1138, "step": 2064 }, { "epoch": 2.1049949031600406, "grad_norm": 4.003174304962158, "learning_rate": 7.602183734154278e-07, "loss": 0.1756, "step": 2065 }, { "epoch": 2.106014271151886, "grad_norm": 6.551426887512207, "learning_rate": 7.278363023392964e-07, "loss": 0.3942, "step": 2066 }, { "epoch": 2.1070336391437308, "grad_norm": 8.321428298950195, "learning_rate": 6.961539378968929e-07, "loss": 0.3343, "step": 2067 }, { "epoch": 2.108053007135576, "grad_norm": 2.4432618618011475, "learning_rate": 6.651717300047656e-07, "loss": 0.1298, "step": 2068 }, { "epoch": 2.109072375127421, "grad_norm": 4.8150954246521, "learning_rate": 6.348901186365941e-07, "loss": 0.1862, "step": 2069 }, { "epoch": 2.1100917431192663, "grad_norm": 10.590511322021484, "learning_rate": 6.053095338170389e-07, "loss": 0.8467, "step": 2070 }, { "epoch": 2.111111111111111, "grad_norm": 4.546511650085449, "learning_rate": 5.764303956155515e-07, "loss": 0.2192, "step": 2071 }, { "epoch": 2.112130479102956, "grad_norm": 4.88209342956543, "learning_rate": 5.482531141404679e-07, "loss": 0.2541, "step": 2072 }, { "epoch": 2.1131498470948014, "grad_norm": 4.770115375518799, "learning_rate": 5.20778089533136e-07, "loss": 0.4093, "step": 2073 }, { "epoch": 2.1141692150866462, "grad_norm": 7.157455921173096, "learning_rate": 4.940057119622976e-07, "loss": 0.5352, "step": 2074 }, { "epoch": 2.115188583078491, "grad_norm": 1.989957571029663, "learning_rate": 4.679363616184651e-07, "loss": 0.1393, "step": 2075 }, { "epoch": 2.1162079510703364, "grad_norm": 5.129319190979004, "learning_rate": 4.425704087085925e-07, "loss": 0.1769, "step": 2076 }, { "epoch": 2.1172273190621813, "grad_norm": 3.754472255706787, "learning_rate": 4.1790821345079055e-07, "loss": 0.1795, "step": 
2077 }, { "epoch": 2.1182466870540266, "grad_norm": 8.386497497558594, "learning_rate": 3.939501260692036e-07, "loss": 0.3381, "step": 2078 }, { "epoch": 2.1192660550458715, "grad_norm": 5.126825332641602, "learning_rate": 3.706964867890572e-07, "loss": 0.3702, "step": 2079 }, { "epoch": 2.120285423037717, "grad_norm": 5.695154666900635, "learning_rate": 3.481476258318017e-07, "loss": 0.3147, "step": 2080 }, { "epoch": 2.1213047910295617, "grad_norm": 8.309889793395996, "learning_rate": 3.263038634104487e-07, "loss": 0.3419, "step": 2081 }, { "epoch": 2.1223241590214066, "grad_norm": 6.508575439453125, "learning_rate": 3.051655097249917e-07, "loss": 0.2264, "step": 2082 }, { "epoch": 2.123343527013252, "grad_norm": 5.034822940826416, "learning_rate": 2.847328649580483e-07, "loss": 0.2863, "step": 2083 }, { "epoch": 2.124362895005097, "grad_norm": 5.786725997924805, "learning_rate": 2.6500621927054715e-07, "loss": 0.3757, "step": 2084 }, { "epoch": 2.1253822629969417, "grad_norm": 3.350344181060791, "learning_rate": 2.459858527976366e-07, "loss": 0.1941, "step": 2085 }, { "epoch": 2.126401630988787, "grad_norm": 4.888509273529053, "learning_rate": 2.276720356446882e-07, "loss": 0.3484, "step": 2086 }, { "epoch": 2.127420998980632, "grad_norm": 2.2694365978240967, "learning_rate": 2.1006502788349924e-07, "loss": 0.1174, "step": 2087 }, { "epoch": 2.128440366972477, "grad_norm": 6.6389594078063965, "learning_rate": 1.9316507954854067e-07, "loss": 0.3486, "step": 2088 }, { "epoch": 2.129459734964322, "grad_norm": 7.36968469619751, "learning_rate": 1.7697243063346524e-07, "loss": 0.2587, "step": 2089 }, { "epoch": 2.1304791029561674, "grad_norm": 2.8399484157562256, "learning_rate": 1.6148731108764913e-07, "loss": 0.166, "step": 2090 }, { "epoch": 2.1314984709480123, "grad_norm": 6.889630317687988, "learning_rate": 1.4670994081297795e-07, "loss": 0.5485, "step": 2091 }, { "epoch": 2.132517838939857, "grad_norm": 10.132883071899414, "learning_rate": 
1.3264052966066033e-07, "loss": 0.5755, "step": 2092 }, { "epoch": 2.1335372069317025, "grad_norm": 7.8372602462768555, "learning_rate": 1.1927927742831358e-07, "loss": 0.2181, "step": 2093 }, { "epoch": 2.1345565749235473, "grad_norm": 3.8295810222625732, "learning_rate": 1.0662637385708274e-07, "loss": 0.2995, "step": 2094 }, { "epoch": 2.1355759429153927, "grad_norm": 2.522563934326172, "learning_rate": 9.468199862895377e-08, "loss": 0.1704, "step": 2095 }, { "epoch": 2.1365953109072375, "grad_norm": 6.671058654785156, "learning_rate": 8.344632136422225e-08, "loss": 0.3037, "step": 2096 }, { "epoch": 2.1376146788990824, "grad_norm": 2.7199294567108154, "learning_rate": 7.291950161905092e-08, "loss": 0.1496, "step": 2097 }, { "epoch": 2.1386340468909277, "grad_norm": 6.42370080947876, "learning_rate": 6.310168888324919e-08, "loss": 0.3936, "step": 2098 }, { "epoch": 2.1396534148827726, "grad_norm": 6.266992092132568, "learning_rate": 5.399302257809713e-08, "loss": 0.4842, "step": 2099 }, { "epoch": 2.140672782874618, "grad_norm": 6.234472274780273, "learning_rate": 4.559363205440814e-08, "loss": 0.4433, "step": 2100 }, { "epoch": 2.141692150866463, "grad_norm": 1.4575471878051758, "learning_rate": 3.790363659066931e-08, "loss": 0.0814, "step": 2101 }, { "epoch": 2.1427115188583077, "grad_norm": 5.8357367515563965, "learning_rate": 3.0923145391364984e-08, "loss": 0.308, "step": 2102 }, { "epoch": 2.143730886850153, "grad_norm": 4.658002853393555, "learning_rate": 2.4652257585394688e-08, "loss": 0.2528, "step": 2103 }, { "epoch": 2.144750254841998, "grad_norm": 6.6633219718933105, "learning_rate": 1.909106222471313e-08, "loss": 0.4332, "step": 2104 }, { "epoch": 2.145769622833843, "grad_norm": 9.741914749145508, "learning_rate": 1.4239638283014555e-08, "loss": 1.0274, "step": 2105 }, { "epoch": 2.146788990825688, "grad_norm": 3.0744028091430664, "learning_rate": 1.009805465464475e-08, "loss": 0.248, "step": 2106 }, { "epoch": 2.147808358817533, "grad_norm": 
3.3139028549194336, "learning_rate": 6.666370153624035e-09, "loss": 0.2274, "step": 2107 }, { "epoch": 2.1488277268093783, "grad_norm": 3.4172706604003906, "learning_rate": 3.9446335127757414e-09, "loss": 0.1735, "step": 2108 }, { "epoch": 2.149847094801223, "grad_norm": 5.286227226257324, "learning_rate": 1.932883383093387e-09, "loss": 0.3032, "step": 2109 }, { "epoch": 2.1508664627930685, "grad_norm": 4.259544372558594, "learning_rate": 6.311483331244983e-10, "loss": 0.1403, "step": 2110 }, { "epoch": 2.1518858307849134, "grad_norm": 7.1230292320251465, "learning_rate": 3.944684862089432e-11, "loss": 0.2983, "step": 2111 }, { "epoch": 2.1529051987767582, "grad_norm": 5.71543025970459, "learning_rate": 9.999984221266776e-05, "loss": 0.4975, "step": 2112 }, { "epoch": 2.1539245667686036, "grad_norm": 8.9960355758667, "learning_rate": 9.999901383189654e-05, "loss": 0.3564, "step": 2113 }, { "epoch": 2.1549439347604484, "grad_norm": 5.388774394989014, "learning_rate": 9.999747542260143e-05, "loss": 0.2478, "step": 2114 }, { "epoch": 2.1559633027522938, "grad_norm": 4.084414482116699, "learning_rate": 9.999522700662917e-05, "loss": 0.2289, "step": 2115 }, { "epoch": 2.1569826707441386, "grad_norm": 4.7534332275390625, "learning_rate": 9.999226861590915e-05, "loss": 0.3633, "step": 2116 }, { "epoch": 2.1580020387359835, "grad_norm": 5.211404323577881, "learning_rate": 9.998860029245308e-05, "loss": 0.3248, "step": 2117 }, { "epoch": 2.159021406727829, "grad_norm": 4.009915351867676, "learning_rate": 9.998422208835423e-05, "loss": 0.2411, "step": 2118 }, { "epoch": 2.1600407747196737, "grad_norm": 4.3494181632995605, "learning_rate": 9.997913406578685e-05, "loss": 0.2621, "step": 2119 }, { "epoch": 2.161060142711519, "grad_norm": 5.335855960845947, "learning_rate": 9.997333629700516e-05, "loss": 0.2409, "step": 2120 }, { "epoch": 2.162079510703364, "grad_norm": 3.7476727962493896, "learning_rate": 9.996682886434243e-05, "loss": 0.1745, "step": 2121 }, { "epoch": 
2.163098878695209, "grad_norm": 9.698570251464844, "learning_rate": 9.995961186020974e-05, "loss": 0.7162, "step": 2122 }, { "epoch": 2.164118246687054, "grad_norm": 4.056085109710693, "learning_rate": 9.995168538709467e-05, "loss": 0.2209, "step": 2123 }, { "epoch": 2.165137614678899, "grad_norm": 6.227935791015625, "learning_rate": 9.994304955755988e-05, "loss": 0.3706, "step": 2124 }, { "epoch": 2.1661569826707443, "grad_norm": 3.197852373123169, "learning_rate": 9.993370449424153e-05, "loss": 0.108, "step": 2125 }, { "epoch": 2.167176350662589, "grad_norm": 4.697956085205078, "learning_rate": 9.992365032984743e-05, "loss": 0.2343, "step": 2126 }, { "epoch": 2.168195718654434, "grad_norm": 8.649667739868164, "learning_rate": 9.991288720715528e-05, "loss": 0.7377, "step": 2127 }, { "epoch": 2.1692150866462794, "grad_norm": 3.0363082885742188, "learning_rate": 9.990141527901058e-05, "loss": 0.1477, "step": 2128 }, { "epoch": 2.1702344546381243, "grad_norm": 8.80601692199707, "learning_rate": 9.988923470832445e-05, "loss": 0.4212, "step": 2129 }, { "epoch": 2.1712538226299696, "grad_norm": 5.953189373016357, "learning_rate": 9.987634566807139e-05, "loss": 0.4576, "step": 2130 }, { "epoch": 2.1722731906218145, "grad_norm": 2.836698055267334, "learning_rate": 9.98627483412867e-05, "loss": 0.1307, "step": 2131 }, { "epoch": 2.1732925586136593, "grad_norm": 10.231998443603516, "learning_rate": 9.984844292106399e-05, "loss": 0.7796, "step": 2132 }, { "epoch": 2.1743119266055047, "grad_norm": 5.002100467681885, "learning_rate": 9.98334296105524e-05, "loss": 0.2185, "step": 2133 }, { "epoch": 2.1753312945973495, "grad_norm": 5.0506696701049805, "learning_rate": 9.981770862295373e-05, "loss": 0.4168, "step": 2134 }, { "epoch": 2.176350662589195, "grad_norm": 5.320004463195801, "learning_rate": 9.980128018151936e-05, "loss": 0.2968, "step": 2135 }, { "epoch": 2.1773700305810397, "grad_norm": 4.058662414550781, "learning_rate": 9.978414451954709e-05, "loss": 0.2187, "step": 
2136 }, { "epoch": 2.1783893985728846, "grad_norm": 4.8133134841918945, "learning_rate": 9.976630188037796e-05, "loss": 0.4316, "step": 2137 }, { "epoch": 2.17940876656473, "grad_norm": 2.8492605686187744, "learning_rate": 9.974775251739262e-05, "loss": 0.1237, "step": 2138 }, { "epoch": 2.180428134556575, "grad_norm": 3.489551544189453, "learning_rate": 9.972849669400775e-05, "loss": 0.2164, "step": 2139 }, { "epoch": 2.18144750254842, "grad_norm": 6.3751702308654785, "learning_rate": 9.970853468367245e-05, "loss": 0.4144, "step": 2140 }, { "epoch": 2.182466870540265, "grad_norm": 5.416356563568115, "learning_rate": 9.968786676986424e-05, "loss": 0.3848, "step": 2141 }, { "epoch": 2.18348623853211, "grad_norm": 3.666043281555176, "learning_rate": 9.966649324608511e-05, "loss": 0.1683, "step": 2142 }, { "epoch": 2.184505606523955, "grad_norm": 2.8194453716278076, "learning_rate": 9.964441441585722e-05, "loss": 0.0857, "step": 2143 }, { "epoch": 2.1855249745158, "grad_norm": 12.224459648132324, "learning_rate": 9.962163059271878e-05, "loss": 0.6639, "step": 2144 }, { "epoch": 2.1865443425076454, "grad_norm": 7.330053806304932, "learning_rate": 9.959814210021943e-05, "loss": 0.243, "step": 2145 }, { "epoch": 2.1875637104994903, "grad_norm": 7.457223892211914, "learning_rate": 9.957394927191577e-05, "loss": 0.4547, "step": 2146 }, { "epoch": 2.1885830784913356, "grad_norm": 7.0147223472595215, "learning_rate": 9.95490524513665e-05, "loss": 0.5578, "step": 2147 }, { "epoch": 2.1896024464831805, "grad_norm": 5.137038707733154, "learning_rate": 9.952345199212769e-05, "loss": 0.2503, "step": 2148 }, { "epoch": 2.1906218144750254, "grad_norm": 6.037380218505859, "learning_rate": 9.949714825774763e-05, "loss": 0.2688, "step": 2149 }, { "epoch": 2.1916411824668707, "grad_norm": 5.003573894500732, "learning_rate": 9.94701416217617e-05, "loss": 0.3163, "step": 2150 }, { "epoch": 2.1926605504587156, "grad_norm": 9.454754829406738, "learning_rate": 9.944243246768712e-05, "loss": 
0.4275, "step": 2151 }, { "epoch": 2.1936799184505604, "grad_norm": 3.1916465759277344, "learning_rate": 9.941402118901744e-05, "loss": 0.2407, "step": 2152 }, { "epoch": 2.1946992864424058, "grad_norm": 6.89423131942749, "learning_rate": 9.938490818921697e-05, "loss": 0.1208, "step": 2153 }, { "epoch": 2.1957186544342506, "grad_norm": 8.483088493347168, "learning_rate": 9.935509388171509e-05, "loss": 0.3453, "step": 2154 }, { "epoch": 2.196738022426096, "grad_norm": 4.580751895904541, "learning_rate": 9.93245786899003e-05, "loss": 0.2303, "step": 2155 }, { "epoch": 2.197757390417941, "grad_norm": 5.408090114593506, "learning_rate": 9.929336304711432e-05, "loss": 0.2171, "step": 2156 }, { "epoch": 2.197757390417941, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8115701675415039, "eval_Qnli-dev-1024_cosine_ap": 0.7199766664494036, "eval_Qnli-dev-1024_cosine_f1": 0.7254901960784313, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7445250749588013, "eval_Qnli-dev-1024_cosine_mcc": 0.43697448216965834, "eval_Qnli-dev-1024_cosine_precision": 0.6491228070175439, "eval_Qnli-dev-1024_cosine_recall": 0.8222222222222222, "eval_Qnli-dev_cosine_accuracy": 0.75, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6777580976486206, "eval_Qnli-dev_cosine_ap": 0.7484611306073241, "eval_Qnli-dev_cosine_f1": 0.7476635514018692, "eval_Qnli-dev_cosine_f1_threshold": 0.6229462027549744, "eval_Qnli-dev_cosine_mcc": 0.47737827504723207, "eval_Qnli-dev_cosine_precision": 0.6451612903225806, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.2789512574672699, "eval_global_dataset_runtime": 103.8218, "eval_global_dataset_samples_per_second": 7.734, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8786118022629635, 
"eval_sts-test-1024_spearman_cosine": 0.904928185675876, "eval_sts-test_pearson_cosine": 0.9066404858754178, "eval_sts-test_spearman_cosine": 0.9204510786079447, "step": 2156 }, { "epoch": 2.198776758409786, "grad_norm": 5.818151950836182, "learning_rate": 9.92614473966458e-05, "loss": 0.3691, "step": 2157 }, { "epoch": 2.199796126401631, "grad_norm": 7.688577175140381, "learning_rate": 9.922883219172413e-05, "loss": 0.283, "step": 2158 }, { "epoch": 2.200815494393476, "grad_norm": 9.936375617980957, "learning_rate": 9.919551789551295e-05, "loss": 0.8282, "step": 2159 }, { "epoch": 2.2018348623853212, "grad_norm": 5.860223770141602, "learning_rate": 9.91615049811036e-05, "loss": 0.365, "step": 2160 }, { "epoch": 2.202854230377166, "grad_norm": 1.4134682416915894, "learning_rate": 9.912679393150843e-05, "loss": 0.0961, "step": 2161 }, { "epoch": 2.203873598369011, "grad_norm": 8.950481414794922, "learning_rate": 9.909138523965385e-05, "loss": 0.4634, "step": 2162 }, { "epoch": 2.2048929663608563, "grad_norm": 6.5395331382751465, "learning_rate": 9.905527940837338e-05, "loss": 0.376, "step": 2163 }, { "epoch": 2.205912334352701, "grad_norm": 7.763592720031738, "learning_rate": 9.901847695040054e-05, "loss": 0.3719, "step": 2164 }, { "epoch": 2.2069317023445465, "grad_norm": 12.144659042358398, "learning_rate": 9.898097838836156e-05, "loss": 0.578, "step": 2165 }, { "epoch": 2.2079510703363914, "grad_norm": 2.760550022125244, "learning_rate": 9.89427842547679e-05, "loss": 0.0921, "step": 2166 }, { "epoch": 2.2089704383282367, "grad_norm": 7.8105363845825195, "learning_rate": 9.890389509200874e-05, "loss": 0.293, "step": 2167 }, { "epoch": 2.2099898063200816, "grad_norm": 5.359696388244629, "learning_rate": 9.886431145234328e-05, "loss": 0.3036, "step": 2168 }, { "epoch": 2.2110091743119265, "grad_norm": 6.331681251525879, "learning_rate": 9.882403389789288e-05, "loss": 0.2374, "step": 2169 }, { "epoch": 2.2120285423037718, "grad_norm": 4.270999908447266, 
"learning_rate": 9.878306300063305e-05, "loss": 0.2242, "step": 2170 }, { "epoch": 2.2130479102956166, "grad_norm": 7.299144268035889, "learning_rate": 9.874139934238538e-05, "loss": 0.4379, "step": 2171 }, { "epoch": 2.214067278287462, "grad_norm": 6.275256156921387, "learning_rate": 9.869904351480928e-05, "loss": 0.2515, "step": 2172 }, { "epoch": 2.215086646279307, "grad_norm": 3.000127077102661, "learning_rate": 9.865599611939351e-05, "loss": 0.1617, "step": 2173 }, { "epoch": 2.2161060142711517, "grad_norm": 8.104231834411621, "learning_rate": 9.86122577674477e-05, "loss": 0.4121, "step": 2174 }, { "epoch": 2.217125382262997, "grad_norm": 2.574758291244507, "learning_rate": 9.856782908009363e-05, "loss": 0.2169, "step": 2175 }, { "epoch": 2.218144750254842, "grad_norm": 5.668185234069824, "learning_rate": 9.85227106882565e-05, "loss": 0.4749, "step": 2176 }, { "epoch": 2.2191641182466872, "grad_norm": 9.118616104125977, "learning_rate": 9.847690323265581e-05, "loss": 0.8064, "step": 2177 }, { "epoch": 2.220183486238532, "grad_norm": 4.4046831130981445, "learning_rate": 9.843040736379639e-05, "loss": 0.1712, "step": 2178 }, { "epoch": 2.221202854230377, "grad_norm": 4.443243503570557, "learning_rate": 9.838322374195915e-05, "loss": 0.2074, "step": 2179 }, { "epoch": 2.2222222222222223, "grad_norm": 6.054386615753174, "learning_rate": 9.833535303719163e-05, "loss": 0.3482, "step": 2180 }, { "epoch": 2.223241590214067, "grad_norm": 4.216024398803711, "learning_rate": 9.82867959292986e-05, "loss": 0.2334, "step": 2181 }, { "epoch": 2.2242609582059125, "grad_norm": 7.953604221343994, "learning_rate": 9.823755310783224e-05, "loss": 0.3724, "step": 2182 }, { "epoch": 2.2252803261977574, "grad_norm": 7.373310089111328, "learning_rate": 9.818762527208251e-05, "loss": 0.2847, "step": 2183 }, { "epoch": 2.2262996941896023, "grad_norm": 6.493857383728027, "learning_rate": 9.813701313106716e-05, "loss": 0.3983, "step": 2184 }, { "epoch": 2.2273190621814476, "grad_norm": 
7.406630992889404, "learning_rate": 9.808571740352163e-05, "loss": 0.2759, "step": 2185 }, { "epoch": 2.2283384301732925, "grad_norm": 8.359546661376953, "learning_rate": 9.803373881788888e-05, "loss": 0.5751, "step": 2186 }, { "epoch": 2.229357798165138, "grad_norm": 4.752198219299316, "learning_rate": 9.798107811230906e-05, "loss": 0.1683, "step": 2187 }, { "epoch": 2.2303771661569827, "grad_norm": 6.510313034057617, "learning_rate": 9.792773603460897e-05, "loss": 0.2357, "step": 2188 }, { "epoch": 2.2313965341488275, "grad_norm": 6.2854743003845215, "learning_rate": 9.787371334229145e-05, "loss": 0.3081, "step": 2189 }, { "epoch": 2.232415902140673, "grad_norm": 4.730719566345215, "learning_rate": 9.781901080252473e-05, "loss": 0.3411, "step": 2190 }, { "epoch": 2.2334352701325177, "grad_norm": 9.087937355041504, "learning_rate": 9.776362919213137e-05, "loss": 0.4046, "step": 2191 }, { "epoch": 2.234454638124363, "grad_norm": 6.309107780456543, "learning_rate": 9.77075692975774e-05, "loss": 0.2311, "step": 2192 }, { "epoch": 2.235474006116208, "grad_norm": 5.710136413574219, "learning_rate": 9.76508319149609e-05, "loss": 0.2457, "step": 2193 }, { "epoch": 2.236493374108053, "grad_norm": 9.256056785583496, "learning_rate": 9.759341785000105e-05, "loss": 0.4165, "step": 2194 }, { "epoch": 2.237512742099898, "grad_norm": 3.0695488452911377, "learning_rate": 9.753532791802637e-05, "loss": 0.1263, "step": 2195 }, { "epoch": 2.238532110091743, "grad_norm": 7.062183856964111, "learning_rate": 9.747656294396334e-05, "loss": 0.3795, "step": 2196 }, { "epoch": 2.2395514780835883, "grad_norm": 9.748225212097168, "learning_rate": 9.74171237623245e-05, "loss": 0.5094, "step": 2197 }, { "epoch": 2.240570846075433, "grad_norm": 4.3179826736450195, "learning_rate": 9.735701121719686e-05, "loss": 0.1671, "step": 2198 }, { "epoch": 2.241590214067278, "grad_norm": 3.598177671432495, "learning_rate": 9.729622616222966e-05, "loss": 0.1327, "step": 2199 }, { "epoch": 
2.2426095820591234, "grad_norm": 4.8626813888549805, "learning_rate": 9.723476946062243e-05, "loss": 0.4628, "step": 2200 }, { "epoch": 2.2436289500509683, "grad_norm": 7.495136260986328, "learning_rate": 9.71726419851125e-05, "loss": 0.3733, "step": 2201 }, { "epoch": 2.2446483180428136, "grad_norm": 3.3497166633605957, "learning_rate": 9.710984461796297e-05, "loss": 0.1483, "step": 2202 }, { "epoch": 2.2456676860346585, "grad_norm": 10.693584442138672, "learning_rate": 9.704637825094983e-05, "loss": 0.5756, "step": 2203 }, { "epoch": 2.2466870540265034, "grad_norm": 3.6354808807373047, "learning_rate": 9.698224378534943e-05, "loss": 0.2273, "step": 2204 }, { "epoch": 2.2477064220183487, "grad_norm": 5.719125747680664, "learning_rate": 9.691744213192579e-05, "loss": 0.2651, "step": 2205 }, { "epoch": 2.2487257900101936, "grad_norm": 2.961449384689331, "learning_rate": 9.685197421091747e-05, "loss": 0.1644, "step": 2206 }, { "epoch": 2.249745158002039, "grad_norm": 7.821652412414551, "learning_rate": 9.67858409520247e-05, "loss": 0.8976, "step": 2207 }, { "epoch": 2.2507645259938838, "grad_norm": 8.642051696777344, "learning_rate": 9.671904329439592e-05, "loss": 0.4614, "step": 2208 }, { "epoch": 2.2517838939857286, "grad_norm": 3.5357613563537598, "learning_rate": 9.665158218661473e-05, "loss": 0.1483, "step": 2209 }, { "epoch": 2.252803261977574, "grad_norm": 4.354801177978516, "learning_rate": 9.658345858668622e-05, "loss": 0.22, "step": 2210 }, { "epoch": 2.253822629969419, "grad_norm": 5.700054168701172, "learning_rate": 9.65146734620235e-05, "loss": 0.2612, "step": 2211 }, { "epoch": 2.254841997961264, "grad_norm": 10.76186752319336, "learning_rate": 9.64452277894338e-05, "loss": 1.1265, "step": 2212 }, { "epoch": 2.255861365953109, "grad_norm": 6.577109336853027, "learning_rate": 9.637512255510472e-05, "loss": 0.5792, "step": 2213 }, { "epoch": 2.2568807339449544, "grad_norm": 3.7338356971740723, "learning_rate": 9.630435875459029e-05, "loss": 0.2643, 
"step": 2214 }, { "epoch": 2.2579001019367992, "grad_norm": 6.350942611694336, "learning_rate": 9.623293739279661e-05, "loss": 0.3964, "step": 2215 }, { "epoch": 2.258919469928644, "grad_norm": 5.956366539001465, "learning_rate": 9.616085948396778e-05, "loss": 0.5892, "step": 2216 }, { "epoch": 2.2599388379204894, "grad_norm": 7.00551700592041, "learning_rate": 9.608812605167139e-05, "loss": 0.357, "step": 2217 }, { "epoch": 2.2609582059123343, "grad_norm": 8.318918228149414, "learning_rate": 9.60147381287841e-05, "loss": 0.5915, "step": 2218 }, { "epoch": 2.261977573904179, "grad_norm": 3.9956605434417725, "learning_rate": 9.594069675747681e-05, "loss": 0.2284, "step": 2219 }, { "epoch": 2.2629969418960245, "grad_norm": 6.184433937072754, "learning_rate": 9.586600298919992e-05, "loss": 0.2741, "step": 2220 }, { "epoch": 2.2640163098878694, "grad_norm": 6.979036331176758, "learning_rate": 9.579065788466853e-05, "loss": 0.3373, "step": 2221 }, { "epoch": 2.2650356778797147, "grad_norm": 3.9439306259155273, "learning_rate": 9.571466251384722e-05, "loss": 0.1557, "step": 2222 }, { "epoch": 2.2660550458715596, "grad_norm": 5.736883640289307, "learning_rate": 9.563801795593483e-05, "loss": 0.4192, "step": 2223 }, { "epoch": 2.267074413863405, "grad_norm": 9.300738334655762, "learning_rate": 9.556072529934935e-05, "loss": 0.6405, "step": 2224 }, { "epoch": 2.26809378185525, "grad_norm": 6.677163600921631, "learning_rate": 9.548278564171219e-05, "loss": 0.2207, "step": 2225 }, { "epoch": 2.2691131498470947, "grad_norm": 1.9049301147460938, "learning_rate": 9.54042000898328e-05, "loss": 0.1769, "step": 2226 }, { "epoch": 2.27013251783894, "grad_norm": 2.9124698638916016, "learning_rate": 9.532496975969283e-05, "loss": 0.1876, "step": 2227 }, { "epoch": 2.271151885830785, "grad_norm": 6.220628261566162, "learning_rate": 9.524509577643043e-05, "loss": 0.4072, "step": 2228 }, { "epoch": 2.2721712538226297, "grad_norm": 4.811798572540283, "learning_rate": 
9.516457927432402e-05, "loss": 0.3481, "step": 2229 }, { "epoch": 2.273190621814475, "grad_norm": 5.673060417175293, "learning_rate": 9.508342139677648e-05, "loss": 0.4266, "step": 2230 }, { "epoch": 2.27420998980632, "grad_norm": 5.483099460601807, "learning_rate": 9.500162329629866e-05, "loss": 0.4333, "step": 2231 }, { "epoch": 2.2752293577981653, "grad_norm": 5.340078353881836, "learning_rate": 9.49191861344932e-05, "loss": 0.2413, "step": 2232 }, { "epoch": 2.27624872579001, "grad_norm": 9.624187469482422, "learning_rate": 9.483611108203788e-05, "loss": 0.5317, "step": 2233 }, { "epoch": 2.2772680937818555, "grad_norm": 4.5000786781311035, "learning_rate": 9.475239931866913e-05, "loss": 0.2643, "step": 2234 }, { "epoch": 2.2782874617737003, "grad_norm": 7.982000827789307, "learning_rate": 9.466805203316514e-05, "loss": 0.7617, "step": 2235 }, { "epoch": 2.279306829765545, "grad_norm": 5.092596054077148, "learning_rate": 9.458307042332914e-05, "loss": 0.3159, "step": 2236 }, { "epoch": 2.2803261977573905, "grad_norm": 7.607308387756348, "learning_rate": 9.449745569597232e-05, "loss": 0.3722, "step": 2237 }, { "epoch": 2.2813455657492354, "grad_norm": 9.206737518310547, "learning_rate": 9.441120906689658e-05, "loss": 0.7328, "step": 2238 }, { "epoch": 2.2823649337410803, "grad_norm": 8.724227905273438, "learning_rate": 9.432433176087738e-05, "loss": 0.4747, "step": 2239 }, { "epoch": 2.2833843017329256, "grad_norm": 7.079002380371094, "learning_rate": 9.423682501164641e-05, "loss": 0.3334, "step": 2240 }, { "epoch": 2.2844036697247705, "grad_norm": 5.796501636505127, "learning_rate": 9.4148690061874e-05, "loss": 0.3518, "step": 2241 }, { "epoch": 2.285423037716616, "grad_norm": 7.118459701538086, "learning_rate": 9.405992816315125e-05, "loss": 0.242, "step": 2242 }, { "epoch": 2.2864424057084607, "grad_norm": 6.54943323135376, "learning_rate": 9.397054057597275e-05, "loss": 0.4397, "step": 2243 }, { "epoch": 2.287461773700306, "grad_norm": 10.270355224609375, 
"learning_rate": 9.388052856971816e-05, "loss": 0.7398, "step": 2244 }, { "epoch": 2.288481141692151, "grad_norm": 8.216813087463379, "learning_rate": 9.378989342263464e-05, "loss": 0.7289, "step": 2245 }, { "epoch": 2.2895005096839958, "grad_norm": 6.499934196472168, "learning_rate": 9.369863642181828e-05, "loss": 0.2978, "step": 2246 }, { "epoch": 2.290519877675841, "grad_norm": 3.5899710655212402, "learning_rate": 9.360675886319617e-05, "loss": 0.2361, "step": 2247 }, { "epoch": 2.291539245667686, "grad_norm": 3.1094970703125, "learning_rate": 9.351426205150774e-05, "loss": 0.2009, "step": 2248 }, { "epoch": 2.292558613659531, "grad_norm": 5.628443717956543, "learning_rate": 9.342114730028647e-05, "loss": 0.5583, "step": 2249 }, { "epoch": 2.293577981651376, "grad_norm": 3.848273277282715, "learning_rate": 9.332741593184094e-05, "loss": 0.151, "step": 2250 }, { "epoch": 2.294597349643221, "grad_norm": 7.327558517456055, "learning_rate": 9.323306927723637e-05, "loss": 0.3008, "step": 2251 }, { "epoch": 2.2956167176350664, "grad_norm": 8.393162727355957, "learning_rate": 9.313810867627549e-05, "loss": 0.4939, "step": 2252 }, { "epoch": 2.2966360856269112, "grad_norm": 6.88380241394043, "learning_rate": 9.304253547747956e-05, "loss": 0.2644, "step": 2253 }, { "epoch": 2.2976554536187566, "grad_norm": 4.7066850662231445, "learning_rate": 9.294635103806933e-05, "loss": 0.1587, "step": 2254 }, { "epoch": 2.2976554536187566, "eval_Qnli-dev-1024_cosine_accuracy": 0.71875, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8054693937301636, "eval_Qnli-dev-1024_cosine_ap": 0.7510897581326281, "eval_Qnli-dev-1024_cosine_f1": 0.7256637168141592, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.6971149444580078, "eval_Qnli-dev-1024_cosine_mcc": 0.419062972501429, "eval_Qnli-dev-1024_cosine_precision": 0.6029411764705882, "eval_Qnli-dev-1024_cosine_recall": 0.9111111111111111, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 
0.7125545740127563, "eval_Qnli-dev_cosine_ap": 0.7513469031575852, "eval_Qnli-dev_cosine_f1": 0.7339449541284404, "eval_Qnli-dev_cosine_f1_threshold": 0.6222972869873047, "eval_Qnli-dev_cosine_mcc": 0.4428074427700477, "eval_Qnli-dev_cosine_precision": 0.625, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9791666865348816, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.35465556383132935, "eval_global_dataset_runtime": 103.8621, "eval_global_dataset_samples_per_second": 7.731, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9791666865348816, "eval_sts-test-1024_pearson_cosine": 0.884708804231128, "eval_sts-test-1024_spearman_cosine": 0.9101560388186445, "eval_sts-test_pearson_cosine": 0.9064669487568003, "eval_sts-test_spearman_cosine": 0.9197963544883909, "step": 2254 }, { "epoch": 2.2986748216106014, "grad_norm": 4.420230865478516, "learning_rate": 9.284955672394562e-05, "loss": 0.1962, "step": 2255 }, { "epoch": 2.2996941896024463, "grad_norm": 2.1525185108184814, "learning_rate": 9.275215390967009e-05, "loss": 0.1461, "step": 2256 }, { "epoch": 2.3007135575942916, "grad_norm": 6.365402698516846, "learning_rate": 9.265414397844552e-05, "loss": 0.1951, "step": 2257 }, { "epoch": 2.3017329255861365, "grad_norm": 6.767193794250488, "learning_rate": 9.255552832209623e-05, "loss": 0.284, "step": 2258 }, { "epoch": 2.302752293577982, "grad_norm": 4.424633502960205, "learning_rate": 9.245630834104848e-05, "loss": 0.3417, "step": 2259 }, { "epoch": 2.3037716615698267, "grad_norm": 4.7739152908325195, "learning_rate": 9.235648544431044e-05, "loss": 0.2067, "step": 2260 }, { "epoch": 2.3047910295616716, "grad_norm": 3.6877939701080322, "learning_rate": 9.225606104945208e-05, "loss": 0.2394, "step": 2261 }, { "epoch": 2.305810397553517, "grad_norm": 7.127565383911133, "learning_rate": 9.215503658258524e-05, "loss": 0.4127, "step": 2262 }, { "epoch": 
2.306829765545362, "grad_norm": 4.339338302612305, "learning_rate": 9.205341347834325e-05, "loss": 0.295, "step": 2263 }, { "epoch": 2.307849133537207, "grad_norm": 4.622450351715088, "learning_rate": 9.19511931798607e-05, "loss": 0.3318, "step": 2264 }, { "epoch": 2.308868501529052, "grad_norm": 6.046075820922852, "learning_rate": 9.18483771387527e-05, "loss": 0.3391, "step": 2265 }, { "epoch": 2.309887869520897, "grad_norm": 7.373628616333008, "learning_rate": 9.174496681509453e-05, "loss": 0.6988, "step": 2266 }, { "epoch": 2.310907237512742, "grad_norm": 5.423571586608887, "learning_rate": 9.164096367740072e-05, "loss": 0.2089, "step": 2267 }, { "epoch": 2.311926605504587, "grad_norm": 10.064982414245605, "learning_rate": 9.15363692026043e-05, "loss": 0.7888, "step": 2268 }, { "epoch": 2.3129459734964324, "grad_norm": 6.538580417633057, "learning_rate": 9.143118487603576e-05, "loss": 0.2692, "step": 2269 }, { "epoch": 2.3139653414882773, "grad_norm": 5.713818550109863, "learning_rate": 9.132541219140205e-05, "loss": 0.3081, "step": 2270 }, { "epoch": 2.314984709480122, "grad_norm": 6.522933006286621, "learning_rate": 9.121905265076523e-05, "loss": 0.5452, "step": 2271 }, { "epoch": 2.3160040774719675, "grad_norm": 7.0937275886535645, "learning_rate": 9.111210776452124e-05, "loss": 0.561, "step": 2272 }, { "epoch": 2.3170234454638123, "grad_norm": 6.514686584472656, "learning_rate": 9.100457905137836e-05, "loss": 0.4489, "step": 2273 }, { "epoch": 2.3180428134556577, "grad_norm": 5.026601791381836, "learning_rate": 9.089646803833586e-05, "loss": 0.2322, "step": 2274 }, { "epoch": 2.3190621814475025, "grad_norm": 3.829639196395874, "learning_rate": 9.078777626066212e-05, "loss": 0.3165, "step": 2275 }, { "epoch": 2.3200815494393474, "grad_norm": 6.508183479309082, "learning_rate": 9.067850526187276e-05, "loss": 0.3822, "step": 2276 }, { "epoch": 2.3211009174311927, "grad_norm": 8.799606323242188, "learning_rate": 9.056865659370889e-05, "loss": 0.6136, "step": 
2277 }, { "epoch": 2.3221202854230376, "grad_norm": 2.762552499771118, "learning_rate": 9.045823181611506e-05, "loss": 0.1497, "step": 2278 }, { "epoch": 2.323139653414883, "grad_norm": 6.088753700256348, "learning_rate": 9.034723249721708e-05, "loss": 0.2277, "step": 2279 }, { "epoch": 2.324159021406728, "grad_norm": 10.172626495361328, "learning_rate": 9.023566021329963e-05, "loss": 0.7456, "step": 2280 }, { "epoch": 2.325178389398573, "grad_norm": 8.9097900390625, "learning_rate": 9.012351654878408e-05, "loss": 0.7613, "step": 2281 }, { "epoch": 2.326197757390418, "grad_norm": 4.431034564971924, "learning_rate": 9.00108030962058e-05, "loss": 0.3183, "step": 2282 }, { "epoch": 2.327217125382263, "grad_norm": 4.566897392272949, "learning_rate": 8.989752145619174e-05, "loss": 0.2711, "step": 2283 }, { "epoch": 2.328236493374108, "grad_norm": 5.168494701385498, "learning_rate": 8.978367323743748e-05, "loss": 0.3639, "step": 2284 }, { "epoch": 2.329255861365953, "grad_norm": 2.9187684059143066, "learning_rate": 8.966926005668465e-05, "loss": 0.1569, "step": 2285 }, { "epoch": 2.330275229357798, "grad_norm": 10.322551727294922, "learning_rate": 8.955428353869766e-05, "loss": 0.6622, "step": 2286 }, { "epoch": 2.3312945973496433, "grad_norm": 2.910454750061035, "learning_rate": 8.94387453162409e-05, "loss": 0.1854, "step": 2287 }, { "epoch": 2.332313965341488, "grad_norm": 5.9969892501831055, "learning_rate": 8.932264703005537e-05, "loss": 0.2545, "step": 2288 }, { "epoch": 2.3333333333333335, "grad_norm": 3.5394508838653564, "learning_rate": 8.920599032883552e-05, "loss": 0.2507, "step": 2289 }, { "epoch": 2.3343527013251784, "grad_norm": 3.04937744140625, "learning_rate": 8.90887768692057e-05, "loss": 0.1284, "step": 2290 }, { "epoch": 2.3353720693170237, "grad_norm": 1.6997092962265015, "learning_rate": 8.89710083156968e-05, "loss": 0.0984, "step": 2291 }, { "epoch": 2.3363914373088686, "grad_norm": 4.270977973937988, "learning_rate": 8.885268634072233e-05, "loss": 
0.2855, "step": 2292 }, { "epoch": 2.3374108053007134, "grad_norm": 4.65965461730957, "learning_rate": 8.873381262455503e-05, "loss": 0.2992, "step": 2293 }, { "epoch": 2.3384301732925588, "grad_norm": 8.1211519241333, "learning_rate": 8.861438885530283e-05, "loss": 0.3614, "step": 2294 }, { "epoch": 2.3394495412844036, "grad_norm": 7.206984519958496, "learning_rate": 8.849441672888481e-05, "loss": 0.3101, "step": 2295 }, { "epoch": 2.3404689092762485, "grad_norm": 7.5257110595703125, "learning_rate": 8.837389794900713e-05, "loss": 0.443, "step": 2296 }, { "epoch": 2.341488277268094, "grad_norm": 6.125361919403076, "learning_rate": 8.825283422713905e-05, "loss": 0.4293, "step": 2297 }, { "epoch": 2.3425076452599387, "grad_norm": 4.915850639343262, "learning_rate": 8.813122728248842e-05, "loss": 0.2135, "step": 2298 }, { "epoch": 2.343527013251784, "grad_norm": 8.4254789352417, "learning_rate": 8.800907884197725e-05, "loss": 0.4949, "step": 2299 }, { "epoch": 2.344546381243629, "grad_norm": 2.7700583934783936, "learning_rate": 8.788639064021721e-05, "loss": 0.1009, "step": 2300 }, { "epoch": 2.3455657492354742, "grad_norm": 2.5243680477142334, "learning_rate": 8.776316441948529e-05, "loss": 0.0916, "step": 2301 }, { "epoch": 2.346585117227319, "grad_norm": 2.952864408493042, "learning_rate": 8.763940192969853e-05, "loss": 0.2077, "step": 2302 }, { "epoch": 2.347604485219164, "grad_norm": 5.396402359008789, "learning_rate": 8.75151049283895e-05, "loss": 0.2194, "step": 2303 }, { "epoch": 2.3486238532110093, "grad_norm": 6.208902359008789, "learning_rate": 8.739027518068148e-05, "loss": 0.4228, "step": 2304 }, { "epoch": 2.349643221202854, "grad_norm": 4.609702110290527, "learning_rate": 8.726491445926292e-05, "loss": 0.1663, "step": 2305 }, { "epoch": 2.350662589194699, "grad_norm": 2.649308681488037, "learning_rate": 8.713902454436285e-05, "loss": 0.1789, "step": 2306 }, { "epoch": 2.3516819571865444, "grad_norm": 8.164649963378906, "learning_rate": 
8.701260722372497e-05, "loss": 0.2843, "step": 2307 }, { "epoch": 2.3527013251783893, "grad_norm": 7.712100505828857, "learning_rate": 8.68856642925829e-05, "loss": 0.6079, "step": 2308 }, { "epoch": 2.3537206931702346, "grad_norm": 4.999669551849365, "learning_rate": 8.675819755363412e-05, "loss": 0.2216, "step": 2309 }, { "epoch": 2.3547400611620795, "grad_norm": 6.541714668273926, "learning_rate": 8.663020881701491e-05, "loss": 0.3121, "step": 2310 }, { "epoch": 2.3557594291539248, "grad_norm": 6.196269512176514, "learning_rate": 8.650169990027399e-05, "loss": 0.4209, "step": 2311 }, { "epoch": 2.3567787971457697, "grad_norm": 4.6961894035339355, "learning_rate": 8.637267262834737e-05, "loss": 0.1548, "step": 2312 }, { "epoch": 2.3577981651376145, "grad_norm": 7.67713737487793, "learning_rate": 8.624312883353211e-05, "loss": 0.2983, "step": 2313 }, { "epoch": 2.35881753312946, "grad_norm": 5.699832439422607, "learning_rate": 8.611307035546023e-05, "loss": 0.2876, "step": 2314 }, { "epoch": 2.3598369011213047, "grad_norm": 3.3547933101654053, "learning_rate": 8.59824990410727e-05, "loss": 0.1027, "step": 2315 }, { "epoch": 2.3608562691131496, "grad_norm": 7.3823323249816895, "learning_rate": 8.585141674459329e-05, "loss": 0.5218, "step": 2316 }, { "epoch": 2.361875637104995, "grad_norm": 2.786806583404541, "learning_rate": 8.571982532750217e-05, "loss": 0.1536, "step": 2317 }, { "epoch": 2.36289500509684, "grad_norm": 5.496379375457764, "learning_rate": 8.558772665850932e-05, "loss": 0.4112, "step": 2318 }, { "epoch": 2.363914373088685, "grad_norm": 8.679461479187012, "learning_rate": 8.545512261352812e-05, "loss": 0.418, "step": 2319 }, { "epoch": 2.36493374108053, "grad_norm": 8.090753555297852, "learning_rate": 8.532201507564898e-05, "loss": 0.3728, "step": 2320 }, { "epoch": 2.3659531090723753, "grad_norm": 5.165255069732666, "learning_rate": 8.518840593511202e-05, "loss": 0.2554, "step": 2321 }, { "epoch": 2.36697247706422, "grad_norm": 6.425000190734863, 
"learning_rate": 8.505429708928068e-05, "loss": 0.4931, "step": 2322 }, { "epoch": 2.367991845056065, "grad_norm": 2.3066000938415527, "learning_rate": 8.491969044261472e-05, "loss": 0.1478, "step": 2323 }, { "epoch": 2.3690112130479104, "grad_norm": 3.497620105743408, "learning_rate": 8.478458790664292e-05, "loss": 0.2185, "step": 2324 }, { "epoch": 2.3700305810397553, "grad_norm": 7.057107448577881, "learning_rate": 8.46489913999363e-05, "loss": 0.6166, "step": 2325 }, { "epoch": 2.3710499490316006, "grad_norm": 4.212674140930176, "learning_rate": 8.451290284808048e-05, "loss": 0.2404, "step": 2326 }, { "epoch": 2.3720693170234455, "grad_norm": 5.787489891052246, "learning_rate": 8.437632418364878e-05, "loss": 0.371, "step": 2327 }, { "epoch": 2.3730886850152904, "grad_norm": 8.768516540527344, "learning_rate": 8.423925734617428e-05, "loss": 0.3587, "step": 2328 }, { "epoch": 2.3741080530071357, "grad_norm": 5.419494152069092, "learning_rate": 8.410170428212276e-05, "loss": 0.1999, "step": 2329 }, { "epoch": 2.3751274209989806, "grad_norm": 3.336252450942993, "learning_rate": 8.396366694486469e-05, "loss": 0.1607, "step": 2330 }, { "epoch": 2.376146788990826, "grad_norm": 5.8776535987854, "learning_rate": 8.38251472946476e-05, "loss": 0.4547, "step": 2331 }, { "epoch": 2.3771661569826708, "grad_norm": 3.843593120574951, "learning_rate": 8.368614729856843e-05, "loss": 0.1942, "step": 2332 }, { "epoch": 2.3781855249745156, "grad_norm": 6.0981574058532715, "learning_rate": 8.354666893054533e-05, "loss": 0.4157, "step": 2333 }, { "epoch": 2.379204892966361, "grad_norm": 3.4683003425598145, "learning_rate": 8.340671417128971e-05, "loss": 0.2771, "step": 2334 }, { "epoch": 2.380224260958206, "grad_norm": 9.179664611816406, "learning_rate": 8.326628500827825e-05, "loss": 0.6997, "step": 2335 }, { "epoch": 2.381243628950051, "grad_norm": 2.7431480884552, "learning_rate": 8.312538343572454e-05, "loss": 0.1812, "step": 2336 }, { "epoch": 2.382262996941896, "grad_norm": 
8.77397632598877, "learning_rate": 8.29840114545507e-05, "loss": 0.6393, "step": 2337 }, { "epoch": 2.383282364933741, "grad_norm": 3.6477086544036865, "learning_rate": 8.284217107235908e-05, "loss": 0.2243, "step": 2338 }, { "epoch": 2.3843017329255862, "grad_norm": 9.626862525939941, "learning_rate": 8.269986430340379e-05, "loss": 0.6306, "step": 2339 }, { "epoch": 2.385321100917431, "grad_norm": 6.5339131355285645, "learning_rate": 8.25570931685621e-05, "loss": 0.2627, "step": 2340 }, { "epoch": 2.3863404689092764, "grad_norm": 4.381488800048828, "learning_rate": 8.241385969530535e-05, "loss": 0.1925, "step": 2341 }, { "epoch": 2.3873598369011213, "grad_norm": 5.464411735534668, "learning_rate": 8.227016591767085e-05, "loss": 0.5205, "step": 2342 }, { "epoch": 2.388379204892966, "grad_norm": 7.5830841064453125, "learning_rate": 8.212601387623235e-05, "loss": 0.3821, "step": 2343 }, { "epoch": 2.3893985728848115, "grad_norm": 6.62937068939209, "learning_rate": 8.198140561807157e-05, "loss": 0.3973, "step": 2344 }, { "epoch": 2.3904179408766564, "grad_norm": 5.732163906097412, "learning_rate": 8.183634319674867e-05, "loss": 0.4019, "step": 2345 }, { "epoch": 2.3914373088685017, "grad_norm": 5.45266580581665, "learning_rate": 8.169082867227349e-05, "loss": 0.2915, "step": 2346 }, { "epoch": 2.3924566768603466, "grad_norm": 5.806284427642822, "learning_rate": 8.154486411107596e-05, "loss": 0.3495, "step": 2347 }, { "epoch": 2.3934760448521915, "grad_norm": 3.3837454319000244, "learning_rate": 8.139845158597712e-05, "loss": 0.1303, "step": 2348 }, { "epoch": 2.3944954128440368, "grad_norm": 6.189777374267578, "learning_rate": 8.125159317615926e-05, "loss": 0.316, "step": 2349 }, { "epoch": 2.3955147808358817, "grad_norm": 6.784691333770752, "learning_rate": 8.110429096713679e-05, "loss": 0.428, "step": 2350 }, { "epoch": 2.396534148827727, "grad_norm": 7.712477684020996, "learning_rate": 8.095654705072632e-05, "loss": 0.78, "step": 2351 }, { "epoch": 
2.397553516819572, "grad_norm": 6.170988082885742, "learning_rate": 8.080836352501717e-05, "loss": 0.2817, "step": 2352 }, { "epoch": 2.397553516819572, "eval_Qnli-dev-1024_cosine_accuracy": 0.7604166666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8025434017181396, "eval_Qnli-dev-1024_cosine_ap": 0.7565294376349275, "eval_Qnli-dev-1024_cosine_f1": 0.7294117647058822, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.786746621131897, "eval_Qnli-dev-1024_cosine_mcc": 0.5186710015444639, "eval_Qnli-dev-1024_cosine_precision": 0.775, "eval_Qnli-dev-1024_cosine_recall": 0.6888888888888889, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6690382957458496, "eval_Qnli-dev_cosine_ap": 0.7519786295263348, "eval_Qnli-dev_cosine_f1": 0.7547169811320755, "eval_Qnli-dev_cosine_f1_threshold": 0.6388322114944458, "eval_Qnli-dev_cosine_mcc": 0.494679410480399, "eval_Qnli-dev_cosine_precision": 0.6557377049180327, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.32961907982826233, "eval_global_dataset_runtime": 104.2744, "eval_global_dataset_samples_per_second": 7.701, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8876446213573181, "eval_sts-test-1024_spearman_cosine": 0.9120654064123624, "eval_sts-test_pearson_cosine": 0.9064454982972331, "eval_sts-test_spearman_cosine": 0.9216604226034537, "step": 2352 }, { "epoch": 2.3985728848114167, "grad_norm": 4.633171081542969, "learning_rate": 8.065974249434133e-05, "loss": 0.4222, "step": 2353 }, { "epoch": 2.399592252803262, "grad_norm": 4.3140997886657715, "learning_rate": 8.051068606924395e-05, "loss": 0.273, "step": 2354 }, { "epoch": 2.400611620795107, "grad_norm": 6.594303131103516, "learning_rate": 8.036119636645307e-05, "loss": 0.4256, "step": 2355 
}, { "epoch": 2.4016309887869522, "grad_norm": 2.8384170532226562, "learning_rate": 8.021127550884959e-05, "loss": 0.1358, "step": 2356 }, { "epoch": 2.402650356778797, "grad_norm": 8.013402938842773, "learning_rate": 8.006092562543714e-05, "loss": 0.6191, "step": 2357 }, { "epoch": 2.4036697247706424, "grad_norm": 7.213308334350586, "learning_rate": 7.9910148851312e-05, "loss": 0.4026, "step": 2358 }, { "epoch": 2.4046890927624873, "grad_norm": 4.690682888031006, "learning_rate": 7.975894732763267e-05, "loss": 0.3215, "step": 2359 }, { "epoch": 2.405708460754332, "grad_norm": 10.285234451293945, "learning_rate": 7.960732320158932e-05, "loss": 0.5026, "step": 2360 }, { "epoch": 2.4067278287461775, "grad_norm": 1.6485399007797241, "learning_rate": 7.945527862637354e-05, "loss": 0.0854, "step": 2361 }, { "epoch": 2.4077471967380224, "grad_norm": 5.8788371086120605, "learning_rate": 7.930281576114754e-05, "loss": 0.2336, "step": 2362 }, { "epoch": 2.4087665647298673, "grad_norm": 3.2773468494415283, "learning_rate": 7.91499367710138e-05, "loss": 0.1266, "step": 2363 }, { "epoch": 2.4097859327217126, "grad_norm": 2.446866273880005, "learning_rate": 7.89966438269839e-05, "loss": 0.1203, "step": 2364 }, { "epoch": 2.4108053007135575, "grad_norm": 7.6452765464782715, "learning_rate": 7.884293910594816e-05, "loss": 0.4739, "step": 2365 }, { "epoch": 2.411824668705403, "grad_norm": 4.1281352043151855, "learning_rate": 7.868882479064423e-05, "loss": 0.1563, "step": 2366 }, { "epoch": 2.4128440366972477, "grad_norm": 6.580128192901611, "learning_rate": 7.85343030696266e-05, "loss": 0.4343, "step": 2367 }, { "epoch": 2.413863404689093, "grad_norm": 4.8013105392456055, "learning_rate": 7.837937613723498e-05, "loss": 0.2669, "step": 2368 }, { "epoch": 2.414882772680938, "grad_norm": 4.374575138092041, "learning_rate": 7.822404619356376e-05, "loss": 0.319, "step": 2369 }, { "epoch": 2.4159021406727827, "grad_norm": 4.736109733581543, "learning_rate": 7.806831544443015e-05, 
"loss": 0.2109, "step": 2370 }, { "epoch": 2.416921508664628, "grad_norm": 5.618893623352051, "learning_rate": 7.791218610134329e-05, "loss": 0.4047, "step": 2371 }, { "epoch": 2.417940876656473, "grad_norm": 6.425780773162842, "learning_rate": 7.775566038147256e-05, "loss": 0.2339, "step": 2372 }, { "epoch": 2.418960244648318, "grad_norm": 5.232985973358154, "learning_rate": 7.759874050761639e-05, "loss": 0.2116, "step": 2373 }, { "epoch": 2.419979612640163, "grad_norm": 5.943445205688477, "learning_rate": 7.744142870817052e-05, "loss": 0.3895, "step": 2374 }, { "epoch": 2.420998980632008, "grad_norm": 4.810280799865723, "learning_rate": 7.728372721709623e-05, "loss": 0.1327, "step": 2375 }, { "epoch": 2.4220183486238533, "grad_norm": 6.564337253570557, "learning_rate": 7.71256382738888e-05, "loss": 0.3463, "step": 2376 }, { "epoch": 2.4230377166156982, "grad_norm": 4.958079814910889, "learning_rate": 7.696716412354574e-05, "loss": 0.2962, "step": 2377 }, { "epoch": 2.4240570846075435, "grad_norm": 8.169729232788086, "learning_rate": 7.680830701653481e-05, "loss": 0.7273, "step": 2378 }, { "epoch": 2.4250764525993884, "grad_norm": 6.614120006561279, "learning_rate": 7.6649069208762e-05, "loss": 0.3655, "step": 2379 }, { "epoch": 2.4260958205912333, "grad_norm": 2.4092414379119873, "learning_rate": 7.648945296153963e-05, "loss": 0.1066, "step": 2380 }, { "epoch": 2.4271151885830786, "grad_norm": 5.589473724365234, "learning_rate": 7.632946054155412e-05, "loss": 0.2677, "step": 2381 }, { "epoch": 2.4281345565749235, "grad_norm": 4.0893096923828125, "learning_rate": 7.616909422083405e-05, "loss": 0.2942, "step": 2382 }, { "epoch": 2.4291539245667684, "grad_norm": 6.262392520904541, "learning_rate": 7.60083562767174e-05, "loss": 0.2955, "step": 2383 }, { "epoch": 2.4301732925586137, "grad_norm": 4.966406345367432, "learning_rate": 7.58472489918199e-05, "loss": 0.3573, "step": 2384 }, { "epoch": 2.4311926605504586, "grad_norm": 5.376223564147949, "learning_rate": 
7.568577465400184e-05, "loss": 0.1925, "step": 2385 }, { "epoch": 2.432212028542304, "grad_norm": 2.352720260620117, "learning_rate": 7.55239355563363e-05, "loss": 0.2332, "step": 2386 }, { "epoch": 2.4332313965341488, "grad_norm": 5.364041805267334, "learning_rate": 7.5361733997076e-05, "loss": 0.2864, "step": 2387 }, { "epoch": 2.434250764525994, "grad_norm": 9.162808418273926, "learning_rate": 7.519917227962116e-05, "loss": 0.3454, "step": 2388 }, { "epoch": 2.435270132517839, "grad_norm": 5.606325149536133, "learning_rate": 7.50362527124864e-05, "loss": 0.2977, "step": 2389 }, { "epoch": 2.436289500509684, "grad_norm": 9.166282653808594, "learning_rate": 7.487297760926814e-05, "loss": 0.5608, "step": 2390 }, { "epoch": 2.437308868501529, "grad_norm": 6.161780834197998, "learning_rate": 7.470934928861164e-05, "loss": 0.2588, "step": 2391 }, { "epoch": 2.438328236493374, "grad_norm": 5.777623176574707, "learning_rate": 7.454537007417832e-05, "loss": 0.3611, "step": 2392 }, { "epoch": 2.439347604485219, "grad_norm": 6.068408012390137, "learning_rate": 7.438104229461255e-05, "loss": 0.3381, "step": 2393 }, { "epoch": 2.4403669724770642, "grad_norm": 3.068044900894165, "learning_rate": 7.421636828350849e-05, "loss": 0.1305, "step": 2394 }, { "epoch": 2.441386340468909, "grad_norm": 5.745426654815674, "learning_rate": 7.405135037937712e-05, "loss": 0.259, "step": 2395 }, { "epoch": 2.4424057084607544, "grad_norm": 9.846254348754883, "learning_rate": 7.388599092561312e-05, "loss": 0.9878, "step": 2396 }, { "epoch": 2.4434250764525993, "grad_norm": 2.3299012184143066, "learning_rate": 7.37202922704614e-05, "loss": 0.0945, "step": 2397 }, { "epoch": 2.4444444444444446, "grad_norm": 5.867788314819336, "learning_rate": 7.355425676698377e-05, "loss": 0.2487, "step": 2398 }, { "epoch": 2.4454638124362895, "grad_norm": 6.639028549194336, "learning_rate": 7.338788677302558e-05, "loss": 0.2808, "step": 2399 }, { "epoch": 2.4464831804281344, "grad_norm": 7.313209533691406, 
"learning_rate": 7.322118465118223e-05, "loss": 0.3393, "step": 2400 }, { "epoch": 2.4475025484199797, "grad_norm": 6.632044315338135, "learning_rate": 7.305415276876573e-05, "loss": 0.3317, "step": 2401 }, { "epoch": 2.4485219164118246, "grad_norm": 8.070530891418457, "learning_rate": 7.288679349777077e-05, "loss": 0.37, "step": 2402 }, { "epoch": 2.44954128440367, "grad_norm": 5.373542785644531, "learning_rate": 7.271910921484148e-05, "loss": 0.3059, "step": 2403 }, { "epoch": 2.450560652395515, "grad_norm": 4.393563270568848, "learning_rate": 7.255110230123716e-05, "loss": 0.2791, "step": 2404 }, { "epoch": 2.4515800203873597, "grad_norm": 3.5517146587371826, "learning_rate": 7.238277514279903e-05, "loss": 0.2652, "step": 2405 }, { "epoch": 2.452599388379205, "grad_norm": 7.802023410797119, "learning_rate": 7.221413012991576e-05, "loss": 0.4359, "step": 2406 }, { "epoch": 2.45361875637105, "grad_norm": 8.12000846862793, "learning_rate": 7.204516965749014e-05, "loss": 0.3624, "step": 2407 }, { "epoch": 2.454638124362895, "grad_norm": 4.847606182098389, "learning_rate": 7.187589612490444e-05, "loss": 0.2125, "step": 2408 }, { "epoch": 2.45565749235474, "grad_norm": 9.615645408630371, "learning_rate": 7.1706311935987e-05, "loss": 0.44, "step": 2409 }, { "epoch": 2.456676860346585, "grad_norm": 6.675761699676514, "learning_rate": 7.153641949897728e-05, "loss": 0.4531, "step": 2410 }, { "epoch": 2.4576962283384303, "grad_norm": 4.328212738037109, "learning_rate": 7.136622122649252e-05, "loss": 0.3082, "step": 2411 }, { "epoch": 2.458715596330275, "grad_norm": 4.534638404846191, "learning_rate": 7.119571953549305e-05, "loss": 0.2764, "step": 2412 }, { "epoch": 2.4597349643221205, "grad_norm": 5.104310989379883, "learning_rate": 7.10249168472478e-05, "loss": 0.338, "step": 2413 }, { "epoch": 2.4607543323139653, "grad_norm": 7.326198101043701, "learning_rate": 7.085381558730016e-05, "loss": 0.3387, "step": 2414 }, { "epoch": 2.46177370030581, "grad_norm": 
6.451673984527588, "learning_rate": 7.068241818543364e-05, "loss": 0.3514, "step": 2415 }, { "epoch": 2.4627930682976555, "grad_norm": 8.178759574890137, "learning_rate": 7.051072707563718e-05, "loss": 0.481, "step": 2416 }, { "epoch": 2.4638124362895004, "grad_norm": 6.602081298828125, "learning_rate": 7.033874469607052e-05, "loss": 0.4517, "step": 2417 }, { "epoch": 2.4648318042813457, "grad_norm": 6.172163009643555, "learning_rate": 7.016647348902967e-05, "loss": 0.3072, "step": 2418 }, { "epoch": 2.4658511722731906, "grad_norm": 3.3300230503082275, "learning_rate": 6.999391590091241e-05, "loss": 0.1182, "step": 2419 }, { "epoch": 2.4668705402650355, "grad_norm": 4.476836681365967, "learning_rate": 6.982107438218323e-05, "loss": 0.2143, "step": 2420 }, { "epoch": 2.467889908256881, "grad_norm": 4.774931907653809, "learning_rate": 6.96479513873386e-05, "loss": 0.1692, "step": 2421 }, { "epoch": 2.4689092762487257, "grad_norm": 8.344072341918945, "learning_rate": 6.947454937487245e-05, "loss": 0.4225, "step": 2422 }, { "epoch": 2.469928644240571, "grad_norm": 4.035219669342041, "learning_rate": 6.930087080724073e-05, "loss": 0.1875, "step": 2423 }, { "epoch": 2.470948012232416, "grad_norm": 7.9445624351501465, "learning_rate": 6.912691815082695e-05, "loss": 0.3638, "step": 2424 }, { "epoch": 2.4719673802242608, "grad_norm": 5.097878456115723, "learning_rate": 6.895269387590664e-05, "loss": 0.3165, "step": 2425 }, { "epoch": 2.472986748216106, "grad_norm": 3.0666706562042236, "learning_rate": 6.877820045661285e-05, "loss": 0.1063, "step": 2426 }, { "epoch": 2.474006116207951, "grad_norm": 4.933103084564209, "learning_rate": 6.860344037090041e-05, "loss": 0.235, "step": 2427 }, { "epoch": 2.4750254841997963, "grad_norm": 6.793315887451172, "learning_rate": 6.84284161005113e-05, "loss": 0.2965, "step": 2428 }, { "epoch": 2.476044852191641, "grad_norm": 3.302661657333374, "learning_rate": 6.825313013093898e-05, "loss": 0.1366, "step": 2429 }, { "epoch": 
2.477064220183486, "grad_norm": 4.984063148498535, "learning_rate": 6.807758495139325e-05, "loss": 0.1843, "step": 2430 }, { "epoch": 2.4780835881753314, "grad_norm": 5.525577545166016, "learning_rate": 6.790178305476509e-05, "loss": 0.2572, "step": 2431 }, { "epoch": 2.4791029561671762, "grad_norm": 3.890428066253662, "learning_rate": 6.77257269375909e-05, "loss": 0.1414, "step": 2432 }, { "epoch": 2.4801223241590216, "grad_norm": 8.498802185058594, "learning_rate": 6.754941910001722e-05, "loss": 0.4618, "step": 2433 }, { "epoch": 2.4811416921508664, "grad_norm": 5.803354740142822, "learning_rate": 6.737286204576538e-05, "loss": 0.2221, "step": 2434 }, { "epoch": 2.4821610601427118, "grad_norm": 8.255953788757324, "learning_rate": 6.719605828209578e-05, "loss": 0.5251, "step": 2435 }, { "epoch": 2.4831804281345566, "grad_norm": 5.898708343505859, "learning_rate": 6.701901031977221e-05, "loss": 0.4367, "step": 2436 }, { "epoch": 2.4841997961264015, "grad_norm": 6.51034688949585, "learning_rate": 6.684172067302623e-05, "loss": 0.2569, "step": 2437 }, { "epoch": 2.485219164118247, "grad_norm": 7.042967319488525, "learning_rate": 6.666419185952176e-05, "loss": 0.3693, "step": 2438 }, { "epoch": 2.4862385321100917, "grad_norm": 3.433729648590088, "learning_rate": 6.648642640031888e-05, "loss": 0.2292, "step": 2439 }, { "epoch": 2.4872579001019366, "grad_norm": 5.5609049797058105, "learning_rate": 6.630842681983825e-05, "loss": 0.2464, "step": 2440 }, { "epoch": 2.488277268093782, "grad_norm": 4.253046989440918, "learning_rate": 6.613019564582546e-05, "loss": 0.3644, "step": 2441 }, { "epoch": 2.489296636085627, "grad_norm": 5.878583908081055, "learning_rate": 6.595173540931464e-05, "loss": 0.2055, "step": 2442 }, { "epoch": 2.490316004077472, "grad_norm": 9.551033973693848, "learning_rate": 6.577304864459306e-05, "loss": 0.4718, "step": 2443 }, { "epoch": 2.491335372069317, "grad_norm": 5.011621475219727, "learning_rate": 6.559413788916464e-05, "loss": 0.2129, "step": 
2444 }, { "epoch": 2.4923547400611623, "grad_norm": 5.127644062042236, "learning_rate": 6.541500568371441e-05, "loss": 0.2876, "step": 2445 }, { "epoch": 2.493374108053007, "grad_norm": 6.037209510803223, "learning_rate": 6.523565457207193e-05, "loss": 0.2636, "step": 2446 }, { "epoch": 2.494393476044852, "grad_norm": 8.762588500976562, "learning_rate": 6.505608710117566e-05, "loss": 0.4882, "step": 2447 }, { "epoch": 2.4954128440366974, "grad_norm": 5.17008113861084, "learning_rate": 6.487630582103635e-05, "loss": 0.1813, "step": 2448 }, { "epoch": 2.4964322120285423, "grad_norm": 7.731829643249512, "learning_rate": 6.469631328470103e-05, "loss": 0.2958, "step": 2449 }, { "epoch": 2.497451580020387, "grad_norm": 6.016170501708984, "learning_rate": 6.451611204821695e-05, "loss": 0.2231, "step": 2450 }, { "epoch": 2.497451580020387, "eval_Qnli-dev-1024_cosine_accuracy": 0.7708333333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7951542139053345, "eval_Qnli-dev-1024_cosine_ap": 0.7338367052537506, "eval_Qnli-dev-1024_cosine_f1": 0.738095238095238, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7809998989105225, "eval_Qnli-dev-1024_cosine_mcc": 0.5405732955715834, "eval_Qnli-dev-1024_cosine_precision": 0.7948717948717948, "eval_Qnli-dev-1024_cosine_recall": 0.6888888888888889, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7180579900741577, "eval_Qnli-dev_cosine_ap": 0.7378262269664827, "eval_Qnli-dev_cosine_f1": 0.7339449541284404, "eval_Qnli-dev_cosine_f1_threshold": 0.624002993106842, "eval_Qnli-dev_cosine_mcc": 0.4428074427700477, "eval_Qnli-dev_cosine_precision": 0.625, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9479166865348816, "eval_allNLI-triplets_cosine_accuracy": 0.9583333134651184, "eval_global_dataset_loss": 0.37020039558410645, "eval_global_dataset_runtime": 104.1585, "eval_global_dataset_samples_per_second": 7.709, 
"eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.9479166865348816, "eval_sts-test-1024_pearson_cosine": 0.8783180735157112, "eval_sts-test-1024_spearman_cosine": 0.9080893179999104, "eval_sts-test_pearson_cosine": 0.9058391907599144, "eval_sts-test_spearman_cosine": 0.9220448706968835, "step": 2450 }, { "epoch": 2.4984709480122325, "grad_norm": 5.705495834350586, "learning_rate": 6.433570467059489e-05, "loss": 0.3331, "step": 2451 }, { "epoch": 2.4994903160040773, "grad_norm": 2.863286256790161, "learning_rate": 6.415509371377295e-05, "loss": 0.2186, "step": 2452 }, { "epoch": 2.5005096839959227, "grad_norm": 5.570882797241211, "learning_rate": 6.397428174258045e-05, "loss": 0.2169, "step": 2453 }, { "epoch": 2.5015290519877675, "grad_norm": 9.619322776794434, "learning_rate": 6.379327132470121e-05, "loss": 0.8065, "step": 2454 }, { "epoch": 2.502548419979613, "grad_norm": 5.321788311004639, "learning_rate": 6.361206503063706e-05, "loss": 0.2582, "step": 2455 }, { "epoch": 2.5035677879714577, "grad_norm": 6.590585708618164, "learning_rate": 6.343066543367147e-05, "loss": 0.2145, "step": 2456 }, { "epoch": 2.5045871559633026, "grad_norm": 3.9598350524902344, "learning_rate": 6.324907510983303e-05, "loss": 0.1023, "step": 2457 }, { "epoch": 2.505606523955148, "grad_norm": 6.960710048675537, "learning_rate": 6.306729663785897e-05, "loss": 0.302, "step": 2458 }, { "epoch": 2.506625891946993, "grad_norm": 6.428700923919678, "learning_rate": 6.288533259915791e-05, "loss": 0.444, "step": 2459 }, { "epoch": 2.5076452599388377, "grad_norm": 3.422715425491333, "learning_rate": 6.270318557777418e-05, "loss": 0.2323, "step": 2460 }, { "epoch": 2.508664627930683, "grad_norm": 6.3273186683654785, "learning_rate": 6.252085816035027e-05, "loss": 0.2695, "step": 2461 }, { "epoch": 2.509683995922528, "grad_norm": 6.5879340171813965, "learning_rate": 6.233835293609074e-05, "loss": 0.3088, "step": 2462 }, { "epoch": 2.510703363914373, "grad_norm": 
6.615034103393555, "learning_rate": 6.215567249672486e-05, "loss": 0.4545, "step": 2463 }, { "epoch": 2.511722731906218, "grad_norm": 6.601266860961914, "learning_rate": 6.19728194364704e-05, "loss": 0.2826, "step": 2464 }, { "epoch": 2.5127420998980634, "grad_norm": 3.1535212993621826, "learning_rate": 6.178979635199619e-05, "loss": 0.1859, "step": 2465 }, { "epoch": 2.5137614678899083, "grad_norm": 3.975011110305786, "learning_rate": 6.160660584238584e-05, "loss": 0.2104, "step": 2466 }, { "epoch": 2.514780835881753, "grad_norm": 4.474131107330322, "learning_rate": 6.142325050910029e-05, "loss": 0.2007, "step": 2467 }, { "epoch": 2.5158002038735985, "grad_norm": 3.4603450298309326, "learning_rate": 6.123973295594134e-05, "loss": 0.1731, "step": 2468 }, { "epoch": 2.5168195718654434, "grad_norm": 7.2192912101745605, "learning_rate": 6.10560557890143e-05, "loss": 0.6639, "step": 2469 }, { "epoch": 2.5178389398572882, "grad_norm": 10.097108840942383, "learning_rate": 6.0872221616691127e-05, "loss": 0.4735, "step": 2470 }, { "epoch": 2.5188583078491336, "grad_norm": 5.151859283447266, "learning_rate": 6.068823304957339e-05, "loss": 0.277, "step": 2471 }, { "epoch": 2.5198776758409784, "grad_norm": 5.761507987976074, "learning_rate": 6.0504092700455306e-05, "loss": 0.3984, "step": 2472 }, { "epoch": 2.5208970438328238, "grad_norm": 8.459686279296875, "learning_rate": 6.031980318428652e-05, "loss": 0.5498, "step": 2473 }, { "epoch": 2.5219164118246686, "grad_norm": 4.518146991729736, "learning_rate": 6.013536711813482e-05, "loss": 0.318, "step": 2474 }, { "epoch": 2.522935779816514, "grad_norm": 6.556336879730225, "learning_rate": 5.995078712114919e-05, "loss": 0.334, "step": 2475 }, { "epoch": 2.523955147808359, "grad_norm": 6.316927909851074, "learning_rate": 5.9766065814522645e-05, "loss": 0.2502, "step": 2476 }, { "epoch": 2.5249745158002037, "grad_norm": 6.545000076293945, "learning_rate": 5.95812058214549e-05, "loss": 0.2655, "step": 2477 }, { "epoch": 
2.525993883792049, "grad_norm": 9.777205467224121, "learning_rate": 5.9396209767115053e-05, "loss": 0.5449, "step": 2478 }, { "epoch": 2.527013251783894, "grad_norm": 4.878248691558838, "learning_rate": 5.9211080278604415e-05, "loss": 0.2503, "step": 2479 }, { "epoch": 2.528032619775739, "grad_norm": 6.339738368988037, "learning_rate": 5.9025819984919115e-05, "loss": 0.5456, "step": 2480 }, { "epoch": 2.529051987767584, "grad_norm": 6.251143932342529, "learning_rate": 5.884043151691303e-05, "loss": 0.3832, "step": 2481 }, { "epoch": 2.5300713557594294, "grad_norm": 6.100076198577881, "learning_rate": 5.865491750725998e-05, "loss": 0.452, "step": 2482 }, { "epoch": 2.5310907237512743, "grad_norm": 4.228699207305908, "learning_rate": 5.8469280590416806e-05, "loss": 0.399, "step": 2483 }, { "epoch": 2.532110091743119, "grad_norm": 6.980855464935303, "learning_rate": 5.8283523402585505e-05, "loss": 0.3104, "step": 2484 }, { "epoch": 2.5331294597349645, "grad_norm": 10.003399848937988, "learning_rate": 5.809764858167627e-05, "loss": 0.5134, "step": 2485 }, { "epoch": 2.5341488277268094, "grad_norm": 9.853922843933105, "learning_rate": 5.7911658767269516e-05, "loss": 0.4744, "step": 2486 }, { "epoch": 2.5351681957186543, "grad_norm": 5.091624736785889, "learning_rate": 5.772555660057895e-05, "loss": 0.2087, "step": 2487 }, { "epoch": 2.5361875637104996, "grad_norm": 6.095240116119385, "learning_rate": 5.753934472441356e-05, "loss": 0.2806, "step": 2488 }, { "epoch": 2.5372069317023445, "grad_norm": 7.038525104522705, "learning_rate": 5.735302578314036e-05, "loss": 0.3187, "step": 2489 }, { "epoch": 2.5382262996941893, "grad_norm": 6.65778923034668, "learning_rate": 5.716660242264674e-05, "loss": 0.2877, "step": 2490 }, { "epoch": 2.5392456676860347, "grad_norm": 8.296245574951172, "learning_rate": 5.698007729030306e-05, "loss": 0.273, "step": 2491 }, { "epoch": 2.54026503567788, "grad_norm": 8.01514720916748, "learning_rate": 5.6793453034924906e-05, "loss": 0.3605, 
"step": 2492 }, { "epoch": 2.541284403669725, "grad_norm": 7.5846476554870605, "learning_rate": 5.6606732306735366e-05, "loss": 0.3579, "step": 2493 }, { "epoch": 2.5423037716615697, "grad_norm": 12.329811096191406, "learning_rate": 5.641991775732754e-05, "loss": 0.8508, "step": 2494 }, { "epoch": 2.543323139653415, "grad_norm": 4.689081192016602, "learning_rate": 5.6233012039626994e-05, "loss": 0.3545, "step": 2495 }, { "epoch": 2.54434250764526, "grad_norm": 6.278460502624512, "learning_rate": 5.6046017807853965e-05, "loss": 0.5836, "step": 2496 }, { "epoch": 2.545361875637105, "grad_norm": 7.2878313064575195, "learning_rate": 5.585893771748555e-05, "loss": 0.2704, "step": 2497 }, { "epoch": 2.54638124362895, "grad_norm": 6.415917873382568, "learning_rate": 5.5671774425218115e-05, "loss": 0.1861, "step": 2498 }, { "epoch": 2.547400611620795, "grad_norm": 4.367844581604004, "learning_rate": 5.548453058892955e-05, "loss": 0.1821, "step": 2499 }, { "epoch": 2.5484199796126403, "grad_norm": 6.253251552581787, "learning_rate": 5.529720886764174e-05, "loss": 0.2393, "step": 2500 }, { "epoch": 2.549439347604485, "grad_norm": 5.571644306182861, "learning_rate": 5.51098119214823e-05, "loss": 0.2192, "step": 2501 }, { "epoch": 2.5504587155963305, "grad_norm": 5.735546588897705, "learning_rate": 5.4922342411647424e-05, "loss": 0.5834, "step": 2502 }, { "epoch": 2.5514780835881754, "grad_norm": 3.2489004135131836, "learning_rate": 5.4734803000363456e-05, "loss": 0.2575, "step": 2503 }, { "epoch": 2.5524974515800203, "grad_norm": 2.887796401977539, "learning_rate": 5.454719635084968e-05, "loss": 0.1475, "step": 2504 }, { "epoch": 2.5535168195718656, "grad_norm": 2.2065820693969727, "learning_rate": 5.435952512727998e-05, "loss": 0.1323, "step": 2505 }, { "epoch": 2.5545361875637105, "grad_norm": 4.697101593017578, "learning_rate": 5.4171791994745455e-05, "loss": 0.4186, "step": 2506 }, { "epoch": 2.5555555555555554, "grad_norm": 7.107315540313721, "learning_rate": 
5.398399961921624e-05, "loss": 0.4217, "step": 2507 }, { "epoch": 2.5565749235474007, "grad_norm": 4.283377647399902, "learning_rate": 5.379615066750377e-05, "loss": 0.1777, "step": 2508 }, { "epoch": 2.5575942915392456, "grad_norm": 2.9004464149475098, "learning_rate": 5.360824780722287e-05, "loss": 0.1921, "step": 2509 }, { "epoch": 2.558613659531091, "grad_norm": 8.493511199951172, "learning_rate": 5.3420293706754054e-05, "loss": 0.4628, "step": 2510 }, { "epoch": 2.5596330275229358, "grad_norm": 3.586085796356201, "learning_rate": 5.3232291035205485e-05, "loss": 0.2653, "step": 2511 }, { "epoch": 2.560652395514781, "grad_norm": 8.821948051452637, "learning_rate": 5.304424246237494e-05, "loss": 0.4299, "step": 2512 }, { "epoch": 2.561671763506626, "grad_norm": 8.818429946899414, "learning_rate": 5.285615065871203e-05, "loss": 0.4889, "step": 2513 }, { "epoch": 2.562691131498471, "grad_norm": 2.939493417739868, "learning_rate": 5.2668018295280416e-05, "loss": 0.1218, "step": 2514 }, { "epoch": 2.563710499490316, "grad_norm": 7.347713947296143, "learning_rate": 5.247984804371976e-05, "loss": 0.4561, "step": 2515 }, { "epoch": 2.564729867482161, "grad_norm": 5.744930744171143, "learning_rate": 5.229164257620762e-05, "loss": 0.3118, "step": 2516 }, { "epoch": 2.565749235474006, "grad_norm": 3.0342025756835938, "learning_rate": 5.210340456542169e-05, "loss": 0.1742, "step": 2517 }, { "epoch": 2.5667686034658512, "grad_norm": 5.914112567901611, "learning_rate": 5.191513668450178e-05, "loss": 0.2229, "step": 2518 }, { "epoch": 2.567787971457696, "grad_norm": 5.135507583618164, "learning_rate": 5.172684160701207e-05, "loss": 0.2406, "step": 2519 }, { "epoch": 2.5688073394495414, "grad_norm": 8.28679084777832, "learning_rate": 5.153852200690267e-05, "loss": 0.5932, "step": 2520 }, { "epoch": 2.5698267074413863, "grad_norm": 7.528500080108643, "learning_rate": 5.135018055847223e-05, "loss": 0.2706, "step": 2521 }, { "epoch": 2.5708460754332316, "grad_norm": 
3.1454074382781982, "learning_rate": 5.116181993632937e-05, "loss": 0.1532, "step": 2522 }, { "epoch": 2.5718654434250765, "grad_norm": 2.4760966300964355, "learning_rate": 5.097344281535529e-05, "loss": 0.1913, "step": 2523 }, { "epoch": 2.5728848114169214, "grad_norm": 4.306360244750977, "learning_rate": 5.078505187066517e-05, "loss": 0.3509, "step": 2524 }, { "epoch": 2.5739041794087667, "grad_norm": 10.413430213928223, "learning_rate": 5.059664977757083e-05, "loss": 0.4713, "step": 2525 }, { "epoch": 2.5749235474006116, "grad_norm": 5.3981781005859375, "learning_rate": 5.0408239211542084e-05, "loss": 0.2841, "step": 2526 }, { "epoch": 2.5759429153924565, "grad_norm": 2.901654005050659, "learning_rate": 5.021982284816944e-05, "loss": 0.1348, "step": 2527 }, { "epoch": 2.5769622833843018, "grad_norm": 7.1827545166015625, "learning_rate": 5.003140336312524e-05, "loss": 0.408, "step": 2528 }, { "epoch": 2.5779816513761467, "grad_norm": 5.4475932121276855, "learning_rate": 4.9842983432126574e-05, "loss": 0.1852, "step": 2529 }, { "epoch": 2.579001019367992, "grad_norm": 8.925348281860352, "learning_rate": 4.965456573089678e-05, "loss": 0.681, "step": 2530 }, { "epoch": 2.580020387359837, "grad_norm": 4.100930213928223, "learning_rate": 4.946615293512744e-05, "loss": 0.1261, "step": 2531 }, { "epoch": 2.581039755351682, "grad_norm": 5.197756290435791, "learning_rate": 4.927774772044042e-05, "loss": 0.2175, "step": 2532 }, { "epoch": 2.582059123343527, "grad_norm": 3.854301691055298, "learning_rate": 4.908935276235009e-05, "loss": 0.146, "step": 2533 }, { "epoch": 2.583078491335372, "grad_norm": 6.240309238433838, "learning_rate": 4.8900970736225164e-05, "loss": 0.4023, "step": 2534 }, { "epoch": 2.5840978593272173, "grad_norm": 5.006434440612793, "learning_rate": 4.8712604317250596e-05, "loss": 0.1632, "step": 2535 }, { "epoch": 2.585117227319062, "grad_norm": 7.134576797485352, "learning_rate": 4.852425618038966e-05, "loss": 0.278, "step": 2536 }, { "epoch": 
2.586136595310907, "grad_norm": 4.0280327796936035, "learning_rate": 4.8335929000346245e-05, "loss": 0.1896, "step": 2537 }, { "epoch": 2.5871559633027523, "grad_norm": 5.938254356384277, "learning_rate": 4.814762545152643e-05, "loss": 0.2148, "step": 2538 }, { "epoch": 2.588175331294597, "grad_norm": 13.921966552734375, "learning_rate": 4.795934820800071e-05, "loss": 1.4833, "step": 2539 }, { "epoch": 2.5891946992864425, "grad_norm": 5.229700565338135, "learning_rate": 4.77710999434662e-05, "loss": 0.5066, "step": 2540 }, { "epoch": 2.5902140672782874, "grad_norm": 5.119759559631348, "learning_rate": 4.758288333120826e-05, "loss": 0.2406, "step": 2541 }, { "epoch": 2.5912334352701327, "grad_norm": 4.06699275970459, "learning_rate": 4.7394701044063004e-05, "loss": 0.2206, "step": 2542 }, { "epoch": 2.5922528032619776, "grad_norm": 3.3897323608398438, "learning_rate": 4.7206555754378825e-05, "loss": 0.1581, "step": 2543 }, { "epoch": 2.5932721712538225, "grad_norm": 8.2315673828125, "learning_rate": 4.701845013397903e-05, "loss": 0.3168, "step": 2544 }, { "epoch": 2.594291539245668, "grad_norm": 3.2306790351867676, "learning_rate": 4.683038685412325e-05, "loss": 0.1425, "step": 2545 }, { "epoch": 2.5953109072375127, "grad_norm": 2.8688368797302246, "learning_rate": 4.664236858547019e-05, "loss": 0.1639, "step": 2546 }, { "epoch": 2.5963302752293576, "grad_norm": 4.288220405578613, "learning_rate": 4.645439799803909e-05, "loss": 0.1915, "step": 2547 }, { "epoch": 2.597349643221203, "grad_norm": 9.484834671020508, "learning_rate": 4.626647776117213e-05, "loss": 0.4821, "step": 2548 }, { "epoch": 2.597349643221203, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8034257888793945, "eval_Qnli-dev-1024_cosine_ap": 0.744787681075809, "eval_Qnli-dev-1024_cosine_f1": 0.7256637168141592, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.687965989112854, "eval_Qnli-dev-1024_cosine_mcc": 0.419062972501429, 
"eval_Qnli-dev-1024_cosine_precision": 0.6029411764705882, "eval_Qnli-dev-1024_cosine_recall": 0.9111111111111111, "eval_Qnli-dev_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6738884449005127, "eval_Qnli-dev_cosine_ap": 0.7378629719325543, "eval_Qnli-dev_cosine_f1": 0.7407407407407407, "eval_Qnli-dev_cosine_f1_threshold": 0.617904782295227, "eval_Qnli-dev_cosine_mcc": 0.4600949560146401, "eval_Qnli-dev_cosine_precision": 0.6349206349206349, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.96875, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.3022010624408722, "eval_global_dataset_runtime": 104.1737, "eval_global_dataset_samples_per_second": 7.708, "eval_global_dataset_steps_per_second": 0.163, "eval_sequential_score": 0.96875, "eval_sts-test-1024_pearson_cosine": 0.880993649364402, "eval_sts-test-1024_spearman_cosine": 0.9061897815880109, "eval_sts-test_pearson_cosine": 0.9026524055260908, "eval_sts-test_spearman_cosine": 0.9186055322221641, "step": 2548 }, { "epoch": 2.5983690112130478, "grad_norm": 3.646552801132202, "learning_rate": 4.607861054349663e-05, "loss": 0.2793, "step": 2549 }, { "epoch": 2.599388379204893, "grad_norm": 6.311948299407959, "learning_rate": 4.589079901288681e-05, "loss": 0.4315, "step": 2550 }, { "epoch": 2.600407747196738, "grad_norm": 5.987321376800537, "learning_rate": 4.57030458364261e-05, "loss": 0.4048, "step": 2551 }, { "epoch": 2.6014271151885833, "grad_norm": 4.98535680770874, "learning_rate": 4.551535368036934e-05, "loss": 0.2146, "step": 2552 }, { "epoch": 2.602446483180428, "grad_norm": 6.634832382202148, "learning_rate": 4.532772521010488e-05, "loss": 0.4875, "step": 2553 }, { "epoch": 2.603465851172273, "grad_norm": 7.706894874572754, "learning_rate": 4.514016309011653e-05, "loss": 0.239, "step": 2554 }, { "epoch": 2.6044852191641183, "grad_norm": 7.011508941650391, "learning_rate": 4.495266998394584e-05, 
"loss": 0.2756, "step": 2555 }, { "epoch": 2.6055045871559632, "grad_norm": 8.816591262817383, "learning_rate": 4.4765248554154454e-05, "loss": 0.2547, "step": 2556 }, { "epoch": 2.606523955147808, "grad_norm": 6.2531585693359375, "learning_rate": 4.4577901462286244e-05, "loss": 0.2222, "step": 2557 }, { "epoch": 2.6075433231396534, "grad_norm": 6.575552463531494, "learning_rate": 4.4390631368828984e-05, "loss": 0.2396, "step": 2558 }, { "epoch": 2.6085626911314987, "grad_norm": 7.20054817199707, "learning_rate": 4.420344093317749e-05, "loss": 0.2958, "step": 2559 }, { "epoch": 2.6095820591233436, "grad_norm": 4.621170520782471, "learning_rate": 4.401633281359504e-05, "loss": 0.2492, "step": 2560 }, { "epoch": 2.6106014271151885, "grad_norm": 6.393376350402832, "learning_rate": 4.382930966717621e-05, "loss": 0.3191, "step": 2561 }, { "epoch": 2.611620795107034, "grad_norm": 2.958164691925049, "learning_rate": 4.3642374149808615e-05, "loss": 0.2152, "step": 2562 }, { "epoch": 2.6126401630988787, "grad_norm": 4.713347434997559, "learning_rate": 4.345552891613576e-05, "loss": 0.2988, "step": 2563 }, { "epoch": 2.6136595310907236, "grad_norm": 6.476726531982422, "learning_rate": 4.326877661951871e-05, "loss": 0.4129, "step": 2564 }, { "epoch": 2.614678899082569, "grad_norm": 4.05019998550415, "learning_rate": 4.30821199119991e-05, "loss": 0.2333, "step": 2565 }, { "epoch": 2.6156982670744138, "grad_norm": 5.9746012687683105, "learning_rate": 4.289556144426084e-05, "loss": 0.3483, "step": 2566 }, { "epoch": 2.6167176350662587, "grad_norm": 6.902339935302734, "learning_rate": 4.2709103865592803e-05, "loss": 0.2648, "step": 2567 }, { "epoch": 2.617737003058104, "grad_norm": 8.029814720153809, "learning_rate": 4.2522749823851335e-05, "loss": 0.2654, "step": 2568 }, { "epoch": 2.6187563710499493, "grad_norm": 8.265084266662598, "learning_rate": 4.2336501965422254e-05, "loss": 0.2348, "step": 2569 }, { "epoch": 2.619775739041794, "grad_norm": 5.334643840789795, 
"learning_rate": 4.2150362935183515e-05, "loss": 0.3404, "step": 2570 }, { "epoch": 2.620795107033639, "grad_norm": 2.954676389694214, "learning_rate": 4.1964335376467734e-05, "loss": 0.202, "step": 2571 }, { "epoch": 2.6218144750254844, "grad_norm": 3.2372565269470215, "learning_rate": 4.1778421931024535e-05, "loss": 0.2473, "step": 2572 }, { "epoch": 2.6228338430173292, "grad_norm": 7.0147480964660645, "learning_rate": 4.159262523898293e-05, "loss": 0.4864, "step": 2573 }, { "epoch": 2.623853211009174, "grad_norm": 5.646935939788818, "learning_rate": 4.140694793881387e-05, "loss": 0.2384, "step": 2574 }, { "epoch": 2.6248725790010194, "grad_norm": 2.3776071071624756, "learning_rate": 4.122139266729305e-05, "loss": 0.0747, "step": 2575 }, { "epoch": 2.6258919469928643, "grad_norm": 7.911314010620117, "learning_rate": 4.103596205946326e-05, "loss": 0.2378, "step": 2576 }, { "epoch": 2.6269113149847096, "grad_norm": 8.810286521911621, "learning_rate": 4.085065874859661e-05, "loss": 0.4169, "step": 2577 }, { "epoch": 2.6279306829765545, "grad_norm": 5.295368194580078, "learning_rate": 4.066548536615792e-05, "loss": 0.2578, "step": 2578 }, { "epoch": 2.6289500509684, "grad_norm": 3.0903420448303223, "learning_rate": 4.0480444541766576e-05, "loss": 0.2464, "step": 2579 }, { "epoch": 2.6299694189602447, "grad_norm": 6.635354518890381, "learning_rate": 4.029553890315982e-05, "loss": 0.5019, "step": 2580 }, { "epoch": 2.6309887869520896, "grad_norm": 5.016622066497803, "learning_rate": 4.0110771076154865e-05, "loss": 0.5358, "step": 2581 }, { "epoch": 2.632008154943935, "grad_norm": 9.547528266906738, "learning_rate": 3.9926143684612145e-05, "loss": 0.9614, "step": 2582 }, { "epoch": 2.63302752293578, "grad_norm": 9.94871997833252, "learning_rate": 3.97416593503975e-05, "loss": 0.7375, "step": 2583 }, { "epoch": 2.6340468909276247, "grad_norm": 5.576138973236084, "learning_rate": 3.955732069334556e-05, "loss": 0.2736, "step": 2584 }, { "epoch": 2.63506625891947, 
"grad_norm": 6.261617183685303, "learning_rate": 3.9373130331221886e-05, "loss": 0.3175, "step": 2585 }, { "epoch": 2.636085626911315, "grad_norm": 3.3927159309387207, "learning_rate": 3.9189090879686426e-05, "loss": 0.1689, "step": 2586 }, { "epoch": 2.63710499490316, "grad_norm": 9.100037574768066, "learning_rate": 3.900520495225588e-05, "loss": 0.5786, "step": 2587 }, { "epoch": 2.638124362895005, "grad_norm": 4.424569129943848, "learning_rate": 3.8821475160266805e-05, "loss": 0.2639, "step": 2588 }, { "epoch": 2.6391437308868504, "grad_norm": 5.2483811378479, "learning_rate": 3.8637904112838466e-05, "loss": 0.2476, "step": 2589 }, { "epoch": 2.6401630988786953, "grad_norm": 2.6344261169433594, "learning_rate": 3.845449441683594e-05, "loss": 0.1445, "step": 2590 }, { "epoch": 2.64118246687054, "grad_norm": 7.211129665374756, "learning_rate": 3.827124867683297e-05, "loss": 0.3231, "step": 2591 }, { "epoch": 2.6422018348623855, "grad_norm": 3.855678081512451, "learning_rate": 3.808816949507489e-05, "loss": 0.2304, "step": 2592 }, { "epoch": 2.6432212028542303, "grad_norm": 5.783268928527832, "learning_rate": 3.79052594714417e-05, "loss": 0.2328, "step": 2593 }, { "epoch": 2.6442405708460752, "grad_norm": 6.157005786895752, "learning_rate": 3.7722521203411385e-05, "loss": 0.5027, "step": 2594 }, { "epoch": 2.6452599388379205, "grad_norm": 8.950606346130371, "learning_rate": 3.753995728602286e-05, "loss": 0.4601, "step": 2595 }, { "epoch": 2.6462793068297654, "grad_norm": 1.801347017288208, "learning_rate": 3.735757031183896e-05, "loss": 0.0599, "step": 2596 }, { "epoch": 2.6472986748216107, "grad_norm": 1.2584683895111084, "learning_rate": 3.7175362870909857e-05, "loss": 0.0803, "step": 2597 }, { "epoch": 2.6483180428134556, "grad_norm": 2.8321571350097656, "learning_rate": 3.699333755073613e-05, "loss": 0.1362, "step": 2598 }, { "epoch": 2.649337410805301, "grad_norm": 5.989924430847168, "learning_rate": 3.681149693623227e-05, "loss": 0.4228, "step": 2599 }, { 
"epoch": 2.650356778797146, "grad_norm": 7.6800665855407715, "learning_rate": 3.662984360968954e-05, "loss": 0.3464, "step": 2600 }, { "epoch": 2.6513761467889907, "grad_norm": 7.051743507385254, "learning_rate": 3.644838015073983e-05, "loss": 0.2288, "step": 2601 }, { "epoch": 2.652395514780836, "grad_norm": 11.621438026428223, "learning_rate": 3.626710913631847e-05, "loss": 0.8281, "step": 2602 }, { "epoch": 2.653414882772681, "grad_norm": 11.922146797180176, "learning_rate": 3.6086033140628154e-05, "loss": 0.5109, "step": 2603 }, { "epoch": 2.6544342507645258, "grad_norm": 5.72918701171875, "learning_rate": 3.590515473510193e-05, "loss": 0.353, "step": 2604 }, { "epoch": 2.655453618756371, "grad_norm": 4.346190452575684, "learning_rate": 3.572447648836714e-05, "loss": 0.2096, "step": 2605 }, { "epoch": 2.656472986748216, "grad_norm": 5.70217227935791, "learning_rate": 3.554400096620848e-05, "loss": 0.2133, "step": 2606 }, { "epoch": 2.6574923547400613, "grad_norm": 7.087839126586914, "learning_rate": 3.5363730731531884e-05, "loss": 0.417, "step": 2607 }, { "epoch": 2.658511722731906, "grad_norm": 6.4641547203063965, "learning_rate": 3.518366834432796e-05, "loss": 0.2561, "step": 2608 }, { "epoch": 2.6595310907237515, "grad_norm": 5.385159492492676, "learning_rate": 3.500381636163581e-05, "loss": 0.311, "step": 2609 }, { "epoch": 2.6605504587155964, "grad_norm": 4.892179489135742, "learning_rate": 3.482417733750665e-05, "loss": 0.304, "step": 2610 }, { "epoch": 2.6615698267074412, "grad_norm": 8.19067668914795, "learning_rate": 3.464475382296733e-05, "loss": 0.3286, "step": 2611 }, { "epoch": 2.6625891946992866, "grad_norm": 3.3572440147399902, "learning_rate": 3.4465548365984304e-05, "loss": 0.242, "step": 2612 }, { "epoch": 2.6636085626911314, "grad_norm": 7.174647331237793, "learning_rate": 3.428656351142756e-05, "loss": 0.2746, "step": 2613 }, { "epoch": 2.6646279306829763, "grad_norm": 1.8434126377105713, "learning_rate": 3.410780180103434e-05, "loss": 
0.0884, "step": 2614 }, { "epoch": 2.6656472986748216, "grad_norm": 8.234567642211914, "learning_rate": 3.392926577337291e-05, "loss": 0.564, "step": 2615 }, { "epoch": 2.6666666666666665, "grad_norm": 7.409701347351074, "learning_rate": 3.375095796380672e-05, "loss": 0.4143, "step": 2616 }, { "epoch": 2.667686034658512, "grad_norm": 8.04309368133545, "learning_rate": 3.357288090445827e-05, "loss": 0.3845, "step": 2617 }, { "epoch": 2.6687054026503567, "grad_norm": 3.051427125930786, "learning_rate": 3.339503712417338e-05, "loss": 0.1215, "step": 2618 }, { "epoch": 2.669724770642202, "grad_norm": 8.909161567687988, "learning_rate": 3.3217429148484893e-05, "loss": 0.4757, "step": 2619 }, { "epoch": 2.670744138634047, "grad_norm": 6.251672744750977, "learning_rate": 3.304005949957726e-05, "loss": 0.2547, "step": 2620 }, { "epoch": 2.671763506625892, "grad_norm": 4.752338409423828, "learning_rate": 3.28629306962502e-05, "loss": 0.1851, "step": 2621 }, { "epoch": 2.672782874617737, "grad_norm": 6.223941802978516, "learning_rate": 3.268604525388354e-05, "loss": 0.2438, "step": 2622 }, { "epoch": 2.673802242609582, "grad_norm": 5.9268293380737305, "learning_rate": 3.2509405684400864e-05, "loss": 0.437, "step": 2623 }, { "epoch": 2.674821610601427, "grad_norm": 5.047306060791016, "learning_rate": 3.233301449623445e-05, "loss": 0.3586, "step": 2624 }, { "epoch": 2.675840978593272, "grad_norm": 4.698922634124756, "learning_rate": 3.2156874194289123e-05, "loss": 0.208, "step": 2625 }, { "epoch": 2.676860346585117, "grad_norm": 7.528968811035156, "learning_rate": 3.198098727990699e-05, "loss": 0.3605, "step": 2626 }, { "epoch": 2.6778797145769624, "grad_norm": 3.599567413330078, "learning_rate": 3.18053562508318e-05, "loss": 0.2689, "step": 2627 }, { "epoch": 2.6788990825688073, "grad_norm": 8.71760082244873, "learning_rate": 3.1629983601173585e-05, "loss": 0.6955, "step": 2628 }, { "epoch": 2.6799184505606526, "grad_norm": 3.29335880279541, "learning_rate": 
3.145487182137322e-05, "loss": 0.154, "step": 2629 }, { "epoch": 2.6809378185524975, "grad_norm": 6.2927937507629395, "learning_rate": 3.128002339816683e-05, "loss": 0.3374, "step": 2630 }, { "epoch": 2.6819571865443423, "grad_norm": 5.186264991760254, "learning_rate": 3.110544081455072e-05, "loss": 0.2798, "step": 2631 }, { "epoch": 2.6829765545361877, "grad_norm": 6.7110466957092285, "learning_rate": 3.093112654974611e-05, "loss": 0.3138, "step": 2632 }, { "epoch": 2.6839959225280325, "grad_norm": 4.012898921966553, "learning_rate": 3.075708307916389e-05, "loss": 0.2881, "step": 2633 }, { "epoch": 2.6850152905198774, "grad_norm": 6.191000938415527, "learning_rate": 3.058331287436933e-05, "loss": 0.2858, "step": 2634 }, { "epoch": 2.6860346585117227, "grad_norm": 4.7844085693359375, "learning_rate": 3.040981840304712e-05, "loss": 0.2717, "step": 2635 }, { "epoch": 2.687054026503568, "grad_norm": 7.037001609802246, "learning_rate": 3.0236602128966275e-05, "loss": 0.6454, "step": 2636 }, { "epoch": 2.688073394495413, "grad_norm": 4.534252166748047, "learning_rate": 3.0063666511945336e-05, "loss": 0.3898, "step": 2637 }, { "epoch": 2.689092762487258, "grad_norm": 6.041018962860107, "learning_rate": 2.989101400781704e-05, "loss": 0.4842, "step": 2638 }, { "epoch": 2.690112130479103, "grad_norm": 7.821871280670166, "learning_rate": 2.9718647068393925e-05, "loss": 0.2664, "step": 2639 }, { "epoch": 2.691131498470948, "grad_norm": 3.5887277126312256, "learning_rate": 2.9546568141432996e-05, "loss": 0.1344, "step": 2640 }, { "epoch": 2.692150866462793, "grad_norm": 8.541189193725586, "learning_rate": 2.9374779670601522e-05, "loss": 0.2727, "step": 2641 }, { "epoch": 2.693170234454638, "grad_norm": 4.584117889404297, "learning_rate": 2.9203284095441773e-05, "loss": 0.2096, "step": 2642 }, { "epoch": 2.694189602446483, "grad_norm": 4.40488862991333, "learning_rate": 2.903208385133692e-05, "loss": 0.2204, "step": 2643 }, { "epoch": 2.695208970438328, "grad_norm": 
8.537614822387695, "learning_rate": 2.8861181369475902e-05, "loss": 0.5485, "step": 2644 }, { "epoch": 2.6962283384301733, "grad_norm": 5.3955769538879395, "learning_rate": 2.8690579076819544e-05, "loss": 0.2895, "step": 2645 }, { "epoch": 2.6972477064220186, "grad_norm": 5.815518379211426, "learning_rate": 2.852027939606525e-05, "loss": 0.1881, "step": 2646 }, { "epoch": 2.6972477064220186, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.8087142109870911, "eval_Qnli-dev-1024_cosine_ap": 0.7557593703190841, "eval_Qnli-dev-1024_cosine_f1": 0.7179487179487181, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.6551511287689209, "eval_Qnli-dev-1024_cosine_mcc": 0.397705839334203, "eval_Qnli-dev-1024_cosine_precision": 0.5833333333333334, "eval_Qnli-dev-1024_cosine_recall": 0.9333333333333333, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7260158061981201, "eval_Qnli-dev_cosine_ap": 0.7399947312668047, "eval_Qnli-dev_cosine_f1": 0.7339449541284404, "eval_Qnli-dev_cosine_f1_threshold": 0.6138423681259155, "eval_Qnli-dev_cosine_mcc": 0.4428074427700477, "eval_Qnli-dev_cosine_precision": 0.625, "eval_Qnli-dev_cosine_recall": 0.8888888888888888, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.28514590859413147, "eval_global_dataset_runtime": 103.8721, "eval_global_dataset_samples_per_second": 7.731, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8911116976425595, "eval_sts-test-1024_spearman_cosine": 0.9133837081466986, "eval_sts-test_pearson_cosine": 0.906324200031563, "eval_sts-test_spearman_cosine": 0.9208139632804799, "step": 2646 }, { "epoch": 2.6982670744138635, "grad_norm": 4.640608787536621, "learning_rate": 2.8350284745613432e-05, "loss": 0.2747, "step": 2647 }, { "epoch": 2.6992864424057084, 
"grad_norm": 4.971389293670654, "learning_rate": 2.8180597539532816e-05, "loss": 0.2051, "step": 2648 }, { "epoch": 2.7003058103975537, "grad_norm": 9.61092758178711, "learning_rate": 2.8011220187525968e-05, "loss": 0.6696, "step": 2649 }, { "epoch": 2.7013251783893986, "grad_norm": 6.7433319091796875, "learning_rate": 2.7842155094895326e-05, "loss": 0.2742, "step": 2650 }, { "epoch": 2.7023445463812434, "grad_norm": 3.069979667663574, "learning_rate": 2.7673404662509038e-05, "loss": 0.1871, "step": 2651 }, { "epoch": 2.7033639143730888, "grad_norm": 7.87565803527832, "learning_rate": 2.7504971286766866e-05, "loss": 0.2713, "step": 2652 }, { "epoch": 2.7043832823649336, "grad_norm": 4.452849388122559, "learning_rate": 2.7336857359565925e-05, "loss": 0.1782, "step": 2653 }, { "epoch": 2.705402650356779, "grad_norm": 9.874443054199219, "learning_rate": 2.7169065268266906e-05, "loss": 0.8587, "step": 2654 }, { "epoch": 2.706422018348624, "grad_norm": 8.942673683166504, "learning_rate": 2.7001597395660376e-05, "loss": 0.3289, "step": 2655 }, { "epoch": 2.707441386340469, "grad_norm": 3.1680140495300293, "learning_rate": 2.683445611993247e-05, "loss": 0.161, "step": 2656 }, { "epoch": 2.708460754332314, "grad_norm": 1.2902299165725708, "learning_rate": 2.6667643814631453e-05, "loss": 0.0794, "step": 2657 }, { "epoch": 2.709480122324159, "grad_norm": 3.362070083618164, "learning_rate": 2.650116284863402e-05, "loss": 0.1901, "step": 2658 }, { "epoch": 2.7104994903160042, "grad_norm": 1.860801100730896, "learning_rate": 2.6335015586111413e-05, "loss": 0.155, "step": 2659 }, { "epoch": 2.711518858307849, "grad_norm": 9.599827766418457, "learning_rate": 2.6169204386496148e-05, "loss": 0.3647, "step": 2660 }, { "epoch": 2.712538226299694, "grad_norm": 2.472621440887451, "learning_rate": 2.6003731604448235e-05, "loss": 0.2017, "step": 2661 }, { "epoch": 2.7135575942915393, "grad_norm": 6.350067615509033, "learning_rate": 2.5838599589822e-05, "loss": 0.4322, "step": 2662 }, { 
"epoch": 2.714576962283384, "grad_norm": 9.118274688720703, "learning_rate": 2.5673810687632394e-05, "loss": 0.4111, "step": 2663 }, { "epoch": 2.7155963302752295, "grad_norm": 6.452308654785156, "learning_rate": 2.5509367238022126e-05, "loss": 0.3989, "step": 2664 }, { "epoch": 2.7166156982670744, "grad_norm": 6.926027297973633, "learning_rate": 2.5345271576227962e-05, "loss": 0.2821, "step": 2665 }, { "epoch": 2.7176350662589197, "grad_norm": 6.381979942321777, "learning_rate": 2.518152603254785e-05, "loss": 0.2476, "step": 2666 }, { "epoch": 2.7186544342507646, "grad_norm": 7.060910701751709, "learning_rate": 2.5018132932307882e-05, "loss": 0.515, "step": 2667 }, { "epoch": 2.7196738022426095, "grad_norm": 3.8371355533599854, "learning_rate": 2.4855094595829015e-05, "loss": 0.1412, "step": 2668 }, { "epoch": 2.720693170234455, "grad_norm": 10.531432151794434, "learning_rate": 2.4692413338394223e-05, "loss": 0.572, "step": 2669 }, { "epoch": 2.7217125382262997, "grad_norm": 9.766855239868164, "learning_rate": 2.4530091470215815e-05, "loss": 0.588, "step": 2670 }, { "epoch": 2.7227319062181445, "grad_norm": 4.339486598968506, "learning_rate": 2.4368131296402415e-05, "loss": 0.319, "step": 2671 }, { "epoch": 2.72375127420999, "grad_norm": 4.394313335418701, "learning_rate": 2.4206535116926222e-05, "loss": 0.1713, "step": 2672 }, { "epoch": 2.7247706422018347, "grad_norm": 6.469154357910156, "learning_rate": 2.404530522659036e-05, "loss": 0.4146, "step": 2673 }, { "epoch": 2.72579001019368, "grad_norm": 8.661172866821289, "learning_rate": 2.3884443914996447e-05, "loss": 0.5111, "step": 2674 }, { "epoch": 2.726809378185525, "grad_norm": 7.845223426818848, "learning_rate": 2.3723953466512083e-05, "loss": 0.3798, "step": 2675 }, { "epoch": 2.7278287461773703, "grad_norm": 1.795512318611145, "learning_rate": 2.3563836160237873e-05, "loss": 0.1102, "step": 2676 }, { "epoch": 2.728848114169215, "grad_norm": 7.233203411102295, "learning_rate": 2.3404094269975972e-05, 
"loss": 0.2584, "step": 2677 }, { "epoch": 2.72986748216106, "grad_norm": 3.4168431758880615, "learning_rate": 2.3244730064196946e-05, "loss": 0.243, "step": 2678 }, { "epoch": 2.7308868501529053, "grad_norm": 5.412033557891846, "learning_rate": 2.3085745806008202e-05, "loss": 0.3049, "step": 2679 }, { "epoch": 2.73190621814475, "grad_norm": 4.973258972167969, "learning_rate": 2.2927143753121293e-05, "loss": 0.3685, "step": 2680 }, { "epoch": 2.732925586136595, "grad_norm": 5.000575542449951, "learning_rate": 2.2768926157820425e-05, "loss": 0.2967, "step": 2681 }, { "epoch": 2.7339449541284404, "grad_norm": 8.042799949645996, "learning_rate": 2.261109526692988e-05, "loss": 0.6673, "step": 2682 }, { "epoch": 2.7349643221202853, "grad_norm": 9.245428085327148, "learning_rate": 2.245365332178267e-05, "loss": 0.5758, "step": 2683 }, { "epoch": 2.7359836901121306, "grad_norm": 7.430521488189697, "learning_rate": 2.2296602558188236e-05, "loss": 0.2995, "step": 2684 }, { "epoch": 2.7370030581039755, "grad_norm": 4.441887855529785, "learning_rate": 2.2139945206401086e-05, "loss": 0.2548, "step": 2685 }, { "epoch": 2.738022426095821, "grad_norm": 4.856107234954834, "learning_rate": 2.198368349108884e-05, "loss": 0.3433, "step": 2686 }, { "epoch": 2.7390417940876657, "grad_norm": 3.318927526473999, "learning_rate": 2.182781963130074e-05, "loss": 0.1641, "step": 2687 }, { "epoch": 2.7400611620795106, "grad_norm": 6.292228698730469, "learning_rate": 2.1672355840436136e-05, "loss": 0.2822, "step": 2688 }, { "epoch": 2.741080530071356, "grad_norm": 5.510320663452148, "learning_rate": 2.1517294326213115e-05, "loss": 0.1818, "step": 2689 }, { "epoch": 2.7420998980632008, "grad_norm": 3.1131033897399902, "learning_rate": 2.136263729063716e-05, "loss": 0.2151, "step": 2690 }, { "epoch": 2.7431192660550456, "grad_norm": 9.573681831359863, "learning_rate": 2.1208386929969653e-05, "loss": 0.5548, "step": 2691 }, { "epoch": 2.744138634046891, "grad_norm": 3.7194857597351074, 
"learning_rate": 2.1054545434696837e-05, "loss": 0.1391, "step": 2692 }, { "epoch": 2.745158002038736, "grad_norm": 7.959753513336182, "learning_rate": 2.0901114989498892e-05, "loss": 0.4368, "step": 2693 }, { "epoch": 2.746177370030581, "grad_norm": 3.638978958129883, "learning_rate": 2.0748097773218712e-05, "loss": 0.1451, "step": 2694 }, { "epoch": 2.747196738022426, "grad_norm": 2.528212785720825, "learning_rate": 2.059549595883074e-05, "loss": 0.1124, "step": 2695 }, { "epoch": 2.7482161060142714, "grad_norm": 6.308834552764893, "learning_rate": 2.044331171341067e-05, "loss": 0.2342, "step": 2696 }, { "epoch": 2.7492354740061162, "grad_norm": 4.431784629821777, "learning_rate": 2.0291547198104143e-05, "loss": 0.2525, "step": 2697 }, { "epoch": 2.750254841997961, "grad_norm": 10.3881196975708, "learning_rate": 2.0140204568096448e-05, "loss": 0.6524, "step": 2698 }, { "epoch": 2.7512742099898064, "grad_norm": 9.04806900024414, "learning_rate": 1.9989285972581578e-05, "loss": 0.3521, "step": 2699 }, { "epoch": 2.7522935779816513, "grad_norm": 6.471098899841309, "learning_rate": 1.9838793554732053e-05, "loss": 0.5034, "step": 2700 }, { "epoch": 2.753312945973496, "grad_norm": 5.406115531921387, "learning_rate": 1.968872945166808e-05, "loss": 0.2161, "step": 2701 }, { "epoch": 2.7543323139653415, "grad_norm": 3.8615589141845703, "learning_rate": 1.9539095794427702e-05, "loss": 0.1294, "step": 2702 }, { "epoch": 2.7553516819571864, "grad_norm": 4.188022136688232, "learning_rate": 1.938989470793599e-05, "loss": 0.1653, "step": 2703 }, { "epoch": 2.7563710499490317, "grad_norm": 3.1788315773010254, "learning_rate": 1.9241128310975415e-05, "loss": 0.213, "step": 2704 }, { "epoch": 2.7573904179408766, "grad_norm": 3.548327684402466, "learning_rate": 1.9092798716155263e-05, "loss": 0.0979, "step": 2705 }, { "epoch": 2.758409785932722, "grad_norm": 8.507022857666016, "learning_rate": 1.8944908029881975e-05, "loss": 0.6243, "step": 2706 }, { "epoch": 2.759429153924567, 
"grad_norm": 4.468634605407715, "learning_rate": 1.8797458352329005e-05, "loss": 0.1636, "step": 2707 }, { "epoch": 2.7604485219164117, "grad_norm": 10.539908409118652, "learning_rate": 1.8650451777407272e-05, "loss": 0.6725, "step": 2708 }, { "epoch": 2.761467889908257, "grad_norm": 9.40577507019043, "learning_rate": 1.850389039273521e-05, "loss": 0.6119, "step": 2709 }, { "epoch": 2.762487257900102, "grad_norm": 5.786467552185059, "learning_rate": 1.8357776279609103e-05, "loss": 0.4547, "step": 2710 }, { "epoch": 2.7635066258919467, "grad_norm": 3.0541884899139404, "learning_rate": 1.821211151297358e-05, "loss": 0.1164, "step": 2711 }, { "epoch": 2.764525993883792, "grad_norm": 4.641866683959961, "learning_rate": 1.8066898161392258e-05, "loss": 0.2381, "step": 2712 }, { "epoch": 2.7655453618756374, "grad_norm": 4.353042125701904, "learning_rate": 1.792213828701833e-05, "loss": 0.3101, "step": 2713 }, { "epoch": 2.7665647298674823, "grad_norm": 7.727181911468506, "learning_rate": 1.7777833945565052e-05, "loss": 0.2404, "step": 2714 }, { "epoch": 2.767584097859327, "grad_norm": 4.7307233810424805, "learning_rate": 1.7633987186276845e-05, "loss": 0.1955, "step": 2715 }, { "epoch": 2.7686034658511725, "grad_norm": 8.080448150634766, "learning_rate": 1.7490600051899963e-05, "loss": 0.4751, "step": 2716 }, { "epoch": 2.7696228338430173, "grad_norm": 5.427289009094238, "learning_rate": 1.7347674578653806e-05, "loss": 0.1427, "step": 2717 }, { "epoch": 2.770642201834862, "grad_norm": 4.197767734527588, "learning_rate": 1.720521279620153e-05, "loss": 0.2715, "step": 2718 }, { "epoch": 2.7716615698267075, "grad_norm": 4.137823581695557, "learning_rate": 1.706321672762175e-05, "loss": 0.2044, "step": 2719 }, { "epoch": 2.7726809378185524, "grad_norm": 4.52974271774292, "learning_rate": 1.69216883893793e-05, "loss": 0.3344, "step": 2720 }, { "epoch": 2.7737003058103973, "grad_norm": 5.985829830169678, "learning_rate": 1.6780629791297044e-05, "loss": 0.2797, "step": 2721 }, { 
"epoch": 2.7747196738022426, "grad_norm": 2.3331515789031982, "learning_rate": 1.6640042936526994e-05, "loss": 0.0801, "step": 2722 }, { "epoch": 2.775739041794088, "grad_norm": 6.142969131469727, "learning_rate": 1.6499929821522125e-05, "loss": 0.2107, "step": 2723 }, { "epoch": 2.776758409785933, "grad_norm": 3.632021427154541, "learning_rate": 1.6360292436007836e-05, "loss": 0.161, "step": 2724 }, { "epoch": 2.7777777777777777, "grad_norm": 8.594420433044434, "learning_rate": 1.6221132762953733e-05, "loss": 0.5933, "step": 2725 }, { "epoch": 2.778797145769623, "grad_norm": 7.609989166259766, "learning_rate": 1.6082452778545532e-05, "loss": 0.2562, "step": 2726 }, { "epoch": 2.779816513761468, "grad_norm": 5.107419013977051, "learning_rate": 1.594425445215697e-05, "loss": 0.1928, "step": 2727 }, { "epoch": 2.7808358817533128, "grad_norm": 8.12761116027832, "learning_rate": 1.5806539746321918e-05, "loss": 0.5271, "step": 2728 }, { "epoch": 2.781855249745158, "grad_norm": 11.674429893493652, "learning_rate": 1.5669310616706268e-05, "loss": 0.5303, "step": 2729 }, { "epoch": 2.782874617737003, "grad_norm": 6.5717926025390625, "learning_rate": 1.5532569012080322e-05, "loss": 0.4117, "step": 2730 }, { "epoch": 2.7838939857288483, "grad_norm": 5.438577651977539, "learning_rate": 1.5396316874291244e-05, "loss": 0.1852, "step": 2731 }, { "epoch": 2.784913353720693, "grad_norm": 7.296583652496338, "learning_rate": 1.526055613823531e-05, "loss": 0.2319, "step": 2732 }, { "epoch": 2.7859327217125385, "grad_norm": 7.109930992126465, "learning_rate": 1.5125288731830428e-05, "loss": 0.3579, "step": 2733 }, { "epoch": 2.7869520897043834, "grad_norm": 7.2482991218566895, "learning_rate": 1.4990516575988778e-05, "loss": 0.36, "step": 2734 }, { "epoch": 2.7879714576962282, "grad_norm": 8.622855186462402, "learning_rate": 1.4856241584589603e-05, "loss": 0.4538, "step": 2735 }, { "epoch": 2.7889908256880735, "grad_norm": 5.97171688079834, "learning_rate": 1.472246566445205e-05, 
"loss": 0.218, "step": 2736 }, { "epoch": 2.7900101936799184, "grad_norm": 4.555757522583008, "learning_rate": 1.458919071530791e-05, "loss": 0.1233, "step": 2737 }, { "epoch": 2.7910295616717633, "grad_norm": 3.099499225616455, "learning_rate": 1.4456418629774892e-05, "loss": 0.1817, "step": 2738 }, { "epoch": 2.7920489296636086, "grad_norm": 5.9811248779296875, "learning_rate": 1.4324151293329436e-05, "loss": 0.2584, "step": 2739 }, { "epoch": 2.7930682976554535, "grad_norm": 7.639220237731934, "learning_rate": 1.4192390584280346e-05, "loss": 0.5893, "step": 2740 }, { "epoch": 2.794087665647299, "grad_norm": 5.624851226806641, "learning_rate": 1.4061138373741638e-05, "loss": 0.2184, "step": 2741 }, { "epoch": 2.7951070336391437, "grad_norm": 9.424039840698242, "learning_rate": 1.393039652560647e-05, "loss": 0.6381, "step": 2742 }, { "epoch": 2.796126401630989, "grad_norm": 6.021475315093994, "learning_rate": 1.3800166896520155e-05, "loss": 0.3252, "step": 2743 }, { "epoch": 2.797145769622834, "grad_norm": 6.239711284637451, "learning_rate": 1.3670451335854372e-05, "loss": 0.2966, "step": 2744 }, { "epoch": 2.797145769622834, "eval_Qnli-dev-1024_cosine_accuracy": 0.7395833333333334, "eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7571010589599609, "eval_Qnli-dev-1024_cosine_ap": 0.7504622260499265, "eval_Qnli-dev-1024_cosine_f1": 0.723404255319149, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7475010752677917, "eval_Qnli-dev-1024_cosine_mcc": 0.46063575594147665, "eval_Qnli-dev-1024_cosine_precision": 0.6938775510204082, "eval_Qnli-dev-1024_cosine_recall": 0.7555555555555555, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6713130474090576, "eval_Qnli-dev_cosine_ap": 0.7399770165919752, "eval_Qnli-dev_cosine_f1": 0.7256637168141592, "eval_Qnli-dev_cosine_f1_threshold": 0.5859470963478088, "eval_Qnli-dev_cosine_mcc": 0.419062972501429, "eval_Qnli-dev_cosine_precision": 0.6029411764705882, 
"eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.3241865932941437, "eval_global_dataset_runtime": 103.8083, "eval_global_dataset_samples_per_second": 7.735, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8939745372812724, "eval_sts-test-1024_spearman_cosine": 0.9148932562411601, "eval_sts-test_pearson_cosine": 0.9070974288417767, "eval_sts-test_spearman_cosine": 0.9217502611378607, "step": 2744 }, { "epoch": 2.7981651376146788, "grad_norm": 6.5206685066223145, "learning_rate": 1.3541251685680196e-05, "loss": 0.6773, "step": 2745 }, { "epoch": 2.799184505606524, "grad_norm": 6.555695533752441, "learning_rate": 1.3412569780742673e-05, "loss": 0.4735, "step": 2746 }, { "epoch": 2.800203873598369, "grad_norm": 3.916958808898926, "learning_rate": 1.3284407448434343e-05, "loss": 0.2945, "step": 2747 }, { "epoch": 2.801223241590214, "grad_norm": 5.9219231605529785, "learning_rate": 1.3156766508769269e-05, "loss": 0.3866, "step": 2748 }, { "epoch": 2.802242609582059, "grad_norm": 6.4752278327941895, "learning_rate": 1.3029648774357345e-05, "loss": 0.2936, "step": 2749 }, { "epoch": 2.803261977573904, "grad_norm": 5.425044536590576, "learning_rate": 1.2903056050378543e-05, "loss": 0.3548, "step": 2750 }, { "epoch": 2.8042813455657494, "grad_norm": 4.103187561035156, "learning_rate": 1.2776990134557293e-05, "loss": 0.401, "step": 2751 }, { "epoch": 2.8053007135575942, "grad_norm": 2.648897171020508, "learning_rate": 1.2651452817136744e-05, "loss": 0.1286, "step": 2752 }, { "epoch": 2.8063200815494396, "grad_norm": 2.8857932090759277, "learning_rate": 1.2526445880853622e-05, "loss": 0.1662, "step": 2753 }, { "epoch": 2.8073394495412844, "grad_norm": 7.301206588745117, "learning_rate": 1.2401971100912663e-05, "loss": 0.3503, "step": 2754 }, { 
"epoch": 2.8083588175331293, "grad_norm": 2.2922003269195557, "learning_rate": 1.2278030244961747e-05, "loss": 0.0835, "step": 2755 }, { "epoch": 2.8093781855249746, "grad_norm": 4.0575666427612305, "learning_rate": 1.2154625073066323e-05, "loss": 0.164, "step": 2756 }, { "epoch": 2.8103975535168195, "grad_norm": 6.752877235412598, "learning_rate": 1.2031757337684912e-05, "loss": 0.3127, "step": 2757 }, { "epoch": 2.8114169215086644, "grad_norm": 7.1814446449279785, "learning_rate": 1.1909428783643766e-05, "loss": 0.479, "step": 2758 }, { "epoch": 2.8124362895005097, "grad_norm": 2.4549272060394287, "learning_rate": 1.1787641148112472e-05, "loss": 0.1292, "step": 2759 }, { "epoch": 2.8134556574923546, "grad_norm": 9.54740047454834, "learning_rate": 1.1666396160578985e-05, "loss": 0.5329, "step": 2760 }, { "epoch": 2.8144750254842, "grad_norm": 8.301751136779785, "learning_rate": 1.1545695542825313e-05, "loss": 0.2671, "step": 2761 }, { "epoch": 2.815494393476045, "grad_norm": 11.171977043151855, "learning_rate": 1.1425541008902834e-05, "loss": 0.4597, "step": 2762 }, { "epoch": 2.81651376146789, "grad_norm": 5.594420433044434, "learning_rate": 1.1305934265108232e-05, "loss": 0.3011, "step": 2763 }, { "epoch": 2.817533129459735, "grad_norm": 5.141785144805908, "learning_rate": 1.1186877009958851e-05, "loss": 0.2159, "step": 2764 }, { "epoch": 2.81855249745158, "grad_norm": 3.065958261489868, "learning_rate": 1.1068370934169048e-05, "loss": 0.1038, "step": 2765 }, { "epoch": 2.819571865443425, "grad_norm": 8.466320991516113, "learning_rate": 1.0950417720625961e-05, "loss": 0.3436, "step": 2766 }, { "epoch": 2.82059123343527, "grad_norm": 6.828653335571289, "learning_rate": 1.0833019044365495e-05, "loss": 0.6016, "step": 2767 }, { "epoch": 2.821610601427115, "grad_norm": 11.670631408691406, "learning_rate": 1.0716176572548648e-05, "loss": 0.7132, "step": 2768 }, { "epoch": 2.8226299694189603, "grad_norm": 4.495013236999512, "learning_rate": 1.059989196443798e-05, 
"loss": 0.2802, "step": 2769 }, { "epoch": 2.823649337410805, "grad_norm": 5.054066181182861, "learning_rate": 1.048416687137384e-05, "loss": 0.2158, "step": 2770 }, { "epoch": 2.8246687054026505, "grad_norm": 6.254016399383545, "learning_rate": 1.036900293675097e-05, "loss": 0.4267, "step": 2771 }, { "epoch": 2.8256880733944953, "grad_norm": 7.1414947509765625, "learning_rate": 1.0254401795995134e-05, "loss": 0.2813, "step": 2772 }, { "epoch": 2.8267074413863407, "grad_norm": 5.415386199951172, "learning_rate": 1.0140365076540104e-05, "loss": 0.3579, "step": 2773 }, { "epoch": 2.8277268093781855, "grad_norm": 4.965087413787842, "learning_rate": 1.0026894397804242e-05, "loss": 0.3354, "step": 2774 }, { "epoch": 2.8287461773700304, "grad_norm": 6.15515661239624, "learning_rate": 9.913991371167653e-06, "loss": 0.3876, "step": 2775 }, { "epoch": 2.8297655453618757, "grad_norm": 4.003582000732422, "learning_rate": 9.801657599949449e-06, "loss": 0.2908, "step": 2776 }, { "epoch": 2.8307849133537206, "grad_norm": 3.4854180812835693, "learning_rate": 9.689894679384614e-06, "loss": 0.2131, "step": 2777 }, { "epoch": 2.8318042813455655, "grad_norm": 6.192078113555908, "learning_rate": 9.578704196601807e-06, "loss": 0.3381, "step": 2778 }, { "epoch": 2.832823649337411, "grad_norm": 3.7153677940368652, "learning_rate": 9.468087730600333e-06, "loss": 0.2269, "step": 2779 }, { "epoch": 2.8338430173292557, "grad_norm": 3.5471065044403076, "learning_rate": 9.358046852228214e-06, "loss": 0.1943, "step": 2780 }, { "epoch": 2.834862385321101, "grad_norm": 3.409437417984009, "learning_rate": 9.24858312415941e-06, "loss": 0.1935, "step": 2781 }, { "epoch": 2.835881753312946, "grad_norm": 4.60510778427124, "learning_rate": 9.139698100872074e-06, "loss": 0.2, "step": 2782 }, { "epoch": 2.836901121304791, "grad_norm": 3.126591444015503, "learning_rate": 9.031393328626082e-06, "loss": 0.2202, "step": 2783 }, { "epoch": 2.837920489296636, "grad_norm": 4.4597930908203125, "learning_rate": 
8.923670345441303e-06, "loss": 0.2008, "step": 2784 }, { "epoch": 2.838939857288481, "grad_norm": 5.132372856140137, "learning_rate": 8.816530681075796e-06, "loss": 0.1634, "step": 2785 }, { "epoch": 2.8399592252803263, "grad_norm": 7.019527912139893, "learning_rate": 8.709975857003866e-06, "loss": 0.1994, "step": 2786 }, { "epoch": 2.840978593272171, "grad_norm": 1.802855134010315, "learning_rate": 8.604007386394647e-06, "loss": 0.0783, "step": 2787 }, { "epoch": 2.841997961264016, "grad_norm": 8.37390422821045, "learning_rate": 8.49862677409055e-06, "loss": 0.2402, "step": 2788 }, { "epoch": 2.8430173292558614, "grad_norm": 5.241154670715332, "learning_rate": 8.393835516585979e-06, "loss": 0.2826, "step": 2789 }, { "epoch": 2.8440366972477067, "grad_norm": 8.102668762207031, "learning_rate": 8.289635102005855e-06, "loss": 0.3095, "step": 2790 }, { "epoch": 2.8450560652395516, "grad_norm": 7.070498943328857, "learning_rate": 8.186027010084684e-06, "loss": 0.394, "step": 2791 }, { "epoch": 2.8460754332313964, "grad_norm": 6.332981586456299, "learning_rate": 8.083012712145505e-06, "loss": 0.2141, "step": 2792 }, { "epoch": 2.8470948012232418, "grad_norm": 2.69991135597229, "learning_rate": 7.980593671079068e-06, "loss": 0.1438, "step": 2793 }, { "epoch": 2.8481141692150866, "grad_norm": 6.667513847351074, "learning_rate": 7.878771341322716e-06, "loss": 0.4134, "step": 2794 }, { "epoch": 2.8491335372069315, "grad_norm": 11.117960929870605, "learning_rate": 7.777547168840233e-06, "loss": 1.0622, "step": 2795 }, { "epoch": 2.850152905198777, "grad_norm": 8.538312911987305, "learning_rate": 7.676922591100922e-06, "loss": 0.3276, "step": 2796 }, { "epoch": 2.8511722731906217, "grad_norm": 9.44412612915039, "learning_rate": 7.576899037059409e-06, "loss": 0.285, "step": 2797 }, { "epoch": 2.8521916411824666, "grad_norm": 3.452684164047241, "learning_rate": 7.477477927135207e-06, "loss": 0.1505, "step": 2798 }, { "epoch": 2.853211009174312, "grad_norm": 4.277743816375732, 
"learning_rate": 7.378660673192683e-06, "loss": 0.1384, "step": 2799 }, { "epoch": 2.8542303771661572, "grad_norm": 4.803715705871582, "learning_rate": 7.28044867852084e-06, "loss": 0.2855, "step": 2800 }, { "epoch": 2.855249745158002, "grad_norm": 3.5064492225646973, "learning_rate": 7.182843337813589e-06, "loss": 0.1841, "step": 2801 }, { "epoch": 2.856269113149847, "grad_norm": 3.3467700481414795, "learning_rate": 7.085846037149746e-06, "loss": 0.1406, "step": 2802 }, { "epoch": 2.8572884811416923, "grad_norm": 6.449463844299316, "learning_rate": 6.989458153973522e-06, "loss": 0.4628, "step": 2803 }, { "epoch": 2.858307849133537, "grad_norm": 2.180995464324951, "learning_rate": 6.893681057074835e-06, "loss": 0.1789, "step": 2804 }, { "epoch": 2.859327217125382, "grad_norm": 4.380829334259033, "learning_rate": 6.7985161065699185e-06, "loss": 0.1803, "step": 2805 }, { "epoch": 2.8603465851172274, "grad_norm": 2.5380773544311523, "learning_rate": 6.703964653881955e-06, "loss": 0.1188, "step": 2806 }, { "epoch": 2.8613659531090723, "grad_norm": 5.769050121307373, "learning_rate": 6.610028041722066e-06, "loss": 0.1484, "step": 2807 }, { "epoch": 2.8623853211009176, "grad_norm": 8.550139427185059, "learning_rate": 6.5167076040700495e-06, "loss": 0.4521, "step": 2808 }, { "epoch": 2.8634046890927625, "grad_norm": 13.322113037109375, "learning_rate": 6.424004666155481e-06, "loss": 0.8927, "step": 2809 }, { "epoch": 2.864424057084608, "grad_norm": 6.6663007736206055, "learning_rate": 6.331920544438874e-06, "loss": 0.2667, "step": 2810 }, { "epoch": 2.8654434250764527, "grad_norm": 7.3615522384643555, "learning_rate": 6.240456546593138e-06, "loss": 0.5552, "step": 2811 }, { "epoch": 2.8664627930682975, "grad_norm": 4.977193832397461, "learning_rate": 6.149613971484852e-06, "loss": 0.246, "step": 2812 }, { "epoch": 2.867482161060143, "grad_norm": 3.421010971069336, "learning_rate": 6.05939410915583e-06, "loss": 0.1816, "step": 2813 }, { "epoch": 2.8685015290519877, 
"grad_norm": 3.8643534183502197, "learning_rate": 5.969798240804853e-06, "loss": 0.179, "step": 2814 }, { "epoch": 2.8695208970438326, "grad_norm": 5.163575172424316, "learning_rate": 5.880827638769415e-06, "loss": 0.1567, "step": 2815 }, { "epoch": 2.870540265035678, "grad_norm": 10.057819366455078, "learning_rate": 5.792483566507822e-06, "loss": 0.8622, "step": 2816 }, { "epoch": 2.871559633027523, "grad_norm": 6.665347099304199, "learning_rate": 5.704767278580958e-06, "loss": 0.3246, "step": 2817 }, { "epoch": 2.872579001019368, "grad_norm": 4.60490083694458, "learning_rate": 5.6176800206348075e-06, "loss": 0.0939, "step": 2818 }, { "epoch": 2.873598369011213, "grad_norm": 10.552454948425293, "learning_rate": 5.531223029382426e-06, "loss": 0.4211, "step": 2819 }, { "epoch": 2.8746177370030583, "grad_norm": 6.045144557952881, "learning_rate": 5.445397532586699e-06, "loss": 0.3004, "step": 2820 }, { "epoch": 2.875637104994903, "grad_norm": 7.7133026123046875, "learning_rate": 5.3602047490426076e-06, "loss": 0.5342, "step": 2821 }, { "epoch": 2.876656472986748, "grad_norm": 3.270509958267212, "learning_rate": 5.275645888560221e-06, "loss": 0.2811, "step": 2822 }, { "epoch": 2.8776758409785934, "grad_norm": 5.2947893142700195, "learning_rate": 5.191722151947237e-06, "loss": 0.2122, "step": 2823 }, { "epoch": 2.8786952089704383, "grad_norm": 4.58398962020874, "learning_rate": 5.1084347309920895e-06, "loss": 0.2948, "step": 2824 }, { "epoch": 2.879714576962283, "grad_norm": 3.8912346363067627, "learning_rate": 5.025784808446987e-06, "loss": 0.1354, "step": 2825 }, { "epoch": 2.8807339449541285, "grad_norm": 2.3357369899749756, "learning_rate": 4.9437735580111385e-06, "loss": 0.1591, "step": 2826 }, { "epoch": 2.8817533129459734, "grad_norm": 7.461204528808594, "learning_rate": 4.86240214431411e-06, "loss": 0.3842, "step": 2827 }, { "epoch": 2.8827726809378187, "grad_norm": 12.113036155700684, "learning_rate": 4.781671722899139e-06, "loss": 0.6479, "step": 2828 }, { 
"epoch": 2.8837920489296636, "grad_norm": 8.575617790222168, "learning_rate": 4.701583440206858e-06, "loss": 0.402, "step": 2829 }, { "epoch": 2.884811416921509, "grad_norm": 8.859447479248047, "learning_rate": 4.622138433559015e-06, "loss": 0.4825, "step": 2830 }, { "epoch": 2.8858307849133538, "grad_norm": 4.101320266723633, "learning_rate": 4.54333783114233e-06, "loss": 0.1895, "step": 2831 }, { "epoch": 2.8868501529051986, "grad_norm": 7.142346382141113, "learning_rate": 4.465182751992342e-06, "loss": 0.5035, "step": 2832 }, { "epoch": 2.887869520897044, "grad_norm": 5.056869983673096, "learning_rate": 4.38767430597764e-06, "loss": 0.2528, "step": 2833 }, { "epoch": 2.888888888888889, "grad_norm": 4.816026210784912, "learning_rate": 4.310813593784075e-06, "loss": 0.1977, "step": 2834 }, { "epoch": 2.8899082568807337, "grad_norm": 6.842755317687988, "learning_rate": 4.234601706899166e-06, "loss": 0.3115, "step": 2835 }, { "epoch": 2.890927624872579, "grad_norm": 7.81460428237915, "learning_rate": 4.159039727596509e-06, "loss": 0.3466, "step": 2836 }, { "epoch": 2.891946992864424, "grad_norm": 6.274500846862793, "learning_rate": 4.0841287289205e-06, "loss": 0.4209, "step": 2837 }, { "epoch": 2.8929663608562692, "grad_norm": 5.631227493286133, "learning_rate": 4.0098697746710155e-06, "loss": 0.5445, "step": 2838 }, { "epoch": 2.893985728848114, "grad_norm": 7.602738857269287, "learning_rate": 3.936263919388394e-06, "loss": 0.6857, "step": 2839 }, { "epoch": 2.8950050968399594, "grad_norm": 3.171926498413086, "learning_rate": 3.863312208338354e-06, "loss": 0.1408, "step": 2840 }, { "epoch": 2.8960244648318043, "grad_norm": 3.9830353260040283, "learning_rate": 3.7910156774972784e-06, "loss": 0.18, "step": 2841 }, { "epoch": 2.897043832823649, "grad_norm": 4.877954959869385, "learning_rate": 3.7193753535373854e-06, "loss": 0.2425, "step": 2842 }, { "epoch": 2.897043832823649, "eval_Qnli-dev-1024_cosine_accuracy": 0.7291666666666666, 
"eval_Qnli-dev-1024_cosine_accuracy_threshold": 0.7575306296348572, "eval_Qnli-dev-1024_cosine_ap": 0.7439778731668312, "eval_Qnli-dev-1024_cosine_f1": 0.7142857142857142, "eval_Qnli-dev-1024_cosine_f1_threshold": 0.7346500158309937, "eval_Qnli-dev-1024_cosine_mcc": 0.4263253018001963, "eval_Qnli-dev-1024_cosine_precision": 0.660377358490566, "eval_Qnli-dev-1024_cosine_recall": 0.7777777777777778, "eval_Qnli-dev_cosine_accuracy": 0.7291666666666666, "eval_Qnli-dev_cosine_accuracy_threshold": 0.6697263717651367, "eval_Qnli-dev_cosine_ap": 0.7410436810871419, "eval_Qnli-dev_cosine_f1": 0.7256637168141592, "eval_Qnli-dev_cosine_f1_threshold": 0.5878369808197021, "eval_Qnli-dev_cosine_mcc": 0.419062972501429, "eval_Qnli-dev_cosine_precision": 0.6029411764705882, "eval_Qnli-dev_cosine_recall": 0.9111111111111111, "eval_allNLI--triplets-1024_cosine_accuracy": 0.9583333134651184, "eval_allNLI-triplets_cosine_accuracy": 0.96875, "eval_global_dataset_loss": 0.316562294960022, "eval_global_dataset_runtime": 103.7708, "eval_global_dataset_samples_per_second": 7.738, "eval_global_dataset_steps_per_second": 0.164, "eval_sequential_score": 0.9583333134651184, "eval_sts-test-1024_pearson_cosine": 0.8948788156897003, "eval_sts-test-1024_spearman_cosine": 0.9161429026505324, "eval_sts-test_pearson_cosine": 0.9074720804711505, "eval_sts-test_spearman_cosine": 0.922284621175041, "step": 2842 }, { "epoch": 2.8980632008154945, "grad_norm": 6.035852909088135, "learning_rate": 3.648392253812205e-06, "loss": 0.1627, "step": 2843 }, { "epoch": 2.8990825688073394, "grad_norm": 8.02778148651123, "learning_rate": 3.578067386342099e-06, "loss": 0.523, "step": 2844 }, { "epoch": 2.9001019367991843, "grad_norm": 2.975386142730713, "learning_rate": 3.5084017498000443e-06, "loss": 0.2438, "step": 2845 }, { "epoch": 2.9011213047910296, "grad_norm": 5.486757278442383, "learning_rate": 3.43939633349733e-06, "loss": 0.2044, "step": 2846 }, { "epoch": 2.9021406727828745, "grad_norm": 5.736207008361816, 
"learning_rate": 3.3710521173695665e-06, "loss": 0.1509, "step": 2847 }, { "epoch": 2.90316004077472, "grad_norm": 4.051654815673828, "learning_rate": 3.303370071962708e-06, "loss": 0.0885, "step": 2848 }, { "epoch": 2.9041794087665647, "grad_norm": 3.427027940750122, "learning_rate": 3.2363511584194093e-06, "loss": 0.1362, "step": 2849 }, { "epoch": 2.90519877675841, "grad_norm": 9.068493843078613, "learning_rate": 3.1699963284652523e-06, "loss": 0.351, "step": 2850 }, { "epoch": 2.906218144750255, "grad_norm": 7.973621368408203, "learning_rate": 3.104306524395256e-06, "loss": 0.2569, "step": 2851 }, { "epoch": 2.9072375127420997, "grad_norm": 1.665149450302124, "learning_rate": 3.0392826790605068e-06, "loss": 0.0932, "step": 2852 }, { "epoch": 2.908256880733945, "grad_norm": 7.884661674499512, "learning_rate": 2.974925715854876e-06, "loss": 0.2864, "step": 2853 }, { "epoch": 2.90927624872579, "grad_norm": 3.071857452392578, "learning_rate": 2.911236548702051e-06, "loss": 0.1591, "step": 2854 }, { "epoch": 2.910295616717635, "grad_norm": 6.92976188659668, "learning_rate": 2.84821608204231e-06, "loss": 0.2428, "step": 2855 }, { "epoch": 2.91131498470948, "grad_norm": 2.4845893383026123, "learning_rate": 2.7858652108199437e-06, "loss": 0.1249, "step": 2856 }, { "epoch": 2.912334352701325, "grad_norm": 5.34236478805542, "learning_rate": 2.724184820470299e-06, "loss": 0.1352, "step": 2857 }, { "epoch": 2.9133537206931703, "grad_norm": 4.656986713409424, "learning_rate": 2.6631757869074457e-06, "loss": 0.2245, "step": 2858 }, { "epoch": 2.914373088685015, "grad_norm": 8.07036304473877, "learning_rate": 2.6028389765114845e-06, "loss": 0.3924, "step": 2859 }, { "epoch": 2.9153924566768605, "grad_norm": 4.92465353012085, "learning_rate": 2.543175246116514e-06, "loss": 0.2301, "step": 2860 }, { "epoch": 2.9164118246687054, "grad_norm": 3.6421494483947754, "learning_rate": 2.4841854429981824e-06, "loss": 0.1289, "step": 2861 }, { "epoch": 2.9174311926605503, "grad_norm": 
1.8478538990020752, "learning_rate": 2.4258704048619574e-06, "loss": 0.1209, "step": 2862 }, { "epoch": 2.9184505606523956, "grad_norm": 6.145997047424316, "learning_rate": 2.3682309598308807e-06, "loss": 0.3491, "step": 2863 }, { "epoch": 2.9194699286442405, "grad_norm": 5.5693359375, "learning_rate": 2.311267926434141e-06, "loss": 0.2015, "step": 2864 }, { "epoch": 2.9204892966360854, "grad_norm": 2.2183048725128174, "learning_rate": 2.254982113595294e-06, "loss": 0.0936, "step": 2865 }, { "epoch": 2.9215086646279307, "grad_norm": 8.294523239135742, "learning_rate": 2.1993743206207283e-06, "loss": 0.4695, "step": 2866 }, { "epoch": 2.922528032619776, "grad_norm": 7.269809722900391, "learning_rate": 2.1444453371883833e-06, "loss": 0.235, "step": 2867 }, { "epoch": 2.923547400611621, "grad_norm": 4.965348720550537, "learning_rate": 2.090195943336565e-06, "loss": 0.2326, "step": 2868 }, { "epoch": 2.9245667686034658, "grad_norm": 7.198317527770996, "learning_rate": 2.0366269094528325e-06, "loss": 0.4356, "step": 2869 }, { "epoch": 2.925586136595311, "grad_norm": 3.5571677684783936, "learning_rate": 1.983738996263007e-06, "loss": 0.1375, "step": 2870 }, { "epoch": 2.926605504587156, "grad_norm": 8.104644775390625, "learning_rate": 1.9315329548204195e-06, "loss": 0.4536, "step": 2871 }, { "epoch": 2.927624872579001, "grad_norm": 7.931567668914795, "learning_rate": 1.8800095264953021e-06, "loss": 0.371, "step": 2872 }, { "epoch": 2.928644240570846, "grad_norm": 5.3822503089904785, "learning_rate": 1.829169442964146e-06, "loss": 0.3237, "step": 2873 }, { "epoch": 2.929663608562691, "grad_norm": 7.507733345031738, "learning_rate": 1.7790134261993607e-06, "loss": 0.1961, "step": 2874 }, { "epoch": 2.930682976554536, "grad_norm": 7.484766483306885, "learning_rate": 1.7295421884590769e-06, "loss": 0.4343, "step": 2875 }, { "epoch": 2.9317023445463812, "grad_norm": 3.543367385864258, "learning_rate": 1.6807564322769198e-06, "loss": 0.1418, "step": 2876 }, { "epoch": 
2.9327217125382266, "grad_norm": 6.311351776123047, "learning_rate": 1.6326568504521521e-06, "loss": 0.2141, "step": 2877 }, { "epoch": 2.9337410805300714, "grad_norm": 7.337040424346924, "learning_rate": 1.58524412603972e-06, "loss": 0.4081, "step": 2878 }, { "epoch": 2.9347604485219163, "grad_norm": 3.225853443145752, "learning_rate": 1.5385189323406657e-06, "loss": 0.0816, "step": 2879 }, { "epoch": 2.9357798165137616, "grad_norm": 6.153621673583984, "learning_rate": 1.4924819328924477e-06, "loss": 0.331, "step": 2880 }, { "epoch": 2.9367991845056065, "grad_norm": 5.877473831176758, "learning_rate": 1.4471337814596752e-06, "loss": 0.1582, "step": 2881 }, { "epoch": 2.9378185524974514, "grad_norm": 5.566036701202393, "learning_rate": 1.4024751220245935e-06, "loss": 0.4107, "step": 2882 }, { "epoch": 2.9388379204892967, "grad_norm": 6.447503089904785, "learning_rate": 1.3585065887781912e-06, "loss": 0.3968, "step": 2883 }, { "epoch": 2.9398572884811416, "grad_norm": 7.995684623718262, "learning_rate": 1.3152288061110518e-06, "loss": 0.2108, "step": 2884 }, { "epoch": 2.940876656472987, "grad_norm": 5.465237140655518, "learning_rate": 1.2726423886044835e-06, "loss": 0.2353, "step": 2885 }, { "epoch": 2.941896024464832, "grad_norm": 2.0872719287872314, "learning_rate": 1.2307479410218203e-06, "loss": 0.1005, "step": 2886 }, { "epoch": 2.942915392456677, "grad_norm": 3.393397331237793, "learning_rate": 1.189546058299873e-06, "loss": 0.2052, "step": 2887 }, { "epoch": 2.943934760448522, "grad_norm": 3.042525291442871, "learning_rate": 1.1490373255404309e-06, "loss": 0.1, "step": 2888 }, { "epoch": 2.944954128440367, "grad_norm": 7.329123497009277, "learning_rate": 1.1092223180019456e-06, "loss": 0.4187, "step": 2889 }, { "epoch": 2.945973496432212, "grad_norm": 9.219369888305664, "learning_rate": 1.0701016010913723e-06, "loss": 0.7002, "step": 2890 }, { "epoch": 2.946992864424057, "grad_norm": 6.616352558135986, "learning_rate": 1.0316757303561852e-06, "loss": 0.3459, 
"step": 2891 }, { "epoch": 2.948012232415902, "grad_norm": 6.501418113708496, "learning_rate": 9.939452514764303e-07, "loss": 0.2325, "step": 2892 }, { "epoch": 2.9490316004077473, "grad_norm": 2.8526558876037598, "learning_rate": 9.5691070025698e-07, "loss": 0.1916, "step": 2893 }, { "epoch": 2.950050968399592, "grad_norm": 4.048646926879883, "learning_rate": 9.205726026199957e-07, "loss": 0.1248, "step": 2894 }, { "epoch": 2.9510703363914375, "grad_norm": 6.920399188995361, "learning_rate": 8.849314745973392e-07, "loss": 0.5866, "step": 2895 }, { "epoch": 2.9520897043832823, "grad_norm": 12.091155052185059, "learning_rate": 8.499878223233726e-07, "loss": 0.7633, "step": 2896 }, { "epoch": 2.9531090723751277, "grad_norm": 4.252422332763672, "learning_rate": 8.157421420276479e-07, "loss": 0.2877, "step": 2897 }, { "epoch": 2.9541284403669725, "grad_norm": 12.343497276306152, "learning_rate": 7.821949200279899e-07, "loss": 0.5611, "step": 2898 }, { "epoch": 2.9551478083588174, "grad_norm": 6.737751007080078, "learning_rate": 7.493466327234521e-07, "loss": 0.2056, "step": 2899 }, { "epoch": 2.9561671763506627, "grad_norm": 7.8665947914123535, "learning_rate": 7.171977465876834e-07, "loss": 0.3976, "step": 2900 }, { "epoch": 2.9571865443425076, "grad_norm": 6.2594709396362305, "learning_rate": 6.857487181621935e-07, "loss": 0.2407, "step": 2901 }, { "epoch": 2.9582059123343525, "grad_norm": 8.915580749511719, "learning_rate": 6.549999940499263e-07, "loss": 0.6218, "step": 2902 }, { "epoch": 2.959225280326198, "grad_norm": 6.0038743019104, "learning_rate": 6.249520109089469e-07, "loss": 0.3254, "step": 2903 }, { "epoch": 2.9602446483180427, "grad_norm": 3.5023767948150635, "learning_rate": 5.956051954461472e-07, "loss": 0.2097, "step": 2904 }, { "epoch": 2.961264016309888, "grad_norm": 5.1068034172058105, "learning_rate": 5.669599644112788e-07, "loss": 0.3198, "step": 2905 }, { "epoch": 2.962283384301733, "grad_norm": 6.734827995300293, "learning_rate": 
5.390167245909794e-07, "loss": 0.365, "step": 2906 }, { "epoch": 2.963302752293578, "grad_norm": 6.597443580627441, "learning_rate": 5.117758728030441e-07, "loss": 0.3549, "step": 2907 }, { "epoch": 2.964322120285423, "grad_norm": 4.171931266784668, "learning_rate": 4.852377958907195e-07, "loss": 0.2156, "step": 2908 }, { "epoch": 2.965341488277268, "grad_norm": 3.3261940479278564, "learning_rate": 4.594028707172626e-07, "loss": 0.1436, "step": 2909 }, { "epoch": 2.9663608562691133, "grad_norm": 6.220198631286621, "learning_rate": 4.3427146416060163e-07, "loss": 0.3647, "step": 2910 }, { "epoch": 2.967380224260958, "grad_norm": 7.359838962554932, "learning_rate": 4.09843933108095e-07, "loss": 0.29, "step": 2911 }, { "epoch": 2.968399592252803, "grad_norm": 5.08355712890625, "learning_rate": 3.8612062445143596e-07, "loss": 0.2596, "step": 2912 }, { "epoch": 2.9694189602446484, "grad_norm": 2.2986602783203125, "learning_rate": 3.6310187508179494e-07, "loss": 0.076, "step": 2913 }, { "epoch": 2.9704383282364932, "grad_norm": 3.739488124847412, "learning_rate": 3.4078801188499597e-07, "loss": 0.1321, "step": 2914 }, { "epoch": 2.9714576962283386, "grad_norm": 10.203991889953613, "learning_rate": 3.191793517368702e-07, "loss": 0.5034, "step": 2915 }, { "epoch": 2.9724770642201834, "grad_norm": 3.171394109725952, "learning_rate": 2.982762014987761e-07, "loss": 0.1067, "step": 2916 } ], "logging_steps": 1, "max_steps": 2943, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 972, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }