{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.981203007518797,
"eval_steps": 50,
"global_step": 1054,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018796992481203006,
"grad_norm": 15.693917274475098,
"learning_rate": 0.0,
"loss": 1.7608,
"step": 1
},
{
"epoch": 0.0037593984962406013,
"grad_norm": null,
"learning_rate": 2.506265664160401e-07,
"loss": 1.7877,
"step": 2
},
{
"epoch": 0.005639097744360902,
"grad_norm": 17.349987030029297,
"learning_rate": 2.506265664160401e-07,
"loss": 2.2507,
"step": 3
},
{
"epoch": 0.007518796992481203,
"grad_norm": 13.13552474975586,
"learning_rate": 5.012531328320802e-07,
"loss": 1.5007,
"step": 4
},
{
"epoch": 0.009398496240601503,
"grad_norm": 16.77507972717285,
"learning_rate": 7.518796992481203e-07,
"loss": 1.9385,
"step": 5
},
{
"epoch": 0.011278195488721804,
"grad_norm": 13.212800979614258,
"learning_rate": 1.0025062656641603e-06,
"loss": 2.2608,
"step": 6
},
{
"epoch": 0.013157894736842105,
"grad_norm": 12.166118621826172,
"learning_rate": 1.2531328320802005e-06,
"loss": 1.792,
"step": 7
},
{
"epoch": 0.015037593984962405,
"grad_norm": 15.633316993713379,
"learning_rate": 1.5037593984962406e-06,
"loss": 1.9776,
"step": 8
},
{
"epoch": 0.016917293233082706,
"grad_norm": 11.28406810760498,
"learning_rate": 1.7543859649122807e-06,
"loss": 1.139,
"step": 9
},
{
"epoch": 0.018796992481203006,
"grad_norm": 14.046056747436523,
"learning_rate": 2.0050125313283207e-06,
"loss": 1.5296,
"step": 10
},
{
"epoch": 0.020676691729323307,
"grad_norm": 10.505359649658203,
"learning_rate": 2.255639097744361e-06,
"loss": 1.1633,
"step": 11
},
{
"epoch": 0.022556390977443608,
"grad_norm": 14.75375747680664,
"learning_rate": 2.506265664160401e-06,
"loss": 2.0384,
"step": 12
},
{
"epoch": 0.02443609022556391,
"grad_norm": 10.45843505859375,
"learning_rate": 2.7568922305764413e-06,
"loss": 1.395,
"step": 13
},
{
"epoch": 0.02631578947368421,
"grad_norm": 13.602055549621582,
"learning_rate": 3.007518796992481e-06,
"loss": 1.7397,
"step": 14
},
{
"epoch": 0.02819548872180451,
"grad_norm": 12.818756103515625,
"learning_rate": 3.258145363408521e-06,
"loss": 1.4049,
"step": 15
},
{
"epoch": 0.03007518796992481,
"grad_norm": 11.55542278289795,
"learning_rate": 3.5087719298245615e-06,
"loss": 1.2005,
"step": 16
},
{
"epoch": 0.03195488721804511,
"grad_norm": 12.352794647216797,
"learning_rate": 3.7593984962406014e-06,
"loss": 1.441,
"step": 17
},
{
"epoch": 0.03383458646616541,
"grad_norm": 11.01584243774414,
"learning_rate": 4.010025062656641e-06,
"loss": 1.3119,
"step": 18
},
{
"epoch": 0.03571428571428571,
"grad_norm": 8.868569374084473,
"learning_rate": 4.260651629072682e-06,
"loss": 0.8352,
"step": 19
},
{
"epoch": 0.03759398496240601,
"grad_norm": 9.641520500183105,
"learning_rate": 4.511278195488722e-06,
"loss": 1.3154,
"step": 20
},
{
"epoch": 0.039473684210526314,
"grad_norm": 10.837322235107422,
"learning_rate": 4.7619047619047615e-06,
"loss": 1.0206,
"step": 21
},
{
"epoch": 0.041353383458646614,
"grad_norm": 9.514777183532715,
"learning_rate": 5.012531328320802e-06,
"loss": 0.9626,
"step": 22
},
{
"epoch": 0.043233082706766915,
"grad_norm": 10.232863426208496,
"learning_rate": 5.263157894736842e-06,
"loss": 1.3082,
"step": 23
},
{
"epoch": 0.045112781954887216,
"grad_norm": 9.25696849822998,
"learning_rate": 5.5137844611528826e-06,
"loss": 1.0918,
"step": 24
},
{
"epoch": 0.046992481203007516,
"grad_norm": 11.14088249206543,
"learning_rate": 5.764411027568922e-06,
"loss": 1.3777,
"step": 25
},
{
"epoch": 0.04887218045112782,
"grad_norm": 7.43634033203125,
"learning_rate": 6.015037593984962e-06,
"loss": 0.907,
"step": 26
},
{
"epoch": 0.05075187969924812,
"grad_norm": 7.395077705383301,
"learning_rate": 6.265664160401003e-06,
"loss": 0.9302,
"step": 27
},
{
"epoch": 0.05263157894736842,
"grad_norm": 9.007256507873535,
"learning_rate": 6.516290726817042e-06,
"loss": 1.0028,
"step": 28
},
{
"epoch": 0.05451127819548872,
"grad_norm": 7.390114784240723,
"learning_rate": 6.766917293233083e-06,
"loss": 0.9131,
"step": 29
},
{
"epoch": 0.05639097744360902,
"grad_norm": 9.816267967224121,
"learning_rate": 7.017543859649123e-06,
"loss": 1.3304,
"step": 30
},
{
"epoch": 0.05827067669172932,
"grad_norm": 8.316947937011719,
"learning_rate": 7.2681704260651625e-06,
"loss": 1.0405,
"step": 31
},
{
"epoch": 0.06015037593984962,
"grad_norm": 6.560980796813965,
"learning_rate": 7.518796992481203e-06,
"loss": 0.6233,
"step": 32
},
{
"epoch": 0.06203007518796992,
"grad_norm": 8.559331893920898,
"learning_rate": 7.769423558897243e-06,
"loss": 1.4009,
"step": 33
},
{
"epoch": 0.06390977443609022,
"grad_norm": 8.17328929901123,
"learning_rate": 8.020050125313283e-06,
"loss": 0.7543,
"step": 34
},
{
"epoch": 0.06578947368421052,
"grad_norm": 6.206168174743652,
"learning_rate": 8.270676691729324e-06,
"loss": 0.5975,
"step": 35
},
{
"epoch": 0.06766917293233082,
"grad_norm": 7.136289119720459,
"learning_rate": 8.521303258145363e-06,
"loss": 0.803,
"step": 36
},
{
"epoch": 0.06954887218045112,
"grad_norm": 6.918433666229248,
"learning_rate": 8.771929824561403e-06,
"loss": 0.7285,
"step": 37
},
{
"epoch": 0.07142857142857142,
"grad_norm": 6.956127166748047,
"learning_rate": 9.022556390977444e-06,
"loss": 0.759,
"step": 38
},
{
"epoch": 0.07330827067669173,
"grad_norm": 8.294937133789062,
"learning_rate": 9.273182957393484e-06,
"loss": 1.0653,
"step": 39
},
{
"epoch": 0.07518796992481203,
"grad_norm": 7.191320419311523,
"learning_rate": 9.523809523809523e-06,
"loss": 0.8235,
"step": 40
},
{
"epoch": 0.07706766917293233,
"grad_norm": 7.36721658706665,
"learning_rate": 9.774436090225564e-06,
"loss": 0.7822,
"step": 41
},
{
"epoch": 0.07894736842105263,
"grad_norm": 5.150545597076416,
"learning_rate": 1.0025062656641604e-05,
"loss": 0.539,
"step": 42
},
{
"epoch": 0.08082706766917293,
"grad_norm": 7.323425769805908,
"learning_rate": 1.0275689223057643e-05,
"loss": 0.9211,
"step": 43
},
{
"epoch": 0.08270676691729323,
"grad_norm": 5.510406017303467,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.6063,
"step": 44
},
{
"epoch": 0.08458646616541353,
"grad_norm": 6.937963008880615,
"learning_rate": 1.0776942355889724e-05,
"loss": 0.8769,
"step": 45
},
{
"epoch": 0.08646616541353383,
"grad_norm": 6.418097496032715,
"learning_rate": 1.1027568922305765e-05,
"loss": 0.8044,
"step": 46
},
{
"epoch": 0.08834586466165413,
"grad_norm": 7.569619178771973,
"learning_rate": 1.1278195488721805e-05,
"loss": 1.0656,
"step": 47
},
{
"epoch": 0.09022556390977443,
"grad_norm": 5.450784206390381,
"learning_rate": 1.1528822055137844e-05,
"loss": 0.6475,
"step": 48
},
{
"epoch": 0.09210526315789473,
"grad_norm": 5.681114196777344,
"learning_rate": 1.1779448621553885e-05,
"loss": 0.7331,
"step": 49
},
{
"epoch": 0.09398496240601503,
"grad_norm": 4.720979690551758,
"learning_rate": 1.2030075187969925e-05,
"loss": 0.4248,
"step": 50
},
{
"epoch": 0.09398496240601503,
"eval_global_dataset_loss": 1.4761557579040527,
"eval_global_dataset_runtime": 74.8102,
"eval_global_dataset_samples_per_second": 12.993,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9117502957528434,
"eval_sts-test-1024_pearson_cosine": 0.9001737490289153,
"eval_sts-test-1024_spearman_cosine": 0.913358387579682,
"eval_sts-test-1280_pearson_cosine": 0.9001751171497632,
"eval_sts-test-1280_spearman_cosine": 0.9133455887189241,
"eval_sts-test-512_pearson_cosine": 0.89801885928312,
"eval_sts-test-512_spearman_cosine": 0.9117502957528434,
"eval_sts-test-760_pearson_cosine": 0.8983629702187792,
"eval_sts-test-760_spearman_cosine": 0.9130246326722251,
"eval_sts-test_pearson_cosine": 0.9001764198713166,
"eval_sts-test_spearman_cosine": 0.9133424561306267,
"step": 50
},
{
"epoch": 0.09586466165413533,
"grad_norm": 4.459887504577637,
"learning_rate": 1.2280701754385964e-05,
"loss": 0.3661,
"step": 51
},
{
"epoch": 0.09774436090225563,
"grad_norm": 5.0679030418396,
"learning_rate": 1.2531328320802006e-05,
"loss": 0.645,
"step": 52
},
{
"epoch": 0.09962406015037593,
"grad_norm": 5.17216682434082,
"learning_rate": 1.2781954887218045e-05,
"loss": 0.3991,
"step": 53
},
{
"epoch": 0.10150375939849623,
"grad_norm": 5.646566390991211,
"learning_rate": 1.3032581453634085e-05,
"loss": 0.8027,
"step": 54
},
{
"epoch": 0.10338345864661654,
"grad_norm": 4.848506927490234,
"learning_rate": 1.3283208020050126e-05,
"loss": 0.5594,
"step": 55
},
{
"epoch": 0.10526315789473684,
"grad_norm": 6.822021484375,
"learning_rate": 1.3533834586466165e-05,
"loss": 0.973,
"step": 56
},
{
"epoch": 0.10714285714285714,
"grad_norm": 6.811206817626953,
"learning_rate": 1.3784461152882205e-05,
"loss": 0.9,
"step": 57
},
{
"epoch": 0.10902255639097744,
"grad_norm": 4.507321357727051,
"learning_rate": 1.4035087719298246e-05,
"loss": 0.4526,
"step": 58
},
{
"epoch": 0.11090225563909774,
"grad_norm": 3.8985111713409424,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.3216,
"step": 59
},
{
"epoch": 0.11278195488721804,
"grad_norm": 5.594377040863037,
"learning_rate": 1.4536340852130325e-05,
"loss": 0.6491,
"step": 60
},
{
"epoch": 0.11466165413533834,
"grad_norm": 5.777069568634033,
"learning_rate": 1.4786967418546366e-05,
"loss": 0.6211,
"step": 61
},
{
"epoch": 0.11654135338345864,
"grad_norm": 4.431408405303955,
"learning_rate": 1.5037593984962406e-05,
"loss": 0.4682,
"step": 62
},
{
"epoch": 0.11842105263157894,
"grad_norm": 4.991281986236572,
"learning_rate": 1.5288220551378447e-05,
"loss": 0.5099,
"step": 63
},
{
"epoch": 0.12030075187969924,
"grad_norm": 5.1047186851501465,
"learning_rate": 1.5538847117794486e-05,
"loss": 0.5467,
"step": 64
},
{
"epoch": 0.12218045112781954,
"grad_norm": 4.4686279296875,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.4413,
"step": 65
},
{
"epoch": 0.12406015037593984,
"grad_norm": 3.8792800903320312,
"learning_rate": 1.6040100250626565e-05,
"loss": 0.3663,
"step": 66
},
{
"epoch": 0.12593984962406016,
"grad_norm": 6.33101224899292,
"learning_rate": 1.6290726817042605e-05,
"loss": 0.6832,
"step": 67
},
{
"epoch": 0.12781954887218044,
"grad_norm": 3.861156940460205,
"learning_rate": 1.6541353383458648e-05,
"loss": 0.3447,
"step": 68
},
{
"epoch": 0.12969924812030076,
"grad_norm": 6.406822681427002,
"learning_rate": 1.6791979949874687e-05,
"loss": 0.8614,
"step": 69
},
{
"epoch": 0.13157894736842105,
"grad_norm": 4.49379825592041,
"learning_rate": 1.7042606516290727e-05,
"loss": 0.4724,
"step": 70
},
{
"epoch": 0.13345864661654136,
"grad_norm": 5.233432292938232,
"learning_rate": 1.7293233082706766e-05,
"loss": 0.5842,
"step": 71
},
{
"epoch": 0.13533834586466165,
"grad_norm": 4.645366191864014,
"learning_rate": 1.7543859649122806e-05,
"loss": 0.4599,
"step": 72
},
{
"epoch": 0.13721804511278196,
"grad_norm": 5.10455322265625,
"learning_rate": 1.779448621553885e-05,
"loss": 0.5251,
"step": 73
},
{
"epoch": 0.13909774436090225,
"grad_norm": 3.6988489627838135,
"learning_rate": 1.8045112781954888e-05,
"loss": 0.2282,
"step": 74
},
{
"epoch": 0.14097744360902256,
"grad_norm": 5.149707794189453,
"learning_rate": 1.8295739348370928e-05,
"loss": 0.5728,
"step": 75
},
{
"epoch": 0.14285714285714285,
"grad_norm": 4.207123756408691,
"learning_rate": 1.8546365914786967e-05,
"loss": 0.4518,
"step": 76
},
{
"epoch": 0.14473684210526316,
"grad_norm": 4.2548980712890625,
"learning_rate": 1.8796992481203007e-05,
"loss": 0.4483,
"step": 77
},
{
"epoch": 0.14661654135338345,
"grad_norm": 4.605799674987793,
"learning_rate": 1.9047619047619046e-05,
"loss": 0.5031,
"step": 78
},
{
"epoch": 0.14849624060150377,
"grad_norm": 4.675529956817627,
"learning_rate": 1.929824561403509e-05,
"loss": 0.5342,
"step": 79
},
{
"epoch": 0.15037593984962405,
"grad_norm": 3.668121814727783,
"learning_rate": 1.954887218045113e-05,
"loss": 0.2656,
"step": 80
},
{
"epoch": 0.15225563909774437,
"grad_norm": 4.905953407287598,
"learning_rate": 1.9799498746867168e-05,
"loss": 0.4979,
"step": 81
},
{
"epoch": 0.15413533834586465,
"grad_norm": 3.5785348415374756,
"learning_rate": 2.0050125313283208e-05,
"loss": 0.2907,
"step": 82
},
{
"epoch": 0.15601503759398497,
"grad_norm": 4.371109962463379,
"learning_rate": 2.0300751879699247e-05,
"loss": 0.4795,
"step": 83
},
{
"epoch": 0.15789473684210525,
"grad_norm": 4.517609119415283,
"learning_rate": 2.0551378446115287e-05,
"loss": 0.3756,
"step": 84
},
{
"epoch": 0.15977443609022557,
"grad_norm": 5.095982074737549,
"learning_rate": 2.080200501253133e-05,
"loss": 0.4711,
"step": 85
},
{
"epoch": 0.16165413533834586,
"grad_norm": 5.376642227172852,
"learning_rate": 2.105263157894737e-05,
"loss": 0.4183,
"step": 86
},
{
"epoch": 0.16353383458646617,
"grad_norm": 4.557480335235596,
"learning_rate": 2.130325814536341e-05,
"loss": 0.4993,
"step": 87
},
{
"epoch": 0.16541353383458646,
"grad_norm": 4.666930675506592,
"learning_rate": 2.1553884711779448e-05,
"loss": 0.4767,
"step": 88
},
{
"epoch": 0.16729323308270677,
"grad_norm": 5.519852638244629,
"learning_rate": 2.1804511278195487e-05,
"loss": 0.7443,
"step": 89
},
{
"epoch": 0.16917293233082706,
"grad_norm": 3.6135387420654297,
"learning_rate": 2.205513784461153e-05,
"loss": 0.301,
"step": 90
},
{
"epoch": 0.17105263157894737,
"grad_norm": 3.6060757637023926,
"learning_rate": 2.230576441102757e-05,
"loss": 0.2712,
"step": 91
},
{
"epoch": 0.17293233082706766,
"grad_norm": 4.757140636444092,
"learning_rate": 2.255639097744361e-05,
"loss": 0.4745,
"step": 92
},
{
"epoch": 0.17481203007518797,
"grad_norm": 4.468283653259277,
"learning_rate": 2.280701754385965e-05,
"loss": 0.3506,
"step": 93
},
{
"epoch": 0.17669172932330826,
"grad_norm": 5.2164835929870605,
"learning_rate": 2.3057644110275688e-05,
"loss": 0.5394,
"step": 94
},
{
"epoch": 0.17857142857142858,
"grad_norm": 3.763227939605713,
"learning_rate": 2.3308270676691728e-05,
"loss": 0.2925,
"step": 95
},
{
"epoch": 0.18045112781954886,
"grad_norm": 2.758868455886841,
"learning_rate": 2.355889724310777e-05,
"loss": 0.2154,
"step": 96
},
{
"epoch": 0.18233082706766918,
"grad_norm": 4.797206401824951,
"learning_rate": 2.380952380952381e-05,
"loss": 0.468,
"step": 97
},
{
"epoch": 0.18421052631578946,
"grad_norm": 3.767606019973755,
"learning_rate": 2.406015037593985e-05,
"loss": 0.2269,
"step": 98
},
{
"epoch": 0.18609022556390978,
"grad_norm": 4.662686824798584,
"learning_rate": 2.431077694235589e-05,
"loss": 0.3967,
"step": 99
},
{
"epoch": 0.18796992481203006,
"grad_norm": 4.955148220062256,
"learning_rate": 2.456140350877193e-05,
"loss": 0.489,
"step": 100
},
{
"epoch": 0.18796992481203006,
"eval_global_dataset_loss": 1.2233351469039917,
"eval_global_dataset_runtime": 74.0924,
"eval_global_dataset_samples_per_second": 13.119,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9123232014011761,
"eval_sts-test-1024_pearson_cosine": 0.8996086094131341,
"eval_sts-test-1024_spearman_cosine": 0.914071946442568,
"eval_sts-test-1280_pearson_cosine": 0.8996114667036814,
"eval_sts-test-1280_spearman_cosine": 0.9140625486776759,
"eval_sts-test-512_pearson_cosine": 0.8970205686989379,
"eval_sts-test-512_spearman_cosine": 0.9123232014011761,
"eval_sts-test-760_pearson_cosine": 0.8975729153169623,
"eval_sts-test-760_spearman_cosine": 0.9137202015280316,
"eval_sts-test_pearson_cosine": 0.8996145648456096,
"eval_sts-test_spearman_cosine": 0.9140547172069323,
"step": 100
},
{
"epoch": 0.18984962406015038,
"grad_norm": 4.103837490081787,
"learning_rate": 2.4812030075187968e-05,
"loss": 0.3021,
"step": 101
},
{
"epoch": 0.19172932330827067,
"grad_norm": 3.842869281768799,
"learning_rate": 2.506265664160401e-05,
"loss": 0.315,
"step": 102
},
{
"epoch": 0.19360902255639098,
"grad_norm": 5.454257965087891,
"learning_rate": 2.531328320802005e-05,
"loss": 0.664,
"step": 103
},
{
"epoch": 0.19548872180451127,
"grad_norm": 5.219631671905518,
"learning_rate": 2.556390977443609e-05,
"loss": 0.5144,
"step": 104
},
{
"epoch": 0.19736842105263158,
"grad_norm": 4.749826431274414,
"learning_rate": 2.581453634085213e-05,
"loss": 0.5137,
"step": 105
},
{
"epoch": 0.19924812030075187,
"grad_norm": 3.728238821029663,
"learning_rate": 2.606516290726817e-05,
"loss": 0.2783,
"step": 106
},
{
"epoch": 0.20112781954887218,
"grad_norm": 4.029903411865234,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.2859,
"step": 107
},
{
"epoch": 0.20300751879699247,
"grad_norm": 3.996060848236084,
"learning_rate": 2.656641604010025e-05,
"loss": 0.333,
"step": 108
},
{
"epoch": 0.20488721804511278,
"grad_norm": 4.141890525817871,
"learning_rate": 2.681704260651629e-05,
"loss": 0.3578,
"step": 109
},
{
"epoch": 0.20676691729323307,
"grad_norm": 3.9686567783355713,
"learning_rate": 2.706766917293233e-05,
"loss": 0.373,
"step": 110
},
{
"epoch": 0.20864661654135339,
"grad_norm": 3.90350341796875,
"learning_rate": 2.731829573934837e-05,
"loss": 0.3707,
"step": 111
},
{
"epoch": 0.21052631578947367,
"grad_norm": 3.921323537826538,
"learning_rate": 2.756892230576441e-05,
"loss": 0.2798,
"step": 112
},
{
"epoch": 0.212406015037594,
"grad_norm": 3.878953695297241,
"learning_rate": 2.7819548872180452e-05,
"loss": 0.3597,
"step": 113
},
{
"epoch": 0.21428571428571427,
"grad_norm": 4.601593494415283,
"learning_rate": 2.8070175438596492e-05,
"loss": 0.43,
"step": 114
},
{
"epoch": 0.2161654135338346,
"grad_norm": 3.9261109828948975,
"learning_rate": 2.832080200501253e-05,
"loss": 0.3277,
"step": 115
},
{
"epoch": 0.21804511278195488,
"grad_norm": 5.408901691436768,
"learning_rate": 2.857142857142857e-05,
"loss": 0.5529,
"step": 116
},
{
"epoch": 0.2199248120300752,
"grad_norm": 4.431450366973877,
"learning_rate": 2.882205513784461e-05,
"loss": 0.3227,
"step": 117
},
{
"epoch": 0.22180451127819548,
"grad_norm": 5.22983980178833,
"learning_rate": 2.907268170426065e-05,
"loss": 0.6035,
"step": 118
},
{
"epoch": 0.2236842105263158,
"grad_norm": 3.532494306564331,
"learning_rate": 2.9323308270676693e-05,
"loss": 0.2348,
"step": 119
},
{
"epoch": 0.22556390977443608,
"grad_norm": 5.134215354919434,
"learning_rate": 2.9573934837092732e-05,
"loss": 0.5626,
"step": 120
},
{
"epoch": 0.2274436090225564,
"grad_norm": 4.672839164733887,
"learning_rate": 2.9824561403508772e-05,
"loss": 0.3628,
"step": 121
},
{
"epoch": 0.22932330827067668,
"grad_norm": 4.489410877227783,
"learning_rate": 3.007518796992481e-05,
"loss": 0.4222,
"step": 122
},
{
"epoch": 0.231203007518797,
"grad_norm": 4.356950759887695,
"learning_rate": 3.032581453634085e-05,
"loss": 0.3231,
"step": 123
},
{
"epoch": 0.23308270676691728,
"grad_norm": 3.497793436050415,
"learning_rate": 3.0576441102756894e-05,
"loss": 0.1875,
"step": 124
},
{
"epoch": 0.2349624060150376,
"grad_norm": 3.3374269008636475,
"learning_rate": 3.082706766917293e-05,
"loss": 0.2226,
"step": 125
},
{
"epoch": 0.23684210526315788,
"grad_norm": 4.252456188201904,
"learning_rate": 3.107769423558897e-05,
"loss": 0.318,
"step": 126
},
{
"epoch": 0.2387218045112782,
"grad_norm": 4.883126735687256,
"learning_rate": 3.132832080200501e-05,
"loss": 0.4381,
"step": 127
},
{
"epoch": 0.24060150375939848,
"grad_norm": 4.2091288566589355,
"learning_rate": 3.157894736842105e-05,
"loss": 0.3985,
"step": 128
},
{
"epoch": 0.2424812030075188,
"grad_norm": 4.547649383544922,
"learning_rate": 3.182957393483709e-05,
"loss": 0.3571,
"step": 129
},
{
"epoch": 0.24436090225563908,
"grad_norm": 3.526421070098877,
"learning_rate": 3.208020050125313e-05,
"loss": 0.2185,
"step": 130
},
{
"epoch": 0.2462406015037594,
"grad_norm": 4.988466262817383,
"learning_rate": 3.233082706766917e-05,
"loss": 0.4206,
"step": 131
},
{
"epoch": 0.24812030075187969,
"grad_norm": 4.972884178161621,
"learning_rate": 3.258145363408521e-05,
"loss": 0.5639,
"step": 132
},
{
"epoch": 0.25,
"grad_norm": 5.1827898025512695,
"learning_rate": 3.2832080200501256e-05,
"loss": 0.4593,
"step": 133
},
{
"epoch": 0.2518796992481203,
"grad_norm": 4.37896203994751,
"learning_rate": 3.3082706766917295e-05,
"loss": 0.392,
"step": 134
},
{
"epoch": 0.25375939849624063,
"grad_norm": 4.721109390258789,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.4681,
"step": 135
},
{
"epoch": 0.2556390977443609,
"grad_norm": 3.8587379455566406,
"learning_rate": 3.3583959899749374e-05,
"loss": 0.2313,
"step": 136
},
{
"epoch": 0.2575187969924812,
"grad_norm": 3.4314234256744385,
"learning_rate": 3.3834586466165414e-05,
"loss": 0.2191,
"step": 137
},
{
"epoch": 0.2593984962406015,
"grad_norm": 4.806185245513916,
"learning_rate": 3.4085213032581453e-05,
"loss": 0.405,
"step": 138
},
{
"epoch": 0.26127819548872183,
"grad_norm": 4.882153511047363,
"learning_rate": 3.433583959899749e-05,
"loss": 0.4579,
"step": 139
},
{
"epoch": 0.2631578947368421,
"grad_norm": 3.507141351699829,
"learning_rate": 3.458646616541353e-05,
"loss": 0.2927,
"step": 140
},
{
"epoch": 0.2650375939849624,
"grad_norm": 3.5936336517333984,
"learning_rate": 3.483709273182957e-05,
"loss": 0.2333,
"step": 141
},
{
"epoch": 0.2669172932330827,
"grad_norm": 3.201254367828369,
"learning_rate": 3.508771929824561e-05,
"loss": 0.2328,
"step": 142
},
{
"epoch": 0.26879699248120303,
"grad_norm": 3.535102605819702,
"learning_rate": 3.533834586466165e-05,
"loss": 0.1589,
"step": 143
},
{
"epoch": 0.2706766917293233,
"grad_norm": 4.333312511444092,
"learning_rate": 3.55889724310777e-05,
"loss": 0.3064,
"step": 144
},
{
"epoch": 0.2725563909774436,
"grad_norm": 3.9214117527008057,
"learning_rate": 3.583959899749374e-05,
"loss": 0.3051,
"step": 145
},
{
"epoch": 0.2744360902255639,
"grad_norm": 3.739518880844116,
"learning_rate": 3.6090225563909776e-05,
"loss": 0.2781,
"step": 146
},
{
"epoch": 0.27631578947368424,
"grad_norm": 3.70009446144104,
"learning_rate": 3.6340852130325816e-05,
"loss": 0.2371,
"step": 147
},
{
"epoch": 0.2781954887218045,
"grad_norm": 4.299279689788818,
"learning_rate": 3.6591478696741855e-05,
"loss": 0.3233,
"step": 148
},
{
"epoch": 0.2800751879699248,
"grad_norm": 3.6360323429107666,
"learning_rate": 3.6842105263157895e-05,
"loss": 0.2306,
"step": 149
},
{
"epoch": 0.2819548872180451,
"grad_norm": 3.7798750400543213,
"learning_rate": 3.7092731829573934e-05,
"loss": 0.2543,
"step": 150
},
{
"epoch": 0.2819548872180451,
"eval_global_dataset_loss": 1.1358981132507324,
"eval_global_dataset_runtime": 76.0147,
"eval_global_dataset_samples_per_second": 12.787,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9124108243709806,
"eval_sts-test-1024_pearson_cosine": 0.9000072544243531,
"eval_sts-test-1024_spearman_cosine": 0.9145547678017197,
"eval_sts-test-1280_pearson_cosine": 0.9000105142972739,
"eval_sts-test-1280_spearman_cosine": 0.9145460860570099,
"eval_sts-test-512_pearson_cosine": 0.8967922549092373,
"eval_sts-test-512_spearman_cosine": 0.9124108243709806,
"eval_sts-test-760_pearson_cosine": 0.8975479669400709,
"eval_sts-test-760_spearman_cosine": 0.9133563737729194,
"eval_sts-test_pearson_cosine": 0.9000145065459184,
"eval_sts-test_spearman_cosine": 0.9145197723153118,
"step": 150
},
{
"epoch": 0.28383458646616544,
"grad_norm": 3.044969081878662,
"learning_rate": 3.7343358395989974e-05,
"loss": 0.232,
"step": 151
},
{
"epoch": 0.2857142857142857,
"grad_norm": 3.209074020385742,
"learning_rate": 3.759398496240601e-05,
"loss": 0.2088,
"step": 152
},
{
"epoch": 0.287593984962406,
"grad_norm": 5.255861282348633,
"learning_rate": 3.784461152882205e-05,
"loss": 0.43,
"step": 153
},
{
"epoch": 0.2894736842105263,
"grad_norm": 4.211223125457764,
"learning_rate": 3.809523809523809e-05,
"loss": 0.2591,
"step": 154
},
{
"epoch": 0.29135338345864664,
"grad_norm": 4.526270866394043,
"learning_rate": 3.834586466165413e-05,
"loss": 0.374,
"step": 155
},
{
"epoch": 0.2932330827067669,
"grad_norm": 4.315747261047363,
"learning_rate": 3.859649122807018e-05,
"loss": 0.3955,
"step": 156
},
{
"epoch": 0.2951127819548872,
"grad_norm": 3.7073168754577637,
"learning_rate": 3.884711779448622e-05,
"loss": 0.2377,
"step": 157
},
{
"epoch": 0.29699248120300753,
"grad_norm": 4.333474159240723,
"learning_rate": 3.909774436090226e-05,
"loss": 0.3472,
"step": 158
},
{
"epoch": 0.29887218045112784,
"grad_norm": 3.814296007156372,
"learning_rate": 3.9348370927318297e-05,
"loss": 0.2649,
"step": 159
},
{
"epoch": 0.3007518796992481,
"grad_norm": 4.728816509246826,
"learning_rate": 3.9598997493734336e-05,
"loss": 0.3457,
"step": 160
},
{
"epoch": 0.3026315789473684,
"grad_norm": 4.436241149902344,
"learning_rate": 3.9849624060150376e-05,
"loss": 0.3089,
"step": 161
},
{
"epoch": 0.30451127819548873,
"grad_norm": 4.423499584197998,
"learning_rate": 4.0100250626566415e-05,
"loss": 0.301,
"step": 162
},
{
"epoch": 0.30639097744360905,
"grad_norm": 4.738509654998779,
"learning_rate": 4.0350877192982455e-05,
"loss": 0.3386,
"step": 163
},
{
"epoch": 0.3082706766917293,
"grad_norm": 5.178467273712158,
"learning_rate": 4.0601503759398494e-05,
"loss": 0.458,
"step": 164
},
{
"epoch": 0.3101503759398496,
"grad_norm": 4.101895332336426,
"learning_rate": 4.0852130325814534e-05,
"loss": 0.3676,
"step": 165
},
{
"epoch": 0.31203007518796994,
"grad_norm": 4.933971881866455,
"learning_rate": 4.110275689223057e-05,
"loss": 0.5165,
"step": 166
},
{
"epoch": 0.31390977443609025,
"grad_norm": 3.5641555786132812,
"learning_rate": 4.135338345864662e-05,
"loss": 0.2245,
"step": 167
},
{
"epoch": 0.3157894736842105,
"grad_norm": 3.8185558319091797,
"learning_rate": 4.160401002506266e-05,
"loss": 0.2643,
"step": 168
},
{
"epoch": 0.3176691729323308,
"grad_norm": 5.329522132873535,
"learning_rate": 4.18546365914787e-05,
"loss": 0.4889,
"step": 169
},
{
"epoch": 0.31954887218045114,
"grad_norm": 3.6215882301330566,
"learning_rate": 4.210526315789474e-05,
"loss": 0.2034,
"step": 170
},
{
"epoch": 0.32142857142857145,
"grad_norm": 4.935929298400879,
"learning_rate": 4.235588972431078e-05,
"loss": 0.4686,
"step": 171
},
{
"epoch": 0.3233082706766917,
"grad_norm": 4.228303909301758,
"learning_rate": 4.260651629072682e-05,
"loss": 0.2751,
"step": 172
},
{
"epoch": 0.325187969924812,
"grad_norm": 4.45400857925415,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.3089,
"step": 173
},
{
"epoch": 0.32706766917293234,
"grad_norm": 3.3213822841644287,
"learning_rate": 4.3107769423558896e-05,
"loss": 0.2034,
"step": 174
},
{
"epoch": 0.32894736842105265,
"grad_norm": 4.85194206237793,
"learning_rate": 4.3358395989974935e-05,
"loss": 0.4197,
"step": 175
},
{
"epoch": 0.3308270676691729,
"grad_norm": 4.171342372894287,
"learning_rate": 4.3609022556390975e-05,
"loss": 0.2756,
"step": 176
},
{
"epoch": 0.33270676691729323,
"grad_norm": 4.365931987762451,
"learning_rate": 4.3859649122807014e-05,
"loss": 0.2734,
"step": 177
},
{
"epoch": 0.33458646616541354,
"grad_norm": 2.6044809818267822,
"learning_rate": 4.411027568922306e-05,
"loss": 0.169,
"step": 178
},
{
"epoch": 0.33646616541353386,
"grad_norm": 3.5510780811309814,
"learning_rate": 4.43609022556391e-05,
"loss": 0.2378,
"step": 179
},
{
"epoch": 0.3383458646616541,
"grad_norm": 3.4351441860198975,
"learning_rate": 4.461152882205514e-05,
"loss": 0.207,
"step": 180
},
{
"epoch": 0.34022556390977443,
"grad_norm": 2.6987929344177246,
"learning_rate": 4.486215538847118e-05,
"loss": 0.1922,
"step": 181
},
{
"epoch": 0.34210526315789475,
"grad_norm": 4.182808876037598,
"learning_rate": 4.511278195488722e-05,
"loss": 0.2401,
"step": 182
},
{
"epoch": 0.34398496240601506,
"grad_norm": 3.7199277877807617,
"learning_rate": 4.536340852130326e-05,
"loss": 0.2093,
"step": 183
},
{
"epoch": 0.3458646616541353,
"grad_norm": 3.071550130844116,
"learning_rate": 4.56140350877193e-05,
"loss": 0.1656,
"step": 184
},
{
"epoch": 0.34774436090225563,
"grad_norm": 4.635529041290283,
"learning_rate": 4.586466165413534e-05,
"loss": 0.3097,
"step": 185
},
{
"epoch": 0.34962406015037595,
"grad_norm": 3.997650146484375,
"learning_rate": 4.6115288220551377e-05,
"loss": 0.2157,
"step": 186
},
{
"epoch": 0.35150375939849626,
"grad_norm": 3.3299057483673096,
"learning_rate": 4.6365914786967416e-05,
"loss": 0.2462,
"step": 187
},
{
"epoch": 0.3533834586466165,
"grad_norm": 2.1565983295440674,
"learning_rate": 4.6616541353383456e-05,
"loss": 0.1129,
"step": 188
},
{
"epoch": 0.35526315789473684,
"grad_norm": 3.879913806915283,
"learning_rate": 4.6867167919799495e-05,
"loss": 0.2231,
"step": 189
},
{
"epoch": 0.35714285714285715,
"grad_norm": 3.759981870651245,
"learning_rate": 4.711779448621554e-05,
"loss": 0.2683,
"step": 190
},
{
"epoch": 0.35902255639097747,
"grad_norm": 1.0419526100158691,
"learning_rate": 4.736842105263158e-05,
"loss": 0.0246,
"step": 191
},
{
"epoch": 0.3609022556390977,
"grad_norm": 3.446683645248413,
"learning_rate": 4.761904761904762e-05,
"loss": 0.27,
"step": 192
},
{
"epoch": 0.36278195488721804,
"grad_norm": 4.075059413909912,
"learning_rate": 4.786967418546366e-05,
"loss": 0.3308,
"step": 193
},
{
"epoch": 0.36466165413533835,
"grad_norm": 4.341902732849121,
"learning_rate": 4.81203007518797e-05,
"loss": 0.28,
"step": 194
},
{
"epoch": 0.36654135338345867,
"grad_norm": 4.575509548187256,
"learning_rate": 4.837092731829574e-05,
"loss": 0.3338,
"step": 195
},
{
"epoch": 0.3684210526315789,
"grad_norm": 3.01415753364563,
"learning_rate": 4.862155388471178e-05,
"loss": 0.1966,
"step": 196
},
{
"epoch": 0.37030075187969924,
"grad_norm": 3.4799773693084717,
"learning_rate": 4.887218045112782e-05,
"loss": 0.1798,
"step": 197
},
{
"epoch": 0.37218045112781956,
"grad_norm": 4.396617889404297,
"learning_rate": 4.912280701754386e-05,
"loss": 0.2979,
"step": 198
},
{
"epoch": 0.37406015037593987,
"grad_norm": 4.624708652496338,
"learning_rate": 4.93734335839599e-05,
"loss": 0.3221,
"step": 199
},
{
"epoch": 0.37593984962406013,
"grad_norm": 5.769001483917236,
"learning_rate": 4.9624060150375936e-05,
"loss": 0.6034,
"step": 200
},
{
"epoch": 0.37593984962406013,
"eval_global_dataset_loss": 1.083878517150879,
"eval_global_dataset_runtime": 75.8699,
"eval_global_dataset_samples_per_second": 12.811,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9137803472233416,
"eval_sts-test-1024_pearson_cosine": 0.9014369154728474,
"eval_sts-test-1024_spearman_cosine": 0.9158642792125549,
"eval_sts-test-1280_pearson_cosine": 0.9014376697226278,
"eval_sts-test-1280_spearman_cosine": 0.9158756460329482,
"eval_sts-test-512_pearson_cosine": 0.8989734750714087,
"eval_sts-test-512_spearman_cosine": 0.9137803472233416,
"eval_sts-test-760_pearson_cosine": 0.899035552252563,
"eval_sts-test-760_spearman_cosine": 0.9145744583567318,
"eval_sts-test_pearson_cosine": 0.90143853711727,
"eval_sts-test_spearman_cosine": 0.9158528228896387,
"step": 200
},
{
"epoch": 0.37781954887218044,
"grad_norm": 3.755136489868164,
"learning_rate": 4.987468671679198e-05,
"loss": 0.2707,
"step": 201
},
{
"epoch": 0.37969924812030076,
"grad_norm": 4.266496181488037,
"learning_rate": 5.012531328320802e-05,
"loss": 0.288,
"step": 202
},
{
"epoch": 0.3815789473684211,
"grad_norm": 4.073738098144531,
"learning_rate": 5.037593984962407e-05,
"loss": 0.2101,
"step": 203
},
{
"epoch": 0.38345864661654133,
"grad_norm": 5.029702186584473,
"learning_rate": 5.06265664160401e-05,
"loss": 0.4055,
"step": 204
},
{
"epoch": 0.38533834586466165,
"grad_norm": 4.128860950469971,
"learning_rate": 5.087719298245615e-05,
"loss": 0.3662,
"step": 205
},
{
"epoch": 0.38721804511278196,
"grad_norm": 4.01772928237915,
"learning_rate": 5.112781954887218e-05,
"loss": 0.2623,
"step": 206
},
{
"epoch": 0.3890977443609023,
"grad_norm": 3.6195261478424072,
"learning_rate": 5.1378446115288226e-05,
"loss": 0.1804,
"step": 207
},
{
"epoch": 0.39097744360902253,
"grad_norm": 3.148761034011841,
"learning_rate": 5.162907268170426e-05,
"loss": 0.21,
"step": 208
},
{
"epoch": 0.39285714285714285,
"grad_norm": 5.000106334686279,
"learning_rate": 5.1879699248120305e-05,
"loss": 0.5188,
"step": 209
},
{
"epoch": 0.39473684210526316,
"grad_norm": 4.500979900360107,
"learning_rate": 5.213032581453634e-05,
"loss": 0.2961,
"step": 210
},
{
"epoch": 0.3966165413533835,
"grad_norm": 3.465824604034424,
"learning_rate": 5.2380952380952384e-05,
"loss": 0.212,
"step": 211
},
{
"epoch": 0.39849624060150374,
"grad_norm": 3.279611587524414,
"learning_rate": 5.2631578947368424e-05,
"loss": 0.2593,
"step": 212
},
{
"epoch": 0.40037593984962405,
"grad_norm": 4.062462329864502,
"learning_rate": 5.2882205513784463e-05,
"loss": 0.2851,
"step": 213
},
{
"epoch": 0.40225563909774437,
"grad_norm": 3.7090327739715576,
"learning_rate": 5.31328320802005e-05,
"loss": 0.21,
"step": 214
},
{
"epoch": 0.4041353383458647,
"grad_norm": 3.333369493484497,
"learning_rate": 5.338345864661655e-05,
"loss": 0.206,
"step": 215
},
{
"epoch": 0.40601503759398494,
"grad_norm": 4.723068714141846,
"learning_rate": 5.363408521303258e-05,
"loss": 0.4391,
"step": 216
},
{
"epoch": 0.40789473684210525,
"grad_norm": 3.584779977798462,
"learning_rate": 5.388471177944863e-05,
"loss": 0.2652,
"step": 217
},
{
"epoch": 0.40977443609022557,
"grad_norm": 1.9154776334762573,
"learning_rate": 5.413533834586466e-05,
"loss": 0.073,
"step": 218
},
{
"epoch": 0.4116541353383459,
"grad_norm": 4.774673938751221,
"learning_rate": 5.438596491228071e-05,
"loss": 0.4636,
"step": 219
},
{
"epoch": 0.41353383458646614,
"grad_norm": 4.749974727630615,
"learning_rate": 5.463659147869674e-05,
"loss": 0.4002,
"step": 220
},
{
"epoch": 0.41541353383458646,
"grad_norm": 4.607359409332275,
"learning_rate": 5.4887218045112786e-05,
"loss": 0.3869,
"step": 221
},
{
"epoch": 0.41729323308270677,
"grad_norm": 3.5087730884552,
"learning_rate": 5.513784461152882e-05,
"loss": 0.2313,
"step": 222
},
{
"epoch": 0.4191729323308271,
"grad_norm": 3.191532611846924,
"learning_rate": 5.5388471177944865e-05,
"loss": 0.177,
"step": 223
},
{
"epoch": 0.42105263157894735,
"grad_norm": 2.9819788932800293,
"learning_rate": 5.5639097744360905e-05,
"loss": 0.2246,
"step": 224
},
{
"epoch": 0.42293233082706766,
"grad_norm": 3.826098918914795,
"learning_rate": 5.5889724310776944e-05,
"loss": 0.2082,
"step": 225
},
{
"epoch": 0.424812030075188,
"grad_norm": 4.612794399261475,
"learning_rate": 5.6140350877192984e-05,
"loss": 0.3497,
"step": 226
},
{
"epoch": 0.4266917293233083,
"grad_norm": 2.861586093902588,
"learning_rate": 5.639097744360903e-05,
"loss": 0.1367,
"step": 227
},
{
"epoch": 0.42857142857142855,
"grad_norm": 3.5423388481140137,
"learning_rate": 5.664160401002506e-05,
"loss": 0.2292,
"step": 228
},
{
"epoch": 0.43045112781954886,
"grad_norm": 2.9980733394622803,
"learning_rate": 5.689223057644111e-05,
"loss": 0.1934,
"step": 229
},
{
"epoch": 0.4323308270676692,
"grad_norm": 3.1661713123321533,
"learning_rate": 5.714285714285714e-05,
"loss": 0.1817,
"step": 230
},
{
"epoch": 0.4342105263157895,
"grad_norm": 3.6269607543945312,
"learning_rate": 5.739348370927319e-05,
"loss": 0.2364,
"step": 231
},
{
"epoch": 0.43609022556390975,
"grad_norm": 2.7368719577789307,
"learning_rate": 5.764411027568922e-05,
"loss": 0.1361,
"step": 232
},
{
"epoch": 0.43796992481203006,
"grad_norm": 3.269443988800049,
"learning_rate": 5.789473684210527e-05,
"loss": 0.2478,
"step": 233
},
{
"epoch": 0.4398496240601504,
"grad_norm": 4.16945743560791,
"learning_rate": 5.81453634085213e-05,
"loss": 0.3088,
"step": 234
},
{
"epoch": 0.4417293233082707,
"grad_norm": 3.336332082748413,
"learning_rate": 5.8395989974937346e-05,
"loss": 0.2762,
"step": 235
},
{
"epoch": 0.44360902255639095,
"grad_norm": 2.378638744354248,
"learning_rate": 5.8646616541353386e-05,
"loss": 0.1596,
"step": 236
},
{
"epoch": 0.44548872180451127,
"grad_norm": 4.7622270584106445,
"learning_rate": 5.889724310776943e-05,
"loss": 0.4028,
"step": 237
},
{
"epoch": 0.4473684210526316,
"grad_norm": 3.4411444664001465,
"learning_rate": 5.9147869674185465e-05,
"loss": 0.2385,
"step": 238
},
{
"epoch": 0.4492481203007519,
"grad_norm": 2.6086032390594482,
"learning_rate": 5.939849624060151e-05,
"loss": 0.1096,
"step": 239
},
{
"epoch": 0.45112781954887216,
"grad_norm": 3.7840077877044678,
"learning_rate": 5.9649122807017544e-05,
"loss": 0.2783,
"step": 240
},
{
"epoch": 0.45300751879699247,
"grad_norm": 3.639894962310791,
"learning_rate": 5.989974937343359e-05,
"loss": 0.2536,
"step": 241
},
{
"epoch": 0.4548872180451128,
"grad_norm": 3.169121026992798,
"learning_rate": 6.015037593984962e-05,
"loss": 0.132,
"step": 242
},
{
"epoch": 0.4567669172932331,
"grad_norm": 3.416808843612671,
"learning_rate": 6.040100250626567e-05,
"loss": 0.1748,
"step": 243
},
{
"epoch": 0.45864661654135336,
"grad_norm": 2.435342311859131,
"learning_rate": 6.06516290726817e-05,
"loss": 0.0997,
"step": 244
},
{
"epoch": 0.4605263157894737,
"grad_norm": 3.853893995285034,
"learning_rate": 6.090225563909775e-05,
"loss": 0.2786,
"step": 245
},
{
"epoch": 0.462406015037594,
"grad_norm": 3.432488441467285,
"learning_rate": 6.115288220551379e-05,
"loss": 0.2071,
"step": 246
},
{
"epoch": 0.4642857142857143,
"grad_norm": 3.495830774307251,
"learning_rate": 6.140350877192983e-05,
"loss": 0.1845,
"step": 247
},
{
"epoch": 0.46616541353383456,
"grad_norm": 2.5177180767059326,
"learning_rate": 6.165413533834587e-05,
"loss": 0.1302,
"step": 248
},
{
"epoch": 0.4680451127819549,
"grad_norm": 3.5735349655151367,
"learning_rate": 6.19047619047619e-05,
"loss": 0.3023,
"step": 249
},
{
"epoch": 0.4699248120300752,
"grad_norm": 3.233510732650757,
"learning_rate": 6.215538847117795e-05,
"loss": 0.1952,
"step": 250
},
{
"epoch": 0.4699248120300752,
"eval_global_dataset_loss": 1.0789817571640015,
"eval_global_dataset_runtime": 74.4897,
"eval_global_dataset_samples_per_second": 13.049,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.913595614016318,
"eval_sts-test-1024_pearson_cosine": 0.9008086953101686,
"eval_sts-test-1024_spearman_cosine": 0.9153775197424008,
"eval_sts-test-1280_pearson_cosine": 0.9008071588604939,
"eval_sts-test-1280_spearman_cosine": 0.9153737158851823,
"eval_sts-test-512_pearson_cosine": 0.8989954925060826,
"eval_sts-test-512_spearman_cosine": 0.913595614016318,
"eval_sts-test-760_pearson_cosine": 0.8984529046187754,
"eval_sts-test-760_spearman_cosine": 0.9140583868103663,
"eval_sts-test_pearson_cosine": 0.9008049969790113,
"eval_sts-test_spearman_cosine": 0.9153696435203957,
"step": 250
},
{
"epoch": 0.4718045112781955,
"grad_norm": 3.872725486755371,
"learning_rate": 6.240601503759398e-05,
"loss": 0.2147,
"step": 251
},
{
"epoch": 0.47368421052631576,
"grad_norm": 3.6091225147247314,
"learning_rate": 6.265664160401002e-05,
"loss": 0.2907,
"step": 252
},
{
"epoch": 0.4755639097744361,
"grad_norm": 3.6047286987304688,
"learning_rate": 6.290726817042606e-05,
"loss": 0.204,
"step": 253
},
{
"epoch": 0.4774436090225564,
"grad_norm": 4.709647178649902,
"learning_rate": 6.31578947368421e-05,
"loss": 0.2603,
"step": 254
},
{
"epoch": 0.4793233082706767,
"grad_norm": 4.329948902130127,
"learning_rate": 6.340852130325816e-05,
"loss": 0.2308,
"step": 255
},
{
"epoch": 0.48120300751879697,
"grad_norm": 3.181044816970825,
"learning_rate": 6.365914786967418e-05,
"loss": 0.173,
"step": 256
},
{
"epoch": 0.4830827067669173,
"grad_norm": 3.8401877880096436,
"learning_rate": 6.390977443609024e-05,
"loss": 0.2796,
"step": 257
},
{
"epoch": 0.4849624060150376,
"grad_norm": 2.901517629623413,
"learning_rate": 6.416040100250626e-05,
"loss": 0.1085,
"step": 258
},
{
"epoch": 0.4868421052631579,
"grad_norm": 4.388332843780518,
"learning_rate": 6.441102756892231e-05,
"loss": 0.2431,
"step": 259
},
{
"epoch": 0.48872180451127817,
"grad_norm": 4.373783588409424,
"learning_rate": 6.466165413533834e-05,
"loss": 0.2521,
"step": 260
},
{
"epoch": 0.4906015037593985,
"grad_norm": 4.299677848815918,
"learning_rate": 6.49122807017544e-05,
"loss": 0.3279,
"step": 261
},
{
"epoch": 0.4924812030075188,
"grad_norm": 5.152429580688477,
"learning_rate": 6.516290726817042e-05,
"loss": 0.3679,
"step": 262
},
{
"epoch": 0.4943609022556391,
"grad_norm": 3.118044853210449,
"learning_rate": 6.541353383458647e-05,
"loss": 0.1284,
"step": 263
},
{
"epoch": 0.49624060150375937,
"grad_norm": 5.0754241943359375,
"learning_rate": 6.566416040100251e-05,
"loss": 0.3286,
"step": 264
},
{
"epoch": 0.4981203007518797,
"grad_norm": 5.476406097412109,
"learning_rate": 6.591478696741855e-05,
"loss": 0.3751,
"step": 265
},
{
"epoch": 0.5,
"grad_norm": 4.401240348815918,
"learning_rate": 6.616541353383459e-05,
"loss": 0.3392,
"step": 266
},
{
"epoch": 0.5018796992481203,
"grad_norm": 3.763532876968384,
"learning_rate": 6.641604010025063e-05,
"loss": 0.1515,
"step": 267
},
{
"epoch": 0.5037593984962406,
"grad_norm": 4.5540595054626465,
"learning_rate": 6.666666666666667e-05,
"loss": 0.2974,
"step": 268
},
{
"epoch": 0.5056390977443609,
"grad_norm": 3.9668686389923096,
"learning_rate": 6.691729323308271e-05,
"loss": 0.2106,
"step": 269
},
{
"epoch": 0.5075187969924813,
"grad_norm": 3.220064878463745,
"learning_rate": 6.716791979949875e-05,
"loss": 0.1307,
"step": 270
},
{
"epoch": 0.5093984962406015,
"grad_norm": 5.087316513061523,
"learning_rate": 6.741854636591479e-05,
"loss": 0.3075,
"step": 271
},
{
"epoch": 0.5112781954887218,
"grad_norm": 4.474050998687744,
"learning_rate": 6.766917293233083e-05,
"loss": 0.3512,
"step": 272
},
{
"epoch": 0.5131578947368421,
"grad_norm": 3.038557767868042,
"learning_rate": 6.791979949874687e-05,
"loss": 0.1349,
"step": 273
},
{
"epoch": 0.5150375939849624,
"grad_norm": 3.3275628089904785,
"learning_rate": 6.817042606516291e-05,
"loss": 0.1833,
"step": 274
},
{
"epoch": 0.5169172932330827,
"grad_norm": 4.29977560043335,
"learning_rate": 6.842105263157895e-05,
"loss": 0.2363,
"step": 275
},
{
"epoch": 0.518796992481203,
"grad_norm": 4.888143062591553,
"learning_rate": 6.867167919799499e-05,
"loss": 0.3437,
"step": 276
},
{
"epoch": 0.5206766917293233,
"grad_norm": 3.9522321224212646,
"learning_rate": 6.892230576441104e-05,
"loss": 0.2152,
"step": 277
},
{
"epoch": 0.5225563909774437,
"grad_norm": 4.254467010498047,
"learning_rate": 6.917293233082706e-05,
"loss": 0.2306,
"step": 278
},
{
"epoch": 0.5244360902255639,
"grad_norm": 3.122506856918335,
"learning_rate": 6.942355889724312e-05,
"loss": 0.1523,
"step": 279
},
{
"epoch": 0.5263157894736842,
"grad_norm": 3.234250068664551,
"learning_rate": 6.967418546365914e-05,
"loss": 0.2025,
"step": 280
},
{
"epoch": 0.5281954887218046,
"grad_norm": 3.9348092079162598,
"learning_rate": 6.99248120300752e-05,
"loss": 0.2563,
"step": 281
},
{
"epoch": 0.5300751879699248,
"grad_norm": 3.6671388149261475,
"learning_rate": 7.017543859649122e-05,
"loss": 0.1861,
"step": 282
},
{
"epoch": 0.5319548872180451,
"grad_norm": 3.2786614894866943,
"learning_rate": 7.042606516290728e-05,
"loss": 0.1602,
"step": 283
},
{
"epoch": 0.5338345864661654,
"grad_norm": 3.0137619972229004,
"learning_rate": 7.06766917293233e-05,
"loss": 0.2251,
"step": 284
},
{
"epoch": 0.5357142857142857,
"grad_norm": 4.028774261474609,
"learning_rate": 7.092731829573935e-05,
"loss": 0.2004,
"step": 285
},
{
"epoch": 0.5375939849624061,
"grad_norm": 3.455634355545044,
"learning_rate": 7.11779448621554e-05,
"loss": 0.2024,
"step": 286
},
{
"epoch": 0.5394736842105263,
"grad_norm": 3.2219948768615723,
"learning_rate": 7.142857142857143e-05,
"loss": 0.1639,
"step": 287
},
{
"epoch": 0.5413533834586466,
"grad_norm": 3.3190667629241943,
"learning_rate": 7.167919799498747e-05,
"loss": 0.205,
"step": 288
},
{
"epoch": 0.543233082706767,
"grad_norm": 3.935049295425415,
"learning_rate": 7.192982456140351e-05,
"loss": 0.2216,
"step": 289
},
{
"epoch": 0.5451127819548872,
"grad_norm": 3.7380313873291016,
"learning_rate": 7.218045112781955e-05,
"loss": 0.2815,
"step": 290
},
{
"epoch": 0.5469924812030075,
"grad_norm": 4.446345806121826,
"learning_rate": 7.243107769423559e-05,
"loss": 0.2416,
"step": 291
},
{
"epoch": 0.5488721804511278,
"grad_norm": 3.7456045150756836,
"learning_rate": 7.268170426065163e-05,
"loss": 0.3183,
"step": 292
},
{
"epoch": 0.5507518796992481,
"grad_norm": 5.020063400268555,
"learning_rate": 7.293233082706767e-05,
"loss": 0.3881,
"step": 293
},
{
"epoch": 0.5526315789473685,
"grad_norm": 2.9395172595977783,
"learning_rate": 7.318295739348371e-05,
"loss": 0.1166,
"step": 294
},
{
"epoch": 0.5545112781954887,
"grad_norm": 3.341320276260376,
"learning_rate": 7.343358395989975e-05,
"loss": 0.1939,
"step": 295
},
{
"epoch": 0.556390977443609,
"grad_norm": 2.8824589252471924,
"learning_rate": 7.368421052631579e-05,
"loss": 0.1113,
"step": 296
},
{
"epoch": 0.5582706766917294,
"grad_norm": 3.57820463180542,
"learning_rate": 7.393483709273183e-05,
"loss": 0.2423,
"step": 297
},
{
"epoch": 0.5601503759398496,
"grad_norm": 3.81009578704834,
"learning_rate": 7.418546365914787e-05,
"loss": 0.2569,
"step": 298
},
{
"epoch": 0.5620300751879699,
"grad_norm": 4.205862998962402,
"learning_rate": 7.443609022556392e-05,
"loss": 0.3817,
"step": 299
},
{
"epoch": 0.5639097744360902,
"grad_norm": 3.4671545028686523,
"learning_rate": 7.468671679197995e-05,
"loss": 0.1794,
"step": 300
},
{
"epoch": 0.5639097744360902,
"eval_global_dataset_loss": 1.0346671342849731,
"eval_global_dataset_runtime": 75.5018,
"eval_global_dataset_samples_per_second": 12.874,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9143511495623889,
"eval_sts-test-1024_pearson_cosine": 0.9000323597244693,
"eval_sts-test-1024_spearman_cosine": 0.9162368782148997,
"eval_sts-test-1280_pearson_cosine": 0.9000281200118452,
"eval_sts-test-1280_spearman_cosine": 0.9162393842855376,
"eval_sts-test-512_pearson_cosine": 0.8981734518979707,
"eval_sts-test-512_spearman_cosine": 0.9143511495623889,
"eval_sts-test-760_pearson_cosine": 0.8975195986295481,
"eval_sts-test-760_spearman_cosine": 0.9148535719740302,
"eval_sts-test_pearson_cosine": 0.9000245395073697,
"eval_sts-test_spearman_cosine": 0.9162469024974514,
"step": 300
},
{
"epoch": 0.5657894736842105,
"grad_norm": 3.4575231075286865,
"learning_rate": 7.4937343358396e-05,
"loss": 0.207,
"step": 301
},
{
"epoch": 0.5676691729323309,
"grad_norm": 4.194314956665039,
"learning_rate": 7.518796992481203e-05,
"loss": 0.28,
"step": 302
},
{
"epoch": 0.5695488721804511,
"grad_norm": 3.3316540718078613,
"learning_rate": 7.543859649122808e-05,
"loss": 0.2256,
"step": 303
},
{
"epoch": 0.5714285714285714,
"grad_norm": 2.6786365509033203,
"learning_rate": 7.56892230576441e-05,
"loss": 0.1659,
"step": 304
},
{
"epoch": 0.5733082706766918,
"grad_norm": 3.0687808990478516,
"learning_rate": 7.593984962406016e-05,
"loss": 0.1587,
"step": 305
},
{
"epoch": 0.575187969924812,
"grad_norm": 5.067666530609131,
"learning_rate": 7.619047619047618e-05,
"loss": 0.4479,
"step": 306
},
{
"epoch": 0.5770676691729323,
"grad_norm": 2.9828665256500244,
"learning_rate": 7.644110275689224e-05,
"loss": 0.1649,
"step": 307
},
{
"epoch": 0.5789473684210527,
"grad_norm": 4.292194366455078,
"learning_rate": 7.669172932330826e-05,
"loss": 0.402,
"step": 308
},
{
"epoch": 0.5808270676691729,
"grad_norm": 4.088753700256348,
"learning_rate": 7.694235588972432e-05,
"loss": 0.3003,
"step": 309
},
{
"epoch": 0.5827067669172933,
"grad_norm": 3.17425799369812,
"learning_rate": 7.719298245614036e-05,
"loss": 0.1697,
"step": 310
},
{
"epoch": 0.5845864661654135,
"grad_norm": 3.302765130996704,
"learning_rate": 7.74436090225564e-05,
"loss": 0.1789,
"step": 311
},
{
"epoch": 0.5864661654135338,
"grad_norm": 3.9758617877960205,
"learning_rate": 7.769423558897244e-05,
"loss": 0.3012,
"step": 312
},
{
"epoch": 0.5883458646616542,
"grad_norm": 3.0176424980163574,
"learning_rate": 7.794486215538847e-05,
"loss": 0.1306,
"step": 313
},
{
"epoch": 0.5902255639097744,
"grad_norm": 3.656402349472046,
"learning_rate": 7.819548872180451e-05,
"loss": 0.2429,
"step": 314
},
{
"epoch": 0.5921052631578947,
"grad_norm": 3.609954833984375,
"learning_rate": 7.844611528822055e-05,
"loss": 0.2456,
"step": 315
},
{
"epoch": 0.5939849624060151,
"grad_norm": 4.076263427734375,
"learning_rate": 7.869674185463659e-05,
"loss": 0.2612,
"step": 316
},
{
"epoch": 0.5958646616541353,
"grad_norm": 2.1173534393310547,
"learning_rate": 7.894736842105263e-05,
"loss": 0.071,
"step": 317
},
{
"epoch": 0.5977443609022557,
"grad_norm": 2.5280675888061523,
"learning_rate": 7.919799498746867e-05,
"loss": 0.1342,
"step": 318
},
{
"epoch": 0.599624060150376,
"grad_norm": 2.712512254714966,
"learning_rate": 7.944862155388471e-05,
"loss": 0.1107,
"step": 319
},
{
"epoch": 0.6015037593984962,
"grad_norm": 3.069580316543579,
"learning_rate": 7.969924812030075e-05,
"loss": 0.1375,
"step": 320
},
{
"epoch": 0.6033834586466166,
"grad_norm": 3.0585827827453613,
"learning_rate": 7.994987468671679e-05,
"loss": 0.1394,
"step": 321
},
{
"epoch": 0.6052631578947368,
"grad_norm": 3.4839272499084473,
"learning_rate": 8.020050125313283e-05,
"loss": 0.2689,
"step": 322
},
{
"epoch": 0.6071428571428571,
"grad_norm": 3.538287401199341,
"learning_rate": 8.045112781954888e-05,
"loss": 0.2019,
"step": 323
},
{
"epoch": 0.6090225563909775,
"grad_norm": 4.231574535369873,
"learning_rate": 8.070175438596491e-05,
"loss": 0.247,
"step": 324
},
{
"epoch": 0.6109022556390977,
"grad_norm": 2.747537612915039,
"learning_rate": 8.095238095238096e-05,
"loss": 0.0957,
"step": 325
},
{
"epoch": 0.6127819548872181,
"grad_norm": 3.517162561416626,
"learning_rate": 8.120300751879699e-05,
"loss": 0.2257,
"step": 326
},
{
"epoch": 0.6146616541353384,
"grad_norm": 3.922318458557129,
"learning_rate": 8.145363408521304e-05,
"loss": 0.2134,
"step": 327
},
{
"epoch": 0.6165413533834586,
"grad_norm": 3.7342982292175293,
"learning_rate": 8.170426065162907e-05,
"loss": 0.2157,
"step": 328
},
{
"epoch": 0.618421052631579,
"grad_norm": 3.6022825241088867,
"learning_rate": 8.195488721804512e-05,
"loss": 0.2729,
"step": 329
},
{
"epoch": 0.6203007518796992,
"grad_norm": 3.2551891803741455,
"learning_rate": 8.220551378446115e-05,
"loss": 0.1582,
"step": 330
},
{
"epoch": 0.6221804511278195,
"grad_norm": 2.9833812713623047,
"learning_rate": 8.24561403508772e-05,
"loss": 0.1599,
"step": 331
},
{
"epoch": 0.6240601503759399,
"grad_norm": 4.321685791015625,
"learning_rate": 8.270676691729324e-05,
"loss": 0.216,
"step": 332
},
{
"epoch": 0.6259398496240601,
"grad_norm": 3.155758857727051,
"learning_rate": 8.295739348370928e-05,
"loss": 0.1367,
"step": 333
},
{
"epoch": 0.6278195488721805,
"grad_norm": 4.673583984375,
"learning_rate": 8.320802005012532e-05,
"loss": 0.2675,
"step": 334
},
{
"epoch": 0.6296992481203008,
"grad_norm": 4.123340129852295,
"learning_rate": 8.345864661654136e-05,
"loss": 0.3074,
"step": 335
},
{
"epoch": 0.631578947368421,
"grad_norm": 3.709581136703491,
"learning_rate": 8.37092731829574e-05,
"loss": 0.1689,
"step": 336
},
{
"epoch": 0.6334586466165414,
"grad_norm": 4.334682941436768,
"learning_rate": 8.395989974937344e-05,
"loss": 0.2549,
"step": 337
},
{
"epoch": 0.6353383458646616,
"grad_norm": 2.5441734790802,
"learning_rate": 8.421052631578948e-05,
"loss": 0.1448,
"step": 338
},
{
"epoch": 0.6372180451127819,
"grad_norm": 3.9318222999572754,
"learning_rate": 8.446115288220552e-05,
"loss": 0.2533,
"step": 339
},
{
"epoch": 0.6390977443609023,
"grad_norm": 4.122605323791504,
"learning_rate": 8.471177944862155e-05,
"loss": 0.3232,
"step": 340
},
{
"epoch": 0.6409774436090225,
"grad_norm": 3.933474540710449,
"learning_rate": 8.49624060150376e-05,
"loss": 0.1825,
"step": 341
},
{
"epoch": 0.6428571428571429,
"grad_norm": 4.069019317626953,
"learning_rate": 8.521303258145363e-05,
"loss": 0.2873,
"step": 342
},
{
"epoch": 0.6447368421052632,
"grad_norm": 4.113502025604248,
"learning_rate": 8.546365914786967e-05,
"loss": 0.2546,
"step": 343
},
{
"epoch": 0.6466165413533834,
"grad_norm": 3.7378008365631104,
"learning_rate": 8.571428571428571e-05,
"loss": 0.2048,
"step": 344
},
{
"epoch": 0.6484962406015038,
"grad_norm": 4.110744476318359,
"learning_rate": 8.596491228070177e-05,
"loss": 0.2674,
"step": 345
},
{
"epoch": 0.650375939849624,
"grad_norm": 3.0982956886291504,
"learning_rate": 8.621553884711779e-05,
"loss": 0.1629,
"step": 346
},
{
"epoch": 0.6522556390977443,
"grad_norm": 2.9119224548339844,
"learning_rate": 8.646616541353384e-05,
"loss": 0.1747,
"step": 347
},
{
"epoch": 0.6541353383458647,
"grad_norm": 3.0742011070251465,
"learning_rate": 8.671679197994987e-05,
"loss": 0.1784,
"step": 348
},
{
"epoch": 0.6560150375939849,
"grad_norm": 3.249251127243042,
"learning_rate": 8.696741854636592e-05,
"loss": 0.2269,
"step": 349
},
{
"epoch": 0.6578947368421053,
"grad_norm": 4.456097602844238,
"learning_rate": 8.721804511278195e-05,
"loss": 0.4473,
"step": 350
},
{
"epoch": 0.6578947368421053,
"eval_global_dataset_loss": 1.0552350282669067,
"eval_global_dataset_runtime": 74.8399,
"eval_global_dataset_samples_per_second": 12.988,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9164483279249741,
"eval_sts-test-1024_pearson_cosine": 0.9032664609316077,
"eval_sts-test-1024_spearman_cosine": 0.9181316466222116,
"eval_sts-test-1280_pearson_cosine": 0.903261190428662,
"eval_sts-test-1280_spearman_cosine": 0.9181101212654824,
"eval_sts-test-512_pearson_cosine": 0.9002490696708711,
"eval_sts-test-512_spearman_cosine": 0.9164483279249741,
"eval_sts-test-760_pearson_cosine": 0.9008742248612986,
"eval_sts-test-760_spearman_cosine": 0.9173301067794302,
"eval_sts-test_pearson_cosine": 0.9032553577987494,
"eval_sts-test_spearman_cosine": 0.9181009920081584,
"step": 350
},
{
"epoch": 0.6597744360902256,
"grad_norm": 2.178828716278076,
"learning_rate": 8.7468671679198e-05,
"loss": 0.1349,
"step": 351
},
{
"epoch": 0.6616541353383458,
"grad_norm": 3.7205710411071777,
"learning_rate": 8.771929824561403e-05,
"loss": 0.2307,
"step": 352
},
{
"epoch": 0.6635338345864662,
"grad_norm": 4.17662239074707,
"learning_rate": 8.796992481203008e-05,
"loss": 0.3436,
"step": 353
},
{
"epoch": 0.6654135338345865,
"grad_norm": 4.62085485458374,
"learning_rate": 8.822055137844612e-05,
"loss": 0.4285,
"step": 354
},
{
"epoch": 0.6672932330827067,
"grad_norm": 3.5682456493377686,
"learning_rate": 8.847117794486216e-05,
"loss": 0.2067,
"step": 355
},
{
"epoch": 0.6691729323308271,
"grad_norm": 4.600581645965576,
"learning_rate": 8.87218045112782e-05,
"loss": 0.3689,
"step": 356
},
{
"epoch": 0.6710526315789473,
"grad_norm": 4.159579753875732,
"learning_rate": 8.897243107769424e-05,
"loss": 0.267,
"step": 357
},
{
"epoch": 0.6729323308270677,
"grad_norm": 2.1827890872955322,
"learning_rate": 8.922305764411028e-05,
"loss": 0.0947,
"step": 358
},
{
"epoch": 0.674812030075188,
"grad_norm": 2.3302104473114014,
"learning_rate": 8.947368421052632e-05,
"loss": 0.1395,
"step": 359
},
{
"epoch": 0.6766917293233082,
"grad_norm": 2.3280320167541504,
"learning_rate": 8.972431077694236e-05,
"loss": 0.0728,
"step": 360
},
{
"epoch": 0.6785714285714286,
"grad_norm": 4.282434940338135,
"learning_rate": 8.99749373433584e-05,
"loss": 0.3466,
"step": 361
},
{
"epoch": 0.6804511278195489,
"grad_norm": 2.6343634128570557,
"learning_rate": 9.022556390977444e-05,
"loss": 0.118,
"step": 362
},
{
"epoch": 0.6823308270676691,
"grad_norm": 3.3122928142547607,
"learning_rate": 9.047619047619048e-05,
"loss": 0.2302,
"step": 363
},
{
"epoch": 0.6842105263157895,
"grad_norm": 3.051161289215088,
"learning_rate": 9.072681704260652e-05,
"loss": 0.1604,
"step": 364
},
{
"epoch": 0.6860902255639098,
"grad_norm": 3.329271078109741,
"learning_rate": 9.097744360902256e-05,
"loss": 0.2416,
"step": 365
},
{
"epoch": 0.6879699248120301,
"grad_norm": 3.8991355895996094,
"learning_rate": 9.12280701754386e-05,
"loss": 0.3026,
"step": 366
},
{
"epoch": 0.6898496240601504,
"grad_norm": 3.379802703857422,
"learning_rate": 9.147869674185465e-05,
"loss": 0.205,
"step": 367
},
{
"epoch": 0.6917293233082706,
"grad_norm": 3.8771727085113525,
"learning_rate": 9.172932330827067e-05,
"loss": 0.2291,
"step": 368
},
{
"epoch": 0.693609022556391,
"grad_norm": 4.642533779144287,
"learning_rate": 9.197994987468673e-05,
"loss": 0.3908,
"step": 369
},
{
"epoch": 0.6954887218045113,
"grad_norm": 3.090883493423462,
"learning_rate": 9.223057644110275e-05,
"loss": 0.2343,
"step": 370
},
{
"epoch": 0.6973684210526315,
"grad_norm": 3.6720900535583496,
"learning_rate": 9.24812030075188e-05,
"loss": 0.2384,
"step": 371
},
{
"epoch": 0.6992481203007519,
"grad_norm": 3.4769504070281982,
"learning_rate": 9.273182957393483e-05,
"loss": 0.304,
"step": 372
},
{
"epoch": 0.7011278195488722,
"grad_norm": 2.949640989303589,
"learning_rate": 9.298245614035089e-05,
"loss": 0.1508,
"step": 373
},
{
"epoch": 0.7030075187969925,
"grad_norm": 2.618457317352295,
"learning_rate": 9.323308270676691e-05,
"loss": 0.1184,
"step": 374
},
{
"epoch": 0.7048872180451128,
"grad_norm": 3.8140110969543457,
"learning_rate": 9.348370927318296e-05,
"loss": 0.2863,
"step": 375
},
{
"epoch": 0.706766917293233,
"grad_norm": 3.744682788848877,
"learning_rate": 9.373433583959899e-05,
"loss": 0.243,
"step": 376
},
{
"epoch": 0.7086466165413534,
"grad_norm": 3.3721110820770264,
"learning_rate": 9.398496240601504e-05,
"loss": 0.2347,
"step": 377
},
{
"epoch": 0.7105263157894737,
"grad_norm": 3.7434332370758057,
"learning_rate": 9.423558897243108e-05,
"loss": 0.2225,
"step": 378
},
{
"epoch": 0.7124060150375939,
"grad_norm": 2.8612782955169678,
"learning_rate": 9.448621553884712e-05,
"loss": 0.1221,
"step": 379
},
{
"epoch": 0.7142857142857143,
"grad_norm": 1.6849597692489624,
"learning_rate": 9.473684210526316e-05,
"loss": 0.0915,
"step": 380
},
{
"epoch": 0.7161654135338346,
"grad_norm": 4.169658184051514,
"learning_rate": 9.49874686716792e-05,
"loss": 0.2929,
"step": 381
},
{
"epoch": 0.7180451127819549,
"grad_norm": 2.304687976837158,
"learning_rate": 9.523809523809524e-05,
"loss": 0.1045,
"step": 382
},
{
"epoch": 0.7199248120300752,
"grad_norm": 3.5404067039489746,
"learning_rate": 9.548872180451128e-05,
"loss": 0.2764,
"step": 383
},
{
"epoch": 0.7218045112781954,
"grad_norm": 3.1424617767333984,
"learning_rate": 9.573934837092732e-05,
"loss": 0.1787,
"step": 384
},
{
"epoch": 0.7236842105263158,
"grad_norm": 4.354782581329346,
"learning_rate": 9.598997493734336e-05,
"loss": 0.3038,
"step": 385
},
{
"epoch": 0.7255639097744361,
"grad_norm": 2.4454684257507324,
"learning_rate": 9.62406015037594e-05,
"loss": 0.1276,
"step": 386
},
{
"epoch": 0.7274436090225563,
"grad_norm": 4.368272304534912,
"learning_rate": 9.649122807017544e-05,
"loss": 0.318,
"step": 387
},
{
"epoch": 0.7293233082706767,
"grad_norm": 2.330340623855591,
"learning_rate": 9.674185463659148e-05,
"loss": 0.1114,
"step": 388
},
{
"epoch": 0.731203007518797,
"grad_norm": 2.093837261199951,
"learning_rate": 9.699248120300752e-05,
"loss": 0.0779,
"step": 389
},
{
"epoch": 0.7330827067669173,
"grad_norm": 2.6749417781829834,
"learning_rate": 9.724310776942356e-05,
"loss": 0.1246,
"step": 390
},
{
"epoch": 0.7349624060150376,
"grad_norm": 3.0585834980010986,
"learning_rate": 9.749373433583961e-05,
"loss": 0.1865,
"step": 391
},
{
"epoch": 0.7368421052631579,
"grad_norm": 3.205200433731079,
"learning_rate": 9.774436090225564e-05,
"loss": 0.1603,
"step": 392
},
{
"epoch": 0.7387218045112782,
"grad_norm": 4.226044654846191,
"learning_rate": 9.799498746867169e-05,
"loss": 0.3891,
"step": 393
},
{
"epoch": 0.7406015037593985,
"grad_norm": 2.3619284629821777,
"learning_rate": 9.824561403508771e-05,
"loss": 0.0831,
"step": 394
},
{
"epoch": 0.7424812030075187,
"grad_norm": 2.842703104019165,
"learning_rate": 9.849624060150377e-05,
"loss": 0.2145,
"step": 395
},
{
"epoch": 0.7443609022556391,
"grad_norm": 3.2277991771698,
"learning_rate": 9.87468671679198e-05,
"loss": 0.1798,
"step": 396
},
{
"epoch": 0.7462406015037594,
"grad_norm": 3.115555763244629,
"learning_rate": 9.899749373433585e-05,
"loss": 0.2372,
"step": 397
},
{
"epoch": 0.7481203007518797,
"grad_norm": 4.073906421661377,
"learning_rate": 9.924812030075187e-05,
"loss": 0.2344,
"step": 398
},
{
"epoch": 0.75,
"grad_norm": 2.634641408920288,
"learning_rate": 9.949874686716793e-05,
"loss": 0.1169,
"step": 399
},
{
"epoch": 0.7518796992481203,
"grad_norm": 2.948498010635376,
"learning_rate": 9.974937343358397e-05,
"loss": 0.1729,
"step": 400
},
{
"epoch": 0.7518796992481203,
"eval_global_dataset_loss": 1.039243221282959,
"eval_global_dataset_runtime": 73.9438,
"eval_global_dataset_samples_per_second": 13.145,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.915137787235127,
"eval_sts-test-1024_pearson_cosine": 0.8991702968527955,
"eval_sts-test-1024_spearman_cosine": 0.9174485633683334,
"eval_sts-test-1280_pearson_cosine": 0.899162232134754,
"eval_sts-test-1280_spearman_cosine": 0.9174606014576475,
"eval_sts-test-512_pearson_cosine": 0.8962762177120662,
"eval_sts-test-512_spearman_cosine": 0.915137787235127,
"eval_sts-test-760_pearson_cosine": 0.8970663329368722,
"eval_sts-test-760_spearman_cosine": 0.9162537046891828,
"eval_sts-test_pearson_cosine": 0.8991549992060922,
"eval_sts-test_spearman_cosine": 0.9174283805494456,
"step": 400
},
{
"epoch": 0.7537593984962406,
"grad_norm": 3.9373810291290283,
"learning_rate": 0.0001,
"loss": 0.2767,
"step": 401
},
{
"epoch": 0.7556390977443609,
"grad_norm": 2.065608263015747,
"learning_rate": 9.999987084463017e-05,
"loss": 0.0738,
"step": 402
},
{
"epoch": 0.7575187969924813,
"grad_norm": 3.8259966373443604,
"learning_rate": 9.999948337941033e-05,
"loss": 0.2413,
"step": 403
},
{
"epoch": 0.7593984962406015,
"grad_norm": 3.622448444366455,
"learning_rate": 9.999883760700945e-05,
"loss": 0.2307,
"step": 404
},
{
"epoch": 0.7612781954887218,
"grad_norm": 3.470798969268799,
"learning_rate": 9.999793353187582e-05,
"loss": 0.2238,
"step": 405
},
{
"epoch": 0.7631578947368421,
"grad_norm": 3.9379584789276123,
"learning_rate": 9.999677116023694e-05,
"loss": 0.264,
"step": 406
},
{
"epoch": 0.7650375939849624,
"grad_norm": 3.8329389095306396,
"learning_rate": 9.999535050009956e-05,
"loss": 0.2212,
"step": 407
},
{
"epoch": 0.7669172932330827,
"grad_norm": 3.588489055633545,
"learning_rate": 9.99936715612496e-05,
"loss": 0.1936,
"step": 408
},
{
"epoch": 0.768796992481203,
"grad_norm": 2.136953830718994,
"learning_rate": 9.999173435525209e-05,
"loss": 0.0843,
"step": 409
},
{
"epoch": 0.7706766917293233,
"grad_norm": 2.8177402019500732,
"learning_rate": 9.998953889545103e-05,
"loss": 0.1398,
"step": 410
},
{
"epoch": 0.7725563909774437,
"grad_norm": 4.3074951171875,
"learning_rate": 9.99870851969694e-05,
"loss": 0.2536,
"step": 411
},
{
"epoch": 0.7744360902255639,
"grad_norm": 4.085883140563965,
"learning_rate": 9.998437327670895e-05,
"loss": 0.2524,
"step": 412
},
{
"epoch": 0.7763157894736842,
"grad_norm": 1.841971755027771,
"learning_rate": 9.998140315335021e-05,
"loss": 0.0817,
"step": 413
},
{
"epoch": 0.7781954887218046,
"grad_norm": 3.460721969604492,
"learning_rate": 9.997817484735217e-05,
"loss": 0.187,
"step": 414
},
{
"epoch": 0.7800751879699248,
"grad_norm": 3.2461087703704834,
"learning_rate": 9.997468838095238e-05,
"loss": 0.2202,
"step": 415
},
{
"epoch": 0.7819548872180451,
"grad_norm": 5.019058704376221,
"learning_rate": 9.997094377816663e-05,
"loss": 0.4688,
"step": 416
},
{
"epoch": 0.7838345864661654,
"grad_norm": 3.9036221504211426,
"learning_rate": 9.996694106478878e-05,
"loss": 0.2748,
"step": 417
},
{
"epoch": 0.7857142857142857,
"grad_norm": 3.2269797325134277,
"learning_rate": 9.996268026839068e-05,
"loss": 0.1784,
"step": 418
},
{
"epoch": 0.7875939849624061,
"grad_norm": 3.3627684116363525,
"learning_rate": 9.99581614183219e-05,
"loss": 0.181,
"step": 419
},
{
"epoch": 0.7894736842105263,
"grad_norm": 4.656357288360596,
"learning_rate": 9.995338454570959e-05,
"loss": 0.3211,
"step": 420
},
{
"epoch": 0.7913533834586466,
"grad_norm": 3.5906982421875,
"learning_rate": 9.994834968345821e-05,
"loss": 0.1609,
"step": 421
},
{
"epoch": 0.793233082706767,
"grad_norm": 3.2143733501434326,
"learning_rate": 9.994305686624937e-05,
"loss": 0.1783,
"step": 422
},
{
"epoch": 0.7951127819548872,
"grad_norm": 4.057474136352539,
"learning_rate": 9.993750613054145e-05,
"loss": 0.2027,
"step": 423
},
{
"epoch": 0.7969924812030075,
"grad_norm": 3.9779932498931885,
"learning_rate": 9.993169751456954e-05,
"loss": 0.3005,
"step": 424
},
{
"epoch": 0.7988721804511278,
"grad_norm": 1.715262770652771,
"learning_rate": 9.992563105834505e-05,
"loss": 0.0396,
"step": 425
},
{
"epoch": 0.8007518796992481,
"grad_norm": 2.0986955165863037,
"learning_rate": 9.991930680365547e-05,
"loss": 0.0633,
"step": 426
},
{
"epoch": 0.8026315789473685,
"grad_norm": 3.5883572101593018,
"learning_rate": 9.991272479406406e-05,
"loss": 0.2468,
"step": 427
},
{
"epoch": 0.8045112781954887,
"grad_norm": 3.2390847206115723,
"learning_rate": 9.990588507490959e-05,
"loss": 0.1822,
"step": 428
},
{
"epoch": 0.806390977443609,
"grad_norm": 5.052639007568359,
"learning_rate": 9.989878769330603e-05,
"loss": 0.4503,
"step": 429
},
{
"epoch": 0.8082706766917294,
"grad_norm": 2.2490901947021484,
"learning_rate": 9.989143269814216e-05,
"loss": 0.0755,
"step": 430
},
{
"epoch": 0.8101503759398496,
"grad_norm": 3.330777883529663,
"learning_rate": 9.988382014008129e-05,
"loss": 0.1746,
"step": 431
},
{
"epoch": 0.8120300751879699,
"grad_norm": 3.147256374359131,
"learning_rate": 9.98759500715609e-05,
"loss": 0.1353,
"step": 432
},
{
"epoch": 0.8139097744360902,
"grad_norm": 1.620094656944275,
"learning_rate": 9.986782254679227e-05,
"loss": 0.0427,
"step": 433
},
{
"epoch": 0.8157894736842105,
"grad_norm": 4.131981372833252,
"learning_rate": 9.985943762176013e-05,
"loss": 0.2745,
"step": 434
},
{
"epoch": 0.8176691729323309,
"grad_norm": 3.36614727973938,
"learning_rate": 9.985079535422226e-05,
"loss": 0.1701,
"step": 435
},
{
"epoch": 0.8195488721804511,
"grad_norm": 2.6499199867248535,
"learning_rate": 9.984189580370904e-05,
"loss": 0.1108,
"step": 436
},
{
"epoch": 0.8214285714285714,
"grad_norm": 3.2427146434783936,
"learning_rate": 9.983273903152314e-05,
"loss": 0.1247,
"step": 437
},
{
"epoch": 0.8233082706766918,
"grad_norm": 3.399001359939575,
"learning_rate": 9.982332510073902e-05,
"loss": 0.2483,
"step": 438
},
{
"epoch": 0.825187969924812,
"grad_norm": 4.0499653816223145,
"learning_rate": 9.981365407620256e-05,
"loss": 0.2491,
"step": 439
},
{
"epoch": 0.8270676691729323,
"grad_norm": 4.519603252410889,
"learning_rate": 9.980372602453051e-05,
"loss": 0.2228,
"step": 440
},
{
"epoch": 0.8289473684210527,
"grad_norm": 4.894527912139893,
"learning_rate": 9.979354101411016e-05,
"loss": 0.339,
"step": 441
},
{
"epoch": 0.8308270676691729,
"grad_norm": 3.9160590171813965,
"learning_rate": 9.978309911509875e-05,
"loss": 0.2636,
"step": 442
},
{
"epoch": 0.8327067669172933,
"grad_norm": 2.9062135219573975,
"learning_rate": 9.977240039942311e-05,
"loss": 0.1255,
"step": 443
},
{
"epoch": 0.8345864661654135,
"grad_norm": 3.9057412147521973,
"learning_rate": 9.976144494077903e-05,
"loss": 0.2707,
"step": 444
},
{
"epoch": 0.8364661654135338,
"grad_norm": 1.3283665180206299,
"learning_rate": 9.975023281463085e-05,
"loss": 0.0358,
"step": 445
},
{
"epoch": 0.8383458646616542,
"grad_norm": 2.5466246604919434,
"learning_rate": 9.97387640982109e-05,
"loss": 0.1194,
"step": 446
},
{
"epoch": 0.8402255639097744,
"grad_norm": 4.229319095611572,
"learning_rate": 9.9727038870519e-05,
"loss": 0.2849,
"step": 447
},
{
"epoch": 0.8421052631578947,
"grad_norm": 2.857790946960449,
"learning_rate": 9.971505721232187e-05,
"loss": 0.1339,
"step": 448
},
{
"epoch": 0.8439849624060151,
"grad_norm": 4.15946626663208,
"learning_rate": 9.970281920615261e-05,
"loss": 0.2603,
"step": 449
},
{
"epoch": 0.8458646616541353,
"grad_norm": 2.4936351776123047,
"learning_rate": 9.96903249363101e-05,
"loss": 0.108,
"step": 450
},
{
"epoch": 0.8458646616541353,
"eval_global_dataset_loss": 1.0451455116271973,
"eval_global_dataset_runtime": 73.2483,
"eval_global_dataset_samples_per_second": 13.27,
"eval_global_dataset_steps_per_second": 0.055,
"eval_sequential_score": 0.9173902524747404,
"eval_sts-test-1024_pearson_cosine": 0.9024151738171587,
"eval_sts-test-1024_spearman_cosine": 0.9166481870583482,
"eval_sts-test-1280_pearson_cosine": 0.9024051394107033,
"eval_sts-test-1280_spearman_cosine": 0.9166922222995573,
"eval_sts-test-512_pearson_cosine": 0.9034025337392552,
"eval_sts-test-512_spearman_cosine": 0.9173902524747404,
"eval_sts-test-760_pearson_cosine": 0.9010373056134615,
"eval_sts-test-760_spearman_cosine": 0.9162833300242239,
"eval_sts-test_pearson_cosine": 0.9023936089136726,
"eval_sts-test_spearman_cosine": 0.91669906924255,
"step": 450
},
{
"epoch": 0.8477443609022557,
"grad_norm": 3.1610307693481445,
"learning_rate": 9.967757448885844e-05,
"loss": 0.1248,
"step": 451
},
{
"epoch": 0.849624060150376,
"grad_norm": 3.9296188354492188,
"learning_rate": 9.966456795162638e-05,
"loss": 0.1983,
"step": 452
},
{
"epoch": 0.8515037593984962,
"grad_norm": 3.3089802265167236,
"learning_rate": 9.965130541420667e-05,
"loss": 0.2077,
"step": 453
},
{
"epoch": 0.8533834586466166,
"grad_norm": 3.975613594055176,
"learning_rate": 9.963778696795546e-05,
"loss": 0.2199,
"step": 454
},
{
"epoch": 0.8552631578947368,
"grad_norm": 1.0055021047592163,
"learning_rate": 9.96240127059917e-05,
"loss": 0.0839,
"step": 455
},
{
"epoch": 0.8571428571428571,
"grad_norm": 4.69797945022583,
"learning_rate": 9.960998272319641e-05,
"loss": 0.2924,
"step": 456
},
{
"epoch": 0.8590225563909775,
"grad_norm": 3.300013542175293,
"learning_rate": 9.959569711621217e-05,
"loss": 0.1466,
"step": 457
},
{
"epoch": 0.8609022556390977,
"grad_norm": 4.579361438751221,
"learning_rate": 9.958115598344232e-05,
"loss": 0.3597,
"step": 458
},
{
"epoch": 0.8627819548872181,
"grad_norm": 2.8986923694610596,
"learning_rate": 9.956635942505035e-05,
"loss": 0.1387,
"step": 459
},
{
"epoch": 0.8646616541353384,
"grad_norm": 3.608433485031128,
"learning_rate": 9.955130754295918e-05,
"loss": 0.1788,
"step": 460
},
{
"epoch": 0.8665413533834586,
"grad_norm": 3.700620651245117,
"learning_rate": 9.953600044085048e-05,
"loss": 0.2746,
"step": 461
},
{
"epoch": 0.868421052631579,
"grad_norm": 3.4533400535583496,
"learning_rate": 9.952043822416397e-05,
"loss": 0.2969,
"step": 462
},
{
"epoch": 0.8703007518796992,
"grad_norm": 3.3678195476531982,
"learning_rate": 9.950462100009666e-05,
"loss": 0.2054,
"step": 463
},
{
"epoch": 0.8721804511278195,
"grad_norm": 3.894620895385742,
"learning_rate": 9.94885488776021e-05,
"loss": 0.2496,
"step": 464
},
{
"epoch": 0.8740601503759399,
"grad_norm": 3.795100688934326,
"learning_rate": 9.947222196738967e-05,
"loss": 0.2611,
"step": 465
},
{
"epoch": 0.8759398496240601,
"grad_norm": 2.7013235092163086,
"learning_rate": 9.945564038192383e-05,
"loss": 0.1439,
"step": 466
},
{
"epoch": 0.8778195488721805,
"grad_norm": 2.68449330329895,
"learning_rate": 9.943880423542327e-05,
"loss": 0.1146,
"step": 467
},
{
"epoch": 0.8796992481203008,
"grad_norm": 2.876955032348633,
"learning_rate": 9.942171364386019e-05,
"loss": 0.1646,
"step": 468
},
{
"epoch": 0.881578947368421,
"grad_norm": 2.7985072135925293,
"learning_rate": 9.940436872495949e-05,
"loss": 0.1293,
"step": 469
},
{
"epoch": 0.8834586466165414,
"grad_norm": 3.7495200634002686,
"learning_rate": 9.938676959819791e-05,
"loss": 0.3097,
"step": 470
},
{
"epoch": 0.8853383458646616,
"grad_norm": 3.1165237426757812,
"learning_rate": 9.936891638480333e-05,
"loss": 0.2038,
"step": 471
},
{
"epoch": 0.8872180451127819,
"grad_norm": 3.427192449569702,
"learning_rate": 9.935080920775373e-05,
"loss": 0.2284,
"step": 472
},
{
"epoch": 0.8890977443609023,
"grad_norm": 4.627795696258545,
"learning_rate": 9.933244819177658e-05,
"loss": 0.3448,
"step": 473
},
{
"epoch": 0.8909774436090225,
"grad_norm": 3.8821961879730225,
"learning_rate": 9.93138334633478e-05,
"loss": 0.2148,
"step": 474
},
{
"epoch": 0.8928571428571429,
"grad_norm": 3.5933377742767334,
"learning_rate": 9.929496515069098e-05,
"loss": 0.2807,
"step": 475
},
{
"epoch": 0.8947368421052632,
"grad_norm": 3.7735559940338135,
"learning_rate": 9.927584338377643e-05,
"loss": 0.29,
"step": 476
},
{
"epoch": 0.8966165413533834,
"grad_norm": 3.569260597229004,
"learning_rate": 9.925646829432041e-05,
"loss": 0.2555,
"step": 477
},
{
"epoch": 0.8984962406015038,
"grad_norm": 3.9756081104278564,
"learning_rate": 9.923684001578403e-05,
"loss": 0.2942,
"step": 478
},
{
"epoch": 0.900375939849624,
"grad_norm": 2.6943299770355225,
"learning_rate": 9.921695868337252e-05,
"loss": 0.1309,
"step": 479
},
{
"epoch": 0.9022556390977443,
"grad_norm": 3.40865421295166,
"learning_rate": 9.91968244340342e-05,
"loss": 0.1965,
"step": 480
},
{
"epoch": 0.9041353383458647,
"grad_norm": 2.193235158920288,
"learning_rate": 9.917643740645954e-05,
"loss": 0.0971,
"step": 481
},
{
"epoch": 0.9060150375939849,
"grad_norm": 3.178183078765869,
"learning_rate": 9.915579774108021e-05,
"loss": 0.2923,
"step": 482
},
{
"epoch": 0.9078947368421053,
"grad_norm": 2.8752593994140625,
"learning_rate": 9.913490558006814e-05,
"loss": 0.2019,
"step": 483
},
{
"epoch": 0.9097744360902256,
"grad_norm": 2.4448463916778564,
"learning_rate": 9.911376106733453e-05,
"loss": 0.1065,
"step": 484
},
{
"epoch": 0.9116541353383458,
"grad_norm": 3.194176435470581,
"learning_rate": 9.909236434852884e-05,
"loss": 0.212,
"step": 485
},
{
"epoch": 0.9135338345864662,
"grad_norm": 3.9748711585998535,
"learning_rate": 9.907071557103779e-05,
"loss": 0.3035,
"step": 486
},
{
"epoch": 0.9154135338345865,
"grad_norm": 3.6471502780914307,
"learning_rate": 9.904881488398435e-05,
"loss": 0.2386,
"step": 487
},
{
"epoch": 0.9172932330827067,
"grad_norm": 2.23283314704895,
"learning_rate": 9.902666243822675e-05,
"loss": 0.1342,
"step": 488
},
{
"epoch": 0.9191729323308271,
"grad_norm": 2.526794672012329,
"learning_rate": 9.900425838635736e-05,
"loss": 0.1798,
"step": 489
},
{
"epoch": 0.9210526315789473,
"grad_norm": 3.369070529937744,
"learning_rate": 9.898160288270171e-05,
"loss": 0.2655,
"step": 490
},
{
"epoch": 0.9229323308270677,
"grad_norm": 2.8184382915496826,
"learning_rate": 9.895869608331741e-05,
"loss": 0.155,
"step": 491
},
{
"epoch": 0.924812030075188,
"grad_norm": 3.6948490142822266,
"learning_rate": 9.893553814599302e-05,
"loss": 0.2283,
"step": 492
},
{
"epoch": 0.9266917293233082,
"grad_norm": 2.2010343074798584,
"learning_rate": 9.891212923024709e-05,
"loss": 0.098,
"step": 493
},
{
"epoch": 0.9285714285714286,
"grad_norm": 2.7922205924987793,
"learning_rate": 9.88884694973269e-05,
"loss": 0.2384,
"step": 494
},
{
"epoch": 0.9304511278195489,
"grad_norm": 2.5202677249908447,
"learning_rate": 9.886455911020748e-05,
"loss": 0.0843,
"step": 495
},
{
"epoch": 0.9323308270676691,
"grad_norm": 2.3406593799591064,
"learning_rate": 9.884039823359043e-05,
"loss": 0.0996,
"step": 496
},
{
"epoch": 0.9342105263157895,
"grad_norm": 3.4097483158111572,
"learning_rate": 9.881598703390278e-05,
"loss": 0.2008,
"step": 497
},
{
"epoch": 0.9360902255639098,
"grad_norm": 3.603726387023926,
"learning_rate": 9.879132567929585e-05,
"loss": 0.2017,
"step": 498
},
{
"epoch": 0.9379699248120301,
"grad_norm": 3.1295666694641113,
"learning_rate": 9.876641433964415e-05,
"loss": 0.215,
"step": 499
},
{
"epoch": 0.9398496240601504,
"grad_norm": 3.567051887512207,
"learning_rate": 9.87412531865441e-05,
"loss": 0.2233,
"step": 500
},
{
"epoch": 0.9398496240601504,
"eval_global_dataset_loss": 1.0155928134918213,
"eval_global_dataset_runtime": 74.5661,
"eval_global_dataset_samples_per_second": 13.035,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9180905649642541,
"eval_sts-test-1024_pearson_cosine": 0.9074934984822008,
"eval_sts-test-1024_spearman_cosine": 0.9182130491666827,
"eval_sts-test-1280_pearson_cosine": 0.9074909272280929,
"eval_sts-test-1280_spearman_cosine": 0.9182151077247066,
"eval_sts-test-512_pearson_cosine": 0.9062832297154773,
"eval_sts-test-512_spearman_cosine": 0.9180905649642541,
"eval_sts-test-760_pearson_cosine": 0.9057267653570724,
"eval_sts-test-760_spearman_cosine": 0.9172522395846092,
"eval_sts-test_pearson_cosine": 0.9074876367668167,
"eval_sts-test_spearman_cosine": 0.9181634647690607,
"step": 500
},
{
"epoch": 0.9417293233082706,
"grad_norm": 4.2725677490234375,
"learning_rate": 9.871584239331293e-05,
"loss": 0.3899,
"step": 501
},
{
"epoch": 0.943609022556391,
"grad_norm": 2.3998143672943115,
"learning_rate": 9.869018213498746e-05,
"loss": 0.1012,
"step": 502
},
{
"epoch": 0.9454887218045113,
"grad_norm": 4.653426170349121,
"learning_rate": 9.866427258832289e-05,
"loss": 0.4322,
"step": 503
},
{
"epoch": 0.9473684210526315,
"grad_norm": 3.875018835067749,
"learning_rate": 9.863811393179163e-05,
"loss": 0.2699,
"step": 504
},
{
"epoch": 0.9492481203007519,
"grad_norm": 3.956909418106079,
"learning_rate": 9.861170634558194e-05,
"loss": 0.3275,
"step": 505
},
{
"epoch": 0.9511278195488722,
"grad_norm": 3.3166370391845703,
"learning_rate": 9.858505001159689e-05,
"loss": 0.2196,
"step": 506
},
{
"epoch": 0.9530075187969925,
"grad_norm": 2.0084877014160156,
"learning_rate": 9.855814511345293e-05,
"loss": 0.1193,
"step": 507
},
{
"epoch": 0.9548872180451128,
"grad_norm": 2.3565118312835693,
"learning_rate": 9.853099183647869e-05,
"loss": 0.0748,
"step": 508
},
{
"epoch": 0.956766917293233,
"grad_norm": 3.5655505657196045,
"learning_rate": 9.850359036771373e-05,
"loss": 0.2532,
"step": 509
},
{
"epoch": 0.9586466165413534,
"grad_norm": 3.4853789806365967,
"learning_rate": 9.847594089590722e-05,
"loss": 0.2517,
"step": 510
},
{
"epoch": 0.9605263157894737,
"grad_norm": 3.009345531463623,
"learning_rate": 9.844804361151661e-05,
"loss": 0.1423,
"step": 511
},
{
"epoch": 0.9624060150375939,
"grad_norm": 3.2568352222442627,
"learning_rate": 9.84198987067064e-05,
"loss": 0.2196,
"step": 512
},
{
"epoch": 0.9642857142857143,
"grad_norm": 2.9902384281158447,
"learning_rate": 9.839150637534679e-05,
"loss": 0.177,
"step": 513
},
{
"epoch": 0.9661654135338346,
"grad_norm": 3.810408353805542,
"learning_rate": 9.836286681301224e-05,
"loss": 0.3111,
"step": 514
},
{
"epoch": 0.9680451127819549,
"grad_norm": 2.799046516418457,
"learning_rate": 9.833398021698028e-05,
"loss": 0.1433,
"step": 515
},
{
"epoch": 0.9699248120300752,
"grad_norm": 3.353372097015381,
"learning_rate": 9.830484678623006e-05,
"loss": 0.279,
"step": 516
},
{
"epoch": 0.9718045112781954,
"grad_norm": 3.018519163131714,
"learning_rate": 9.827546672144099e-05,
"loss": 0.1455,
"step": 517
},
{
"epoch": 0.9736842105263158,
"grad_norm": 2.485931396484375,
"learning_rate": 9.824584022499135e-05,
"loss": 0.135,
"step": 518
},
{
"epoch": 0.9755639097744361,
"grad_norm": 3.1705570220947266,
"learning_rate": 9.821596750095696e-05,
"loss": 0.2181,
"step": 519
},
{
"epoch": 0.9774436090225563,
"grad_norm": 3.1081695556640625,
"learning_rate": 9.81858487551097e-05,
"loss": 0.1378,
"step": 520
},
{
"epoch": 0.9793233082706767,
"grad_norm": 2.6018893718719482,
"learning_rate": 9.81554841949161e-05,
"loss": 0.207,
"step": 521
},
{
"epoch": 0.981203007518797,
"grad_norm": 3.4171817302703857,
"learning_rate": 9.812487402953595e-05,
"loss": 0.1857,
"step": 522
},
{
"epoch": 0.9830827067669173,
"grad_norm": 3.4658777713775635,
"learning_rate": 9.809401846982083e-05,
"loss": 0.2228,
"step": 523
},
{
"epoch": 0.9849624060150376,
"grad_norm": 1.8977023363113403,
"learning_rate": 9.806291772831271e-05,
"loss": 0.0977,
"step": 524
},
{
"epoch": 0.9868421052631579,
"grad_norm": 1.7279019355773926,
"learning_rate": 9.803157201924235e-05,
"loss": 0.0472,
"step": 525
},
{
"epoch": 0.9887218045112782,
"grad_norm": 4.759050369262695,
"learning_rate": 9.799998155852801e-05,
"loss": 0.4102,
"step": 526
},
{
"epoch": 0.9906015037593985,
"grad_norm": 3.8956758975982666,
"learning_rate": 9.79681465637738e-05,
"loss": 0.2662,
"step": 527
},
{
"epoch": 0.9924812030075187,
"grad_norm": 3.572314500808716,
"learning_rate": 9.793606725426832e-05,
"loss": 0.2859,
"step": 528
},
{
"epoch": 0.9943609022556391,
"grad_norm": 3.7973859310150146,
"learning_rate": 9.7903743850983e-05,
"loss": 0.2533,
"step": 529
},
{
"epoch": 0.9962406015037594,
"grad_norm": 3.5079214572906494,
"learning_rate": 9.787117657657072e-05,
"loss": 0.2009,
"step": 530
},
{
"epoch": 0.9981203007518797,
"grad_norm": 4.260622501373291,
"learning_rate": 9.78383656553642e-05,
"loss": 0.3569,
"step": 531
},
{
"epoch": 1.0,
"grad_norm": 0.23837219178676605,
"learning_rate": 9.780531131337446e-05,
"loss": 0.0021,
"step": 532
},
{
"epoch": 1.0018796992481203,
"grad_norm": 2.626725673675537,
"learning_rate": 9.777201377828926e-05,
"loss": 0.1167,
"step": 533
},
{
"epoch": 1.0037593984962405,
"grad_norm": 2.536475658416748,
"learning_rate": 9.773847327947157e-05,
"loss": 0.1864,
"step": 534
},
{
"epoch": 1.005639097744361,
"grad_norm": 3.6628808975219727,
"learning_rate": 9.770469004795794e-05,
"loss": 0.2067,
"step": 535
},
{
"epoch": 1.0075187969924813,
"grad_norm": 1.5426212549209595,
"learning_rate": 9.767066431645695e-05,
"loss": 0.078,
"step": 536
},
{
"epoch": 1.0093984962406015,
"grad_norm": 2.680577278137207,
"learning_rate": 9.76363963193476e-05,
"loss": 0.1397,
"step": 537
},
{
"epoch": 1.0112781954887218,
"grad_norm": 1.9868223667144775,
"learning_rate": 9.760188629267764e-05,
"loss": 0.085,
"step": 538
},
{
"epoch": 1.013157894736842,
"grad_norm": 2.018101692199707,
"learning_rate": 9.756713447416203e-05,
"loss": 0.1018,
"step": 539
},
{
"epoch": 1.0150375939849625,
"grad_norm": 3.0724055767059326,
"learning_rate": 9.753214110318125e-05,
"loss": 0.1371,
"step": 540
},
{
"epoch": 1.0169172932330828,
"grad_norm": 2.081967830657959,
"learning_rate": 9.749690642077964e-05,
"loss": 0.0639,
"step": 541
},
{
"epoch": 1.018796992481203,
"grad_norm": 3.2037465572357178,
"learning_rate": 9.746143066966382e-05,
"loss": 0.1452,
"step": 542
},
{
"epoch": 1.0206766917293233,
"grad_norm": 2.831393003463745,
"learning_rate": 9.742571409420091e-05,
"loss": 0.1349,
"step": 543
},
{
"epoch": 1.0225563909774436,
"grad_norm": 2.723844528198242,
"learning_rate": 9.73897569404169e-05,
"loss": 0.1495,
"step": 544
},
{
"epoch": 1.0244360902255638,
"grad_norm": 2.4952895641326904,
"learning_rate": 9.735355945599497e-05,
"loss": 0.0946,
"step": 545
},
{
"epoch": 1.0263157894736843,
"grad_norm": 4.644716262817383,
"learning_rate": 9.731712189027377e-05,
"loss": 0.2574,
"step": 546
},
{
"epoch": 1.0281954887218046,
"grad_norm": 2.886420965194702,
"learning_rate": 9.728044449424567e-05,
"loss": 0.1232,
"step": 547
},
{
"epoch": 1.0300751879699248,
"grad_norm": 2.5065267086029053,
"learning_rate": 9.724352752055506e-05,
"loss": 0.1205,
"step": 548
},
{
"epoch": 1.031954887218045,
"grad_norm": 1.9992611408233643,
"learning_rate": 9.720637122349669e-05,
"loss": 0.0761,
"step": 549
},
{
"epoch": 1.0338345864661653,
"grad_norm": 2.5508501529693604,
"learning_rate": 9.716897585901372e-05,
"loss": 0.0885,
"step": 550
},
{
"epoch": 1.0338345864661653,
"eval_global_dataset_loss": 1.018797516822815,
"eval_global_dataset_runtime": 73.4171,
"eval_global_dataset_samples_per_second": 13.239,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9162829720141328,
"eval_sts-test-1024_pearson_cosine": 0.9050044361126697,
"eval_sts-test-1024_spearman_cosine": 0.9173084024176553,
"eval_sts-test-1280_pearson_cosine": 0.9049983320262998,
"eval_sts-test-1280_spearman_cosine": 0.9173004814443892,
"eval_sts-test-512_pearson_cosine": 0.9033928303116558,
"eval_sts-test-512_spearman_cosine": 0.9162829720141328,
"eval_sts-test-760_pearson_cosine": 0.9033036147321826,
"eval_sts-test-760_spearman_cosine": 0.9166361489690339,
"eval_sts-test_pearson_cosine": 0.9049924067928337,
"eval_sts-test_spearman_cosine": 0.9173026742561973,
"step": 550
},
{
"epoch": 1.0357142857142858,
"grad_norm": 2.988827705383301,
"learning_rate": 9.713134168469615e-05,
"loss": 0.1249,
"step": 551
},
{
"epoch": 1.037593984962406,
"grad_norm": 2.299173355102539,
"learning_rate": 9.709346895977896e-05,
"loss": 0.1191,
"step": 552
},
{
"epoch": 1.0394736842105263,
"grad_norm": 3.8693172931671143,
"learning_rate": 9.70553579451403e-05,
"loss": 0.2311,
"step": 553
},
{
"epoch": 1.0413533834586466,
"grad_norm": 2.846728563308716,
"learning_rate": 9.701700890329977e-05,
"loss": 0.1081,
"step": 554
},
{
"epoch": 1.0432330827067668,
"grad_norm": 3.595548391342163,
"learning_rate": 9.697842209841654e-05,
"loss": 0.2131,
"step": 555
},
{
"epoch": 1.045112781954887,
"grad_norm": 2.5240702629089355,
"learning_rate": 9.693959779628761e-05,
"loss": 0.1334,
"step": 556
},
{
"epoch": 1.0469924812030076,
"grad_norm": 2.941345691680908,
"learning_rate": 9.690053626434585e-05,
"loss": 0.1624,
"step": 557
},
{
"epoch": 1.0488721804511278,
"grad_norm": 3.699535369873047,
"learning_rate": 9.68612377716583e-05,
"loss": 0.1951,
"step": 558
},
{
"epoch": 1.050751879699248,
"grad_norm": 1.7172725200653076,
"learning_rate": 9.682170258892423e-05,
"loss": 0.0739,
"step": 559
},
{
"epoch": 1.0526315789473684,
"grad_norm": 3.7899820804595947,
"learning_rate": 9.678193098847328e-05,
"loss": 0.2474,
"step": 560
},
{
"epoch": 1.0545112781954886,
"grad_norm": 2.2706000804901123,
"learning_rate": 9.674192324426366e-05,
"loss": 0.0994,
"step": 561
},
{
"epoch": 1.056390977443609,
"grad_norm": 3.8091330528259277,
"learning_rate": 9.670167963188015e-05,
"loss": 0.2113,
"step": 562
},
{
"epoch": 1.0582706766917294,
"grad_norm": 1.1805905103683472,
"learning_rate": 9.666120042853227e-05,
"loss": 0.0286,
"step": 563
},
{
"epoch": 1.0601503759398496,
"grad_norm": 3.3499631881713867,
"learning_rate": 9.66204859130524e-05,
"loss": 0.2701,
"step": 564
},
{
"epoch": 1.0620300751879699,
"grad_norm": 2.7908685207366943,
"learning_rate": 9.657953636589373e-05,
"loss": 0.1336,
"step": 565
},
{
"epoch": 1.0639097744360901,
"grad_norm": 2.8439011573791504,
"learning_rate": 9.65383520691285e-05,
"loss": 0.1152,
"step": 566
},
{
"epoch": 1.0657894736842106,
"grad_norm": 2.6462042331695557,
"learning_rate": 9.649693330644595e-05,
"loss": 0.0995,
"step": 567
},
{
"epoch": 1.0676691729323309,
"grad_norm": 3.5710973739624023,
"learning_rate": 9.645528036315036e-05,
"loss": 0.2256,
"step": 568
},
{
"epoch": 1.0695488721804511,
"grad_norm": 3.204761028289795,
"learning_rate": 9.641339352615917e-05,
"loss": 0.1368,
"step": 569
},
{
"epoch": 1.0714285714285714,
"grad_norm": 3.230581045150757,
"learning_rate": 9.637127308400085e-05,
"loss": 0.1263,
"step": 570
},
{
"epoch": 1.0733082706766917,
"grad_norm": 0.767558753490448,
"learning_rate": 9.63289193268131e-05,
"loss": 0.0186,
"step": 571
},
{
"epoch": 1.0751879699248121,
"grad_norm": 2.9297759532928467,
"learning_rate": 9.628633254634072e-05,
"loss": 0.146,
"step": 572
},
{
"epoch": 1.0770676691729324,
"grad_norm": 3.178691864013672,
"learning_rate": 9.624351303593366e-05,
"loss": 0.1571,
"step": 573
},
{
"epoch": 1.0789473684210527,
"grad_norm": 2.926023244857788,
"learning_rate": 9.620046109054498e-05,
"loss": 0.1521,
"step": 574
},
{
"epoch": 1.080827067669173,
"grad_norm": 1.6326439380645752,
"learning_rate": 9.615717700672878e-05,
"loss": 0.0338,
"step": 575
},
{
"epoch": 1.0827067669172932,
"grad_norm": 2.366187572479248,
"learning_rate": 9.611366108263826e-05,
"loss": 0.0967,
"step": 576
},
{
"epoch": 1.0845864661654134,
"grad_norm": 1.6901721954345703,
"learning_rate": 9.606991361802354e-05,
"loss": 0.0454,
"step": 577
},
{
"epoch": 1.086466165413534,
"grad_norm": 2.1220741271972656,
"learning_rate": 9.60259349142297e-05,
"loss": 0.0509,
"step": 578
},
{
"epoch": 1.0883458646616542,
"grad_norm": 2.955486536026001,
"learning_rate": 9.598172527419464e-05,
"loss": 0.0962,
"step": 579
},
{
"epoch": 1.0902255639097744,
"grad_norm": 3.1397783756256104,
"learning_rate": 9.593728500244703e-05,
"loss": 0.1644,
"step": 580
},
{
"epoch": 1.0921052631578947,
"grad_norm": 3.1587095260620117,
"learning_rate": 9.589261440510418e-05,
"loss": 0.1733,
"step": 581
},
{
"epoch": 1.093984962406015,
"grad_norm": 4.277261257171631,
"learning_rate": 9.584771378986995e-05,
"loss": 0.2181,
"step": 582
},
{
"epoch": 1.0958646616541354,
"grad_norm": 3.1522698402404785,
"learning_rate": 9.580258346603267e-05,
"loss": 0.1462,
"step": 583
},
{
"epoch": 1.0977443609022557,
"grad_norm": 1.566369891166687,
"learning_rate": 9.57572237444629e-05,
"loss": 0.0393,
"step": 584
},
{
"epoch": 1.099624060150376,
"grad_norm": 3.8027660846710205,
"learning_rate": 9.57116349376114e-05,
"loss": 0.2449,
"step": 585
},
{
"epoch": 1.1015037593984962,
"grad_norm": 2.550198554992676,
"learning_rate": 9.566581735950695e-05,
"loss": 0.0837,
"step": 586
},
{
"epoch": 1.1033834586466165,
"grad_norm": 3.44278621673584,
"learning_rate": 9.561977132575412e-05,
"loss": 0.1569,
"step": 587
},
{
"epoch": 1.1052631578947367,
"grad_norm": 2.3197524547576904,
"learning_rate": 9.55734971535312e-05,
"loss": 0.0708,
"step": 588
},
{
"epoch": 1.1071428571428572,
"grad_norm": 3.2017619609832764,
"learning_rate": 9.552699516158792e-05,
"loss": 0.1351,
"step": 589
},
{
"epoch": 1.1090225563909775,
"grad_norm": 3.7326323986053467,
"learning_rate": 9.548026567024335e-05,
"loss": 0.244,
"step": 590
},
{
"epoch": 1.1109022556390977,
"grad_norm": 2.7139155864715576,
"learning_rate": 9.543330900138357e-05,
"loss": 0.071,
"step": 591
},
{
"epoch": 1.112781954887218,
"grad_norm": 3.909640073776245,
"learning_rate": 9.53861254784596e-05,
"loss": 0.3121,
"step": 592
},
{
"epoch": 1.1146616541353382,
"grad_norm": 3.1949503421783447,
"learning_rate": 9.533871542648504e-05,
"loss": 0.1441,
"step": 593
},
{
"epoch": 1.1165413533834587,
"grad_norm": 3.353057622909546,
"learning_rate": 9.52910791720339e-05,
"loss": 0.1103,
"step": 594
},
{
"epoch": 1.118421052631579,
"grad_norm": 2.921609401702881,
"learning_rate": 9.524321704323836e-05,
"loss": 0.106,
"step": 595
},
{
"epoch": 1.1203007518796992,
"grad_norm": 2.78655743598938,
"learning_rate": 9.519512936978643e-05,
"loss": 0.0783,
"step": 596
},
{
"epoch": 1.1221804511278195,
"grad_norm": 1.8240987062454224,
"learning_rate": 9.514681648291985e-05,
"loss": 0.0545,
"step": 597
},
{
"epoch": 1.1240601503759398,
"grad_norm": 3.7685468196868896,
"learning_rate": 9.509827871543156e-05,
"loss": 0.1546,
"step": 598
},
{
"epoch": 1.1259398496240602,
"grad_norm": 2.281994581222534,
"learning_rate": 9.504951640166362e-05,
"loss": 0.0715,
"step": 599
},
{
"epoch": 1.1278195488721805,
"grad_norm": 3.044015645980835,
"learning_rate": 9.500052987750481e-05,
"loss": 0.1316,
"step": 600
},
{
"epoch": 1.1278195488721805,
"eval_global_dataset_loss": 1.0268114805221558,
"eval_global_dataset_runtime": 72.9657,
"eval_global_dataset_samples_per_second": 13.321,
"eval_global_dataset_steps_per_second": 0.055,
"eval_sequential_score": 0.9198838375107304,
"eval_sts-test-1024_pearson_cosine": 0.90596708830987,
"eval_sts-test-1024_spearman_cosine": 0.9204009830873694,
"eval_sts-test-1280_pearson_cosine": 0.9059622219277733,
"eval_sts-test-1280_spearman_cosine": 0.9204031758991775,
"eval_sts-test-512_pearson_cosine": 0.904702124198475,
"eval_sts-test-512_spearman_cosine": 0.9198838375107304,
"eval_sts-test-760_pearson_cosine": 0.9044429884523028,
"eval_sts-test-760_spearman_cosine": 0.9200724193262337,
"eval_sts-test_pearson_cosine": 0.9059576370598521,
"eval_sts-test_spearman_cosine": 0.9204111863749668,
"step": 600
},
{
"epoch": 1.1296992481203008,
"grad_norm": 4.599878311157227,
"learning_rate": 9.495131948038836e-05,
"loss": 0.2016,
"step": 601
},
{
"epoch": 1.131578947368421,
"grad_norm": 3.2281079292297363,
"learning_rate": 9.490188554928956e-05,
"loss": 0.1192,
"step": 602
},
{
"epoch": 1.1334586466165413,
"grad_norm": 3.7087974548339844,
"learning_rate": 9.485222842472348e-05,
"loss": 0.1884,
"step": 603
},
{
"epoch": 1.1353383458646618,
"grad_norm": 3.4763455390930176,
"learning_rate": 9.48023484487426e-05,
"loss": 0.1772,
"step": 604
},
{
"epoch": 1.137218045112782,
"grad_norm": 4.881205081939697,
"learning_rate": 9.475224596493453e-05,
"loss": 0.3432,
"step": 605
},
{
"epoch": 1.1390977443609023,
"grad_norm": 2.759403944015503,
"learning_rate": 9.470192131841948e-05,
"loss": 0.0932,
"step": 606
},
{
"epoch": 1.1409774436090225,
"grad_norm": 3.1468870639801025,
"learning_rate": 9.465137485584806e-05,
"loss": 0.135,
"step": 607
},
{
"epoch": 1.1428571428571428,
"grad_norm": 2.657116651535034,
"learning_rate": 9.460060692539875e-05,
"loss": 0.1132,
"step": 608
},
{
"epoch": 1.1447368421052633,
"grad_norm": 2.5455267429351807,
"learning_rate": 9.454961787677563e-05,
"loss": 0.1118,
"step": 609
},
{
"epoch": 1.1466165413533835,
"grad_norm": 3.2456393241882324,
"learning_rate": 9.449840806120584e-05,
"loss": 0.2478,
"step": 610
},
{
"epoch": 1.1484962406015038,
"grad_norm": 4.167994976043701,
"learning_rate": 9.444697783143726e-05,
"loss": 0.2155,
"step": 611
},
{
"epoch": 1.150375939849624,
"grad_norm": 2.268927574157715,
"learning_rate": 9.439532754173608e-05,
"loss": 0.1018,
"step": 612
},
{
"epoch": 1.1522556390977443,
"grad_norm": 3.0976715087890625,
"learning_rate": 9.434345754788421e-05,
"loss": 0.124,
"step": 613
},
{
"epoch": 1.1541353383458646,
"grad_norm": 4.238986492156982,
"learning_rate": 9.42913682071771e-05,
"loss": 0.2199,
"step": 614
},
{
"epoch": 1.156015037593985,
"grad_norm": 2.4929847717285156,
"learning_rate": 9.4239059878421e-05,
"loss": 0.1268,
"step": 615
},
{
"epoch": 1.1578947368421053,
"grad_norm": 2.0077924728393555,
"learning_rate": 9.418653292193069e-05,
"loss": 0.0695,
"step": 616
},
{
"epoch": 1.1597744360902256,
"grad_norm": 3.0697379112243652,
"learning_rate": 9.413378769952685e-05,
"loss": 0.1006,
"step": 617
},
{
"epoch": 1.1616541353383458,
"grad_norm": 3.9670581817626953,
"learning_rate": 9.408082457453371e-05,
"loss": 0.2547,
"step": 618
},
{
"epoch": 1.163533834586466,
"grad_norm": 2.876302719116211,
"learning_rate": 9.402764391177645e-05,
"loss": 0.1062,
"step": 619
},
{
"epoch": 1.1654135338345863,
"grad_norm": 3.397611141204834,
"learning_rate": 9.397424607757868e-05,
"loss": 0.1647,
"step": 620
},
{
"epoch": 1.1672932330827068,
"grad_norm": 3.3144094944000244,
"learning_rate": 9.392063143975999e-05,
"loss": 0.1797,
"step": 621
},
{
"epoch": 1.169172932330827,
"grad_norm": 3.333677053451538,
"learning_rate": 9.386680036763333e-05,
"loss": 0.1754,
"step": 622
},
{
"epoch": 1.1710526315789473,
"grad_norm": 1.7224763631820679,
"learning_rate": 9.38127532320026e-05,
"loss": 0.0471,
"step": 623
},
{
"epoch": 1.1729323308270676,
"grad_norm": 3.2871925830841064,
"learning_rate": 9.375849040515989e-05,
"loss": 0.1537,
"step": 624
},
{
"epoch": 1.1748120300751879,
"grad_norm": 3.3504252433776855,
"learning_rate": 9.370401226088313e-05,
"loss": 0.1531,
"step": 625
},
{
"epoch": 1.1766917293233083,
"grad_norm": 3.614995002746582,
"learning_rate": 9.364931917443335e-05,
"loss": 0.2241,
"step": 626
},
{
"epoch": 1.1785714285714286,
"grad_norm": 2.5657124519348145,
"learning_rate": 9.359441152255224e-05,
"loss": 0.103,
"step": 627
},
{
"epoch": 1.1804511278195489,
"grad_norm": 3.1188507080078125,
"learning_rate": 9.353928968345938e-05,
"loss": 0.1551,
"step": 628
},
{
"epoch": 1.1823308270676691,
"grad_norm": 3.6165919303894043,
"learning_rate": 9.348395403684982e-05,
"loss": 0.319,
"step": 629
},
{
"epoch": 1.1842105263157894,
"grad_norm": 3.6497128009796143,
"learning_rate": 9.342840496389132e-05,
"loss": 0.2347,
"step": 630
},
{
"epoch": 1.1860902255639099,
"grad_norm": 2.848780393600464,
"learning_rate": 9.33726428472218e-05,
"loss": 0.1735,
"step": 631
},
{
"epoch": 1.1879699248120301,
"grad_norm": 1.7372788190841675,
"learning_rate": 9.331666807094671e-05,
"loss": 0.0549,
"step": 632
},
{
"epoch": 1.1898496240601504,
"grad_norm": 3.3109991550445557,
"learning_rate": 9.326048102063631e-05,
"loss": 0.2516,
"step": 633
},
{
"epoch": 1.1917293233082706,
"grad_norm": 2.7588775157928467,
"learning_rate": 9.320408208332313e-05,
"loss": 0.0896,
"step": 634
},
{
"epoch": 1.193609022556391,
"grad_norm": 3.7248873710632324,
"learning_rate": 9.314747164749917e-05,
"loss": 0.1549,
"step": 635
},
{
"epoch": 1.1954887218045114,
"grad_norm": 2.6793620586395264,
"learning_rate": 9.309065010311336e-05,
"loss": 0.1302,
"step": 636
},
{
"epoch": 1.1973684210526316,
"grad_norm": 2.9483072757720947,
"learning_rate": 9.303361784156875e-05,
"loss": 0.1657,
"step": 637
},
{
"epoch": 1.199248120300752,
"grad_norm": 3.268934726715088,
"learning_rate": 9.297637525571989e-05,
"loss": 0.1467,
"step": 638
},
{
"epoch": 1.2011278195488722,
"grad_norm": 3.103567123413086,
"learning_rate": 9.291892273987009e-05,
"loss": 0.1631,
"step": 639
},
{
"epoch": 1.2030075187969924,
"grad_norm": 1.773438811302185,
"learning_rate": 9.286126068976875e-05,
"loss": 0.0765,
"step": 640
},
{
"epoch": 1.204887218045113,
"grad_norm": 2.9956865310668945,
"learning_rate": 9.28033895026086e-05,
"loss": 0.1571,
"step": 641
},
{
"epoch": 1.2067669172932332,
"grad_norm": 3.9764537811279297,
"learning_rate": 9.274530957702295e-05,
"loss": 0.2937,
"step": 642
},
{
"epoch": 1.2086466165413534,
"grad_norm": 3.338249444961548,
"learning_rate": 9.268702131308292e-05,
"loss": 0.1284,
"step": 643
},
{
"epoch": 1.2105263157894737,
"grad_norm": 3.7653377056121826,
"learning_rate": 9.26285251122948e-05,
"loss": 0.2418,
"step": 644
},
{
"epoch": 1.212406015037594,
"grad_norm": 2.9056382179260254,
"learning_rate": 9.256982137759718e-05,
"loss": 0.1306,
"step": 645
},
{
"epoch": 1.2142857142857142,
"grad_norm": 4.180511951446533,
"learning_rate": 9.251091051335816e-05,
"loss": 0.3252,
"step": 646
},
{
"epoch": 1.2161654135338347,
"grad_norm": 1.7085641622543335,
"learning_rate": 9.245179292537267e-05,
"loss": 0.0596,
"step": 647
},
{
"epoch": 1.218045112781955,
"grad_norm": 1.9504470825195312,
"learning_rate": 9.239246902085959e-05,
"loss": 0.0563,
"step": 648
},
{
"epoch": 1.2199248120300752,
"grad_norm": 2.950937509536743,
"learning_rate": 9.233293920845897e-05,
"loss": 0.1782,
"step": 649
},
{
"epoch": 1.2218045112781954,
"grad_norm": 3.335604667663574,
"learning_rate": 9.227320389822918e-05,
"loss": 0.2451,
"step": 650
},
{
"epoch": 1.2218045112781954,
"eval_global_dataset_loss": 1.0016443729400635,
"eval_global_dataset_runtime": 73.7823,
"eval_global_dataset_samples_per_second": 13.174,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9186365751044907,
"eval_sts-test-1024_pearson_cosine": 0.9056774761966145,
"eval_sts-test-1024_spearman_cosine": 0.919202812814876,
"eval_sts-test-1280_pearson_cosine": 0.9056743598981793,
"eval_sts-test-1280_spearman_cosine": 0.9192088989864254,
"eval_sts-test-512_pearson_cosine": 0.9055633055097914,
"eval_sts-test-512_spearman_cosine": 0.9186365751044907,
"eval_sts-test-760_pearson_cosine": 0.9043753472141889,
"eval_sts-test-760_spearman_cosine": 0.9189957934796795,
"eval_sts-test_pearson_cosine": 0.9056699239203394,
"eval_sts-test_spearman_cosine": 0.9191896559440269,
"step": 650
},
{
"epoch": 1.2236842105263157,
"grad_norm": 2.9990556240081787,
"learning_rate": 9.221326350164416e-05,
"loss": 0.1245,
"step": 651
},
{
"epoch": 1.225563909774436,
"grad_norm": 2.6252524852752686,
"learning_rate": 9.215311843159054e-05,
"loss": 0.1188,
"step": 652
},
{
"epoch": 1.2274436090225564,
"grad_norm": 2.2566170692443848,
"learning_rate": 9.209276910236477e-05,
"loss": 0.1251,
"step": 653
},
{
"epoch": 1.2293233082706767,
"grad_norm": 2.2437398433685303,
"learning_rate": 9.20322159296703e-05,
"loss": 0.1049,
"step": 654
},
{
"epoch": 1.231203007518797,
"grad_norm": 1.9391624927520752,
"learning_rate": 9.197145933061477e-05,
"loss": 0.0618,
"step": 655
},
{
"epoch": 1.2330827067669172,
"grad_norm": 2.8038880825042725,
"learning_rate": 9.1910499723707e-05,
"loss": 0.1165,
"step": 656
},
{
"epoch": 1.2349624060150375,
"grad_norm": 2.137781858444214,
"learning_rate": 9.184933752885421e-05,
"loss": 0.107,
"step": 657
},
{
"epoch": 1.236842105263158,
"grad_norm": 3.2800896167755127,
"learning_rate": 9.178797316735915e-05,
"loss": 0.1314,
"step": 658
},
{
"epoch": 1.2387218045112782,
"grad_norm": 2.87821364402771,
"learning_rate": 9.17264070619171e-05,
"loss": 0.1947,
"step": 659
},
{
"epoch": 1.2406015037593985,
"grad_norm": 2.358914375305176,
"learning_rate": 9.166463963661303e-05,
"loss": 0.1028,
"step": 660
},
{
"epoch": 1.2424812030075187,
"grad_norm": 1.8619112968444824,
"learning_rate": 9.160267131691865e-05,
"loss": 0.0645,
"step": 661
},
{
"epoch": 1.244360902255639,
"grad_norm": 3.056086301803589,
"learning_rate": 9.154050252968949e-05,
"loss": 0.1805,
"step": 662
},
{
"epoch": 1.2462406015037595,
"grad_norm": 1.4002981185913086,
"learning_rate": 9.147813370316197e-05,
"loss": 0.0659,
"step": 663
},
{
"epoch": 1.2481203007518797,
"grad_norm": 3.8851730823516846,
"learning_rate": 9.14155652669504e-05,
"loss": 0.2046,
"step": 664
},
{
"epoch": 1.25,
"grad_norm": 3.7416234016418457,
"learning_rate": 9.135279765204414e-05,
"loss": 0.2969,
"step": 665
},
{
"epoch": 1.2518796992481203,
"grad_norm": 3.65122389793396,
"learning_rate": 9.128983129080445e-05,
"loss": 0.1854,
"step": 666
},
{
"epoch": 1.2537593984962405,
"grad_norm": 2.737818717956543,
"learning_rate": 9.122666661696167e-05,
"loss": 0.1481,
"step": 667
},
{
"epoch": 1.255639097744361,
"grad_norm": 2.880852460861206,
"learning_rate": 9.11633040656122e-05,
"loss": 0.1347,
"step": 668
},
{
"epoch": 1.2575187969924813,
"grad_norm": 3.3521029949188232,
"learning_rate": 9.10997440732154e-05,
"loss": 0.1817,
"step": 669
},
{
"epoch": 1.2593984962406015,
"grad_norm": 3.2931160926818848,
"learning_rate": 9.103598707759071e-05,
"loss": 0.2038,
"step": 670
},
{
"epoch": 1.2612781954887218,
"grad_norm": 3.4462192058563232,
"learning_rate": 9.097203351791458e-05,
"loss": 0.1796,
"step": 671
},
{
"epoch": 1.263157894736842,
"grad_norm": 3.3525967597961426,
"learning_rate": 9.090788383471744e-05,
"loss": 0.1607,
"step": 672
},
{
"epoch": 1.2650375939849625,
"grad_norm": 3.104384183883667,
"learning_rate": 9.084353846988069e-05,
"loss": 0.1057,
"step": 673
},
{
"epoch": 1.2669172932330828,
"grad_norm": 3.988388776779175,
"learning_rate": 9.077899786663362e-05,
"loss": 0.2247,
"step": 674
},
{
"epoch": 1.268796992481203,
"grad_norm": 3.816800117492676,
"learning_rate": 9.071426246955038e-05,
"loss": 0.2174,
"step": 675
},
{
"epoch": 1.2706766917293233,
"grad_norm": 2.942406177520752,
"learning_rate": 9.064933272454696e-05,
"loss": 0.1497,
"step": 676
},
{
"epoch": 1.2725563909774436,
"grad_norm": 3.1315743923187256,
"learning_rate": 9.058420907887799e-05,
"loss": 0.141,
"step": 677
},
{
"epoch": 1.274436090225564,
"grad_norm": 3.2112250328063965,
"learning_rate": 9.051889198113384e-05,
"loss": 0.1776,
"step": 678
},
{
"epoch": 1.2763157894736843,
"grad_norm": 3.5193326473236084,
"learning_rate": 9.045338188123735e-05,
"loss": 0.1852,
"step": 679
},
{
"epoch": 1.2781954887218046,
"grad_norm": 3.0619587898254395,
"learning_rate": 9.038767923044087e-05,
"loss": 0.2035,
"step": 680
},
{
"epoch": 1.2800751879699248,
"grad_norm": 2.2244269847869873,
"learning_rate": 9.032178448132307e-05,
"loss": 0.0879,
"step": 681
},
{
"epoch": 1.281954887218045,
"grad_norm": 1.9407459497451782,
"learning_rate": 9.025569808778584e-05,
"loss": 0.0721,
"step": 682
},
{
"epoch": 1.2838345864661656,
"grad_norm": 3.1249163150787354,
"learning_rate": 9.018942050505122e-05,
"loss": 0.1494,
"step": 683
},
{
"epoch": 1.2857142857142856,
"grad_norm": 3.244734287261963,
"learning_rate": 9.012295218965812e-05,
"loss": 0.1561,
"step": 684
},
{
"epoch": 1.287593984962406,
"grad_norm": 2.47184419631958,
"learning_rate": 9.005629359945941e-05,
"loss": 0.1012,
"step": 685
},
{
"epoch": 1.2894736842105263,
"grad_norm": 2.8298754692077637,
"learning_rate": 8.99894451936185e-05,
"loss": 0.1798,
"step": 686
},
{
"epoch": 1.2913533834586466,
"grad_norm": 2.8187429904937744,
"learning_rate": 8.992240743260635e-05,
"loss": 0.1093,
"step": 687
},
{
"epoch": 1.2932330827067668,
"grad_norm": 3.052556037902832,
"learning_rate": 8.985518077819828e-05,
"loss": 0.1436,
"step": 688
},
{
"epoch": 1.295112781954887,
"grad_norm": 1.6897327899932861,
"learning_rate": 8.978776569347073e-05,
"loss": 0.0447,
"step": 689
},
{
"epoch": 1.2969924812030076,
"grad_norm": 2.700467586517334,
"learning_rate": 8.972016264279812e-05,
"loss": 0.1563,
"step": 690
},
{
"epoch": 1.2988721804511278,
"grad_norm": 2.610023021697998,
"learning_rate": 8.96523720918496e-05,
"loss": 0.1066,
"step": 691
},
{
"epoch": 1.300751879699248,
"grad_norm": 3.862847089767456,
"learning_rate": 8.958439450758593e-05,
"loss": 0.235,
"step": 692
},
{
"epoch": 1.3026315789473684,
"grad_norm": 2.830010175704956,
"learning_rate": 8.951623035825615e-05,
"loss": 0.131,
"step": 693
},
{
"epoch": 1.3045112781954886,
"grad_norm": 3.1229724884033203,
"learning_rate": 8.944788011339446e-05,
"loss": 0.1766,
"step": 694
},
{
"epoch": 1.306390977443609,
"grad_norm": 2.7942512035369873,
"learning_rate": 8.937934424381694e-05,
"loss": 0.1615,
"step": 695
},
{
"epoch": 1.3082706766917294,
"grad_norm": 2.758984088897705,
"learning_rate": 8.931062322161823e-05,
"loss": 0.165,
"step": 696
},
{
"epoch": 1.3101503759398496,
"grad_norm": 2.493286371231079,
"learning_rate": 8.924171752016845e-05,
"loss": 0.1941,
"step": 697
},
{
"epoch": 1.3120300751879699,
"grad_norm": 1.32247793674469,
"learning_rate": 8.917262761410983e-05,
"loss": 0.0443,
"step": 698
},
{
"epoch": 1.3139097744360901,
"grad_norm": 2.5577824115753174,
"learning_rate": 8.910335397935336e-05,
"loss": 0.1665,
"step": 699
},
{
"epoch": 1.3157894736842106,
"grad_norm": 2.4263994693756104,
"learning_rate": 8.903389709307575e-05,
"loss": 0.1263,
"step": 700
},
{
"epoch": 1.3157894736842106,
"eval_global_dataset_loss": 0.996315598487854,
"eval_global_dataset_runtime": 73.1026,
"eval_global_dataset_samples_per_second": 13.296,
"eval_global_dataset_steps_per_second": 0.055,
"eval_sequential_score": 0.9168267893425611,
"eval_sts-test-1024_pearson_cosine": 0.9032360441021332,
"eval_sts-test-1024_spearman_cosine": 0.9170557815471008,
"eval_sts-test-1280_pearson_cosine": 0.9032358213968139,
"eval_sts-test-1280_spearman_cosine": 0.9170974002201949,
"eval_sts-test-512_pearson_cosine": 0.90237825051997,
"eval_sts-test-512_spearman_cosine": 0.9168267893425611,
"eval_sts-test-760_pearson_cosine": 0.9017398576147293,
"eval_sts-test-760_spearman_cosine": 0.9162999327422001,
"eval_sts-test_pearson_cosine": 0.9032344072558449,
"eval_sts-test_spearman_cosine": 0.9171278758292024,
"step": 700
},
{
"epoch": 1.3176691729323309,
"grad_norm": 2.2901155948638916,
"learning_rate": 8.896425743371588e-05,
"loss": 0.1022,
"step": 701
},
{
"epoch": 1.3195488721804511,
"grad_norm": 3.3940200805664062,
"learning_rate": 8.88944354809717e-05,
"loss": 0.1656,
"step": 702
},
{
"epoch": 1.3214285714285714,
"grad_norm": 4.018152713775635,
"learning_rate": 8.882443171579677e-05,
"loss": 0.262,
"step": 703
},
{
"epoch": 1.3233082706766917,
"grad_norm": 3.101533889770508,
"learning_rate": 8.87542466203971e-05,
"loss": 0.174,
"step": 704
},
{
"epoch": 1.3251879699248121,
"grad_norm": 2.423062324523926,
"learning_rate": 8.868388067822772e-05,
"loss": 0.0899,
"step": 705
},
{
"epoch": 1.3270676691729324,
"grad_norm": 2.2438297271728516,
"learning_rate": 8.861333437398942e-05,
"loss": 0.0773,
"step": 706
},
{
"epoch": 1.3289473684210527,
"grad_norm": 2.8316147327423096,
"learning_rate": 8.854260819362532e-05,
"loss": 0.1117,
"step": 707
},
{
"epoch": 1.330827067669173,
"grad_norm": 2.189030408859253,
"learning_rate": 8.847170262431763e-05,
"loss": 0.0817,
"step": 708
},
{
"epoch": 1.3327067669172932,
"grad_norm": 2.5064244270324707,
"learning_rate": 8.840061815448418e-05,
"loss": 0.1109,
"step": 709
},
{
"epoch": 1.3345864661654137,
"grad_norm": 2.3013577461242676,
"learning_rate": 8.832935527377518e-05,
"loss": 0.0914,
"step": 710
},
{
"epoch": 1.336466165413534,
"grad_norm": 3.4709525108337402,
"learning_rate": 8.825791447306974e-05,
"loss": 0.2248,
"step": 711
},
{
"epoch": 1.3383458646616542,
"grad_norm": 2.144796133041382,
"learning_rate": 8.81862962444726e-05,
"loss": 0.1061,
"step": 712
},
{
"epoch": 1.3402255639097744,
"grad_norm": 3.4525840282440186,
"learning_rate": 8.811450108131059e-05,
"loss": 0.2445,
"step": 713
},
{
"epoch": 1.3421052631578947,
"grad_norm": 2.398047685623169,
"learning_rate": 8.804252947812936e-05,
"loss": 0.1304,
"step": 714
},
{
"epoch": 1.3439849624060152,
"grad_norm": 1.2655713558197021,
"learning_rate": 8.797038193068994e-05,
"loss": 0.0253,
"step": 715
},
{
"epoch": 1.3458646616541352,
"grad_norm": 3.264692783355713,
"learning_rate": 8.78980589359653e-05,
"loss": 0.2007,
"step": 716
},
{
"epoch": 1.3477443609022557,
"grad_norm": 1.141622543334961,
"learning_rate": 8.782556099213692e-05,
"loss": 0.0736,
"step": 717
},
{
"epoch": 1.349624060150376,
"grad_norm": 3.286989688873291,
"learning_rate": 8.775288859859142e-05,
"loss": 0.166,
"step": 718
},
{
"epoch": 1.3515037593984962,
"grad_norm": 3.4484078884124756,
"learning_rate": 8.768004225591704e-05,
"loss": 0.1696,
"step": 719
},
{
"epoch": 1.3533834586466165,
"grad_norm": 2.700953960418701,
"learning_rate": 8.760702246590026e-05,
"loss": 0.1204,
"step": 720
},
{
"epoch": 1.3552631578947367,
"grad_norm": 1.3329017162322998,
"learning_rate": 8.753382973152233e-05,
"loss": 0.0349,
"step": 721
},
{
"epoch": 1.3571428571428572,
"grad_norm": 3.4619643688201904,
"learning_rate": 8.746046455695572e-05,
"loss": 0.2092,
"step": 722
},
{
"epoch": 1.3590225563909775,
"grad_norm": 3.6183106899261475,
"learning_rate": 8.73869274475608e-05,
"loss": 0.2843,
"step": 723
},
{
"epoch": 1.3609022556390977,
"grad_norm": 2.5243875980377197,
"learning_rate": 8.731321890988223e-05,
"loss": 0.0976,
"step": 724
},
{
"epoch": 1.362781954887218,
"grad_norm": 2.034864664077759,
"learning_rate": 8.723933945164553e-05,
"loss": 0.0867,
"step": 725
},
{
"epoch": 1.3646616541353382,
"grad_norm": 2.596176862716675,
"learning_rate": 8.716528958175354e-05,
"loss": 0.1035,
"step": 726
},
{
"epoch": 1.3665413533834587,
"grad_norm": 2.702852487564087,
"learning_rate": 8.709106981028301e-05,
"loss": 0.1341,
"step": 727
},
{
"epoch": 1.368421052631579,
"grad_norm": 3.1285736560821533,
"learning_rate": 8.701668064848096e-05,
"loss": 0.1771,
"step": 728
},
{
"epoch": 1.3703007518796992,
"grad_norm": 3.2308318614959717,
"learning_rate": 8.694212260876125e-05,
"loss": 0.1118,
"step": 729
},
{
"epoch": 1.3721804511278195,
"grad_norm": 3.1482093334198,
"learning_rate": 8.686739620470099e-05,
"loss": 0.1495,
"step": 730
},
{
"epoch": 1.3740601503759398,
"grad_norm": 2.186100721359253,
"learning_rate": 8.679250195103707e-05,
"loss": 0.0727,
"step": 731
},
{
"epoch": 1.3759398496240602,
"grad_norm": 2.4259281158447266,
"learning_rate": 8.671744036366256e-05,
"loss": 0.0749,
"step": 732
},
{
"epoch": 1.3778195488721805,
"grad_norm": 2.370572566986084,
"learning_rate": 8.664221195962318e-05,
"loss": 0.0647,
"step": 733
},
{
"epoch": 1.3796992481203008,
"grad_norm": 2.1155412197113037,
"learning_rate": 8.656681725711369e-05,
"loss": 0.0729,
"step": 734
},
{
"epoch": 1.381578947368421,
"grad_norm": 2.4955532550811768,
"learning_rate": 8.649125677547441e-05,
"loss": 0.0743,
"step": 735
},
{
"epoch": 1.3834586466165413,
"grad_norm": 3.299919605255127,
"learning_rate": 8.641553103518759e-05,
"loss": 0.1728,
"step": 736
},
{
"epoch": 1.3853383458646618,
"grad_norm": 3.5957415103912354,
"learning_rate": 8.633964055787381e-05,
"loss": 0.2017,
"step": 737
},
{
"epoch": 1.387218045112782,
"grad_norm": 3.4752085208892822,
"learning_rate": 8.62635858662884e-05,
"loss": 0.2164,
"step": 738
},
{
"epoch": 1.3890977443609023,
"grad_norm": 2.705174207687378,
"learning_rate": 8.618736748431786e-05,
"loss": 0.1107,
"step": 739
},
{
"epoch": 1.3909774436090225,
"grad_norm": 1.9250751733779907,
"learning_rate": 8.611098593697624e-05,
"loss": 0.0747,
"step": 740
},
{
"epoch": 1.3928571428571428,
"grad_norm": 3.185225248336792,
"learning_rate": 8.603444175040151e-05,
"loss": 0.1181,
"step": 741
},
{
"epoch": 1.3947368421052633,
"grad_norm": 3.068506956100464,
"learning_rate": 8.595773545185196e-05,
"loss": 0.1958,
"step": 742
},
{
"epoch": 1.3966165413533835,
"grad_norm": 4.703313827514648,
"learning_rate": 8.588086756970252e-05,
"loss": 0.3773,
"step": 743
},
{
"epoch": 1.3984962406015038,
"grad_norm": 3.395601511001587,
"learning_rate": 8.580383863344118e-05,
"loss": 0.1545,
"step": 744
},
{
"epoch": 1.400375939849624,
"grad_norm": 2.7103271484375,
"learning_rate": 8.572664917366534e-05,
"loss": 0.1117,
"step": 745
},
{
"epoch": 1.4022556390977443,
"grad_norm": 1.5318002700805664,
"learning_rate": 8.564929972207808e-05,
"loss": 0.0535,
"step": 746
},
{
"epoch": 1.4041353383458648,
"grad_norm": 3.2796318531036377,
"learning_rate": 8.557179081148459e-05,
"loss": 0.1782,
"step": 747
},
{
"epoch": 1.4060150375939848,
"grad_norm": 4.089963436126709,
"learning_rate": 8.549412297578841e-05,
"loss": 0.2679,
"step": 748
},
{
"epoch": 1.4078947368421053,
"grad_norm": 2.2563154697418213,
"learning_rate": 8.541629674998787e-05,
"loss": 0.063,
"step": 749
},
{
"epoch": 1.4097744360902256,
"grad_norm": 4.269903182983398,
"learning_rate": 8.533831267017232e-05,
"loss": 0.2878,
"step": 750
},
{
"epoch": 1.4097744360902256,
"eval_global_dataset_loss": 1.0132509469985962,
"eval_global_dataset_runtime": 72.6207,
"eval_global_dataset_samples_per_second": 13.385,
"eval_global_dataset_steps_per_second": 0.055,
"eval_sequential_score": 0.9202925060297567,
"eval_sts-test-1024_pearson_cosine": 0.9026730374123166,
"eval_sts-test-1024_spearman_cosine": 0.9203161346957712,
"eval_sts-test-1280_pearson_cosine": 0.9026700561671179,
"eval_sts-test-1280_spearman_cosine": 0.9203345274642032,
"eval_sts-test-512_pearson_cosine": 0.9028502216142966,
"eval_sts-test-512_spearman_cosine": 0.9202925060297567,
"eval_sts-test-760_pearson_cosine": 0.9010905085003336,
"eval_sts-test-760_spearman_cosine": 0.9196744016074186,
"eval_sts-test_pearson_cosine": 0.9026655939239919,
"eval_sts-test_spearman_cosine": 0.920319401537853,
"step": 750
},
{
"epoch": 1.4116541353383458,
"grad_norm": 3.073023796081543,
"learning_rate": 8.526017127351838e-05,
"loss": 0.134,
"step": 751
},
{
"epoch": 1.413533834586466,
"grad_norm": 2.0361175537109375,
"learning_rate": 8.518187309828641e-05,
"loss": 0.0676,
"step": 752
},
{
"epoch": 1.4154135338345863,
"grad_norm": 1.954580545425415,
"learning_rate": 8.510341868381665e-05,
"loss": 0.0841,
"step": 753
},
{
"epoch": 1.4172932330827068,
"grad_norm": 2.2092957496643066,
"learning_rate": 8.502480857052559e-05,
"loss": 0.1125,
"step": 754
},
{
"epoch": 1.419172932330827,
"grad_norm": 2.514190673828125,
"learning_rate": 8.49460432999022e-05,
"loss": 0.0927,
"step": 755
},
{
"epoch": 1.4210526315789473,
"grad_norm": 2.4373466968536377,
"learning_rate": 8.486712341450417e-05,
"loss": 0.0954,
"step": 756
},
{
"epoch": 1.4229323308270676,
"grad_norm": 3.186216354370117,
"learning_rate": 8.478804945795435e-05,
"loss": 0.1168,
"step": 757
},
{
"epoch": 1.4248120300751879,
"grad_norm": 2.6759350299835205,
"learning_rate": 8.470882197493675e-05,
"loss": 0.1191,
"step": 758
},
{
"epoch": 1.4266917293233083,
"grad_norm": 3.011976718902588,
"learning_rate": 8.4629441511193e-05,
"loss": 0.1219,
"step": 759
},
{
"epoch": 1.4285714285714286,
"grad_norm": 1.6346877813339233,
"learning_rate": 8.454990861351843e-05,
"loss": 0.0334,
"step": 760
},
{
"epoch": 1.4304511278195489,
"grad_norm": 2.6164300441741943,
"learning_rate": 8.447022382975843e-05,
"loss": 0.0886,
"step": 761
},
{
"epoch": 1.4323308270676691,
"grad_norm": 3.3143882751464844,
"learning_rate": 8.439038770880463e-05,
"loss": 0.1105,
"step": 762
},
{
"epoch": 1.4342105263157894,
"grad_norm": 0.7730489373207092,
"learning_rate": 8.431040080059108e-05,
"loss": 0.0174,
"step": 763
},
{
"epoch": 1.4360902255639099,
"grad_norm": 3.0229296684265137,
"learning_rate": 8.423026365609049e-05,
"loss": 0.1044,
"step": 764
},
{
"epoch": 1.4379699248120301,
"grad_norm": 2.514319896697998,
"learning_rate": 8.41499768273105e-05,
"loss": 0.0946,
"step": 765
},
{
"epoch": 1.4398496240601504,
"grad_norm": 3.089364528656006,
"learning_rate": 8.406954086728976e-05,
"loss": 0.1212,
"step": 766
},
{
"epoch": 1.4417293233082706,
"grad_norm": 3.5201704502105713,
"learning_rate": 8.39889563300942e-05,
"loss": 0.15,
"step": 767
},
{
"epoch": 1.443609022556391,
"grad_norm": 2.829073667526245,
"learning_rate": 8.39082237708132e-05,
"loss": 0.1287,
"step": 768
},
{
"epoch": 1.4454887218045114,
"grad_norm": 4.4007954597473145,
"learning_rate": 8.382734374555574e-05,
"loss": 0.3371,
"step": 769
},
{
"epoch": 1.4473684210526316,
"grad_norm": 3.454887866973877,
"learning_rate": 8.37463168114466e-05,
"loss": 0.1933,
"step": 770
},
{
"epoch": 1.449248120300752,
"grad_norm": 1.4952677488327026,
"learning_rate": 8.366514352662252e-05,
"loss": 0.0267,
"step": 771
},
{
"epoch": 1.4511278195488722,
"grad_norm": 2.9622116088867188,
"learning_rate": 8.358382445022829e-05,
"loss": 0.118,
"step": 772
},
{
"epoch": 1.4530075187969924,
"grad_norm": 2.047785758972168,
"learning_rate": 8.350236014241304e-05,
"loss": 0.048,
"step": 773
},
{
"epoch": 1.454887218045113,
"grad_norm": 3.2718257904052734,
"learning_rate": 8.342075116432625e-05,
"loss": 0.1522,
"step": 774
},
{
"epoch": 1.4567669172932332,
"grad_norm": 4.418330192565918,
"learning_rate": 8.333899807811391e-05,
"loss": 0.24,
"step": 775
},
{
"epoch": 1.4586466165413534,
"grad_norm": 3.0729095935821533,
"learning_rate": 8.325710144691472e-05,
"loss": 0.1502,
"step": 776
},
{
"epoch": 1.4605263157894737,
"grad_norm": 2.873767614364624,
"learning_rate": 8.31750618348561e-05,
"loss": 0.1437,
"step": 777
},
{
"epoch": 1.462406015037594,
"grad_norm": 3.250378131866455,
"learning_rate": 8.309287980705043e-05,
"loss": 0.1875,
"step": 778
},
{
"epoch": 1.4642857142857144,
"grad_norm": 3.683256149291992,
"learning_rate": 8.301055592959101e-05,
"loss": 0.2667,
"step": 779
},
{
"epoch": 1.4661654135338344,
"grad_norm": 2.6825520992279053,
"learning_rate": 8.292809076954832e-05,
"loss": 0.1094,
"step": 780
},
{
"epoch": 1.468045112781955,
"grad_norm": 3.586061477661133,
"learning_rate": 8.284548489496599e-05,
"loss": 0.2173,
"step": 781
},
{
"epoch": 1.4699248120300752,
"grad_norm": 2.057910919189453,
"learning_rate": 8.276273887485693e-05,
"loss": 0.0855,
"step": 782
},
{
"epoch": 1.4718045112781954,
"grad_norm": 2.88437557220459,
"learning_rate": 8.267985327919943e-05,
"loss": 0.1287,
"step": 783
},
{
"epoch": 1.4736842105263157,
"grad_norm": 2.481588125228882,
"learning_rate": 8.259682867893322e-05,
"loss": 0.1129,
"step": 784
},
{
"epoch": 1.475563909774436,
"grad_norm": 2.7012627124786377,
"learning_rate": 8.251366564595551e-05,
"loss": 0.0758,
"step": 785
},
{
"epoch": 1.4774436090225564,
"grad_norm": 3.422213554382324,
"learning_rate": 8.24303647531171e-05,
"loss": 0.2099,
"step": 786
},
{
"epoch": 1.4793233082706767,
"grad_norm": 4.288575649261475,
"learning_rate": 8.234692657421838e-05,
"loss": 0.2751,
"step": 787
},
{
"epoch": 1.481203007518797,
"grad_norm": 3.3151443004608154,
"learning_rate": 8.226335168400547e-05,
"loss": 0.1354,
"step": 788
},
{
"epoch": 1.4830827067669172,
"grad_norm": 1.8601645231246948,
"learning_rate": 8.217964065816609e-05,
"loss": 0.058,
"step": 789
},
{
"epoch": 1.4849624060150375,
"grad_norm": 2.664788246154785,
"learning_rate": 8.209579407332578e-05,
"loss": 0.1121,
"step": 790
},
{
"epoch": 1.486842105263158,
"grad_norm": 3.37274432182312,
"learning_rate": 8.201181250704382e-05,
"loss": 0.2309,
"step": 791
},
{
"epoch": 1.4887218045112782,
"grad_norm": 2.6739940643310547,
"learning_rate": 8.192769653780931e-05,
"loss": 0.1206,
"step": 792
},
{
"epoch": 1.4906015037593985,
"grad_norm": 2.947044849395752,
"learning_rate": 8.184344674503716e-05,
"loss": 0.1848,
"step": 793
},
{
"epoch": 1.4924812030075187,
"grad_norm": 1.8714139461517334,
"learning_rate": 8.175906370906401e-05,
"loss": 0.1164,
"step": 794
},
{
"epoch": 1.494360902255639,
"grad_norm": 2.4230494499206543,
"learning_rate": 8.167454801114442e-05,
"loss": 0.1058,
"step": 795
},
{
"epoch": 1.4962406015037595,
"grad_norm": 2.2121522426605225,
"learning_rate": 8.15899002334467e-05,
"loss": 0.0676,
"step": 796
},
{
"epoch": 1.4981203007518797,
"grad_norm": 2.338106632232666,
"learning_rate": 8.150512095904901e-05,
"loss": 0.0922,
"step": 797
},
{
"epoch": 1.5,
"grad_norm": 2.665654420852661,
"learning_rate": 8.142021077193524e-05,
"loss": 0.1241,
"step": 798
},
{
"epoch": 1.5018796992481203,
"grad_norm": 2.739638566970825,
"learning_rate": 8.13351702569911e-05,
"loss": 0.1232,
"step": 799
},
{
"epoch": 1.5037593984962405,
"grad_norm": 2.2335495948791504,
"learning_rate": 8.125000000000001e-05,
"loss": 0.0644,
"step": 800
},
{
"epoch": 1.5037593984962405,
"eval_global_dataset_loss": 1.0063103437423706,
"eval_global_dataset_runtime": 74.1382,
"eval_global_dataset_samples_per_second": 13.111,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9225955849460038,
"eval_sts-test-1024_pearson_cosine": 0.9052989812166632,
"eval_sts-test-1024_spearman_cosine": 0.9221961799380853,
"eval_sts-test-1280_pearson_cosine": 0.9052941365370162,
"eval_sts-test-1280_spearman_cosine": 0.9222044589214429,
"eval_sts-test-512_pearson_cosine": 0.9048916374185606,
"eval_sts-test-512_spearman_cosine": 0.9225955849460038,
"eval_sts-test-760_pearson_cosine": 0.9038999523705149,
"eval_sts-test-760_spearman_cosine": 0.9219881313238769,
"eval_sts-test_pearson_cosine": 0.9052886258574522,
"eval_sts-test_spearman_cosine": 0.922200610312963,
"step": 800
},
{
"epoch": 1.505639097744361,
"grad_norm": 3.764857769012451,
"learning_rate": 8.116470058763909e-05,
"loss": 0.2569,
"step": 801
},
{
"epoch": 1.5075187969924813,
"grad_norm": 0.6914541125297546,
"learning_rate": 8.10792726074751e-05,
"loss": 0.0157,
"step": 802
},
{
"epoch": 1.5093984962406015,
"grad_norm": 2.945553779602051,
"learning_rate": 8.099371664796047e-05,
"loss": 0.1063,
"step": 803
},
{
"epoch": 1.5112781954887218,
"grad_norm": 2.2534825801849365,
"learning_rate": 8.090803329842914e-05,
"loss": 0.1023,
"step": 804
},
{
"epoch": 1.513157894736842,
"grad_norm": 3.933741331100464,
"learning_rate": 8.082222314909253e-05,
"loss": 0.2308,
"step": 805
},
{
"epoch": 1.5150375939849625,
"grad_norm": 3.690402030944824,
"learning_rate": 8.073628679103555e-05,
"loss": 0.2341,
"step": 806
},
{
"epoch": 1.5169172932330826,
"grad_norm": 3.517522096633911,
"learning_rate": 8.065022481621248e-05,
"loss": 0.2281,
"step": 807
},
{
"epoch": 1.518796992481203,
"grad_norm": 2.9169869422912598,
"learning_rate": 8.056403781744278e-05,
"loss": 0.1646,
"step": 808
},
{
"epoch": 1.5206766917293233,
"grad_norm": 2.823941230773926,
"learning_rate": 8.047772638840721e-05,
"loss": 0.1352,
"step": 809
},
{
"epoch": 1.5225563909774436,
"grad_norm": 2.6774966716766357,
"learning_rate": 8.039129112364363e-05,
"loss": 0.0896,
"step": 810
},
{
"epoch": 1.524436090225564,
"grad_norm": 3.2963550090789795,
"learning_rate": 8.030473261854288e-05,
"loss": 0.173,
"step": 811
},
{
"epoch": 1.526315789473684,
"grad_norm": 4.204916954040527,
"learning_rate": 8.021805146934473e-05,
"loss": 0.2946,
"step": 812
},
{
"epoch": 1.5281954887218046,
"grad_norm": 1.9068849086761475,
"learning_rate": 8.013124827313379e-05,
"loss": 0.0928,
"step": 813
},
{
"epoch": 1.5300751879699248,
"grad_norm": 1.860642433166504,
"learning_rate": 8.00443236278353e-05,
"loss": 0.1008,
"step": 814
},
{
"epoch": 1.531954887218045,
"grad_norm": 3.6413190364837646,
"learning_rate": 7.995727813221113e-05,
"loss": 0.2301,
"step": 815
},
{
"epoch": 1.5338345864661656,
"grad_norm": 1.9489543437957764,
"learning_rate": 7.987011238585555e-05,
"loss": 0.0953,
"step": 816
},
{
"epoch": 1.5357142857142856,
"grad_norm": 2.9605321884155273,
"learning_rate": 7.978282698919125e-05,
"loss": 0.1275,
"step": 817
},
{
"epoch": 1.537593984962406,
"grad_norm": 3.240093469619751,
"learning_rate": 7.969542254346498e-05,
"loss": 0.167,
"step": 818
},
{
"epoch": 1.5394736842105263,
"grad_norm": 3.018465518951416,
"learning_rate": 7.960789965074362e-05,
"loss": 0.1617,
"step": 819
},
{
"epoch": 1.5413533834586466,
"grad_norm": 2.4230682849884033,
"learning_rate": 7.952025891390991e-05,
"loss": 0.1428,
"step": 820
},
{
"epoch": 1.543233082706767,
"grad_norm": 3.2175021171569824,
"learning_rate": 7.943250093665835e-05,
"loss": 0.1515,
"step": 821
},
{
"epoch": 1.545112781954887,
"grad_norm": 2.267045259475708,
"learning_rate": 7.934462632349103e-05,
"loss": 0.0876,
"step": 822
},
{
"epoch": 1.5469924812030076,
"grad_norm": 3.216639995574951,
"learning_rate": 7.925663567971345e-05,
"loss": 0.1386,
"step": 823
},
{
"epoch": 1.5488721804511278,
"grad_norm": 4.30193567276001,
"learning_rate": 7.916852961143035e-05,
"loss": 0.35,
"step": 824
},
{
"epoch": 1.550751879699248,
"grad_norm": 3.556504964828491,
"learning_rate": 7.908030872554155e-05,
"loss": 0.1543,
"step": 825
},
{
"epoch": 1.5526315789473686,
"grad_norm": 3.369008779525757,
"learning_rate": 7.899197362973782e-05,
"loss": 0.1913,
"step": 826
},
{
"epoch": 1.5545112781954886,
"grad_norm": 2.986630916595459,
"learning_rate": 7.890352493249656e-05,
"loss": 0.1459,
"step": 827
},
{
"epoch": 1.556390977443609,
"grad_norm": 2.2407569885253906,
"learning_rate": 7.881496324307776e-05,
"loss": 0.0726,
"step": 828
},
{
"epoch": 1.5582706766917294,
"grad_norm": 2.5670650005340576,
"learning_rate": 7.872628917151967e-05,
"loss": 0.1532,
"step": 829
},
{
"epoch": 1.5601503759398496,
"grad_norm": 3.387564182281494,
"learning_rate": 7.863750332863468e-05,
"loss": 0.2685,
"step": 830
},
{
"epoch": 1.5620300751879699,
"grad_norm": 2.2817680835723877,
"learning_rate": 7.854860632600517e-05,
"loss": 0.0786,
"step": 831
},
{
"epoch": 1.5639097744360901,
"grad_norm": 2.6481478214263916,
"learning_rate": 7.845959877597908e-05,
"loss": 0.1155,
"step": 832
},
{
"epoch": 1.5657894736842106,
"grad_norm": 2.793203592300415,
"learning_rate": 7.837048129166594e-05,
"loss": 0.0849,
"step": 833
},
{
"epoch": 1.5676691729323309,
"grad_norm": 1.97296142578125,
"learning_rate": 7.82812544869325e-05,
"loss": 0.0519,
"step": 834
},
{
"epoch": 1.5695488721804511,
"grad_norm": 3.4556632041931152,
"learning_rate": 7.819191897639854e-05,
"loss": 0.1754,
"step": 835
},
{
"epoch": 1.5714285714285714,
"grad_norm": 3.4186487197875977,
"learning_rate": 7.810247537543265e-05,
"loss": 0.1959,
"step": 836
},
{
"epoch": 1.5733082706766917,
"grad_norm": 3.430490255355835,
"learning_rate": 7.801292430014797e-05,
"loss": 0.1705,
"step": 837
},
{
"epoch": 1.5751879699248121,
"grad_norm": 3.6400420665740967,
"learning_rate": 7.792326636739795e-05,
"loss": 0.2029,
"step": 838
},
{
"epoch": 1.5770676691729322,
"grad_norm": 2.497185707092285,
"learning_rate": 7.783350219477208e-05,
"loss": 0.0788,
"step": 839
},
{
"epoch": 1.5789473684210527,
"grad_norm": 2.136199951171875,
"learning_rate": 7.774363240059174e-05,
"loss": 0.0554,
"step": 840
},
{
"epoch": 1.580827067669173,
"grad_norm": 2.642343521118164,
"learning_rate": 7.765365760390577e-05,
"loss": 0.1014,
"step": 841
},
{
"epoch": 1.5827067669172932,
"grad_norm": 3.2328081130981445,
"learning_rate": 7.756357842448636e-05,
"loss": 0.1713,
"step": 842
},
{
"epoch": 1.5845864661654137,
"grad_norm": 2.859098434448242,
"learning_rate": 7.747339548282469e-05,
"loss": 0.163,
"step": 843
},
{
"epoch": 1.5864661654135337,
"grad_norm": 1.999979019165039,
"learning_rate": 7.738310940012668e-05,
"loss": 0.0694,
"step": 844
},
{
"epoch": 1.5883458646616542,
"grad_norm": 1.6762597560882568,
"learning_rate": 7.729272079830874e-05,
"loss": 0.0914,
"step": 845
},
{
"epoch": 1.5902255639097744,
"grad_norm": 3.4642903804779053,
"learning_rate": 7.720223029999343e-05,
"loss": 0.2394,
"step": 846
},
{
"epoch": 1.5921052631578947,
"grad_norm": 2.8646364212036133,
"learning_rate": 7.711163852850523e-05,
"loss": 0.1341,
"step": 847
},
{
"epoch": 1.5939849624060152,
"grad_norm": 2.863694190979004,
"learning_rate": 7.702094610786621e-05,
"loss": 0.1077,
"step": 848
},
{
"epoch": 1.5958646616541352,
"grad_norm": 3.843398094177246,
"learning_rate": 7.693015366279173e-05,
"loss": 0.2587,
"step": 849
},
{
"epoch": 1.5977443609022557,
"grad_norm": 2.592925548553467,
"learning_rate": 7.683926181868613e-05,
"loss": 0.1046,
"step": 850
},
{
"epoch": 1.5977443609022557,
"eval_global_dataset_loss": 0.9889132976531982,
"eval_global_dataset_runtime": 73.9652,
"eval_global_dataset_samples_per_second": 13.141,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.920544992646527,
"eval_sts-test-1024_pearson_cosine": 0.9048893863916063,
"eval_sts-test-1024_spearman_cosine": 0.9210602139189262,
"eval_sts-test-1280_pearson_cosine": 0.9048860398282079,
"eval_sts-test-1280_spearman_cosine": 0.9210629437458709,
"eval_sts-test-512_pearson_cosine": 0.9043480001505791,
"eval_sts-test-512_spearman_cosine": 0.920544992646527,
"eval_sts-test-760_pearson_cosine": 0.903769561880325,
"eval_sts-test-760_spearman_cosine": 0.9211490899240494,
"eval_sts-test_pearson_cosine": 0.9048811346013073,
"eval_sts-test_spearman_cosine": 0.9210440139623024,
"step": 850
},
{
"epoch": 1.599624060150376,
"grad_norm": 2.807727813720703,
"learning_rate": 7.67482712016385e-05,
"loss": 0.1489,
"step": 851
},
{
"epoch": 1.6015037593984962,
"grad_norm": 2.7714085578918457,
"learning_rate": 7.665718243841826e-05,
"loss": 0.1306,
"step": 852
},
{
"epoch": 1.6033834586466167,
"grad_norm": 2.3455610275268555,
"learning_rate": 7.656599615647089e-05,
"loss": 0.0815,
"step": 853
},
{
"epoch": 1.6052631578947367,
"grad_norm": 2.8961308002471924,
"learning_rate": 7.647471298391362e-05,
"loss": 0.1799,
"step": 854
},
{
"epoch": 1.6071428571428572,
"grad_norm": 2.836217164993286,
"learning_rate": 7.638333354953108e-05,
"loss": 0.1138,
"step": 855
},
{
"epoch": 1.6090225563909775,
"grad_norm": 1.5786960124969482,
"learning_rate": 7.6291858482771e-05,
"loss": 0.042,
"step": 856
},
{
"epoch": 1.6109022556390977,
"grad_norm": 1.3835277557373047,
"learning_rate": 7.620028841373983e-05,
"loss": 0.0387,
"step": 857
},
{
"epoch": 1.6127819548872182,
"grad_norm": 2.7180635929107666,
"learning_rate": 7.61086239731984e-05,
"loss": 0.0929,
"step": 858
},
{
"epoch": 1.6146616541353382,
"grad_norm": 1.880897045135498,
"learning_rate": 7.601686579255768e-05,
"loss": 0.0656,
"step": 859
},
{
"epoch": 1.6165413533834587,
"grad_norm": 2.802232503890991,
"learning_rate": 7.592501450387425e-05,
"loss": 0.108,
"step": 860
},
{
"epoch": 1.618421052631579,
"grad_norm": 3.5828518867492676,
"learning_rate": 7.583307073984613e-05,
"loss": 0.2153,
"step": 861
},
{
"epoch": 1.6203007518796992,
"grad_norm": 3.0358164310455322,
"learning_rate": 7.574103513380826e-05,
"loss": 0.1542,
"step": 862
},
{
"epoch": 1.6221804511278195,
"grad_norm": 3.075732946395874,
"learning_rate": 7.56489083197283e-05,
"loss": 0.1405,
"step": 863
},
{
"epoch": 1.6240601503759398,
"grad_norm": 2.816676378250122,
"learning_rate": 7.555669093220208e-05,
"loss": 0.1154,
"step": 864
},
{
"epoch": 1.6259398496240602,
"grad_norm": 2.9650542736053467,
"learning_rate": 7.546438360644942e-05,
"loss": 0.116,
"step": 865
},
{
"epoch": 1.6278195488721805,
"grad_norm": 3.609433650970459,
"learning_rate": 7.537198697830958e-05,
"loss": 0.2381,
"step": 866
},
{
"epoch": 1.6296992481203008,
"grad_norm": 4.428701877593994,
"learning_rate": 7.527950168423704e-05,
"loss": 0.2811,
"step": 867
},
{
"epoch": 1.631578947368421,
"grad_norm": 2.2774205207824707,
"learning_rate": 7.518692836129693e-05,
"loss": 0.0911,
"step": 868
},
{
"epoch": 1.6334586466165413,
"grad_norm": 2.677185535430908,
"learning_rate": 7.509426764716086e-05,
"loss": 0.1305,
"step": 869
},
{
"epoch": 1.6353383458646618,
"grad_norm": 2.101545572280884,
"learning_rate": 7.500152018010233e-05,
"loss": 0.0658,
"step": 870
},
{
"epoch": 1.6372180451127818,
"grad_norm": 3.0622670650482178,
"learning_rate": 7.49086865989925e-05,
"loss": 0.1623,
"step": 871
},
{
"epoch": 1.6390977443609023,
"grad_norm": 1.6130925416946411,
"learning_rate": 7.481576754329564e-05,
"loss": 0.0453,
"step": 872
},
{
"epoch": 1.6409774436090225,
"grad_norm": 2.1795146465301514,
"learning_rate": 7.472276365306481e-05,
"loss": 0.1334,
"step": 873
},
{
"epoch": 1.6428571428571428,
"grad_norm": 2.377540349960327,
"learning_rate": 7.462967556893743e-05,
"loss": 0.0564,
"step": 874
},
{
"epoch": 1.6447368421052633,
"grad_norm": 2.5608479976654053,
"learning_rate": 7.453650393213092e-05,
"loss": 0.1334,
"step": 875
},
{
"epoch": 1.6466165413533833,
"grad_norm": 3.679516553878784,
"learning_rate": 7.444324938443817e-05,
"loss": 0.1552,
"step": 876
},
{
"epoch": 1.6484962406015038,
"grad_norm": 2.912466049194336,
"learning_rate": 7.434991256822323e-05,
"loss": 0.1236,
"step": 877
},
{
"epoch": 1.650375939849624,
"grad_norm": 3.505437135696411,
"learning_rate": 7.425649412641681e-05,
"loss": 0.175,
"step": 878
},
{
"epoch": 1.6522556390977443,
"grad_norm": 4.615713119506836,
"learning_rate": 7.416299470251187e-05,
"loss": 0.2705,
"step": 879
},
{
"epoch": 1.6541353383458648,
"grad_norm": 3.014660120010376,
"learning_rate": 7.406941494055927e-05,
"loss": 0.1821,
"step": 880
},
{
"epoch": 1.6560150375939848,
"grad_norm": 2.3996410369873047,
"learning_rate": 7.397575548516317e-05,
"loss": 0.1217,
"step": 881
},
{
"epoch": 1.6578947368421053,
"grad_norm": 0.7946998476982117,
"learning_rate": 7.388201698147674e-05,
"loss": 0.0191,
"step": 882
},
{
"epoch": 1.6597744360902256,
"grad_norm": 3.1606884002685547,
"learning_rate": 7.378820007519763e-05,
"loss": 0.1562,
"step": 883
},
{
"epoch": 1.6616541353383458,
"grad_norm": 2.4829394817352295,
"learning_rate": 7.369430541256354e-05,
"loss": 0.1229,
"step": 884
},
{
"epoch": 1.6635338345864663,
"grad_norm": 4.674249649047852,
"learning_rate": 7.360033364034785e-05,
"loss": 0.4534,
"step": 885
},
{
"epoch": 1.6654135338345863,
"grad_norm": 2.6263651847839355,
"learning_rate": 7.350628540585501e-05,
"loss": 0.107,
"step": 886
},
{
"epoch": 1.6672932330827068,
"grad_norm": 1.9486063718795776,
"learning_rate": 7.341216135691617e-05,
"loss": 0.0396,
"step": 887
},
{
"epoch": 1.669172932330827,
"grad_norm": 2.4839587211608887,
"learning_rate": 7.331796214188478e-05,
"loss": 0.0831,
"step": 888
},
{
"epoch": 1.6710526315789473,
"grad_norm": 4.256960391998291,
"learning_rate": 7.322368840963198e-05,
"loss": 0.2312,
"step": 889
},
{
"epoch": 1.6729323308270678,
"grad_norm": 2.9588348865509033,
"learning_rate": 7.312934080954225e-05,
"loss": 0.107,
"step": 890
},
{
"epoch": 1.6748120300751879,
"grad_norm": 3.246208667755127,
"learning_rate": 7.303491999150887e-05,
"loss": 0.1322,
"step": 891
},
{
"epoch": 1.6766917293233083,
"grad_norm": 3.030524969100952,
"learning_rate": 7.294042660592947e-05,
"loss": 0.1794,
"step": 892
},
{
"epoch": 1.6785714285714286,
"grad_norm": 4.523505210876465,
"learning_rate": 7.284586130370156e-05,
"loss": 0.2517,
"step": 893
},
{
"epoch": 1.6804511278195489,
"grad_norm": 3.097930431365967,
"learning_rate": 7.275122473621806e-05,
"loss": 0.1961,
"step": 894
},
{
"epoch": 1.6823308270676691,
"grad_norm": 3.7234556674957275,
"learning_rate": 7.26565175553627e-05,
"loss": 0.2131,
"step": 895
},
{
"epoch": 1.6842105263157894,
"grad_norm": 2.1135101318359375,
"learning_rate": 7.256174041350568e-05,
"loss": 0.127,
"step": 896
},
{
"epoch": 1.6860902255639099,
"grad_norm": 1.9874850511550903,
"learning_rate": 7.24668939634991e-05,
"loss": 0.096,
"step": 897
},
{
"epoch": 1.6879699248120301,
"grad_norm": 2.357801675796509,
"learning_rate": 7.237197885867248e-05,
"loss": 0.1038,
"step": 898
},
{
"epoch": 1.6898496240601504,
"grad_norm": 3.280027389526367,
"learning_rate": 7.227699575282823e-05,
"loss": 0.1448,
"step": 899
},
{
"epoch": 1.6917293233082706,
"grad_norm": 2.795931577682495,
"learning_rate": 7.218194530023718e-05,
"loss": 0.2087,
"step": 900
},
{
"epoch": 1.6917293233082706,
"eval_global_dataset_loss": 1.026059627532959,
"eval_global_dataset_runtime": 75.3127,
"eval_global_dataset_samples_per_second": 12.906,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9175974508149826,
"eval_sts-test-1024_pearson_cosine": 0.9021183505960124,
"eval_sts-test-1024_spearman_cosine": 0.9182267878039297,
"eval_sts-test-1280_pearson_cosine": 0.9021185166658658,
"eval_sts-test-1280_spearman_cosine": 0.9182425402479394,
"eval_sts-test-512_pearson_cosine": 0.9007274650132412,
"eval_sts-test-512_spearman_cosine": 0.9175974508149826,
"eval_sts-test-760_pearson_cosine": 0.9007953604283914,
"eval_sts-test-760_spearman_cosine": 0.9180001226649821,
"eval_sts-test_pearson_cosine": 0.9021177440718349,
"eval_sts-test_spearman_cosine": 0.9182428982580305,
"step": 900
},
{
"epoch": 1.693609022556391,
"grad_norm": 3.9899439811706543,
"learning_rate": 7.208682815563409e-05,
"loss": 0.1872,
"step": 901
},
{
"epoch": 1.6954887218045114,
"grad_norm": 4.918177127838135,
"learning_rate": 7.199164497421308e-05,
"loss": 0.3849,
"step": 902
},
{
"epoch": 1.6973684210526314,
"grad_norm": 2.930837869644165,
"learning_rate": 7.189639641162316e-05,
"loss": 0.1832,
"step": 903
},
{
"epoch": 1.699248120300752,
"grad_norm": 3.5482215881347656,
"learning_rate": 7.180108312396373e-05,
"loss": 0.2174,
"step": 904
},
{
"epoch": 1.7011278195488722,
"grad_norm": 2.3238465785980225,
"learning_rate": 7.170570576777997e-05,
"loss": 0.1082,
"step": 905
},
{
"epoch": 1.7030075187969924,
"grad_norm": 2.338536024093628,
"learning_rate": 7.161026500005848e-05,
"loss": 0.1198,
"step": 906
},
{
"epoch": 1.704887218045113,
"grad_norm": 3.4399287700653076,
"learning_rate": 7.151476147822254e-05,
"loss": 0.1817,
"step": 907
},
{
"epoch": 1.706766917293233,
"grad_norm": 1.87295663356781,
"learning_rate": 7.141919586012781e-05,
"loss": 0.0582,
"step": 908
},
{
"epoch": 1.7086466165413534,
"grad_norm": 2.7307817935943604,
"learning_rate": 7.13235688040576e-05,
"loss": 0.1096,
"step": 909
},
{
"epoch": 1.7105263157894737,
"grad_norm": 2.521165132522583,
"learning_rate": 7.12278809687185e-05,
"loss": 0.0846,
"step": 910
},
{
"epoch": 1.712406015037594,
"grad_norm": 2.777625799179077,
"learning_rate": 7.113213301323568e-05,
"loss": 0.1956,
"step": 911
},
{
"epoch": 1.7142857142857144,
"grad_norm": 2.8448679447174072,
"learning_rate": 7.103632559714852e-05,
"loss": 0.1669,
"step": 912
},
{
"epoch": 1.7161654135338344,
"grad_norm": 3.025099754333496,
"learning_rate": 7.09404593804059e-05,
"loss": 0.1405,
"step": 913
},
{
"epoch": 1.718045112781955,
"grad_norm": 3.907864809036255,
"learning_rate": 7.08445350233618e-05,
"loss": 0.2375,
"step": 914
},
{
"epoch": 1.7199248120300752,
"grad_norm": 1.3771640062332153,
"learning_rate": 7.074855318677065e-05,
"loss": 0.0416,
"step": 915
},
{
"epoch": 1.7218045112781954,
"grad_norm": 2.4469826221466064,
"learning_rate": 7.065251453178285e-05,
"loss": 0.0965,
"step": 916
},
{
"epoch": 1.723684210526316,
"grad_norm": 1.9343886375427246,
"learning_rate": 7.055641971994015e-05,
"loss": 0.1192,
"step": 917
},
{
"epoch": 1.725563909774436,
"grad_norm": 2.995832920074463,
"learning_rate": 7.046026941317113e-05,
"loss": 0.1436,
"step": 918
},
{
"epoch": 1.7274436090225564,
"grad_norm": 3.5150604248046875,
"learning_rate": 7.036406427378663e-05,
"loss": 0.2399,
"step": 919
},
{
"epoch": 1.7293233082706767,
"grad_norm": 2.318694829940796,
"learning_rate": 7.026780496447524e-05,
"loss": 0.1052,
"step": 920
},
{
"epoch": 1.731203007518797,
"grad_norm": 3.5060043334960938,
"learning_rate": 7.01714921482986e-05,
"loss": 0.2563,
"step": 921
},
{
"epoch": 1.7330827067669174,
"grad_norm": 3.699139356613159,
"learning_rate": 7.007512648868702e-05,
"loss": 0.2326,
"step": 922
},
{
"epoch": 1.7349624060150375,
"grad_norm": 2.5102176666259766,
"learning_rate": 6.997870864943472e-05,
"loss": 0.0925,
"step": 923
},
{
"epoch": 1.736842105263158,
"grad_norm": 3.0189504623413086,
"learning_rate": 6.98822392946954e-05,
"loss": 0.1497,
"step": 924
},
{
"epoch": 1.7387218045112782,
"grad_norm": 2.051042318344116,
"learning_rate": 6.978571908897764e-05,
"loss": 0.1182,
"step": 925
},
{
"epoch": 1.7406015037593985,
"grad_norm": 2.7945802211761475,
"learning_rate": 6.968914869714019e-05,
"loss": 0.1262,
"step": 926
},
{
"epoch": 1.7424812030075187,
"grad_norm": 2.681675434112549,
"learning_rate": 6.959252878438764e-05,
"loss": 0.1003,
"step": 927
},
{
"epoch": 1.744360902255639,
"grad_norm": 2.2682015895843506,
"learning_rate": 6.949586001626557e-05,
"loss": 0.1105,
"step": 928
},
{
"epoch": 1.7462406015037595,
"grad_norm": 2.556654930114746,
"learning_rate": 6.939914305865615e-05,
"loss": 0.1579,
"step": 929
},
{
"epoch": 1.7481203007518797,
"grad_norm": 1.8412240743637085,
"learning_rate": 6.930237857777351e-05,
"loss": 0.087,
"step": 930
},
{
"epoch": 1.75,
"grad_norm": 4.481410503387451,
"learning_rate": 6.920556724015907e-05,
"loss": 0.3344,
"step": 931
},
{
"epoch": 1.7518796992481203,
"grad_norm": 2.281207323074341,
"learning_rate": 6.910870971267707e-05,
"loss": 0.1117,
"step": 932
},
{
"epoch": 1.7537593984962405,
"grad_norm": 1.5092271566390991,
"learning_rate": 6.901180666250991e-05,
"loss": 0.0418,
"step": 933
},
{
"epoch": 1.755639097744361,
"grad_norm": 2.3904976844787598,
"learning_rate": 6.891485875715351e-05,
"loss": 0.1536,
"step": 934
},
{
"epoch": 1.7575187969924813,
"grad_norm": 2.7644925117492676,
"learning_rate": 6.881786666441283e-05,
"loss": 0.1441,
"step": 935
},
{
"epoch": 1.7593984962406015,
"grad_norm": 3.0127475261688232,
"learning_rate": 6.872083105239719e-05,
"loss": 0.2039,
"step": 936
},
{
"epoch": 1.7612781954887218,
"grad_norm": 3.1405107975006104,
"learning_rate": 6.86237525895157e-05,
"loss": 0.19,
"step": 937
},
{
"epoch": 1.763157894736842,
"grad_norm": 2.284958839416504,
"learning_rate": 6.852663194447255e-05,
"loss": 0.1295,
"step": 938
},
{
"epoch": 1.7650375939849625,
"grad_norm": 2.2690012454986572,
"learning_rate": 6.842946978626262e-05,
"loss": 0.0694,
"step": 939
},
{
"epoch": 1.7669172932330826,
"grad_norm": 2.514007568359375,
"learning_rate": 6.833226678416663e-05,
"loss": 0.133,
"step": 940
},
{
"epoch": 1.768796992481203,
"grad_norm": 2.3104093074798584,
"learning_rate": 6.823502360774673e-05,
"loss": 0.0779,
"step": 941
},
{
"epoch": 1.7706766917293233,
"grad_norm": 1.9387654066085815,
"learning_rate": 6.813774092684176e-05,
"loss": 0.0648,
"step": 942
},
{
"epoch": 1.7725563909774436,
"grad_norm": 2.588404893875122,
"learning_rate": 6.80404194115627e-05,
"loss": 0.1141,
"step": 943
},
{
"epoch": 1.774436090225564,
"grad_norm": 2.852778673171997,
"learning_rate": 6.794305973228799e-05,
"loss": 0.1103,
"step": 944
},
{
"epoch": 1.776315789473684,
"grad_norm": 3.120905876159668,
"learning_rate": 6.7845662559659e-05,
"loss": 0.1673,
"step": 945
},
{
"epoch": 1.7781954887218046,
"grad_norm": 2.5651848316192627,
"learning_rate": 6.774822856457536e-05,
"loss": 0.1971,
"step": 946
},
{
"epoch": 1.7800751879699248,
"grad_norm": 2.2139346599578857,
"learning_rate": 6.76507584181903e-05,
"loss": 0.073,
"step": 947
},
{
"epoch": 1.781954887218045,
"grad_norm": 3.4515695571899414,
"learning_rate": 6.755325279190616e-05,
"loss": 0.1718,
"step": 948
},
{
"epoch": 1.7838345864661656,
"grad_norm": 3.680602550506592,
"learning_rate": 6.745571235736955e-05,
"loss": 0.2008,
"step": 949
},
{
"epoch": 1.7857142857142856,
"grad_norm": 2.1303181648254395,
"learning_rate": 6.735813778646698e-05,
"loss": 0.0967,
"step": 950
},
{
"epoch": 1.7857142857142856,
"eval_global_dataset_loss": 1.0005853176116943,
"eval_global_dataset_runtime": 75.2623,
"eval_global_dataset_samples_per_second": 12.915,
"eval_global_dataset_steps_per_second": 0.053,
"eval_sequential_score": 0.9174314683864816,
"eval_sts-test-1024_pearson_cosine": 0.9016950025477838,
"eval_sts-test-1024_spearman_cosine": 0.9180962931257123,
"eval_sts-test-1280_pearson_cosine": 0.9016927110616119,
"eval_sts-test-1280_spearman_cosine": 0.9181243969178661,
"eval_sts-test-512_pearson_cosine": 0.8999063058043584,
"eval_sts-test-512_spearman_cosine": 0.9174314683864816,
"eval_sts-test-760_pearson_cosine": 0.9003691295599635,
"eval_sts-test-760_spearman_cosine": 0.917645066157103,
"eval_sts-test_pearson_cosine": 0.9016887954419438,
"eval_sts-test_spearman_cosine": 0.9180766920732231,
"step": 950
},
{
"epoch": 1.787593984962406,
"grad_norm": 3.379387617111206,
"learning_rate": 6.726052975131999e-05,
"loss": 0.1966,
"step": 951
},
{
"epoch": 1.7894736842105263,
"grad_norm": 3.0120327472686768,
"learning_rate": 6.71628889242807e-05,
"loss": 0.122,
"step": 952
},
{
"epoch": 1.7913533834586466,
"grad_norm": 2.767864465713501,
"learning_rate": 6.70652159779271e-05,
"loss": 0.1842,
"step": 953
},
{
"epoch": 1.793233082706767,
"grad_norm": 2.6627755165100098,
"learning_rate": 6.696751158505838e-05,
"loss": 0.1725,
"step": 954
},
{
"epoch": 1.795112781954887,
"grad_norm": 2.2890477180480957,
"learning_rate": 6.686977641869045e-05,
"loss": 0.0995,
"step": 955
},
{
"epoch": 1.7969924812030076,
"grad_norm": 3.1283280849456787,
"learning_rate": 6.677201115205105e-05,
"loss": 0.1372,
"step": 956
},
{
"epoch": 1.7988721804511278,
"grad_norm": 3.510864496231079,
"learning_rate": 6.66742164585754e-05,
"loss": 0.2178,
"step": 957
},
{
"epoch": 1.800751879699248,
"grad_norm": 1.897053599357605,
"learning_rate": 6.65763930119013e-05,
"loss": 0.0658,
"step": 958
},
{
"epoch": 1.8026315789473686,
"grad_norm": 3.1732497215270996,
"learning_rate": 6.647854148586467e-05,
"loss": 0.1593,
"step": 959
},
{
"epoch": 1.8045112781954886,
"grad_norm": 3.1102993488311768,
"learning_rate": 6.638066255449491e-05,
"loss": 0.2025,
"step": 960
},
{
"epoch": 1.806390977443609,
"grad_norm": 3.147205114364624,
"learning_rate": 6.628275689201007e-05,
"loss": 0.2018,
"step": 961
},
{
"epoch": 1.8082706766917294,
"grad_norm": 2.6337480545043945,
"learning_rate": 6.61848251728124e-05,
"loss": 0.1033,
"step": 962
},
{
"epoch": 1.8101503759398496,
"grad_norm": 3.233461380004883,
"learning_rate": 6.608686807148365e-05,
"loss": 0.1816,
"step": 963
},
{
"epoch": 1.8120300751879699,
"grad_norm": 3.0871407985687256,
"learning_rate": 6.598888626278039e-05,
"loss": 0.1354,
"step": 964
},
{
"epoch": 1.8139097744360901,
"grad_norm": 3.1812994480133057,
"learning_rate": 6.589088042162934e-05,
"loss": 0.1228,
"step": 965
},
{
"epoch": 1.8157894736842106,
"grad_norm": 3.910372257232666,
"learning_rate": 6.579285122312284e-05,
"loss": 0.2353,
"step": 966
},
{
"epoch": 1.8176691729323309,
"grad_norm": 3.3136324882507324,
"learning_rate": 6.569479934251408e-05,
"loss": 0.191,
"step": 967
},
{
"epoch": 1.8195488721804511,
"grad_norm": 2.245729446411133,
"learning_rate": 6.559672545521248e-05,
"loss": 0.142,
"step": 968
},
{
"epoch": 1.8214285714285714,
"grad_norm": 1.959720253944397,
"learning_rate": 6.549863023677905e-05,
"loss": 0.0692,
"step": 969
},
{
"epoch": 1.8233082706766917,
"grad_norm": 3.1250393390655518,
"learning_rate": 6.540051436292178e-05,
"loss": 0.1626,
"step": 970
},
{
"epoch": 1.8251879699248121,
"grad_norm": 4.049576282501221,
"learning_rate": 6.530237850949091e-05,
"loss": 0.3098,
"step": 971
},
{
"epoch": 1.8270676691729322,
"grad_norm": 3.1379828453063965,
"learning_rate": 6.520422335247429e-05,
"loss": 0.165,
"step": 972
},
{
"epoch": 1.8289473684210527,
"grad_norm": 2.626603841781616,
"learning_rate": 6.510604956799276e-05,
"loss": 0.1167,
"step": 973
},
{
"epoch": 1.830827067669173,
"grad_norm": 2.630916118621826,
"learning_rate": 6.500785783229543e-05,
"loss": 0.1235,
"step": 974
},
{
"epoch": 1.8327067669172932,
"grad_norm": 2.5859103202819824,
"learning_rate": 6.490964882175514e-05,
"loss": 0.1263,
"step": 975
},
{
"epoch": 1.8345864661654137,
"grad_norm": 2.5505852699279785,
"learning_rate": 6.481142321286367e-05,
"loss": 0.0938,
"step": 976
},
{
"epoch": 1.8364661654135337,
"grad_norm": 2.5898983478546143,
"learning_rate": 6.471318168222711e-05,
"loss": 0.1854,
"step": 977
},
{
"epoch": 1.8383458646616542,
"grad_norm": 3.1198155879974365,
"learning_rate": 6.461492490656131e-05,
"loss": 0.188,
"step": 978
},
{
"epoch": 1.8402255639097744,
"grad_norm": 2.085031509399414,
"learning_rate": 6.451665356268706e-05,
"loss": 0.0944,
"step": 979
},
{
"epoch": 1.8421052631578947,
"grad_norm": 3.46240234375,
"learning_rate": 6.441836832752551e-05,
"loss": 0.215,
"step": 980
},
{
"epoch": 1.8439849624060152,
"grad_norm": 3.1626245975494385,
"learning_rate": 6.432006987809351e-05,
"loss": 0.1419,
"step": 981
},
{
"epoch": 1.8458646616541352,
"grad_norm": 1.4989360570907593,
"learning_rate": 6.422175889149895e-05,
"loss": 0.0393,
"step": 982
},
{
"epoch": 1.8477443609022557,
"grad_norm": 1.7400661706924438,
"learning_rate": 6.412343604493605e-05,
"loss": 0.1053,
"step": 983
},
{
"epoch": 1.849624060150376,
"grad_norm": 3.4047110080718994,
"learning_rate": 6.402510201568073e-05,
"loss": 0.2591,
"step": 984
},
{
"epoch": 1.8515037593984962,
"grad_norm": 3.1428349018096924,
"learning_rate": 6.392675748108595e-05,
"loss": 0.2317,
"step": 985
},
{
"epoch": 1.8533834586466167,
"grad_norm": 2.285388946533203,
"learning_rate": 6.382840311857703e-05,
"loss": 0.0773,
"step": 986
},
{
"epoch": 1.8552631578947367,
"grad_norm": 1.1402099132537842,
"learning_rate": 6.373003960564698e-05,
"loss": 0.0319,
"step": 987
},
{
"epoch": 1.8571428571428572,
"grad_norm": 2.901012420654297,
"learning_rate": 6.363166761985186e-05,
"loss": 0.1784,
"step": 988
},
{
"epoch": 1.8590225563909775,
"grad_norm": 3.1784751415252686,
"learning_rate": 6.353328783880608e-05,
"loss": 0.169,
"step": 989
},
{
"epoch": 1.8609022556390977,
"grad_norm": 1.4703645706176758,
"learning_rate": 6.343490094017774e-05,
"loss": 0.0297,
"step": 990
},
{
"epoch": 1.8627819548872182,
"grad_norm": 1.4826966524124146,
"learning_rate": 6.333650760168398e-05,
"loss": 0.0623,
"step": 991
},
{
"epoch": 1.8646616541353382,
"grad_norm": 2.5351502895355225,
"learning_rate": 6.323810850108628e-05,
"loss": 0.1196,
"step": 992
},
{
"epoch": 1.8665413533834587,
"grad_norm": 2.9735541343688965,
"learning_rate": 6.313970431618585e-05,
"loss": 0.1775,
"step": 993
},
{
"epoch": 1.868421052631579,
"grad_norm": 2.139782428741455,
"learning_rate": 6.304129572481887e-05,
"loss": 0.0573,
"step": 994
},
{
"epoch": 1.8703007518796992,
"grad_norm": 2.248852491378784,
"learning_rate": 6.294288340485192e-05,
"loss": 0.1144,
"step": 995
},
{
"epoch": 1.8721804511278195,
"grad_norm": 2.7781121730804443,
"learning_rate": 6.284446803417725e-05,
"loss": 0.1461,
"step": 996
},
{
"epoch": 1.8740601503759398,
"grad_norm": 1.9069122076034546,
"learning_rate": 6.27460502907081e-05,
"loss": 0.0928,
"step": 997
},
{
"epoch": 1.8759398496240602,
"grad_norm": 2.3354716300964355,
"learning_rate": 6.264763085237408e-05,
"loss": 0.0833,
"step": 998
},
{
"epoch": 1.8778195488721805,
"grad_norm": 3.2997379302978516,
"learning_rate": 6.254921039711648e-05,
"loss": 0.2346,
"step": 999
},
{
"epoch": 1.8796992481203008,
"grad_norm": 1.7376987934112549,
"learning_rate": 6.245078960288354e-05,
"loss": 0.0471,
"step": 1000
},
{
"epoch": 1.8796992481203008,
"eval_global_dataset_loss": 1.0007705688476562,
"eval_global_dataset_runtime": 73.6339,
"eval_global_dataset_samples_per_second": 13.2,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.9177210985502069,
"eval_sts-test-1024_pearson_cosine": 0.9025234688655628,
"eval_sts-test-1024_spearman_cosine": 0.9176032684789633,
"eval_sts-test-1280_pearson_cosine": 0.9025230342933502,
"eval_sts-test-1280_spearman_cosine": 0.9175965110384932,
"eval_sts-test-512_pearson_cosine": 0.9015480520372816,
"eval_sts-test-512_spearman_cosine": 0.9177210985502069,
"eval_sts-test-760_pearson_cosine": 0.9013899032141457,
"eval_sts-test-760_spearman_cosine": 0.9176601025809303,
"eval_sts-test_pearson_cosine": 0.9025207350942973,
"eval_sts-test_spearman_cosine": 0.917561381298301,
"step": 1000
},
{
"epoch": 1.881578947368421,
"grad_norm": 3.5159120559692383,
"learning_rate": 6.235236914762595e-05,
"loss": 0.2246,
"step": 1001
},
{
"epoch": 1.8834586466165413,
"grad_norm": 3.255794048309326,
"learning_rate": 6.225394970929191e-05,
"loss": 0.2424,
"step": 1002
},
{
"epoch": 1.8853383458646618,
"grad_norm": 2.7804276943206787,
"learning_rate": 6.215553196582277e-05,
"loss": 0.1008,
"step": 1003
},
{
"epoch": 1.8872180451127818,
"grad_norm": 1.8781172037124634,
"learning_rate": 6.205711659514808e-05,
"loss": 0.0493,
"step": 1004
},
{
"epoch": 1.8890977443609023,
"grad_norm": 2.7714462280273438,
"learning_rate": 6.195870427518115e-05,
"loss": 0.1101,
"step": 1005
},
{
"epoch": 1.8909774436090225,
"grad_norm": 3.425043821334839,
"learning_rate": 6.186029568381416e-05,
"loss": 0.2017,
"step": 1006
},
{
"epoch": 1.8928571428571428,
"grad_norm": 4.350175380706787,
"learning_rate": 6.176189149891373e-05,
"loss": 0.3919,
"step": 1007
},
{
"epoch": 1.8947368421052633,
"grad_norm": 2.360135316848755,
"learning_rate": 6.166349239831603e-05,
"loss": 0.0908,
"step": 1008
},
{
"epoch": 1.8966165413533833,
"grad_norm": 1.7306382656097412,
"learning_rate": 6.156509905982227e-05,
"loss": 0.0622,
"step": 1009
},
{
"epoch": 1.8984962406015038,
"grad_norm": 1.7376872301101685,
"learning_rate": 6.146671216119393e-05,
"loss": 0.0655,
"step": 1010
},
{
"epoch": 1.900375939849624,
"grad_norm": 3.714693307876587,
"learning_rate": 6.136833238014814e-05,
"loss": 0.252,
"step": 1011
},
{
"epoch": 1.9022556390977443,
"grad_norm": 3.6632583141326904,
"learning_rate": 6.126996039435302e-05,
"loss": 0.3261,
"step": 1012
},
{
"epoch": 1.9041353383458648,
"grad_norm": 2.3958234786987305,
"learning_rate": 6.117159688142299e-05,
"loss": 0.1221,
"step": 1013
},
{
"epoch": 1.9060150375939848,
"grad_norm": 2.7297089099884033,
"learning_rate": 6.107324251891407e-05,
"loss": 0.1101,
"step": 1014
},
{
"epoch": 1.9078947368421053,
"grad_norm": 2.423370122909546,
"learning_rate": 6.097489798431929e-05,
"loss": 0.0951,
"step": 1015
},
{
"epoch": 1.9097744360902256,
"grad_norm": 2.812894821166992,
"learning_rate": 6.087656395506396e-05,
"loss": 0.1421,
"step": 1016
},
{
"epoch": 1.9116541353383458,
"grad_norm": 2.0672447681427,
"learning_rate": 6.077824110850106e-05,
"loss": 0.0644,
"step": 1017
},
{
"epoch": 1.9135338345864663,
"grad_norm": 3.210081100463867,
"learning_rate": 6.067993012190649e-05,
"loss": 0.1413,
"step": 1018
},
{
"epoch": 1.9154135338345863,
"grad_norm": 2.6235413551330566,
"learning_rate": 6.0581631672474515e-05,
"loss": 0.1214,
"step": 1019
},
{
"epoch": 1.9172932330827068,
"grad_norm": 3.1395761966705322,
"learning_rate": 6.048334643731295e-05,
"loss": 0.208,
"step": 1020
},
{
"epoch": 1.919172932330827,
"grad_norm": 3.3170242309570312,
"learning_rate": 6.0385075093438716e-05,
"loss": 0.1795,
"step": 1021
},
{
"epoch": 1.9210526315789473,
"grad_norm": 3.103555202484131,
"learning_rate": 6.02868183177729e-05,
"loss": 0.1614,
"step": 1022
},
{
"epoch": 1.9229323308270678,
"grad_norm": 3.3904659748077393,
"learning_rate": 6.0188576787136364e-05,
"loss": 0.1513,
"step": 1023
},
{
"epoch": 1.9248120300751879,
"grad_norm": 1.0235174894332886,
"learning_rate": 6.009035117824487e-05,
"loss": 0.0291,
"step": 1024
},
{
"epoch": 1.9266917293233083,
"grad_norm": 2.410435199737549,
"learning_rate": 5.999214216770459e-05,
"loss": 0.066,
"step": 1025
},
{
"epoch": 1.9285714285714286,
"grad_norm": 1.549646019935608,
"learning_rate": 5.9893950432007263e-05,
"loss": 0.0552,
"step": 1026
},
{
"epoch": 1.9304511278195489,
"grad_norm": 3.0009422302246094,
"learning_rate": 5.979577664752572e-05,
"loss": 0.1837,
"step": 1027
},
{
"epoch": 1.9323308270676691,
"grad_norm": 2.2576701641082764,
"learning_rate": 5.9697621490509095e-05,
"loss": 0.1053,
"step": 1028
},
{
"epoch": 1.9342105263157894,
"grad_norm": 2.387059211730957,
"learning_rate": 5.959948563707822e-05,
"loss": 0.0974,
"step": 1029
},
{
"epoch": 1.9360902255639099,
"grad_norm": 2.6841256618499756,
"learning_rate": 5.950136976322095e-05,
"loss": 0.1521,
"step": 1030
},
{
"epoch": 1.9379699248120301,
"grad_norm": 1.8788162469863892,
"learning_rate": 5.9403274544787546e-05,
"loss": 0.0598,
"step": 1031
},
{
"epoch": 1.9398496240601504,
"grad_norm": 3.121676206588745,
"learning_rate": 5.9305200657485936e-05,
"loss": 0.1883,
"step": 1032
},
{
"epoch": 1.9417293233082706,
"grad_norm": 1.5553926229476929,
"learning_rate": 5.920714877687719e-05,
"loss": 0.0306,
"step": 1033
},
{
"epoch": 1.943609022556391,
"grad_norm": 2.58564829826355,
"learning_rate": 5.910911957837066e-05,
"loss": 0.1019,
"step": 1034
},
{
"epoch": 1.9454887218045114,
"grad_norm": 3.2539162635803223,
"learning_rate": 5.9011113737219636e-05,
"loss": 0.1244,
"step": 1035
},
{
"epoch": 1.9473684210526314,
"grad_norm": 2.184638261795044,
"learning_rate": 5.891313192851635e-05,
"loss": 0.1358,
"step": 1036
},
{
"epoch": 1.949248120300752,
"grad_norm": 3.0994555950164795,
"learning_rate": 5.881517482718761e-05,
"loss": 0.1477,
"step": 1037
},
{
"epoch": 1.9511278195488722,
"grad_norm": 2.4086666107177734,
"learning_rate": 5.871724310798995e-05,
"loss": 0.0892,
"step": 1038
},
{
"epoch": 1.9530075187969924,
"grad_norm": 3.608358860015869,
"learning_rate": 5.861933744550512e-05,
"loss": 0.1625,
"step": 1039
},
{
"epoch": 1.954887218045113,
"grad_norm": 2.080369234085083,
"learning_rate": 5.852145851413533e-05,
"loss": 0.0782,
"step": 1040
},
{
"epoch": 1.956766917293233,
"grad_norm": 2.9665627479553223,
"learning_rate": 5.842360698809872e-05,
"loss": 0.1908,
"step": 1041
},
{
"epoch": 1.9586466165413534,
"grad_norm": 3.191819190979004,
"learning_rate": 5.832578354142463e-05,
"loss": 0.11,
"step": 1042
},
{
"epoch": 1.9605263157894737,
"grad_norm": 3.310695171356201,
"learning_rate": 5.8227988847948955e-05,
"loss": 0.1647,
"step": 1043
},
{
"epoch": 1.962406015037594,
"grad_norm": 2.5343101024627686,
"learning_rate": 5.8130223581309575e-05,
"loss": 0.0864,
"step": 1044
},
{
"epoch": 1.9642857142857144,
"grad_norm": 2.4089009761810303,
"learning_rate": 5.8032488414941625e-05,
"loss": 0.0953,
"step": 1045
},
{
"epoch": 1.9661654135338344,
"grad_norm": 2.40613055229187,
"learning_rate": 5.793478402207291e-05,
"loss": 0.1,
"step": 1046
},
{
"epoch": 1.968045112781955,
"grad_norm": 3.5450172424316406,
"learning_rate": 5.783711107571932e-05,
"loss": 0.1592,
"step": 1047
},
{
"epoch": 1.9699248120300752,
"grad_norm": 2.3371622562408447,
"learning_rate": 5.773947024868002e-05,
"loss": 0.0991,
"step": 1048
},
{
"epoch": 1.9718045112781954,
"grad_norm": 3.386986017227173,
"learning_rate": 5.7641862213533046e-05,
"loss": 0.1837,
"step": 1049
},
{
"epoch": 1.973684210526316,
"grad_norm": 2.946260690689087,
"learning_rate": 5.7544287642630455e-05,
"loss": 0.1211,
"step": 1050
},
{
"epoch": 1.973684210526316,
"eval_global_dataset_loss": 1.0035452842712402,
"eval_global_dataset_runtime": 74.0905,
"eval_global_dataset_samples_per_second": 13.119,
"eval_global_dataset_steps_per_second": 0.054,
"eval_sequential_score": 0.92074932690604,
"eval_sts-test-1024_pearson_cosine": 0.9057887446299446,
"eval_sts-test-1024_spearman_cosine": 0.9199283202645535,
"eval_sts-test-1280_pearson_cosine": 0.9057869799328011,
"eval_sts-test-1280_spearman_cosine": 0.9199397318362084,
"eval_sts-test-512_pearson_cosine": 0.9050175538356355,
"eval_sts-test-512_spearman_cosine": 0.92074932690604,
"eval_sts-test-760_pearson_cosine": 0.9048899865653727,
"eval_sts-test-760_spearman_cosine": 0.9203155287488471,
"eval_sts-test_pearson_cosine": 0.9057823528825933,
"eval_sts-test_spearman_cosine": 0.919914671129829,
"step": 1050
},
{
"epoch": 1.975563909774436,
"grad_norm": 1.902694582939148,
"learning_rate": 5.7446747208093866e-05,
"loss": 0.0752,
"step": 1051
},
{
"epoch": 1.9774436090225564,
"grad_norm": 3.0880801677703857,
"learning_rate": 5.73492415818097e-05,
"loss": 0.1237,
"step": 1052
},
{
"epoch": 1.9793233082706767,
"grad_norm": 1.482879400253296,
"learning_rate": 5.725177143542466e-05,
"loss": 0.0486,
"step": 1053
},
{
"epoch": 1.981203007518797,
"grad_norm": 3.4937937259674072,
"learning_rate": 5.715433744034101e-05,
"loss": 0.1704,
"step": 1054
}
],
"logging_steps": 1,
"max_steps": 1596,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 527,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 192,
"trial_name": null,
"trial_params": null
}