XLMRoBERTaM3-CustomPoolin-v1.04-1024conc512-MLP-s1-checkpoints-tmp/checkpoint-1054/trainer_state.json
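The JSON below is the raw `trainer_state.json` that the Hugging Face `Trainer` writes alongside each checkpoint. As a minimal sketch (assuming the standard Trainer state layout, using only Python's standard library), the `log_history` can be split into per-step training entries and the periodic evaluation snapshots logged every 50 steps; note that Python's `json` module parses the bare `Infinity` token that appears in one `grad_norm` entry:

```python
import json

# Load the trainer state. Python's json module accepts the non-standard
# bare "Infinity" token (seen in one grad_norm entry) as float("inf").
with open("checkpoint-1054/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a plain "loss" key; evaluation snapshots
# instead carry the "eval_*" metric keys.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_global_dataset_loss" in e]

print(f"global_step={state['global_step']}, epoch={state['epoch']:.3f}")
for e in eval_log:
    print(f"step {e['step']}: eval loss {e['eval_global_dataset_loss']:.4f}, "
          f"sequential score {e['eval_sequential_score']:.4f}")
```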
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.981203007518797,
  "eval_steps": 50,
  "global_step": 1054,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0018796992481203006,
      "grad_norm": 15.693917274475098,
      "learning_rate": 0.0,
      "loss": 1.7608,
      "step": 1
    },
    {
      "epoch": 0.0037593984962406013,
      "grad_norm": Infinity,
      "learning_rate": 2.506265664160401e-07,
      "loss": 1.7877,
      "step": 2
    },
    {
      "epoch": 0.005639097744360902,
      "grad_norm": 17.349987030029297,
      "learning_rate": 2.506265664160401e-07,
      "loss": 2.2507,
      "step": 3
    },
    {
      "epoch": 0.007518796992481203,
      "grad_norm": 13.13552474975586,
      "learning_rate": 5.012531328320802e-07,
      "loss": 1.5007,
      "step": 4
    },
    {
      "epoch": 0.009398496240601503,
      "grad_norm": 16.77507972717285,
      "learning_rate": 7.518796992481203e-07,
      "loss": 1.9385,
      "step": 5
    },
    {
      "epoch": 0.011278195488721804,
      "grad_norm": 13.212800979614258,
      "learning_rate": 1.0025062656641603e-06,
      "loss": 2.2608,
      "step": 6
    },
    {
      "epoch": 0.013157894736842105,
      "grad_norm": 12.166118621826172,
      "learning_rate": 1.2531328320802005e-06,
      "loss": 1.792,
      "step": 7
    },
    {
      "epoch": 0.015037593984962405,
      "grad_norm": 15.633316993713379,
      "learning_rate": 1.5037593984962406e-06,
      "loss": 1.9776,
      "step": 8
    },
    {
      "epoch": 0.016917293233082706,
      "grad_norm": 11.28406810760498,
      "learning_rate": 1.7543859649122807e-06,
      "loss": 1.139,
      "step": 9
    },
    {
      "epoch": 0.018796992481203006,
      "grad_norm": 14.046056747436523,
      "learning_rate": 2.0050125313283207e-06,
      "loss": 1.5296,
      "step": 10
    },
    {
      "epoch": 0.020676691729323307,
      "grad_norm": 10.505359649658203,
      "learning_rate": 2.255639097744361e-06,
      "loss": 1.1633,
      "step": 11
    },
    {
      "epoch": 0.022556390977443608,
      "grad_norm": 14.75375747680664,
      "learning_rate": 2.506265664160401e-06,
      "loss": 2.0384,
      "step": 12
    },
    {
      "epoch": 0.02443609022556391,
      "grad_norm": 10.45843505859375,
      "learning_rate": 2.7568922305764413e-06,
      "loss": 1.395,
      "step": 13
    },
    {
      "epoch": 0.02631578947368421,
      "grad_norm": 13.602055549621582,
      "learning_rate": 3.007518796992481e-06,
      "loss": 1.7397,
      "step": 14
    },
    {
      "epoch": 0.02819548872180451,
      "grad_norm": 12.818756103515625,
      "learning_rate": 3.258145363408521e-06,
      "loss": 1.4049,
      "step": 15
    },
    {
      "epoch": 0.03007518796992481,
      "grad_norm": 11.55542278289795,
      "learning_rate": 3.5087719298245615e-06,
      "loss": 1.2005,
      "step": 16
    },
    {
      "epoch": 0.03195488721804511,
      "grad_norm": 12.352794647216797,
      "learning_rate": 3.7593984962406014e-06,
      "loss": 1.441,
      "step": 17
    },
    {
      "epoch": 0.03383458646616541,
      "grad_norm": 11.01584243774414,
      "learning_rate": 4.010025062656641e-06,
      "loss": 1.3119,
      "step": 18
    },
    {
      "epoch": 0.03571428571428571,
      "grad_norm": 8.868569374084473,
      "learning_rate": 4.260651629072682e-06,
      "loss": 0.8352,
      "step": 19
    },
    {
      "epoch": 0.03759398496240601,
      "grad_norm": 9.641520500183105,
      "learning_rate": 4.511278195488722e-06,
      "loss": 1.3154,
      "step": 20
    },
    {
      "epoch": 0.039473684210526314,
      "grad_norm": 10.837322235107422,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 1.0206,
      "step": 21
    },
    {
      "epoch": 0.041353383458646614,
      "grad_norm": 9.514777183532715,
      "learning_rate": 5.012531328320802e-06,
      "loss": 0.9626,
      "step": 22
    },
    {
      "epoch": 0.043233082706766915,
      "grad_norm": 10.232863426208496,
      "learning_rate": 5.263157894736842e-06,
      "loss": 1.3082,
      "step": 23
    },
    {
      "epoch": 0.045112781954887216,
      "grad_norm": 9.25696849822998,
      "learning_rate": 5.5137844611528826e-06,
      "loss": 1.0918,
      "step": 24
    },
    {
      "epoch": 0.046992481203007516,
      "grad_norm": 11.14088249206543,
      "learning_rate": 5.764411027568922e-06,
      "loss": 1.3777,
      "step": 25
    },
    {
      "epoch": 0.04887218045112782,
      "grad_norm": 7.43634033203125,
      "learning_rate": 6.015037593984962e-06,
      "loss": 0.907,
      "step": 26
    },
    {
      "epoch": 0.05075187969924812,
      "grad_norm": 7.395077705383301,
      "learning_rate": 6.265664160401003e-06,
      "loss": 0.9302,
      "step": 27
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 9.007256507873535,
      "learning_rate": 6.516290726817042e-06,
      "loss": 1.0028,
      "step": 28
    },
    {
      "epoch": 0.05451127819548872,
      "grad_norm": 7.390114784240723,
      "learning_rate": 6.766917293233083e-06,
      "loss": 0.9131,
      "step": 29
    },
    {
      "epoch": 0.05639097744360902,
      "grad_norm": 9.816267967224121,
      "learning_rate": 7.017543859649123e-06,
      "loss": 1.3304,
      "step": 30
    },
    {
      "epoch": 0.05827067669172932,
      "grad_norm": 8.316947937011719,
      "learning_rate": 7.2681704260651625e-06,
      "loss": 1.0405,
      "step": 31
    },
    {
      "epoch": 0.06015037593984962,
      "grad_norm": 6.560980796813965,
      "learning_rate": 7.518796992481203e-06,
      "loss": 0.6233,
      "step": 32
    },
    {
      "epoch": 0.06203007518796992,
      "grad_norm": 8.559331893920898,
      "learning_rate": 7.769423558897243e-06,
      "loss": 1.4009,
      "step": 33
    },
    {
      "epoch": 0.06390977443609022,
      "grad_norm": 8.17328929901123,
      "learning_rate": 8.020050125313283e-06,
      "loss": 0.7543,
      "step": 34
    },
    {
      "epoch": 0.06578947368421052,
      "grad_norm": 6.206168174743652,
      "learning_rate": 8.270676691729324e-06,
      "loss": 0.5975,
      "step": 35
    },
    {
      "epoch": 0.06766917293233082,
      "grad_norm": 7.136289119720459,
      "learning_rate": 8.521303258145363e-06,
      "loss": 0.803,
      "step": 36
    },
    {
      "epoch": 0.06954887218045112,
      "grad_norm": 6.918433666229248,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.7285,
      "step": 37
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 6.956127166748047,
      "learning_rate": 9.022556390977444e-06,
      "loss": 0.759,
      "step": 38
    },
    {
      "epoch": 0.07330827067669173,
      "grad_norm": 8.294937133789062,
      "learning_rate": 9.273182957393484e-06,
      "loss": 1.0653,
      "step": 39
    },
    {
      "epoch": 0.07518796992481203,
      "grad_norm": 7.191320419311523,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.8235,
      "step": 40
    },
    {
      "epoch": 0.07706766917293233,
      "grad_norm": 7.36721658706665,
      "learning_rate": 9.774436090225564e-06,
      "loss": 0.7822,
      "step": 41
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 5.150545597076416,
      "learning_rate": 1.0025062656641604e-05,
      "loss": 0.539,
      "step": 42
    },
    {
      "epoch": 0.08082706766917293,
      "grad_norm": 7.323425769805908,
      "learning_rate": 1.0275689223057643e-05,
      "loss": 0.9211,
      "step": 43
    },
    {
      "epoch": 0.08270676691729323,
      "grad_norm": 5.510406017303467,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.6063,
      "step": 44
    },
    {
      "epoch": 0.08458646616541353,
      "grad_norm": 6.937963008880615,
      "learning_rate": 1.0776942355889724e-05,
      "loss": 0.8769,
      "step": 45
    },
    {
      "epoch": 0.08646616541353383,
      "grad_norm": 6.418097496032715,
      "learning_rate": 1.1027568922305765e-05,
      "loss": 0.8044,
      "step": 46
    },
    {
      "epoch": 0.08834586466165413,
      "grad_norm": 7.569619178771973,
      "learning_rate": 1.1278195488721805e-05,
      "loss": 1.0656,
      "step": 47
    },
    {
      "epoch": 0.09022556390977443,
      "grad_norm": 5.450784206390381,
      "learning_rate": 1.1528822055137844e-05,
      "loss": 0.6475,
      "step": 48
    },
    {
      "epoch": 0.09210526315789473,
      "grad_norm": 5.681114196777344,
      "learning_rate": 1.1779448621553885e-05,
      "loss": 0.7331,
      "step": 49
    },
    {
      "epoch": 0.09398496240601503,
      "grad_norm": 4.720979690551758,
      "learning_rate": 1.2030075187969925e-05,
      "loss": 0.4248,
      "step": 50
    },
    {
      "epoch": 0.09398496240601503,
      "eval_global_dataset_loss": 1.4761557579040527,
      "eval_global_dataset_runtime": 74.8102,
      "eval_global_dataset_samples_per_second": 12.993,
      "eval_global_dataset_steps_per_second": 0.053,
      "eval_sequential_score": 0.9117502957528434,
      "eval_sts-test-1024_pearson_cosine": 0.9001737490289153,
      "eval_sts-test-1024_spearman_cosine": 0.913358387579682,
      "eval_sts-test-1280_pearson_cosine": 0.9001751171497632,
      "eval_sts-test-1280_spearman_cosine": 0.9133455887189241,
      "eval_sts-test-512_pearson_cosine": 0.89801885928312,
      "eval_sts-test-512_spearman_cosine": 0.9117502957528434,
      "eval_sts-test-760_pearson_cosine": 0.8983629702187792,
      "eval_sts-test-760_spearman_cosine": 0.9130246326722251,
      "eval_sts-test_pearson_cosine": 0.9001764198713166,
      "eval_sts-test_spearman_cosine": 0.9133424561306267,
      "step": 50
    },
    {
      "epoch": 0.09586466165413533,
      "grad_norm": 4.459887504577637,
      "learning_rate": 1.2280701754385964e-05,
      "loss": 0.3661,
      "step": 51
    },
    {
      "epoch": 0.09774436090225563,
      "grad_norm": 5.0679030418396,
      "learning_rate": 1.2531328320802006e-05,
      "loss": 0.645,
      "step": 52
    },
    {
      "epoch": 0.09962406015037593,
      "grad_norm": 5.17216682434082,
      "learning_rate": 1.2781954887218045e-05,
      "loss": 0.3991,
      "step": 53
    },
    {
      "epoch": 0.10150375939849623,
      "grad_norm": 5.646566390991211,
      "learning_rate": 1.3032581453634085e-05,
      "loss": 0.8027,
      "step": 54
    },
    {
      "epoch": 0.10338345864661654,
      "grad_norm": 4.848506927490234,
      "learning_rate": 1.3283208020050126e-05,
      "loss": 0.5594,
      "step": 55
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 6.822021484375,
      "learning_rate": 1.3533834586466165e-05,
      "loss": 0.973,
      "step": 56
    },
    {
      "epoch": 0.10714285714285714,
      "grad_norm": 6.811206817626953,
      "learning_rate": 1.3784461152882205e-05,
      "loss": 0.9,
      "step": 57
    },
    {
      "epoch": 0.10902255639097744,
      "grad_norm": 4.507321357727051,
      "learning_rate": 1.4035087719298246e-05,
      "loss": 0.4526,
      "step": 58
    },
    {
      "epoch": 0.11090225563909774,
      "grad_norm": 3.8985111713409424,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.3216,
      "step": 59
    },
    {
      "epoch": 0.11278195488721804,
      "grad_norm": 5.594377040863037,
      "learning_rate": 1.4536340852130325e-05,
      "loss": 0.6491,
      "step": 60
    },
    {
      "epoch": 0.11466165413533834,
      "grad_norm": 5.777069568634033,
      "learning_rate": 1.4786967418546366e-05,
      "loss": 0.6211,
      "step": 61
    },
    {
      "epoch": 0.11654135338345864,
      "grad_norm": 4.431408405303955,
      "learning_rate": 1.5037593984962406e-05,
      "loss": 0.4682,
      "step": 62
    },
    {
      "epoch": 0.11842105263157894,
      "grad_norm": 4.991281986236572,
      "learning_rate": 1.5288220551378447e-05,
      "loss": 0.5099,
      "step": 63
    },
    {
      "epoch": 0.12030075187969924,
      "grad_norm": 5.1047186851501465,
      "learning_rate": 1.5538847117794486e-05,
      "loss": 0.5467,
      "step": 64
    },
    {
      "epoch": 0.12218045112781954,
      "grad_norm": 4.4686279296875,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.4413,
      "step": 65
    },
    {
      "epoch": 0.12406015037593984,
      "grad_norm": 3.8792800903320312,
      "learning_rate": 1.6040100250626565e-05,
      "loss": 0.3663,
      "step": 66
    },
    {
      "epoch": 0.12593984962406016,
      "grad_norm": 6.33101224899292,
      "learning_rate": 1.6290726817042605e-05,
      "loss": 0.6832,
      "step": 67
    },
    {
      "epoch": 0.12781954887218044,
      "grad_norm": 3.861156940460205,
      "learning_rate": 1.6541353383458648e-05,
      "loss": 0.3447,
      "step": 68
    },
    {
      "epoch": 0.12969924812030076,
      "grad_norm": 6.406822681427002,
      "learning_rate": 1.6791979949874687e-05,
      "loss": 0.8614,
      "step": 69
    },
    {
      "epoch": 0.13157894736842105,
      "grad_norm": 4.49379825592041,
      "learning_rate": 1.7042606516290727e-05,
      "loss": 0.4724,
      "step": 70
    },
    {
      "epoch": 0.13345864661654136,
      "grad_norm": 5.233432292938232,
      "learning_rate": 1.7293233082706766e-05,
      "loss": 0.5842,
      "step": 71
    },
    {
      "epoch": 0.13533834586466165,
      "grad_norm": 4.645366191864014,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 0.4599,
      "step": 72
    },
    {
      "epoch": 0.13721804511278196,
      "grad_norm": 5.10455322265625,
      "learning_rate": 1.779448621553885e-05,
      "loss": 0.5251,
      "step": 73
    },
    {
      "epoch": 0.13909774436090225,
      "grad_norm": 3.6988489627838135,
      "learning_rate": 1.8045112781954888e-05,
      "loss": 0.2282,
      "step": 74
    },
    {
      "epoch": 0.14097744360902256,
      "grad_norm": 5.149707794189453,
      "learning_rate": 1.8295739348370928e-05,
      "loss": 0.5728,
      "step": 75
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 4.207123756408691,
      "learning_rate": 1.8546365914786967e-05,
      "loss": 0.4518,
      "step": 76
    },
    {
      "epoch": 0.14473684210526316,
      "grad_norm": 4.2548980712890625,
      "learning_rate": 1.8796992481203007e-05,
      "loss": 0.4483,
      "step": 77
    },
    {
      "epoch": 0.14661654135338345,
      "grad_norm": 4.605799674987793,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.5031,
      "step": 78
    },
    {
      "epoch": 0.14849624060150377,
      "grad_norm": 4.675529956817627,
      "learning_rate": 1.929824561403509e-05,
      "loss": 0.5342,
      "step": 79
    },
    {
      "epoch": 0.15037593984962405,
      "grad_norm": 3.668121814727783,
      "learning_rate": 1.954887218045113e-05,
      "loss": 0.2656,
      "step": 80
    },
    {
      "epoch": 0.15225563909774437,
      "grad_norm": 4.905953407287598,
      "learning_rate": 1.9799498746867168e-05,
      "loss": 0.4979,
      "step": 81
    },
    {
      "epoch": 0.15413533834586465,
      "grad_norm": 3.5785348415374756,
      "learning_rate": 2.0050125313283208e-05,
      "loss": 0.2907,
      "step": 82
    },
    {
      "epoch": 0.15601503759398497,
      "grad_norm": 4.371109962463379,
      "learning_rate": 2.0300751879699247e-05,
      "loss": 0.4795,
      "step": 83
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 4.517609119415283,
      "learning_rate": 2.0551378446115287e-05,
      "loss": 0.3756,
      "step": 84
    },
    {
      "epoch": 0.15977443609022557,
      "grad_norm": 5.095982074737549,
      "learning_rate": 2.080200501253133e-05,
      "loss": 0.4711,
      "step": 85
    },
    {
      "epoch": 0.16165413533834586,
      "grad_norm": 5.376642227172852,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.4183,
      "step": 86
    },
    {
      "epoch": 0.16353383458646617,
      "grad_norm": 4.557480335235596,
      "learning_rate": 2.130325814536341e-05,
      "loss": 0.4993,
      "step": 87
    },
    {
      "epoch": 0.16541353383458646,
      "grad_norm": 4.666930675506592,
      "learning_rate": 2.1553884711779448e-05,
      "loss": 0.4767,
      "step": 88
    },
    {
      "epoch": 0.16729323308270677,
      "grad_norm": 5.519852638244629,
      "learning_rate": 2.1804511278195487e-05,
      "loss": 0.7443,
      "step": 89
    },
    {
      "epoch": 0.16917293233082706,
      "grad_norm": 3.6135387420654297,
      "learning_rate": 2.205513784461153e-05,
      "loss": 0.301,
      "step": 90
    },
    {
      "epoch": 0.17105263157894737,
      "grad_norm": 3.6060757637023926,
      "learning_rate": 2.230576441102757e-05,
      "loss": 0.2712,
      "step": 91
    },
    {
      "epoch": 0.17293233082706766,
      "grad_norm": 4.757140636444092,
      "learning_rate": 2.255639097744361e-05,
      "loss": 0.4745,
      "step": 92
    },
    {
      "epoch": 0.17481203007518797,
      "grad_norm": 4.468283653259277,
      "learning_rate": 2.280701754385965e-05,
      "loss": 0.3506,
      "step": 93
    },
    {
      "epoch": 0.17669172932330826,
      "grad_norm": 5.2164835929870605,
      "learning_rate": 2.3057644110275688e-05,
      "loss": 0.5394,
      "step": 94
    },
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 3.763227939605713,
      "learning_rate": 2.3308270676691728e-05,
      "loss": 0.2925,
      "step": 95
    },
    {
      "epoch": 0.18045112781954886,
      "grad_norm": 2.758868455886841,
      "learning_rate": 2.355889724310777e-05,
      "loss": 0.2154,
      "step": 96
    },
    {
      "epoch": 0.18233082706766918,
      "grad_norm": 4.797206401824951,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.468,
      "step": 97
    },
    {
      "epoch": 0.18421052631578946,
      "grad_norm": 3.767606019973755,
      "learning_rate": 2.406015037593985e-05,
      "loss": 0.2269,
      "step": 98
    },
    {
      "epoch": 0.18609022556390978,
      "grad_norm": 4.662686824798584,
      "learning_rate": 2.431077694235589e-05,
      "loss": 0.3967,
      "step": 99
    },
    {
      "epoch": 0.18796992481203006,
      "grad_norm": 4.955148220062256,
      "learning_rate": 2.456140350877193e-05,
      "loss": 0.489,
      "step": 100
    },
    {
      "epoch": 0.18796992481203006,
      "eval_global_dataset_loss": 1.2233351469039917,
      "eval_global_dataset_runtime": 74.0924,
      "eval_global_dataset_samples_per_second": 13.119,
      "eval_global_dataset_steps_per_second": 0.054,
      "eval_sequential_score": 0.9123232014011761,
      "eval_sts-test-1024_pearson_cosine": 0.8996086094131341,
      "eval_sts-test-1024_spearman_cosine": 0.914071946442568,
      "eval_sts-test-1280_pearson_cosine": 0.8996114667036814,
      "eval_sts-test-1280_spearman_cosine": 0.9140625486776759,
      "eval_sts-test-512_pearson_cosine": 0.8970205686989379,
      "eval_sts-test-512_spearman_cosine": 0.9123232014011761,
      "eval_sts-test-760_pearson_cosine": 0.8975729153169623,
      "eval_sts-test-760_spearman_cosine": 0.9137202015280316,
      "eval_sts-test_pearson_cosine": 0.8996145648456096,
      "eval_sts-test_spearman_cosine": 0.9140547172069323,
      "step": 100
    },
    {
      "epoch": 0.18984962406015038,
      "grad_norm": 4.103837490081787,
      "learning_rate": 2.4812030075187968e-05,
      "loss": 0.3021,
      "step": 101
    },
    {
      "epoch": 0.19172932330827067,
      "grad_norm": 3.842869281768799,
      "learning_rate": 2.506265664160401e-05,
      "loss": 0.315,
      "step": 102
    },
    {
      "epoch": 0.19360902255639098,
      "grad_norm": 5.454257965087891,
      "learning_rate": 2.531328320802005e-05,
      "loss": 0.664,
      "step": 103
    },
    {
      "epoch": 0.19548872180451127,
      "grad_norm": 5.219631671905518,
      "learning_rate": 2.556390977443609e-05,
      "loss": 0.5144,
      "step": 104
    },
    {
      "epoch": 0.19736842105263158,
      "grad_norm": 4.749826431274414,
      "learning_rate": 2.581453634085213e-05,
      "loss": 0.5137,
      "step": 105
    },
    {
      "epoch": 0.19924812030075187,
      "grad_norm": 3.728238821029663,
      "learning_rate": 2.606516290726817e-05,
      "loss": 0.2783,
      "step": 106
    },
    {
      "epoch": 0.20112781954887218,
      "grad_norm": 4.029903411865234,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.2859,
      "step": 107
    },
    {
      "epoch": 0.20300751879699247,
      "grad_norm": 3.996060848236084,
      "learning_rate": 2.656641604010025e-05,
      "loss": 0.333,
      "step": 108
    },
    {
      "epoch": 0.20488721804511278,
      "grad_norm": 4.141890525817871,
      "learning_rate": 2.681704260651629e-05,
      "loss": 0.3578,
      "step": 109
    },
    {
      "epoch": 0.20676691729323307,
      "grad_norm": 3.9686567783355713,
      "learning_rate": 2.706766917293233e-05,
      "loss": 0.373,
      "step": 110
    },
    {
      "epoch": 0.20864661654135339,
      "grad_norm": 3.90350341796875,
      "learning_rate": 2.731829573934837e-05,
      "loss": 0.3707,
      "step": 111
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 3.921323537826538,
      "learning_rate": 2.756892230576441e-05,
      "loss": 0.2798,
      "step": 112
    },
    {
      "epoch": 0.212406015037594,
      "grad_norm": 3.878953695297241,
      "learning_rate": 2.7819548872180452e-05,
      "loss": 0.3597,
      "step": 113
    },
    {
      "epoch": 0.21428571428571427,
      "grad_norm": 4.601593494415283,
      "learning_rate": 2.8070175438596492e-05,
      "loss": 0.43,
      "step": 114
    },
    {
      "epoch": 0.2161654135338346,
      "grad_norm": 3.9261109828948975,
      "learning_rate": 2.832080200501253e-05,
      "loss": 0.3277,
      "step": 115
    },
    {
      "epoch": 0.21804511278195488,
      "grad_norm": 5.408901691436768,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.5529,
      "step": 116
    },
    {
      "epoch": 0.2199248120300752,
      "grad_norm": 4.431450366973877,
      "learning_rate": 2.882205513784461e-05,
      "loss": 0.3227,
      "step": 117
    },
    {
      "epoch": 0.22180451127819548,
      "grad_norm": 5.22983980178833,
      "learning_rate": 2.907268170426065e-05,
      "loss": 0.6035,
      "step": 118
    },
    {
      "epoch": 0.2236842105263158,
      "grad_norm": 3.532494306564331,
      "learning_rate": 2.9323308270676693e-05,
      "loss": 0.2348,
      "step": 119
    },
    {
      "epoch": 0.22556390977443608,
      "grad_norm": 5.134215354919434,
      "learning_rate": 2.9573934837092732e-05,
      "loss": 0.5626,
      "step": 120
    },
    {
      "epoch": 0.2274436090225564,
      "grad_norm": 4.672839164733887,
      "learning_rate": 2.9824561403508772e-05,
      "loss": 0.3628,
      "step": 121
    },
    {
      "epoch": 0.22932330827067668,
      "grad_norm": 4.489410877227783,
      "learning_rate": 3.007518796992481e-05,
      "loss": 0.4222,
      "step": 122
    },
    {
      "epoch": 0.231203007518797,
      "grad_norm": 4.356950759887695,
      "learning_rate": 3.032581453634085e-05,
      "loss": 0.3231,
      "step": 123
    },
    {
      "epoch": 0.23308270676691728,
      "grad_norm": 3.497793436050415,
      "learning_rate": 3.0576441102756894e-05,
      "loss": 0.1875,
      "step": 124
    },
    {
      "epoch": 0.2349624060150376,
      "grad_norm": 3.3374269008636475,
      "learning_rate": 3.082706766917293e-05,
      "loss": 0.2226,
      "step": 125
    },
    {
      "epoch": 0.23684210526315788,
      "grad_norm": 4.252456188201904,
      "learning_rate": 3.107769423558897e-05,
      "loss": 0.318,
      "step": 126
    },
    {
      "epoch": 0.2387218045112782,
      "grad_norm": 4.883126735687256,
      "learning_rate": 3.132832080200501e-05,
      "loss": 0.4381,
      "step": 127
    },
    {
      "epoch": 0.24060150375939848,
      "grad_norm": 4.2091288566589355,
      "learning_rate": 3.157894736842105e-05,
      "loss": 0.3985,
      "step": 128
    },
    {
      "epoch": 0.2424812030075188,
      "grad_norm": 4.547649383544922,
      "learning_rate": 3.182957393483709e-05,
      "loss": 0.3571,
      "step": 129
    },
    {
      "epoch": 0.24436090225563908,
      "grad_norm": 3.526421070098877,
      "learning_rate": 3.208020050125313e-05,
      "loss": 0.2185,
      "step": 130
    },
    {
      "epoch": 0.2462406015037594,
      "grad_norm": 4.988466262817383,
      "learning_rate": 3.233082706766917e-05,
      "loss": 0.4206,
      "step": 131
    },
    {
      "epoch": 0.24812030075187969,
      "grad_norm": 4.972884178161621,
      "learning_rate": 3.258145363408521e-05,
      "loss": 0.5639,
      "step": 132
    },
    {
      "epoch": 0.25,
      "grad_norm": 5.1827898025512695,
      "learning_rate": 3.2832080200501256e-05,
      "loss": 0.4593,
      "step": 133
    },
    {
      "epoch": 0.2518796992481203,
      "grad_norm": 4.37896203994751,
      "learning_rate": 3.3082706766917295e-05,
      "loss": 0.392,
      "step": 134
    },
    {
      "epoch": 0.25375939849624063,
      "grad_norm": 4.721109390258789,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.4681,
      "step": 135
    },
    {
      "epoch": 0.2556390977443609,
      "grad_norm": 3.8587379455566406,
      "learning_rate": 3.3583959899749374e-05,
      "loss": 0.2313,
      "step": 136
    },
    {
      "epoch": 0.2575187969924812,
      "grad_norm": 3.4314234256744385,
      "learning_rate": 3.3834586466165414e-05,
      "loss": 0.2191,
      "step": 137
    },
    {
      "epoch": 0.2593984962406015,
      "grad_norm": 4.806185245513916,
      "learning_rate": 3.4085213032581453e-05,
      "loss": 0.405,
      "step": 138
    },
    {
      "epoch": 0.26127819548872183,
      "grad_norm": 4.882153511047363,
      "learning_rate": 3.433583959899749e-05,
      "loss": 0.4579,
      "step": 139
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 3.507141351699829,
      "learning_rate": 3.458646616541353e-05,
      "loss": 0.2927,
      "step": 140
    },
    {
      "epoch": 0.2650375939849624,
      "grad_norm": 3.5936336517333984,
      "learning_rate": 3.483709273182957e-05,
      "loss": 0.2333,
      "step": 141
    },
    {
      "epoch": 0.2669172932330827,
      "grad_norm": 3.201254367828369,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.2328,
      "step": 142
    },
    {
      "epoch": 0.26879699248120303,
      "grad_norm": 3.535102605819702,
      "learning_rate": 3.533834586466165e-05,
      "loss": 0.1589,
      "step": 143
    },
    {
      "epoch": 0.2706766917293233,
      "grad_norm": 4.333312511444092,
      "learning_rate": 3.55889724310777e-05,
      "loss": 0.3064,
      "step": 144
    },
    {
      "epoch": 0.2725563909774436,
      "grad_norm": 3.9214117527008057,
      "learning_rate": 3.583959899749374e-05,
      "loss": 0.3051,
      "step": 145
    },
    {
      "epoch": 0.2744360902255639,
      "grad_norm": 3.739518880844116,
      "learning_rate": 3.6090225563909776e-05,
      "loss": 0.2781,
      "step": 146
    },
    {
      "epoch": 0.27631578947368424,
      "grad_norm": 3.70009446144104,
      "learning_rate": 3.6340852130325816e-05,
      "loss": 0.2371,
      "step": 147
    },
    {
      "epoch": 0.2781954887218045,
      "grad_norm": 4.299279689788818,
      "learning_rate": 3.6591478696741855e-05,
      "loss": 0.3233,
      "step": 148
    },
    {
      "epoch": 0.2800751879699248,
      "grad_norm": 3.6360323429107666,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 0.2306,
      "step": 149
    },
    {
      "epoch": 0.2819548872180451,
      "grad_norm": 3.7798750400543213,
      "learning_rate": 3.7092731829573934e-05,
      "loss": 0.2543,
      "step": 150
    },
    {
      "epoch": 0.2819548872180451,
      "eval_global_dataset_loss": 1.1358981132507324,
      "eval_global_dataset_runtime": 76.0147,
      "eval_global_dataset_samples_per_second": 12.787,
      "eval_global_dataset_steps_per_second": 0.053,
      "eval_sequential_score": 0.9124108243709806,
      "eval_sts-test-1024_pearson_cosine": 0.9000072544243531,
      "eval_sts-test-1024_spearman_cosine": 0.9145547678017197,
      "eval_sts-test-1280_pearson_cosine": 0.9000105142972739,
      "eval_sts-test-1280_spearman_cosine": 0.9145460860570099,
      "eval_sts-test-512_pearson_cosine": 0.8967922549092373,
      "eval_sts-test-512_spearman_cosine": 0.9124108243709806,
      "eval_sts-test-760_pearson_cosine": 0.8975479669400709,
      "eval_sts-test-760_spearman_cosine": 0.9133563737729194,
      "eval_sts-test_pearson_cosine": 0.9000145065459184,
      "eval_sts-test_spearman_cosine": 0.9145197723153118,
      "step": 150
    },
    {
      "epoch": 0.28383458646616544,
      "grad_norm": 3.044969081878662,
      "learning_rate": 3.7343358395989974e-05,
      "loss": 0.232,
      "step": 151
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 3.209074020385742,
      "learning_rate": 3.759398496240601e-05,
      "loss": 0.2088,
      "step": 152
    },
    {
      "epoch": 0.287593984962406,
      "grad_norm": 5.255861282348633,
      "learning_rate": 3.784461152882205e-05,
      "loss": 0.43,
      "step": 153
    },
    {
      "epoch": 0.2894736842105263,
      "grad_norm": 4.211223125457764,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.2591,
      "step": 154
    },
    {
      "epoch": 0.29135338345864664,
      "grad_norm": 4.526270866394043,
      "learning_rate": 3.834586466165413e-05,
      "loss": 0.374,
      "step": 155
    },
    {
      "epoch": 0.2932330827067669,
      "grad_norm": 4.315747261047363,
      "learning_rate": 3.859649122807018e-05,
      "loss": 0.3955,
      "step": 156
    },
    {
      "epoch": 0.2951127819548872,
      "grad_norm": 3.7073168754577637,
      "learning_rate": 3.884711779448622e-05,
      "loss": 0.2377,
      "step": 157
    },
    {
      "epoch": 0.29699248120300753,
      "grad_norm": 4.333474159240723,
      "learning_rate": 3.909774436090226e-05,
      "loss": 0.3472,
      "step": 158
    },
    {
      "epoch": 0.29887218045112784,
      "grad_norm": 3.814296007156372,
      "learning_rate": 3.9348370927318297e-05,
      "loss": 0.2649,
      "step": 159
    },
    {
      "epoch": 0.3007518796992481,
      "grad_norm": 4.728816509246826,
      "learning_rate": 3.9598997493734336e-05,
      "loss": 0.3457,
      "step": 160
    },
    {
      "epoch": 0.3026315789473684,
      "grad_norm": 4.436241149902344,
      "learning_rate": 3.9849624060150376e-05,
      "loss": 0.3089,
      "step": 161
    },
    {
      "epoch": 0.30451127819548873,
      "grad_norm": 4.423499584197998,
      "learning_rate": 4.0100250626566415e-05,
      "loss": 0.301,
      "step": 162
    },
    {
      "epoch": 0.30639097744360905,
      "grad_norm": 4.738509654998779,
      "learning_rate": 4.0350877192982455e-05,
      "loss": 0.3386,
      "step": 163
    },
    {
      "epoch": 0.3082706766917293,
      "grad_norm": 5.178467273712158,
      "learning_rate": 4.0601503759398494e-05,
      "loss": 0.458,
      "step": 164
    },
    {
      "epoch": 0.3101503759398496,
      "grad_norm": 4.101895332336426,
      "learning_rate": 4.0852130325814534e-05,
      "loss": 0.3676,
      "step": 165
    },
    {
      "epoch": 0.31203007518796994,
      "grad_norm": 4.933971881866455,
      "learning_rate": 4.110275689223057e-05,
      "loss": 0.5165,
      "step": 166
    },
    {
      "epoch": 0.31390977443609025,
      "grad_norm": 3.5641555786132812,
      "learning_rate": 4.135338345864662e-05,
      "loss": 0.2245,
      "step": 167
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 3.8185558319091797,
      "learning_rate": 4.160401002506266e-05,
      "loss": 0.2643,
      "step": 168
    },
    {
      "epoch": 0.3176691729323308,
      "grad_norm": 5.329522132873535,
      "learning_rate": 4.18546365914787e-05,
      "loss": 0.4889,
      "step": 169
    },
    {
      "epoch": 0.31954887218045114,
      "grad_norm": 3.6215882301330566,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.2034,
      "step": 170
    },
    {
      "epoch": 0.32142857142857145,
      "grad_norm": 4.935929298400879,
      "learning_rate": 4.235588972431078e-05,
      "loss": 0.4686,
      "step": 171
    },
    {
      "epoch": 0.3233082706766917,
      "grad_norm": 4.228303909301758,
      "learning_rate": 4.260651629072682e-05,
      "loss": 0.2751,
      "step": 172
    },
    {
      "epoch": 0.325187969924812,
      "grad_norm": 4.45400857925415,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.3089,
      "step": 173
    },
    {
      "epoch": 0.32706766917293234,
      "grad_norm": 3.3213822841644287,
      "learning_rate": 4.3107769423558896e-05,
      "loss": 0.2034,
      "step": 174
    },
    {
      "epoch": 0.32894736842105265,
      "grad_norm": 4.85194206237793,
      "learning_rate": 4.3358395989974935e-05,
      "loss": 0.4197,
      "step": 175
    },
    {
      "epoch": 0.3308270676691729,
      "grad_norm": 4.171342372894287,
      "learning_rate": 4.3609022556390975e-05,
      "loss": 0.2756,
      "step": 176
    },
    {
      "epoch": 0.33270676691729323,
      "grad_norm": 4.365931987762451,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 0.2734,
      "step": 177
    },
    {
      "epoch": 0.33458646616541354,
      "grad_norm": 2.6044809818267822,
      "learning_rate": 4.411027568922306e-05,
      "loss": 0.169,
      "step": 178
    },
    {
      "epoch": 0.33646616541353386,
      "grad_norm": 3.5510780811309814,
      "learning_rate": 4.43609022556391e-05,
      "loss": 0.2378,
      "step": 179
    },
    {
      "epoch": 0.3383458646616541,
      "grad_norm": 3.4351441860198975,
      "learning_rate": 4.461152882205514e-05,
      "loss": 0.207,
      "step": 180
    },
    {
      "epoch": 0.34022556390977443,
      "grad_norm": 2.6987929344177246,
      "learning_rate": 4.486215538847118e-05,
      "loss": 0.1922,
      "step": 181
    },
    {
      "epoch": 0.34210526315789475,
      "grad_norm": 4.182808876037598,
      "learning_rate": 4.511278195488722e-05,
      "loss": 0.2401,
      "step": 182
    },
    {
      "epoch": 0.34398496240601506,
      "grad_norm": 3.7199277877807617,
      "learning_rate": 4.536340852130326e-05,
      "loss": 0.2093,
      "step": 183
    },
    {
      "epoch": 0.3458646616541353,
      "grad_norm": 3.071550130844116,
      "learning_rate": 4.56140350877193e-05,
      "loss": 0.1656,
      "step": 184
    },
    {
      "epoch": 0.34774436090225563,
      "grad_norm": 4.635529041290283,
      "learning_rate": 4.586466165413534e-05,
      "loss": 0.3097,
      "step": 185
    },
    {
      "epoch": 0.34962406015037595,
      "grad_norm": 3.997650146484375,
      "learning_rate": 4.6115288220551377e-05,
      "loss": 0.2157,
      "step": 186
    },
    {
      "epoch": 0.35150375939849626,
      "grad_norm": 3.3299057483673096,
      "learning_rate": 4.6365914786967416e-05,
      "loss": 0.2462,
      "step": 187
    },
    {
      "epoch": 0.3533834586466165,
      "grad_norm": 2.1565983295440674,
      "learning_rate": 4.6616541353383456e-05,
      "loss": 0.1129,
      "step": 188
    },
    {
      "epoch": 0.35526315789473684,
      "grad_norm": 3.879913806915283,
      "learning_rate": 4.6867167919799495e-05,
      "loss": 0.2231,
      "step": 189
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 3.759981870651245,
      "learning_rate": 4.711779448621554e-05,
      "loss": 0.2683,
      "step": 190
    },
    {
      "epoch": 0.35902255639097747,
      "grad_norm": 1.0419526100158691,
      "learning_rate": 4.736842105263158e-05,
      "loss": 0.0246,
      "step": 191
    },
    {
      "epoch": 0.3609022556390977,
      "grad_norm": 3.446683645248413,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.27,
      "step": 192
    },
    {
      "epoch": 0.36278195488721804,
      "grad_norm": 4.075059413909912,
      "learning_rate": 4.786967418546366e-05,
      "loss": 0.3308,
      "step": 193
    },
    {
      "epoch": 0.36466165413533835,
      "grad_norm": 4.341902732849121,
      "learning_rate": 4.81203007518797e-05,
      "loss": 0.28,
      "step": 194
    },
    {
      "epoch": 0.36654135338345867,
      "grad_norm": 4.575509548187256,
      "learning_rate": 4.837092731829574e-05,
      "loss": 0.3338,
      "step": 195
    },
    {
      "epoch": 0.3684210526315789,
      "grad_norm": 3.01415753364563,
      "learning_rate": 4.862155388471178e-05,
      "loss": 0.1966,
      "step": 196
    },
    {
      "epoch": 0.37030075187969924,
      "grad_norm": 3.4799773693084717,
      "learning_rate": 4.887218045112782e-05,
      "loss": 0.1798,
      "step": 197
    },
    {
      "epoch": 0.37218045112781956,
      "grad_norm": 4.396617889404297,
      "learning_rate": 4.912280701754386e-05,
      "loss": 0.2979,
      "step": 198
    },
    {
      "epoch": 0.37406015037593987,
      "grad_norm": 4.624708652496338,
      "learning_rate": 4.93734335839599e-05,
      "loss": 0.3221,
      "step": 199
    },
    {
      "epoch": 0.37593984962406013,
      "grad_norm": 5.769001483917236,
      "learning_rate": 4.9624060150375936e-05,
      "loss": 0.6034,
      "step": 200
    },
    {
      "epoch": 0.37593984962406013,
      "eval_global_dataset_loss": 1.083878517150879,
      "eval_global_dataset_runtime": 75.8699,
      "eval_global_dataset_samples_per_second": 12.811,
      "eval_global_dataset_steps_per_second": 0.053,
      "eval_sequential_score": 0.9137803472233416,
      "eval_sts-test-1024_pearson_cosine": 0.9014369154728474,
      "eval_sts-test-1024_spearman_cosine": 0.9158642792125549,
      "eval_sts-test-1280_pearson_cosine": 0.9014376697226278,
      "eval_sts-test-1280_spearman_cosine": 0.9158756460329482,
      "eval_sts-test-512_pearson_cosine": 0.8989734750714087,
      "eval_sts-test-512_spearman_cosine": 0.9137803472233416,
      "eval_sts-test-760_pearson_cosine": 0.899035552252563,
      "eval_sts-test-760_spearman_cosine": 0.9145744583567318,
      "eval_sts-test_pearson_cosine": 0.90143853711727,
      "eval_sts-test_spearman_cosine": 0.9158528228896387,
      "step": 200
    },
    {
      "epoch": 0.37781954887218044,
      "grad_norm": 3.755136489868164,
      "learning_rate": 4.987468671679198e-05,
      "loss": 0.2707,
      "step": 201
    },
    {
      "epoch": 0.37969924812030076,
      "grad_norm": 4.266496181488037,
      "learning_rate": 5.012531328320802e-05,
      "loss": 0.288,
      "step": 202
    },
    {
      "epoch": 0.3815789473684211,
      "grad_norm": 4.073738098144531,
      "learning_rate": 5.037593984962407e-05,
      "loss": 0.2101,
      "step": 203
    },
    {
      "epoch": 0.38345864661654133,
      "grad_norm": 5.029702186584473,
      "learning_rate": 5.06265664160401e-05,
      "loss": 0.4055,
      "step": 204
    },
    {
      "epoch": 0.38533834586466165,
      "grad_norm": 4.128860950469971,
      "learning_rate": 5.087719298245615e-05,
      "loss": 0.3662,
      "step": 205
    },
    {
      "epoch": 0.38721804511278196,
      "grad_norm": 4.01772928237915,
      "learning_rate": 5.112781954887218e-05,
      "loss": 0.2623,
      "step": 206
    },
    {
      "epoch": 0.3890977443609023,
      "grad_norm": 3.6195261478424072,
      "learning_rate": 5.1378446115288226e-05,
      "loss": 0.1804,
      "step": 207
    },
    {
      "epoch": 0.39097744360902253,
      "grad_norm": 3.148761034011841,
      "learning_rate": 5.162907268170426e-05,
      "loss": 0.21,
      "step": 208
    },
    {
      "epoch": 0.39285714285714285,
      "grad_norm": 5.000106334686279,
      "learning_rate": 5.1879699248120305e-05,
      "loss": 0.5188,
      "step": 209
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 4.500979900360107,
      "learning_rate": 5.213032581453634e-05,
      "loss": 0.2961,
      "step": 210
    },
    {
      "epoch": 0.3966165413533835,
      "grad_norm": 3.465824604034424,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 0.212,
      "step": 211
    },
    {
      "epoch": 0.39849624060150374,
      "grad_norm": 3.279611587524414,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 0.2593,
      "step": 212
    },
    {
      "epoch": 0.40037593984962405,
      "grad_norm": 4.062462329864502,
      "learning_rate": 5.2882205513784463e-05,
      "loss": 0.2851,
      "step": 213
    },
    {
      "epoch": 0.40225563909774437,
      "grad_norm": 3.7090327739715576,
      "learning_rate": 5.31328320802005e-05,
      "loss": 0.21,
      "step": 214
    },
    {
      "epoch": 0.4041353383458647,
      "grad_norm": 3.333369493484497,
      "learning_rate": 5.338345864661655e-05,
      "loss": 0.206,
      "step": 215
    },
    {
      "epoch": 0.40601503759398494,
      "grad_norm": 4.723068714141846,
      "learning_rate": 5.363408521303258e-05,
      "loss": 0.4391,
      "step": 216
    },
    {
      "epoch": 0.40789473684210525,
      "grad_norm": 3.584779977798462,
      "learning_rate": 5.388471177944863e-05,
      "loss": 0.2652,
      "step": 217
    },
    {
      "epoch": 0.40977443609022557,
      "grad_norm": 1.9154776334762573,
      "learning_rate": 5.413533834586466e-05,
      "loss": 0.073,
      "step": 218
    },
    {
      "epoch": 0.4116541353383459,
      "grad_norm": 4.774673938751221,
      "learning_rate": 5.438596491228071e-05,
      "loss": 0.4636,
      "step": 219
    },
    {
      "epoch": 0.41353383458646614,
      "grad_norm": 4.749974727630615,
      "learning_rate": 5.463659147869674e-05,
      "loss": 0.4002,
      "step": 220
    },
    {
      "epoch": 0.41541353383458646,
      "grad_norm": 4.607359409332275,
      "learning_rate": 5.4887218045112786e-05,
      "loss": 0.3869,
      "step": 221
    },
    {
      "epoch": 0.41729323308270677,
      "grad_norm": 3.5087730884552,
      "learning_rate": 5.513784461152882e-05,
      "loss": 0.2313,
      "step": 222
    },
    {
      "epoch": 0.4191729323308271,
      "grad_norm": 3.191532611846924,
      "learning_rate": 5.5388471177944865e-05,
      "loss": 0.177,
      "step": 223
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 2.9819788932800293,
      "learning_rate": 5.5639097744360905e-05,
      "loss": 0.2246,
      "step": 224
    },
    {
      "epoch": 0.42293233082706766,
      "grad_norm": 3.826098918914795,
      "learning_rate": 5.5889724310776944e-05,
      "loss": 0.2082,
      "step": 225
    },
    {
      "epoch": 0.424812030075188,
      "grad_norm": 4.612794399261475,
      "learning_rate": 5.6140350877192984e-05,
      "loss": 0.3497,
      "step": 226
    },
    {
      "epoch": 0.4266917293233083,
      "grad_norm": 2.861586093902588,
      "learning_rate": 5.639097744360903e-05,
      "loss": 0.1367,
      "step": 227
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 3.5423388481140137,
      "learning_rate": 5.664160401002506e-05,
      "loss": 0.2292,
      "step": 228
    },
    {
      "epoch": 0.43045112781954886,
      "grad_norm": 2.9980733394622803,
      "learning_rate": 5.689223057644111e-05,
      "loss": 0.1934,
      "step": 229
    },
    {
      "epoch": 0.4323308270676692,
      "grad_norm": 3.1661713123321533,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.1817,
      "step": 230
    },
    {
      "epoch": 0.4342105263157895,
      "grad_norm": 3.6269607543945312,
      "learning_rate": 5.739348370927319e-05,
      "loss": 0.2364,
      "step": 231
    },
    {
      "epoch": 0.43609022556390975,
      "grad_norm": 2.7368719577789307,
      "learning_rate": 5.764411027568922e-05,
      "loss": 0.1361,
      "step": 232
    },
    {
      "epoch": 0.43796992481203006,
      "grad_norm": 3.269443988800049,
      "learning_rate": 5.789473684210527e-05,
      "loss": 0.2478,
      "step": 233
    },
    {
      "epoch": 0.4398496240601504,
      "grad_norm": 4.16945743560791,
      "learning_rate": 5.81453634085213e-05,
      "loss": 0.3088,
      "step": 234
    },
    {
      "epoch": 0.4417293233082707,
      "grad_norm": 3.336332082748413,
      "learning_rate": 5.8395989974937346e-05,
      "loss": 0.2762,
      "step": 235
    },
    {
      "epoch": 0.44360902255639095,
      "grad_norm": 2.378638744354248,
      "learning_rate": 5.8646616541353386e-05,
      "loss": 0.1596,
      "step": 236
    },
    {
      "epoch": 0.44548872180451127,
      "grad_norm": 4.7622270584106445,
      "learning_rate": 5.889724310776943e-05,
      "loss": 0.4028,
      "step": 237
    },
    {
      "epoch": 0.4473684210526316,
      "grad_norm": 3.4411444664001465,
      "learning_rate": 5.9147869674185465e-05,
      "loss": 0.2385,
      "step": 238
    },
    {
      "epoch": 0.4492481203007519,
      "grad_norm": 2.6086032390594482,
      "learning_rate": 5.939849624060151e-05,
      "loss": 0.1096,
      "step": 239
    },
    {
      "epoch": 0.45112781954887216,
      "grad_norm": 3.7840077877044678,
      "learning_rate": 5.9649122807017544e-05,
      "loss": 0.2783,
      "step": 240
    },
    {
      "epoch": 0.45300751879699247,
      "grad_norm": 3.639894962310791,
      "learning_rate": 5.989974937343359e-05,
      "loss": 0.2536,
      "step": 241
    },
    {
      "epoch": 0.4548872180451128,
      "grad_norm": 3.169121026992798,
      "learning_rate": 6.015037593984962e-05,
      "loss": 0.132,
      "step": 242
    },
    {
      "epoch": 0.4567669172932331,
      "grad_norm": 3.416808843612671,
      "learning_rate": 6.040100250626567e-05,
      "loss": 0.1748,
      "step": 243
    },
    {
      "epoch": 0.45864661654135336,
      "grad_norm": 2.435342311859131,
      "learning_rate": 6.06516290726817e-05,
      "loss": 0.0997,
      "step": 244
    },
    {
      "epoch": 0.4605263157894737,
      "grad_norm": 3.853893995285034,
      "learning_rate": 6.090225563909775e-05,
      "loss": 0.2786,
      "step": 245
    },
    {
      "epoch": 0.462406015037594,
      "grad_norm": 3.432488441467285,
      "learning_rate": 6.115288220551379e-05,
      "loss": 0.2071,
      "step": 246
    },
    {
      "epoch": 0.4642857142857143,
      "grad_norm": 3.495830774307251,
      "learning_rate": 6.140350877192983e-05,
      "loss": 0.1845,
      "step": 247
    },
    {
      "epoch": 0.46616541353383456,
      "grad_norm": 2.5177180767059326,
      "learning_rate": 6.165413533834587e-05,
      "loss": 0.1302,
      "step": 248
    },
    {
      "epoch": 0.4680451127819549,
      "grad_norm": 3.5735349655151367,
      "learning_rate": 6.19047619047619e-05,
      "loss": 0.3023,
      "step": 249
    },
    {
      "epoch": 0.4699248120300752,
      "grad_norm": 3.233510732650757,
      "learning_rate": 6.215538847117795e-05,
      "loss": 0.1952,
      "step": 250
    },
    {
      "epoch": 0.4699248120300752,
      "eval_global_dataset_loss": 1.0789817571640015,
      "eval_global_dataset_runtime": 74.4897,
      "eval_global_dataset_samples_per_second": 13.049,
      "eval_global_dataset_steps_per_second": 0.054,
      "eval_sequential_score": 0.913595614016318,
      "eval_sts-test-1024_pearson_cosine": 0.9008086953101686,
      "eval_sts-test-1024_spearman_cosine": 0.9153775197424008,
      "eval_sts-test-1280_pearson_cosine": 0.9008071588604939,
      "eval_sts-test-1280_spearman_cosine": 0.9153737158851823,
      "eval_sts-test-512_pearson_cosine": 0.8989954925060826,
      "eval_sts-test-512_spearman_cosine": 0.913595614016318,
      "eval_sts-test-760_pearson_cosine": 0.8984529046187754,
      "eval_sts-test-760_spearman_cosine": 0.9140583868103663,
      "eval_sts-test_pearson_cosine": 0.9008049969790113,
      "eval_sts-test_spearman_cosine": 0.9153696435203957,
      "step": 250
    },
    {
      "epoch": 0.4718045112781955,
      "grad_norm": 3.872725486755371,
      "learning_rate": 6.240601503759398e-05,
      "loss": 0.2147,
      "step": 251
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 3.6091225147247314,
      "learning_rate": 6.265664160401002e-05,
      "loss": 0.2907,
      "step": 252
    },
    {
      "epoch": 0.4755639097744361,
      "grad_norm": 3.6047286987304688,
      "learning_rate": 6.290726817042606e-05,
      "loss": 0.204,
      "step": 253
    },
    {
      "epoch": 0.4774436090225564,
      "grad_norm": 4.709647178649902,
      "learning_rate": 6.31578947368421e-05,
      "loss": 0.2603,
      "step": 254
    },
    {
      "epoch": 0.4793233082706767,
      "grad_norm": 4.329948902130127,
      "learning_rate": 6.340852130325816e-05,
      "loss": 0.2308,
      "step": 255
    },
    {
      "epoch": 0.48120300751879697,
      "grad_norm": 3.181044816970825,
      "learning_rate": 6.365914786967418e-05,
      "loss": 0.173,
      "step": 256
    },
    {
      "epoch": 0.4830827067669173,
      "grad_norm": 3.8401877880096436,
      "learning_rate": 6.390977443609024e-05,
      "loss": 0.2796,
      "step": 257
    },
    {
      "epoch": 0.4849624060150376,
      "grad_norm": 2.901517629623413,
      "learning_rate": 6.416040100250626e-05,
      "loss": 0.1085,
      "step": 258
    },
    {
      "epoch": 0.4868421052631579,
      "grad_norm": 4.388332843780518,
      "learning_rate": 6.441102756892231e-05,
      "loss": 0.2431,
      "step": 259
    },
    {
      "epoch": 0.48872180451127817,
      "grad_norm": 4.373783588409424,
      "learning_rate": 6.466165413533834e-05,
      "loss": 0.2521,
      "step": 260
    },
    {
      "epoch": 0.4906015037593985,
      "grad_norm": 4.299677848815918,
      "learning_rate": 6.49122807017544e-05,
      "loss": 0.3279,
      "step": 261
    },
    {
      "epoch": 0.4924812030075188,
      "grad_norm": 5.152429580688477,
      "learning_rate": 6.516290726817042e-05,
      "loss": 0.3679,
      "step": 262
    },
    {
      "epoch": 0.4943609022556391,
      "grad_norm": 3.118044853210449,
      "learning_rate": 6.541353383458647e-05,
      "loss": 0.1284,
      "step": 263
    },
    {
      "epoch": 0.49624060150375937,
      "grad_norm": 5.0754241943359375,
      "learning_rate": 6.566416040100251e-05,
      "loss": 0.3286,
      "step": 264
    },
    {
      "epoch": 0.4981203007518797,
      "grad_norm": 5.476406097412109,
      "learning_rate": 6.591478696741855e-05,
      "loss": 0.3751,
      "step": 265
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.401240348815918,
      "learning_rate": 6.616541353383459e-05,
      "loss": 0.3392,
      "step": 266
    },
    {
      "epoch": 0.5018796992481203,
      "grad_norm": 3.763532876968384,
      "learning_rate": 6.641604010025063e-05,
      "loss": 0.1515,
      "step": 267
    },
    {
      "epoch": 0.5037593984962406,
      "grad_norm": 4.5540595054626465,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.2974,
      "step": 268
    },
    {
      "epoch": 0.5056390977443609,
      "grad_norm": 3.9668686389923096,
      "learning_rate": 6.691729323308271e-05,
      "loss": 0.2106,
      "step": 269
    },
    {
      "epoch": 0.5075187969924813,
      "grad_norm": 3.220064878463745,
      "learning_rate": 6.716791979949875e-05,
      "loss": 0.1307,
      "step": 270
    },
    {
      "epoch": 0.5093984962406015,
      "grad_norm": 5.087316513061523,
      "learning_rate": 6.741854636591479e-05,
      "loss": 0.3075,
      "step": 271
    },
    {
      "epoch": 0.5112781954887218,
      "grad_norm": 4.474050998687744,
      "learning_rate": 6.766917293233083e-05,
      "loss": 0.3512,
      "step": 272
    },
    {
      "epoch": 0.5131578947368421,
      "grad_norm": 3.038557767868042,
      "learning_rate": 6.791979949874687e-05,
      "loss": 0.1349,
      "step": 273
    },
    {
      "epoch": 0.5150375939849624,
      "grad_norm": 3.3275628089904785,
      "learning_rate": 6.817042606516291e-05,
      "loss": 0.1833,
      "step": 274
    },
    {
      "epoch": 0.5169172932330827,
      "grad_norm": 4.29977560043335,
      "learning_rate": 6.842105263157895e-05,
      "loss": 0.2363,
      "step": 275
    },
    {
      "epoch": 0.518796992481203,
      "grad_norm": 4.888143062591553,
      "learning_rate": 6.867167919799499e-05,
      "loss": 0.3437,
      "step": 276
    },
    {
      "epoch": 0.5206766917293233,
      "grad_norm": 3.9522321224212646,
      "learning_rate": 6.892230576441104e-05,
      "loss": 0.2152,
      "step": 277
    },
    {
      "epoch": 0.5225563909774437,
      "grad_norm": 4.254467010498047,
      "learning_rate": 6.917293233082706e-05,
      "loss": 0.2306,
      "step": 278
    },
    {
      "epoch": 0.5244360902255639,
      "grad_norm": 3.122506856918335,
      "learning_rate": 6.942355889724312e-05,
      "loss": 0.1523,
      "step": 279
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 3.234250068664551,
      "learning_rate": 6.967418546365914e-05,
      "loss": 0.2025,
      "step": 280
    },
    {
      "epoch": 0.5281954887218046,
      "grad_norm": 3.9348092079162598,
      "learning_rate": 6.99248120300752e-05,
      "loss": 0.2563,
      "step": 281
    },
    {
      "epoch": 0.5300751879699248,
      "grad_norm": 3.6671388149261475,
      "learning_rate": 7.017543859649122e-05,
      "loss": 0.1861,
      "step": 282
    },
    {
      "epoch": 0.5319548872180451,
      "grad_norm": 3.2786614894866943,
      "learning_rate": 7.042606516290728e-05,
      "loss": 0.1602,
      "step": 283
    },
    {
      "epoch": 0.5338345864661654,
      "grad_norm": 3.0137619972229004,
      "learning_rate": 7.06766917293233e-05,
      "loss": 0.2251,
      "step": 284
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 4.028774261474609,
      "learning_rate": 7.092731829573935e-05,
      "loss": 0.2004,
      "step": 285
    },
    {
      "epoch": 0.5375939849624061,
      "grad_norm": 3.455634355545044,
      "learning_rate": 7.11779448621554e-05,
      "loss": 0.2024,
      "step": 286
    },
    {
      "epoch": 0.5394736842105263,
      "grad_norm": 3.2219948768615723,
      "learning_rate": 7.142857142857143e-05,
      "loss": 0.1639,
      "step": 287
    },
    {
      "epoch": 0.5413533834586466,
      "grad_norm": 3.3190667629241943,
      "learning_rate": 7.167919799498747e-05,
      "loss": 0.205,
      "step": 288
    },
    {
      "epoch": 0.543233082706767,
      "grad_norm": 3.935049295425415,
      "learning_rate": 7.192982456140351e-05,
      "loss": 0.2216,
      "step": 289
    },
    {
      "epoch": 0.5451127819548872,
      "grad_norm": 3.7380313873291016,
      "learning_rate": 7.218045112781955e-05,
      "loss": 0.2815,
      "step": 290
    },
    {
      "epoch": 0.5469924812030075,
      "grad_norm": 4.446345806121826,
      "learning_rate": 7.243107769423559e-05,
      "loss": 0.2416,
      "step": 291
    },
    {
      "epoch": 0.5488721804511278,
      "grad_norm": 3.7456045150756836,
      "learning_rate": 7.268170426065163e-05,
      "loss": 0.3183,
      "step": 292
    },
    {
      "epoch": 0.5507518796992481,
      "grad_norm": 5.020063400268555,
      "learning_rate": 7.293233082706767e-05,
      "loss": 0.3881,
      "step": 293
    },
    {
      "epoch": 0.5526315789473685,
      "grad_norm": 2.9395172595977783,
      "learning_rate": 7.318295739348371e-05,
      "loss": 0.1166,
      "step": 294
    },
    {
      "epoch": 0.5545112781954887,
      "grad_norm": 3.341320276260376,
      "learning_rate": 7.343358395989975e-05,
      "loss": 0.1939,
      "step": 295
    },
    {
      "epoch": 0.556390977443609,
      "grad_norm": 2.8824589252471924,
      "learning_rate": 7.368421052631579e-05,
      "loss": 0.1113,
      "step": 296
    },
    {
      "epoch": 0.5582706766917294,
      "grad_norm": 3.57820463180542,
      "learning_rate": 7.393483709273183e-05,
      "loss": 0.2423,
      "step": 297
    },
    {
      "epoch": 0.5601503759398496,
      "grad_norm": 3.81009578704834,
      "learning_rate": 7.418546365914787e-05,
      "loss": 0.2569,
      "step": 298
    },
    {
      "epoch": 0.5620300751879699,
      "grad_norm": 4.205862998962402,
      "learning_rate": 7.443609022556392e-05,
      "loss": 0.3817,
      "step": 299
    },
    {
      "epoch": 0.5639097744360902,
      "grad_norm": 3.4671545028686523,
      "learning_rate": 7.468671679197995e-05,
      "loss": 0.1794,
      "step": 300
    },
    {
      "epoch": 0.5639097744360902,
      "eval_global_dataset_loss": 1.0346671342849731,
      "eval_global_dataset_runtime": 75.5018,
      "eval_global_dataset_samples_per_second": 12.874,
      "eval_global_dataset_steps_per_second": 0.053,
      "eval_sequential_score": 0.9143511495623889,
      "eval_sts-test-1024_pearson_cosine": 0.9000323597244693,
      "eval_sts-test-1024_spearman_cosine": 0.9162368782148997,
      "eval_sts-test-1280_pearson_cosine": 0.9000281200118452,
      "eval_sts-test-1280_spearman_cosine": 0.9162393842855376,
      "eval_sts-test-512_pearson_cosine": 0.8981734518979707,
      "eval_sts-test-512_spearman_cosine": 0.9143511495623889,
      "eval_sts-test-760_pearson_cosine": 0.8975195986295481,
      "eval_sts-test-760_spearman_cosine": 0.9148535719740302,
      "eval_sts-test_pearson_cosine": 0.9000245395073697,
      "eval_sts-test_spearman_cosine": 0.9162469024974514,
      "step": 300
    },
    {
      "epoch": 0.5657894736842105,
      "grad_norm": 3.4575231075286865,
      "learning_rate": 7.4937343358396e-05,
      "loss": 0.207,
      "step": 301
    },
    {
      "epoch": 0.5676691729323309,
      "grad_norm": 4.194314956665039,
      "learning_rate": 7.518796992481203e-05,
      "loss": 0.28,
      "step": 302
    },
    {
      "epoch": 0.5695488721804511,
      "grad_norm": 3.3316540718078613,
      "learning_rate": 7.543859649122808e-05,
      "loss": 0.2256,
      "step": 303
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 2.6786365509033203,
      "learning_rate": 7.56892230576441e-05,
      "loss": 0.1659,
      "step": 304
    },
    {
      "epoch": 0.5733082706766918,
      "grad_norm": 3.0687808990478516,
      "learning_rate": 7.593984962406016e-05,
      "loss": 0.1587,
      "step": 305
    },
    {
      "epoch": 0.575187969924812,
      "grad_norm": 5.067666530609131,
      "learning_rate": 7.619047619047618e-05,
      "loss": 0.4479,
      "step": 306
    },
    {
      "epoch": 0.5770676691729323,
      "grad_norm": 2.9828665256500244,
      "learning_rate": 7.644110275689224e-05,
      "loss": 0.1649,
      "step": 307
    },
    {
      "epoch": 0.5789473684210527,
      "grad_norm": 4.292194366455078,
      "learning_rate": 7.669172932330826e-05,
      "loss": 0.402,
      "step": 308
    },
    {
      "epoch": 0.5808270676691729,
      "grad_norm": 4.088753700256348,
      "learning_rate": 7.694235588972432e-05,
      "loss": 0.3003,
      "step": 309
    },
    {
      "epoch": 0.5827067669172933,
      "grad_norm": 3.17425799369812,
      "learning_rate": 7.719298245614036e-05,
      "loss": 0.1697,
      "step": 310
    },
    {
      "epoch": 0.5845864661654135,
      "grad_norm": 3.302765130996704,
      "learning_rate": 7.74436090225564e-05,
      "loss": 0.1789,
      "step": 311
    },
    {
      "epoch": 0.5864661654135338,
      "grad_norm": 3.9758617877960205,
      "learning_rate": 7.769423558897244e-05,
      "loss": 0.3012,
      "step": 312
    },
    {
      "epoch": 0.5883458646616542,
      "grad_norm": 3.0176424980163574,
      "learning_rate": 7.794486215538847e-05,
      "loss": 0.1306,
      "step": 313
    },
    {
      "epoch": 0.5902255639097744,
      "grad_norm": 3.656402349472046,
      "learning_rate": 7.819548872180451e-05,
      "loss": 0.2429,
      "step": 314
    },
    {
      "epoch": 0.5921052631578947,
      "grad_norm": 3.609954833984375,
      "learning_rate": 7.844611528822055e-05,
      "loss": 0.2456,
      "step": 315
    },
    {
      "epoch": 0.5939849624060151,
      "grad_norm": 4.076263427734375,
      "learning_rate": 7.869674185463659e-05,
      "loss": 0.2612,
      "step": 316
    },
    {
      "epoch": 0.5958646616541353,
      "grad_norm": 2.1173534393310547,
      "learning_rate": 7.894736842105263e-05,
      "loss": 0.071,
      "step": 317
    },
    {
      "epoch": 0.5977443609022557,
      "grad_norm": 2.5280675888061523,
      "learning_rate": 7.919799498746867e-05,
      "loss": 0.1342,
      "step": 318
    },
    {
      "epoch": 0.599624060150376,
      "grad_norm": 2.712512254714966,
      "learning_rate": 7.944862155388471e-05,
      "loss": 0.1107,
      "step": 319
    },
    {
      "epoch": 0.6015037593984962,
      "grad_norm": 3.069580316543579,
      "learning_rate": 7.969924812030075e-05,
      "loss": 0.1375,
      "step": 320
    },
    {
      "epoch": 0.6033834586466166,
      "grad_norm": 3.0585827827453613,
      "learning_rate": 7.994987468671679e-05,
      "loss": 0.1394,
      "step": 321
    },
    {
      "epoch": 0.6052631578947368,
      "grad_norm": 3.4839272499084473,
      "learning_rate": 8.020050125313283e-05,
      "loss": 0.2689,
      "step": 322
    },
    {
      "epoch": 0.6071428571428571,
      "grad_norm": 3.538287401199341,
      "learning_rate": 8.045112781954888e-05,
      "loss": 0.2019,
      "step": 323
    },
    {
      "epoch": 0.6090225563909775,
      "grad_norm": 4.231574535369873,
      "learning_rate": 8.070175438596491e-05,
      "loss": 0.247,
      "step": 324
    },
    {
      "epoch": 0.6109022556390977,
      "grad_norm": 2.747537612915039,
      "learning_rate": 8.095238095238096e-05,
      "loss": 0.0957,
      "step": 325
    },
    {
      "epoch": 0.6127819548872181,
      "grad_norm": 3.517162561416626,
      "learning_rate": 8.120300751879699e-05,
      "loss": 0.2257,
      "step": 326
    },
    {
      "epoch": 0.6146616541353384,
      "grad_norm": 3.922318458557129,
      "learning_rate": 8.145363408521304e-05,
      "loss": 0.2134,
      "step": 327
    },
    {
      "epoch": 0.6165413533834586,
      "grad_norm": 3.7342982292175293,
      "learning_rate": 8.170426065162907e-05,
      "loss": 0.2157,
      "step": 328
    },
    {
      "epoch": 0.618421052631579,
      "grad_norm": 3.6022825241088867,
      "learning_rate": 8.195488721804512e-05,
      "loss": 0.2729,
      "step": 329
    },
    {
      "epoch": 0.6203007518796992,
      "grad_norm": 3.2551891803741455,
      "learning_rate": 8.220551378446115e-05,
      "loss": 0.1582,
      "step": 330
    },
    {
      "epoch": 0.6221804511278195,
      "grad_norm": 2.9833812713623047,
      "learning_rate": 8.24561403508772e-05,
      "loss": 0.1599,
      "step": 331
    },
    {
      "epoch": 0.6240601503759399,
      "grad_norm": 4.321685791015625,
      "learning_rate": 8.270676691729324e-05,
      "loss": 0.216,
      "step": 332
    },
    {
      "epoch": 0.6259398496240601,
      "grad_norm": 3.155758857727051,
      "learning_rate": 8.295739348370928e-05,
      "loss": 0.1367,
      "step": 333
    },
    {
      "epoch": 0.6278195488721805,
      "grad_norm": 4.673583984375,
      "learning_rate": 8.320802005012532e-05,
      "loss": 0.2675,
      "step": 334
    },
    {
      "epoch": 0.6296992481203008,
      "grad_norm": 4.123340129852295,
      "learning_rate": 8.345864661654136e-05,
      "loss": 0.3074,
      "step": 335
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 3.709581136703491,
      "learning_rate": 8.37092731829574e-05,
      "loss": 0.1689,
      "step": 336
    },
    {
      "epoch": 0.6334586466165414,
"grad_norm": 4.334682941436768, | |
"learning_rate": 8.395989974937344e-05, | |
"loss": 0.2549, | |
"step": 337 | |
}, | |
{ | |
"epoch": 0.6353383458646616, | |
"grad_norm": 2.5441734790802, | |
"learning_rate": 8.421052631578948e-05, | |
"loss": 0.1448, | |
"step": 338 | |
}, | |
{ | |
"epoch": 0.6372180451127819, | |
"grad_norm": 3.9318222999572754, | |
"learning_rate": 8.446115288220552e-05, | |
"loss": 0.2533, | |
"step": 339 | |
}, | |
{ | |
"epoch": 0.6390977443609023, | |
"grad_norm": 4.122605323791504, | |
"learning_rate": 8.471177944862155e-05, | |
"loss": 0.3232, | |
"step": 340 | |
}, | |
{ | |
"epoch": 0.6409774436090225, | |
"grad_norm": 3.933474540710449, | |
"learning_rate": 8.49624060150376e-05, | |
"loss": 0.1825, | |
"step": 341 | |
}, | |
{ | |
"epoch": 0.6428571428571429, | |
"grad_norm": 4.069019317626953, | |
"learning_rate": 8.521303258145363e-05, | |
"loss": 0.2873, | |
"step": 342 | |
}, | |
{ | |
"epoch": 0.6447368421052632, | |
"grad_norm": 4.113502025604248, | |
"learning_rate": 8.546365914786967e-05, | |
"loss": 0.2546, | |
"step": 343 | |
}, | |
{ | |
"epoch": 0.6466165413533834, | |
"grad_norm": 3.7378008365631104, | |
"learning_rate": 8.571428571428571e-05, | |
"loss": 0.2048, | |
"step": 344 | |
}, | |
{ | |
"epoch": 0.6484962406015038, | |
"grad_norm": 4.110744476318359, | |
"learning_rate": 8.596491228070177e-05, | |
"loss": 0.2674, | |
"step": 345 | |
}, | |
{ | |
"epoch": 0.650375939849624, | |
"grad_norm": 3.0982956886291504, | |
"learning_rate": 8.621553884711779e-05, | |
"loss": 0.1629, | |
"step": 346 | |
}, | |
{ | |
"epoch": 0.6522556390977443, | |
"grad_norm": 2.9119224548339844, | |
"learning_rate": 8.646616541353384e-05, | |
"loss": 0.1747, | |
"step": 347 | |
}, | |
{ | |
"epoch": 0.6541353383458647, | |
"grad_norm": 3.0742011070251465, | |
"learning_rate": 8.671679197994987e-05, | |
"loss": 0.1784, | |
"step": 348 | |
}, | |
{ | |
"epoch": 0.6560150375939849, | |
"grad_norm": 3.249251127243042, | |
"learning_rate": 8.696741854636592e-05, | |
"loss": 0.2269, | |
"step": 349 | |
}, | |
{ | |
"epoch": 0.6578947368421053, | |
"grad_norm": 4.456097602844238, | |
"learning_rate": 8.721804511278195e-05, | |
"loss": 0.4473, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.6578947368421053, | |
"eval_global_dataset_loss": 1.0552350282669067, | |
"eval_global_dataset_runtime": 74.8399, | |
"eval_global_dataset_samples_per_second": 12.988, | |
"eval_global_dataset_steps_per_second": 0.053, | |
"eval_sequential_score": 0.9164483279249741, | |
"eval_sts-test-1024_pearson_cosine": 0.9032664609316077, | |
"eval_sts-test-1024_spearman_cosine": 0.9181316466222116, | |
"eval_sts-test-1280_pearson_cosine": 0.903261190428662, | |
"eval_sts-test-1280_spearman_cosine": 0.9181101212654824, | |
"eval_sts-test-512_pearson_cosine": 0.9002490696708711, | |
"eval_sts-test-512_spearman_cosine": 0.9164483279249741, | |
"eval_sts-test-760_pearson_cosine": 0.9008742248612986, | |
"eval_sts-test-760_spearman_cosine": 0.9173301067794302, | |
"eval_sts-test_pearson_cosine": 0.9032553577987494, | |
"eval_sts-test_spearman_cosine": 0.9181009920081584, | |
"step": 350 | |
}, | |
{ | |
"epoch": 0.6597744360902256, | |
"grad_norm": 2.178828716278076, | |
"learning_rate": 8.7468671679198e-05, | |
"loss": 0.1349, | |
"step": 351 | |
}, | |
{ | |
"epoch": 0.6616541353383458, | |
"grad_norm": 3.7205710411071777, | |
"learning_rate": 8.771929824561403e-05, | |
"loss": 0.2307, | |
"step": 352 | |
}, | |
{ | |
"epoch": 0.6635338345864662, | |
"grad_norm": 4.17662239074707, | |
"learning_rate": 8.796992481203008e-05, | |
"loss": 0.3436, | |
"step": 353 | |
}, | |
{ | |
"epoch": 0.6654135338345865, | |
"grad_norm": 4.62085485458374, | |
"learning_rate": 8.822055137844612e-05, | |
"loss": 0.4285, | |
"step": 354 | |
}, | |
{ | |
"epoch": 0.6672932330827067, | |
"grad_norm": 3.5682456493377686, | |
"learning_rate": 8.847117794486216e-05, | |
"loss": 0.2067, | |
"step": 355 | |
}, | |
{ | |
"epoch": 0.6691729323308271, | |
"grad_norm": 4.600581645965576, | |
"learning_rate": 8.87218045112782e-05, | |
"loss": 0.3689, | |
"step": 356 | |
}, | |
{ | |
"epoch": 0.6710526315789473, | |
"grad_norm": 4.159579753875732, | |
"learning_rate": 8.897243107769424e-05, | |
"loss": 0.267, | |
"step": 357 | |
}, | |
{ | |
"epoch": 0.6729323308270677, | |
"grad_norm": 2.1827890872955322, | |
"learning_rate": 8.922305764411028e-05, | |
"loss": 0.0947, | |
"step": 358 | |
}, | |
{ | |
"epoch": 0.674812030075188, | |
"grad_norm": 2.3302104473114014, | |
"learning_rate": 8.947368421052632e-05, | |
"loss": 0.1395, | |
"step": 359 | |
}, | |
{ | |
"epoch": 0.6766917293233082, | |
"grad_norm": 2.3280320167541504, | |
"learning_rate": 8.972431077694236e-05, | |
"loss": 0.0728, | |
"step": 360 | |
}, | |
{ | |
"epoch": 0.6785714285714286, | |
"grad_norm": 4.282434940338135, | |
"learning_rate": 8.99749373433584e-05, | |
"loss": 0.3466, | |
"step": 361 | |
}, | |
{ | |
"epoch": 0.6804511278195489, | |
"grad_norm": 2.6343634128570557, | |
"learning_rate": 9.022556390977444e-05, | |
"loss": 0.118, | |
"step": 362 | |
}, | |
{ | |
"epoch": 0.6823308270676691, | |
"grad_norm": 3.3122928142547607, | |
"learning_rate": 9.047619047619048e-05, | |
"loss": 0.2302, | |
"step": 363 | |
}, | |
{ | |
"epoch": 0.6842105263157895, | |
"grad_norm": 3.051161289215088, | |
"learning_rate": 9.072681704260652e-05, | |
"loss": 0.1604, | |
"step": 364 | |
}, | |
{ | |
"epoch": 0.6860902255639098, | |
"grad_norm": 3.329271078109741, | |
"learning_rate": 9.097744360902256e-05, | |
"loss": 0.2416, | |
"step": 365 | |
}, | |
{ | |
"epoch": 0.6879699248120301, | |
"grad_norm": 3.8991355895996094, | |
"learning_rate": 9.12280701754386e-05, | |
"loss": 0.3026, | |
"step": 366 | |
}, | |
{ | |
"epoch": 0.6898496240601504, | |
"grad_norm": 3.379802703857422, | |
"learning_rate": 9.147869674185465e-05, | |
"loss": 0.205, | |
"step": 367 | |
}, | |
{ | |
"epoch": 0.6917293233082706, | |
"grad_norm": 3.8771727085113525, | |
"learning_rate": 9.172932330827067e-05, | |
"loss": 0.2291, | |
"step": 368 | |
}, | |
{ | |
"epoch": 0.693609022556391, | |
"grad_norm": 4.642533779144287, | |
"learning_rate": 9.197994987468673e-05, | |
"loss": 0.3908, | |
"step": 369 | |
}, | |
{ | |
"epoch": 0.6954887218045113, | |
"grad_norm": 3.090883493423462, | |
"learning_rate": 9.223057644110275e-05, | |
"loss": 0.2343, | |
"step": 370 | |
}, | |
{ | |
"epoch": 0.6973684210526315, | |
"grad_norm": 3.6720900535583496, | |
"learning_rate": 9.24812030075188e-05, | |
"loss": 0.2384, | |
"step": 371 | |
}, | |
{ | |
"epoch": 0.6992481203007519, | |
"grad_norm": 3.4769504070281982, | |
"learning_rate": 9.273182957393483e-05, | |
"loss": 0.304, | |
"step": 372 | |
}, | |
{ | |
"epoch": 0.7011278195488722, | |
"grad_norm": 2.949640989303589, | |
"learning_rate": 9.298245614035089e-05, | |
"loss": 0.1508, | |
"step": 373 | |
}, | |
{ | |
"epoch": 0.7030075187969925, | |
"grad_norm": 2.618457317352295, | |
"learning_rate": 9.323308270676691e-05, | |
"loss": 0.1184, | |
"step": 374 | |
}, | |
{ | |
"epoch": 0.7048872180451128, | |
"grad_norm": 3.8140110969543457, | |
"learning_rate": 9.348370927318296e-05, | |
"loss": 0.2863, | |
"step": 375 | |
}, | |
{ | |
"epoch": 0.706766917293233, | |
"grad_norm": 3.744682788848877, | |
"learning_rate": 9.373433583959899e-05, | |
"loss": 0.243, | |
"step": 376 | |
}, | |
{ | |
"epoch": 0.7086466165413534, | |
"grad_norm": 3.3721110820770264, | |
"learning_rate": 9.398496240601504e-05, | |
"loss": 0.2347, | |
"step": 377 | |
}, | |
{ | |
"epoch": 0.7105263157894737, | |
"grad_norm": 3.7434332370758057, | |
"learning_rate": 9.423558897243108e-05, | |
"loss": 0.2225, | |
"step": 378 | |
}, | |
{ | |
"epoch": 0.7124060150375939, | |
"grad_norm": 2.8612782955169678, | |
"learning_rate": 9.448621553884712e-05, | |
"loss": 0.1221, | |
"step": 379 | |
}, | |
{ | |
"epoch": 0.7142857142857143, | |
"grad_norm": 1.6849597692489624, | |
"learning_rate": 9.473684210526316e-05, | |
"loss": 0.0915, | |
"step": 380 | |
}, | |
{ | |
"epoch": 0.7161654135338346, | |
"grad_norm": 4.169658184051514, | |
"learning_rate": 9.49874686716792e-05, | |
"loss": 0.2929, | |
"step": 381 | |
}, | |
{ | |
"epoch": 0.7180451127819549, | |
"grad_norm": 2.304687976837158, | |
"learning_rate": 9.523809523809524e-05, | |
"loss": 0.1045, | |
"step": 382 | |
}, | |
{ | |
"epoch": 0.7199248120300752, | |
"grad_norm": 3.5404067039489746, | |
"learning_rate": 9.548872180451128e-05, | |
"loss": 0.2764, | |
"step": 383 | |
}, | |
{ | |
"epoch": 0.7218045112781954, | |
"grad_norm": 3.1424617767333984, | |
"learning_rate": 9.573934837092732e-05, | |
"loss": 0.1787, | |
"step": 384 | |
}, | |
{ | |
"epoch": 0.7236842105263158, | |
"grad_norm": 4.354782581329346, | |
"learning_rate": 9.598997493734336e-05, | |
"loss": 0.3038, | |
"step": 385 | |
}, | |
{ | |
"epoch": 0.7255639097744361, | |
"grad_norm": 2.4454684257507324, | |
"learning_rate": 9.62406015037594e-05, | |
"loss": 0.1276, | |
"step": 386 | |
}, | |
{ | |
"epoch": 0.7274436090225563, | |
"grad_norm": 4.368272304534912, | |
"learning_rate": 9.649122807017544e-05, | |
"loss": 0.318, | |
"step": 387 | |
}, | |
{ | |
"epoch": 0.7293233082706767, | |
"grad_norm": 2.330340623855591, | |
"learning_rate": 9.674185463659148e-05, | |
"loss": 0.1114, | |
"step": 388 | |
}, | |
{ | |
"epoch": 0.731203007518797, | |
"grad_norm": 2.093837261199951, | |
"learning_rate": 9.699248120300752e-05, | |
"loss": 0.0779, | |
"step": 389 | |
}, | |
{ | |
"epoch": 0.7330827067669173, | |
"grad_norm": 2.6749417781829834, | |
"learning_rate": 9.724310776942356e-05, | |
"loss": 0.1246, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.7349624060150376, | |
"grad_norm": 3.0585834980010986, | |
"learning_rate": 9.749373433583961e-05, | |
"loss": 0.1865, | |
"step": 391 | |
}, | |
{ | |
"epoch": 0.7368421052631579, | |
"grad_norm": 3.205200433731079, | |
"learning_rate": 9.774436090225564e-05, | |
"loss": 0.1603, | |
"step": 392 | |
}, | |
{ | |
"epoch": 0.7387218045112782, | |
"grad_norm": 4.226044654846191, | |
"learning_rate": 9.799498746867169e-05, | |
"loss": 0.3891, | |
"step": 393 | |
}, | |
{ | |
"epoch": 0.7406015037593985, | |
"grad_norm": 2.3619284629821777, | |
"learning_rate": 9.824561403508771e-05, | |
"loss": 0.0831, | |
"step": 394 | |
}, | |
{ | |
"epoch": 0.7424812030075187, | |
"grad_norm": 2.842703104019165, | |
"learning_rate": 9.849624060150377e-05, | |
"loss": 0.2145, | |
"step": 395 | |
}, | |
{ | |
"epoch": 0.7443609022556391, | |
"grad_norm": 3.2277991771698, | |
"learning_rate": 9.87468671679198e-05, | |
"loss": 0.1798, | |
"step": 396 | |
}, | |
{ | |
"epoch": 0.7462406015037594, | |
"grad_norm": 3.115555763244629, | |
"learning_rate": 9.899749373433585e-05, | |
"loss": 0.2372, | |
"step": 397 | |
}, | |
{ | |
"epoch": 0.7481203007518797, | |
"grad_norm": 4.073906421661377, | |
"learning_rate": 9.924812030075187e-05, | |
"loss": 0.2344, | |
"step": 398 | |
}, | |
{ | |
"epoch": 0.75, | |
"grad_norm": 2.634641408920288, | |
"learning_rate": 9.949874686716793e-05, | |
"loss": 0.1169, | |
"step": 399 | |
}, | |
{ | |
"epoch": 0.7518796992481203, | |
"grad_norm": 2.948498010635376, | |
"learning_rate": 9.974937343358397e-05, | |
"loss": 0.1729, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.7518796992481203, | |
"eval_global_dataset_loss": 1.039243221282959, | |
"eval_global_dataset_runtime": 73.9438, | |
"eval_global_dataset_samples_per_second": 13.145, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.915137787235127, | |
"eval_sts-test-1024_pearson_cosine": 0.8991702968527955, | |
"eval_sts-test-1024_spearman_cosine": 0.9174485633683334, | |
"eval_sts-test-1280_pearson_cosine": 0.899162232134754, | |
"eval_sts-test-1280_spearman_cosine": 0.9174606014576475, | |
"eval_sts-test-512_pearson_cosine": 0.8962762177120662, | |
"eval_sts-test-512_spearman_cosine": 0.915137787235127, | |
"eval_sts-test-760_pearson_cosine": 0.8970663329368722, | |
"eval_sts-test-760_spearman_cosine": 0.9162537046891828, | |
"eval_sts-test_pearson_cosine": 0.8991549992060922, | |
"eval_sts-test_spearman_cosine": 0.9174283805494456, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.7537593984962406, | |
"grad_norm": 3.9373810291290283, | |
"learning_rate": 0.0001, | |
"loss": 0.2767, | |
"step": 401 | |
}, | |
{ | |
"epoch": 0.7556390977443609, | |
"grad_norm": 2.065608263015747, | |
"learning_rate": 9.999987084463017e-05, | |
"loss": 0.0738, | |
"step": 402 | |
}, | |
{ | |
"epoch": 0.7575187969924813, | |
"grad_norm": 3.8259966373443604, | |
"learning_rate": 9.999948337941033e-05, | |
"loss": 0.2413, | |
"step": 403 | |
}, | |
{ | |
"epoch": 0.7593984962406015, | |
"grad_norm": 3.622448444366455, | |
"learning_rate": 9.999883760700945e-05, | |
"loss": 0.2307, | |
"step": 404 | |
}, | |
{ | |
"epoch": 0.7612781954887218, | |
"grad_norm": 3.470798969268799, | |
"learning_rate": 9.999793353187582e-05, | |
"loss": 0.2238, | |
"step": 405 | |
}, | |
{ | |
"epoch": 0.7631578947368421, | |
"grad_norm": 3.9379584789276123, | |
"learning_rate": 9.999677116023694e-05, | |
"loss": 0.264, | |
"step": 406 | |
}, | |
{ | |
"epoch": 0.7650375939849624, | |
"grad_norm": 3.8329389095306396, | |
"learning_rate": 9.999535050009956e-05, | |
"loss": 0.2212, | |
"step": 407 | |
}, | |
{ | |
"epoch": 0.7669172932330827, | |
"grad_norm": 3.588489055633545, | |
"learning_rate": 9.99936715612496e-05, | |
"loss": 0.1936, | |
"step": 408 | |
}, | |
{ | |
"epoch": 0.768796992481203, | |
"grad_norm": 2.136953830718994, | |
"learning_rate": 9.999173435525209e-05, | |
"loss": 0.0843, | |
"step": 409 | |
}, | |
{ | |
"epoch": 0.7706766917293233, | |
"grad_norm": 2.8177402019500732, | |
"learning_rate": 9.998953889545103e-05, | |
"loss": 0.1398, | |
"step": 410 | |
}, | |
{ | |
"epoch": 0.7725563909774437, | |
"grad_norm": 4.3074951171875, | |
"learning_rate": 9.99870851969694e-05, | |
"loss": 0.2536, | |
"step": 411 | |
}, | |
{ | |
"epoch": 0.7744360902255639, | |
"grad_norm": 4.085883140563965, | |
"learning_rate": 9.998437327670895e-05, | |
"loss": 0.2524, | |
"step": 412 | |
}, | |
{ | |
"epoch": 0.7763157894736842, | |
"grad_norm": 1.841971755027771, | |
"learning_rate": 9.998140315335021e-05, | |
"loss": 0.0817, | |
"step": 413 | |
}, | |
{ | |
"epoch": 0.7781954887218046, | |
"grad_norm": 3.460721969604492, | |
"learning_rate": 9.997817484735217e-05, | |
"loss": 0.187, | |
"step": 414 | |
}, | |
{ | |
"epoch": 0.7800751879699248, | |
"grad_norm": 3.2461087703704834, | |
"learning_rate": 9.997468838095238e-05, | |
"loss": 0.2202, | |
"step": 415 | |
}, | |
{ | |
"epoch": 0.7819548872180451, | |
"grad_norm": 5.019058704376221, | |
"learning_rate": 9.997094377816663e-05, | |
"loss": 0.4688, | |
"step": 416 | |
}, | |
{ | |
"epoch": 0.7838345864661654, | |
"grad_norm": 3.9036221504211426, | |
"learning_rate": 9.996694106478878e-05, | |
"loss": 0.2748, | |
"step": 417 | |
}, | |
{ | |
"epoch": 0.7857142857142857, | |
"grad_norm": 3.2269797325134277, | |
"learning_rate": 9.996268026839068e-05, | |
"loss": 0.1784, | |
"step": 418 | |
}, | |
{ | |
"epoch": 0.7875939849624061, | |
"grad_norm": 3.3627684116363525, | |
"learning_rate": 9.99581614183219e-05, | |
"loss": 0.181, | |
"step": 419 | |
}, | |
{ | |
"epoch": 0.7894736842105263, | |
"grad_norm": 4.656357288360596, | |
"learning_rate": 9.995338454570959e-05, | |
"loss": 0.3211, | |
"step": 420 | |
}, | |
{ | |
"epoch": 0.7913533834586466, | |
"grad_norm": 3.5906982421875, | |
"learning_rate": 9.994834968345821e-05, | |
"loss": 0.1609, | |
"step": 421 | |
}, | |
{ | |
"epoch": 0.793233082706767, | |
"grad_norm": 3.2143733501434326, | |
"learning_rate": 9.994305686624937e-05, | |
"loss": 0.1783, | |
"step": 422 | |
}, | |
{ | |
"epoch": 0.7951127819548872, | |
"grad_norm": 4.057474136352539, | |
"learning_rate": 9.993750613054145e-05, | |
"loss": 0.2027, | |
"step": 423 | |
}, | |
{ | |
"epoch": 0.7969924812030075, | |
"grad_norm": 3.9779932498931885, | |
"learning_rate": 9.993169751456954e-05, | |
"loss": 0.3005, | |
"step": 424 | |
}, | |
{ | |
"epoch": 0.7988721804511278, | |
"grad_norm": 1.715262770652771, | |
"learning_rate": 9.992563105834505e-05, | |
"loss": 0.0396, | |
"step": 425 | |
}, | |
{ | |
"epoch": 0.8007518796992481, | |
"grad_norm": 2.0986955165863037, | |
"learning_rate": 9.991930680365547e-05, | |
"loss": 0.0633, | |
"step": 426 | |
}, | |
{ | |
"epoch": 0.8026315789473685, | |
"grad_norm": 3.5883572101593018, | |
"learning_rate": 9.991272479406406e-05, | |
"loss": 0.2468, | |
"step": 427 | |
}, | |
{ | |
"epoch": 0.8045112781954887, | |
"grad_norm": 3.2390847206115723, | |
"learning_rate": 9.990588507490959e-05, | |
"loss": 0.1822, | |
"step": 428 | |
}, | |
{ | |
"epoch": 0.806390977443609, | |
"grad_norm": 5.052639007568359, | |
"learning_rate": 9.989878769330603e-05, | |
"loss": 0.4503, | |
"step": 429 | |
}, | |
{ | |
"epoch": 0.8082706766917294, | |
"grad_norm": 2.2490901947021484, | |
"learning_rate": 9.989143269814216e-05, | |
"loss": 0.0755, | |
"step": 430 | |
}, | |
{ | |
"epoch": 0.8101503759398496, | |
"grad_norm": 3.330777883529663, | |
"learning_rate": 9.988382014008129e-05, | |
"loss": 0.1746, | |
"step": 431 | |
}, | |
{ | |
"epoch": 0.8120300751879699, | |
"grad_norm": 3.147256374359131, | |
"learning_rate": 9.98759500715609e-05, | |
"loss": 0.1353, | |
"step": 432 | |
}, | |
{ | |
"epoch": 0.8139097744360902, | |
"grad_norm": 1.620094656944275, | |
"learning_rate": 9.986782254679227e-05, | |
"loss": 0.0427, | |
"step": 433 | |
}, | |
{ | |
"epoch": 0.8157894736842105, | |
"grad_norm": 4.131981372833252, | |
"learning_rate": 9.985943762176013e-05, | |
"loss": 0.2745, | |
"step": 434 | |
}, | |
{ | |
"epoch": 0.8176691729323309, | |
"grad_norm": 3.36614727973938, | |
"learning_rate": 9.985079535422226e-05, | |
"loss": 0.1701, | |
"step": 435 | |
}, | |
{ | |
"epoch": 0.8195488721804511, | |
"grad_norm": 2.6499199867248535, | |
"learning_rate": 9.984189580370904e-05, | |
"loss": 0.1108, | |
"step": 436 | |
}, | |
{ | |
"epoch": 0.8214285714285714, | |
"grad_norm": 3.2427146434783936, | |
"learning_rate": 9.983273903152314e-05, | |
"loss": 0.1247, | |
"step": 437 | |
}, | |
{ | |
"epoch": 0.8233082706766918, | |
"grad_norm": 3.399001359939575, | |
"learning_rate": 9.982332510073902e-05, | |
"loss": 0.2483, | |
"step": 438 | |
}, | |
{ | |
"epoch": 0.825187969924812, | |
"grad_norm": 4.0499653816223145, | |
"learning_rate": 9.981365407620256e-05, | |
"loss": 0.2491, | |
"step": 439 | |
}, | |
{ | |
"epoch": 0.8270676691729323, | |
"grad_norm": 4.519603252410889, | |
"learning_rate": 9.980372602453051e-05, | |
"loss": 0.2228, | |
"step": 440 | |
}, | |
{ | |
"epoch": 0.8289473684210527, | |
"grad_norm": 4.894527912139893, | |
"learning_rate": 9.979354101411016e-05, | |
"loss": 0.339, | |
"step": 441 | |
}, | |
{ | |
"epoch": 0.8308270676691729, | |
"grad_norm": 3.9160590171813965, | |
"learning_rate": 9.978309911509875e-05, | |
"loss": 0.2636, | |
"step": 442 | |
}, | |
{ | |
"epoch": 0.8327067669172933, | |
"grad_norm": 2.9062135219573975, | |
"learning_rate": 9.977240039942311e-05, | |
"loss": 0.1255, | |
"step": 443 | |
}, | |
{ | |
"epoch": 0.8345864661654135, | |
"grad_norm": 3.9057412147521973, | |
"learning_rate": 9.976144494077903e-05, | |
"loss": 0.2707, | |
"step": 444 | |
}, | |
{ | |
"epoch": 0.8364661654135338, | |
"grad_norm": 1.3283665180206299, | |
"learning_rate": 9.975023281463085e-05, | |
"loss": 0.0358, | |
"step": 445 | |
}, | |
{ | |
"epoch": 0.8383458646616542, | |
"grad_norm": 2.5466246604919434, | |
"learning_rate": 9.97387640982109e-05, | |
"loss": 0.1194, | |
"step": 446 | |
}, | |
{ | |
"epoch": 0.8402255639097744, | |
"grad_norm": 4.229319095611572, | |
"learning_rate": 9.9727038870519e-05, | |
"loss": 0.2849, | |
"step": 447 | |
}, | |
{ | |
"epoch": 0.8421052631578947, | |
"grad_norm": 2.857790946960449, | |
"learning_rate": 9.971505721232187e-05, | |
"loss": 0.1339, | |
"step": 448 | |
}, | |
{ | |
"epoch": 0.8439849624060151, | |
"grad_norm": 4.15946626663208, | |
"learning_rate": 9.970281920615261e-05, | |
"loss": 0.2603, | |
"step": 449 | |
}, | |
{ | |
"epoch": 0.8458646616541353, | |
"grad_norm": 2.4936351776123047, | |
"learning_rate": 9.96903249363101e-05, | |
"loss": 0.108, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.8458646616541353, | |
"eval_global_dataset_loss": 1.0451455116271973, | |
"eval_global_dataset_runtime": 73.2483, | |
"eval_global_dataset_samples_per_second": 13.27, | |
"eval_global_dataset_steps_per_second": 0.055, | |
"eval_sequential_score": 0.9173902524747404, | |
"eval_sts-test-1024_pearson_cosine": 0.9024151738171587, | |
"eval_sts-test-1024_spearman_cosine": 0.9166481870583482, | |
"eval_sts-test-1280_pearson_cosine": 0.9024051394107033, | |
"eval_sts-test-1280_spearman_cosine": 0.9166922222995573, | |
"eval_sts-test-512_pearson_cosine": 0.9034025337392552, | |
"eval_sts-test-512_spearman_cosine": 0.9173902524747404, | |
"eval_sts-test-760_pearson_cosine": 0.9010373056134615, | |
"eval_sts-test-760_spearman_cosine": 0.9162833300242239, | |
"eval_sts-test_pearson_cosine": 0.9023936089136726, | |
"eval_sts-test_spearman_cosine": 0.91669906924255, | |
"step": 450 | |
}, | |
{ | |
"epoch": 0.8477443609022557, | |
"grad_norm": 3.1610307693481445, | |
"learning_rate": 9.967757448885844e-05, | |
"loss": 0.1248, | |
"step": 451 | |
}, | |
{ | |
"epoch": 0.849624060150376, | |
"grad_norm": 3.9296188354492188, | |
"learning_rate": 9.966456795162638e-05, | |
"loss": 0.1983, | |
"step": 452 | |
}, | |
{ | |
"epoch": 0.8515037593984962, | |
"grad_norm": 3.3089802265167236, | |
"learning_rate": 9.965130541420667e-05, | |
"loss": 0.2077, | |
"step": 453 | |
}, | |
{ | |
"epoch": 0.8533834586466166, | |
"grad_norm": 3.975613594055176, | |
"learning_rate": 9.963778696795546e-05, | |
"loss": 0.2199, | |
"step": 454 | |
}, | |
{ | |
"epoch": 0.8552631578947368, | |
"grad_norm": 1.0055021047592163, | |
"learning_rate": 9.96240127059917e-05, | |
"loss": 0.0839, | |
"step": 455 | |
}, | |
{ | |
"epoch": 0.8571428571428571, | |
"grad_norm": 4.69797945022583, | |
"learning_rate": 9.960998272319641e-05, | |
"loss": 0.2924, | |
"step": 456 | |
}, | |
{ | |
"epoch": 0.8590225563909775, | |
"grad_norm": 3.300013542175293, | |
"learning_rate": 9.959569711621217e-05, | |
"loss": 0.1466, | |
"step": 457 | |
}, | |
{ | |
"epoch": 0.8609022556390977, | |
"grad_norm": 4.579361438751221, | |
"learning_rate": 9.958115598344232e-05, | |
"loss": 0.3597, | |
"step": 458 | |
}, | |
{ | |
"epoch": 0.8627819548872181, | |
"grad_norm": 2.8986923694610596, | |
"learning_rate": 9.956635942505035e-05, | |
"loss": 0.1387, | |
"step": 459 | |
}, | |
{ | |
"epoch": 0.8646616541353384, | |
"grad_norm": 3.608433485031128, | |
"learning_rate": 9.955130754295918e-05, | |
"loss": 0.1788, | |
"step": 460 | |
}, | |
{ | |
"epoch": 0.8665413533834586, | |
"grad_norm": 3.700620651245117, | |
"learning_rate": 9.953600044085048e-05, | |
"loss": 0.2746, | |
"step": 461 | |
}, | |
{ | |
"epoch": 0.868421052631579, | |
"grad_norm": 3.4533400535583496, | |
"learning_rate": 9.952043822416397e-05, | |
"loss": 0.2969, | |
"step": 462 | |
}, | |
{ | |
"epoch": 0.8703007518796992, | |
"grad_norm": 3.3678195476531982, | |
"learning_rate": 9.950462100009666e-05, | |
"loss": 0.2054, | |
"step": 463 | |
}, | |
{ | |
"epoch": 0.8721804511278195, | |
"grad_norm": 3.894620895385742, | |
"learning_rate": 9.94885488776021e-05, | |
"loss": 0.2496, | |
"step": 464 | |
}, | |
{ | |
"epoch": 0.8740601503759399, | |
"grad_norm": 3.795100688934326, | |
"learning_rate": 9.947222196738967e-05, | |
"loss": 0.2611, | |
"step": 465 | |
}, | |
{ | |
"epoch": 0.8759398496240601, | |
"grad_norm": 2.7013235092163086, | |
"learning_rate": 9.945564038192383e-05, | |
"loss": 0.1439, | |
"step": 466 | |
}, | |
{ | |
"epoch": 0.8778195488721805, | |
"grad_norm": 2.68449330329895, | |
"learning_rate": 9.943880423542327e-05, | |
"loss": 0.1146, | |
"step": 467 | |
}, | |
{ | |
"epoch": 0.8796992481203008, | |
"grad_norm": 2.876955032348633, | |
"learning_rate": 9.942171364386019e-05, | |
"loss": 0.1646, | |
"step": 468 | |
}, | |
{ | |
"epoch": 0.881578947368421, | |
"grad_norm": 2.7985072135925293, | |
"learning_rate": 9.940436872495949e-05, | |
"loss": 0.1293, | |
"step": 469 | |
}, | |
{ | |
"epoch": 0.8834586466165414, | |
"grad_norm": 3.7495200634002686, | |
"learning_rate": 9.938676959819791e-05, | |
"loss": 0.3097, | |
"step": 470 | |
}, | |
{ | |
"epoch": 0.8853383458646616, | |
"grad_norm": 3.1165237426757812, | |
"learning_rate": 9.936891638480333e-05, | |
"loss": 0.2038, | |
"step": 471 | |
}, | |
{ | |
"epoch": 0.8872180451127819, | |
"grad_norm": 3.427192449569702, | |
"learning_rate": 9.935080920775373e-05, | |
"loss": 0.2284, | |
"step": 472 | |
}, | |
{ | |
"epoch": 0.8890977443609023, | |
"grad_norm": 4.627795696258545, | |
"learning_rate": 9.933244819177658e-05, | |
"loss": 0.3448, | |
"step": 473 | |
}, | |
{ | |
"epoch": 0.8909774436090225, | |
"grad_norm": 3.8821961879730225, | |
"learning_rate": 9.93138334633478e-05, | |
"loss": 0.2148, | |
"step": 474 | |
}, | |
{ | |
"epoch": 0.8928571428571429, | |
"grad_norm": 3.5933377742767334, | |
"learning_rate": 9.929496515069098e-05, | |
"loss": 0.2807, | |
"step": 475 | |
}, | |
{ | |
"epoch": 0.8947368421052632, | |
"grad_norm": 3.7735559940338135, | |
"learning_rate": 9.927584338377643e-05, | |
"loss": 0.29, | |
"step": 476 | |
}, | |
{ | |
"epoch": 0.8966165413533834, | |
"grad_norm": 3.569260597229004, | |
"learning_rate": 9.925646829432041e-05, | |
"loss": 0.2555, | |
"step": 477 | |
}, | |
{ | |
"epoch": 0.8984962406015038, | |
"grad_norm": 3.9756081104278564, | |
"learning_rate": 9.923684001578403e-05, | |
"loss": 0.2942, | |
"step": 478 | |
}, | |
{ | |
"epoch": 0.900375939849624, | |
"grad_norm": 2.6943299770355225, | |
"learning_rate": 9.921695868337252e-05, | |
"loss": 0.1309, | |
"step": 479 | |
}, | |
{ | |
"epoch": 0.9022556390977443, | |
"grad_norm": 3.40865421295166, | |
"learning_rate": 9.91968244340342e-05, | |
"loss": 0.1965, | |
"step": 480 | |
}, | |
{ | |
"epoch": 0.9041353383458647, | |
"grad_norm": 2.193235158920288, | |
"learning_rate": 9.917643740645954e-05, | |
"loss": 0.0971, | |
"step": 481 | |
}, | |
{ | |
"epoch": 0.9060150375939849, | |
"grad_norm": 3.178183078765869, | |
"learning_rate": 9.915579774108021e-05, | |
"loss": 0.2923, | |
"step": 482 | |
}, | |
{ | |
"epoch": 0.9078947368421053, | |
"grad_norm": 2.8752593994140625, | |
"learning_rate": 9.913490558006814e-05, | |
"loss": 0.2019, | |
"step": 483 | |
}, | |
{ | |
"epoch": 0.9097744360902256, | |
"grad_norm": 2.4448463916778564, | |
"learning_rate": 9.911376106733453e-05, | |
"loss": 0.1065, | |
"step": 484 | |
}, | |
{ | |
"epoch": 0.9116541353383458, | |
"grad_norm": 3.194176435470581, | |
"learning_rate": 9.909236434852884e-05, | |
"loss": 0.212, | |
"step": 485 | |
}, | |
{ | |
"epoch": 0.9135338345864662, | |
"grad_norm": 3.9748711585998535, | |
"learning_rate": 9.907071557103779e-05, | |
"loss": 0.3035, | |
"step": 486 | |
}, | |
{ | |
"epoch": 0.9154135338345865, | |
"grad_norm": 3.6471502780914307, | |
"learning_rate": 9.904881488398435e-05, | |
"loss": 0.2386, | |
"step": 487 | |
}, | |
{ | |
"epoch": 0.9172932330827067, | |
"grad_norm": 2.23283314704895, | |
"learning_rate": 9.902666243822675e-05, | |
"loss": 0.1342, | |
"step": 488 | |
}, | |
{ | |
"epoch": 0.9191729323308271, | |
"grad_norm": 2.526794672012329, | |
"learning_rate": 9.900425838635736e-05, | |
"loss": 0.1798, | |
"step": 489 | |
}, | |
{ | |
"epoch": 0.9210526315789473, | |
"grad_norm": 3.369070529937744, | |
"learning_rate": 9.898160288270171e-05, | |
"loss": 0.2655, | |
"step": 490 | |
}, | |
{ | |
"epoch": 0.9229323308270677, | |
"grad_norm": 2.8184382915496826, | |
"learning_rate": 9.895869608331741e-05, | |
"loss": 0.155, | |
"step": 491 | |
}, | |
{ | |
"epoch": 0.924812030075188, | |
"grad_norm": 3.6948490142822266, | |
"learning_rate": 9.893553814599302e-05, | |
"loss": 0.2283, | |
"step": 492 | |
}, | |
{ | |
"epoch": 0.9266917293233082, | |
"grad_norm": 2.2010343074798584, | |
"learning_rate": 9.891212923024709e-05, | |
"loss": 0.098, | |
"step": 493 | |
}, | |
{ | |
"epoch": 0.9285714285714286, | |
"grad_norm": 2.7922205924987793, | |
"learning_rate": 9.88884694973269e-05, | |
"loss": 0.2384, | |
"step": 494 | |
}, | |
{ | |
"epoch": 0.9304511278195489, | |
"grad_norm": 2.5202677249908447, | |
"learning_rate": 9.886455911020748e-05, | |
"loss": 0.0843, | |
"step": 495 | |
}, | |
{ | |
"epoch": 0.9323308270676691, | |
"grad_norm": 2.3406593799591064, | |
"learning_rate": 9.884039823359043e-05, | |
"loss": 0.0996, | |
"step": 496 | |
}, | |
{ | |
"epoch": 0.9342105263157895, | |
"grad_norm": 3.4097483158111572, | |
"learning_rate": 9.881598703390278e-05, | |
"loss": 0.2008, | |
"step": 497 | |
}, | |
{ | |
"epoch": 0.9360902255639098, | |
"grad_norm": 3.603726387023926, | |
"learning_rate": 9.879132567929585e-05, | |
"loss": 0.2017, | |
"step": 498 | |
}, | |
{ | |
"epoch": 0.9379699248120301, | |
"grad_norm": 3.1295666694641113, | |
"learning_rate": 9.876641433964415e-05, | |
"loss": 0.215, | |
"step": 499 | |
}, | |
{ | |
"epoch": 0.9398496240601504, | |
"grad_norm": 3.567051887512207, | |
"learning_rate": 9.87412531865441e-05, | |
"loss": 0.2233, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.9398496240601504, | |
"eval_global_dataset_loss": 1.0155928134918213, | |
"eval_global_dataset_runtime": 74.5661, | |
"eval_global_dataset_samples_per_second": 13.035, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.9180905649642541, | |
"eval_sts-test-1024_pearson_cosine": 0.9074934984822008, | |
"eval_sts-test-1024_spearman_cosine": 0.9182130491666827, | |
"eval_sts-test-1280_pearson_cosine": 0.9074909272280929, | |
"eval_sts-test-1280_spearman_cosine": 0.9182151077247066, | |
"eval_sts-test-512_pearson_cosine": 0.9062832297154773, | |
"eval_sts-test-512_spearman_cosine": 0.9180905649642541, | |
"eval_sts-test-760_pearson_cosine": 0.9057267653570724, | |
"eval_sts-test-760_spearman_cosine": 0.9172522395846092, | |
"eval_sts-test_pearson_cosine": 0.9074876367668167, | |
"eval_sts-test_spearman_cosine": 0.9181634647690607, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.9417293233082706, | |
"grad_norm": 4.2725677490234375, | |
"learning_rate": 9.871584239331293e-05, | |
"loss": 0.3899, | |
"step": 501 | |
}, | |
{ | |
"epoch": 0.943609022556391, | |
"grad_norm": 2.3998143672943115, | |
"learning_rate": 9.869018213498746e-05, | |
"loss": 0.1012, | |
"step": 502 | |
}, | |
{ | |
"epoch": 0.9454887218045113, | |
"grad_norm": 4.653426170349121, | |
"learning_rate": 9.866427258832289e-05, | |
"loss": 0.4322, | |
"step": 503 | |
}, | |
{ | |
"epoch": 0.9473684210526315, | |
"grad_norm": 3.875018835067749, | |
"learning_rate": 9.863811393179163e-05, | |
"loss": 0.2699, | |
"step": 504 | |
}, | |
{ | |
"epoch": 0.9492481203007519, | |
"grad_norm": 3.956909418106079, | |
"learning_rate": 9.861170634558194e-05, | |
"loss": 0.3275, | |
"step": 505 | |
}, | |
{ | |
"epoch": 0.9511278195488722, | |
"grad_norm": 3.3166370391845703, | |
"learning_rate": 9.858505001159689e-05, | |
"loss": 0.2196, | |
"step": 506 | |
}, | |
{ | |
"epoch": 0.9530075187969925, | |
"grad_norm": 2.0084877014160156, | |
"learning_rate": 9.855814511345293e-05, | |
"loss": 0.1193, | |
"step": 507 | |
}, | |
{ | |
"epoch": 0.9548872180451128, | |
"grad_norm": 2.3565118312835693, | |
"learning_rate": 9.853099183647869e-05, | |
"loss": 0.0748, | |
"step": 508 | |
}, | |
{ | |
"epoch": 0.956766917293233, | |
"grad_norm": 3.5655505657196045, | |
"learning_rate": 9.850359036771373e-05, | |
"loss": 0.2532, | |
"step": 509 | |
}, | |
{ | |
"epoch": 0.9586466165413534, | |
"grad_norm": 3.4853789806365967, | |
"learning_rate": 9.847594089590722e-05, | |
"loss": 0.2517, | |
"step": 510 | |
}, | |
{ | |
"epoch": 0.9605263157894737, | |
"grad_norm": 3.009345531463623, | |
"learning_rate": 9.844804361151661e-05, | |
"loss": 0.1423, | |
"step": 511 | |
}, | |
{ | |
"epoch": 0.9624060150375939, | |
"grad_norm": 3.2568352222442627, | |
"learning_rate": 9.84198987067064e-05, | |
"loss": 0.2196, | |
"step": 512 | |
}, | |
{ | |
"epoch": 0.9642857142857143, | |
"grad_norm": 2.9902384281158447, | |
"learning_rate": 9.839150637534679e-05, | |
"loss": 0.177, | |
"step": 513 | |
}, | |
{ | |
"epoch": 0.9661654135338346, | |
"grad_norm": 3.810408353805542, | |
"learning_rate": 9.836286681301224e-05, | |
"loss": 0.3111, | |
"step": 514 | |
}, | |
{ | |
"epoch": 0.9680451127819549, | |
"grad_norm": 2.799046516418457, | |
"learning_rate": 9.833398021698028e-05, | |
"loss": 0.1433, | |
"step": 515 | |
}, | |
{ | |
"epoch": 0.9699248120300752, | |
"grad_norm": 3.353372097015381, | |
"learning_rate": 9.830484678623006e-05, | |
"loss": 0.279, | |
"step": 516 | |
}, | |
{ | |
"epoch": 0.9718045112781954, | |
"grad_norm": 3.018519163131714, | |
"learning_rate": 9.827546672144099e-05, | |
"loss": 0.1455, | |
"step": 517 | |
}, | |
{ | |
"epoch": 0.9736842105263158, | |
"grad_norm": 2.485931396484375, | |
"learning_rate": 9.824584022499135e-05, | |
"loss": 0.135, | |
"step": 518 | |
}, | |
{ | |
"epoch": 0.9755639097744361, | |
"grad_norm": 3.1705570220947266, | |
"learning_rate": 9.821596750095696e-05, | |
"loss": 0.2181, | |
"step": 519 | |
}, | |
{ | |
"epoch": 0.9774436090225563, | |
"grad_norm": 3.1081695556640625, | |
"learning_rate": 9.81858487551097e-05, | |
"loss": 0.1378, | |
"step": 520 | |
}, | |
{ | |
"epoch": 0.9793233082706767, | |
"grad_norm": 2.6018893718719482, | |
"learning_rate": 9.81554841949161e-05, | |
"loss": 0.207, | |
"step": 521 | |
}, | |
{ | |
"epoch": 0.981203007518797, | |
"grad_norm": 3.4171817302703857, | |
"learning_rate": 9.812487402953595e-05, | |
"loss": 0.1857, | |
"step": 522 | |
}, | |
{ | |
"epoch": 0.9830827067669173, | |
"grad_norm": 3.4658777713775635, | |
"learning_rate": 9.809401846982083e-05, | |
"loss": 0.2228, | |
"step": 523 | |
}, | |
{ | |
"epoch": 0.9849624060150376, | |
"grad_norm": 1.8977023363113403, | |
"learning_rate": 9.806291772831271e-05, | |
"loss": 0.0977, | |
"step": 524 | |
}, | |
{ | |
"epoch": 0.9868421052631579, | |
"grad_norm": 1.7279019355773926, | |
"learning_rate": 9.803157201924235e-05, | |
"loss": 0.0472, | |
"step": 525 | |
}, | |
{ | |
"epoch": 0.9887218045112782, | |
"grad_norm": 4.759050369262695, | |
"learning_rate": 9.799998155852801e-05, | |
"loss": 0.4102, | |
"step": 526 | |
}, | |
{ | |
"epoch": 0.9906015037593985, | |
"grad_norm": 3.8956758975982666, | |
"learning_rate": 9.79681465637738e-05, | |
"loss": 0.2662, | |
"step": 527 | |
}, | |
{ | |
"epoch": 0.9924812030075187, | |
"grad_norm": 3.572314500808716, | |
"learning_rate": 9.793606725426832e-05, | |
"loss": 0.2859, | |
"step": 528 | |
}, | |
{ | |
"epoch": 0.9943609022556391, | |
"grad_norm": 3.7973859310150146, | |
"learning_rate": 9.7903743850983e-05, | |
"loss": 0.2533, | |
"step": 529 | |
}, | |
{ | |
"epoch": 0.9962406015037594, | |
"grad_norm": 3.5079214572906494, | |
"learning_rate": 9.787117657657072e-05, | |
"loss": 0.2009, | |
"step": 530 | |
}, | |
{ | |
"epoch": 0.9981203007518797, | |
"grad_norm": 4.260622501373291, | |
"learning_rate": 9.78383656553642e-05, | |
"loss": 0.3569, | |
"step": 531 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 0.23837219178676605, | |
"learning_rate": 9.780531131337446e-05, | |
"loss": 0.0021, | |
"step": 532 | |
}, | |
{ | |
"epoch": 1.0018796992481203, | |
"grad_norm": 2.626725673675537, | |
"learning_rate": 9.777201377828926e-05, | |
"loss": 0.1167, | |
"step": 533 | |
}, | |
{ | |
"epoch": 1.0037593984962405, | |
"grad_norm": 2.536475658416748, | |
"learning_rate": 9.773847327947157e-05, | |
"loss": 0.1864, | |
"step": 534 | |
}, | |
{ | |
"epoch": 1.005639097744361, | |
"grad_norm": 3.6628808975219727, | |
"learning_rate": 9.770469004795794e-05, | |
"loss": 0.2067, | |
"step": 535 | |
}, | |
{ | |
"epoch": 1.0075187969924813, | |
"grad_norm": 1.5426212549209595, | |
"learning_rate": 9.767066431645695e-05, | |
"loss": 0.078, | |
"step": 536 | |
}, | |
{ | |
"epoch": 1.0093984962406015, | |
"grad_norm": 2.680577278137207, | |
"learning_rate": 9.76363963193476e-05, | |
"loss": 0.1397, | |
"step": 537 | |
}, | |
{ | |
"epoch": 1.0112781954887218, | |
"grad_norm": 1.9868223667144775, | |
"learning_rate": 9.760188629267764e-05, | |
"loss": 0.085, | |
"step": 538 | |
}, | |
{ | |
"epoch": 1.013157894736842, | |
"grad_norm": 2.018101692199707, | |
"learning_rate": 9.756713447416203e-05, | |
"loss": 0.1018, | |
"step": 539 | |
}, | |
{ | |
"epoch": 1.0150375939849625, | |
"grad_norm": 3.0724055767059326, | |
"learning_rate": 9.753214110318125e-05, | |
"loss": 0.1371, | |
"step": 540 | |
}, | |
{ | |
"epoch": 1.0169172932330828, | |
"grad_norm": 2.081967830657959, | |
"learning_rate": 9.749690642077964e-05, | |
"loss": 0.0639, | |
"step": 541 | |
}, | |
{ | |
"epoch": 1.018796992481203, | |
"grad_norm": 3.2037465572357178, | |
"learning_rate": 9.746143066966382e-05, | |
"loss": 0.1452, | |
"step": 542 | |
}, | |
{ | |
"epoch": 1.0206766917293233, | |
"grad_norm": 2.831393003463745, | |
"learning_rate": 9.742571409420091e-05, | |
"loss": 0.1349, | |
"step": 543 | |
}, | |
{ | |
"epoch": 1.0225563909774436, | |
"grad_norm": 2.723844528198242, | |
"learning_rate": 9.73897569404169e-05, | |
"loss": 0.1495, | |
"step": 544 | |
}, | |
{ | |
"epoch": 1.0244360902255638, | |
"grad_norm": 2.4952895641326904, | |
"learning_rate": 9.735355945599497e-05, | |
"loss": 0.0946, | |
"step": 545 | |
}, | |
{ | |
"epoch": 1.0263157894736843, | |
"grad_norm": 4.644716262817383, | |
"learning_rate": 9.731712189027377e-05, | |
"loss": 0.2574, | |
"step": 546 | |
}, | |
{ | |
"epoch": 1.0281954887218046, | |
"grad_norm": 2.886420965194702, | |
"learning_rate": 9.728044449424567e-05, | |
"loss": 0.1232, | |
"step": 547 | |
}, | |
{ | |
"epoch": 1.0300751879699248, | |
"grad_norm": 2.5065267086029053, | |
"learning_rate": 9.724352752055506e-05, | |
"loss": 0.1205, | |
"step": 548 | |
}, | |
{ | |
"epoch": 1.031954887218045, | |
"grad_norm": 1.9992611408233643, | |
"learning_rate": 9.720637122349669e-05, | |
"loss": 0.0761, | |
"step": 549 | |
}, | |
{ | |
"epoch": 1.0338345864661653, | |
"grad_norm": 2.5508501529693604, | |
"learning_rate": 9.716897585901372e-05, | |
"loss": 0.0885, | |
"step": 550 | |
}, | |
{ | |
"epoch": 1.0338345864661653, | |
"eval_global_dataset_loss": 1.018797516822815, | |
"eval_global_dataset_runtime": 73.4171, | |
"eval_global_dataset_samples_per_second": 13.239, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.9162829720141328, | |
"eval_sts-test-1024_pearson_cosine": 0.9050044361126697, | |
"eval_sts-test-1024_spearman_cosine": 0.9173084024176553, | |
"eval_sts-test-1280_pearson_cosine": 0.9049983320262998, | |
"eval_sts-test-1280_spearman_cosine": 0.9173004814443892, | |
"eval_sts-test-512_pearson_cosine": 0.9033928303116558, | |
"eval_sts-test-512_spearman_cosine": 0.9162829720141328, | |
"eval_sts-test-760_pearson_cosine": 0.9033036147321826, | |
"eval_sts-test-760_spearman_cosine": 0.9166361489690339, | |
"eval_sts-test_pearson_cosine": 0.9049924067928337, | |
"eval_sts-test_spearman_cosine": 0.9173026742561973, | |
"step": 550 | |
}, | |
{ | |
"epoch": 1.0357142857142858, | |
"grad_norm": 2.988827705383301, | |
"learning_rate": 9.713134168469615e-05, | |
"loss": 0.1249, | |
"step": 551 | |
}, | |
{ | |
"epoch": 1.037593984962406, | |
"grad_norm": 2.299173355102539, | |
"learning_rate": 9.709346895977896e-05, | |
"loss": 0.1191, | |
"step": 552 | |
}, | |
{ | |
"epoch": 1.0394736842105263, | |
"grad_norm": 3.8693172931671143, | |
"learning_rate": 9.70553579451403e-05, | |
"loss": 0.2311, | |
"step": 553 | |
}, | |
{ | |
"epoch": 1.0413533834586466, | |
"grad_norm": 2.846728563308716, | |
"learning_rate": 9.701700890329977e-05, | |
"loss": 0.1081, | |
"step": 554 | |
}, | |
{ | |
"epoch": 1.0432330827067668, | |
"grad_norm": 3.595548391342163, | |
"learning_rate": 9.697842209841654e-05, | |
"loss": 0.2131, | |
"step": 555 | |
}, | |
{ | |
"epoch": 1.045112781954887, | |
"grad_norm": 2.5240702629089355, | |
"learning_rate": 9.693959779628761e-05, | |
"loss": 0.1334, | |
"step": 556 | |
}, | |
{ | |
"epoch": 1.0469924812030076, | |
"grad_norm": 2.941345691680908, | |
"learning_rate": 9.690053626434585e-05, | |
"loss": 0.1624, | |
"step": 557 | |
}, | |
{ | |
"epoch": 1.0488721804511278, | |
"grad_norm": 3.699535369873047, | |
"learning_rate": 9.68612377716583e-05, | |
"loss": 0.1951, | |
"step": 558 | |
}, | |
{ | |
"epoch": 1.050751879699248, | |
"grad_norm": 1.7172725200653076, | |
"learning_rate": 9.682170258892423e-05, | |
"loss": 0.0739, | |
"step": 559 | |
}, | |
{ | |
"epoch": 1.0526315789473684, | |
"grad_norm": 3.7899820804595947, | |
"learning_rate": 9.678193098847328e-05, | |
"loss": 0.2474, | |
"step": 560 | |
}, | |
{ | |
"epoch": 1.0545112781954886, | |
"grad_norm": 2.2706000804901123, | |
"learning_rate": 9.674192324426366e-05, | |
"loss": 0.0994, | |
"step": 561 | |
}, | |
{ | |
"epoch": 1.056390977443609, | |
"grad_norm": 3.8091330528259277, | |
"learning_rate": 9.670167963188015e-05, | |
"loss": 0.2113, | |
"step": 562 | |
}, | |
{ | |
"epoch": 1.0582706766917294, | |
"grad_norm": 1.1805905103683472, | |
"learning_rate": 9.666120042853227e-05, | |
"loss": 0.0286, | |
"step": 563 | |
}, | |
{ | |
"epoch": 1.0601503759398496, | |
"grad_norm": 3.3499631881713867, | |
"learning_rate": 9.66204859130524e-05, | |
"loss": 0.2701, | |
"step": 564 | |
}, | |
{ | |
"epoch": 1.0620300751879699, | |
"grad_norm": 2.7908685207366943, | |
"learning_rate": 9.657953636589373e-05, | |
"loss": 0.1336, | |
"step": 565 | |
}, | |
{ | |
"epoch": 1.0639097744360901, | |
"grad_norm": 2.8439011573791504, | |
"learning_rate": 9.65383520691285e-05, | |
"loss": 0.1152, | |
"step": 566 | |
}, | |
{ | |
"epoch": 1.0657894736842106, | |
"grad_norm": 2.6462042331695557, | |
"learning_rate": 9.649693330644595e-05, | |
"loss": 0.0995, | |
"step": 567 | |
}, | |
{ | |
"epoch": 1.0676691729323309, | |
"grad_norm": 3.5710973739624023, | |
"learning_rate": 9.645528036315036e-05, | |
"loss": 0.2256, | |
"step": 568 | |
}, | |
{ | |
"epoch": 1.0695488721804511, | |
"grad_norm": 3.204761028289795, | |
"learning_rate": 9.641339352615917e-05, | |
"loss": 0.1368, | |
"step": 569 | |
}, | |
{ | |
"epoch": 1.0714285714285714, | |
"grad_norm": 3.230581045150757, | |
"learning_rate": 9.637127308400085e-05, | |
"loss": 0.1263, | |
"step": 570 | |
}, | |
{ | |
"epoch": 1.0733082706766917, | |
"grad_norm": 0.767558753490448, | |
"learning_rate": 9.63289193268131e-05, | |
"loss": 0.0186, | |
"step": 571 | |
}, | |
{ | |
"epoch": 1.0751879699248121, | |
"grad_norm": 2.9297759532928467, | |
"learning_rate": 9.628633254634072e-05, | |
"loss": 0.146, | |
"step": 572 | |
}, | |
{ | |
"epoch": 1.0770676691729324, | |
"grad_norm": 3.178691864013672, | |
"learning_rate": 9.624351303593366e-05, | |
"loss": 0.1571, | |
"step": 573 | |
}, | |
{ | |
"epoch": 1.0789473684210527, | |
"grad_norm": 2.926023244857788, | |
"learning_rate": 9.620046109054498e-05, | |
"loss": 0.1521, | |
"step": 574 | |
}, | |
{ | |
"epoch": 1.080827067669173, | |
"grad_norm": 1.6326439380645752, | |
"learning_rate": 9.615717700672878e-05, | |
"loss": 0.0338, | |
"step": 575 | |
}, | |
{ | |
"epoch": 1.0827067669172932, | |
"grad_norm": 2.366187572479248, | |
"learning_rate": 9.611366108263826e-05, | |
"loss": 0.0967, | |
"step": 576 | |
}, | |
{ | |
"epoch": 1.0845864661654134, | |
"grad_norm": 1.6901721954345703, | |
"learning_rate": 9.606991361802354e-05, | |
"loss": 0.0454, | |
"step": 577 | |
}, | |
{ | |
"epoch": 1.086466165413534, | |
"grad_norm": 2.1220741271972656, | |
"learning_rate": 9.60259349142297e-05, | |
"loss": 0.0509, | |
"step": 578 | |
}, | |
{ | |
"epoch": 1.0883458646616542, | |
"grad_norm": 2.955486536026001, | |
"learning_rate": 9.598172527419464e-05, | |
"loss": 0.0962, | |
"step": 579 | |
}, | |
{ | |
"epoch": 1.0902255639097744, | |
"grad_norm": 3.1397783756256104, | |
"learning_rate": 9.593728500244703e-05, | |
"loss": 0.1644, | |
"step": 580 | |
}, | |
{ | |
"epoch": 1.0921052631578947, | |
"grad_norm": 3.1587095260620117, | |
"learning_rate": 9.589261440510418e-05, | |
"loss": 0.1733, | |
"step": 581 | |
}, | |
{ | |
"epoch": 1.093984962406015, | |
"grad_norm": 4.277261257171631, | |
"learning_rate": 9.584771378986995e-05, | |
"loss": 0.2181, | |
"step": 582 | |
}, | |
{ | |
"epoch": 1.0958646616541354, | |
"grad_norm": 3.1522698402404785, | |
"learning_rate": 9.580258346603267e-05, | |
"loss": 0.1462, | |
"step": 583 | |
}, | |
{ | |
"epoch": 1.0977443609022557, | |
"grad_norm": 1.566369891166687, | |
"learning_rate": 9.57572237444629e-05, | |
"loss": 0.0393, | |
"step": 584 | |
}, | |
{ | |
"epoch": 1.099624060150376, | |
"grad_norm": 3.8027660846710205, | |
"learning_rate": 9.57116349376114e-05, | |
"loss": 0.2449, | |
"step": 585 | |
}, | |
{ | |
"epoch": 1.1015037593984962, | |
"grad_norm": 2.550198554992676, | |
"learning_rate": 9.566581735950695e-05, | |
"loss": 0.0837, | |
"step": 586 | |
}, | |
{ | |
"epoch": 1.1033834586466165, | |
"grad_norm": 3.44278621673584, | |
"learning_rate": 9.561977132575412e-05, | |
"loss": 0.1569, | |
"step": 587 | |
}, | |
{ | |
"epoch": 1.1052631578947367, | |
"grad_norm": 2.3197524547576904, | |
"learning_rate": 9.55734971535312e-05, | |
"loss": 0.0708, | |
"step": 588 | |
}, | |
{ | |
"epoch": 1.1071428571428572, | |
"grad_norm": 3.2017619609832764, | |
"learning_rate": 9.552699516158792e-05, | |
"loss": 0.1351, | |
"step": 589 | |
}, | |
{ | |
"epoch": 1.1090225563909775, | |
"grad_norm": 3.7326323986053467, | |
"learning_rate": 9.548026567024335e-05, | |
"loss": 0.244, | |
"step": 590 | |
}, | |
{ | |
"epoch": 1.1109022556390977, | |
"grad_norm": 2.7139155864715576, | |
"learning_rate": 9.543330900138357e-05, | |
"loss": 0.071, | |
"step": 591 | |
}, | |
{ | |
"epoch": 1.112781954887218, | |
"grad_norm": 3.909640073776245, | |
"learning_rate": 9.53861254784596e-05, | |
"loss": 0.3121, | |
"step": 592 | |
}, | |
{ | |
"epoch": 1.1146616541353382, | |
"grad_norm": 3.1949503421783447, | |
"learning_rate": 9.533871542648504e-05, | |
"loss": 0.1441, | |
"step": 593 | |
}, | |
{ | |
"epoch": 1.1165413533834587, | |
"grad_norm": 3.353057622909546, | |
"learning_rate": 9.52910791720339e-05, | |
"loss": 0.1103, | |
"step": 594 | |
}, | |
{ | |
"epoch": 1.118421052631579, | |
"grad_norm": 2.921609401702881, | |
"learning_rate": 9.524321704323836e-05, | |
"loss": 0.106, | |
"step": 595 | |
}, | |
{ | |
"epoch": 1.1203007518796992, | |
"grad_norm": 2.78655743598938, | |
"learning_rate": 9.519512936978643e-05, | |
"loss": 0.0783, | |
"step": 596 | |
}, | |
{ | |
"epoch": 1.1221804511278195, | |
"grad_norm": 1.8240987062454224, | |
"learning_rate": 9.514681648291985e-05, | |
"loss": 0.0545, | |
"step": 597 | |
}, | |
{ | |
"epoch": 1.1240601503759398, | |
"grad_norm": 3.7685468196868896, | |
"learning_rate": 9.509827871543156e-05, | |
"loss": 0.1546, | |
"step": 598 | |
}, | |
{ | |
"epoch": 1.1259398496240602, | |
"grad_norm": 2.281994581222534, | |
"learning_rate": 9.504951640166362e-05, | |
"loss": 0.0715, | |
"step": 599 | |
}, | |
{ | |
"epoch": 1.1278195488721805, | |
"grad_norm": 3.044015645980835, | |
"learning_rate": 9.500052987750481e-05, | |
"loss": 0.1316, | |
"step": 600 | |
}, | |
{ | |
"epoch": 1.1278195488721805, | |
"eval_global_dataset_loss": 1.0268114805221558, | |
"eval_global_dataset_runtime": 72.9657, | |
"eval_global_dataset_samples_per_second": 13.321, | |
"eval_global_dataset_steps_per_second": 0.055, | |
"eval_sequential_score": 0.9198838375107304, | |
"eval_sts-test-1024_pearson_cosine": 0.90596708830987, | |
"eval_sts-test-1024_spearman_cosine": 0.9204009830873694, | |
"eval_sts-test-1280_pearson_cosine": 0.9059622219277733, | |
"eval_sts-test-1280_spearman_cosine": 0.9204031758991775, | |
"eval_sts-test-512_pearson_cosine": 0.904702124198475, | |
"eval_sts-test-512_spearman_cosine": 0.9198838375107304, | |
"eval_sts-test-760_pearson_cosine": 0.9044429884523028, | |
"eval_sts-test-760_spearman_cosine": 0.9200724193262337, | |
"eval_sts-test_pearson_cosine": 0.9059576370598521, | |
"eval_sts-test_spearman_cosine": 0.9204111863749668, | |
"step": 600 | |
}, | |
{ | |
"epoch": 1.1296992481203008, | |
"grad_norm": 4.599878311157227, | |
"learning_rate": 9.495131948038836e-05, | |
"loss": 0.2016, | |
"step": 601 | |
}, | |
{ | |
"epoch": 1.131578947368421, | |
"grad_norm": 3.2281079292297363, | |
"learning_rate": 9.490188554928956e-05, | |
"loss": 0.1192, | |
"step": 602 | |
}, | |
{ | |
"epoch": 1.1334586466165413, | |
"grad_norm": 3.7087974548339844, | |
"learning_rate": 9.485222842472348e-05, | |
"loss": 0.1884, | |
"step": 603 | |
}, | |
{ | |
"epoch": 1.1353383458646618, | |
"grad_norm": 3.4763455390930176, | |
"learning_rate": 9.48023484487426e-05, | |
"loss": 0.1772, | |
"step": 604 | |
}, | |
{ | |
"epoch": 1.137218045112782, | |
"grad_norm": 4.881205081939697, | |
"learning_rate": 9.475224596493453e-05, | |
"loss": 0.3432, | |
"step": 605 | |
}, | |
{ | |
"epoch": 1.1390977443609023, | |
"grad_norm": 2.759403944015503, | |
"learning_rate": 9.470192131841948e-05, | |
"loss": 0.0932, | |
"step": 606 | |
}, | |
{ | |
"epoch": 1.1409774436090225, | |
"grad_norm": 3.1468870639801025, | |
"learning_rate": 9.465137485584806e-05, | |
"loss": 0.135, | |
"step": 607 | |
}, | |
{ | |
"epoch": 1.1428571428571428, | |
"grad_norm": 2.657116651535034, | |
"learning_rate": 9.460060692539875e-05, | |
"loss": 0.1132, | |
"step": 608 | |
}, | |
{ | |
"epoch": 1.1447368421052633, | |
"grad_norm": 2.5455267429351807, | |
"learning_rate": 9.454961787677563e-05, | |
"loss": 0.1118, | |
"step": 609 | |
}, | |
{ | |
"epoch": 1.1466165413533835, | |
"grad_norm": 3.2456393241882324, | |
"learning_rate": 9.449840806120584e-05, | |
"loss": 0.2478, | |
"step": 610 | |
}, | |
{ | |
"epoch": 1.1484962406015038, | |
"grad_norm": 4.167994976043701, | |
"learning_rate": 9.444697783143726e-05, | |
"loss": 0.2155, | |
"step": 611 | |
}, | |
{ | |
"epoch": 1.150375939849624, | |
"grad_norm": 2.268927574157715, | |
"learning_rate": 9.439532754173608e-05, | |
"loss": 0.1018, | |
"step": 612 | |
}, | |
{ | |
"epoch": 1.1522556390977443, | |
"grad_norm": 3.0976715087890625, | |
"learning_rate": 9.434345754788421e-05, | |
"loss": 0.124, | |
"step": 613 | |
}, | |
{ | |
"epoch": 1.1541353383458646, | |
"grad_norm": 4.238986492156982, | |
"learning_rate": 9.42913682071771e-05, | |
"loss": 0.2199, | |
"step": 614 | |
}, | |
{ | |
"epoch": 1.156015037593985, | |
"grad_norm": 2.4929847717285156, | |
"learning_rate": 9.4239059878421e-05, | |
"loss": 0.1268, | |
"step": 615 | |
}, | |
{ | |
"epoch": 1.1578947368421053, | |
"grad_norm": 2.0077924728393555, | |
"learning_rate": 9.418653292193069e-05, | |
"loss": 0.0695, | |
"step": 616 | |
}, | |
{ | |
"epoch": 1.1597744360902256, | |
"grad_norm": 3.0697379112243652, | |
"learning_rate": 9.413378769952685e-05, | |
"loss": 0.1006, | |
"step": 617 | |
}, | |
{ | |
"epoch": 1.1616541353383458, | |
"grad_norm": 3.9670581817626953, | |
"learning_rate": 9.408082457453371e-05, | |
"loss": 0.2547, | |
"step": 618 | |
}, | |
{ | |
"epoch": 1.163533834586466, | |
"grad_norm": 2.876302719116211, | |
"learning_rate": 9.402764391177645e-05, | |
"loss": 0.1062, | |
"step": 619 | |
}, | |
{ | |
"epoch": 1.1654135338345863, | |
"grad_norm": 3.397611141204834, | |
"learning_rate": 9.397424607757868e-05, | |
"loss": 0.1647, | |
"step": 620 | |
}, | |
{ | |
"epoch": 1.1672932330827068, | |
"grad_norm": 3.3144094944000244, | |
"learning_rate": 9.392063143975999e-05, | |
"loss": 0.1797, | |
"step": 621 | |
}, | |
{ | |
"epoch": 1.169172932330827, | |
"grad_norm": 3.333677053451538, | |
"learning_rate": 9.386680036763333e-05, | |
"loss": 0.1754, | |
"step": 622 | |
}, | |
{ | |
"epoch": 1.1710526315789473, | |
"grad_norm": 1.7224763631820679, | |
"learning_rate": 9.38127532320026e-05, | |
"loss": 0.0471, | |
"step": 623 | |
}, | |
{ | |
"epoch": 1.1729323308270676, | |
"grad_norm": 3.2871925830841064, | |
"learning_rate": 9.375849040515989e-05, | |
"loss": 0.1537, | |
"step": 624 | |
}, | |
{ | |
"epoch": 1.1748120300751879, | |
"grad_norm": 3.3504252433776855, | |
"learning_rate": 9.370401226088313e-05, | |
"loss": 0.1531, | |
"step": 625 | |
}, | |
{ | |
"epoch": 1.1766917293233083, | |
"grad_norm": 3.614995002746582, | |
"learning_rate": 9.364931917443335e-05, | |
"loss": 0.2241, | |
"step": 626 | |
}, | |
{ | |
"epoch": 1.1785714285714286, | |
"grad_norm": 2.5657124519348145, | |
"learning_rate": 9.359441152255224e-05, | |
"loss": 0.103, | |
"step": 627 | |
}, | |
{ | |
"epoch": 1.1804511278195489, | |
"grad_norm": 3.1188507080078125, | |
"learning_rate": 9.353928968345938e-05, | |
"loss": 0.1551, | |
"step": 628 | |
}, | |
{ | |
"epoch": 1.1823308270676691, | |
"grad_norm": 3.6165919303894043, | |
"learning_rate": 9.348395403684982e-05, | |
"loss": 0.319, | |
"step": 629 | |
}, | |
{ | |
"epoch": 1.1842105263157894, | |
"grad_norm": 3.6497128009796143, | |
"learning_rate": 9.342840496389132e-05, | |
"loss": 0.2347, | |
"step": 630 | |
}, | |
{ | |
"epoch": 1.1860902255639099, | |
"grad_norm": 2.848780393600464, | |
"learning_rate": 9.33726428472218e-05, | |
"loss": 0.1735, | |
"step": 631 | |
}, | |
{ | |
"epoch": 1.1879699248120301, | |
"grad_norm": 1.7372788190841675, | |
"learning_rate": 9.331666807094671e-05, | |
"loss": 0.0549, | |
"step": 632 | |
}, | |
{ | |
"epoch": 1.1898496240601504, | |
"grad_norm": 3.3109991550445557, | |
"learning_rate": 9.326048102063631e-05, | |
"loss": 0.2516, | |
"step": 633 | |
}, | |
{ | |
"epoch": 1.1917293233082706, | |
"grad_norm": 2.7588775157928467, | |
"learning_rate": 9.320408208332313e-05, | |
"loss": 0.0896, | |
"step": 634 | |
}, | |
{ | |
"epoch": 1.193609022556391, | |
"grad_norm": 3.7248873710632324, | |
"learning_rate": 9.314747164749917e-05, | |
"loss": 0.1549, | |
"step": 635 | |
}, | |
{ | |
"epoch": 1.1954887218045114, | |
"grad_norm": 2.6793620586395264, | |
"learning_rate": 9.309065010311336e-05, | |
"loss": 0.1302, | |
"step": 636 | |
}, | |
{ | |
"epoch": 1.1973684210526316, | |
"grad_norm": 2.9483072757720947, | |
"learning_rate": 9.303361784156875e-05, | |
"loss": 0.1657, | |
"step": 637 | |
}, | |
{ | |
"epoch": 1.199248120300752, | |
"grad_norm": 3.268934726715088, | |
"learning_rate": 9.297637525571989e-05, | |
"loss": 0.1467, | |
"step": 638 | |
}, | |
{ | |
"epoch": 1.2011278195488722, | |
"grad_norm": 3.103567123413086, | |
"learning_rate": 9.291892273987009e-05, | |
"loss": 0.1631, | |
"step": 639 | |
}, | |
{ | |
"epoch": 1.2030075187969924, | |
"grad_norm": 1.773438811302185, | |
"learning_rate": 9.286126068976875e-05, | |
"loss": 0.0765, | |
"step": 640 | |
}, | |
{ | |
"epoch": 1.204887218045113, | |
"grad_norm": 2.9956865310668945, | |
"learning_rate": 9.28033895026086e-05, | |
"loss": 0.1571, | |
"step": 641 | |
}, | |
{ | |
"epoch": 1.2067669172932332, | |
"grad_norm": 3.9764537811279297, | |
"learning_rate": 9.274530957702295e-05, | |
"loss": 0.2937, | |
"step": 642 | |
}, | |
{ | |
"epoch": 1.2086466165413534, | |
"grad_norm": 3.338249444961548, | |
"learning_rate": 9.268702131308292e-05, | |
"loss": 0.1284, | |
"step": 643 | |
}, | |
{ | |
"epoch": 1.2105263157894737, | |
"grad_norm": 3.7653377056121826, | |
"learning_rate": 9.26285251122948e-05, | |
"loss": 0.2418, | |
"step": 644 | |
}, | |
{ | |
"epoch": 1.212406015037594, | |
"grad_norm": 2.9056382179260254, | |
"learning_rate": 9.256982137759718e-05, | |
"loss": 0.1306, | |
"step": 645 | |
}, | |
{ | |
"epoch": 1.2142857142857142, | |
"grad_norm": 4.180511951446533, | |
"learning_rate": 9.251091051335816e-05, | |
"loss": 0.3252, | |
"step": 646 | |
}, | |
{ | |
"epoch": 1.2161654135338347, | |
"grad_norm": 1.7085641622543335, | |
"learning_rate": 9.245179292537267e-05, | |
"loss": 0.0596, | |
"step": 647 | |
}, | |
{ | |
"epoch": 1.218045112781955, | |
"grad_norm": 1.9504470825195312, | |
"learning_rate": 9.239246902085959e-05, | |
"loss": 0.0563, | |
"step": 648 | |
}, | |
{ | |
"epoch": 1.2199248120300752, | |
"grad_norm": 2.950937509536743, | |
"learning_rate": 9.233293920845897e-05, | |
"loss": 0.1782, | |
"step": 649 | |
}, | |
{ | |
"epoch": 1.2218045112781954, | |
"grad_norm": 3.335604667663574, | |
"learning_rate": 9.227320389822918e-05, | |
"loss": 0.2451, | |
"step": 650 | |
}, | |
{ | |
"epoch": 1.2218045112781954, | |
"eval_global_dataset_loss": 1.0016443729400635, | |
"eval_global_dataset_runtime": 73.7823, | |
"eval_global_dataset_samples_per_second": 13.174, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.9186365751044907, | |
"eval_sts-test-1024_pearson_cosine": 0.9056774761966145, | |
"eval_sts-test-1024_spearman_cosine": 0.919202812814876, | |
"eval_sts-test-1280_pearson_cosine": 0.9056743598981793, | |
"eval_sts-test-1280_spearman_cosine": 0.9192088989864254, | |
"eval_sts-test-512_pearson_cosine": 0.9055633055097914, | |
"eval_sts-test-512_spearman_cosine": 0.9186365751044907, | |
"eval_sts-test-760_pearson_cosine": 0.9043753472141889, | |
"eval_sts-test-760_spearman_cosine": 0.9189957934796795, | |
"eval_sts-test_pearson_cosine": 0.9056699239203394, | |
"eval_sts-test_spearman_cosine": 0.9191896559440269, | |
"step": 650 | |
}, | |
{ | |
"epoch": 1.2236842105263157, | |
"grad_norm": 2.9990556240081787, | |
"learning_rate": 9.221326350164416e-05, | |
"loss": 0.1245, | |
"step": 651 | |
}, | |
{ | |
"epoch": 1.225563909774436, | |
"grad_norm": 2.6252524852752686, | |
"learning_rate": 9.215311843159054e-05, | |
"loss": 0.1188, | |
"step": 652 | |
}, | |
{ | |
"epoch": 1.2274436090225564, | |
"grad_norm": 2.2566170692443848, | |
"learning_rate": 9.209276910236477e-05, | |
"loss": 0.1251, | |
"step": 653 | |
}, | |
{ | |
"epoch": 1.2293233082706767, | |
"grad_norm": 2.2437398433685303, | |
"learning_rate": 9.20322159296703e-05, | |
"loss": 0.1049, | |
"step": 654 | |
}, | |
{ | |
"epoch": 1.231203007518797, | |
"grad_norm": 1.9391624927520752, | |
"learning_rate": 9.197145933061477e-05, | |
"loss": 0.0618, | |
"step": 655 | |
}, | |
{ | |
"epoch": 1.2330827067669172, | |
"grad_norm": 2.8038880825042725, | |
"learning_rate": 9.1910499723707e-05, | |
"loss": 0.1165, | |
"step": 656 | |
}, | |
{ | |
"epoch": 1.2349624060150375, | |
"grad_norm": 2.137781858444214, | |
"learning_rate": 9.184933752885421e-05, | |
"loss": 0.107, | |
"step": 657 | |
}, | |
{ | |
"epoch": 1.236842105263158, | |
"grad_norm": 3.2800896167755127, | |
"learning_rate": 9.178797316735915e-05, | |
"loss": 0.1314, | |
"step": 658 | |
}, | |
{ | |
"epoch": 1.2387218045112782, | |
"grad_norm": 2.87821364402771, | |
"learning_rate": 9.17264070619171e-05, | |
"loss": 0.1947, | |
"step": 659 | |
}, | |
{ | |
"epoch": 1.2406015037593985, | |
"grad_norm": 2.358914375305176, | |
"learning_rate": 9.166463963661303e-05, | |
"loss": 0.1028, | |
"step": 660 | |
}, | |
{ | |
"epoch": 1.2424812030075187, | |
"grad_norm": 1.8619112968444824, | |
"learning_rate": 9.160267131691865e-05, | |
"loss": 0.0645, | |
"step": 661 | |
}, | |
{ | |
"epoch": 1.244360902255639, | |
"grad_norm": 3.056086301803589, | |
"learning_rate": 9.154050252968949e-05, | |
"loss": 0.1805, | |
"step": 662 | |
}, | |
{ | |
"epoch": 1.2462406015037595, | |
"grad_norm": 1.4002981185913086, | |
"learning_rate": 9.147813370316197e-05, | |
"loss": 0.0659, | |
"step": 663 | |
}, | |
{ | |
"epoch": 1.2481203007518797, | |
"grad_norm": 3.8851730823516846, | |
"learning_rate": 9.14155652669504e-05, | |
"loss": 0.2046, | |
"step": 664 | |
}, | |
{ | |
"epoch": 1.25, | |
"grad_norm": 3.7416234016418457, | |
"learning_rate": 9.135279765204414e-05, | |
"loss": 0.2969, | |
"step": 665 | |
}, | |
{ | |
"epoch": 1.2518796992481203, | |
"grad_norm": 3.65122389793396, | |
"learning_rate": 9.128983129080445e-05, | |
"loss": 0.1854, | |
"step": 666 | |
}, | |
{ | |
"epoch": 1.2537593984962405, | |
"grad_norm": 2.737818717956543, | |
"learning_rate": 9.122666661696167e-05, | |
"loss": 0.1481, | |
"step": 667 | |
}, | |
{ | |
"epoch": 1.255639097744361, | |
"grad_norm": 2.880852460861206, | |
"learning_rate": 9.11633040656122e-05, | |
"loss": 0.1347, | |
"step": 668 | |
}, | |
{ | |
"epoch": 1.2575187969924813, | |
"grad_norm": 3.3521029949188232, | |
"learning_rate": 9.10997440732154e-05, | |
"loss": 0.1817, | |
"step": 669 | |
}, | |
{ | |
"epoch": 1.2593984962406015, | |
"grad_norm": 3.2931160926818848, | |
"learning_rate": 9.103598707759071e-05, | |
"loss": 0.2038, | |
"step": 670 | |
}, | |
{ | |
"epoch": 1.2612781954887218, | |
"grad_norm": 3.4462192058563232, | |
"learning_rate": 9.097203351791458e-05, | |
"loss": 0.1796, | |
"step": 671 | |
}, | |
{ | |
"epoch": 1.263157894736842, | |
"grad_norm": 3.3525967597961426, | |
"learning_rate": 9.090788383471744e-05, | |
"loss": 0.1607, | |
"step": 672 | |
}, | |
{ | |
"epoch": 1.2650375939849625, | |
"grad_norm": 3.104384183883667, | |
"learning_rate": 9.084353846988069e-05, | |
"loss": 0.1057, | |
"step": 673 | |
}, | |
{ | |
"epoch": 1.2669172932330828, | |
"grad_norm": 3.988388776779175, | |
"learning_rate": 9.077899786663362e-05, | |
"loss": 0.2247, | |
"step": 674 | |
}, | |
{ | |
"epoch": 1.268796992481203, | |
"grad_norm": 3.816800117492676, | |
"learning_rate": 9.071426246955038e-05, | |
"loss": 0.2174, | |
"step": 675 | |
}, | |
{ | |
"epoch": 1.2706766917293233, | |
"grad_norm": 2.942406177520752, | |
"learning_rate": 9.064933272454696e-05, | |
"loss": 0.1497, | |
"step": 676 | |
}, | |
{ | |
"epoch": 1.2725563909774436, | |
"grad_norm": 3.1315743923187256, | |
"learning_rate": 9.058420907887799e-05, | |
"loss": 0.141, | |
"step": 677 | |
}, | |
{ | |
"epoch": 1.274436090225564, | |
"grad_norm": 3.2112250328063965, | |
"learning_rate": 9.051889198113384e-05, | |
"loss": 0.1776, | |
"step": 678 | |
}, | |
{ | |
"epoch": 1.2763157894736843, | |
"grad_norm": 3.5193326473236084, | |
"learning_rate": 9.045338188123735e-05, | |
"loss": 0.1852, | |
"step": 679 | |
}, | |
{ | |
"epoch": 1.2781954887218046, | |
"grad_norm": 3.0619587898254395, | |
"learning_rate": 9.038767923044087e-05, | |
"loss": 0.2035, | |
"step": 680 | |
}, | |
{ | |
"epoch": 1.2800751879699248, | |
"grad_norm": 2.2244269847869873, | |
"learning_rate": 9.032178448132307e-05, | |
"loss": 0.0879, | |
"step": 681 | |
}, | |
{ | |
"epoch": 1.281954887218045, | |
"grad_norm": 1.9407459497451782, | |
"learning_rate": 9.025569808778584e-05, | |
"loss": 0.0721, | |
"step": 682 | |
}, | |
{ | |
"epoch": 1.2838345864661656, | |
"grad_norm": 3.1249163150787354, | |
"learning_rate": 9.018942050505122e-05, | |
"loss": 0.1494, | |
"step": 683 | |
}, | |
{ | |
"epoch": 1.2857142857142856, | |
"grad_norm": 3.244734287261963, | |
"learning_rate": 9.012295218965812e-05, | |
"loss": 0.1561, | |
"step": 684 | |
}, | |
{ | |
"epoch": 1.287593984962406, | |
"grad_norm": 2.47184419631958, | |
"learning_rate": 9.005629359945941e-05, | |
"loss": 0.1012, | |
"step": 685 | |
}, | |
{ | |
"epoch": 1.2894736842105263, | |
"grad_norm": 2.8298754692077637, | |
"learning_rate": 8.99894451936185e-05, | |
"loss": 0.1798, | |
"step": 686 | |
}, | |
{ | |
"epoch": 1.2913533834586466, | |
"grad_norm": 2.8187429904937744, | |
"learning_rate": 8.992240743260635e-05, | |
"loss": 0.1093, | |
"step": 687 | |
}, | |
{ | |
"epoch": 1.2932330827067668, | |
"grad_norm": 3.052556037902832, | |
"learning_rate": 8.985518077819828e-05, | |
"loss": 0.1436, | |
"step": 688 | |
}, | |
{ | |
"epoch": 1.295112781954887, | |
"grad_norm": 1.6897327899932861, | |
"learning_rate": 8.978776569347073e-05, | |
"loss": 0.0447, | |
"step": 689 | |
}, | |
{ | |
"epoch": 1.2969924812030076, | |
"grad_norm": 2.700467586517334, | |
"learning_rate": 8.972016264279812e-05, | |
"loss": 0.1563, | |
"step": 690 | |
}, | |
{ | |
"epoch": 1.2988721804511278, | |
"grad_norm": 2.610023021697998, | |
"learning_rate": 8.96523720918496e-05, | |
"loss": 0.1066, | |
"step": 691 | |
}, | |
{ | |
"epoch": 1.300751879699248, | |
"grad_norm": 3.862847089767456, | |
"learning_rate": 8.958439450758593e-05, | |
"loss": 0.235, | |
"step": 692 | |
}, | |
{ | |
"epoch": 1.3026315789473684, | |
"grad_norm": 2.830010175704956, | |
"learning_rate": 8.951623035825615e-05, | |
"loss": 0.131, | |
"step": 693 | |
}, | |
{ | |
"epoch": 1.3045112781954886, | |
"grad_norm": 3.1229724884033203, | |
"learning_rate": 8.944788011339446e-05, | |
"loss": 0.1766, | |
"step": 694 | |
}, | |
{ | |
"epoch": 1.306390977443609, | |
"grad_norm": 2.7942512035369873, | |
"learning_rate": 8.937934424381694e-05, | |
"loss": 0.1615, | |
"step": 695 | |
}, | |
{ | |
"epoch": 1.3082706766917294, | |
"grad_norm": 2.758984088897705, | |
"learning_rate": 8.931062322161823e-05, | |
"loss": 0.165, | |
"step": 696 | |
}, | |
{ | |
"epoch": 1.3101503759398496, | |
"grad_norm": 2.493286371231079, | |
"learning_rate": 8.924171752016845e-05, | |
"loss": 0.1941, | |
"step": 697 | |
}, | |
{ | |
"epoch": 1.3120300751879699, | |
"grad_norm": 1.32247793674469, | |
"learning_rate": 8.917262761410983e-05, | |
"loss": 0.0443, | |
"step": 698 | |
}, | |
{ | |
"epoch": 1.3139097744360901, | |
"grad_norm": 2.5577824115753174, | |
"learning_rate": 8.910335397935336e-05, | |
"loss": 0.1665, | |
"step": 699 | |
}, | |
{ | |
"epoch": 1.3157894736842106, | |
"grad_norm": 2.4263994693756104, | |
"learning_rate": 8.903389709307575e-05, | |
"loss": 0.1263, | |
"step": 700 | |
}, | |
{ | |
"epoch": 1.3157894736842106, | |
"eval_global_dataset_loss": 0.996315598487854, | |
"eval_global_dataset_runtime": 73.1026, | |
"eval_global_dataset_samples_per_second": 13.296, | |
"eval_global_dataset_steps_per_second": 0.055, | |
"eval_sequential_score": 0.9168267893425611, | |
"eval_sts-test-1024_pearson_cosine": 0.9032360441021332, | |
"eval_sts-test-1024_spearman_cosine": 0.9170557815471008, | |
"eval_sts-test-1280_pearson_cosine": 0.9032358213968139, | |
"eval_sts-test-1280_spearman_cosine": 0.9170974002201949, | |
"eval_sts-test-512_pearson_cosine": 0.90237825051997, | |
"eval_sts-test-512_spearman_cosine": 0.9168267893425611, | |
"eval_sts-test-760_pearson_cosine": 0.9017398576147293, | |
"eval_sts-test-760_spearman_cosine": 0.9162999327422001, | |
"eval_sts-test_pearson_cosine": 0.9032344072558449, | |
"eval_sts-test_spearman_cosine": 0.9171278758292024, | |
"step": 700 | |
}, | |
{ | |
"epoch": 1.3176691729323309, | |
"grad_norm": 2.2901155948638916, | |
"learning_rate": 8.896425743371588e-05, | |
"loss": 0.1022, | |
"step": 701 | |
}, | |
{ | |
"epoch": 1.3195488721804511, | |
"grad_norm": 3.3940200805664062, | |
"learning_rate": 8.88944354809717e-05, | |
"loss": 0.1656, | |
"step": 702 | |
}, | |
{ | |
"epoch": 1.3214285714285714, | |
"grad_norm": 4.018152713775635, | |
"learning_rate": 8.882443171579677e-05, | |
"loss": 0.262, | |
"step": 703 | |
}, | |
{ | |
"epoch": 1.3233082706766917, | |
"grad_norm": 3.101533889770508, | |
"learning_rate": 8.87542466203971e-05, | |
"loss": 0.174, | |
"step": 704 | |
}, | |
{ | |
"epoch": 1.3251879699248121, | |
"grad_norm": 2.423062324523926, | |
"learning_rate": 8.868388067822772e-05, | |
"loss": 0.0899, | |
"step": 705 | |
}, | |
{ | |
"epoch": 1.3270676691729324, | |
"grad_norm": 2.2438297271728516, | |
"learning_rate": 8.861333437398942e-05, | |
"loss": 0.0773, | |
"step": 706 | |
}, | |
{ | |
"epoch": 1.3289473684210527, | |
"grad_norm": 2.8316147327423096, | |
"learning_rate": 8.854260819362532e-05, | |
"loss": 0.1117, | |
"step": 707 | |
}, | |
{ | |
"epoch": 1.330827067669173, | |
"grad_norm": 2.189030408859253, | |
"learning_rate": 8.847170262431763e-05, | |
"loss": 0.0817, | |
"step": 708 | |
}, | |
{ | |
"epoch": 1.3327067669172932, | |
"grad_norm": 2.5064244270324707, | |
"learning_rate": 8.840061815448418e-05, | |
"loss": 0.1109, | |
"step": 709 | |
}, | |
{ | |
"epoch": 1.3345864661654137, | |
"grad_norm": 2.3013577461242676, | |
"learning_rate": 8.832935527377518e-05, | |
"loss": 0.0914, | |
"step": 710 | |
}, | |
{ | |
"epoch": 1.336466165413534, | |
"grad_norm": 3.4709525108337402, | |
"learning_rate": 8.825791447306974e-05, | |
"loss": 0.2248, | |
"step": 711 | |
}, | |
{ | |
"epoch": 1.3383458646616542, | |
"grad_norm": 2.144796133041382, | |
"learning_rate": 8.81862962444726e-05, | |
"loss": 0.1061, | |
"step": 712 | |
}, | |
{ | |
"epoch": 1.3402255639097744, | |
"grad_norm": 3.4525840282440186, | |
"learning_rate": 8.811450108131059e-05, | |
"loss": 0.2445, | |
"step": 713 | |
}, | |
{ | |
"epoch": 1.3421052631578947, | |
"grad_norm": 2.398047685623169, | |
"learning_rate": 8.804252947812936e-05, | |
"loss": 0.1304, | |
"step": 714 | |
}, | |
{ | |
"epoch": 1.3439849624060152, | |
"grad_norm": 1.2655713558197021, | |
"learning_rate": 8.797038193068994e-05, | |
"loss": 0.0253, | |
"step": 715 | |
}, | |
{ | |
"epoch": 1.3458646616541352, | |
"grad_norm": 3.264692783355713, | |
"learning_rate": 8.78980589359653e-05, | |
"loss": 0.2007, | |
"step": 716 | |
}, | |
{ | |
"epoch": 1.3477443609022557, | |
"grad_norm": 1.141622543334961, | |
"learning_rate": 8.782556099213692e-05, | |
"loss": 0.0736, | |
"step": 717 | |
}, | |
{ | |
"epoch": 1.349624060150376, | |
"grad_norm": 3.286989688873291, | |
"learning_rate": 8.775288859859142e-05, | |
"loss": 0.166, | |
"step": 718 | |
}, | |
{ | |
"epoch": 1.3515037593984962, | |
"grad_norm": 3.4484078884124756, | |
"learning_rate": 8.768004225591704e-05, | |
"loss": 0.1696, | |
"step": 719 | |
}, | |
{ | |
"epoch": 1.3533834586466165, | |
"grad_norm": 2.700953960418701, | |
"learning_rate": 8.760702246590026e-05, | |
"loss": 0.1204, | |
"step": 720 | |
}, | |
{ | |
"epoch": 1.3552631578947367, | |
"grad_norm": 1.3329017162322998, | |
"learning_rate": 8.753382973152233e-05, | |
"loss": 0.0349, | |
"step": 721 | |
}, | |
{ | |
"epoch": 1.3571428571428572, | |
"grad_norm": 3.4619643688201904, | |
"learning_rate": 8.746046455695572e-05, | |
"loss": 0.2092, | |
"step": 722 | |
}, | |
{ | |
"epoch": 1.3590225563909775, | |
"grad_norm": 3.6183106899261475, | |
"learning_rate": 8.73869274475608e-05, | |
"loss": 0.2843, | |
"step": 723 | |
}, | |
{ | |
"epoch": 1.3609022556390977, | |
"grad_norm": 2.5243875980377197, | |
"learning_rate": 8.731321890988223e-05, | |
"loss": 0.0976, | |
"step": 724 | |
}, | |
{ | |
"epoch": 1.362781954887218, | |
"grad_norm": 2.034864664077759, | |
"learning_rate": 8.723933945164553e-05, | |
"loss": 0.0867, | |
"step": 725 | |
}, | |
{ | |
"epoch": 1.3646616541353382, | |
"grad_norm": 2.596176862716675, | |
"learning_rate": 8.716528958175354e-05, | |
"loss": 0.1035, | |
"step": 726 | |
}, | |
{ | |
"epoch": 1.3665413533834587, | |
"grad_norm": 2.702852487564087, | |
"learning_rate": 8.709106981028301e-05, | |
"loss": 0.1341, | |
"step": 727 | |
}, | |
{ | |
"epoch": 1.368421052631579, | |
"grad_norm": 3.1285736560821533, | |
"learning_rate": 8.701668064848096e-05, | |
"loss": 0.1771, | |
"step": 728 | |
}, | |
{ | |
"epoch": 1.3703007518796992, | |
"grad_norm": 3.2308318614959717, | |
"learning_rate": 8.694212260876125e-05, | |
"loss": 0.1118, | |
"step": 729 | |
}, | |
{ | |
"epoch": 1.3721804511278195, | |
"grad_norm": 3.1482093334198, | |
"learning_rate": 8.686739620470099e-05, | |
"loss": 0.1495, | |
"step": 730 | |
}, | |
{ | |
"epoch": 1.3740601503759398, | |
"grad_norm": 2.186100721359253, | |
"learning_rate": 8.679250195103707e-05, | |
"loss": 0.0727, | |
"step": 731 | |
}, | |
{ | |
"epoch": 1.3759398496240602, | |
"grad_norm": 2.4259281158447266, | |
"learning_rate": 8.671744036366256e-05, | |
"loss": 0.0749, | |
"step": 732 | |
}, | |
{ | |
"epoch": 1.3778195488721805, | |
"grad_norm": 2.370572566986084, | |
"learning_rate": 8.664221195962318e-05, | |
"loss": 0.0647, | |
"step": 733 | |
}, | |
{ | |
"epoch": 1.3796992481203008, | |
"grad_norm": 2.1155412197113037, | |
"learning_rate": 8.656681725711369e-05, | |
"loss": 0.0729, | |
"step": 734 | |
}, | |
{ | |
"epoch": 1.381578947368421, | |
"grad_norm": 2.4955532550811768, | |
"learning_rate": 8.649125677547441e-05, | |
"loss": 0.0743, | |
"step": 735 | |
}, | |
{ | |
"epoch": 1.3834586466165413, | |
"grad_norm": 3.299919605255127, | |
"learning_rate": 8.641553103518759e-05, | |
"loss": 0.1728, | |
"step": 736 | |
}, | |
{ | |
"epoch": 1.3853383458646618, | |
"grad_norm": 3.5957415103912354, | |
"learning_rate": 8.633964055787381e-05, | |
"loss": 0.2017, | |
"step": 737 | |
}, | |
{ | |
"epoch": 1.387218045112782, | |
"grad_norm": 3.4752085208892822, | |
"learning_rate": 8.62635858662884e-05, | |
"loss": 0.2164, | |
"step": 738 | |
}, | |
{ | |
"epoch": 1.3890977443609023, | |
"grad_norm": 2.705174207687378, | |
"learning_rate": 8.618736748431786e-05, | |
"loss": 0.1107, | |
"step": 739 | |
}, | |
{ | |
"epoch": 1.3909774436090225, | |
"grad_norm": 1.9250751733779907, | |
"learning_rate": 8.611098593697624e-05, | |
"loss": 0.0747, | |
"step": 740 | |
}, | |
{ | |
"epoch": 1.3928571428571428, | |
"grad_norm": 3.185225248336792, | |
"learning_rate": 8.603444175040151e-05, | |
"loss": 0.1181, | |
"step": 741 | |
}, | |
{ | |
"epoch": 1.3947368421052633, | |
"grad_norm": 3.068506956100464, | |
"learning_rate": 8.595773545185196e-05, | |
"loss": 0.1958, | |
"step": 742 | |
}, | |
{ | |
"epoch": 1.3966165413533835, | |
"grad_norm": 4.703313827514648, | |
"learning_rate": 8.588086756970252e-05, | |
"loss": 0.3773, | |
"step": 743 | |
}, | |
{ | |
"epoch": 1.3984962406015038, | |
"grad_norm": 3.395601511001587, | |
"learning_rate": 8.580383863344118e-05, | |
"loss": 0.1545, | |
"step": 744 | |
}, | |
{ | |
"epoch": 1.400375939849624, | |
"grad_norm": 2.7103271484375, | |
"learning_rate": 8.572664917366534e-05, | |
"loss": 0.1117, | |
"step": 745 | |
}, | |
{ | |
"epoch": 1.4022556390977443, | |
"grad_norm": 1.5318002700805664, | |
"learning_rate": 8.564929972207808e-05, | |
"loss": 0.0535, | |
"step": 746 | |
}, | |
{ | |
"epoch": 1.4041353383458648, | |
"grad_norm": 3.2796318531036377, | |
"learning_rate": 8.557179081148459e-05, | |
"loss": 0.1782, | |
"step": 747 | |
}, | |
{ | |
"epoch": 1.4060150375939848, | |
"grad_norm": 4.089963436126709, | |
"learning_rate": 8.549412297578841e-05, | |
"loss": 0.2679, | |
"step": 748 | |
}, | |
{ | |
"epoch": 1.4078947368421053, | |
"grad_norm": 2.2563154697418213, | |
"learning_rate": 8.541629674998787e-05, | |
"loss": 0.063, | |
"step": 749 | |
}, | |
{ | |
"epoch": 1.4097744360902256, | |
"grad_norm": 4.269903182983398, | |
"learning_rate": 8.533831267017232e-05, | |
"loss": 0.2878, | |
"step": 750 | |
}, | |
{ | |
"epoch": 1.4097744360902256, | |
"eval_global_dataset_loss": 1.0132509469985962, | |
"eval_global_dataset_runtime": 72.6207, | |
"eval_global_dataset_samples_per_second": 13.385, | |
"eval_global_dataset_steps_per_second": 0.055, | |
"eval_sequential_score": 0.9202925060297567, | |
"eval_sts-test-1024_pearson_cosine": 0.9026730374123166, | |
"eval_sts-test-1024_spearman_cosine": 0.9203161346957712, | |
"eval_sts-test-1280_pearson_cosine": 0.9026700561671179, | |
"eval_sts-test-1280_spearman_cosine": 0.9203345274642032, | |
"eval_sts-test-512_pearson_cosine": 0.9028502216142966, | |
"eval_sts-test-512_spearman_cosine": 0.9202925060297567, | |
"eval_sts-test-760_pearson_cosine": 0.9010905085003336, | |
"eval_sts-test-760_spearman_cosine": 0.9196744016074186, | |
"eval_sts-test_pearson_cosine": 0.9026655939239919, | |
"eval_sts-test_spearman_cosine": 0.920319401537853, | |
"step": 750 | |
}, | |
{ | |
"epoch": 1.4116541353383458, | |
"grad_norm": 3.073023796081543, | |
"learning_rate": 8.526017127351838e-05, | |
"loss": 0.134, | |
"step": 751 | |
}, | |
{ | |
"epoch": 1.413533834586466, | |
"grad_norm": 2.0361175537109375, | |
"learning_rate": 8.518187309828641e-05, | |
"loss": 0.0676, | |
"step": 752 | |
}, | |
{ | |
"epoch": 1.4154135338345863, | |
"grad_norm": 1.954580545425415, | |
"learning_rate": 8.510341868381665e-05, | |
"loss": 0.0841, | |
"step": 753 | |
}, | |
{ | |
"epoch": 1.4172932330827068, | |
"grad_norm": 2.2092957496643066, | |
"learning_rate": 8.502480857052559e-05, | |
"loss": 0.1125, | |
"step": 754 | |
}, | |
{ | |
"epoch": 1.419172932330827, | |
"grad_norm": 2.514190673828125, | |
"learning_rate": 8.49460432999022e-05, | |
"loss": 0.0927, | |
"step": 755 | |
}, | |
{ | |
"epoch": 1.4210526315789473, | |
"grad_norm": 2.4373466968536377, | |
"learning_rate": 8.486712341450417e-05, | |
"loss": 0.0954, | |
"step": 756 | |
}, | |
{ | |
"epoch": 1.4229323308270676, | |
"grad_norm": 3.186216354370117, | |
"learning_rate": 8.478804945795435e-05, | |
"loss": 0.1168, | |
"step": 757 | |
}, | |
{ | |
"epoch": 1.4248120300751879, | |
"grad_norm": 2.6759350299835205, | |
"learning_rate": 8.470882197493675e-05, | |
"loss": 0.1191, | |
"step": 758 | |
}, | |
{ | |
"epoch": 1.4266917293233083, | |
"grad_norm": 3.011976718902588, | |
"learning_rate": 8.4629441511193e-05, | |
"loss": 0.1219, | |
"step": 759 | |
}, | |
{ | |
"epoch": 1.4285714285714286, | |
"grad_norm": 1.6346877813339233, | |
"learning_rate": 8.454990861351843e-05, | |
"loss": 0.0334, | |
"step": 760 | |
}, | |
{ | |
"epoch": 1.4304511278195489, | |
"grad_norm": 2.6164300441741943, | |
"learning_rate": 8.447022382975843e-05, | |
"loss": 0.0886, | |
"step": 761 | |
}, | |
{ | |
"epoch": 1.4323308270676691, | |
"grad_norm": 3.3143882751464844, | |
"learning_rate": 8.439038770880463e-05, | |
"loss": 0.1105, | |
"step": 762 | |
}, | |
{ | |
"epoch": 1.4342105263157894, | |
"grad_norm": 0.7730489373207092, | |
"learning_rate": 8.431040080059108e-05, | |
"loss": 0.0174, | |
"step": 763 | |
}, | |
{ | |
"epoch": 1.4360902255639099, | |
"grad_norm": 3.0229296684265137, | |
"learning_rate": 8.423026365609049e-05, | |
"loss": 0.1044, | |
"step": 764 | |
}, | |
{ | |
"epoch": 1.4379699248120301, | |
"grad_norm": 2.514319896697998, | |
"learning_rate": 8.41499768273105e-05, | |
"loss": 0.0946, | |
"step": 765 | |
}, | |
{ | |
"epoch": 1.4398496240601504, | |
"grad_norm": 3.089364528656006, | |
"learning_rate": 8.406954086728976e-05, | |
"loss": 0.1212, | |
"step": 766 | |
}, | |
{ | |
"epoch": 1.4417293233082706, | |
"grad_norm": 3.5201704502105713, | |
"learning_rate": 8.39889563300942e-05, | |
"loss": 0.15, | |
"step": 767 | |
}, | |
{ | |
"epoch": 1.443609022556391, | |
"grad_norm": 2.829073667526245, | |
"learning_rate": 8.39082237708132e-05, | |
"loss": 0.1287, | |
"step": 768 | |
}, | |
{ | |
"epoch": 1.4454887218045114, | |
"grad_norm": 4.4007954597473145, | |
"learning_rate": 8.382734374555574e-05, | |
"loss": 0.3371, | |
"step": 769 | |
}, | |
{ | |
"epoch": 1.4473684210526316, | |
"grad_norm": 3.454887866973877, | |
"learning_rate": 8.37463168114466e-05, | |
"loss": 0.1933, | |
"step": 770 | |
}, | |
{ | |
"epoch": 1.449248120300752, | |
"grad_norm": 1.4952677488327026, | |
"learning_rate": 8.366514352662252e-05, | |
"loss": 0.0267, | |
"step": 771 | |
}, | |
{ | |
"epoch": 1.4511278195488722, | |
"grad_norm": 2.9622116088867188, | |
"learning_rate": 8.358382445022829e-05, | |
"loss": 0.118, | |
"step": 772 | |
}, | |
{ | |
"epoch": 1.4530075187969924, | |
"grad_norm": 2.047785758972168, | |
"learning_rate": 8.350236014241304e-05, | |
"loss": 0.048, | |
"step": 773 | |
}, | |
{ | |
"epoch": 1.454887218045113, | |
"grad_norm": 3.2718257904052734, | |
"learning_rate": 8.342075116432625e-05, | |
"loss": 0.1522, | |
"step": 774 | |
}, | |
{ | |
"epoch": 1.4567669172932332, | |
"grad_norm": 4.418330192565918, | |
"learning_rate": 8.333899807811391e-05, | |
"loss": 0.24, | |
"step": 775 | |
}, | |
{ | |
"epoch": 1.4586466165413534, | |
"grad_norm": 3.0729095935821533, | |
"learning_rate": 8.325710144691472e-05, | |
"loss": 0.1502, | |
"step": 776 | |
}, | |
{ | |
"epoch": 1.4605263157894737, | |
"grad_norm": 2.873767614364624, | |
"learning_rate": 8.31750618348561e-05, | |
"loss": 0.1437, | |
"step": 777 | |
}, | |
{ | |
"epoch": 1.462406015037594, | |
"grad_norm": 3.250378131866455, | |
"learning_rate": 8.309287980705043e-05, | |
"loss": 0.1875, | |
"step": 778 | |
}, | |
{ | |
"epoch": 1.4642857142857144, | |
"grad_norm": 3.683256149291992, | |
"learning_rate": 8.301055592959101e-05, | |
"loss": 0.2667, | |
"step": 779 | |
}, | |
{ | |
"epoch": 1.4661654135338344, | |
"grad_norm": 2.6825520992279053, | |
"learning_rate": 8.292809076954832e-05, | |
"loss": 0.1094, | |
"step": 780 | |
}, | |
{ | |
"epoch": 1.468045112781955, | |
"grad_norm": 3.586061477661133, | |
"learning_rate": 8.284548489496599e-05, | |
"loss": 0.2173, | |
"step": 781 | |
}, | |
{ | |
"epoch": 1.4699248120300752, | |
"grad_norm": 2.057910919189453, | |
"learning_rate": 8.276273887485693e-05, | |
"loss": 0.0855, | |
"step": 782 | |
}, | |
{ | |
"epoch": 1.4718045112781954, | |
"grad_norm": 2.88437557220459, | |
"learning_rate": 8.267985327919943e-05, | |
"loss": 0.1287, | |
"step": 783 | |
}, | |
{ | |
"epoch": 1.4736842105263157, | |
"grad_norm": 2.481588125228882, | |
"learning_rate": 8.259682867893322e-05, | |
"loss": 0.1129, | |
"step": 784 | |
}, | |
{ | |
"epoch": 1.475563909774436, | |
"grad_norm": 2.7012627124786377, | |
"learning_rate": 8.251366564595551e-05, | |
"loss": 0.0758, | |
"step": 785 | |
}, | |
{ | |
"epoch": 1.4774436090225564, | |
"grad_norm": 3.422213554382324, | |
"learning_rate": 8.24303647531171e-05, | |
"loss": 0.2099, | |
"step": 786 | |
}, | |
{ | |
"epoch": 1.4793233082706767, | |
"grad_norm": 4.288575649261475, | |
"learning_rate": 8.234692657421838e-05, | |
"loss": 0.2751, | |
"step": 787 | |
}, | |
{ | |
"epoch": 1.481203007518797, | |
"grad_norm": 3.3151443004608154, | |
"learning_rate": 8.226335168400547e-05, | |
"loss": 0.1354, | |
"step": 788 | |
}, | |
{ | |
"epoch": 1.4830827067669172, | |
"grad_norm": 1.8601645231246948, | |
"learning_rate": 8.217964065816609e-05, | |
"loss": 0.058, | |
"step": 789 | |
}, | |
{ | |
"epoch": 1.4849624060150375, | |
"grad_norm": 2.664788246154785, | |
"learning_rate": 8.209579407332578e-05, | |
"loss": 0.1121, | |
"step": 790 | |
}, | |
{ | |
"epoch": 1.486842105263158, | |
"grad_norm": 3.37274432182312, | |
"learning_rate": 8.201181250704382e-05, | |
"loss": 0.2309, | |
"step": 791 | |
}, | |
{ | |
"epoch": 1.4887218045112782, | |
"grad_norm": 2.6739940643310547, | |
"learning_rate": 8.192769653780931e-05, | |
"loss": 0.1206, | |
"step": 792 | |
}, | |
{ | |
"epoch": 1.4906015037593985, | |
"grad_norm": 2.947044849395752, | |
"learning_rate": 8.184344674503716e-05, | |
"loss": 0.1848, | |
"step": 793 | |
}, | |
{ | |
"epoch": 1.4924812030075187, | |
"grad_norm": 1.8714139461517334, | |
"learning_rate": 8.175906370906401e-05, | |
"loss": 0.1164, | |
"step": 794 | |
}, | |
{ | |
"epoch": 1.494360902255639, | |
"grad_norm": 2.4230494499206543, | |
"learning_rate": 8.167454801114442e-05, | |
"loss": 0.1058, | |
"step": 795 | |
}, | |
{ | |
"epoch": 1.4962406015037595, | |
"grad_norm": 2.2121522426605225, | |
"learning_rate": 8.15899002334467e-05, | |
"loss": 0.0676, | |
"step": 796 | |
}, | |
{ | |
"epoch": 1.4981203007518797, | |
"grad_norm": 2.338106632232666, | |
"learning_rate": 8.150512095904901e-05, | |
"loss": 0.0922, | |
"step": 797 | |
}, | |
{ | |
"epoch": 1.5, | |
"grad_norm": 2.665654420852661, | |
"learning_rate": 8.142021077193524e-05, | |
"loss": 0.1241, | |
"step": 798 | |
}, | |
{ | |
"epoch": 1.5018796992481203, | |
"grad_norm": 2.739638566970825, | |
"learning_rate": 8.13351702569911e-05, | |
"loss": 0.1232, | |
"step": 799 | |
}, | |
{ | |
"epoch": 1.5037593984962405, | |
"grad_norm": 2.2335495948791504, | |
"learning_rate": 8.125000000000001e-05, | |
"loss": 0.0644, | |
"step": 800 | |
}, | |
{ | |
"epoch": 1.5037593984962405, | |
"eval_global_dataset_loss": 1.0063103437423706, | |
"eval_global_dataset_runtime": 74.1382, | |
"eval_global_dataset_samples_per_second": 13.111, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.9225955849460038, | |
"eval_sts-test-1024_pearson_cosine": 0.9052989812166632, | |
"eval_sts-test-1024_spearman_cosine": 0.9221961799380853, | |
"eval_sts-test-1280_pearson_cosine": 0.9052941365370162, | |
"eval_sts-test-1280_spearman_cosine": 0.9222044589214429, | |
"eval_sts-test-512_pearson_cosine": 0.9048916374185606, | |
"eval_sts-test-512_spearman_cosine": 0.9225955849460038, | |
"eval_sts-test-760_pearson_cosine": 0.9038999523705149, | |
"eval_sts-test-760_spearman_cosine": 0.9219881313238769, | |
"eval_sts-test_pearson_cosine": 0.9052886258574522, | |
"eval_sts-test_spearman_cosine": 0.922200610312963, | |
"step": 800 | |
}, | |
{ | |
"epoch": 1.505639097744361, | |
"grad_norm": 3.764857769012451, | |
"learning_rate": 8.116470058763909e-05, | |
"loss": 0.2569, | |
"step": 801 | |
}, | |
{ | |
"epoch": 1.5075187969924813, | |
"grad_norm": 0.6914541125297546, | |
"learning_rate": 8.10792726074751e-05, | |
"loss": 0.0157, | |
"step": 802 | |
}, | |
{ | |
"epoch": 1.5093984962406015, | |
"grad_norm": 2.945553779602051, | |
"learning_rate": 8.099371664796047e-05, | |
"loss": 0.1063, | |
"step": 803 | |
}, | |
{ | |
"epoch": 1.5112781954887218, | |
"grad_norm": 2.2534825801849365, | |
"learning_rate": 8.090803329842914e-05, | |
"loss": 0.1023, | |
"step": 804 | |
}, | |
{ | |
"epoch": 1.513157894736842, | |
"grad_norm": 3.933741331100464, | |
"learning_rate": 8.082222314909253e-05, | |
"loss": 0.2308, | |
"step": 805 | |
}, | |
{ | |
"epoch": 1.5150375939849625, | |
"grad_norm": 3.690402030944824, | |
"learning_rate": 8.073628679103555e-05, | |
"loss": 0.2341, | |
"step": 806 | |
}, | |
{ | |
"epoch": 1.5169172932330826, | |
"grad_norm": 3.517522096633911, | |
"learning_rate": 8.065022481621248e-05, | |
"loss": 0.2281, | |
"step": 807 | |
}, | |
{ | |
"epoch": 1.518796992481203, | |
"grad_norm": 2.9169869422912598, | |
"learning_rate": 8.056403781744278e-05, | |
"loss": 0.1646, | |
"step": 808 | |
}, | |
{ | |
"epoch": 1.5206766917293233, | |
"grad_norm": 2.823941230773926, | |
"learning_rate": 8.047772638840721e-05, | |
"loss": 0.1352, | |
"step": 809 | |
}, | |
{ | |
"epoch": 1.5225563909774436, | |
"grad_norm": 2.6774966716766357, | |
"learning_rate": 8.039129112364363e-05, | |
"loss": 0.0896, | |
"step": 810 | |
}, | |
{ | |
"epoch": 1.524436090225564, | |
"grad_norm": 3.2963550090789795, | |
"learning_rate": 8.030473261854288e-05, | |
"loss": 0.173, | |
"step": 811 | |
}, | |
{ | |
"epoch": 1.526315789473684, | |
"grad_norm": 4.204916954040527, | |
"learning_rate": 8.021805146934473e-05, | |
"loss": 0.2946, | |
"step": 812 | |
}, | |
{ | |
"epoch": 1.5281954887218046, | |
"grad_norm": 1.9068849086761475, | |
"learning_rate": 8.013124827313379e-05, | |
"loss": 0.0928, | |
"step": 813 | |
}, | |
{ | |
"epoch": 1.5300751879699248, | |
"grad_norm": 1.860642433166504, | |
"learning_rate": 8.00443236278353e-05, | |
"loss": 0.1008, | |
"step": 814 | |
}, | |
{ | |
"epoch": 1.531954887218045, | |
"grad_norm": 3.6413190364837646, | |
"learning_rate": 7.995727813221113e-05, | |
"loss": 0.2301, | |
"step": 815 | |
}, | |
{ | |
"epoch": 1.5338345864661656, | |
"grad_norm": 1.9489543437957764, | |
"learning_rate": 7.987011238585555e-05, | |
"loss": 0.0953, | |
"step": 816 | |
}, | |
{ | |
"epoch": 1.5357142857142856, | |
"grad_norm": 2.9605321884155273, | |
"learning_rate": 7.978282698919125e-05, | |
"loss": 0.1275, | |
"step": 817 | |
}, | |
{ | |
"epoch": 1.537593984962406, | |
"grad_norm": 3.240093469619751, | |
"learning_rate": 7.969542254346498e-05, | |
"loss": 0.167, | |
"step": 818 | |
}, | |
{ | |
"epoch": 1.5394736842105263, | |
"grad_norm": 3.018465518951416, | |
"learning_rate": 7.960789965074362e-05, | |
"loss": 0.1617, | |
"step": 819 | |
}, | |
{ | |
"epoch": 1.5413533834586466, | |
"grad_norm": 2.4230682849884033, | |
"learning_rate": 7.952025891390991e-05, | |
"loss": 0.1428, | |
"step": 820 | |
}, | |
{ | |
"epoch": 1.543233082706767, | |
"grad_norm": 3.2175021171569824, | |
"learning_rate": 7.943250093665835e-05, | |
"loss": 0.1515, | |
"step": 821 | |
}, | |
{ | |
"epoch": 1.545112781954887, | |
"grad_norm": 2.267045259475708, | |
"learning_rate": 7.934462632349103e-05, | |
"loss": 0.0876, | |
"step": 822 | |
}, | |
{ | |
"epoch": 1.5469924812030076, | |
"grad_norm": 3.216639995574951, | |
"learning_rate": 7.925663567971345e-05, | |
"loss": 0.1386, | |
"step": 823 | |
}, | |
{ | |
"epoch": 1.5488721804511278, | |
"grad_norm": 4.30193567276001, | |
"learning_rate": 7.916852961143035e-05, | |
"loss": 0.35, | |
"step": 824 | |
}, | |
{ | |
"epoch": 1.550751879699248, | |
"grad_norm": 3.556504964828491, | |
"learning_rate": 7.908030872554155e-05, | |
"loss": 0.1543, | |
"step": 825 | |
}, | |
{ | |
"epoch": 1.5526315789473686, | |
"grad_norm": 3.369008779525757, | |
"learning_rate": 7.899197362973782e-05, | |
"loss": 0.1913, | |
"step": 826 | |
}, | |
{ | |
"epoch": 1.5545112781954886, | |
"grad_norm": 2.986630916595459, | |
"learning_rate": 7.890352493249656e-05, | |
"loss": 0.1459, | |
"step": 827 | |
}, | |
{ | |
"epoch": 1.556390977443609, | |
"grad_norm": 2.2407569885253906, | |
"learning_rate": 7.881496324307776e-05, | |
"loss": 0.0726, | |
"step": 828 | |
}, | |
{ | |
"epoch": 1.5582706766917294, | |
"grad_norm": 2.5670650005340576, | |
"learning_rate": 7.872628917151967e-05, | |
"loss": 0.1532, | |
"step": 829 | |
}, | |
{ | |
"epoch": 1.5601503759398496, | |
"grad_norm": 3.387564182281494, | |
"learning_rate": 7.863750332863468e-05, | |
"loss": 0.2685, | |
"step": 830 | |
}, | |
{ | |
"epoch": 1.5620300751879699, | |
"grad_norm": 2.2817680835723877, | |
"learning_rate": 7.854860632600517e-05, | |
"loss": 0.0786, | |
"step": 831 | |
}, | |
{ | |
"epoch": 1.5639097744360901, | |
"grad_norm": 2.6481478214263916, | |
"learning_rate": 7.845959877597908e-05, | |
"loss": 0.1155, | |
"step": 832 | |
}, | |
{ | |
"epoch": 1.5657894736842106, | |
"grad_norm": 2.793203592300415, | |
"learning_rate": 7.837048129166594e-05, | |
"loss": 0.0849, | |
"step": 833 | |
}, | |
{ | |
"epoch": 1.5676691729323309, | |
"grad_norm": 1.97296142578125, | |
"learning_rate": 7.82812544869325e-05, | |
"loss": 0.0519, | |
"step": 834 | |
}, | |
{ | |
"epoch": 1.5695488721804511, | |
"grad_norm": 3.4556632041931152, | |
"learning_rate": 7.819191897639854e-05, | |
"loss": 0.1754, | |
"step": 835 | |
}, | |
{ | |
"epoch": 1.5714285714285714, | |
"grad_norm": 3.4186487197875977, | |
"learning_rate": 7.810247537543265e-05, | |
"loss": 0.1959, | |
"step": 836 | |
}, | |
{ | |
"epoch": 1.5733082706766917, | |
"grad_norm": 3.430490255355835, | |
"learning_rate": 7.801292430014797e-05, | |
"loss": 0.1705, | |
"step": 837 | |
}, | |
{ | |
"epoch": 1.5751879699248121, | |
"grad_norm": 3.6400420665740967, | |
"learning_rate": 7.792326636739795e-05, | |
"loss": 0.2029, | |
"step": 838 | |
}, | |
{ | |
"epoch": 1.5770676691729322, | |
"grad_norm": 2.497185707092285, | |
"learning_rate": 7.783350219477208e-05, | |
"loss": 0.0788, | |
"step": 839 | |
}, | |
{ | |
"epoch": 1.5789473684210527, | |
"grad_norm": 2.136199951171875, | |
"learning_rate": 7.774363240059174e-05, | |
"loss": 0.0554, | |
"step": 840 | |
}, | |
{ | |
"epoch": 1.580827067669173, | |
"grad_norm": 2.642343521118164, | |
"learning_rate": 7.765365760390577e-05, | |
"loss": 0.1014, | |
"step": 841 | |
}, | |
{ | |
"epoch": 1.5827067669172932, | |
"grad_norm": 3.2328081130981445, | |
"learning_rate": 7.756357842448636e-05, | |
"loss": 0.1713, | |
"step": 842 | |
}, | |
{ | |
"epoch": 1.5845864661654137, | |
"grad_norm": 2.859098434448242, | |
"learning_rate": 7.747339548282469e-05, | |
"loss": 0.163, | |
"step": 843 | |
}, | |
{ | |
"epoch": 1.5864661654135337, | |
"grad_norm": 1.999979019165039, | |
"learning_rate": 7.738310940012668e-05, | |
"loss": 0.0694, | |
"step": 844 | |
}, | |
{ | |
"epoch": 1.5883458646616542, | |
"grad_norm": 1.6762597560882568, | |
"learning_rate": 7.729272079830874e-05, | |
"loss": 0.0914, | |
"step": 845 | |
}, | |
{ | |
"epoch": 1.5902255639097744, | |
"grad_norm": 3.4642903804779053, | |
"learning_rate": 7.720223029999343e-05, | |
"loss": 0.2394, | |
"step": 846 | |
}, | |
{ | |
"epoch": 1.5921052631578947, | |
"grad_norm": 2.8646364212036133, | |
"learning_rate": 7.711163852850523e-05, | |
"loss": 0.1341, | |
"step": 847 | |
}, | |
{ | |
"epoch": 1.5939849624060152, | |
"grad_norm": 2.863694190979004, | |
"learning_rate": 7.702094610786621e-05, | |
"loss": 0.1077, | |
"step": 848 | |
}, | |
{ | |
"epoch": 1.5958646616541352, | |
"grad_norm": 3.843398094177246, | |
"learning_rate": 7.693015366279173e-05, | |
"loss": 0.2587, | |
"step": 849 | |
}, | |
{ | |
"epoch": 1.5977443609022557, | |
"grad_norm": 2.592925548553467, | |
"learning_rate": 7.683926181868613e-05, | |
"loss": 0.1046, | |
"step": 850 | |
}, | |
{ | |
"epoch": 1.5977443609022557, | |
"eval_global_dataset_loss": 0.9889132976531982, | |
"eval_global_dataset_runtime": 73.9652, | |
"eval_global_dataset_samples_per_second": 13.141, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.920544992646527, | |
"eval_sts-test-1024_pearson_cosine": 0.9048893863916063, | |
"eval_sts-test-1024_spearman_cosine": 0.9210602139189262, | |
"eval_sts-test-1280_pearson_cosine": 0.9048860398282079, | |
"eval_sts-test-1280_spearman_cosine": 0.9210629437458709, | |
"eval_sts-test-512_pearson_cosine": 0.9043480001505791, | |
"eval_sts-test-512_spearman_cosine": 0.920544992646527, | |
"eval_sts-test-760_pearson_cosine": 0.903769561880325, | |
"eval_sts-test-760_spearman_cosine": 0.9211490899240494, | |
"eval_sts-test_pearson_cosine": 0.9048811346013073, | |
"eval_sts-test_spearman_cosine": 0.9210440139623024, | |
"step": 850 | |
}, | |
{ | |
"epoch": 1.599624060150376, | |
"grad_norm": 2.807727813720703, | |
"learning_rate": 7.67482712016385e-05, | |
"loss": 0.1489, | |
"step": 851 | |
}, | |
{ | |
"epoch": 1.6015037593984962, | |
"grad_norm": 2.7714085578918457, | |
"learning_rate": 7.665718243841826e-05, | |
"loss": 0.1306, | |
"step": 852 | |
}, | |
{ | |
"epoch": 1.6033834586466167, | |
"grad_norm": 2.3455610275268555, | |
"learning_rate": 7.656599615647089e-05, | |
"loss": 0.0815, | |
"step": 853 | |
}, | |
{ | |
"epoch": 1.6052631578947367, | |
"grad_norm": 2.8961308002471924, | |
"learning_rate": 7.647471298391362e-05, | |
"loss": 0.1799, | |
"step": 854 | |
}, | |
{ | |
"epoch": 1.6071428571428572, | |
"grad_norm": 2.836217164993286, | |
"learning_rate": 7.638333354953108e-05, | |
"loss": 0.1138, | |
"step": 855 | |
}, | |
{ | |
"epoch": 1.6090225563909775, | |
"grad_norm": 1.5786960124969482, | |
"learning_rate": 7.6291858482771e-05, | |
"loss": 0.042, | |
"step": 856 | |
}, | |
{ | |
"epoch": 1.6109022556390977, | |
"grad_norm": 1.3835277557373047, | |
"learning_rate": 7.620028841373983e-05, | |
"loss": 0.0387, | |
"step": 857 | |
}, | |
{ | |
"epoch": 1.6127819548872182, | |
"grad_norm": 2.7180635929107666, | |
"learning_rate": 7.61086239731984e-05, | |
"loss": 0.0929, | |
"step": 858 | |
}, | |
{ | |
"epoch": 1.6146616541353382, | |
"grad_norm": 1.880897045135498, | |
"learning_rate": 7.601686579255768e-05, | |
"loss": 0.0656, | |
"step": 859 | |
}, | |
{ | |
"epoch": 1.6165413533834587, | |
"grad_norm": 2.802232503890991, | |
"learning_rate": 7.592501450387425e-05, | |
"loss": 0.108, | |
"step": 860 | |
}, | |
{ | |
"epoch": 1.618421052631579, | |
"grad_norm": 3.5828518867492676, | |
"learning_rate": 7.583307073984613e-05, | |
"loss": 0.2153, | |
"step": 861 | |
}, | |
{ | |
"epoch": 1.6203007518796992, | |
"grad_norm": 3.0358164310455322, | |
"learning_rate": 7.574103513380826e-05, | |
"loss": 0.1542, | |
"step": 862 | |
}, | |
{ | |
"epoch": 1.6221804511278195, | |
"grad_norm": 3.075732946395874, | |
"learning_rate": 7.56489083197283e-05, | |
"loss": 0.1405, | |
"step": 863 | |
}, | |
{ | |
"epoch": 1.6240601503759398, | |
"grad_norm": 2.816676378250122, | |
"learning_rate": 7.555669093220208e-05, | |
"loss": 0.1154, | |
"step": 864 | |
}, | |
{ | |
"epoch": 1.6259398496240602, | |
"grad_norm": 2.9650542736053467, | |
"learning_rate": 7.546438360644942e-05, | |
"loss": 0.116, | |
"step": 865 | |
}, | |
{ | |
"epoch": 1.6278195488721805, | |
"grad_norm": 3.609433650970459, | |
"learning_rate": 7.537198697830958e-05, | |
"loss": 0.2381, | |
"step": 866 | |
}, | |
{ | |
"epoch": 1.6296992481203008, | |
"grad_norm": 4.428701877593994, | |
"learning_rate": 7.527950168423704e-05, | |
"loss": 0.2811, | |
"step": 867 | |
}, | |
{ | |
"epoch": 1.631578947368421, | |
"grad_norm": 2.2774205207824707, | |
"learning_rate": 7.518692836129693e-05, | |
"loss": 0.0911, | |
"step": 868 | |
}, | |
{ | |
"epoch": 1.6334586466165413, | |
"grad_norm": 2.677185535430908, | |
"learning_rate": 7.509426764716086e-05, | |
"loss": 0.1305, | |
"step": 869 | |
}, | |
{ | |
"epoch": 1.6353383458646618, | |
"grad_norm": 2.101545572280884, | |
"learning_rate": 7.500152018010233e-05, | |
"loss": 0.0658, | |
"step": 870 | |
}, | |
{ | |
"epoch": 1.6372180451127818, | |
"grad_norm": 3.0622670650482178, | |
"learning_rate": 7.49086865989925e-05, | |
"loss": 0.1623, | |
"step": 871 | |
}, | |
{ | |
"epoch": 1.6390977443609023, | |
"grad_norm": 1.6130925416946411, | |
"learning_rate": 7.481576754329564e-05, | |
"loss": 0.0453, | |
"step": 872 | |
}, | |
{ | |
"epoch": 1.6409774436090225, | |
"grad_norm": 2.1795146465301514, | |
"learning_rate": 7.472276365306481e-05, | |
"loss": 0.1334, | |
"step": 873 | |
}, | |
{ | |
"epoch": 1.6428571428571428, | |
"grad_norm": 2.377540349960327, | |
"learning_rate": 7.462967556893743e-05, | |
"loss": 0.0564, | |
"step": 874 | |
}, | |
{ | |
"epoch": 1.6447368421052633, | |
"grad_norm": 2.5608479976654053, | |
"learning_rate": 7.453650393213092e-05, | |
"loss": 0.1334, | |
"step": 875 | |
}, | |
{ | |
"epoch": 1.6466165413533833, | |
"grad_norm": 3.679516553878784, | |
"learning_rate": 7.444324938443817e-05, | |
"loss": 0.1552, | |
"step": 876 | |
}, | |
{ | |
"epoch": 1.6484962406015038, | |
"grad_norm": 2.912466049194336, | |
"learning_rate": 7.434991256822323e-05, | |
"loss": 0.1236, | |
"step": 877 | |
}, | |
{ | |
"epoch": 1.650375939849624, | |
"grad_norm": 3.505437135696411, | |
"learning_rate": 7.425649412641681e-05, | |
"loss": 0.175, | |
"step": 878 | |
}, | |
{ | |
"epoch": 1.6522556390977443, | |
"grad_norm": 4.615713119506836, | |
"learning_rate": 7.416299470251187e-05, | |
"loss": 0.2705, | |
"step": 879 | |
}, | |
{ | |
"epoch": 1.6541353383458648, | |
"grad_norm": 3.014660120010376, | |
"learning_rate": 7.406941494055927e-05, | |
"loss": 0.1821, | |
"step": 880 | |
}, | |
{ | |
"epoch": 1.6560150375939848, | |
"grad_norm": 2.3996410369873047, | |
"learning_rate": 7.397575548516317e-05, | |
"loss": 0.1217, | |
"step": 881 | |
}, | |
{ | |
"epoch": 1.6578947368421053, | |
"grad_norm": 0.7946998476982117, | |
"learning_rate": 7.388201698147674e-05, | |
"loss": 0.0191, | |
"step": 882 | |
}, | |
{ | |
"epoch": 1.6597744360902256, | |
"grad_norm": 3.1606884002685547, | |
"learning_rate": 7.378820007519763e-05, | |
"loss": 0.1562, | |
"step": 883 | |
}, | |
{ | |
"epoch": 1.6616541353383458, | |
"grad_norm": 2.4829394817352295, | |
"learning_rate": 7.369430541256354e-05, | |
"loss": 0.1229, | |
"step": 884 | |
}, | |
{ | |
"epoch": 1.6635338345864663, | |
"grad_norm": 4.674249649047852, | |
"learning_rate": 7.360033364034785e-05, | |
"loss": 0.4534, | |
"step": 885 | |
}, | |
{ | |
"epoch": 1.6654135338345863, | |
"grad_norm": 2.6263651847839355, | |
"learning_rate": 7.350628540585501e-05, | |
"loss": 0.107, | |
"step": 886 | |
}, | |
{ | |
"epoch": 1.6672932330827068, | |
"grad_norm": 1.9486063718795776, | |
"learning_rate": 7.341216135691617e-05, | |
"loss": 0.0396, | |
"step": 887 | |
}, | |
{ | |
"epoch": 1.669172932330827, | |
"grad_norm": 2.4839587211608887, | |
"learning_rate": 7.331796214188478e-05, | |
"loss": 0.0831, | |
"step": 888 | |
}, | |
{ | |
"epoch": 1.6710526315789473, | |
"grad_norm": 4.256960391998291, | |
"learning_rate": 7.322368840963198e-05, | |
"loss": 0.2312, | |
"step": 889 | |
}, | |
{ | |
"epoch": 1.6729323308270678, | |
"grad_norm": 2.9588348865509033, | |
"learning_rate": 7.312934080954225e-05, | |
"loss": 0.107, | |
"step": 890 | |
}, | |
{ | |
"epoch": 1.6748120300751879, | |
"grad_norm": 3.246208667755127, | |
"learning_rate": 7.303491999150887e-05, | |
"loss": 0.1322, | |
"step": 891 | |
}, | |
{ | |
"epoch": 1.6766917293233083, | |
"grad_norm": 3.030524969100952, | |
"learning_rate": 7.294042660592947e-05, | |
"loss": 0.1794, | |
"step": 892 | |
}, | |
{ | |
"epoch": 1.6785714285714286, | |
"grad_norm": 4.523505210876465, | |
"learning_rate": 7.284586130370156e-05, | |
"loss": 0.2517, | |
"step": 893 | |
}, | |
{ | |
"epoch": 1.6804511278195489, | |
"grad_norm": 3.097930431365967, | |
"learning_rate": 7.275122473621806e-05, | |
"loss": 0.1961, | |
"step": 894 | |
}, | |
{ | |
"epoch": 1.6823308270676691, | |
"grad_norm": 3.7234556674957275, | |
"learning_rate": 7.26565175553627e-05, | |
"loss": 0.2131, | |
"step": 895 | |
}, | |
{ | |
"epoch": 1.6842105263157894, | |
"grad_norm": 2.1135101318359375, | |
"learning_rate": 7.256174041350568e-05, | |
"loss": 0.127, | |
"step": 896 | |
}, | |
{ | |
"epoch": 1.6860902255639099, | |
"grad_norm": 1.9874850511550903, | |
"learning_rate": 7.24668939634991e-05, | |
"loss": 0.096, | |
"step": 897 | |
}, | |
{ | |
"epoch": 1.6879699248120301, | |
"grad_norm": 2.357801675796509, | |
"learning_rate": 7.237197885867248e-05, | |
"loss": 0.1038, | |
"step": 898 | |
}, | |
{ | |
"epoch": 1.6898496240601504, | |
"grad_norm": 3.280027389526367, | |
"learning_rate": 7.227699575282823e-05, | |
"loss": 0.1448, | |
"step": 899 | |
}, | |
{ | |
"epoch": 1.6917293233082706, | |
"grad_norm": 2.795931577682495, | |
"learning_rate": 7.218194530023718e-05, | |
"loss": 0.2087, | |
"step": 900 | |
}, | |
{ | |
"epoch": 1.6917293233082706, | |
"eval_global_dataset_loss": 1.026059627532959, | |
"eval_global_dataset_runtime": 75.3127, | |
"eval_global_dataset_samples_per_second": 12.906, | |
"eval_global_dataset_steps_per_second": 0.053, | |
"eval_sequential_score": 0.9175974508149826, | |
"eval_sts-test-1024_pearson_cosine": 0.9021183505960124, | |
"eval_sts-test-1024_spearman_cosine": 0.9182267878039297, | |
"eval_sts-test-1280_pearson_cosine": 0.9021185166658658, | |
"eval_sts-test-1280_spearman_cosine": 0.9182425402479394, | |
"eval_sts-test-512_pearson_cosine": 0.9007274650132412, | |
"eval_sts-test-512_spearman_cosine": 0.9175974508149826, | |
"eval_sts-test-760_pearson_cosine": 0.9007953604283914, | |
"eval_sts-test-760_spearman_cosine": 0.9180001226649821, | |
"eval_sts-test_pearson_cosine": 0.9021177440718349, | |
"eval_sts-test_spearman_cosine": 0.9182428982580305, | |
"step": 900 | |
}, | |
{ | |
"epoch": 1.693609022556391, | |
"grad_norm": 3.9899439811706543, | |
"learning_rate": 7.208682815563409e-05, | |
"loss": 0.1872, | |
"step": 901 | |
}, | |
{ | |
"epoch": 1.6954887218045114, | |
"grad_norm": 4.918177127838135, | |
"learning_rate": 7.199164497421308e-05, | |
"loss": 0.3849, | |
"step": 902 | |
}, | |
{ | |
"epoch": 1.6973684210526314, | |
"grad_norm": 2.930837869644165, | |
"learning_rate": 7.189639641162316e-05, | |
"loss": 0.1832, | |
"step": 903 | |
}, | |
{ | |
"epoch": 1.699248120300752, | |
"grad_norm": 3.5482215881347656, | |
"learning_rate": 7.180108312396373e-05, | |
"loss": 0.2174, | |
"step": 904 | |
}, | |
{ | |
"epoch": 1.7011278195488722, | |
"grad_norm": 2.3238465785980225, | |
"learning_rate": 7.170570576777997e-05, | |
"loss": 0.1082, | |
"step": 905 | |
}, | |
{ | |
"epoch": 1.7030075187969924, | |
"grad_norm": 2.338536024093628, | |
"learning_rate": 7.161026500005848e-05, | |
"loss": 0.1198, | |
"step": 906 | |
}, | |
{ | |
"epoch": 1.704887218045113, | |
"grad_norm": 3.4399287700653076, | |
"learning_rate": 7.151476147822254e-05, | |
"loss": 0.1817, | |
"step": 907 | |
}, | |
{ | |
"epoch": 1.706766917293233, | |
"grad_norm": 1.87295663356781, | |
"learning_rate": 7.141919586012781e-05, | |
"loss": 0.0582, | |
"step": 908 | |
}, | |
{ | |
"epoch": 1.7086466165413534, | |
"grad_norm": 2.7307817935943604, | |
"learning_rate": 7.13235688040576e-05, | |
"loss": 0.1096, | |
"step": 909 | |
}, | |
{ | |
"epoch": 1.7105263157894737, | |
"grad_norm": 2.521165132522583, | |
"learning_rate": 7.12278809687185e-05, | |
"loss": 0.0846, | |
"step": 910 | |
}, | |
{ | |
"epoch": 1.712406015037594, | |
"grad_norm": 2.777625799179077, | |
"learning_rate": 7.113213301323568e-05, | |
"loss": 0.1956, | |
"step": 911 | |
}, | |
{ | |
"epoch": 1.7142857142857144, | |
"grad_norm": 2.8448679447174072, | |
"learning_rate": 7.103632559714852e-05, | |
"loss": 0.1669, | |
"step": 912 | |
}, | |
{ | |
"epoch": 1.7161654135338344, | |
"grad_norm": 3.025099754333496, | |
"learning_rate": 7.09404593804059e-05, | |
"loss": 0.1405, | |
"step": 913 | |
}, | |
{ | |
"epoch": 1.718045112781955, | |
"grad_norm": 3.907864809036255, | |
"learning_rate": 7.08445350233618e-05, | |
"loss": 0.2375, | |
"step": 914 | |
}, | |
{ | |
"epoch": 1.7199248120300752, | |
"grad_norm": 1.3771640062332153, | |
"learning_rate": 7.074855318677065e-05, | |
"loss": 0.0416, | |
"step": 915 | |
}, | |
{ | |
"epoch": 1.7218045112781954, | |
"grad_norm": 2.4469826221466064, | |
"learning_rate": 7.065251453178285e-05, | |
"loss": 0.0965, | |
"step": 916 | |
}, | |
{ | |
"epoch": 1.723684210526316, | |
"grad_norm": 1.9343886375427246, | |
"learning_rate": 7.055641971994015e-05, | |
"loss": 0.1192, | |
"step": 917 | |
}, | |
{ | |
"epoch": 1.725563909774436, | |
"grad_norm": 2.995832920074463, | |
"learning_rate": 7.046026941317113e-05, | |
"loss": 0.1436, | |
"step": 918 | |
}, | |
{ | |
"epoch": 1.7274436090225564, | |
"grad_norm": 3.5150604248046875, | |
"learning_rate": 7.036406427378663e-05, | |
"loss": 0.2399, | |
"step": 919 | |
}, | |
{ | |
"epoch": 1.7293233082706767, | |
"grad_norm": 2.318694829940796, | |
"learning_rate": 7.026780496447524e-05, | |
"loss": 0.1052, | |
"step": 920 | |
}, | |
{ | |
"epoch": 1.731203007518797, | |
"grad_norm": 3.5060043334960938, | |
"learning_rate": 7.01714921482986e-05, | |
"loss": 0.2563, | |
"step": 921 | |
}, | |
{ | |
"epoch": 1.7330827067669174, | |
"grad_norm": 3.699139356613159, | |
"learning_rate": 7.007512648868702e-05, | |
"loss": 0.2326, | |
"step": 922 | |
}, | |
{ | |
"epoch": 1.7349624060150375, | |
"grad_norm": 2.5102176666259766, | |
"learning_rate": 6.997870864943472e-05, | |
"loss": 0.0925, | |
"step": 923 | |
}, | |
{ | |
"epoch": 1.736842105263158, | |
"grad_norm": 3.0189504623413086, | |
"learning_rate": 6.98822392946954e-05, | |
"loss": 0.1497, | |
"step": 924 | |
}, | |
{ | |
"epoch": 1.7387218045112782, | |
"grad_norm": 2.051042318344116, | |
"learning_rate": 6.978571908897764e-05, | |
"loss": 0.1182, | |
"step": 925 | |
}, | |
{ | |
"epoch": 1.7406015037593985, | |
"grad_norm": 2.7945802211761475, | |
"learning_rate": 6.968914869714019e-05, | |
"loss": 0.1262, | |
"step": 926 | |
}, | |
{ | |
"epoch": 1.7424812030075187, | |
"grad_norm": 2.681675434112549, | |
"learning_rate": 6.959252878438764e-05, | |
"loss": 0.1003, | |
"step": 927 | |
}, | |
{ | |
"epoch": 1.744360902255639, | |
"grad_norm": 2.2682015895843506, | |
"learning_rate": 6.949586001626557e-05, | |
"loss": 0.1105, | |
"step": 928 | |
}, | |
{ | |
"epoch": 1.7462406015037595, | |
"grad_norm": 2.556654930114746, | |
"learning_rate": 6.939914305865615e-05, | |
"loss": 0.1579, | |
"step": 929 | |
}, | |
{ | |
"epoch": 1.7481203007518797, | |
"grad_norm": 1.8412240743637085, | |
"learning_rate": 6.930237857777351e-05, | |
"loss": 0.087, | |
"step": 930 | |
}, | |
{ | |
"epoch": 1.75, | |
"grad_norm": 4.481410503387451, | |
"learning_rate": 6.920556724015907e-05, | |
"loss": 0.3344, | |
"step": 931 | |
}, | |
{ | |
"epoch": 1.7518796992481203, | |
"grad_norm": 2.281207323074341, | |
"learning_rate": 6.910870971267707e-05, | |
"loss": 0.1117, | |
"step": 932 | |
}, | |
{ | |
"epoch": 1.7537593984962405, | |
"grad_norm": 1.5092271566390991, | |
"learning_rate": 6.901180666250991e-05, | |
"loss": 0.0418, | |
"step": 933 | |
}, | |
{ | |
"epoch": 1.755639097744361, | |
"grad_norm": 2.3904976844787598, | |
"learning_rate": 6.891485875715351e-05, | |
"loss": 0.1536, | |
"step": 934 | |
}, | |
{ | |
"epoch": 1.7575187969924813, | |
"grad_norm": 2.7644925117492676, | |
"learning_rate": 6.881786666441283e-05, | |
"loss": 0.1441, | |
"step": 935 | |
}, | |
{ | |
"epoch": 1.7593984962406015, | |
"grad_norm": 3.0127475261688232, | |
"learning_rate": 6.872083105239719e-05, | |
"loss": 0.2039, | |
"step": 936 | |
}, | |
{ | |
"epoch": 1.7612781954887218, | |
"grad_norm": 3.1405107975006104, | |
"learning_rate": 6.86237525895157e-05, | |
"loss": 0.19, | |
"step": 937 | |
}, | |
{ | |
"epoch": 1.763157894736842, | |
"grad_norm": 2.284958839416504, | |
"learning_rate": 6.852663194447255e-05, | |
"loss": 0.1295, | |
"step": 938 | |
}, | |
{ | |
"epoch": 1.7650375939849625, | |
"grad_norm": 2.2690012454986572, | |
"learning_rate": 6.842946978626262e-05, | |
"loss": 0.0694, | |
"step": 939 | |
}, | |
{ | |
"epoch": 1.7669172932330826, | |
"grad_norm": 2.514007568359375, | |
"learning_rate": 6.833226678416663e-05, | |
"loss": 0.133, | |
"step": 940 | |
}, | |
{ | |
"epoch": 1.768796992481203, | |
"grad_norm": 2.3104093074798584, | |
"learning_rate": 6.823502360774673e-05, | |
"loss": 0.0779, | |
"step": 941 | |
}, | |
{ | |
"epoch": 1.7706766917293233, | |
"grad_norm": 1.9387654066085815, | |
"learning_rate": 6.813774092684176e-05, | |
"loss": 0.0648, | |
"step": 942 | |
}, | |
{ | |
"epoch": 1.7725563909774436, | |
"grad_norm": 2.588404893875122, | |
"learning_rate": 6.80404194115627e-05, | |
"loss": 0.1141, | |
"step": 943 | |
}, | |
{ | |
"epoch": 1.774436090225564, | |
"grad_norm": 2.852778673171997, | |
"learning_rate": 6.794305973228799e-05, | |
"loss": 0.1103, | |
"step": 944 | |
}, | |
{ | |
"epoch": 1.776315789473684, | |
"grad_norm": 3.120905876159668, | |
"learning_rate": 6.7845662559659e-05, | |
"loss": 0.1673, | |
"step": 945 | |
}, | |
{ | |
"epoch": 1.7781954887218046, | |
"grad_norm": 2.5651848316192627, | |
"learning_rate": 6.774822856457536e-05, | |
"loss": 0.1971, | |
"step": 946 | |
}, | |
{ | |
"epoch": 1.7800751879699248, | |
"grad_norm": 2.2139346599578857, | |
"learning_rate": 6.76507584181903e-05, | |
"loss": 0.073, | |
"step": 947 | |
}, | |
{ | |
"epoch": 1.781954887218045, | |
"grad_norm": 3.4515695571899414, | |
"learning_rate": 6.755325279190616e-05, | |
"loss": 0.1718, | |
"step": 948 | |
}, | |
{ | |
"epoch": 1.7838345864661656, | |
"grad_norm": 3.680602550506592, | |
"learning_rate": 6.745571235736955e-05, | |
"loss": 0.2008, | |
"step": 949 | |
}, | |
{ | |
"epoch": 1.7857142857142856, | |
"grad_norm": 2.1303181648254395, | |
"learning_rate": 6.735813778646698e-05, | |
"loss": 0.0967, | |
"step": 950 | |
}, | |
{ | |
"epoch": 1.7857142857142856, | |
"eval_global_dataset_loss": 1.0005853176116943, | |
"eval_global_dataset_runtime": 75.2623, | |
"eval_global_dataset_samples_per_second": 12.915, | |
"eval_global_dataset_steps_per_second": 0.053, | |
"eval_sequential_score": 0.9174314683864816, | |
"eval_sts-test-1024_pearson_cosine": 0.9016950025477838, | |
"eval_sts-test-1024_spearman_cosine": 0.9180962931257123, | |
"eval_sts-test-1280_pearson_cosine": 0.9016927110616119, | |
"eval_sts-test-1280_spearman_cosine": 0.9181243969178661, | |
"eval_sts-test-512_pearson_cosine": 0.8999063058043584, | |
"eval_sts-test-512_spearman_cosine": 0.9174314683864816, | |
"eval_sts-test-760_pearson_cosine": 0.9003691295599635, | |
"eval_sts-test-760_spearman_cosine": 0.917645066157103, | |
"eval_sts-test_pearson_cosine": 0.9016887954419438, | |
"eval_sts-test_spearman_cosine": 0.9180766920732231, | |
"step": 950 | |
}, | |
{ | |
"epoch": 1.787593984962406, | |
"grad_norm": 3.379387617111206, | |
"learning_rate": 6.726052975131999e-05, | |
"loss": 0.1966, | |
"step": 951 | |
}, | |
{ | |
"epoch": 1.7894736842105263, | |
"grad_norm": 3.0120327472686768, | |
"learning_rate": 6.71628889242807e-05, | |
"loss": 0.122, | |
"step": 952 | |
}, | |
{ | |
"epoch": 1.7913533834586466, | |
"grad_norm": 2.767864465713501, | |
"learning_rate": 6.70652159779271e-05, | |
"loss": 0.1842, | |
"step": 953 | |
}, | |
{ | |
"epoch": 1.793233082706767, | |
"grad_norm": 2.6627755165100098, | |
"learning_rate": 6.696751158505838e-05, | |
"loss": 0.1725, | |
"step": 954 | |
}, | |
{ | |
"epoch": 1.795112781954887, | |
"grad_norm": 2.2890477180480957, | |
"learning_rate": 6.686977641869045e-05, | |
"loss": 0.0995, | |
"step": 955 | |
}, | |
{ | |
"epoch": 1.7969924812030076, | |
"grad_norm": 3.1283280849456787, | |
"learning_rate": 6.677201115205105e-05, | |
"loss": 0.1372, | |
"step": 956 | |
}, | |
{ | |
"epoch": 1.7988721804511278, | |
"grad_norm": 3.510864496231079, | |
"learning_rate": 6.66742164585754e-05, | |
"loss": 0.2178, | |
"step": 957 | |
}, | |
{ | |
"epoch": 1.800751879699248, | |
"grad_norm": 1.897053599357605, | |
"learning_rate": 6.65763930119013e-05, | |
"loss": 0.0658, | |
"step": 958 | |
}, | |
{ | |
"epoch": 1.8026315789473686, | |
"grad_norm": 3.1732497215270996, | |
"learning_rate": 6.647854148586467e-05, | |
"loss": 0.1593, | |
"step": 959 | |
}, | |
{ | |
"epoch": 1.8045112781954886, | |
"grad_norm": 3.1102993488311768, | |
"learning_rate": 6.638066255449491e-05, | |
"loss": 0.2025, | |
"step": 960 | |
}, | |
{ | |
"epoch": 1.806390977443609, | |
"grad_norm": 3.147205114364624, | |
"learning_rate": 6.628275689201007e-05, | |
"loss": 0.2018, | |
"step": 961 | |
}, | |
{ | |
"epoch": 1.8082706766917294, | |
"grad_norm": 2.6337480545043945, | |
"learning_rate": 6.61848251728124e-05, | |
"loss": 0.1033, | |
"step": 962 | |
}, | |
{ | |
"epoch": 1.8101503759398496, | |
"grad_norm": 3.233461380004883, | |
"learning_rate": 6.608686807148365e-05, | |
"loss": 0.1816, | |
"step": 963 | |
}, | |
{ | |
"epoch": 1.8120300751879699, | |
"grad_norm": 3.0871407985687256, | |
"learning_rate": 6.598888626278039e-05, | |
"loss": 0.1354, | |
"step": 964 | |
}, | |
{ | |
"epoch": 1.8139097744360901, | |
"grad_norm": 3.1812994480133057, | |
"learning_rate": 6.589088042162934e-05, | |
"loss": 0.1228, | |
"step": 965 | |
}, | |
{ | |
"epoch": 1.8157894736842106, | |
"grad_norm": 3.910372257232666, | |
"learning_rate": 6.579285122312284e-05, | |
"loss": 0.2353, | |
"step": 966 | |
}, | |
{ | |
"epoch": 1.8176691729323309, | |
"grad_norm": 3.3136324882507324, | |
"learning_rate": 6.569479934251408e-05, | |
"loss": 0.191, | |
"step": 967 | |
}, | |
{ | |
"epoch": 1.8195488721804511, | |
"grad_norm": 2.245729446411133, | |
"learning_rate": 6.559672545521248e-05, | |
"loss": 0.142, | |
"step": 968 | |
}, | |
{ | |
"epoch": 1.8214285714285714, | |
"grad_norm": 1.959720253944397, | |
"learning_rate": 6.549863023677905e-05, | |
"loss": 0.0692, | |
"step": 969 | |
}, | |
{ | |
"epoch": 1.8233082706766917, | |
"grad_norm": 3.1250393390655518, | |
"learning_rate": 6.540051436292178e-05, | |
"loss": 0.1626, | |
"step": 970 | |
}, | |
{ | |
"epoch": 1.8251879699248121, | |
"grad_norm": 4.049576282501221, | |
"learning_rate": 6.530237850949091e-05, | |
"loss": 0.3098, | |
"step": 971 | |
}, | |
{ | |
"epoch": 1.8270676691729322, | |
"grad_norm": 3.1379828453063965, | |
"learning_rate": 6.520422335247429e-05, | |
"loss": 0.165, | |
"step": 972 | |
}, | |
{ | |
"epoch": 1.8289473684210527, | |
"grad_norm": 2.626603841781616, | |
"learning_rate": 6.510604956799276e-05, | |
"loss": 0.1167, | |
"step": 973 | |
}, | |
{ | |
"epoch": 1.830827067669173, | |
"grad_norm": 2.630916118621826, | |
"learning_rate": 6.500785783229543e-05, | |
"loss": 0.1235, | |
"step": 974 | |
}, | |
{ | |
"epoch": 1.8327067669172932, | |
"grad_norm": 2.5859103202819824, | |
"learning_rate": 6.490964882175514e-05, | |
"loss": 0.1263, | |
"step": 975 | |
}, | |
{ | |
"epoch": 1.8345864661654137, | |
"grad_norm": 2.5505852699279785, | |
"learning_rate": 6.481142321286367e-05, | |
"loss": 0.0938, | |
"step": 976 | |
}, | |
{ | |
"epoch": 1.8364661654135337, | |
"grad_norm": 2.5898983478546143, | |
"learning_rate": 6.471318168222711e-05, | |
"loss": 0.1854, | |
"step": 977 | |
}, | |
{ | |
"epoch": 1.8383458646616542, | |
"grad_norm": 3.1198155879974365, | |
"learning_rate": 6.461492490656131e-05, | |
"loss": 0.188, | |
"step": 978 | |
}, | |
{ | |
"epoch": 1.8402255639097744, | |
"grad_norm": 2.085031509399414, | |
"learning_rate": 6.451665356268706e-05, | |
"loss": 0.0944, | |
"step": 979 | |
}, | |
{ | |
"epoch": 1.8421052631578947, | |
"grad_norm": 3.46240234375, | |
"learning_rate": 6.441836832752551e-05, | |
"loss": 0.215, | |
"step": 980 | |
}, | |
{ | |
"epoch": 1.8439849624060152, | |
"grad_norm": 3.1626245975494385, | |
"learning_rate": 6.432006987809351e-05, | |
"loss": 0.1419, | |
"step": 981 | |
}, | |
{ | |
"epoch": 1.8458646616541352, | |
"grad_norm": 1.4989360570907593, | |
"learning_rate": 6.422175889149895e-05, | |
"loss": 0.0393, | |
"step": 982 | |
}, | |
{ | |
"epoch": 1.8477443609022557, | |
"grad_norm": 1.7400661706924438, | |
"learning_rate": 6.412343604493605e-05, | |
"loss": 0.1053, | |
"step": 983 | |
}, | |
{ | |
"epoch": 1.849624060150376, | |
"grad_norm": 3.4047110080718994, | |
"learning_rate": 6.402510201568073e-05, | |
"loss": 0.2591, | |
"step": 984 | |
}, | |
{ | |
"epoch": 1.8515037593984962, | |
"grad_norm": 3.1428349018096924, | |
"learning_rate": 6.392675748108595e-05, | |
"loss": 0.2317, | |
"step": 985 | |
}, | |
{ | |
"epoch": 1.8533834586466167, | |
"grad_norm": 2.285388946533203, | |
"learning_rate": 6.382840311857703e-05, | |
"loss": 0.0773, | |
"step": 986 | |
}, | |
{ | |
"epoch": 1.8552631578947367, | |
"grad_norm": 1.1402099132537842, | |
"learning_rate": 6.373003960564698e-05, | |
"loss": 0.0319, | |
"step": 987 | |
}, | |
{ | |
"epoch": 1.8571428571428572, | |
"grad_norm": 2.901012420654297, | |
"learning_rate": 6.363166761985186e-05, | |
"loss": 0.1784, | |
"step": 988 | |
}, | |
{ | |
"epoch": 1.8590225563909775, | |
"grad_norm": 3.1784751415252686, | |
"learning_rate": 6.353328783880608e-05, | |
"loss": 0.169, | |
"step": 989 | |
}, | |
{ | |
"epoch": 1.8609022556390977, | |
"grad_norm": 1.4703645706176758, | |
"learning_rate": 6.343490094017774e-05, | |
"loss": 0.0297, | |
"step": 990 | |
}, | |
{ | |
"epoch": 1.8627819548872182, | |
"grad_norm": 1.4826966524124146, | |
"learning_rate": 6.333650760168398e-05, | |
"loss": 0.0623, | |
"step": 991 | |
}, | |
{ | |
"epoch": 1.8646616541353382, | |
"grad_norm": 2.5351502895355225, | |
"learning_rate": 6.323810850108628e-05, | |
"loss": 0.1196, | |
"step": 992 | |
}, | |
{ | |
"epoch": 1.8665413533834587, | |
"grad_norm": 2.9735541343688965, | |
"learning_rate": 6.313970431618585e-05, | |
"loss": 0.1775, | |
"step": 993 | |
}, | |
{ | |
"epoch": 1.868421052631579, | |
"grad_norm": 2.139782428741455, | |
"learning_rate": 6.304129572481887e-05, | |
"loss": 0.0573, | |
"step": 994 | |
}, | |
{ | |
"epoch": 1.8703007518796992, | |
"grad_norm": 2.248852491378784, | |
"learning_rate": 6.294288340485192e-05, | |
"loss": 0.1144, | |
"step": 995 | |
}, | |
{ | |
"epoch": 1.8721804511278195, | |
"grad_norm": 2.7781121730804443, | |
"learning_rate": 6.284446803417725e-05, | |
"loss": 0.1461, | |
"step": 996 | |
}, | |
{ | |
"epoch": 1.8740601503759398, | |
"grad_norm": 1.9069122076034546, | |
"learning_rate": 6.27460502907081e-05, | |
"loss": 0.0928, | |
"step": 997 | |
}, | |
{ | |
"epoch": 1.8759398496240602, | |
"grad_norm": 2.3354716300964355, | |
"learning_rate": 6.264763085237408e-05, | |
"loss": 0.0833, | |
"step": 998 | |
}, | |
{ | |
"epoch": 1.8778195488721805, | |
"grad_norm": 3.2997379302978516, | |
"learning_rate": 6.254921039711648e-05, | |
"loss": 0.2346, | |
"step": 999 | |
}, | |
{ | |
"epoch": 1.8796992481203008, | |
"grad_norm": 1.7376987934112549, | |
"learning_rate": 6.245078960288354e-05, | |
"loss": 0.0471, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 1.8796992481203008, | |
"eval_global_dataset_loss": 1.0007705688476562, | |
"eval_global_dataset_runtime": 73.6339, | |
"eval_global_dataset_samples_per_second": 13.2, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.9177210985502069, | |
"eval_sts-test-1024_pearson_cosine": 0.9025234688655628, | |
"eval_sts-test-1024_spearman_cosine": 0.9176032684789633, | |
"eval_sts-test-1280_pearson_cosine": 0.9025230342933502, | |
"eval_sts-test-1280_spearman_cosine": 0.9175965110384932, | |
"eval_sts-test-512_pearson_cosine": 0.9015480520372816, | |
"eval_sts-test-512_spearman_cosine": 0.9177210985502069, | |
"eval_sts-test-760_pearson_cosine": 0.9013899032141457, | |
"eval_sts-test-760_spearman_cosine": 0.9176601025809303, | |
"eval_sts-test_pearson_cosine": 0.9025207350942973, | |
"eval_sts-test_spearman_cosine": 0.917561381298301, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 1.881578947368421, | |
"grad_norm": 3.5159120559692383, | |
"learning_rate": 6.235236914762595e-05, | |
"loss": 0.2246, | |
"step": 1001 | |
}, | |
{ | |
"epoch": 1.8834586466165413, | |
"grad_norm": 3.255794048309326, | |
"learning_rate": 6.225394970929191e-05, | |
"loss": 0.2424, | |
"step": 1002 | |
}, | |
{ | |
"epoch": 1.8853383458646618, | |
"grad_norm": 2.7804276943206787, | |
"learning_rate": 6.215553196582277e-05, | |
"loss": 0.1008, | |
"step": 1003 | |
}, | |
{ | |
"epoch": 1.8872180451127818, | |
"grad_norm": 1.8781172037124634, | |
"learning_rate": 6.205711659514808e-05, | |
"loss": 0.0493, | |
"step": 1004 | |
}, | |
{ | |
"epoch": 1.8890977443609023, | |
"grad_norm": 2.7714462280273438, | |
"learning_rate": 6.195870427518115e-05, | |
"loss": 0.1101, | |
"step": 1005 | |
}, | |
{ | |
"epoch": 1.8909774436090225, | |
"grad_norm": 3.425043821334839, | |
"learning_rate": 6.186029568381416e-05, | |
"loss": 0.2017, | |
"step": 1006 | |
}, | |
{ | |
"epoch": 1.8928571428571428, | |
"grad_norm": 4.350175380706787, | |
"learning_rate": 6.176189149891373e-05, | |
"loss": 0.3919, | |
"step": 1007 | |
}, | |
{ | |
"epoch": 1.8947368421052633, | |
"grad_norm": 2.360135316848755, | |
"learning_rate": 6.166349239831603e-05, | |
"loss": 0.0908, | |
"step": 1008 | |
}, | |
{ | |
"epoch": 1.8966165413533833, | |
"grad_norm": 1.7306382656097412, | |
"learning_rate": 6.156509905982227e-05, | |
"loss": 0.0622, | |
"step": 1009 | |
}, | |
{ | |
"epoch": 1.8984962406015038, | |
"grad_norm": 1.7376872301101685, | |
"learning_rate": 6.146671216119393e-05, | |
"loss": 0.0655, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 1.900375939849624, | |
"grad_norm": 3.714693307876587, | |
"learning_rate": 6.136833238014814e-05, | |
"loss": 0.252, | |
"step": 1011 | |
}, | |
{ | |
"epoch": 1.9022556390977443, | |
"grad_norm": 3.6632583141326904, | |
"learning_rate": 6.126996039435302e-05, | |
"loss": 0.3261, | |
"step": 1012 | |
}, | |
{ | |
"epoch": 1.9041353383458648, | |
"grad_norm": 2.3958234786987305, | |
"learning_rate": 6.117159688142299e-05, | |
"loss": 0.1221, | |
"step": 1013 | |
}, | |
{ | |
"epoch": 1.9060150375939848, | |
"grad_norm": 2.7297089099884033, | |
"learning_rate": 6.107324251891407e-05, | |
"loss": 0.1101, | |
"step": 1014 | |
}, | |
{ | |
"epoch": 1.9078947368421053, | |
"grad_norm": 2.423370122909546, | |
"learning_rate": 6.097489798431929e-05, | |
"loss": 0.0951, | |
"step": 1015 | |
}, | |
{ | |
"epoch": 1.9097744360902256, | |
"grad_norm": 2.812894821166992, | |
"learning_rate": 6.087656395506396e-05, | |
"loss": 0.1421, | |
"step": 1016 | |
}, | |
{ | |
"epoch": 1.9116541353383458, | |
"grad_norm": 2.0672447681427, | |
"learning_rate": 6.077824110850106e-05, | |
"loss": 0.0644, | |
"step": 1017 | |
}, | |
{ | |
"epoch": 1.9135338345864663, | |
"grad_norm": 3.210081100463867, | |
"learning_rate": 6.067993012190649e-05, | |
"loss": 0.1413, | |
"step": 1018 | |
}, | |
{ | |
"epoch": 1.9154135338345863, | |
"grad_norm": 2.6235413551330566, | |
"learning_rate": 6.0581631672474515e-05, | |
"loss": 0.1214, | |
"step": 1019 | |
}, | |
{ | |
"epoch": 1.9172932330827068, | |
"grad_norm": 3.1395761966705322, | |
"learning_rate": 6.048334643731295e-05, | |
"loss": 0.208, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 1.919172932330827, | |
"grad_norm": 3.3170242309570312, | |
"learning_rate": 6.0385075093438716e-05, | |
"loss": 0.1795, | |
"step": 1021 | |
}, | |
{ | |
"epoch": 1.9210526315789473, | |
"grad_norm": 3.103555202484131, | |
"learning_rate": 6.02868183177729e-05, | |
"loss": 0.1614, | |
"step": 1022 | |
}, | |
{ | |
"epoch": 1.9229323308270678, | |
"grad_norm": 3.3904659748077393, | |
"learning_rate": 6.0188576787136364e-05, | |
"loss": 0.1513, | |
"step": 1023 | |
}, | |
{ | |
"epoch": 1.9248120300751879, | |
"grad_norm": 1.0235174894332886, | |
"learning_rate": 6.009035117824487e-05, | |
"loss": 0.0291, | |
"step": 1024 | |
}, | |
{ | |
"epoch": 1.9266917293233083, | |
"grad_norm": 2.410435199737549, | |
"learning_rate": 5.999214216770459e-05, | |
"loss": 0.066, | |
"step": 1025 | |
}, | |
{ | |
"epoch": 1.9285714285714286, | |
"grad_norm": 1.549646019935608, | |
"learning_rate": 5.9893950432007263e-05, | |
"loss": 0.0552, | |
"step": 1026 | |
}, | |
{ | |
"epoch": 1.9304511278195489, | |
"grad_norm": 3.0009422302246094, | |
"learning_rate": 5.979577664752572e-05, | |
"loss": 0.1837, | |
"step": 1027 | |
}, | |
{ | |
"epoch": 1.9323308270676691, | |
"grad_norm": 2.2576701641082764, | |
"learning_rate": 5.9697621490509095e-05, | |
"loss": 0.1053, | |
"step": 1028 | |
}, | |
{ | |
"epoch": 1.9342105263157894, | |
"grad_norm": 2.387059211730957, | |
"learning_rate": 5.959948563707822e-05, | |
"loss": 0.0974, | |
"step": 1029 | |
}, | |
{ | |
"epoch": 1.9360902255639099, | |
"grad_norm": 2.6841256618499756, | |
"learning_rate": 5.950136976322095e-05, | |
"loss": 0.1521, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 1.9379699248120301, | |
"grad_norm": 1.8788162469863892, | |
"learning_rate": 5.9403274544787546e-05, | |
"loss": 0.0598, | |
"step": 1031 | |
}, | |
{ | |
"epoch": 1.9398496240601504, | |
"grad_norm": 3.121676206588745, | |
"learning_rate": 5.9305200657485936e-05, | |
"loss": 0.1883, | |
"step": 1032 | |
}, | |
{ | |
"epoch": 1.9417293233082706, | |
"grad_norm": 1.5553926229476929, | |
"learning_rate": 5.920714877687719e-05, | |
"loss": 0.0306, | |
"step": 1033 | |
}, | |
{ | |
"epoch": 1.943609022556391, | |
"grad_norm": 2.58564829826355, | |
"learning_rate": 5.910911957837066e-05, | |
"loss": 0.1019, | |
"step": 1034 | |
}, | |
{ | |
"epoch": 1.9454887218045114, | |
"grad_norm": 3.2539162635803223, | |
"learning_rate": 5.9011113737219636e-05, | |
"loss": 0.1244, | |
"step": 1035 | |
}, | |
{ | |
"epoch": 1.9473684210526314, | |
"grad_norm": 2.184638261795044, | |
"learning_rate": 5.891313192851635e-05, | |
"loss": 0.1358, | |
"step": 1036 | |
}, | |
{ | |
"epoch": 1.949248120300752, | |
"grad_norm": 3.0994555950164795, | |
"learning_rate": 5.881517482718761e-05, | |
"loss": 0.1477, | |
"step": 1037 | |
}, | |
{ | |
"epoch": 1.9511278195488722, | |
"grad_norm": 2.4086666107177734, | |
"learning_rate": 5.871724310798995e-05, | |
"loss": 0.0892, | |
"step": 1038 | |
}, | |
{ | |
"epoch": 1.9530075187969924, | |
"grad_norm": 3.608358860015869, | |
"learning_rate": 5.861933744550512e-05, | |
"loss": 0.1625, | |
"step": 1039 | |
}, | |
{ | |
"epoch": 1.954887218045113, | |
"grad_norm": 2.080369234085083, | |
"learning_rate": 5.852145851413533e-05, | |
"loss": 0.0782, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 1.956766917293233, | |
"grad_norm": 2.9665627479553223, | |
"learning_rate": 5.842360698809872e-05, | |
"loss": 0.1908, | |
"step": 1041 | |
}, | |
{ | |
"epoch": 1.9586466165413534, | |
"grad_norm": 3.191819190979004, | |
"learning_rate": 5.832578354142463e-05, | |
"loss": 0.11, | |
"step": 1042 | |
}, | |
{ | |
"epoch": 1.9605263157894737, | |
"grad_norm": 3.310695171356201, | |
"learning_rate": 5.8227988847948955e-05, | |
"loss": 0.1647, | |
"step": 1043 | |
}, | |
{ | |
"epoch": 1.962406015037594, | |
"grad_norm": 2.5343101024627686, | |
"learning_rate": 5.8130223581309575e-05, | |
"loss": 0.0864, | |
"step": 1044 | |
}, | |
{ | |
"epoch": 1.9642857142857144, | |
"grad_norm": 2.4089009761810303, | |
"learning_rate": 5.8032488414941625e-05, | |
"loss": 0.0953, | |
"step": 1045 | |
}, | |
{ | |
"epoch": 1.9661654135338344, | |
"grad_norm": 2.40613055229187, | |
"learning_rate": 5.793478402207291e-05, | |
"loss": 0.1, | |
"step": 1046 | |
}, | |
{ | |
"epoch": 1.968045112781955, | |
"grad_norm": 3.5450172424316406, | |
"learning_rate": 5.783711107571932e-05, | |
"loss": 0.1592, | |
"step": 1047 | |
}, | |
{ | |
"epoch": 1.9699248120300752, | |
"grad_norm": 2.3371622562408447, | |
"learning_rate": 5.773947024868002e-05, | |
"loss": 0.0991, | |
"step": 1048 | |
}, | |
{ | |
"epoch": 1.9718045112781954, | |
"grad_norm": 3.386986017227173, | |
"learning_rate": 5.7641862213533046e-05, | |
"loss": 0.1837, | |
"step": 1049 | |
}, | |
{ | |
"epoch": 1.973684210526316, | |
"grad_norm": 2.946260690689087, | |
"learning_rate": 5.7544287642630455e-05, | |
"loss": 0.1211, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 1.973684210526316, | |
"eval_global_dataset_loss": 1.0035452842712402, | |
"eval_global_dataset_runtime": 74.0905, | |
"eval_global_dataset_samples_per_second": 13.119, | |
"eval_global_dataset_steps_per_second": 0.054, | |
"eval_sequential_score": 0.92074932690604, | |
"eval_sts-test-1024_pearson_cosine": 0.9057887446299446, | |
"eval_sts-test-1024_spearman_cosine": 0.9199283202645535, | |
"eval_sts-test-1280_pearson_cosine": 0.9057869799328011, | |
"eval_sts-test-1280_spearman_cosine": 0.9199397318362084, | |
"eval_sts-test-512_pearson_cosine": 0.9050175538356355, | |
"eval_sts-test-512_spearman_cosine": 0.92074932690604, | |
"eval_sts-test-760_pearson_cosine": 0.9048899865653727, | |
"eval_sts-test-760_spearman_cosine": 0.9203155287488471, | |
"eval_sts-test_pearson_cosine": 0.9057823528825933, | |
"eval_sts-test_spearman_cosine": 0.919914671129829, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 1.975563909774436, | |
"grad_norm": 1.902694582939148, | |
"learning_rate": 5.7446747208093866e-05, | |
"loss": 0.0752, | |
"step": 1051 | |
}, | |
{ | |
"epoch": 1.9774436090225564, | |
"grad_norm": 3.0880801677703857, | |
"learning_rate": 5.73492415818097e-05, | |
"loss": 0.1237, | |
"step": 1052 | |
}, | |
{ | |
"epoch": 1.9793233082706767, | |
"grad_norm": 1.482879400253296, | |
"learning_rate": 5.725177143542466e-05, | |
"loss": 0.0486, | |
"step": 1053 | |
}, | |
{ | |
"epoch": 1.981203007518797, | |
"grad_norm": 3.4937937259674072, | |
"learning_rate": 5.715433744034101e-05, | |
"loss": 0.1704, | |
"step": 1054 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 1596, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 3, | |
"save_steps": 527, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 0.0, | |
"train_batch_size": 192, | |
"trial_name": null, | |
"trial_params": null | |
} | |