|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 12.0, |
|
"eval_steps": 500, |
|
"global_step": 90492, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.18677596747875214, |
|
"learning_rate": 1.3320367751060821e-05, |
|
"loss": 1.6931, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_AFQMC_loss": 0.02504170872271061, |
|
"eval_AFQMC_runtime": 99.0724, |
|
"eval_AFQMC_samples_per_second": 43.564, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.49435896960848713, |
|
"eval_emb_eval_spearman_cosine": 0.5280909639875639, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_ATEC_loss": 0.01953265070915222, |
|
"eval_ATEC_runtime": 6.6017, |
|
"eval_ATEC_samples_per_second": 3029.521, |
|
"eval_ATEC_steps_per_second": 23.782, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_BQ_loss": 0.01803545467555523, |
|
"eval_BQ_runtime": 3.4373, |
|
"eval_BQ_samples_per_second": 2909.235, |
|
"eval_BQ_steps_per_second": 22.983, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_Cmnli_loss": 0.020238544791936874, |
|
"eval_Cmnli_runtime": 3.9735, |
|
"eval_Cmnli_samples_per_second": 2092.612, |
|
"eval_Cmnli_steps_per_second": 16.358, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_LCQMC_loss": 0.03518352285027504, |
|
"eval_LCQMC_runtime": 2.3275, |
|
"eval_LCQMC_samples_per_second": 3781.698, |
|
"eval_LCQMC_steps_per_second": 29.645, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_Ocnli_loss": 0.025195566937327385, |
|
"eval_Ocnli_runtime": 0.6266, |
|
"eval_Ocnli_samples_per_second": 2947.795, |
|
"eval_Ocnli_steps_per_second": 23.94, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_PAWSX_loss": 0.067341148853302, |
|
"eval_PAWSX_runtime": 0.9538, |
|
"eval_PAWSX_samples_per_second": 2096.982, |
|
"eval_PAWSX_steps_per_second": 16.776, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_QBQTC_loss": 8.280766487121582, |
|
"eval_QBQTC_runtime": 7.6679, |
|
"eval_QBQTC_samples_per_second": 2608.269, |
|
"eval_QBQTC_steps_per_second": 20.475, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_STS-B_loss": 8.6417875289917, |
|
"eval_STS-B_runtime": 0.5653, |
|
"eval_STS-B_samples_per_second": 2579.365, |
|
"eval_STS-B_steps_per_second": 21.229, |
|
"step": 7541 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.29912641644477844, |
|
"learning_rate": 1.92609255130006e-05, |
|
"loss": 1.5717, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_AFQMC_loss": 0.023414788767695427, |
|
"eval_AFQMC_runtime": 99.0838, |
|
"eval_AFQMC_samples_per_second": 43.559, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.541482089845575, |
|
"eval_emb_eval_spearman_cosine": 0.584542245914602, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_ATEC_loss": 0.01733938232064247, |
|
"eval_ATEC_runtime": 6.5234, |
|
"eval_ATEC_samples_per_second": 3065.879, |
|
"eval_ATEC_steps_per_second": 24.067, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_BQ_loss": 0.015580276027321815, |
|
"eval_BQ_runtime": 3.419, |
|
"eval_BQ_samples_per_second": 2924.795, |
|
"eval_BQ_steps_per_second": 23.106, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Cmnli_loss": 0.016321830451488495, |
|
"eval_Cmnli_runtime": 3.7867, |
|
"eval_Cmnli_samples_per_second": 2195.824, |
|
"eval_Cmnli_steps_per_second": 17.165, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LCQMC_loss": 0.02696206048130989, |
|
"eval_LCQMC_runtime": 2.3672, |
|
"eval_LCQMC_samples_per_second": 3718.264, |
|
"eval_LCQMC_steps_per_second": 29.148, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Ocnli_loss": 0.018722666427493095, |
|
"eval_Ocnli_runtime": 0.6105, |
|
"eval_Ocnli_samples_per_second": 3025.526, |
|
"eval_Ocnli_steps_per_second": 24.571, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_PAWSX_loss": 0.06007164344191551, |
|
"eval_PAWSX_runtime": 0.9605, |
|
"eval_PAWSX_samples_per_second": 2082.25, |
|
"eval_PAWSX_steps_per_second": 16.658, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_QBQTC_loss": 8.381814002990723, |
|
"eval_QBQTC_runtime": 7.644, |
|
"eval_QBQTC_samples_per_second": 2616.427, |
|
"eval_QBQTC_steps_per_second": 20.539, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_STS-B_loss": 9.617568969726562, |
|
"eval_STS-B_runtime": 0.5589, |
|
"eval_STS-B_samples_per_second": 2608.657, |
|
"eval_STS-B_steps_per_second": 21.47, |
|
"step": 15082 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.2338990569114685, |
|
"learning_rate": 1.7779829671031307e-05, |
|
"loss": 1.5301, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_AFQMC_loss": 0.02241475135087967, |
|
"eval_AFQMC_runtime": 99.9714, |
|
"eval_AFQMC_samples_per_second": 43.172, |
|
"eval_AFQMC_steps_per_second": 0.34, |
|
"eval_emb_eval_pearson_cosine": 0.5913082581080403, |
|
"eval_emb_eval_spearman_cosine": 0.6383284889220768, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_ATEC_loss": 0.016517719253897667, |
|
"eval_ATEC_runtime": 6.5842, |
|
"eval_ATEC_samples_per_second": 3037.594, |
|
"eval_ATEC_steps_per_second": 23.845, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_BQ_loss": 0.014284521341323853, |
|
"eval_BQ_runtime": 3.406, |
|
"eval_BQ_samples_per_second": 2936.005, |
|
"eval_BQ_steps_per_second": 23.194, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Cmnli_loss": 0.01529396791011095, |
|
"eval_Cmnli_runtime": 3.7388, |
|
"eval_Cmnli_samples_per_second": 2223.987, |
|
"eval_Cmnli_steps_per_second": 17.385, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_LCQMC_loss": 0.020633986219763756, |
|
"eval_LCQMC_runtime": 2.3364, |
|
"eval_LCQMC_samples_per_second": 3767.314, |
|
"eval_LCQMC_steps_per_second": 29.532, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Ocnli_loss": 0.016242269426584244, |
|
"eval_Ocnli_runtime": 0.6209, |
|
"eval_Ocnli_samples_per_second": 2974.536, |
|
"eval_Ocnli_steps_per_second": 24.157, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_PAWSX_loss": 0.05045855790376663, |
|
"eval_PAWSX_runtime": 0.9593, |
|
"eval_PAWSX_samples_per_second": 2084.846, |
|
"eval_PAWSX_steps_per_second": 16.679, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_QBQTC_loss": 8.840312957763672, |
|
"eval_QBQTC_runtime": 7.6443, |
|
"eval_QBQTC_samples_per_second": 2616.334, |
|
"eval_QBQTC_steps_per_second": 20.538, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_STS-B_loss": 9.837035179138184, |
|
"eval_STS-B_runtime": 0.5658, |
|
"eval_STS-B_samples_per_second": 2577.011, |
|
"eval_STS-B_steps_per_second": 21.21, |
|
"step": 22623 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.1766250729560852, |
|
"learning_rate": 1.629912674479141e-05, |
|
"loss": 1.4768, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_AFQMC_loss": 0.021405402570962906, |
|
"eval_AFQMC_runtime": 100.3018, |
|
"eval_AFQMC_samples_per_second": 43.03, |
|
"eval_AFQMC_steps_per_second": 0.339, |
|
"eval_emb_eval_pearson_cosine": 0.6125367634857398, |
|
"eval_emb_eval_spearman_cosine": 0.6570670709310811, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_ATEC_loss": 0.015135602094233036, |
|
"eval_ATEC_runtime": 6.5473, |
|
"eval_ATEC_samples_per_second": 3054.678, |
|
"eval_ATEC_steps_per_second": 23.979, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_BQ_loss": 0.013757712207734585, |
|
"eval_BQ_runtime": 3.4539, |
|
"eval_BQ_samples_per_second": 2895.245, |
|
"eval_BQ_steps_per_second": 22.872, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_Cmnli_loss": 0.015052303671836853, |
|
"eval_Cmnli_runtime": 3.7534, |
|
"eval_Cmnli_samples_per_second": 2215.324, |
|
"eval_Cmnli_steps_per_second": 17.318, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LCQMC_loss": 0.019115839153528214, |
|
"eval_LCQMC_runtime": 2.3344, |
|
"eval_LCQMC_samples_per_second": 3770.582, |
|
"eval_LCQMC_steps_per_second": 29.558, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_Ocnli_loss": 0.015967663377523422, |
|
"eval_Ocnli_runtime": 0.603, |
|
"eval_Ocnli_samples_per_second": 3063.145, |
|
"eval_Ocnli_steps_per_second": 24.877, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_PAWSX_loss": 0.04721539840102196, |
|
"eval_PAWSX_runtime": 0.9557, |
|
"eval_PAWSX_samples_per_second": 2092.706, |
|
"eval_PAWSX_steps_per_second": 16.742, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_QBQTC_loss": 9.168432235717773, |
|
"eval_QBQTC_runtime": 7.6741, |
|
"eval_QBQTC_samples_per_second": 2606.178, |
|
"eval_QBQTC_steps_per_second": 20.458, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_STS-B_loss": 9.83575439453125, |
|
"eval_STS-B_runtime": 0.5632, |
|
"eval_STS-B_samples_per_second": 2588.593, |
|
"eval_STS-B_steps_per_second": 21.305, |
|
"step": 30164 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.13791993260383606, |
|
"learning_rate": 1.481822736068682e-05, |
|
"loss": 1.4141, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_AFQMC_loss": 0.0204468946903944, |
|
"eval_AFQMC_runtime": 99.1315, |
|
"eval_AFQMC_samples_per_second": 43.538, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.6315631932372178, |
|
"eval_emb_eval_spearman_cosine": 0.6690982617504025, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_ATEC_loss": 0.014196612872183323, |
|
"eval_ATEC_runtime": 6.5521, |
|
"eval_ATEC_samples_per_second": 3052.435, |
|
"eval_ATEC_steps_per_second": 23.962, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_BQ_loss": 0.013593867421150208, |
|
"eval_BQ_runtime": 3.4128, |
|
"eval_BQ_samples_per_second": 2930.177, |
|
"eval_BQ_steps_per_second": 23.148, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_Cmnli_loss": 0.015099190175533295, |
|
"eval_Cmnli_runtime": 3.8007, |
|
"eval_Cmnli_samples_per_second": 2187.75, |
|
"eval_Cmnli_steps_per_second": 17.102, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_LCQMC_loss": 0.017175855115056038, |
|
"eval_LCQMC_runtime": 2.4095, |
|
"eval_LCQMC_samples_per_second": 3653.013, |
|
"eval_LCQMC_steps_per_second": 28.636, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_Ocnli_loss": 0.015738315880298615, |
|
"eval_Ocnli_runtime": 0.619, |
|
"eval_Ocnli_samples_per_second": 2983.626, |
|
"eval_Ocnli_steps_per_second": 24.231, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_PAWSX_loss": 0.046284567564725876, |
|
"eval_PAWSX_runtime": 0.959, |
|
"eval_PAWSX_samples_per_second": 2085.505, |
|
"eval_PAWSX_steps_per_second": 16.684, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_QBQTC_loss": 9.86181640625, |
|
"eval_QBQTC_runtime": 7.9334, |
|
"eval_QBQTC_samples_per_second": 2520.974, |
|
"eval_QBQTC_steps_per_second": 19.79, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_STS-B_loss": 9.911561965942383, |
|
"eval_STS-B_runtime": 0.5701, |
|
"eval_STS-B_samples_per_second": 2557.501, |
|
"eval_STS-B_steps_per_second": 21.049, |
|
"step": 37705 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 0.17065556347370148, |
|
"learning_rate": 1.3337327976582224e-05, |
|
"loss": 1.3461, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_AFQMC_loss": 0.02002647891640663, |
|
"eval_AFQMC_runtime": 98.4869, |
|
"eval_AFQMC_samples_per_second": 43.823, |
|
"eval_AFQMC_steps_per_second": 0.345, |
|
"eval_emb_eval_pearson_cosine": 0.6436085641934666, |
|
"eval_emb_eval_spearman_cosine": 0.6715343325157711, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_ATEC_loss": 0.014070386998355389, |
|
"eval_ATEC_runtime": 6.5822, |
|
"eval_ATEC_samples_per_second": 3038.48, |
|
"eval_ATEC_steps_per_second": 23.852, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_BQ_loss": 0.013585967943072319, |
|
"eval_BQ_runtime": 3.6468, |
|
"eval_BQ_samples_per_second": 2742.113, |
|
"eval_BQ_steps_per_second": 21.663, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_Cmnli_loss": 0.01478442084044218, |
|
"eval_Cmnli_runtime": 3.7572, |
|
"eval_Cmnli_samples_per_second": 2213.061, |
|
"eval_Cmnli_steps_per_second": 17.3, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_LCQMC_loss": 0.016025548800826073, |
|
"eval_LCQMC_runtime": 2.3232, |
|
"eval_LCQMC_samples_per_second": 3788.767, |
|
"eval_LCQMC_steps_per_second": 29.701, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_Ocnli_loss": 0.015464934520423412, |
|
"eval_Ocnli_runtime": 0.6071, |
|
"eval_Ocnli_samples_per_second": 3042.383, |
|
"eval_Ocnli_steps_per_second": 24.708, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_PAWSX_loss": 0.04284413903951645, |
|
"eval_PAWSX_runtime": 0.9891, |
|
"eval_PAWSX_samples_per_second": 2021.991, |
|
"eval_PAWSX_steps_per_second": 16.176, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_QBQTC_loss": 10.840497016906738, |
|
"eval_QBQTC_runtime": 7.7281, |
|
"eval_QBQTC_samples_per_second": 2587.958, |
|
"eval_QBQTC_steps_per_second": 20.315, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_STS-B_loss": 9.943366050720215, |
|
"eval_STS-B_runtime": 0.5591, |
|
"eval_STS-B_samples_per_second": 2607.892, |
|
"eval_STS-B_steps_per_second": 21.464, |
|
"step": 45246 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.13601745665073395, |
|
"learning_rate": 1.185642859247763e-05, |
|
"loss": 1.2856, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_AFQMC_loss": 0.019535699859261513, |
|
"eval_AFQMC_runtime": 99.2494, |
|
"eval_AFQMC_samples_per_second": 43.486, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.6473204770905479, |
|
"eval_emb_eval_spearman_cosine": 0.6723246242283224, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_ATEC_loss": 0.013532574288547039, |
|
"eval_ATEC_runtime": 6.5765, |
|
"eval_ATEC_samples_per_second": 3041.133, |
|
"eval_ATEC_steps_per_second": 23.873, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_BQ_loss": 0.013674370013177395, |
|
"eval_BQ_runtime": 3.4566, |
|
"eval_BQ_samples_per_second": 2892.978, |
|
"eval_BQ_steps_per_second": 22.855, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_Cmnli_loss": 0.014647725969552994, |
|
"eval_Cmnli_runtime": 3.7546, |
|
"eval_Cmnli_samples_per_second": 2214.61, |
|
"eval_Cmnli_steps_per_second": 17.312, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_LCQMC_loss": 0.0150056267157197, |
|
"eval_LCQMC_runtime": 2.3629, |
|
"eval_LCQMC_samples_per_second": 3725.146, |
|
"eval_LCQMC_steps_per_second": 29.202, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_Ocnli_loss": 0.015559999272227287, |
|
"eval_Ocnli_runtime": 0.6231, |
|
"eval_Ocnli_samples_per_second": 2964.212, |
|
"eval_Ocnli_steps_per_second": 24.073, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_PAWSX_loss": 0.04360537603497505, |
|
"eval_PAWSX_runtime": 0.9535, |
|
"eval_PAWSX_samples_per_second": 2097.49, |
|
"eval_PAWSX_steps_per_second": 16.78, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_QBQTC_loss": 11.554633140563965, |
|
"eval_QBQTC_runtime": 7.7436, |
|
"eval_QBQTC_samples_per_second": 2582.791, |
|
"eval_QBQTC_steps_per_second": 20.275, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_STS-B_loss": 10.235774993896484, |
|
"eval_STS-B_runtime": 0.5543, |
|
"eval_STS-B_samples_per_second": 2630.224, |
|
"eval_STS-B_steps_per_second": 21.648, |
|
"step": 52787 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.11353704333305359, |
|
"learning_rate": 1.0375332750508335e-05, |
|
"loss": 1.2182, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_AFQMC_loss": 0.019351305440068245, |
|
"eval_AFQMC_runtime": 99.553, |
|
"eval_AFQMC_samples_per_second": 43.354, |
|
"eval_AFQMC_steps_per_second": 0.342, |
|
"eval_emb_eval_pearson_cosine": 0.6494222678001456, |
|
"eval_emb_eval_spearman_cosine": 0.6703225826686093, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_ATEC_loss": 0.013181576505303383, |
|
"eval_ATEC_runtime": 6.522, |
|
"eval_ATEC_samples_per_second": 3066.526, |
|
"eval_ATEC_steps_per_second": 24.072, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_BQ_loss": 0.013695988804101944, |
|
"eval_BQ_runtime": 3.4134, |
|
"eval_BQ_samples_per_second": 2929.642, |
|
"eval_BQ_steps_per_second": 23.144, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_Cmnli_loss": 0.014423331245779991, |
|
"eval_Cmnli_runtime": 3.7623, |
|
"eval_Cmnli_samples_per_second": 2210.105, |
|
"eval_Cmnli_steps_per_second": 17.277, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_LCQMC_loss": 0.015052268281579018, |
|
"eval_LCQMC_runtime": 2.3257, |
|
"eval_LCQMC_samples_per_second": 3784.705, |
|
"eval_LCQMC_steps_per_second": 29.669, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_Ocnli_loss": 0.015219747088849545, |
|
"eval_Ocnli_runtime": 0.6121, |
|
"eval_Ocnli_samples_per_second": 3017.401, |
|
"eval_Ocnli_steps_per_second": 24.505, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_PAWSX_loss": 0.042021822184324265, |
|
"eval_PAWSX_runtime": 0.9526, |
|
"eval_PAWSX_samples_per_second": 2099.597, |
|
"eval_PAWSX_steps_per_second": 16.797, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_QBQTC_loss": 12.265329360961914, |
|
"eval_QBQTC_runtime": 7.6635, |
|
"eval_QBQTC_samples_per_second": 2609.768, |
|
"eval_QBQTC_steps_per_second": 20.487, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_STS-B_loss": 10.596503257751465, |
|
"eval_STS-B_runtime": 0.5656, |
|
"eval_STS-B_samples_per_second": 2577.705, |
|
"eval_STS-B_steps_per_second": 21.216, |
|
"step": 60328 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.10075096040964127, |
|
"learning_rate": 8.89443336640374e-06, |
|
"loss": 1.1546, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_AFQMC_loss": 0.018916338682174683, |
|
"eval_AFQMC_runtime": 99.1097, |
|
"eval_AFQMC_samples_per_second": 43.548, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.6461004194477242, |
|
"eval_emb_eval_spearman_cosine": 0.6620616757187626, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_ATEC_loss": 0.01316931750625372, |
|
"eval_ATEC_runtime": 6.5342, |
|
"eval_ATEC_samples_per_second": 3060.808, |
|
"eval_ATEC_steps_per_second": 24.027, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_BQ_loss": 0.013916433788836002, |
|
"eval_BQ_runtime": 3.4228, |
|
"eval_BQ_samples_per_second": 2921.626, |
|
"eval_BQ_steps_per_second": 23.081, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_Cmnli_loss": 0.014616083353757858, |
|
"eval_Cmnli_runtime": 3.7166, |
|
"eval_Cmnli_samples_per_second": 2237.257, |
|
"eval_Cmnli_steps_per_second": 17.489, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_LCQMC_loss": 0.014678360894322395, |
|
"eval_LCQMC_runtime": 2.3376, |
|
"eval_LCQMC_samples_per_second": 3765.326, |
|
"eval_LCQMC_steps_per_second": 29.517, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_Ocnli_loss": 0.015178242698311806, |
|
"eval_Ocnli_runtime": 0.6061, |
|
"eval_Ocnli_samples_per_second": 3047.588, |
|
"eval_Ocnli_steps_per_second": 24.75, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_PAWSX_loss": 0.04116755351424217, |
|
"eval_PAWSX_runtime": 0.9482, |
|
"eval_PAWSX_samples_per_second": 2109.261, |
|
"eval_PAWSX_steps_per_second": 16.874, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_QBQTC_loss": 12.928996086120605, |
|
"eval_QBQTC_runtime": 7.7135, |
|
"eval_QBQTC_samples_per_second": 2592.855, |
|
"eval_QBQTC_steps_per_second": 20.354, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_STS-B_loss": 10.769329071044922, |
|
"eval_STS-B_runtime": 0.557, |
|
"eval_STS-B_samples_per_second": 2617.49, |
|
"eval_STS-B_steps_per_second": 21.543, |
|
"step": 67869 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 116.47360229492188, |
|
"learning_rate": 7.413533982299147e-06, |
|
"loss": 1.091, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_AFQMC_loss": 0.01876525580883026, |
|
"eval_AFQMC_runtime": 106.9735, |
|
"eval_AFQMC_samples_per_second": 40.346, |
|
"eval_AFQMC_steps_per_second": 0.318, |
|
"eval_emb_eval_pearson_cosine": 0.6425398472325708, |
|
"eval_emb_eval_spearman_cosine": 0.6571917650236837, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_ATEC_loss": 0.012936480343341827, |
|
"eval_ATEC_runtime": 6.5457, |
|
"eval_ATEC_samples_per_second": 3055.455, |
|
"eval_ATEC_steps_per_second": 23.985, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_BQ_loss": 0.014092645607888699, |
|
"eval_BQ_runtime": 3.4559, |
|
"eval_BQ_samples_per_second": 2893.566, |
|
"eval_BQ_steps_per_second": 22.859, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_Cmnli_loss": 0.01457986794412136, |
|
"eval_Cmnli_runtime": 3.7514, |
|
"eval_Cmnli_samples_per_second": 2216.51, |
|
"eval_Cmnli_steps_per_second": 17.327, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_LCQMC_loss": 0.014272717759013176, |
|
"eval_LCQMC_runtime": 2.3604, |
|
"eval_LCQMC_samples_per_second": 3729.02, |
|
"eval_LCQMC_steps_per_second": 29.232, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_Ocnli_loss": 0.015037407167255878, |
|
"eval_Ocnli_runtime": 0.6116, |
|
"eval_Ocnli_samples_per_second": 3020.096, |
|
"eval_Ocnli_steps_per_second": 24.527, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_PAWSX_loss": 0.04113338142633438, |
|
"eval_PAWSX_runtime": 0.9594, |
|
"eval_PAWSX_samples_per_second": 2084.624, |
|
"eval_PAWSX_steps_per_second": 16.677, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_QBQTC_loss": 13.890397071838379, |
|
"eval_QBQTC_runtime": 7.674, |
|
"eval_QBQTC_samples_per_second": 2606.201, |
|
"eval_QBQTC_steps_per_second": 20.459, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_STS-B_loss": 10.956206321716309, |
|
"eval_STS-B_runtime": 0.5649, |
|
"eval_STS-B_samples_per_second": 2580.888, |
|
"eval_STS-B_steps_per_second": 21.242, |
|
"step": 75410 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.15993598103523254, |
|
"learning_rate": 5.933027513923951e-06, |
|
"loss": 1.0267, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_AFQMC_loss": 0.01858402043581009, |
|
"eval_AFQMC_runtime": 99.2423, |
|
"eval_AFQMC_samples_per_second": 43.49, |
|
"eval_AFQMC_steps_per_second": 0.343, |
|
"eval_emb_eval_pearson_cosine": 0.635671675332461, |
|
"eval_emb_eval_spearman_cosine": 0.6462278753331322, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_ATEC_loss": 0.012706396169960499, |
|
"eval_ATEC_runtime": 6.5392, |
|
"eval_ATEC_samples_per_second": 3058.477, |
|
"eval_ATEC_steps_per_second": 24.009, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_BQ_loss": 0.014143843203783035, |
|
"eval_BQ_runtime": 3.3932, |
|
"eval_BQ_samples_per_second": 2947.098, |
|
"eval_BQ_steps_per_second": 23.282, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_Cmnli_loss": 0.014601893723011017, |
|
"eval_Cmnli_runtime": 3.7177, |
|
"eval_Cmnli_samples_per_second": 2236.627, |
|
"eval_Cmnli_steps_per_second": 17.484, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_LCQMC_loss": 0.01407212857156992, |
|
"eval_LCQMC_runtime": 2.3166, |
|
"eval_LCQMC_samples_per_second": 3799.506, |
|
"eval_LCQMC_steps_per_second": 29.785, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_Ocnli_loss": 0.015255914069712162, |
|
"eval_Ocnli_runtime": 0.6096, |
|
"eval_Ocnli_samples_per_second": 3029.719, |
|
"eval_Ocnli_steps_per_second": 24.605, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_PAWSX_loss": 0.04009222611784935, |
|
"eval_PAWSX_runtime": 0.9569, |
|
"eval_PAWSX_samples_per_second": 2090.135, |
|
"eval_PAWSX_steps_per_second": 16.721, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_QBQTC_loss": 14.71971607208252, |
|
"eval_QBQTC_runtime": 7.651, |
|
"eval_QBQTC_samples_per_second": 2614.025, |
|
"eval_QBQTC_steps_per_second": 20.52, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_STS-B_loss": 11.138467788696289, |
|
"eval_STS-B_runtime": 0.5605, |
|
"eval_STS-B_samples_per_second": 2601.08, |
|
"eval_STS-B_steps_per_second": 21.408, |
|
"step": 82951 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 0.22925728559494019, |
|
"learning_rate": 4.451735214089958e-06, |
|
"loss": 0.9688, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_AFQMC_loss": 0.018353162333369255, |
|
"eval_AFQMC_runtime": 98.1243, |
|
"eval_AFQMC_samples_per_second": 43.985, |
|
"eval_AFQMC_steps_per_second": 0.346, |
|
"eval_emb_eval_pearson_cosine": 0.6340788346277473, |
|
"eval_emb_eval_spearman_cosine": 0.6444001384260496, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_ATEC_loss": 0.012524303048849106, |
|
"eval_ATEC_runtime": 6.5509, |
|
"eval_ATEC_samples_per_second": 3052.999, |
|
"eval_ATEC_steps_per_second": 23.966, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_BQ_loss": 0.014260655269026756, |
|
"eval_BQ_runtime": 3.4399, |
|
"eval_BQ_samples_per_second": 2907.038, |
|
"eval_BQ_steps_per_second": 22.966, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_Cmnli_loss": 0.014628582634031773, |
|
"eval_Cmnli_runtime": 3.7252, |
|
"eval_Cmnli_samples_per_second": 2232.102, |
|
"eval_Cmnli_steps_per_second": 17.449, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_LCQMC_loss": 0.013899387791752815, |
|
"eval_LCQMC_runtime": 2.349, |
|
"eval_LCQMC_samples_per_second": 3747.087, |
|
"eval_LCQMC_steps_per_second": 29.374, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_Ocnli_loss": 0.015396999195218086, |
|
"eval_Ocnli_runtime": 0.6119, |
|
"eval_Ocnli_samples_per_second": 3018.253, |
|
"eval_Ocnli_steps_per_second": 24.512, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_PAWSX_loss": 0.04036952182650566, |
|
"eval_PAWSX_runtime": 0.9649, |
|
"eval_PAWSX_samples_per_second": 2072.698, |
|
"eval_PAWSX_steps_per_second": 16.582, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_QBQTC_loss": 15.251015663146973, |
|
"eval_QBQTC_runtime": 7.662, |
|
"eval_QBQTC_samples_per_second": 2610.3, |
|
"eval_QBQTC_steps_per_second": 20.491, |
|
"step": 90492 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_STS-B_loss": 11.509617805480957, |
|
"eval_STS-B_runtime": 0.5625, |
|
"eval_STS-B_samples_per_second": 2592.013, |
|
"eval_STS-B_steps_per_second": 21.333, |
|
"step": 90492 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 113115, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|