text_emb_V1 / trainer_state.json
zhonghhh's picture
Upload 10 files
39a71b9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.0,
"eval_steps": 500,
"global_step": 90492,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 0.18677596747875214,
"learning_rate": 1.3320367751060821e-05,
"loss": 1.6931,
"step": 7541
},
{
"epoch": 1.0,
"eval_AFQMC_loss": 0.02504170872271061,
"eval_AFQMC_runtime": 99.0724,
"eval_AFQMC_samples_per_second": 43.564,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.49435896960848713,
"eval_emb_eval_spearman_cosine": 0.5280909639875639,
"step": 7541
},
{
"epoch": 1.0,
"eval_ATEC_loss": 0.01953265070915222,
"eval_ATEC_runtime": 6.6017,
"eval_ATEC_samples_per_second": 3029.521,
"eval_ATEC_steps_per_second": 23.782,
"step": 7541
},
{
"epoch": 1.0,
"eval_BQ_loss": 0.01803545467555523,
"eval_BQ_runtime": 3.4373,
"eval_BQ_samples_per_second": 2909.235,
"eval_BQ_steps_per_second": 22.983,
"step": 7541
},
{
"epoch": 1.0,
"eval_Cmnli_loss": 0.020238544791936874,
"eval_Cmnli_runtime": 3.9735,
"eval_Cmnli_samples_per_second": 2092.612,
"eval_Cmnli_steps_per_second": 16.358,
"step": 7541
},
{
"epoch": 1.0,
"eval_LCQMC_loss": 0.03518352285027504,
"eval_LCQMC_runtime": 2.3275,
"eval_LCQMC_samples_per_second": 3781.698,
"eval_LCQMC_steps_per_second": 29.645,
"step": 7541
},
{
"epoch": 1.0,
"eval_Ocnli_loss": 0.025195566937327385,
"eval_Ocnli_runtime": 0.6266,
"eval_Ocnli_samples_per_second": 2947.795,
"eval_Ocnli_steps_per_second": 23.94,
"step": 7541
},
{
"epoch": 1.0,
"eval_PAWSX_loss": 0.067341148853302,
"eval_PAWSX_runtime": 0.9538,
"eval_PAWSX_samples_per_second": 2096.982,
"eval_PAWSX_steps_per_second": 16.776,
"step": 7541
},
{
"epoch": 1.0,
"eval_QBQTC_loss": 8.280766487121582,
"eval_QBQTC_runtime": 7.6679,
"eval_QBQTC_samples_per_second": 2608.269,
"eval_QBQTC_steps_per_second": 20.475,
"step": 7541
},
{
"epoch": 1.0,
"eval_STS-B_loss": 8.6417875289917,
"eval_STS-B_runtime": 0.5653,
"eval_STS-B_samples_per_second": 2579.365,
"eval_STS-B_steps_per_second": 21.229,
"step": 7541
},
{
"epoch": 2.0,
"grad_norm": 0.29912641644477844,
"learning_rate": 1.92609255130006e-05,
"loss": 1.5717,
"step": 15082
},
{
"epoch": 2.0,
"eval_AFQMC_loss": 0.023414788767695427,
"eval_AFQMC_runtime": 99.0838,
"eval_AFQMC_samples_per_second": 43.559,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.541482089845575,
"eval_emb_eval_spearman_cosine": 0.584542245914602,
"step": 15082
},
{
"epoch": 2.0,
"eval_ATEC_loss": 0.01733938232064247,
"eval_ATEC_runtime": 6.5234,
"eval_ATEC_samples_per_second": 3065.879,
"eval_ATEC_steps_per_second": 24.067,
"step": 15082
},
{
"epoch": 2.0,
"eval_BQ_loss": 0.015580276027321815,
"eval_BQ_runtime": 3.419,
"eval_BQ_samples_per_second": 2924.795,
"eval_BQ_steps_per_second": 23.106,
"step": 15082
},
{
"epoch": 2.0,
"eval_Cmnli_loss": 0.016321830451488495,
"eval_Cmnli_runtime": 3.7867,
"eval_Cmnli_samples_per_second": 2195.824,
"eval_Cmnli_steps_per_second": 17.165,
"step": 15082
},
{
"epoch": 2.0,
"eval_LCQMC_loss": 0.02696206048130989,
"eval_LCQMC_runtime": 2.3672,
"eval_LCQMC_samples_per_second": 3718.264,
"eval_LCQMC_steps_per_second": 29.148,
"step": 15082
},
{
"epoch": 2.0,
"eval_Ocnli_loss": 0.018722666427493095,
"eval_Ocnli_runtime": 0.6105,
"eval_Ocnli_samples_per_second": 3025.526,
"eval_Ocnli_steps_per_second": 24.571,
"step": 15082
},
{
"epoch": 2.0,
"eval_PAWSX_loss": 0.06007164344191551,
"eval_PAWSX_runtime": 0.9605,
"eval_PAWSX_samples_per_second": 2082.25,
"eval_PAWSX_steps_per_second": 16.658,
"step": 15082
},
{
"epoch": 2.0,
"eval_QBQTC_loss": 8.381814002990723,
"eval_QBQTC_runtime": 7.644,
"eval_QBQTC_samples_per_second": 2616.427,
"eval_QBQTC_steps_per_second": 20.539,
"step": 15082
},
{
"epoch": 2.0,
"eval_STS-B_loss": 9.617568969726562,
"eval_STS-B_runtime": 0.5589,
"eval_STS-B_samples_per_second": 2608.657,
"eval_STS-B_steps_per_second": 21.47,
"step": 15082
},
{
"epoch": 3.0,
"grad_norm": 0.2338990569114685,
"learning_rate": 1.7779829671031307e-05,
"loss": 1.5301,
"step": 22623
},
{
"epoch": 3.0,
"eval_AFQMC_loss": 0.02241475135087967,
"eval_AFQMC_runtime": 99.9714,
"eval_AFQMC_samples_per_second": 43.172,
"eval_AFQMC_steps_per_second": 0.34,
"eval_emb_eval_pearson_cosine": 0.5913082581080403,
"eval_emb_eval_spearman_cosine": 0.6383284889220768,
"step": 22623
},
{
"epoch": 3.0,
"eval_ATEC_loss": 0.016517719253897667,
"eval_ATEC_runtime": 6.5842,
"eval_ATEC_samples_per_second": 3037.594,
"eval_ATEC_steps_per_second": 23.845,
"step": 22623
},
{
"epoch": 3.0,
"eval_BQ_loss": 0.014284521341323853,
"eval_BQ_runtime": 3.406,
"eval_BQ_samples_per_second": 2936.005,
"eval_BQ_steps_per_second": 23.194,
"step": 22623
},
{
"epoch": 3.0,
"eval_Cmnli_loss": 0.01529396791011095,
"eval_Cmnli_runtime": 3.7388,
"eval_Cmnli_samples_per_second": 2223.987,
"eval_Cmnli_steps_per_second": 17.385,
"step": 22623
},
{
"epoch": 3.0,
"eval_LCQMC_loss": 0.020633986219763756,
"eval_LCQMC_runtime": 2.3364,
"eval_LCQMC_samples_per_second": 3767.314,
"eval_LCQMC_steps_per_second": 29.532,
"step": 22623
},
{
"epoch": 3.0,
"eval_Ocnli_loss": 0.016242269426584244,
"eval_Ocnli_runtime": 0.6209,
"eval_Ocnli_samples_per_second": 2974.536,
"eval_Ocnli_steps_per_second": 24.157,
"step": 22623
},
{
"epoch": 3.0,
"eval_PAWSX_loss": 0.05045855790376663,
"eval_PAWSX_runtime": 0.9593,
"eval_PAWSX_samples_per_second": 2084.846,
"eval_PAWSX_steps_per_second": 16.679,
"step": 22623
},
{
"epoch": 3.0,
"eval_QBQTC_loss": 8.840312957763672,
"eval_QBQTC_runtime": 7.6443,
"eval_QBQTC_samples_per_second": 2616.334,
"eval_QBQTC_steps_per_second": 20.538,
"step": 22623
},
{
"epoch": 3.0,
"eval_STS-B_loss": 9.837035179138184,
"eval_STS-B_runtime": 0.5658,
"eval_STS-B_samples_per_second": 2577.011,
"eval_STS-B_steps_per_second": 21.21,
"step": 22623
},
{
"epoch": 4.0,
"grad_norm": 0.1766250729560852,
"learning_rate": 1.629912674479141e-05,
"loss": 1.4768,
"step": 30164
},
{
"epoch": 4.0,
"eval_AFQMC_loss": 0.021405402570962906,
"eval_AFQMC_runtime": 100.3018,
"eval_AFQMC_samples_per_second": 43.03,
"eval_AFQMC_steps_per_second": 0.339,
"eval_emb_eval_pearson_cosine": 0.6125367634857398,
"eval_emb_eval_spearman_cosine": 0.6570670709310811,
"step": 30164
},
{
"epoch": 4.0,
"eval_ATEC_loss": 0.015135602094233036,
"eval_ATEC_runtime": 6.5473,
"eval_ATEC_samples_per_second": 3054.678,
"eval_ATEC_steps_per_second": 23.979,
"step": 30164
},
{
"epoch": 4.0,
"eval_BQ_loss": 0.013757712207734585,
"eval_BQ_runtime": 3.4539,
"eval_BQ_samples_per_second": 2895.245,
"eval_BQ_steps_per_second": 22.872,
"step": 30164
},
{
"epoch": 4.0,
"eval_Cmnli_loss": 0.015052303671836853,
"eval_Cmnli_runtime": 3.7534,
"eval_Cmnli_samples_per_second": 2215.324,
"eval_Cmnli_steps_per_second": 17.318,
"step": 30164
},
{
"epoch": 4.0,
"eval_LCQMC_loss": 0.019115839153528214,
"eval_LCQMC_runtime": 2.3344,
"eval_LCQMC_samples_per_second": 3770.582,
"eval_LCQMC_steps_per_second": 29.558,
"step": 30164
},
{
"epoch": 4.0,
"eval_Ocnli_loss": 0.015967663377523422,
"eval_Ocnli_runtime": 0.603,
"eval_Ocnli_samples_per_second": 3063.145,
"eval_Ocnli_steps_per_second": 24.877,
"step": 30164
},
{
"epoch": 4.0,
"eval_PAWSX_loss": 0.04721539840102196,
"eval_PAWSX_runtime": 0.9557,
"eval_PAWSX_samples_per_second": 2092.706,
"eval_PAWSX_steps_per_second": 16.742,
"step": 30164
},
{
"epoch": 4.0,
"eval_QBQTC_loss": 9.168432235717773,
"eval_QBQTC_runtime": 7.6741,
"eval_QBQTC_samples_per_second": 2606.178,
"eval_QBQTC_steps_per_second": 20.458,
"step": 30164
},
{
"epoch": 4.0,
"eval_STS-B_loss": 9.83575439453125,
"eval_STS-B_runtime": 0.5632,
"eval_STS-B_samples_per_second": 2588.593,
"eval_STS-B_steps_per_second": 21.305,
"step": 30164
},
{
"epoch": 5.0,
"grad_norm": 0.13791993260383606,
"learning_rate": 1.481822736068682e-05,
"loss": 1.4141,
"step": 37705
},
{
"epoch": 5.0,
"eval_AFQMC_loss": 0.0204468946903944,
"eval_AFQMC_runtime": 99.1315,
"eval_AFQMC_samples_per_second": 43.538,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.6315631932372178,
"eval_emb_eval_spearman_cosine": 0.6690982617504025,
"step": 37705
},
{
"epoch": 5.0,
"eval_ATEC_loss": 0.014196612872183323,
"eval_ATEC_runtime": 6.5521,
"eval_ATEC_samples_per_second": 3052.435,
"eval_ATEC_steps_per_second": 23.962,
"step": 37705
},
{
"epoch": 5.0,
"eval_BQ_loss": 0.013593867421150208,
"eval_BQ_runtime": 3.4128,
"eval_BQ_samples_per_second": 2930.177,
"eval_BQ_steps_per_second": 23.148,
"step": 37705
},
{
"epoch": 5.0,
"eval_Cmnli_loss": 0.015099190175533295,
"eval_Cmnli_runtime": 3.8007,
"eval_Cmnli_samples_per_second": 2187.75,
"eval_Cmnli_steps_per_second": 17.102,
"step": 37705
},
{
"epoch": 5.0,
"eval_LCQMC_loss": 0.017175855115056038,
"eval_LCQMC_runtime": 2.4095,
"eval_LCQMC_samples_per_second": 3653.013,
"eval_LCQMC_steps_per_second": 28.636,
"step": 37705
},
{
"epoch": 5.0,
"eval_Ocnli_loss": 0.015738315880298615,
"eval_Ocnli_runtime": 0.619,
"eval_Ocnli_samples_per_second": 2983.626,
"eval_Ocnli_steps_per_second": 24.231,
"step": 37705
},
{
"epoch": 5.0,
"eval_PAWSX_loss": 0.046284567564725876,
"eval_PAWSX_runtime": 0.959,
"eval_PAWSX_samples_per_second": 2085.505,
"eval_PAWSX_steps_per_second": 16.684,
"step": 37705
},
{
"epoch": 5.0,
"eval_QBQTC_loss": 9.86181640625,
"eval_QBQTC_runtime": 7.9334,
"eval_QBQTC_samples_per_second": 2520.974,
"eval_QBQTC_steps_per_second": 19.79,
"step": 37705
},
{
"epoch": 5.0,
"eval_STS-B_loss": 9.911561965942383,
"eval_STS-B_runtime": 0.5701,
"eval_STS-B_samples_per_second": 2557.501,
"eval_STS-B_steps_per_second": 21.049,
"step": 37705
},
{
"epoch": 6.0,
"grad_norm": 0.17065556347370148,
"learning_rate": 1.3337327976582224e-05,
"loss": 1.3461,
"step": 45246
},
{
"epoch": 6.0,
"eval_AFQMC_loss": 0.02002647891640663,
"eval_AFQMC_runtime": 98.4869,
"eval_AFQMC_samples_per_second": 43.823,
"eval_AFQMC_steps_per_second": 0.345,
"eval_emb_eval_pearson_cosine": 0.6436085641934666,
"eval_emb_eval_spearman_cosine": 0.6715343325157711,
"step": 45246
},
{
"epoch": 6.0,
"eval_ATEC_loss": 0.014070386998355389,
"eval_ATEC_runtime": 6.5822,
"eval_ATEC_samples_per_second": 3038.48,
"eval_ATEC_steps_per_second": 23.852,
"step": 45246
},
{
"epoch": 6.0,
"eval_BQ_loss": 0.013585967943072319,
"eval_BQ_runtime": 3.6468,
"eval_BQ_samples_per_second": 2742.113,
"eval_BQ_steps_per_second": 21.663,
"step": 45246
},
{
"epoch": 6.0,
"eval_Cmnli_loss": 0.01478442084044218,
"eval_Cmnli_runtime": 3.7572,
"eval_Cmnli_samples_per_second": 2213.061,
"eval_Cmnli_steps_per_second": 17.3,
"step": 45246
},
{
"epoch": 6.0,
"eval_LCQMC_loss": 0.016025548800826073,
"eval_LCQMC_runtime": 2.3232,
"eval_LCQMC_samples_per_second": 3788.767,
"eval_LCQMC_steps_per_second": 29.701,
"step": 45246
},
{
"epoch": 6.0,
"eval_Ocnli_loss": 0.015464934520423412,
"eval_Ocnli_runtime": 0.6071,
"eval_Ocnli_samples_per_second": 3042.383,
"eval_Ocnli_steps_per_second": 24.708,
"step": 45246
},
{
"epoch": 6.0,
"eval_PAWSX_loss": 0.04284413903951645,
"eval_PAWSX_runtime": 0.9891,
"eval_PAWSX_samples_per_second": 2021.991,
"eval_PAWSX_steps_per_second": 16.176,
"step": 45246
},
{
"epoch": 6.0,
"eval_QBQTC_loss": 10.840497016906738,
"eval_QBQTC_runtime": 7.7281,
"eval_QBQTC_samples_per_second": 2587.958,
"eval_QBQTC_steps_per_second": 20.315,
"step": 45246
},
{
"epoch": 6.0,
"eval_STS-B_loss": 9.943366050720215,
"eval_STS-B_runtime": 0.5591,
"eval_STS-B_samples_per_second": 2607.892,
"eval_STS-B_steps_per_second": 21.464,
"step": 45246
},
{
"epoch": 7.0,
"grad_norm": 0.13601745665073395,
"learning_rate": 1.185642859247763e-05,
"loss": 1.2856,
"step": 52787
},
{
"epoch": 7.0,
"eval_AFQMC_loss": 0.019535699859261513,
"eval_AFQMC_runtime": 99.2494,
"eval_AFQMC_samples_per_second": 43.486,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.6473204770905479,
"eval_emb_eval_spearman_cosine": 0.6723246242283224,
"step": 52787
},
{
"epoch": 7.0,
"eval_ATEC_loss": 0.013532574288547039,
"eval_ATEC_runtime": 6.5765,
"eval_ATEC_samples_per_second": 3041.133,
"eval_ATEC_steps_per_second": 23.873,
"step": 52787
},
{
"epoch": 7.0,
"eval_BQ_loss": 0.013674370013177395,
"eval_BQ_runtime": 3.4566,
"eval_BQ_samples_per_second": 2892.978,
"eval_BQ_steps_per_second": 22.855,
"step": 52787
},
{
"epoch": 7.0,
"eval_Cmnli_loss": 0.014647725969552994,
"eval_Cmnli_runtime": 3.7546,
"eval_Cmnli_samples_per_second": 2214.61,
"eval_Cmnli_steps_per_second": 17.312,
"step": 52787
},
{
"epoch": 7.0,
"eval_LCQMC_loss": 0.0150056267157197,
"eval_LCQMC_runtime": 2.3629,
"eval_LCQMC_samples_per_second": 3725.146,
"eval_LCQMC_steps_per_second": 29.202,
"step": 52787
},
{
"epoch": 7.0,
"eval_Ocnli_loss": 0.015559999272227287,
"eval_Ocnli_runtime": 0.6231,
"eval_Ocnli_samples_per_second": 2964.212,
"eval_Ocnli_steps_per_second": 24.073,
"step": 52787
},
{
"epoch": 7.0,
"eval_PAWSX_loss": 0.04360537603497505,
"eval_PAWSX_runtime": 0.9535,
"eval_PAWSX_samples_per_second": 2097.49,
"eval_PAWSX_steps_per_second": 16.78,
"step": 52787
},
{
"epoch": 7.0,
"eval_QBQTC_loss": 11.554633140563965,
"eval_QBQTC_runtime": 7.7436,
"eval_QBQTC_samples_per_second": 2582.791,
"eval_QBQTC_steps_per_second": 20.275,
"step": 52787
},
{
"epoch": 7.0,
"eval_STS-B_loss": 10.235774993896484,
"eval_STS-B_runtime": 0.5543,
"eval_STS-B_samples_per_second": 2630.224,
"eval_STS-B_steps_per_second": 21.648,
"step": 52787
},
{
"epoch": 8.0,
"grad_norm": 0.11353704333305359,
"learning_rate": 1.0375332750508335e-05,
"loss": 1.2182,
"step": 60328
},
{
"epoch": 8.0,
"eval_AFQMC_loss": 0.019351305440068245,
"eval_AFQMC_runtime": 99.553,
"eval_AFQMC_samples_per_second": 43.354,
"eval_AFQMC_steps_per_second": 0.342,
"eval_emb_eval_pearson_cosine": 0.6494222678001456,
"eval_emb_eval_spearman_cosine": 0.6703225826686093,
"step": 60328
},
{
"epoch": 8.0,
"eval_ATEC_loss": 0.013181576505303383,
"eval_ATEC_runtime": 6.522,
"eval_ATEC_samples_per_second": 3066.526,
"eval_ATEC_steps_per_second": 24.072,
"step": 60328
},
{
"epoch": 8.0,
"eval_BQ_loss": 0.013695988804101944,
"eval_BQ_runtime": 3.4134,
"eval_BQ_samples_per_second": 2929.642,
"eval_BQ_steps_per_second": 23.144,
"step": 60328
},
{
"epoch": 8.0,
"eval_Cmnli_loss": 0.014423331245779991,
"eval_Cmnli_runtime": 3.7623,
"eval_Cmnli_samples_per_second": 2210.105,
"eval_Cmnli_steps_per_second": 17.277,
"step": 60328
},
{
"epoch": 8.0,
"eval_LCQMC_loss": 0.015052268281579018,
"eval_LCQMC_runtime": 2.3257,
"eval_LCQMC_samples_per_second": 3784.705,
"eval_LCQMC_steps_per_second": 29.669,
"step": 60328
},
{
"epoch": 8.0,
"eval_Ocnli_loss": 0.015219747088849545,
"eval_Ocnli_runtime": 0.6121,
"eval_Ocnli_samples_per_second": 3017.401,
"eval_Ocnli_steps_per_second": 24.505,
"step": 60328
},
{
"epoch": 8.0,
"eval_PAWSX_loss": 0.042021822184324265,
"eval_PAWSX_runtime": 0.9526,
"eval_PAWSX_samples_per_second": 2099.597,
"eval_PAWSX_steps_per_second": 16.797,
"step": 60328
},
{
"epoch": 8.0,
"eval_QBQTC_loss": 12.265329360961914,
"eval_QBQTC_runtime": 7.6635,
"eval_QBQTC_samples_per_second": 2609.768,
"eval_QBQTC_steps_per_second": 20.487,
"step": 60328
},
{
"epoch": 8.0,
"eval_STS-B_loss": 10.596503257751465,
"eval_STS-B_runtime": 0.5656,
"eval_STS-B_samples_per_second": 2577.705,
"eval_STS-B_steps_per_second": 21.216,
"step": 60328
},
{
"epoch": 9.0,
"grad_norm": 0.10075096040964127,
"learning_rate": 8.89443336640374e-06,
"loss": 1.1546,
"step": 67869
},
{
"epoch": 9.0,
"eval_AFQMC_loss": 0.018916338682174683,
"eval_AFQMC_runtime": 99.1097,
"eval_AFQMC_samples_per_second": 43.548,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.6461004194477242,
"eval_emb_eval_spearman_cosine": 0.6620616757187626,
"step": 67869
},
{
"epoch": 9.0,
"eval_ATEC_loss": 0.01316931750625372,
"eval_ATEC_runtime": 6.5342,
"eval_ATEC_samples_per_second": 3060.808,
"eval_ATEC_steps_per_second": 24.027,
"step": 67869
},
{
"epoch": 9.0,
"eval_BQ_loss": 0.013916433788836002,
"eval_BQ_runtime": 3.4228,
"eval_BQ_samples_per_second": 2921.626,
"eval_BQ_steps_per_second": 23.081,
"step": 67869
},
{
"epoch": 9.0,
"eval_Cmnli_loss": 0.014616083353757858,
"eval_Cmnli_runtime": 3.7166,
"eval_Cmnli_samples_per_second": 2237.257,
"eval_Cmnli_steps_per_second": 17.489,
"step": 67869
},
{
"epoch": 9.0,
"eval_LCQMC_loss": 0.014678360894322395,
"eval_LCQMC_runtime": 2.3376,
"eval_LCQMC_samples_per_second": 3765.326,
"eval_LCQMC_steps_per_second": 29.517,
"step": 67869
},
{
"epoch": 9.0,
"eval_Ocnli_loss": 0.015178242698311806,
"eval_Ocnli_runtime": 0.6061,
"eval_Ocnli_samples_per_second": 3047.588,
"eval_Ocnli_steps_per_second": 24.75,
"step": 67869
},
{
"epoch": 9.0,
"eval_PAWSX_loss": 0.04116755351424217,
"eval_PAWSX_runtime": 0.9482,
"eval_PAWSX_samples_per_second": 2109.261,
"eval_PAWSX_steps_per_second": 16.874,
"step": 67869
},
{
"epoch": 9.0,
"eval_QBQTC_loss": 12.928996086120605,
"eval_QBQTC_runtime": 7.7135,
"eval_QBQTC_samples_per_second": 2592.855,
"eval_QBQTC_steps_per_second": 20.354,
"step": 67869
},
{
"epoch": 9.0,
"eval_STS-B_loss": 10.769329071044922,
"eval_STS-B_runtime": 0.557,
"eval_STS-B_samples_per_second": 2617.49,
"eval_STS-B_steps_per_second": 21.543,
"step": 67869
},
{
"epoch": 10.0,
"grad_norm": 116.47360229492188,
"learning_rate": 7.413533982299147e-06,
"loss": 1.091,
"step": 75410
},
{
"epoch": 10.0,
"eval_AFQMC_loss": 0.01876525580883026,
"eval_AFQMC_runtime": 106.9735,
"eval_AFQMC_samples_per_second": 40.346,
"eval_AFQMC_steps_per_second": 0.318,
"eval_emb_eval_pearson_cosine": 0.6425398472325708,
"eval_emb_eval_spearman_cosine": 0.6571917650236837,
"step": 75410
},
{
"epoch": 10.0,
"eval_ATEC_loss": 0.012936480343341827,
"eval_ATEC_runtime": 6.5457,
"eval_ATEC_samples_per_second": 3055.455,
"eval_ATEC_steps_per_second": 23.985,
"step": 75410
},
{
"epoch": 10.0,
"eval_BQ_loss": 0.014092645607888699,
"eval_BQ_runtime": 3.4559,
"eval_BQ_samples_per_second": 2893.566,
"eval_BQ_steps_per_second": 22.859,
"step": 75410
},
{
"epoch": 10.0,
"eval_Cmnli_loss": 0.01457986794412136,
"eval_Cmnli_runtime": 3.7514,
"eval_Cmnli_samples_per_second": 2216.51,
"eval_Cmnli_steps_per_second": 17.327,
"step": 75410
},
{
"epoch": 10.0,
"eval_LCQMC_loss": 0.014272717759013176,
"eval_LCQMC_runtime": 2.3604,
"eval_LCQMC_samples_per_second": 3729.02,
"eval_LCQMC_steps_per_second": 29.232,
"step": 75410
},
{
"epoch": 10.0,
"eval_Ocnli_loss": 0.015037407167255878,
"eval_Ocnli_runtime": 0.6116,
"eval_Ocnli_samples_per_second": 3020.096,
"eval_Ocnli_steps_per_second": 24.527,
"step": 75410
},
{
"epoch": 10.0,
"eval_PAWSX_loss": 0.04113338142633438,
"eval_PAWSX_runtime": 0.9594,
"eval_PAWSX_samples_per_second": 2084.624,
"eval_PAWSX_steps_per_second": 16.677,
"step": 75410
},
{
"epoch": 10.0,
"eval_QBQTC_loss": 13.890397071838379,
"eval_QBQTC_runtime": 7.674,
"eval_QBQTC_samples_per_second": 2606.201,
"eval_QBQTC_steps_per_second": 20.459,
"step": 75410
},
{
"epoch": 10.0,
"eval_STS-B_loss": 10.956206321716309,
"eval_STS-B_runtime": 0.5649,
"eval_STS-B_samples_per_second": 2580.888,
"eval_STS-B_steps_per_second": 21.242,
"step": 75410
},
{
"epoch": 11.0,
"grad_norm": 0.15993598103523254,
"learning_rate": 5.933027513923951e-06,
"loss": 1.0267,
"step": 82951
},
{
"epoch": 11.0,
"eval_AFQMC_loss": 0.01858402043581009,
"eval_AFQMC_runtime": 99.2423,
"eval_AFQMC_samples_per_second": 43.49,
"eval_AFQMC_steps_per_second": 0.343,
"eval_emb_eval_pearson_cosine": 0.635671675332461,
"eval_emb_eval_spearman_cosine": 0.6462278753331322,
"step": 82951
},
{
"epoch": 11.0,
"eval_ATEC_loss": 0.012706396169960499,
"eval_ATEC_runtime": 6.5392,
"eval_ATEC_samples_per_second": 3058.477,
"eval_ATEC_steps_per_second": 24.009,
"step": 82951
},
{
"epoch": 11.0,
"eval_BQ_loss": 0.014143843203783035,
"eval_BQ_runtime": 3.3932,
"eval_BQ_samples_per_second": 2947.098,
"eval_BQ_steps_per_second": 23.282,
"step": 82951
},
{
"epoch": 11.0,
"eval_Cmnli_loss": 0.014601893723011017,
"eval_Cmnli_runtime": 3.7177,
"eval_Cmnli_samples_per_second": 2236.627,
"eval_Cmnli_steps_per_second": 17.484,
"step": 82951
},
{
"epoch": 11.0,
"eval_LCQMC_loss": 0.01407212857156992,
"eval_LCQMC_runtime": 2.3166,
"eval_LCQMC_samples_per_second": 3799.506,
"eval_LCQMC_steps_per_second": 29.785,
"step": 82951
},
{
"epoch": 11.0,
"eval_Ocnli_loss": 0.015255914069712162,
"eval_Ocnli_runtime": 0.6096,
"eval_Ocnli_samples_per_second": 3029.719,
"eval_Ocnli_steps_per_second": 24.605,
"step": 82951
},
{
"epoch": 11.0,
"eval_PAWSX_loss": 0.04009222611784935,
"eval_PAWSX_runtime": 0.9569,
"eval_PAWSX_samples_per_second": 2090.135,
"eval_PAWSX_steps_per_second": 16.721,
"step": 82951
},
{
"epoch": 11.0,
"eval_QBQTC_loss": 14.71971607208252,
"eval_QBQTC_runtime": 7.651,
"eval_QBQTC_samples_per_second": 2614.025,
"eval_QBQTC_steps_per_second": 20.52,
"step": 82951
},
{
"epoch": 11.0,
"eval_STS-B_loss": 11.138467788696289,
"eval_STS-B_runtime": 0.5605,
"eval_STS-B_samples_per_second": 2601.08,
"eval_STS-B_steps_per_second": 21.408,
"step": 82951
},
{
"epoch": 12.0,
"grad_norm": 0.22925728559494019,
"learning_rate": 4.451735214089958e-06,
"loss": 0.9688,
"step": 90492
},
{
"epoch": 12.0,
"eval_AFQMC_loss": 0.018353162333369255,
"eval_AFQMC_runtime": 98.1243,
"eval_AFQMC_samples_per_second": 43.985,
"eval_AFQMC_steps_per_second": 0.346,
"eval_emb_eval_pearson_cosine": 0.6340788346277473,
"eval_emb_eval_spearman_cosine": 0.6444001384260496,
"step": 90492
},
{
"epoch": 12.0,
"eval_ATEC_loss": 0.012524303048849106,
"eval_ATEC_runtime": 6.5509,
"eval_ATEC_samples_per_second": 3052.999,
"eval_ATEC_steps_per_second": 23.966,
"step": 90492
},
{
"epoch": 12.0,
"eval_BQ_loss": 0.014260655269026756,
"eval_BQ_runtime": 3.4399,
"eval_BQ_samples_per_second": 2907.038,
"eval_BQ_steps_per_second": 22.966,
"step": 90492
},
{
"epoch": 12.0,
"eval_Cmnli_loss": 0.014628582634031773,
"eval_Cmnli_runtime": 3.7252,
"eval_Cmnli_samples_per_second": 2232.102,
"eval_Cmnli_steps_per_second": 17.449,
"step": 90492
},
{
"epoch": 12.0,
"eval_LCQMC_loss": 0.013899387791752815,
"eval_LCQMC_runtime": 2.349,
"eval_LCQMC_samples_per_second": 3747.087,
"eval_LCQMC_steps_per_second": 29.374,
"step": 90492
},
{
"epoch": 12.0,
"eval_Ocnli_loss": 0.015396999195218086,
"eval_Ocnli_runtime": 0.6119,
"eval_Ocnli_samples_per_second": 3018.253,
"eval_Ocnli_steps_per_second": 24.512,
"step": 90492
},
{
"epoch": 12.0,
"eval_PAWSX_loss": 0.04036952182650566,
"eval_PAWSX_runtime": 0.9649,
"eval_PAWSX_samples_per_second": 2072.698,
"eval_PAWSX_steps_per_second": 16.582,
"step": 90492
},
{
"epoch": 12.0,
"eval_QBQTC_loss": 15.251015663146973,
"eval_QBQTC_runtime": 7.662,
"eval_QBQTC_samples_per_second": 2610.3,
"eval_QBQTC_steps_per_second": 20.491,
"step": 90492
},
{
"epoch": 12.0,
"eval_STS-B_loss": 11.509617805480957,
"eval_STS-B_runtime": 0.5625,
"eval_STS-B_samples_per_second": 2592.013,
"eval_STS-B_steps_per_second": 21.333,
"step": 90492
}
],
"logging_steps": 500,
"max_steps": 113115,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}