{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 12.0,
  "eval_steps": 500,
  "global_step": 90492,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 0.18677596747875214,
      "learning_rate": 1.3320367751060821e-05,
      "loss": 1.6931,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_AFQMC_loss": 0.02504170872271061,
      "eval_AFQMC_runtime": 99.0724,
      "eval_AFQMC_samples_per_second": 43.564,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.49435896960848713,
      "eval_emb_eval_spearman_cosine": 0.5280909639875639,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_ATEC_loss": 0.01953265070915222,
      "eval_ATEC_runtime": 6.6017,
      "eval_ATEC_samples_per_second": 3029.521,
      "eval_ATEC_steps_per_second": 23.782,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_BQ_loss": 0.01803545467555523,
      "eval_BQ_runtime": 3.4373,
      "eval_BQ_samples_per_second": 2909.235,
      "eval_BQ_steps_per_second": 22.983,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_Cmnli_loss": 0.020238544791936874,
      "eval_Cmnli_runtime": 3.9735,
      "eval_Cmnli_samples_per_second": 2092.612,
      "eval_Cmnli_steps_per_second": 16.358,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_LCQMC_loss": 0.03518352285027504,
      "eval_LCQMC_runtime": 2.3275,
      "eval_LCQMC_samples_per_second": 3781.698,
      "eval_LCQMC_steps_per_second": 29.645,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_Ocnli_loss": 0.025195566937327385,
      "eval_Ocnli_runtime": 0.6266,
      "eval_Ocnli_samples_per_second": 2947.795,
      "eval_Ocnli_steps_per_second": 23.94,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_PAWSX_loss": 0.067341148853302,
      "eval_PAWSX_runtime": 0.9538,
      "eval_PAWSX_samples_per_second": 2096.982,
      "eval_PAWSX_steps_per_second": 16.776,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_QBQTC_loss": 8.280766487121582,
      "eval_QBQTC_runtime": 7.6679,
      "eval_QBQTC_samples_per_second": 2608.269,
      "eval_QBQTC_steps_per_second": 20.475,
      "step": 7541
    },
    {
      "epoch": 1.0,
      "eval_STS-B_loss": 8.6417875289917,
      "eval_STS-B_runtime": 0.5653,
      "eval_STS-B_samples_per_second": 2579.365,
      "eval_STS-B_steps_per_second": 21.229,
      "step": 7541
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.29912641644477844,
      "learning_rate": 1.92609255130006e-05,
      "loss": 1.5717,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_AFQMC_loss": 0.023414788767695427,
      "eval_AFQMC_runtime": 99.0838,
      "eval_AFQMC_samples_per_second": 43.559,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.541482089845575,
      "eval_emb_eval_spearman_cosine": 0.584542245914602,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_ATEC_loss": 0.01733938232064247,
      "eval_ATEC_runtime": 6.5234,
      "eval_ATEC_samples_per_second": 3065.879,
      "eval_ATEC_steps_per_second": 24.067,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_BQ_loss": 0.015580276027321815,
      "eval_BQ_runtime": 3.419,
      "eval_BQ_samples_per_second": 2924.795,
      "eval_BQ_steps_per_second": 23.106,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_Cmnli_loss": 0.016321830451488495,
      "eval_Cmnli_runtime": 3.7867,
      "eval_Cmnli_samples_per_second": 2195.824,
      "eval_Cmnli_steps_per_second": 17.165,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_LCQMC_loss": 0.02696206048130989,
      "eval_LCQMC_runtime": 2.3672,
      "eval_LCQMC_samples_per_second": 3718.264,
      "eval_LCQMC_steps_per_second": 29.148,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_Ocnli_loss": 0.018722666427493095,
      "eval_Ocnli_runtime": 0.6105,
      "eval_Ocnli_samples_per_second": 3025.526,
      "eval_Ocnli_steps_per_second": 24.571,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_PAWSX_loss": 0.06007164344191551,
      "eval_PAWSX_runtime": 0.9605,
      "eval_PAWSX_samples_per_second": 2082.25,
      "eval_PAWSX_steps_per_second": 16.658,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_QBQTC_loss": 8.381814002990723,
      "eval_QBQTC_runtime": 7.644,
      "eval_QBQTC_samples_per_second": 2616.427,
      "eval_QBQTC_steps_per_second": 20.539,
      "step": 15082
    },
    {
      "epoch": 2.0,
      "eval_STS-B_loss": 9.617568969726562,
      "eval_STS-B_runtime": 0.5589,
      "eval_STS-B_samples_per_second": 2608.657,
      "eval_STS-B_steps_per_second": 21.47,
      "step": 15082
    },
    {
      "epoch": 3.0,
      "grad_norm": 0.2338990569114685,
      "learning_rate": 1.7779829671031307e-05,
      "loss": 1.5301,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_AFQMC_loss": 0.02241475135087967,
      "eval_AFQMC_runtime": 99.9714,
      "eval_AFQMC_samples_per_second": 43.172,
      "eval_AFQMC_steps_per_second": 0.34,
      "eval_emb_eval_pearson_cosine": 0.5913082581080403,
      "eval_emb_eval_spearman_cosine": 0.6383284889220768,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_ATEC_loss": 0.016517719253897667,
      "eval_ATEC_runtime": 6.5842,
      "eval_ATEC_samples_per_second": 3037.594,
      "eval_ATEC_steps_per_second": 23.845,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_BQ_loss": 0.014284521341323853,
      "eval_BQ_runtime": 3.406,
      "eval_BQ_samples_per_second": 2936.005,
      "eval_BQ_steps_per_second": 23.194,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_Cmnli_loss": 0.01529396791011095,
      "eval_Cmnli_runtime": 3.7388,
      "eval_Cmnli_samples_per_second": 2223.987,
      "eval_Cmnli_steps_per_second": 17.385,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_LCQMC_loss": 0.020633986219763756,
      "eval_LCQMC_runtime": 2.3364,
      "eval_LCQMC_samples_per_second": 3767.314,
      "eval_LCQMC_steps_per_second": 29.532,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_Ocnli_loss": 0.016242269426584244,
      "eval_Ocnli_runtime": 0.6209,
      "eval_Ocnli_samples_per_second": 2974.536,
      "eval_Ocnli_steps_per_second": 24.157,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_PAWSX_loss": 0.05045855790376663,
      "eval_PAWSX_runtime": 0.9593,
      "eval_PAWSX_samples_per_second": 2084.846,
      "eval_PAWSX_steps_per_second": 16.679,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_QBQTC_loss": 8.840312957763672,
      "eval_QBQTC_runtime": 7.6443,
      "eval_QBQTC_samples_per_second": 2616.334,
      "eval_QBQTC_steps_per_second": 20.538,
      "step": 22623
    },
    {
      "epoch": 3.0,
      "eval_STS-B_loss": 9.837035179138184,
      "eval_STS-B_runtime": 0.5658,
      "eval_STS-B_samples_per_second": 2577.011,
      "eval_STS-B_steps_per_second": 21.21,
      "step": 22623
    },
    {
      "epoch": 4.0,
      "grad_norm": 0.1766250729560852,
      "learning_rate": 1.629912674479141e-05,
      "loss": 1.4768,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_AFQMC_loss": 0.021405402570962906,
      "eval_AFQMC_runtime": 100.3018,
      "eval_AFQMC_samples_per_second": 43.03,
      "eval_AFQMC_steps_per_second": 0.339,
      "eval_emb_eval_pearson_cosine": 0.6125367634857398,
      "eval_emb_eval_spearman_cosine": 0.6570670709310811,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_ATEC_loss": 0.015135602094233036,
      "eval_ATEC_runtime": 6.5473,
      "eval_ATEC_samples_per_second": 3054.678,
      "eval_ATEC_steps_per_second": 23.979,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_BQ_loss": 0.013757712207734585,
      "eval_BQ_runtime": 3.4539,
      "eval_BQ_samples_per_second": 2895.245,
      "eval_BQ_steps_per_second": 22.872,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_Cmnli_loss": 0.015052303671836853,
      "eval_Cmnli_runtime": 3.7534,
      "eval_Cmnli_samples_per_second": 2215.324,
      "eval_Cmnli_steps_per_second": 17.318,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_LCQMC_loss": 0.019115839153528214,
      "eval_LCQMC_runtime": 2.3344,
      "eval_LCQMC_samples_per_second": 3770.582,
      "eval_LCQMC_steps_per_second": 29.558,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_Ocnli_loss": 0.015967663377523422,
      "eval_Ocnli_runtime": 0.603,
      "eval_Ocnli_samples_per_second": 3063.145,
      "eval_Ocnli_steps_per_second": 24.877,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_PAWSX_loss": 0.04721539840102196,
      "eval_PAWSX_runtime": 0.9557,
      "eval_PAWSX_samples_per_second": 2092.706,
      "eval_PAWSX_steps_per_second": 16.742,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_QBQTC_loss": 9.168432235717773,
      "eval_QBQTC_runtime": 7.6741,
      "eval_QBQTC_samples_per_second": 2606.178,
      "eval_QBQTC_steps_per_second": 20.458,
      "step": 30164
    },
    {
      "epoch": 4.0,
      "eval_STS-B_loss": 9.83575439453125,
      "eval_STS-B_runtime": 0.5632,
      "eval_STS-B_samples_per_second": 2588.593,
      "eval_STS-B_steps_per_second": 21.305,
      "step": 30164
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.13791993260383606,
      "learning_rate": 1.481822736068682e-05,
      "loss": 1.4141,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_AFQMC_loss": 0.0204468946903944,
      "eval_AFQMC_runtime": 99.1315,
      "eval_AFQMC_samples_per_second": 43.538,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.6315631932372178,
      "eval_emb_eval_spearman_cosine": 0.6690982617504025,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_ATEC_loss": 0.014196612872183323,
      "eval_ATEC_runtime": 6.5521,
      "eval_ATEC_samples_per_second": 3052.435,
      "eval_ATEC_steps_per_second": 23.962,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_BQ_loss": 0.013593867421150208,
      "eval_BQ_runtime": 3.4128,
      "eval_BQ_samples_per_second": 2930.177,
      "eval_BQ_steps_per_second": 23.148,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_Cmnli_loss": 0.015099190175533295,
      "eval_Cmnli_runtime": 3.8007,
      "eval_Cmnli_samples_per_second": 2187.75,
      "eval_Cmnli_steps_per_second": 17.102,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_LCQMC_loss": 0.017175855115056038,
      "eval_LCQMC_runtime": 2.4095,
      "eval_LCQMC_samples_per_second": 3653.013,
      "eval_LCQMC_steps_per_second": 28.636,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_Ocnli_loss": 0.015738315880298615,
      "eval_Ocnli_runtime": 0.619,
      "eval_Ocnli_samples_per_second": 2983.626,
      "eval_Ocnli_steps_per_second": 24.231,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_PAWSX_loss": 0.046284567564725876,
      "eval_PAWSX_runtime": 0.959,
      "eval_PAWSX_samples_per_second": 2085.505,
      "eval_PAWSX_steps_per_second": 16.684,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_QBQTC_loss": 9.86181640625,
      "eval_QBQTC_runtime": 7.9334,
      "eval_QBQTC_samples_per_second": 2520.974,
      "eval_QBQTC_steps_per_second": 19.79,
      "step": 37705
    },
    {
      "epoch": 5.0,
      "eval_STS-B_loss": 9.911561965942383,
      "eval_STS-B_runtime": 0.5701,
      "eval_STS-B_samples_per_second": 2557.501,
      "eval_STS-B_steps_per_second": 21.049,
      "step": 37705
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.17065556347370148,
      "learning_rate": 1.3337327976582224e-05,
      "loss": 1.3461,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_AFQMC_loss": 0.02002647891640663,
      "eval_AFQMC_runtime": 98.4869,
      "eval_AFQMC_samples_per_second": 43.823,
      "eval_AFQMC_steps_per_second": 0.345,
      "eval_emb_eval_pearson_cosine": 0.6436085641934666,
      "eval_emb_eval_spearman_cosine": 0.6715343325157711,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_ATEC_loss": 0.014070386998355389,
      "eval_ATEC_runtime": 6.5822,
      "eval_ATEC_samples_per_second": 3038.48,
      "eval_ATEC_steps_per_second": 23.852,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_BQ_loss": 0.013585967943072319,
      "eval_BQ_runtime": 3.6468,
      "eval_BQ_samples_per_second": 2742.113,
      "eval_BQ_steps_per_second": 21.663,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_Cmnli_loss": 0.01478442084044218,
      "eval_Cmnli_runtime": 3.7572,
      "eval_Cmnli_samples_per_second": 2213.061,
      "eval_Cmnli_steps_per_second": 17.3,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_LCQMC_loss": 0.016025548800826073,
      "eval_LCQMC_runtime": 2.3232,
      "eval_LCQMC_samples_per_second": 3788.767,
      "eval_LCQMC_steps_per_second": 29.701,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_Ocnli_loss": 0.015464934520423412,
      "eval_Ocnli_runtime": 0.6071,
      "eval_Ocnli_samples_per_second": 3042.383,
      "eval_Ocnli_steps_per_second": 24.708,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_PAWSX_loss": 0.04284413903951645,
      "eval_PAWSX_runtime": 0.9891,
      "eval_PAWSX_samples_per_second": 2021.991,
      "eval_PAWSX_steps_per_second": 16.176,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_QBQTC_loss": 10.840497016906738,
      "eval_QBQTC_runtime": 7.7281,
      "eval_QBQTC_samples_per_second": 2587.958,
      "eval_QBQTC_steps_per_second": 20.315,
      "step": 45246
    },
    {
      "epoch": 6.0,
      "eval_STS-B_loss": 9.943366050720215,
      "eval_STS-B_runtime": 0.5591,
      "eval_STS-B_samples_per_second": 2607.892,
      "eval_STS-B_steps_per_second": 21.464,
      "step": 45246
    },
    {
      "epoch": 7.0,
      "grad_norm": 0.13601745665073395,
      "learning_rate": 1.185642859247763e-05,
      "loss": 1.2856,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_AFQMC_loss": 0.019535699859261513,
      "eval_AFQMC_runtime": 99.2494,
      "eval_AFQMC_samples_per_second": 43.486,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.6473204770905479,
      "eval_emb_eval_spearman_cosine": 0.6723246242283224,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_ATEC_loss": 0.013532574288547039,
      "eval_ATEC_runtime": 6.5765,
      "eval_ATEC_samples_per_second": 3041.133,
      "eval_ATEC_steps_per_second": 23.873,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_BQ_loss": 0.013674370013177395,
      "eval_BQ_runtime": 3.4566,
      "eval_BQ_samples_per_second": 2892.978,
      "eval_BQ_steps_per_second": 22.855,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_Cmnli_loss": 0.014647725969552994,
      "eval_Cmnli_runtime": 3.7546,
      "eval_Cmnli_samples_per_second": 2214.61,
      "eval_Cmnli_steps_per_second": 17.312,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_LCQMC_loss": 0.0150056267157197,
      "eval_LCQMC_runtime": 2.3629,
      "eval_LCQMC_samples_per_second": 3725.146,
      "eval_LCQMC_steps_per_second": 29.202,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_Ocnli_loss": 0.015559999272227287,
      "eval_Ocnli_runtime": 0.6231,
      "eval_Ocnli_samples_per_second": 2964.212,
      "eval_Ocnli_steps_per_second": 24.073,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_PAWSX_loss": 0.04360537603497505,
      "eval_PAWSX_runtime": 0.9535,
      "eval_PAWSX_samples_per_second": 2097.49,
      "eval_PAWSX_steps_per_second": 16.78,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_QBQTC_loss": 11.554633140563965,
      "eval_QBQTC_runtime": 7.7436,
      "eval_QBQTC_samples_per_second": 2582.791,
      "eval_QBQTC_steps_per_second": 20.275,
      "step": 52787
    },
    {
      "epoch": 7.0,
      "eval_STS-B_loss": 10.235774993896484,
      "eval_STS-B_runtime": 0.5543,
      "eval_STS-B_samples_per_second": 2630.224,
      "eval_STS-B_steps_per_second": 21.648,
      "step": 52787
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.11353704333305359,
      "learning_rate": 1.0375332750508335e-05,
      "loss": 1.2182,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_AFQMC_loss": 0.019351305440068245,
      "eval_AFQMC_runtime": 99.553,
      "eval_AFQMC_samples_per_second": 43.354,
      "eval_AFQMC_steps_per_second": 0.342,
      "eval_emb_eval_pearson_cosine": 0.6494222678001456,
      "eval_emb_eval_spearman_cosine": 0.6703225826686093,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_ATEC_loss": 0.013181576505303383,
      "eval_ATEC_runtime": 6.522,
      "eval_ATEC_samples_per_second": 3066.526,
      "eval_ATEC_steps_per_second": 24.072,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_BQ_loss": 0.013695988804101944,
      "eval_BQ_runtime": 3.4134,
      "eval_BQ_samples_per_second": 2929.642,
      "eval_BQ_steps_per_second": 23.144,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_Cmnli_loss": 0.014423331245779991,
      "eval_Cmnli_runtime": 3.7623,
      "eval_Cmnli_samples_per_second": 2210.105,
      "eval_Cmnli_steps_per_second": 17.277,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_LCQMC_loss": 0.015052268281579018,
      "eval_LCQMC_runtime": 2.3257,
      "eval_LCQMC_samples_per_second": 3784.705,
      "eval_LCQMC_steps_per_second": 29.669,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_Ocnli_loss": 0.015219747088849545,
      "eval_Ocnli_runtime": 0.6121,
      "eval_Ocnli_samples_per_second": 3017.401,
      "eval_Ocnli_steps_per_second": 24.505,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_PAWSX_loss": 0.042021822184324265,
      "eval_PAWSX_runtime": 0.9526,
      "eval_PAWSX_samples_per_second": 2099.597,
      "eval_PAWSX_steps_per_second": 16.797,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_QBQTC_loss": 12.265329360961914,
      "eval_QBQTC_runtime": 7.6635,
      "eval_QBQTC_samples_per_second": 2609.768,
      "eval_QBQTC_steps_per_second": 20.487,
      "step": 60328
    },
    {
      "epoch": 8.0,
      "eval_STS-B_loss": 10.596503257751465,
      "eval_STS-B_runtime": 0.5656,
      "eval_STS-B_samples_per_second": 2577.705,
      "eval_STS-B_steps_per_second": 21.216,
      "step": 60328
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.10075096040964127,
      "learning_rate": 8.89443336640374e-06,
      "loss": 1.1546,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_AFQMC_loss": 0.018916338682174683,
      "eval_AFQMC_runtime": 99.1097,
      "eval_AFQMC_samples_per_second": 43.548,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.6461004194477242,
      "eval_emb_eval_spearman_cosine": 0.6620616757187626,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_ATEC_loss": 0.01316931750625372,
      "eval_ATEC_runtime": 6.5342,
      "eval_ATEC_samples_per_second": 3060.808,
      "eval_ATEC_steps_per_second": 24.027,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_BQ_loss": 0.013916433788836002,
      "eval_BQ_runtime": 3.4228,
      "eval_BQ_samples_per_second": 2921.626,
      "eval_BQ_steps_per_second": 23.081,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_Cmnli_loss": 0.014616083353757858,
      "eval_Cmnli_runtime": 3.7166,
      "eval_Cmnli_samples_per_second": 2237.257,
      "eval_Cmnli_steps_per_second": 17.489,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_LCQMC_loss": 0.014678360894322395,
      "eval_LCQMC_runtime": 2.3376,
      "eval_LCQMC_samples_per_second": 3765.326,
      "eval_LCQMC_steps_per_second": 29.517,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_Ocnli_loss": 0.015178242698311806,
      "eval_Ocnli_runtime": 0.6061,
      "eval_Ocnli_samples_per_second": 3047.588,
      "eval_Ocnli_steps_per_second": 24.75,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_PAWSX_loss": 0.04116755351424217,
      "eval_PAWSX_runtime": 0.9482,
      "eval_PAWSX_samples_per_second": 2109.261,
      "eval_PAWSX_steps_per_second": 16.874,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_QBQTC_loss": 12.928996086120605,
      "eval_QBQTC_runtime": 7.7135,
      "eval_QBQTC_samples_per_second": 2592.855,
      "eval_QBQTC_steps_per_second": 20.354,
      "step": 67869
    },
    {
      "epoch": 9.0,
      "eval_STS-B_loss": 10.769329071044922,
      "eval_STS-B_runtime": 0.557,
      "eval_STS-B_samples_per_second": 2617.49,
      "eval_STS-B_steps_per_second": 21.543,
      "step": 67869
    },
    {
      "epoch": 10.0,
      "grad_norm": 116.47360229492188,
      "learning_rate": 7.413533982299147e-06,
      "loss": 1.091,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_AFQMC_loss": 0.01876525580883026,
      "eval_AFQMC_runtime": 106.9735,
      "eval_AFQMC_samples_per_second": 40.346,
      "eval_AFQMC_steps_per_second": 0.318,
      "eval_emb_eval_pearson_cosine": 0.6425398472325708,
      "eval_emb_eval_spearman_cosine": 0.6571917650236837,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_ATEC_loss": 0.012936480343341827,
      "eval_ATEC_runtime": 6.5457,
      "eval_ATEC_samples_per_second": 3055.455,
      "eval_ATEC_steps_per_second": 23.985,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_BQ_loss": 0.014092645607888699,
      "eval_BQ_runtime": 3.4559,
      "eval_BQ_samples_per_second": 2893.566,
      "eval_BQ_steps_per_second": 22.859,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_Cmnli_loss": 0.01457986794412136,
      "eval_Cmnli_runtime": 3.7514,
      "eval_Cmnli_samples_per_second": 2216.51,
      "eval_Cmnli_steps_per_second": 17.327,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_LCQMC_loss": 0.014272717759013176,
      "eval_LCQMC_runtime": 2.3604,
      "eval_LCQMC_samples_per_second": 3729.02,
      "eval_LCQMC_steps_per_second": 29.232,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_Ocnli_loss": 0.015037407167255878,
      "eval_Ocnli_runtime": 0.6116,
      "eval_Ocnli_samples_per_second": 3020.096,
      "eval_Ocnli_steps_per_second": 24.527,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_PAWSX_loss": 0.04113338142633438,
      "eval_PAWSX_runtime": 0.9594,
      "eval_PAWSX_samples_per_second": 2084.624,
      "eval_PAWSX_steps_per_second": 16.677,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_QBQTC_loss": 13.890397071838379,
      "eval_QBQTC_runtime": 7.674,
      "eval_QBQTC_samples_per_second": 2606.201,
      "eval_QBQTC_steps_per_second": 20.459,
      "step": 75410
    },
    {
      "epoch": 10.0,
      "eval_STS-B_loss": 10.956206321716309,
      "eval_STS-B_runtime": 0.5649,
      "eval_STS-B_samples_per_second": 2580.888,
      "eval_STS-B_steps_per_second": 21.242,
      "step": 75410
    },
    {
      "epoch": 11.0,
      "grad_norm": 0.15993598103523254,
      "learning_rate": 5.933027513923951e-06,
      "loss": 1.0267,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_AFQMC_loss": 0.01858402043581009,
      "eval_AFQMC_runtime": 99.2423,
      "eval_AFQMC_samples_per_second": 43.49,
      "eval_AFQMC_steps_per_second": 0.343,
      "eval_emb_eval_pearson_cosine": 0.635671675332461,
      "eval_emb_eval_spearman_cosine": 0.6462278753331322,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_ATEC_loss": 0.012706396169960499,
      "eval_ATEC_runtime": 6.5392,
      "eval_ATEC_samples_per_second": 3058.477,
      "eval_ATEC_steps_per_second": 24.009,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_BQ_loss": 0.014143843203783035,
      "eval_BQ_runtime": 3.3932,
      "eval_BQ_samples_per_second": 2947.098,
      "eval_BQ_steps_per_second": 23.282,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_Cmnli_loss": 0.014601893723011017,
      "eval_Cmnli_runtime": 3.7177,
      "eval_Cmnli_samples_per_second": 2236.627,
      "eval_Cmnli_steps_per_second": 17.484,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_LCQMC_loss": 0.01407212857156992,
      "eval_LCQMC_runtime": 2.3166,
      "eval_LCQMC_samples_per_second": 3799.506,
      "eval_LCQMC_steps_per_second": 29.785,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_Ocnli_loss": 0.015255914069712162,
      "eval_Ocnli_runtime": 0.6096,
      "eval_Ocnli_samples_per_second": 3029.719,
      "eval_Ocnli_steps_per_second": 24.605,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_PAWSX_loss": 0.04009222611784935,
      "eval_PAWSX_runtime": 0.9569,
      "eval_PAWSX_samples_per_second": 2090.135,
      "eval_PAWSX_steps_per_second": 16.721,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_QBQTC_loss": 14.71971607208252,
      "eval_QBQTC_runtime": 7.651,
      "eval_QBQTC_samples_per_second": 2614.025,
      "eval_QBQTC_steps_per_second": 20.52,
      "step": 82951
    },
    {
      "epoch": 11.0,
      "eval_STS-B_loss": 11.138467788696289,
      "eval_STS-B_runtime": 0.5605,
      "eval_STS-B_samples_per_second": 2601.08,
      "eval_STS-B_steps_per_second": 21.408,
      "step": 82951
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.22925728559494019,
      "learning_rate": 4.451735214089958e-06,
      "loss": 0.9688,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_AFQMC_loss": 0.018353162333369255,
      "eval_AFQMC_runtime": 98.1243,
      "eval_AFQMC_samples_per_second": 43.985,
      "eval_AFQMC_steps_per_second": 0.346,
      "eval_emb_eval_pearson_cosine": 0.6340788346277473,
      "eval_emb_eval_spearman_cosine": 0.6444001384260496,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_ATEC_loss": 0.012524303048849106,
      "eval_ATEC_runtime": 6.5509,
      "eval_ATEC_samples_per_second": 3052.999,
      "eval_ATEC_steps_per_second": 23.966,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_BQ_loss": 0.014260655269026756,
      "eval_BQ_runtime": 3.4399,
      "eval_BQ_samples_per_second": 2907.038,
      "eval_BQ_steps_per_second": 22.966,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_Cmnli_loss": 0.014628582634031773,
      "eval_Cmnli_runtime": 3.7252,
      "eval_Cmnli_samples_per_second": 2232.102,
      "eval_Cmnli_steps_per_second": 17.449,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_LCQMC_loss": 0.013899387791752815,
      "eval_LCQMC_runtime": 2.349,
      "eval_LCQMC_samples_per_second": 3747.087,
      "eval_LCQMC_steps_per_second": 29.374,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_Ocnli_loss": 0.015396999195218086,
      "eval_Ocnli_runtime": 0.6119,
      "eval_Ocnli_samples_per_second": 3018.253,
      "eval_Ocnli_steps_per_second": 24.512,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_PAWSX_loss": 0.04036952182650566,
      "eval_PAWSX_runtime": 0.9649,
      "eval_PAWSX_samples_per_second": 2072.698,
      "eval_PAWSX_steps_per_second": 16.582,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_QBQTC_loss": 15.251015663146973,
      "eval_QBQTC_runtime": 7.662,
      "eval_QBQTC_samples_per_second": 2610.3,
      "eval_QBQTC_steps_per_second": 20.491,
      "step": 90492
    },
    {
      "epoch": 12.0,
      "eval_STS-B_loss": 11.509617805480957,
      "eval_STS-B_runtime": 0.5625,
      "eval_STS-B_samples_per_second": 2592.013,
      "eval_STS-B_steps_per_second": 21.333,
      "step": 90492
    }
  ],
  "logging_steps": 500,
  "max_steps": 113115,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}