{
"best_metric": 0.7979517910648003,
"best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-16219",
"epoch": 7.0,
"eval_steps": 500,
"global_step": 16219,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 0.9638161659240723,
"learning_rate": 4.315925766076824e-09,
"loss": 0.6827,
"step": 1
},
{
"epoch": 0.09,
"grad_norm": 2.8890857696533203,
"learning_rate": 8.631851532153649e-07,
"loss": 0.6745,
"step": 200
},
{
"epoch": 0.17,
"grad_norm": 2.8610997200012207,
"learning_rate": 1.7263703064307298e-06,
"loss": 0.5921,
"step": 400
},
{
"epoch": 0.26,
"grad_norm": 4.058478832244873,
"learning_rate": 2.5895554596460943e-06,
"loss": 0.5238,
"step": 600
},
{
"epoch": 0.35,
"grad_norm": 2.51100492477417,
"learning_rate": 3.4527406128614595e-06,
"loss": 0.4943,
"step": 800
},
{
"epoch": 0.43,
"grad_norm": 3.381241798400879,
"learning_rate": 4.3159257660768235e-06,
"loss": 0.4882,
"step": 1000
},
{
"epoch": 0.52,
"grad_norm": 3.5782713890075684,
"learning_rate": 5.179110919292189e-06,
"loss": 0.4782,
"step": 1200
},
{
"epoch": 0.6,
"grad_norm": 5.436559200286865,
"learning_rate": 6.042296072507553e-06,
"loss": 0.4719,
"step": 1400
},
{
"epoch": 0.69,
"grad_norm": 2.6600098609924316,
"learning_rate": 6.905481225722919e-06,
"loss": 0.4593,
"step": 1600
},
{
"epoch": 0.78,
"grad_norm": 4.711264610290527,
"learning_rate": 7.768666378938283e-06,
"loss": 0.4629,
"step": 1800
},
{
"epoch": 0.86,
"grad_norm": 7.427570819854736,
"learning_rate": 8.631851532153647e-06,
"loss": 0.4549,
"step": 2000
},
{
"epoch": 0.95,
"grad_norm": 2.386420965194702,
"learning_rate": 9.495036685369013e-06,
"loss": 0.4572,
"step": 2200
},
{
"epoch": 1.0,
"eval_f2": 0.7404627946070191,
"eval_loss": 0.4705376923084259,
"eval_precision": 0.47348360206208184,
"eval_recall": 0.8619708466453674,
"eval_runtime": 147.2641,
"eval_samples_per_second": 816.166,
"eval_steps_per_second": 6.376,
"step": 2317
},
{
"epoch": 1.04,
"grad_norm": 3.0445728302001953,
"learning_rate": 9.999609111230013e-06,
"loss": 0.4511,
"step": 2400
},
{
"epoch": 1.12,
"grad_norm": 2.2899084091186523,
"learning_rate": 9.995456298935638e-06,
"loss": 0.447,
"step": 2600
},
{
"epoch": 1.21,
"grad_norm": 4.465543270111084,
"learning_rate": 9.986768615435655e-06,
"loss": 0.4405,
"step": 2800
},
{
"epoch": 1.29,
"grad_norm": 4.2559404373168945,
"learning_rate": 9.973553947402149e-06,
"loss": 0.4436,
"step": 3000
},
{
"epoch": 1.38,
"grad_norm": 4.245833873748779,
"learning_rate": 9.955824291100119e-06,
"loss": 0.4393,
"step": 3200
},
{
"epoch": 1.47,
"grad_norm": 4.579006671905518,
"learning_rate": 9.933595741497281e-06,
"loss": 0.4376,
"step": 3400
},
{
"epoch": 1.55,
"grad_norm": 7.389001369476318,
"learning_rate": 9.906888477653065e-06,
"loss": 0.4373,
"step": 3600
},
{
"epoch": 1.64,
"grad_norm": 4.0490312576293945,
"learning_rate": 9.875726744400081e-06,
"loss": 0.4345,
"step": 3800
},
{
"epoch": 1.73,
"grad_norm": 2.065431594848633,
"learning_rate": 9.840138830334685e-06,
"loss": 0.4327,
"step": 4000
},
{
"epoch": 1.81,
"grad_norm": 3.0262961387634277,
"learning_rate": 9.800157042136608e-06,
"loss": 0.4327,
"step": 4200
},
{
"epoch": 1.9,
"grad_norm": 2.6107230186462402,
"learning_rate": 9.755817675240981e-06,
"loss": 0.4294,
"step": 4400
},
{
"epoch": 1.99,
"grad_norm": 4.621052265167236,
"learning_rate": 9.707160980889367e-06,
"loss": 0.4283,
"step": 4600
},
{
"epoch": 2.0,
"eval_f2": 0.7717545519035234,
"eval_loss": 0.4515298902988434,
"eval_precision": 0.4774057047330478,
"eval_recall": 0.9123901757188498,
"eval_runtime": 148.5197,
"eval_samples_per_second": 809.266,
"eval_steps_per_second": 6.322,
"step": 4634
},
{
"epoch": 2.07,
"grad_norm": 3.324246644973755,
"learning_rate": 9.65423112958972e-06,
"loss": 0.4244,
"step": 4800
},
{
"epoch": 2.16,
"grad_norm": 4.125003337860107,
"learning_rate": 9.597076171018426e-06,
"loss": 0.4183,
"step": 5000
},
{
"epoch": 2.24,
"grad_norm": 2.1959779262542725,
"learning_rate": 9.535747990400856e-06,
"loss": 0.4164,
"step": 5200
},
{
"epoch": 2.33,
"grad_norm": 1.7736337184906006,
"learning_rate": 9.470302261409985e-06,
"loss": 0.4205,
"step": 5400
},
{
"epoch": 2.42,
"grad_norm": 4.9517903327941895,
"learning_rate": 9.400798395625894e-06,
"loss": 0.4176,
"step": 5600
},
{
"epoch": 2.5,
"grad_norm": 6.079417705535889,
"learning_rate": 9.327299488601976e-06,
"loss": 0.4202,
"step": 5800
},
{
"epoch": 2.59,
"grad_norm": 1.3246735334396362,
"learning_rate": 9.249872262586839e-06,
"loss": 0.4218,
"step": 6000
},
{
"epoch": 2.68,
"grad_norm": 2.6098692417144775,
"learning_rate": 9.168587005953913e-06,
"loss": 0.4182,
"step": 6200
},
{
"epoch": 2.76,
"grad_norm": 3.9474799633026123,
"learning_rate": 9.083517509393716e-06,
"loss": 0.415,
"step": 6400
},
{
"epoch": 2.85,
"grad_norm": 3.5382373332977295,
"learning_rate": 8.994740998926724e-06,
"loss": 0.4186,
"step": 6600
},
{
"epoch": 2.93,
"grad_norm": 2.33327054977417,
"learning_rate": 8.902338065797648e-06,
"loss": 0.4115,
"step": 6800
},
{
"epoch": 3.0,
"eval_f2": 0.7773410482632972,
"eval_loss": 0.4485355019569397,
"eval_precision": 0.47962529274004684,
"eval_recall": 0.9201277955271565,
"eval_runtime": 147.9088,
"eval_samples_per_second": 812.609,
"eval_steps_per_second": 6.349,
"step": 6951
},
{
"epoch": 3.02,
"grad_norm": 3.269026517868042,
"learning_rate": 8.806392593314781e-06,
"loss": 0.4167,
"step": 7000
},
{
"epoch": 3.11,
"grad_norm": 2.844139575958252,
"learning_rate": 8.70699168070078e-06,
"loss": 0.4101,
"step": 7200
},
{
"epoch": 3.19,
"grad_norm": 2.2456936836242676,
"learning_rate": 8.604225564024074e-06,
"loss": 0.4095,
"step": 7400
},
{
"epoch": 3.28,
"grad_norm": 2.3910915851593018,
"learning_rate": 8.498187534282632e-06,
"loss": 0.407,
"step": 7600
},
{
"epoch": 3.37,
"grad_norm": 2.2186901569366455,
"learning_rate": 8.388973852714463e-06,
"loss": 0.4044,
"step": 7800
},
{
"epoch": 3.45,
"grad_norm": 3.1750833988189697,
"learning_rate": 8.276683663411758e-06,
"loss": 0.4048,
"step": 8000
},
{
"epoch": 3.54,
"grad_norm": 5.709492206573486,
"learning_rate": 8.161418903317936e-06,
"loss": 0.4061,
"step": 8200
},
{
"epoch": 3.63,
"grad_norm": 6.942538738250732,
"learning_rate": 8.043284209689402e-06,
"loss": 0.4067,
"step": 8400
},
{
"epoch": 3.71,
"grad_norm": 1.3552676439285278,
"learning_rate": 7.922386825105899e-06,
"loss": 0.4046,
"step": 8600
},
{
"epoch": 3.8,
"grad_norm": 3.5627899169921875,
"learning_rate": 7.798836500115803e-06,
"loss": 0.407,
"step": 8800
},
{
"epoch": 3.88,
"grad_norm": 2.0421602725982666,
"learning_rate": 7.672745393604649e-06,
"loss": 0.4037,
"step": 9000
},
{
"epoch": 3.97,
"grad_norm": 4.619537830352783,
"learning_rate": 7.544227970977395e-06,
"loss": 0.4021,
"step": 9200
},
{
"epoch": 4.0,
"eval_f2": 0.7901536373601518,
"eval_loss": 0.4387025237083435,
"eval_precision": 0.5217129071170085,
"eval_recall": 0.9067991214057508,
"eval_runtime": 148.7755,
"eval_samples_per_second": 807.875,
"eval_steps_per_second": 6.312,
"step": 9268
},
{
"epoch": 4.06,
"grad_norm": 1.7471312284469604,
"learning_rate": 7.413400900246815e-06,
"loss": 0.3959,
"step": 9400
},
{
"epoch": 4.14,
"grad_norm": 7.655578136444092,
"learning_rate": 7.280382946122369e-06,
"loss": 0.3996,
"step": 9600
},
{
"epoch": 4.23,
"grad_norm": 4.281186580657959,
"learning_rate": 7.1452948621957e-06,
"loss": 0.3943,
"step": 9800
},
{
"epoch": 4.32,
"grad_norm": 3.821415424346924,
"learning_rate": 7.0082592813206175e-06,
"loss": 0.3972,
"step": 10000
},
{
"epoch": 4.4,
"grad_norm": 2.6942179203033447,
"learning_rate": 6.869400604287093e-06,
"loss": 0.3939,
"step": 10200
},
{
"epoch": 4.49,
"grad_norm": 4.14546012878418,
"learning_rate": 6.7288448868903225e-06,
"loss": 0.3997,
"step": 10400
},
{
"epoch": 4.57,
"grad_norm": 2.8214612007141113,
"learning_rate": 6.586719725497375e-06,
"loss": 0.3993,
"step": 10600
},
{
"epoch": 4.66,
"grad_norm": 2.5755832195281982,
"learning_rate": 6.443154141215318e-06,
"loss": 0.3969,
"step": 10800
},
{
"epoch": 4.75,
"grad_norm": 1.6153743267059326,
"learning_rate": 6.298278462765959e-06,
"loss": 0.3987,
"step": 11000
},
{
"epoch": 4.83,
"grad_norm": 7.3942766189575195,
"learning_rate": 6.152224208173533e-06,
"loss": 0.3946,
"step": 11200
},
{
"epoch": 4.92,
"grad_norm": 1.083901047706604,
"learning_rate": 6.005123965372751e-06,
"loss": 0.3918,
"step": 11400
},
{
"epoch": 5.0,
"eval_f2": 0.7704512576415139,
"eval_loss": 0.44662219285964966,
"eval_precision": 0.6110967168819632,
"eval_recall": 0.8241813099041534,
"eval_runtime": 149.5247,
"eval_samples_per_second": 803.827,
"eval_steps_per_second": 6.28,
"step": 11585
},
{
"epoch": 5.01,
"grad_norm": 3.2887141704559326,
"learning_rate": 5.857111271845573e-06,
"loss": 0.3935,
"step": 11600
},
{
"epoch": 5.09,
"grad_norm": 2.212423801422119,
"learning_rate": 5.708320493395999e-06,
"loss": 0.3908,
"step": 11800
},
{
"epoch": 5.18,
"grad_norm": 0.9465045928955078,
"learning_rate": 5.558886702172891e-06,
"loss": 0.3854,
"step": 12000
},
{
"epoch": 5.27,
"grad_norm": 5.766241073608398,
"learning_rate": 5.408945554051591e-06,
"loss": 0.3912,
"step": 12200
},
{
"epoch": 5.35,
"grad_norm": 4.334184646606445,
"learning_rate": 5.258633165485625e-06,
"loss": 0.3866,
"step": 12400
},
{
"epoch": 5.44,
"grad_norm": 4.286846160888672,
"learning_rate": 5.108085989940292e-06,
"loss": 0.3919,
"step": 12600
},
{
"epoch": 5.52,
"grad_norm": 1.167948603630066,
"learning_rate": 4.95744069402033e-06,
"loss": 0.3898,
"step": 12800
},
{
"epoch": 5.61,
"grad_norm": 4.312135696411133,
"learning_rate": 4.806834033404065e-06,
"loss": 0.3894,
"step": 13000
},
{
"epoch": 5.7,
"grad_norm": 2.726715564727783,
"learning_rate": 4.6564027286967275e-06,
"loss": 0.3918,
"step": 13200
},
{
"epoch": 5.78,
"grad_norm": 2.4171202182769775,
"learning_rate": 4.5062833413156e-06,
"loss": 0.3917,
"step": 13400
},
{
"epoch": 5.87,
"grad_norm": 5.957132339477539,
"learning_rate": 4.3566121495196656e-06,
"loss": 0.3849,
"step": 13600
},
{
"epoch": 5.96,
"grad_norm": 4.632606029510498,
"learning_rate": 4.20752502469631e-06,
"loss": 0.3879,
"step": 13800
},
{
"epoch": 6.0,
"eval_f2": 0.7946750167423844,
"eval_loss": 0.43369919061660767,
"eval_precision": 0.5782680276588739,
"eval_recall": 0.8766972843450479,
"eval_runtime": 148.7395,
"eval_samples_per_second": 808.07,
"eval_steps_per_second": 6.313,
"step": 13902
},
{
"epoch": 6.04,
"grad_norm": 1.9424314498901367,
"learning_rate": 4.0591573080173995e-06,
"loss": 0.3873,
"step": 14000
},
{
"epoch": 6.13,
"grad_norm": 2.831543445587158,
"learning_rate": 3.911643687576664e-06,
"loss": 0.3807,
"step": 14200
},
{
"epoch": 6.21,
"grad_norm": 2.7500486373901367,
"learning_rate": 3.7651180761199505e-06,
"loss": 0.3801,
"step": 14400
},
{
"epoch": 6.3,
"grad_norm": 4.015429496765137,
"learning_rate": 3.619713489479354e-06,
"loss": 0.3824,
"step": 14600
},
{
"epoch": 6.39,
"grad_norm": 2.4831278324127197,
"learning_rate": 3.4755619258215407e-06,
"loss": 0.3808,
"step": 14800
},
{
"epoch": 6.47,
"grad_norm": 7.442523002624512,
"learning_rate": 3.3327942458199193e-06,
"loss": 0.385,
"step": 15000
},
{
"epoch": 6.56,
"grad_norm": 4.242701530456543,
"learning_rate": 3.1915400538594333e-06,
"loss": 0.3832,
"step": 15200
},
{
"epoch": 6.65,
"grad_norm": 5.000258922576904,
"learning_rate": 3.0519275803818014e-06,
"loss": 0.3805,
"step": 15400
},
{
"epoch": 6.73,
"grad_norm": 2.0560362339019775,
"learning_rate": 2.914083565478024e-06,
"loss": 0.381,
"step": 15600
},
{
"epoch": 6.82,
"grad_norm": 1.8777313232421875,
"learning_rate": 2.7781331438338317e-06,
"loss": 0.3831,
"step": 15800
},
{
"epoch": 6.91,
"grad_norm": 1.0822844505310059,
"learning_rate": 2.6441997311325196e-06,
"loss": 0.3797,
"step": 16000
},
{
"epoch": 6.99,
"grad_norm": 3.2328083515167236,
"learning_rate": 2.5124049120182916e-06,
"loss": 0.383,
"step": 16200
},
{
"epoch": 7.0,
"eval_f2": 0.7979517910648003,
"eval_loss": 0.4335618019104004,
"eval_precision": 0.5632793509486378,
"eval_recall": 0.8907248402555911,
"eval_runtime": 148.5629,
"eval_samples_per_second": 809.031,
"eval_steps_per_second": 6.321,
"step": 16219
}
],
"logging_steps": 200,
"max_steps": 23170,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 5.462269789659464e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
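The JSON above is the Trainer state saved alongside this checkpoint. Below is a minimal sketch of inspecting it with Python's standard json module, assuming the file is stored as trainer_state.json under the checkpoint directory recorded in best_model_checkpoint (the exact path is an assumption; adjust it to where the checkpoint actually lives). It prints the best metric and the end-of-epoch evaluation rows, i.e. the log_history entries that carry eval_f2.

import json

# Load the trainer state written by the Hugging Face Trainer.
# Path is an assumption based on "best_model_checkpoint" above.
path = "checkpoint/cross_encoder_20250522_full_data/checkpoint-16219/trainer_state.json"
with open(path) as f:
    state = json.load(f)

print("best F2:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Entries containing "eval_f2" are the end-of-epoch evaluations;
# the remaining entries are step-level training-loss logs.
for entry in state["log_history"]:
    if "eval_f2" in entry:
        print(f"epoch {entry['epoch']:>4}: "
              f"f2={entry['eval_f2']:.4f} "
              f"precision={entry['eval_precision']:.4f} "
              f"recall={entry['eval_recall']:.4f} "
              f"loss={entry['eval_loss']:.4f}")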