{ "best_metric": 0.7979517910648003, "best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-16219", "epoch": 7.0, "eval_steps": 500, "global_step": 16219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.9638161659240723, "learning_rate": 4.315925766076824e-09, "loss": 0.6827, "step": 1 }, { "epoch": 0.09, "grad_norm": 2.8890857696533203, "learning_rate": 8.631851532153649e-07, "loss": 0.6745, "step": 200 }, { "epoch": 0.17, "grad_norm": 2.8610997200012207, "learning_rate": 1.7263703064307298e-06, "loss": 0.5921, "step": 400 }, { "epoch": 0.26, "grad_norm": 4.058478832244873, "learning_rate": 2.5895554596460943e-06, "loss": 0.5238, "step": 600 }, { "epoch": 0.35, "grad_norm": 2.51100492477417, "learning_rate": 3.4527406128614595e-06, "loss": 0.4943, "step": 800 }, { "epoch": 0.43, "grad_norm": 3.381241798400879, "learning_rate": 4.3159257660768235e-06, "loss": 0.4882, "step": 1000 }, { "epoch": 0.52, "grad_norm": 3.5782713890075684, "learning_rate": 5.179110919292189e-06, "loss": 0.4782, "step": 1200 }, { "epoch": 0.6, "grad_norm": 5.436559200286865, "learning_rate": 6.042296072507553e-06, "loss": 0.4719, "step": 1400 }, { "epoch": 0.69, "grad_norm": 2.6600098609924316, "learning_rate": 6.905481225722919e-06, "loss": 0.4593, "step": 1600 }, { "epoch": 0.78, "grad_norm": 4.711264610290527, "learning_rate": 7.768666378938283e-06, "loss": 0.4629, "step": 1800 }, { "epoch": 0.86, "grad_norm": 7.427570819854736, "learning_rate": 8.631851532153647e-06, "loss": 0.4549, "step": 2000 }, { "epoch": 0.95, "grad_norm": 2.386420965194702, "learning_rate": 9.495036685369013e-06, "loss": 0.4572, "step": 2200 }, { "epoch": 1.0, "eval_f2": 0.7404627946070191, "eval_loss": 0.4705376923084259, "eval_precision": 0.47348360206208184, "eval_recall": 0.8619708466453674, "eval_runtime": 147.2641, "eval_samples_per_second": 816.166, "eval_steps_per_second": 6.376, "step": 2317 }, { "epoch": 1.04, "grad_norm": 3.0445728302001953, "learning_rate": 9.999609111230013e-06, "loss": 0.4511, "step": 2400 }, { "epoch": 1.12, "grad_norm": 2.2899084091186523, "learning_rate": 9.995456298935638e-06, "loss": 0.447, "step": 2600 }, { "epoch": 1.21, "grad_norm": 4.465543270111084, "learning_rate": 9.986768615435655e-06, "loss": 0.4405, "step": 2800 }, { "epoch": 1.29, "grad_norm": 4.2559404373168945, "learning_rate": 9.973553947402149e-06, "loss": 0.4436, "step": 3000 }, { "epoch": 1.38, "grad_norm": 4.245833873748779, "learning_rate": 9.955824291100119e-06, "loss": 0.4393, "step": 3200 }, { "epoch": 1.47, "grad_norm": 4.579006671905518, "learning_rate": 9.933595741497281e-06, "loss": 0.4376, "step": 3400 }, { "epoch": 1.55, "grad_norm": 7.389001369476318, "learning_rate": 9.906888477653065e-06, "loss": 0.4373, "step": 3600 }, { "epoch": 1.64, "grad_norm": 4.0490312576293945, "learning_rate": 9.875726744400081e-06, "loss": 0.4345, "step": 3800 }, { "epoch": 1.73, "grad_norm": 2.065431594848633, "learning_rate": 9.840138830334685e-06, "loss": 0.4327, "step": 4000 }, { "epoch": 1.81, "grad_norm": 3.0262961387634277, "learning_rate": 9.800157042136608e-06, "loss": 0.4327, "step": 4200 }, { "epoch": 1.9, "grad_norm": 2.6107230186462402, "learning_rate": 9.755817675240981e-06, "loss": 0.4294, "step": 4400 }, { "epoch": 1.99, "grad_norm": 4.621052265167236, "learning_rate": 9.707160980889367e-06, "loss": 0.4283, "step": 4600 }, { "epoch": 2.0, "eval_f2": 0.7717545519035234, "eval_loss": 0.4515298902988434, "eval_precision": 0.4774057047330478, "eval_recall": 0.9123901757188498, "eval_runtime": 148.5197, "eval_samples_per_second": 809.266, "eval_steps_per_second": 6.322, "step": 4634 }, { "epoch": 2.07, "grad_norm": 3.324246644973755, "learning_rate": 9.65423112958972e-06, "loss": 0.4244, "step": 4800 }, { "epoch": 2.16, "grad_norm": 4.125003337860107, "learning_rate": 9.597076171018426e-06, "loss": 0.4183, "step": 5000 }, { "epoch": 2.24, "grad_norm": 2.1959779262542725, "learning_rate": 9.535747990400856e-06, "loss": 0.4164, "step": 5200 }, { "epoch": 2.33, "grad_norm": 1.7736337184906006, "learning_rate": 9.470302261409985e-06, "loss": 0.4205, "step": 5400 }, { "epoch": 2.42, "grad_norm": 4.9517903327941895, "learning_rate": 9.400798395625894e-06, "loss": 0.4176, "step": 5600 }, { "epoch": 2.5, "grad_norm": 6.079417705535889, "learning_rate": 9.327299488601976e-06, "loss": 0.4202, "step": 5800 }, { "epoch": 2.59, "grad_norm": 1.3246735334396362, "learning_rate": 9.249872262586839e-06, "loss": 0.4218, "step": 6000 }, { "epoch": 2.68, "grad_norm": 2.6098692417144775, "learning_rate": 9.168587005953913e-06, "loss": 0.4182, "step": 6200 }, { "epoch": 2.76, "grad_norm": 3.9474799633026123, "learning_rate": 9.083517509393716e-06, "loss": 0.415, "step": 6400 }, { "epoch": 2.85, "grad_norm": 3.5382373332977295, "learning_rate": 8.994740998926724e-06, "loss": 0.4186, "step": 6600 }, { "epoch": 2.93, "grad_norm": 2.33327054977417, "learning_rate": 8.902338065797648e-06, "loss": 0.4115, "step": 6800 }, { "epoch": 3.0, "eval_f2": 0.7773410482632972, "eval_loss": 0.4485355019569397, "eval_precision": 0.47962529274004684, "eval_recall": 0.9201277955271565, "eval_runtime": 147.9088, "eval_samples_per_second": 812.609, "eval_steps_per_second": 6.349, "step": 6951 }, { "epoch": 3.02, "grad_norm": 3.269026517868042, "learning_rate": 8.806392593314781e-06, "loss": 0.4167, "step": 7000 }, { "epoch": 3.11, "grad_norm": 2.844139575958252, "learning_rate": 8.70699168070078e-06, "loss": 0.4101, "step": 7200 }, { "epoch": 3.19, "grad_norm": 2.2456936836242676, "learning_rate": 8.604225564024074e-06, "loss": 0.4095, "step": 7400 }, { "epoch": 3.28, "grad_norm": 2.3910915851593018, "learning_rate": 8.498187534282632e-06, "loss": 0.407, "step": 7600 }, { "epoch": 3.37, "grad_norm": 2.2186901569366455, "learning_rate": 8.388973852714463e-06, "loss": 0.4044, "step": 7800 }, { "epoch": 3.45, "grad_norm": 3.1750833988189697, "learning_rate": 8.276683663411758e-06, "loss": 0.4048, "step": 8000 }, { "epoch": 3.54, "grad_norm": 5.709492206573486, "learning_rate": 8.161418903317936e-06, "loss": 0.4061, "step": 8200 }, { "epoch": 3.63, "grad_norm": 6.942538738250732, "learning_rate": 8.043284209689402e-06, "loss": 0.4067, "step": 8400 }, { "epoch": 3.71, "grad_norm": 1.3552676439285278, "learning_rate": 7.922386825105899e-06, "loss": 0.4046, "step": 8600 }, { "epoch": 3.8, "grad_norm": 3.5627899169921875, "learning_rate": 7.798836500115803e-06, "loss": 0.407, "step": 8800 }, { "epoch": 3.88, "grad_norm": 2.0421602725982666, "learning_rate": 7.672745393604649e-06, "loss": 0.4037, "step": 9000 }, { "epoch": 3.97, "grad_norm": 4.619537830352783, "learning_rate": 7.544227970977395e-06, "loss": 0.4021, "step": 9200 }, { "epoch": 4.0, "eval_f2": 0.7901536373601518, "eval_loss": 0.4387025237083435, "eval_precision": 0.5217129071170085, "eval_recall": 0.9067991214057508, "eval_runtime": 148.7755, "eval_samples_per_second": 807.875, "eval_steps_per_second": 6.312, "step": 9268 }, { "epoch": 4.06, "grad_norm": 1.7471312284469604, "learning_rate": 7.413400900246815e-06, "loss": 0.3959, "step": 9400 }, { "epoch": 4.14, "grad_norm": 7.655578136444092, "learning_rate": 7.280382946122369e-06, "loss": 0.3996, "step": 9600 }, { "epoch": 4.23, "grad_norm": 4.281186580657959, "learning_rate": 7.1452948621957e-06, "loss": 0.3943, "step": 9800 }, { "epoch": 4.32, "grad_norm": 3.821415424346924, "learning_rate": 7.0082592813206175e-06, "loss": 0.3972, "step": 10000 }, { "epoch": 4.4, "grad_norm": 2.6942179203033447, "learning_rate": 6.869400604287093e-06, "loss": 0.3939, "step": 10200 }, { "epoch": 4.49, "grad_norm": 4.14546012878418, "learning_rate": 6.7288448868903225e-06, "loss": 0.3997, "step": 10400 }, { "epoch": 4.57, "grad_norm": 2.8214612007141113, "learning_rate": 6.586719725497375e-06, "loss": 0.3993, "step": 10600 }, { "epoch": 4.66, "grad_norm": 2.5755832195281982, "learning_rate": 6.443154141215318e-06, "loss": 0.3969, "step": 10800 }, { "epoch": 4.75, "grad_norm": 1.6153743267059326, "learning_rate": 6.298278462765959e-06, "loss": 0.3987, "step": 11000 }, { "epoch": 4.83, "grad_norm": 7.3942766189575195, "learning_rate": 6.152224208173533e-06, "loss": 0.3946, "step": 11200 }, { "epoch": 4.92, "grad_norm": 1.083901047706604, "learning_rate": 6.005123965372751e-06, "loss": 0.3918, "step": 11400 }, { "epoch": 5.0, "eval_f2": 0.7704512576415139, "eval_loss": 0.44662219285964966, "eval_precision": 0.6110967168819632, "eval_recall": 0.8241813099041534, "eval_runtime": 149.5247, "eval_samples_per_second": 803.827, "eval_steps_per_second": 6.28, "step": 11585 }, { "epoch": 5.01, "grad_norm": 3.2887141704559326, "learning_rate": 5.857111271845573e-06, "loss": 0.3935, "step": 11600 }, { "epoch": 5.09, "grad_norm": 2.212423801422119, "learning_rate": 5.708320493395999e-06, "loss": 0.3908, "step": 11800 }, { "epoch": 5.18, "grad_norm": 0.9465045928955078, "learning_rate": 5.558886702172891e-06, "loss": 0.3854, "step": 12000 }, { "epoch": 5.27, "grad_norm": 5.766241073608398, "learning_rate": 5.408945554051591e-06, "loss": 0.3912, "step": 12200 }, { "epoch": 5.35, "grad_norm": 4.334184646606445, "learning_rate": 5.258633165485625e-06, "loss": 0.3866, "step": 12400 }, { "epoch": 5.44, "grad_norm": 4.286846160888672, "learning_rate": 5.108085989940292e-06, "loss": 0.3919, "step": 12600 }, { "epoch": 5.52, "grad_norm": 1.167948603630066, "learning_rate": 4.95744069402033e-06, "loss": 0.3898, "step": 12800 }, { "epoch": 5.61, "grad_norm": 4.312135696411133, "learning_rate": 4.806834033404065e-06, "loss": 0.3894, "step": 13000 }, { "epoch": 5.7, "grad_norm": 2.726715564727783, "learning_rate": 4.6564027286967275e-06, "loss": 0.3918, "step": 13200 }, { "epoch": 5.78, "grad_norm": 2.4171202182769775, "learning_rate": 4.5062833413156e-06, "loss": 0.3917, "step": 13400 }, { "epoch": 5.87, "grad_norm": 5.957132339477539, "learning_rate": 4.3566121495196656e-06, "loss": 0.3849, "step": 13600 }, { "epoch": 5.96, "grad_norm": 4.632606029510498, "learning_rate": 4.20752502469631e-06, "loss": 0.3879, "step": 13800 }, { "epoch": 6.0, "eval_f2": 0.7946750167423844, "eval_loss": 0.43369919061660767, "eval_precision": 0.5782680276588739, "eval_recall": 0.8766972843450479, "eval_runtime": 148.7395, "eval_samples_per_second": 808.07, "eval_steps_per_second": 6.313, "step": 13902 }, { "epoch": 6.04, "grad_norm": 1.9424314498901367, "learning_rate": 4.0591573080173995e-06, "loss": 0.3873, "step": 14000 }, { "epoch": 6.13, "grad_norm": 2.831543445587158, "learning_rate": 3.911643687576664e-06, "loss": 0.3807, "step": 14200 }, { "epoch": 6.21, "grad_norm": 2.7500486373901367, "learning_rate": 3.7651180761199505e-06, "loss": 0.3801, "step": 14400 }, { "epoch": 6.3, "grad_norm": 4.015429496765137, "learning_rate": 3.619713489479354e-06, "loss": 0.3824, "step": 14600 }, { "epoch": 6.39, "grad_norm": 2.4831278324127197, "learning_rate": 3.4755619258215407e-06, "loss": 0.3808, "step": 14800 }, { "epoch": 6.47, "grad_norm": 7.442523002624512, "learning_rate": 3.3327942458199193e-06, "loss": 0.385, "step": 15000 }, { "epoch": 6.56, "grad_norm": 4.242701530456543, "learning_rate": 3.1915400538594333e-06, "loss": 0.3832, "step": 15200 }, { "epoch": 6.65, "grad_norm": 5.000258922576904, "learning_rate": 3.0519275803818014e-06, "loss": 0.3805, "step": 15400 }, { "epoch": 6.73, "grad_norm": 2.0560362339019775, "learning_rate": 2.914083565478024e-06, "loss": 0.381, "step": 15600 }, { "epoch": 6.82, "grad_norm": 1.8777313232421875, "learning_rate": 2.7781331438338317e-06, "loss": 0.3831, "step": 15800 }, { "epoch": 6.91, "grad_norm": 1.0822844505310059, "learning_rate": 2.6441997311325196e-06, "loss": 0.3797, "step": 16000 }, { "epoch": 6.99, "grad_norm": 3.2328083515167236, "learning_rate": 2.5124049120182916e-06, "loss": 0.383, "step": 16200 }, { "epoch": 7.0, "eval_f2": 0.7979517910648003, "eval_loss": 0.4335618019104004, "eval_precision": 0.5632793509486378, "eval_recall": 0.8907248402555911, "eval_runtime": 148.5629, "eval_samples_per_second": 809.031, "eval_steps_per_second": 6.321, "step": 16219 } ], "logging_steps": 200, "max_steps": 23170, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.462269789659464e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }