kimlong22's picture
Training in progress, epoch 1, checkpoint
476e599 verified
{
"best_metric": 0.49999999700000003,
"best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-4633",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4633,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 2.436934471130371,
"learning_rate": 4.315925766076824e-12,
"loss": 0.7375,
"step": 1
},
{
"epoch": 0.04,
"grad_norm": 1.8707695007324219,
"learning_rate": 8.631851532153648e-10,
"loss": 0.7326,
"step": 200
},
{
"epoch": 0.09,
"grad_norm": 1.4507315158843994,
"learning_rate": 1.7263703064307296e-09,
"loss": 0.734,
"step": 400
},
{
"epoch": 0.13,
"grad_norm": 4.8174872398376465,
"learning_rate": 2.589555459646094e-09,
"loss": 0.731,
"step": 600
},
{
"epoch": 0.17,
"grad_norm": 0.8691744208335876,
"learning_rate": 3.452740612861459e-09,
"loss": 0.7356,
"step": 800
},
{
"epoch": 0.22,
"grad_norm": 1.7719039916992188,
"learning_rate": 4.315925766076823e-09,
"loss": 0.7286,
"step": 1000
},
{
"epoch": 0.26,
"grad_norm": 2.3143906593322754,
"learning_rate": 5.179110919292188e-09,
"loss": 0.7302,
"step": 1200
},
{
"epoch": 0.3,
"grad_norm": 2.4967010021209717,
"learning_rate": 6.042296072507553e-09,
"loss": 0.7318,
"step": 1400
},
{
"epoch": 0.35,
"grad_norm": 3.2115557193756104,
"learning_rate": 6.905481225722918e-09,
"loss": 0.7322,
"step": 1600
},
{
"epoch": 0.39,
"grad_norm": 2.6317012310028076,
"learning_rate": 7.768666378938282e-09,
"loss": 0.7288,
"step": 1800
},
{
"epoch": 0.43,
"grad_norm": 1.578616738319397,
"learning_rate": 8.631851532153647e-09,
"loss": 0.727,
"step": 2000
},
{
"epoch": 0.47,
"grad_norm": 2.6788175106048584,
"learning_rate": 9.495036685369011e-09,
"loss": 0.7236,
"step": 2200
},
{
"epoch": 0.52,
"grad_norm": 1.1162360906600952,
"learning_rate": 9.99960892371536e-09,
"loss": 0.7234,
"step": 2400
},
{
"epoch": 0.56,
"grad_norm": 3.412252426147461,
"learning_rate": 9.995454119562455e-09,
"loss": 0.7273,
"step": 2600
},
{
"epoch": 0.6,
"grad_norm": 2.049833297729492,
"learning_rate": 9.986762270880315e-09,
"loss": 0.7216,
"step": 2800
},
{
"epoch": 0.65,
"grad_norm": 2.14269757270813,
"learning_rate": 9.973541271907098e-09,
"loss": 0.7236,
"step": 3000
},
{
"epoch": 0.69,
"grad_norm": 3.2950925827026367,
"learning_rate": 9.955803130412195e-09,
"loss": 0.7257,
"step": 3200
},
{
"epoch": 0.73,
"grad_norm": 1.6116753816604614,
"learning_rate": 9.933563956790353e-09,
"loss": 0.7202,
"step": 3400
},
{
"epoch": 0.78,
"grad_norm": 2.2606937885284424,
"learning_rate": 9.906843949429669e-09,
"loss": 0.718,
"step": 3600
},
{
"epoch": 0.82,
"grad_norm": 3.41159725189209,
"learning_rate": 9.875667376366706e-09,
"loss": 0.717,
"step": 3800
},
{
"epoch": 0.86,
"grad_norm": 2.007009983062744,
"learning_rate": 9.840062553245418e-09,
"loss": 0.7156,
"step": 4000
},
{
"epoch": 0.91,
"grad_norm": 2.2354674339294434,
"learning_rate": 9.800061817599912e-09,
"loss": 0.7138,
"step": 4200
},
{
"epoch": 0.95,
"grad_norm": 3.80654239654541,
"learning_rate": 9.755701499484371e-09,
"loss": 0.7144,
"step": 4400
},
{
"epoch": 0.99,
"grad_norm": 5.391232490539551,
"learning_rate": 9.707021888476834e-09,
"loss": 0.7088,
"step": 4600
},
{
"epoch": 1.0,
"eval_f2": 0.49999999700000003,
"eval_loss": 0.7074111700057983,
"eval_precision": 0.16666666666666666,
"eval_recall": 1.0,
"eval_runtime": 1108.1527,
"eval_samples_per_second": 108.462,
"eval_steps_per_second": 1.695,
"step": 4633
}
],
"logging_steps": 200,
"max_steps": 23165,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 7.801698556418458e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}