{
  "best_metric": 0.49999999700000003,
  "best_model_checkpoint": "checkpoint/cross_encoder_20250522_full_data/checkpoint-4633",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4633,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 2.436934471130371,
      "learning_rate": 4.315925766076824e-12,
      "loss": 0.7375,
      "step": 1
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.8707695007324219,
      "learning_rate": 8.631851532153648e-10,
      "loss": 0.7326,
      "step": 200
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.4507315158843994,
      "learning_rate": 1.7263703064307296e-09,
      "loss": 0.734,
      "step": 400
    },
    {
      "epoch": 0.13,
      "grad_norm": 4.8174872398376465,
      "learning_rate": 2.589555459646094e-09,
      "loss": 0.731,
      "step": 600
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.8691744208335876,
      "learning_rate": 3.452740612861459e-09,
      "loss": 0.7356,
      "step": 800
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.7719039916992188,
      "learning_rate": 4.315925766076823e-09,
      "loss": 0.7286,
      "step": 1000
    },
    {
      "epoch": 0.26,
      "grad_norm": 2.3143906593322754,
      "learning_rate": 5.179110919292188e-09,
      "loss": 0.7302,
      "step": 1200
    },
    {
      "epoch": 0.3,
      "grad_norm": 2.4967010021209717,
      "learning_rate": 6.042296072507553e-09,
      "loss": 0.7318,
      "step": 1400
    },
    {
      "epoch": 0.35,
      "grad_norm": 3.2115557193756104,
      "learning_rate": 6.905481225722918e-09,
      "loss": 0.7322,
      "step": 1600
    },
    {
      "epoch": 0.39,
      "grad_norm": 2.6317012310028076,
      "learning_rate": 7.768666378938282e-09,
      "loss": 0.7288,
      "step": 1800
    },
    {
      "epoch": 0.43,
      "grad_norm": 1.578616738319397,
      "learning_rate": 8.631851532153647e-09,
      "loss": 0.727,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.6788175106048584,
      "learning_rate": 9.495036685369011e-09,
      "loss": 0.7236,
      "step": 2200
    },
    {
      "epoch": 0.52,
      "grad_norm": 1.1162360906600952,
      "learning_rate": 9.99960892371536e-09,
      "loss": 0.7234,
      "step": 2400
    },
    {
      "epoch": 0.56,
      "grad_norm": 3.412252426147461,
      "learning_rate": 9.995454119562455e-09,
      "loss": 0.7273,
      "step": 2600
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.049833297729492,
      "learning_rate": 9.986762270880315e-09,
      "loss": 0.7216,
      "step": 2800
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.14269757270813,
      "learning_rate": 9.973541271907098e-09,
      "loss": 0.7236,
      "step": 3000
    },
    {
      "epoch": 0.69,
      "grad_norm": 3.2950925827026367,
      "learning_rate": 9.955803130412195e-09,
      "loss": 0.7257,
      "step": 3200
    },
    {
      "epoch": 0.73,
      "grad_norm": 1.6116753816604614,
      "learning_rate": 9.933563956790353e-09,
      "loss": 0.7202,
      "step": 3400
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.2606937885284424,
      "learning_rate": 9.906843949429669e-09,
      "loss": 0.718,
      "step": 3600
    },
    {
      "epoch": 0.82,
      "grad_norm": 3.41159725189209,
      "learning_rate": 9.875667376366706e-09,
      "loss": 0.717,
      "step": 3800
    },
    {
      "epoch": 0.86,
      "grad_norm": 2.007009983062744,
      "learning_rate": 9.840062553245418e-09,
      "loss": 0.7156,
      "step": 4000
    },
    {
      "epoch": 0.91,
      "grad_norm": 2.2354674339294434,
      "learning_rate": 9.800061817599912e-09,
      "loss": 0.7138,
      "step": 4200
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.80654239654541,
      "learning_rate": 9.755701499484371e-09,
      "loss": 0.7144,
      "step": 4400
    },
    {
      "epoch": 0.99,
      "grad_norm": 5.391232490539551,
      "learning_rate": 9.707021888476834e-09,
      "loss": 0.7088,
      "step": 4600
    },
    {
      "epoch": 1.0,
      "eval_f2": 0.49999999700000003,
      "eval_loss": 0.7074111700057983,
      "eval_precision": 0.16666666666666666,
      "eval_recall": 1.0,
      "eval_runtime": 1108.1527,
      "eval_samples_per_second": 108.462,
      "eval_steps_per_second": 1.695,
      "step": 4633
    }
  ],
  "logging_steps": 200,
  "max_steps": 23165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 7.801698556418458e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}