taicheng's picture
Model save
d2bc76f verified
raw
history blame
6.14 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 96,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010416666666666666,
"grad_norm": 534.9856766765029,
"learning_rate": 3.6623701904189776e-08,
"logits/chosen": -2.590585231781006,
"logits/rejected": -2.5664222240448,
"logps/chosen": -80.29847717285156,
"logps/rejected": -53.10200881958008,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.10416666666666667,
"grad_norm": 522.7071434903489,
"learning_rate": 3.662370190418977e-07,
"logits/chosen": -2.5560922622680664,
"logits/rejected": -2.5382773876190186,
"logps/chosen": -87.9105224609375,
"logps/rejected": -81.0152587890625,
"loss": 0.7035,
"rewards/accuracies": 0.2152777761220932,
"rewards/chosen": 0.0037625303957611322,
"rewards/margins": -0.015120850875973701,
"rewards/rejected": 0.01888338290154934,
"step": 10
},
{
"epoch": 0.20833333333333334,
"grad_norm": 330.8070947624322,
"learning_rate": 3.5415414722060956e-07,
"logits/chosen": -2.6099281311035156,
"logits/rejected": -2.5609025955200195,
"logps/chosen": -102.80690002441406,
"logps/rejected": -89.45201110839844,
"loss": 0.6595,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": 0.4419492781162262,
"rewards/margins": 0.09075690805912018,
"rewards/rejected": 0.3511923849582672,
"step": 20
},
{
"epoch": 0.3125,
"grad_norm": 561.0026948523158,
"learning_rate": 3.1950008155765393e-07,
"logits/chosen": -2.510303020477295,
"logits/rejected": -2.5249061584472656,
"logps/chosen": -67.08929443359375,
"logps/rejected": -75.05818176269531,
"loss": 0.6859,
"rewards/accuracies": 0.3375000059604645,
"rewards/chosen": 0.2826586067676544,
"rewards/margins": 0.39012494683265686,
"rewards/rejected": -0.10746632516384125,
"step": 30
},
{
"epoch": 0.4166666666666667,
"grad_norm": 402.3919923341537,
"learning_rate": 2.6684804225439007e-07,
"logits/chosen": -2.5768940448760986,
"logits/rejected": -2.5649542808532715,
"logps/chosen": -72.13874053955078,
"logps/rejected": -70.94956970214844,
"loss": 0.6997,
"rewards/accuracies": 0.25,
"rewards/chosen": 0.00632839510217309,
"rewards/margins": 0.19826939702033997,
"rewards/rejected": -0.19194099307060242,
"step": 40
},
{
"epoch": 0.5208333333333334,
"grad_norm": 382.89112989307444,
"learning_rate": 2.0314640218361037e-07,
"logits/chosen": -2.494668960571289,
"logits/rejected": -2.5067570209503174,
"logps/chosen": -50.37248229980469,
"logps/rejected": -58.68071746826172,
"loss": 0.695,
"rewards/accuracies": 0.24375000596046448,
"rewards/chosen": 0.4294072091579437,
"rewards/margins": 0.24388039112091064,
"rewards/rejected": 0.1855268031358719,
"step": 50
},
{
"epoch": 0.625,
"grad_norm": 356.73514211856235,
"learning_rate": 1.3680172552929206e-07,
"logits/chosen": -2.58482027053833,
"logits/rejected": -2.566230535507202,
"logps/chosen": -78.02713775634766,
"logps/rejected": -78.90872955322266,
"loss": 0.6665,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": 0.8464384078979492,
"rewards/margins": 0.4981551766395569,
"rewards/rejected": 0.3482832610607147,
"step": 60
},
{
"epoch": 0.7291666666666666,
"grad_norm": 347.90887844338835,
"learning_rate": 7.656937215250093e-08,
"logits/chosen": -2.5671563148498535,
"logits/rejected": -2.558472156524658,
"logps/chosen": -95.0243148803711,
"logps/rejected": -84.78661346435547,
"loss": 0.672,
"rewards/accuracies": 0.34375,
"rewards/chosen": 1.0726714134216309,
"rewards/margins": 0.5336906313896179,
"rewards/rejected": 0.5389808416366577,
"step": 70
},
{
"epoch": 0.8333333333333334,
"grad_norm": 625.7311819069191,
"learning_rate": 3.039807216523069e-08,
"logits/chosen": -2.569451332092285,
"logits/rejected": -2.516343593597412,
"logps/chosen": -84.30049133300781,
"logps/rejected": -78.89418029785156,
"loss": 0.6324,
"rewards/accuracies": 0.41874998807907104,
"rewards/chosen": 1.0922737121582031,
"rewards/margins": 0.6956304311752319,
"rewards/rejected": 0.3966432213783264,
"step": 80
},
{
"epoch": 0.9375,
"grad_norm": 530.9459931532258,
"learning_rate": 4.380949643555316e-09,
"logits/chosen": -2.502892017364502,
"logits/rejected": -2.4977681636810303,
"logps/chosen": -53.26112747192383,
"logps/rejected": -63.1404914855957,
"loss": 0.6758,
"rewards/accuracies": 0.2562499940395355,
"rewards/chosen": 0.58380526304245,
"rewards/margins": 0.31763672828674316,
"rewards/rejected": 0.2661685645580292,
"step": 90
},
{
"epoch": 1.0,
"step": 96,
"total_flos": 0.0,
"train_loss": 0.6724727004766464,
"train_runtime": 961.1077,
"train_samples_per_second": 6.36,
"train_steps_per_second": 0.1
}
],
"logging_steps": 10,
"max_steps": 96,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}