{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984496124031008,
  "eval_steps": 100,
  "global_step": 161,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.9411764705882356e-07,
      "logits/chosen": -2.7490084171295166,
      "logits/rejected": -2.6610748767852783,
      "logps/chosen": -299.482421875,
      "logps/rejected": -284.2821960449219,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.71052885055542,
      "logits/rejected": -2.7085328102111816,
      "logps/chosen": -263.4867858886719,
      "logps/rejected": -292.8466491699219,
      "loss": 0.6931,
      "rewards/accuracies": 0.4305555522441864,
      "rewards/chosen": 0.0012557146837934852,
      "rewards/margins": -7.775126141496003e-05,
      "rewards/rejected": 0.0013334659161046147,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.994647308096509e-06,
      "logits/chosen": -2.7212631702423096,
      "logits/rejected": -2.6979565620422363,
      "logps/chosen": -281.99365234375,
      "logps/rejected": -305.00482177734375,
      "loss": 0.6912,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": 0.04867880418896675,
      "rewards/margins": 0.002921257633715868,
      "rewards/rejected": 0.04575754702091217,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.900124635964823e-06,
      "logits/chosen": -2.673609495162964,
      "logits/rejected": -2.6867575645446777,
      "logps/chosen": -256.4033203125,
      "logps/rejected": -289.47882080078125,
      "loss": 0.6871,
      "rewards/accuracies": 0.565625011920929,
      "rewards/chosen": 0.1824948489665985,
      "rewards/margins": 0.019166249781847,
      "rewards/rejected": 0.1633286029100418,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.69181688926877e-06,
      "logits/chosen": -2.6670594215393066,
      "logits/rejected": -2.669649362564087,
      "logps/chosen": -255.5750732421875,
      "logps/rejected": -286.7828063964844,
      "loss": 0.6806,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.09983734786510468,
      "rewards/margins": 0.02368326112627983,
      "rewards/rejected": 0.07615408301353455,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.379599518697444e-06,
      "logits/chosen": -2.6448562145233154,
      "logits/rejected": -2.626420497894287,
      "logps/chosen": -261.439208984375,
      "logps/rejected": -301.18280029296875,
      "loss": 0.6705,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": 0.13415098190307617,
      "rewards/margins": 0.053799472749233246,
      "rewards/rejected": 0.08035150915384293,
      "step": 50
    },
    {
      "epoch": 0.37,
      "learning_rate": 3.978274120908957e-06,
      "logits/chosen": -2.6496951580047607,
      "logits/rejected": -2.611518144607544,
      "logps/chosen": -280.60791015625,
      "logps/rejected": -300.1086730957031,
      "loss": 0.6729,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.054178010672330856,
      "rewards/margins": 0.04839291423559189,
      "rewards/rejected": 0.005785099230706692,
      "step": 60
    },
    {
      "epoch": 0.43,
      "learning_rate": 3.5068667246468437e-06,
      "logits/chosen": -2.590028762817383,
      "logits/rejected": -2.576371669769287,
      "logps/chosen": -266.51116943359375,
      "logps/rejected": -330.80792236328125,
      "loss": 0.6691,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.03388174623250961,
      "rewards/margins": 0.06915116310119629,
      "rewards/rejected": -0.1030329242348671,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.9877258050403214e-06,
      "logits/chosen": -2.577664375305176,
      "logits/rejected": -2.585880756378174,
      "logps/chosen": -255.4573516845703,
      "logps/rejected": -296.2555236816406,
      "loss": 0.6672,
      "rewards/accuracies": 0.628125011920929,
      "rewards/chosen": 0.07994996011257172,
      "rewards/margins": 0.06973399966955185,
      "rewards/rejected": 0.01021595485508442,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.4454627874135976e-06,
      "logits/chosen": -2.5908098220825195,
      "logits/rejected": -2.602696657180786,
      "logps/chosen": -274.9224853515625,
      "logps/rejected": -314.12384033203125,
      "loss": 0.6678,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": -0.0693972036242485,
      "rewards/margins": 0.05152568221092224,
      "rewards/rejected": -0.12092288583517075,
      "step": 90
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9057852691845677e-06,
      "logits/chosen": -2.565824031829834,
      "logits/rejected": -2.594512462615967,
      "logps/chosen": -260.5049743652344,
      "logps/rejected": -310.77191162109375,
      "loss": 0.6619,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0450400672852993,
      "rewards/margins": 0.08433112502098083,
      "rewards/rejected": -0.03929106146097183,
      "step": 100
    },
    {
      "epoch": 0.62,
      "eval_logits/chosen": -2.6071722507476807,
      "eval_logits/rejected": -2.508082628250122,
      "eval_logps/chosen": -293.4469299316406,
      "eval_logps/rejected": -276.3959655761719,
      "eval_loss": 0.663548469543457,
      "eval_rewards/accuracies": 0.5920000076293945,
      "eval_rewards/chosen": 0.02658846043050289,
      "eval_rewards/margins": 0.07567868381738663,
      "eval_rewards/rejected": -0.04909021407365799,
      "eval_runtime": 397.106,
      "eval_samples_per_second": 5.036,
      "eval_steps_per_second": 0.63,
      "step": 100
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.3942782744524974e-06,
      "logits/chosen": -2.6018948554992676,
      "logits/rejected": -2.5732669830322266,
      "logps/chosen": -280.30120849609375,
      "logps/rejected": -298.91925048828125,
      "loss": 0.6676,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.04486658424139023,
      "rewards/margins": 0.06423817574977875,
      "rewards/rejected": -0.019371582195162773,
      "step": 110
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.351913195398523e-07,
      "logits/chosen": -2.6056082248687744,
      "logits/rejected": -2.623044490814209,
      "logps/chosen": -284.22393798828125,
      "logps/rejected": -323.26153564453125,
      "loss": 0.6574,
      "rewards/accuracies": 0.653124988079071,
      "rewards/chosen": -0.029515612870454788,
      "rewards/margins": 0.09018988907337189,
      "rewards/rejected": -0.11970548331737518,
      "step": 120
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.50288792267796e-07,
      "logits/chosen": -2.5670642852783203,
      "logits/rejected": -2.546692371368408,
      "logps/chosen": -275.8234558105469,
      "logps/rejected": -316.80291748046875,
      "loss": 0.6611,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.015085836872458458,
      "rewards/margins": 0.08981107175350189,
      "rewards/rejected": -0.07472522556781769,
      "step": 130
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.5781814616827936e-07,
      "logits/chosen": -2.602719783782959,
      "logits/rejected": -2.62253999710083,
      "logps/chosen": -277.62322998046875,
      "logps/rejected": -338.15216064453125,
      "loss": 0.664,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.039921779185533524,
      "rewards/margins": 0.1004234328866005,
      "rewards/rejected": -0.06050165742635727,
      "step": 140
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.164482546684642e-08,
      "logits/chosen": -2.5598411560058594,
      "logits/rejected": -2.5621132850646973,
      "logps/chosen": -266.3095703125,
      "logps/rejected": -310.7863464355469,
      "loss": 0.6615,
      "rewards/accuracies": 0.653124988079071,
      "rewards/chosen": 0.02628152072429657,
      "rewards/margins": 0.10357757657766342,
      "rewards/rejected": -0.07729605585336685,
      "step": 150
    },
    {
      "epoch": 0.99,
      "learning_rate": 5.94932300227169e-10,
      "logits/chosen": -2.5754361152648926,
      "logits/rejected": -2.580561876296997,
      "logps/chosen": -266.0080871582031,
      "logps/rejected": -315.78167724609375,
      "loss": 0.6573,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": 0.02933870628476143,
      "rewards/margins": 0.08014971017837524,
      "rewards/rejected": -0.050811003893613815,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 161,
      "total_flos": 0.0,
      "train_loss": 0.6703717057008921,
      "train_runtime": 7352.5467,
      "train_samples_per_second": 2.807,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 161,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}