|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9984496124031008, |
|
"eval_steps": 100, |
|
"global_step": 161, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9411764705882356e-07, |
|
"logits/chosen": -2.7490084171295166, |
|
"logits/rejected": -2.6610748767852783, |
|
"logps/chosen": -299.482421875, |
|
"logps/rejected": -284.2821960449219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"logits/chosen": -2.71052885055542, |
|
"logits/rejected": -2.7085328102111816, |
|
"logps/chosen": -263.4867858886719, |
|
"logps/rejected": -292.8466491699219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 0.0012557146837934852, |
|
"rewards/margins": -7.775126141496003e-05, |
|
"rewards/rejected": 0.0013334659161046147, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.994647308096509e-06, |
|
"logits/chosen": -2.7212631702423096, |
|
"logits/rejected": -2.6979565620422363, |
|
"logps/chosen": -281.99365234375, |
|
"logps/rejected": -305.00482177734375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5218750238418579, |
|
"rewards/chosen": 0.04867880418896675, |
|
"rewards/margins": 0.002921257633715868, |
|
"rewards/rejected": 0.04575754702091217, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.900124635964823e-06, |
|
"logits/chosen": -2.673609495162964, |
|
"logits/rejected": -2.6867575645446777, |
|
"logps/chosen": -256.4033203125, |
|
"logps/rejected": -289.47882080078125, |
|
"loss": 0.6871, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.1824948489665985, |
|
"rewards/margins": 0.019166249781847, |
|
"rewards/rejected": 0.1633286029100418, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.69181688926877e-06, |
|
"logits/chosen": -2.6670594215393066, |
|
"logits/rejected": -2.669649362564087, |
|
"logps/chosen": -255.5750732421875, |
|
"logps/rejected": -286.7828063964844, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09983734786510468, |
|
"rewards/margins": 0.02368326112627983, |
|
"rewards/rejected": 0.07615408301353455, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.379599518697444e-06, |
|
"logits/chosen": -2.6448562145233154, |
|
"logits/rejected": -2.626420497894287, |
|
"logps/chosen": -261.439208984375, |
|
"logps/rejected": -301.18280029296875, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.13415098190307617, |
|
"rewards/margins": 0.053799472749233246, |
|
"rewards/rejected": 0.08035150915384293, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.978274120908957e-06, |
|
"logits/chosen": -2.6496951580047607, |
|
"logits/rejected": -2.611518144607544, |
|
"logps/chosen": -280.60791015625, |
|
"logps/rejected": -300.1086730957031, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.054178010672330856, |
|
"rewards/margins": 0.04839291423559189, |
|
"rewards/rejected": 0.005785099230706692, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5068667246468437e-06, |
|
"logits/chosen": -2.590028762817383, |
|
"logits/rejected": -2.576371669769287, |
|
"logps/chosen": -266.51116943359375, |
|
"logps/rejected": -330.80792236328125, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03388174623250961, |
|
"rewards/margins": 0.06915116310119629, |
|
"rewards/rejected": -0.1030329242348671, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9877258050403214e-06, |
|
"logits/chosen": -2.577664375305176, |
|
"logits/rejected": -2.585880756378174, |
|
"logps/chosen": -255.4573516845703, |
|
"logps/rejected": -296.2555236816406, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.07994996011257172, |
|
"rewards/margins": 0.06973399966955185, |
|
"rewards/rejected": 0.01021595485508442, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4454627874135976e-06, |
|
"logits/chosen": -2.5908098220825195, |
|
"logits/rejected": -2.602696657180786, |
|
"logps/chosen": -274.9224853515625, |
|
"logps/rejected": -314.12384033203125, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.0693972036242485, |
|
"rewards/margins": 0.05152568221092224, |
|
"rewards/rejected": -0.12092288583517075, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9057852691845677e-06, |
|
"logits/chosen": -2.565824031829834, |
|
"logits/rejected": -2.594512462615967, |
|
"logps/chosen": -260.5049743652344, |
|
"logps/rejected": -310.77191162109375, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0450400672852993, |
|
"rewards/margins": 0.08433112502098083, |
|
"rewards/rejected": -0.03929106146097183, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -2.6071722507476807, |
|
"eval_logits/rejected": -2.508082628250122, |
|
"eval_logps/chosen": -293.4469299316406, |
|
"eval_logps/rejected": -276.3959655761719, |
|
"eval_loss": 0.663548469543457, |
|
"eval_rewards/accuracies": 0.5920000076293945, |
|
"eval_rewards/chosen": 0.02658846043050289, |
|
"eval_rewards/margins": 0.07567868381738663, |
|
"eval_rewards/rejected": -0.04909021407365799, |
|
"eval_runtime": 397.106, |
|
"eval_samples_per_second": 5.036, |
|
"eval_steps_per_second": 0.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3942782744524974e-06, |
|
"logits/chosen": -2.6018948554992676, |
|
"logits/rejected": -2.5732669830322266, |
|
"logps/chosen": -280.30120849609375, |
|
"logps/rejected": -298.91925048828125, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.04486658424139023, |
|
"rewards/margins": 0.06423817574977875, |
|
"rewards/rejected": -0.019371582195162773, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.351913195398523e-07, |
|
"logits/chosen": -2.6056082248687744, |
|
"logits/rejected": -2.623044490814209, |
|
"logps/chosen": -284.22393798828125, |
|
"logps/rejected": -323.26153564453125, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.029515612870454788, |
|
"rewards/margins": 0.09018988907337189, |
|
"rewards/rejected": -0.11970548331737518, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.50288792267796e-07, |
|
"logits/chosen": -2.5670642852783203, |
|
"logits/rejected": -2.546692371368408, |
|
"logps/chosen": -275.8234558105469, |
|
"logps/rejected": -316.80291748046875, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.015085836872458458, |
|
"rewards/margins": 0.08981107175350189, |
|
"rewards/rejected": -0.07472522556781769, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5781814616827936e-07, |
|
"logits/chosen": -2.602719783782959, |
|
"logits/rejected": -2.62253999710083, |
|
"logps/chosen": -277.62322998046875, |
|
"logps/rejected": -338.15216064453125, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.039921779185533524, |
|
"rewards/margins": 0.1004234328866005, |
|
"rewards/rejected": -0.06050165742635727, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.164482546684642e-08, |
|
"logits/chosen": -2.5598411560058594, |
|
"logits/rejected": -2.5621132850646973, |
|
"logps/chosen": -266.3095703125, |
|
"logps/rejected": -310.7863464355469, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.02628152072429657, |
|
"rewards/margins": 0.10357757657766342, |
|
"rewards/rejected": -0.07729605585336685, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.94932300227169e-10, |
|
"logits/chosen": -2.5754361152648926, |
|
"logits/rejected": -2.580561876296997, |
|
"logps/chosen": -266.0080871582031, |
|
"logps/rejected": -315.78167724609375, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.02933870628476143, |
|
"rewards/margins": 0.08014971017837524, |
|
"rewards/rejected": -0.050811003893613815, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 161, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6703717057008921, |
|
"train_runtime": 7352.5467, |
|
"train_samples_per_second": 2.807, |
|
"train_steps_per_second": 0.022 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 161, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|