|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.998691442030882, |
|
"eval_steps": 100, |
|
"global_step": 477, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002093692750588851, |
|
"grad_norm": 4.870537596158184, |
|
"learning_rate": 4.166666666666666e-09, |
|
"logits/chosen": -7.072341442108154, |
|
"logits/rejected": -6.540944576263428, |
|
"logps/chosen": -346.17401123046875, |
|
"logps/rejected": -373.49456787109375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 5.556671250581048, |
|
"learning_rate": 4.166666666666667e-08, |
|
"logits/chosen": -6.266731262207031, |
|
"logits/rejected": -6.149946212768555, |
|
"logps/chosen": -350.89019775390625, |
|
"logps/rejected": -355.3866271972656, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": 0.000294171943096444, |
|
"rewards/margins": 0.0003653134626802057, |
|
"rewards/rejected": -7.114150503184646e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 5.947484840987328, |
|
"learning_rate": 8.333333333333334e-08, |
|
"logits/chosen": -6.901098728179932, |
|
"logits/rejected": -6.428101539611816, |
|
"logps/chosen": -368.4015197753906, |
|
"logps/rejected": -332.3041687011719, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.000805316842161119, |
|
"rewards/margins": 0.0013799300650134683, |
|
"rewards/rejected": -0.0005746129900217056, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 4.7415695218728455, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": -6.833685874938965, |
|
"logits/rejected": -6.713122367858887, |
|
"logps/chosen": -381.8787536621094, |
|
"logps/rejected": -335.8555603027344, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0064066750928759575, |
|
"rewards/margins": 0.0018916327971965075, |
|
"rewards/rejected": 0.004515042062848806, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 4.440630542343055, |
|
"learning_rate": 1.6666666666666668e-07, |
|
"logits/chosen": -7.030734062194824, |
|
"logits/rejected": -6.716243743896484, |
|
"logps/chosen": -374.63531494140625, |
|
"logps/rejected": -338.505126953125, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.025013629347085953, |
|
"rewards/margins": 0.007752637378871441, |
|
"rewards/rejected": 0.017260991036891937, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 4.761237773867083, |
|
"learning_rate": 1.9998927475076103e-07, |
|
"logits/chosen": -6.727326393127441, |
|
"logits/rejected": -6.21535587310791, |
|
"logps/chosen": -338.68768310546875, |
|
"logps/rejected": -324.8238830566406, |
|
"loss": 0.6853, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04892030358314514, |
|
"rewards/margins": 0.014415493234992027, |
|
"rewards/rejected": 0.034504808485507965, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 5.119072453981655, |
|
"learning_rate": 1.9961413253717213e-07, |
|
"logits/chosen": -7.186664581298828, |
|
"logits/rejected": -6.633403778076172, |
|
"logps/chosen": -326.31439208984375, |
|
"logps/rejected": -301.0088195800781, |
|
"loss": 0.6768, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.07451288402080536, |
|
"rewards/margins": 0.03354056924581528, |
|
"rewards/rejected": 0.04097231104969978, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 5.007390249181196, |
|
"learning_rate": 1.9870502626379125e-07, |
|
"logits/chosen": -7.373200416564941, |
|
"logits/rejected": -7.114656925201416, |
|
"logps/chosen": -356.11444091796875, |
|
"logps/rejected": -357.88812255859375, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.04506208747625351, |
|
"rewards/margins": 0.0459139421582222, |
|
"rewards/rejected": -0.0008518520626239479, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 5.277515253273024, |
|
"learning_rate": 1.9726682903510838e-07, |
|
"logits/chosen": -8.06989860534668, |
|
"logits/rejected": -7.776799201965332, |
|
"logps/chosen": -383.57763671875, |
|
"logps/rejected": -360.22412109375, |
|
"loss": 0.6567, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008996413089334965, |
|
"rewards/margins": 0.0934341698884964, |
|
"rewards/rejected": -0.08443775773048401, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 5.718904864627125, |
|
"learning_rate": 1.9530725005474194e-07, |
|
"logits/chosen": -8.55430793762207, |
|
"logits/rejected": -8.116801261901855, |
|
"logps/chosen": -343.30426025390625, |
|
"logps/rejected": -344.4271545410156, |
|
"loss": 0.6418, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09966368973255157, |
|
"rewards/margins": 0.11764608323574066, |
|
"rewards/rejected": -0.21730978786945343, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 6.2104459729271335, |
|
"learning_rate": 1.9283679330160724e-07, |
|
"logits/chosen": -8.740680694580078, |
|
"logits/rejected": -8.447057723999023, |
|
"logps/chosen": -355.4083557128906, |
|
"logps/rejected": -336.2757263183594, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.18184241652488708, |
|
"rewards/margins": 0.17646023631095886, |
|
"rewards/rejected": -0.35830265283584595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -9.086737632751465, |
|
"eval_logits/rejected": -8.745743751525879, |
|
"eval_logps/chosen": -391.9050598144531, |
|
"eval_logps/rejected": -385.3993835449219, |
|
"eval_loss": 0.6256434321403503, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.2307606041431427, |
|
"eval_rewards/margins": 0.16034501791000366, |
|
"eval_rewards/rejected": -0.391105592250824, |
|
"eval_runtime": 241.3872, |
|
"eval_samples_per_second": 8.285, |
|
"eval_steps_per_second": 1.036, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 6.2752590431966375, |
|
"learning_rate": 1.898687012251826e-07, |
|
"logits/chosen": -9.274371147155762, |
|
"logits/rejected": -9.027790069580078, |
|
"logps/chosen": -357.4116516113281, |
|
"logps/rejected": -360.20465087890625, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.2726437747478485, |
|
"rewards/margins": 0.17124271392822266, |
|
"rewards/rejected": -0.44388651847839355, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 6.345691652928505, |
|
"learning_rate": 1.8641888376168482e-07, |
|
"logits/chosen": -9.697042465209961, |
|
"logits/rejected": -9.629292488098145, |
|
"logps/chosen": -399.30670166015625, |
|
"logps/rejected": -386.90557861328125, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28612643480300903, |
|
"rewards/margins": 0.24121908843517303, |
|
"rewards/rejected": -0.5273455381393433, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 7.047283511946962, |
|
"learning_rate": 1.8250583305165094e-07, |
|
"logits/chosen": -10.05250358581543, |
|
"logits/rejected": -9.847970008850098, |
|
"logps/chosen": -378.30462646484375, |
|
"logps/rejected": -395.7349548339844, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.3639756739139557, |
|
"rewards/margins": 0.2538781762123108, |
|
"rewards/rejected": -0.6178538799285889, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 8.250566588104485, |
|
"learning_rate": 1.78150524316067e-07, |
|
"logits/chosen": -10.428335189819336, |
|
"logits/rejected": -10.18110179901123, |
|
"logps/chosen": -423.37896728515625, |
|
"logps/rejected": -418.40814208984375, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.36636847257614136, |
|
"rewards/margins": 0.3312264084815979, |
|
"rewards/rejected": -0.6975948214530945, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 7.160872331336618, |
|
"learning_rate": 1.7337630342238038e-07, |
|
"logits/chosen": -10.668364524841309, |
|
"logits/rejected": -10.247892379760742, |
|
"logps/chosen": -438.568603515625, |
|
"logps/rejected": -439.31243896484375, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44665470719337463, |
|
"rewards/margins": 0.35836517810821533, |
|
"rewards/rejected": -0.8050198554992676, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 8.012025922238234, |
|
"learning_rate": 1.682087617430782e-07, |
|
"logits/chosen": -10.813860893249512, |
|
"logits/rejected": -10.625129699707031, |
|
"logps/chosen": -402.5640563964844, |
|
"logps/rejected": -415.65032958984375, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.584823489189148, |
|
"rewards/margins": 0.2518795430660248, |
|
"rewards/rejected": -0.8367029428482056, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 8.371912815572413, |
|
"learning_rate": 1.6267559897763025e-07, |
|
"logits/chosen": -10.72344970703125, |
|
"logits/rejected": -10.272022247314453, |
|
"logps/chosen": -365.57806396484375, |
|
"logps/rejected": -445.9237365722656, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5119096040725708, |
|
"rewards/margins": 0.36096295714378357, |
|
"rewards/rejected": -0.872872531414032, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 8.291785272423704, |
|
"learning_rate": 1.5680647467311557e-07, |
|
"logits/chosen": -11.077351570129395, |
|
"logits/rejected": -10.810079574584961, |
|
"logps/chosen": -403.3762512207031, |
|
"logps/rejected": -436.90411376953125, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.5242770910263062, |
|
"rewards/margins": 0.4779755473136902, |
|
"rewards/rejected": -1.0022525787353516, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 8.768402559709461, |
|
"learning_rate": 1.506328492394303e-07, |
|
"logits/chosen": -10.848445892333984, |
|
"logits/rejected": -10.747730255126953, |
|
"logps/chosen": -361.96917724609375, |
|
"logps/rejected": -427.53131103515625, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6229602098464966, |
|
"rewards/margins": 0.3965635299682617, |
|
"rewards/rejected": -1.0195238590240479, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 10.10996155431003, |
|
"learning_rate": 1.4418781531128634e-07, |
|
"logits/chosen": -11.236587524414062, |
|
"logits/rejected": -11.044486045837402, |
|
"logps/chosen": -414.62225341796875, |
|
"logps/rejected": -419.2083435058594, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6426066160202026, |
|
"rewards/margins": 0.4211527705192566, |
|
"rewards/rejected": -1.0637595653533936, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -11.197042465209961, |
|
"eval_logits/rejected": -10.930534362792969, |
|
"eval_logps/chosen": -442.0617370605469, |
|
"eval_logps/rejected": -458.7671203613281, |
|
"eval_loss": 0.5679408311843872, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -0.7323274612426758, |
|
"eval_rewards/margins": 0.39245596528053284, |
|
"eval_rewards/rejected": -1.1247833967208862, |
|
"eval_runtime": 238.8989, |
|
"eval_samples_per_second": 8.372, |
|
"eval_steps_per_second": 1.046, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 8.838084489326292, |
|
"learning_rate": 1.375059203609562e-07, |
|
"logits/chosen": -11.455360412597656, |
|
"logits/rejected": -11.19542121887207, |
|
"logps/chosen": -448.05706787109375, |
|
"logps/rejected": -442.106689453125, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.6968704462051392, |
|
"rewards/margins": 0.5110000967979431, |
|
"rewards/rejected": -1.207870364189148, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 11.66423950154552, |
|
"learning_rate": 1.306229815126159e-07, |
|
"logits/chosen": -10.951299667358398, |
|
"logits/rejected": -10.760453224182129, |
|
"logps/chosen": -387.4031066894531, |
|
"logps/rejected": -438.4293518066406, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6763008832931519, |
|
"rewards/margins": 0.459463506937027, |
|
"rewards/rejected": -1.1357643604278564, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 8.962170857968381, |
|
"learning_rate": 1.2357589355094274e-07, |
|
"logits/chosen": -11.39826488494873, |
|
"logits/rejected": -11.283321380615234, |
|
"logps/chosen": -385.0740966796875, |
|
"logps/rejected": -439.373291015625, |
|
"loss": 0.5679, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6969426870346069, |
|
"rewards/margins": 0.4374031126499176, |
|
"rewards/rejected": -1.1343457698822021, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 9.228744844021563, |
|
"learning_rate": 1.1640243115310217e-07, |
|
"logits/chosen": -11.526562690734863, |
|
"logits/rejected": -11.38569450378418, |
|
"logps/chosen": -431.7037048339844, |
|
"logps/rejected": -451.6121520996094, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6818773150444031, |
|
"rewards/margins": 0.4736524522304535, |
|
"rewards/rejected": -1.1555297374725342, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 9.475835105940853, |
|
"learning_rate": 1.0914104640422679e-07, |
|
"logits/chosen": -11.973138809204102, |
|
"logits/rejected": -11.678539276123047, |
|
"logps/chosen": -438.6128845214844, |
|
"logps/rejected": -494.81475830078125, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.7578285932540894, |
|
"rewards/margins": 0.5199133157730103, |
|
"rewards/rejected": -1.2777419090270996, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 10.55001524910548, |
|
"learning_rate": 1.0183066268176774e-07, |
|
"logits/chosen": -11.676985740661621, |
|
"logits/rejected": -11.725804328918457, |
|
"logps/chosen": -406.6064453125, |
|
"logps/rejected": -454.85015869140625, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.6437905430793762, |
|
"rewards/margins": 0.597993791103363, |
|
"rewards/rejected": -1.2417843341827393, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 10.40303673883758, |
|
"learning_rate": 9.451046601356724e-08, |
|
"logits/chosen": -11.600053787231445, |
|
"logits/rejected": -11.353797912597656, |
|
"logps/chosen": -433.164794921875, |
|
"logps/rejected": -443.1178283691406, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8422502279281616, |
|
"rewards/margins": 0.5629727244377136, |
|
"rewards/rejected": -1.40522301197052, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 10.92362637516926, |
|
"learning_rate": 8.721969502803953e-08, |
|
"logits/chosen": -11.430012702941895, |
|
"logits/rejected": -11.168180465698242, |
|
"logps/chosen": -443.4029846191406, |
|
"logps/rejected": -476.8294982910156, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9203767776489258, |
|
"rewards/margins": 0.4110565781593323, |
|
"rewards/rejected": -1.3314332962036133, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 9.76250775652993, |
|
"learning_rate": 7.999743062239557e-08, |
|
"logits/chosen": -11.536198616027832, |
|
"logits/rejected": -11.403619766235352, |
|
"logps/chosen": -425.9610900878906, |
|
"logps/rejected": -467.23291015625, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.7125166654586792, |
|
"rewards/margins": 0.6340970396995544, |
|
"rewards/rejected": -1.3466136455535889, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 12.956036139139355, |
|
"learning_rate": 7.28823864763583e-08, |
|
"logits/chosen": -11.646090507507324, |
|
"logits/rejected": -11.440874099731445, |
|
"logps/chosen": -433.30120849609375, |
|
"logps/rejected": -482.5865173339844, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.790636420249939, |
|
"rewards/margins": 0.5609198808670044, |
|
"rewards/rejected": -1.351556420326233, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -11.52165412902832, |
|
"eval_logits/rejected": -11.35400676727295, |
|
"eval_logps/chosen": -458.75299072265625, |
|
"eval_logps/rejected": -483.2173156738281, |
|
"eval_loss": 0.5490842461585999, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": -0.8992397785186768, |
|
"eval_rewards/margins": 0.47004497051239014, |
|
"eval_rewards/rejected": -1.369284749031067, |
|
"eval_runtime": 222.3735, |
|
"eval_samples_per_second": 8.994, |
|
"eval_steps_per_second": 1.124, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 11.066017903277775, |
|
"learning_rate": 6.591270153428288e-08, |
|
"logits/chosen": -11.68547248840332, |
|
"logits/rejected": -11.603824615478516, |
|
"logps/chosen": -465.458740234375, |
|
"logps/rejected": -478.3704528808594, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8401340246200562, |
|
"rewards/margins": 0.5598424673080444, |
|
"rewards/rejected": -1.3999764919281006, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 11.177050758944128, |
|
"learning_rate": 5.912573556804452e-08, |
|
"logits/chosen": -11.293741226196289, |
|
"logits/rejected": -11.169164657592773, |
|
"logps/chosen": -391.907958984375, |
|
"logps/rejected": -435.26190185546875, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7623863220214844, |
|
"rewards/margins": 0.5452743768692017, |
|
"rewards/rejected": -1.3076608180999756, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 13.652856840334575, |
|
"learning_rate": 5.255786891654399e-08, |
|
"logits/chosen": -11.769269943237305, |
|
"logits/rejected": -11.3505220413208, |
|
"logps/chosen": -447.8871154785156, |
|
"logps/rejected": -496.74029541015625, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8857334852218628, |
|
"rewards/margins": 0.6645745038986206, |
|
"rewards/rejected": -1.5503078699111938, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 11.172216313095676, |
|
"learning_rate": 4.624430747529102e-08, |
|
"logits/chosen": -11.618499755859375, |
|
"logits/rejected": -11.424800872802734, |
|
"logps/chosen": -420.2229919433594, |
|
"logps/rejected": -472.84033203125, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.7942701578140259, |
|
"rewards/margins": 0.6123701333999634, |
|
"rewards/rejected": -1.4066402912139893, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 12.255527795835215, |
|
"learning_rate": 4.0218893981385925e-08, |
|
"logits/chosen": -11.575703620910645, |
|
"logits/rejected": -11.422430038452148, |
|
"logps/chosen": -432.9644470214844, |
|
"logps/rejected": -470.8561096191406, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8515139818191528, |
|
"rewards/margins": 0.5291236042976379, |
|
"rewards/rejected": -1.380637526512146, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 12.049137551707718, |
|
"learning_rate": 3.45139266054715e-08, |
|
"logits/chosen": -11.708954811096191, |
|
"logits/rejected": -11.447134017944336, |
|
"logps/chosen": -482.3265686035156, |
|
"logps/rejected": -521.8561401367188, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.885657787322998, |
|
"rewards/margins": 0.5782418251037598, |
|
"rewards/rejected": -1.4638997316360474, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 10.956723652484643, |
|
"learning_rate": 2.9159985823062993e-08, |
|
"logits/chosen": -11.063775062561035, |
|
"logits/rejected": -11.105363845825195, |
|
"logps/chosen": -469.81201171875, |
|
"logps/rejected": -485.8446350097656, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8363800048828125, |
|
"rewards/margins": 0.5975291132926941, |
|
"rewards/rejected": -1.4339090585708618, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 13.069162308279825, |
|
"learning_rate": 2.4185770493280577e-08, |
|
"logits/chosen": -11.262879371643066, |
|
"logits/rejected": -11.126230239868164, |
|
"logps/chosen": -455.2259216308594, |
|
"logps/rejected": -489.8641052246094, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9804290533065796, |
|
"rewards/margins": 0.48143139481544495, |
|
"rewards/rejected": -1.4618604183197021, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 13.1101074898303, |
|
"learning_rate": 1.9617944023656108e-08, |
|
"logits/chosen": -11.852777481079102, |
|
"logits/rejected": -11.575521469116211, |
|
"logps/chosen": -470.88604736328125, |
|
"logps/rejected": -511.36767578125, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.876979649066925, |
|
"rewards/margins": 0.5152709484100342, |
|
"rewards/rejected": -1.3922507762908936, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 13.484360798114164, |
|
"learning_rate": 1.5480991445620538e-08, |
|
"logits/chosen": -11.511263847351074, |
|
"logits/rejected": -11.029703140258789, |
|
"logps/chosen": -427.28448486328125, |
|
"logps/rejected": -486.9627990722656, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8370214700698853, |
|
"rewards/margins": 0.5594587922096252, |
|
"rewards/rejected": -1.3964803218841553, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -11.385127067565918, |
|
"eval_logits/rejected": -11.129400253295898, |
|
"eval_logps/chosen": -459.30474853515625, |
|
"eval_logps/rejected": -487.0408020019531, |
|
"eval_loss": 0.5449032783508301, |
|
"eval_rewards/accuracies": 0.6759999990463257, |
|
"eval_rewards/chosen": -0.9047574996948242, |
|
"eval_rewards/margins": 0.5027627944946289, |
|
"eval_rewards/rejected": -1.4075202941894531, |
|
"eval_runtime": 222.5645, |
|
"eval_samples_per_second": 8.986, |
|
"eval_steps_per_second": 1.123, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 10.485330642188247, |
|
"learning_rate": 1.1797088166794e-08, |
|
"logits/chosen": -11.086836814880371, |
|
"logits/rejected": -10.8326997756958, |
|
"logps/chosen": -452.47235107421875, |
|
"logps/rejected": -453.66986083984375, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9103538393974304, |
|
"rewards/margins": 0.4986189007759094, |
|
"rewards/rejected": -1.4089727401733398, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 10.664608353191511, |
|
"learning_rate": 8.585981103608341e-09, |
|
"logits/chosen": -11.340084075927734, |
|
"logits/rejected": -10.990386962890625, |
|
"logps/chosen": -402.55157470703125, |
|
"logps/rejected": -500.97467041015625, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.8159112930297852, |
|
"rewards/margins": 0.6232641339302063, |
|
"rewards/rejected": -1.4391753673553467, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 11.582461841758496, |
|
"learning_rate": 5.864882831430273e-09, |
|
"logits/chosen": -11.550569534301758, |
|
"logits/rejected": -11.248498916625977, |
|
"logps/chosen": -449.3023986816406, |
|
"logps/rejected": -463.66510009765625, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8865207433700562, |
|
"rewards/margins": 0.47660237550735474, |
|
"rewards/rejected": -1.3631231784820557, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 11.301516689755074, |
|
"learning_rate": 3.6483793195745682e-09, |
|
"logits/chosen": -11.318781852722168, |
|
"logits/rejected": -10.964235305786133, |
|
"logps/chosen": -413.6148986816406, |
|
"logps/rejected": -463.11114501953125, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.8221774101257324, |
|
"rewards/margins": 0.663087010383606, |
|
"rewards/rejected": -1.4852644205093384, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 12.872962412868715, |
|
"learning_rate": 1.9483517457776433e-09, |
|
"logits/chosen": -11.27305793762207, |
|
"logits/rejected": -10.98169994354248, |
|
"logps/chosen": -440.1998596191406, |
|
"logps/rejected": -488.0940856933594, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8431445956230164, |
|
"rewards/margins": 0.6032007932662964, |
|
"rewards/rejected": -1.446345329284668, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 11.52042506596002, |
|
"learning_rate": 7.739128092312918e-10, |
|
"logits/chosen": -11.074236869812012, |
|
"logits/rejected": -10.837442398071289, |
|
"logps/chosen": -452.1619567871094, |
|
"logps/rejected": -474.42620849609375, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8461356163024902, |
|
"rewards/margins": 0.6280852556228638, |
|
"rewards/rejected": -1.4742207527160645, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 11.458055654937185, |
|
"learning_rate": 1.313578835593465e-10, |
|
"logits/chosen": -11.382277488708496, |
|
"logits/rejected": -11.017861366271973, |
|
"logps/chosen": -448.569091796875, |
|
"logps/rejected": -476.3909606933594, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9083318710327148, |
|
"rewards/margins": 0.5944851636886597, |
|
"rewards/rejected": -1.502817153930664, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.998691442030882, |
|
"step": 477, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5785154289169632, |
|
"train_runtime": 17436.3201, |
|
"train_samples_per_second": 3.506, |
|
"train_steps_per_second": 0.027 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 477, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|