{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988751406074241, |
|
"eval_steps": 100, |
|
"global_step": 444, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 5.829373545547037, |
|
"learning_rate": 1.111111111111111e-08, |
|
"logits/chosen": -1.8433172702789307, |
|
"logits/rejected": -2.1778242588043213, |
|
"logps/chosen": -155.12074279785156, |
|
"logps/rejected": -108.14129638671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.4818389334129645, |
|
"learning_rate": 1.111111111111111e-07, |
|
"logits/chosen": -1.727405071258545, |
|
"logits/rejected": -1.8230912685394287, |
|
"logps/chosen": -143.81710815429688, |
|
"logps/rejected": -170.6587371826172, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4652777910232544, |
|
"rewards/chosen": 8.138448174577206e-05, |
|
"rewards/margins": 0.0009054330294020474, |
|
"rewards/rejected": -0.0008240485331043601, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.216353393457572, |
|
"learning_rate": 2.222222222222222e-07, |
|
"logits/chosen": -1.7563774585723877, |
|
"logits/rejected": -1.8175561428070068, |
|
"logps/chosen": -156.39651489257812, |
|
"logps/rejected": -182.17941284179688, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0005323028308339417, |
|
"rewards/margins": 0.0004471595457289368, |
|
"rewards/rejected": 8.51431759656407e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.0623601927922826, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -1.7707617282867432, |
|
"logits/rejected": -1.9445222616195679, |
|
"logps/chosen": -162.476318359375, |
|
"logps/rejected": -180.51072692871094, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.003538253251463175, |
|
"rewards/margins": 0.0020595293026417494, |
|
"rewards/rejected": 0.0014787239488214254, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 6.203147518363453, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/chosen": -1.7849353551864624, |
|
"logits/rejected": -1.9426301717758179, |
|
"logps/chosen": -175.6881866455078, |
|
"logps/rejected": -160.2828369140625, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.010533371940255165, |
|
"rewards/margins": 0.0045619565062224865, |
|
"rewards/rejected": 0.005971415434032679, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.821555258105456, |
|
"learning_rate": 4.998062918544441e-07, |
|
"logits/chosen": -1.6218881607055664, |
|
"logits/rejected": -1.7974551916122437, |
|
"logps/chosen": -140.06240844726562, |
|
"logps/rejected": -163.06736755371094, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.024222631007432938, |
|
"rewards/margins": 0.012534504756331444, |
|
"rewards/rejected": 0.011688126251101494, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.110757227734316, |
|
"learning_rate": 4.98258427321406e-07, |
|
"logits/chosen": -1.7357165813446045, |
|
"logits/rejected": -1.8816426992416382, |
|
"logps/chosen": -164.33438110351562, |
|
"logps/rejected": -165.95216369628906, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.04671463742852211, |
|
"rewards/margins": 0.026966657489538193, |
|
"rewards/rejected": 0.019747978076338768, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 6.155653196810327, |
|
"learning_rate": 4.951722892251762e-07, |
|
"logits/chosen": -1.6737648248672485, |
|
"logits/rejected": -1.7360236644744873, |
|
"logps/chosen": -158.34616088867188, |
|
"logps/rejected": -189.7154998779297, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.06381961703300476, |
|
"rewards/margins": 0.03640252351760864, |
|
"rewards/rejected": 0.02741708979010582, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 6.083689058170866, |
|
"learning_rate": 4.905670000773126e-07, |
|
"logits/chosen": -1.577292799949646, |
|
"logits/rejected": -1.6474878787994385, |
|
"logps/chosen": -174.16554260253906, |
|
"logps/rejected": -139.80081176757812, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.08402051031589508, |
|
"rewards/margins": 0.05129547044634819, |
|
"rewards/rejected": 0.03272503241896629, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.617574112691242, |
|
"learning_rate": 4.844710954430464e-07, |
|
"logits/chosen": -1.6551265716552734, |
|
"logits/rejected": -1.710513710975647, |
|
"logps/chosen": -155.87420654296875, |
|
"logps/rejected": -184.04806518554688, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.08863753080368042, |
|
"rewards/margins": 0.054521817713975906, |
|
"rewards/rejected": 0.034115713089704514, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.627048741895505, |
|
"learning_rate": 4.769223471275234e-07, |
|
"logits/chosen": -1.5745666027069092, |
|
"logits/rejected": -1.6258203983306885, |
|
"logps/chosen": -147.27999877929688, |
|
"logps/rejected": -151.06619262695312, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1112900972366333, |
|
"rewards/margins": 0.0667150542140007, |
|
"rewards/rejected": 0.04457502439618111, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_logits/chosen": -1.7720075845718384, |
|
"eval_logits/rejected": -1.495701789855957, |
|
"eval_logps/chosen": -124.06204986572266, |
|
"eval_logps/rejected": -139.30418395996094, |
|
"eval_loss": 0.6646677255630493, |
|
"eval_rewards/accuracies": 0.6875, |
|
"eval_rewards/chosen": 0.1106695607304573, |
|
"eval_rewards/margins": 0.06266607344150543, |
|
"eval_rewards/rejected": 0.04800347983837128, |
|
"eval_runtime": 107.3775, |
|
"eval_samples_per_second": 10.654, |
|
"eval_steps_per_second": 0.335, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 5.866012556456834, |
|
"learning_rate": 4.6796752913190956e-07, |
|
"logits/chosen": -1.5874210596084595, |
|
"logits/rejected": -1.6103451251983643, |
|
"logps/chosen": -155.8997039794922, |
|
"logps/rejected": -162.63836669921875, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.12256599962711334, |
|
"rewards/margins": 0.07956713438034058, |
|
"rewards/rejected": 0.04299888014793396, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 5.609183180938371, |
|
"learning_rate": 4.576621278295557e-07, |
|
"logits/chosen": -1.5197416543960571, |
|
"logits/rejected": -1.572852373123169, |
|
"logps/chosen": -147.88705444335938, |
|
"logps/rejected": -145.33999633789062, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.13489821553230286, |
|
"rewards/margins": 0.0956321507692337, |
|
"rewards/rejected": 0.03926606848835945, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.6105044223251355, |
|
"learning_rate": 4.4606999815804657e-07, |
|
"logits/chosen": -1.4735063314437866, |
|
"logits/rejected": -1.662398338317871, |
|
"logps/chosen": -146.32366943359375, |
|
"logps/rejected": -139.0260009765625, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.1404353231191635, |
|
"rewards/margins": 0.10020889341831207, |
|
"rewards/rejected": 0.04022643715143204, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 5.559220884446223, |
|
"learning_rate": 4.332629679574565e-07, |
|
"logits/chosen": -1.4670491218566895, |
|
"logits/rejected": -1.6285909414291382, |
|
"logps/chosen": -148.60751342773438, |
|
"logps/rejected": -174.4378204345703, |
|
"loss": 0.6456, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.12996497750282288, |
|
"rewards/margins": 0.07584364712238312, |
|
"rewards/rejected": 0.05412132665514946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 5.496701088365727, |
|
"learning_rate": 4.193203929064353e-07, |
|
"logits/chosen": -1.4563395977020264, |
|
"logits/rejected": -1.5474860668182373, |
|
"logps/chosen": -142.05953979492188, |
|
"logps/rejected": -161.18702697753906, |
|
"loss": 0.6406, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.16432908177375793, |
|
"rewards/margins": 0.11925216019153595, |
|
"rewards/rejected": 0.04507693648338318, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.87004256418159, |
|
"learning_rate": 4.043286648138538e-07, |
|
"logits/chosen": -1.4940943717956543, |
|
"logits/rejected": -1.5696378946304321, |
|
"logps/chosen": -144.10693359375, |
|
"logps/rejected": -174.38937377929688, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.14990444481372833, |
|
"rewards/margins": 0.10559757798910141, |
|
"rewards/rejected": 0.04430687427520752, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.736125236884026, |
|
"learning_rate": 3.883806763127647e-07, |
|
"logits/chosen": -1.4956731796264648, |
|
"logits/rejected": -1.5208299160003662, |
|
"logps/chosen": -154.81716918945312, |
|
"logps/rejected": -155.2576904296875, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.17543208599090576, |
|
"rewards/margins": 0.13297812640666962, |
|
"rewards/rejected": 0.04245396703481674, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.520870747493312, |
|
"learning_rate": 3.715752452735703e-07, |
|
"logits/chosen": -1.518593192100525, |
|
"logits/rejected": -1.6800349950790405, |
|
"logps/chosen": -140.48988342285156, |
|
"logps/rejected": -161.6919708251953, |
|
"loss": 0.6328, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.17410950362682343, |
|
"rewards/margins": 0.13047902286052704, |
|
"rewards/rejected": 0.04363049194216728, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.706390091330182, |
|
"learning_rate": 3.540165025028843e-07, |
|
"logits/chosen": -1.5428271293640137, |
|
"logits/rejected": -1.6062263250350952, |
|
"logps/chosen": -159.2704315185547, |
|
"logps/rejected": -173.2039031982422, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.18463760614395142, |
|
"rewards/margins": 0.13972006738185883, |
|
"rewards/rejected": 0.04491753131151199, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.847027641584725, |
|
"learning_rate": 3.358132465220639e-07, |
|
"logits/chosen": -1.4393140077590942, |
|
"logits/rejected": -1.5474971532821655, |
|
"logps/chosen": -148.5250244140625, |
|
"logps/rejected": -156.9046173095703, |
|
"loss": 0.6273, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.1776243895292282, |
|
"rewards/margins": 0.1414380818605423, |
|
"rewards/rejected": 0.03618631511926651, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -1.7316410541534424, |
|
"eval_logits/rejected": -1.465333342552185, |
|
"eval_logps/chosen": -119.38525390625, |
|
"eval_logps/rejected": -138.4956817626953, |
|
"eval_loss": 0.6494045853614807, |
|
"eval_rewards/accuracies": 0.6979166865348816, |
|
"eval_rewards/chosen": 0.1574375331401825, |
|
"eval_rewards/margins": 0.10134916752576828, |
|
"eval_rewards/rejected": 0.05608838051557541, |
|
"eval_runtime": 106.286, |
|
"eval_samples_per_second": 10.763, |
|
"eval_steps_per_second": 0.339, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.808496828700401, |
|
"learning_rate": 3.170782694233712e-07, |
|
"logits/chosen": -1.4331612586975098, |
|
"logits/rejected": -1.62355637550354, |
|
"logps/chosen": -132.7198944091797, |
|
"logps/rejected": -162.63983154296875, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.1818162202835083, |
|
"rewards/margins": 0.14457334578037262, |
|
"rewards/rejected": 0.03724289312958717, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.450138104412953, |
|
"learning_rate": 2.979276579809346e-07, |
|
"logits/chosen": -1.567256212234497, |
|
"logits/rejected": -1.662076711654663, |
|
"logps/chosen": -139.86077880859375, |
|
"logps/rejected": -170.76498413085938, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.17449909448623657, |
|
"rewards/margins": 0.1373990774154663, |
|
"rewards/rejected": 0.03710002452135086, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.305590860314572, |
|
"learning_rate": 2.78480074347007e-07, |
|
"logits/chosen": -1.4688160419464111, |
|
"logits/rejected": -1.6507971286773682, |
|
"logps/chosen": -155.4250030517578, |
|
"logps/rejected": -139.8217010498047, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.185538649559021, |
|
"rewards/margins": 0.1848856508731842, |
|
"rewards/rejected": 0.000652993272524327, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 5.538376518324725, |
|
"learning_rate": 2.588560207905135e-07, |
|
"logits/chosen": -1.5921481847763062, |
|
"logits/rejected": -1.6697231531143188, |
|
"logps/chosen": -163.6059112548828, |
|
"logps/rejected": -150.09193420410156, |
|
"loss": 0.6086, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.21226021647453308, |
|
"rewards/margins": 0.19374233484268188, |
|
"rewards/rejected": 0.018517881631851196, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 5.203072441741653, |
|
"learning_rate": 2.391770930337597e-07, |
|
"logits/chosen": -1.5545365810394287, |
|
"logits/rejected": -1.5908061265945435, |
|
"logps/chosen": -140.0444793701172, |
|
"logps/rejected": -160.80111694335938, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.1901070922613144, |
|
"rewards/margins": 0.17807592451572418, |
|
"rewards/rejected": 0.012031197547912598, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 5.475288227895773, |
|
"learning_rate": 2.195652268138194e-07, |
|
"logits/chosen": -1.567275047302246, |
|
"logits/rejected": -1.6613355875015259, |
|
"logps/chosen": -150.01036071777344, |
|
"logps/rejected": -157.63027954101562, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.1891297549009323, |
|
"rewards/margins": 0.18906521797180176, |
|
"rewards/rejected": 6.455164111685008e-05, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 5.992354186949266, |
|
"learning_rate": 2.001419423371019e-07, |
|
"logits/chosen": -1.47898268699646, |
|
"logits/rejected": -1.5700337886810303, |
|
"logps/chosen": -134.41952514648438, |
|
"logps/rejected": -160.8531494140625, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 0.18836051225662231, |
|
"rewards/margins": 0.16941113770008087, |
|
"rewards/rejected": 0.018949372693896294, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 6.043059967391702, |
|
"learning_rate": 1.810275913086562e-07, |
|
"logits/chosen": -1.482757329940796, |
|
"logits/rejected": -1.648633599281311, |
|
"logps/chosen": -158.1710968017578, |
|
"logps/rejected": -164.2964324951172, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.17465534806251526, |
|
"rewards/margins": 0.1687730997800827, |
|
"rewards/rejected": 0.0058822231367230415, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 5.822282255796662, |
|
"learning_rate": 1.6234061120181143e-07, |
|
"logits/chosen": -1.5249128341674805, |
|
"logits/rejected": -1.6839654445648193, |
|
"logps/chosen": -130.04713439941406, |
|
"logps/rejected": -178.07696533203125, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.20361635088920593, |
|
"rewards/margins": 0.20471592247486115, |
|
"rewards/rejected": -0.0010995581978932023, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 5.917241220768849, |
|
"learning_rate": 1.4419679138889375e-07, |
|
"logits/chosen": -1.4709835052490234, |
|
"logits/rejected": -1.7355806827545166, |
|
"logps/chosen": -156.6675262451172, |
|
"logps/rejected": -172.46078491210938, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.1793396770954132, |
|
"rewards/margins": 0.20549102127552032, |
|
"rewards/rejected": -0.02615133859217167, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -1.777042269706726, |
|
"eval_logits/rejected": -1.5097768306732178, |
|
"eval_logps/chosen": -120.27433013916016, |
|
"eval_logps/rejected": -141.86488342285156, |
|
"eval_loss": 0.6398369669914246, |
|
"eval_rewards/accuracies": 0.71875, |
|
"eval_rewards/chosen": 0.14854662120342255, |
|
"eval_rewards/margins": 0.12615016102790833, |
|
"eval_rewards/rejected": 0.022396454587578773, |
|
"eval_runtime": 111.134, |
|
"eval_samples_per_second": 10.294, |
|
"eval_steps_per_second": 0.324, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 5.942805799246918, |
|
"learning_rate": 1.2670855568026362e-07, |
|
"logits/chosen": -1.552185297012329, |
|
"logits/rejected": -1.6878124475479126, |
|
"logps/chosen": -135.38902282714844, |
|
"logps/rejected": -174.15255737304688, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.17009037733078003, |
|
"rewards/margins": 0.18223796784877777, |
|
"rewards/rejected": -0.012147602625191212, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 5.410484098522815, |
|
"learning_rate": 1.0998426571724643e-07, |
|
"logits/chosen": -1.5845314264297485, |
|
"logits/rejected": -1.6747452020645142, |
|
"logps/chosen": -146.5388641357422, |
|
"logps/rejected": -157.44863891601562, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.1947994828224182, |
|
"rewards/margins": 0.21365301311016083, |
|
"rewards/rejected": -0.01885353960096836, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 5.323259223525621, |
|
"learning_rate": 9.412754953531663e-08, |
|
"logits/chosen": -1.560361623764038, |
|
"logits/rejected": -1.6760743856430054, |
|
"logps/chosen": -147.3408966064453, |
|
"logps/rejected": -164.4519500732422, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.18070648610591888, |
|
"rewards/margins": 0.22251346707344055, |
|
"rewards/rejected": -0.04180694743990898, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 6.0603812493866025, |
|
"learning_rate": 7.923665945792943e-08, |
|
"logits/chosen": -1.542307734489441, |
|
"logits/rejected": -1.6773264408111572, |
|
"logps/chosen": -132.24139404296875, |
|
"logps/rejected": -148.74737548828125, |
|
"loss": 0.6007, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.17221280932426453, |
|
"rewards/margins": 0.211787611246109, |
|
"rewards/rejected": -0.03957480937242508, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 5.800803684922802, |
|
"learning_rate": 6.540386329965863e-08, |
|
"logits/chosen": -1.613059639930725, |
|
"logits/rejected": -1.6966331005096436, |
|
"logps/chosen": -155.21559143066406, |
|
"logps/rejected": -161.65882873535156, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.17716926336288452, |
|
"rewards/margins": 0.21501335501670837, |
|
"rewards/rejected": -0.03784411773085594, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.294198300700879, |
|
"learning_rate": 5.271487265090163e-08, |
|
"logits/chosen": -1.605891466140747, |
|
"logits/rejected": -1.6633691787719727, |
|
"logps/chosen": -133.00123596191406, |
|
"logps/rejected": -176.7678680419922, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.17767903208732605, |
|
"rewards/margins": 0.228514164686203, |
|
"rewards/rejected": -0.05083512142300606, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 6.853203056351755, |
|
"learning_rate": 4.1248311786649394e-08, |
|
"logits/chosen": -1.6259254217147827, |
|
"logits/rejected": -1.7257139682769775, |
|
"logps/chosen": -135.5113525390625, |
|
"logps/rejected": -180.2209014892578, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.17052185535430908, |
|
"rewards/margins": 0.2071322202682495, |
|
"rewards/rejected": -0.03661039471626282, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.58249557148135, |
|
"learning_rate": 3.107523049009983e-08, |
|
"logits/chosen": -1.5495421886444092, |
|
"logits/rejected": -1.6909148693084717, |
|
"logps/chosen": -148.41799926757812, |
|
"logps/rejected": -188.6688995361328, |
|
"loss": 0.5986, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.18436935544013977, |
|
"rewards/margins": 0.21894951164722443, |
|
"rewards/rejected": -0.03458016738295555, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 6.065104934238928, |
|
"learning_rate": 2.2258663809784888e-08, |
|
"logits/chosen": -1.556806206703186, |
|
"logits/rejected": -1.6664282083511353, |
|
"logps/chosen": -134.76539611816406, |
|
"logps/rejected": -166.59054565429688, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.19658346474170685, |
|
"rewards/margins": 0.23869290947914124, |
|
"rewards/rejected": -0.04210943728685379, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 6.519921387019466, |
|
"learning_rate": 1.4853241478071599e-08, |
|
"logits/chosen": -1.5817980766296387, |
|
"logits/rejected": -1.6547319889068604, |
|
"logps/chosen": -132.71343994140625, |
|
"logps/rejected": -159.65066528320312, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.16476558148860931, |
|
"rewards/margins": 0.182787224650383, |
|
"rewards/rejected": -0.018021635711193085, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_logits/chosen": -1.8108444213867188, |
|
"eval_logits/rejected": -1.544880986213684, |
|
"eval_logps/chosen": -121.37197875976562, |
|
"eval_logps/rejected": -144.05641174316406, |
|
"eval_loss": 0.6354950666427612, |
|
"eval_rewards/accuracies": 0.7326388955116272, |
|
"eval_rewards/chosen": 0.1375703364610672, |
|
"eval_rewards/margins": 0.13708928227424622, |
|
"eval_rewards/rejected": 0.00048106827307492495, |
|
"eval_runtime": 109.3237, |
|
"eval_samples_per_second": 10.464, |
|
"eval_steps_per_second": 0.329, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 5.560808880302564, |
|
"learning_rate": 8.904849411180748e-09, |
|
"logits/chosen": -1.5504529476165771, |
|
"logits/rejected": -1.675254464149475, |
|
"logps/chosen": -141.06692504882812, |
|
"logps/rejected": -165.89645385742188, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.1476067751646042, |
|
"rewards/margins": 0.19318901002407074, |
|
"rewards/rejected": -0.04558226466178894, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 6.1731600663059005, |
|
"learning_rate": 4.45034538815614e-09, |
|
"logits/chosen": -1.56648850440979, |
|
"logits/rejected": -1.7188094854354858, |
|
"logps/chosen": -161.48452758789062, |
|
"logps/rejected": -189.99099731445312, |
|
"loss": 0.593, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.16634421050548553, |
|
"rewards/margins": 0.21816936135292053, |
|
"rewards/rejected": -0.051825135946273804, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.916412317020735, |
|
"learning_rate": 1.5173306705126287e-09, |
|
"logits/chosen": -1.5939347743988037, |
|
"logits/rejected": -1.6984974145889282, |
|
"logps/chosen": -147.58717346191406, |
|
"logps/rejected": -158.92880249023438, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.18250404298305511, |
|
"rewards/margins": 0.252483069896698, |
|
"rewards/rejected": -0.06997901946306229, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 5.628332108752967, |
|
"learning_rate": 1.239789776653899e-10, |
|
"logits/chosen": -1.5746687650680542, |
|
"logits/rejected": -1.7450227737426758, |
|
"logps/chosen": -137.1623077392578, |
|
"logps/rejected": -198.08595275878906, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1561325490474701, |
|
"rewards/margins": 0.1958049237728119, |
|
"rewards/rejected": -0.03967234492301941, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 444, |
|
"total_flos": 0.0, |
|
"train_loss": 0.05915545343278764, |
|
"train_runtime": 553.7697, |
|
"train_samples_per_second": 51.35, |
|
"train_steps_per_second": 0.802 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 444, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |