|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8085106382978724, |
|
"eval_steps": 500, |
|
"global_step": 33, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0851063829787234, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.8645445108413696, |
|
"logits/rejected": 14.429821968078613, |
|
"logps/chosen": -346.043701171875, |
|
"logps/rejected": -212.6157684326172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": 0.5981572866439819, |
|
"logits/rejected": 11.581156730651855, |
|
"logps/chosen": -301.7901306152344, |
|
"logps/rejected": -188.81680297851562, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.2553191489361702, |
|
"grad_norm": 77.4799575805664, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.6666683554649353, |
|
"logits/rejected": 13.3030424118042, |
|
"logps/chosen": -244.64401245117188, |
|
"logps/rejected": -135.6005096435547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 75.18358612060547, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.6529165506362915, |
|
"logits/rejected": 12.279073715209961, |
|
"logps/chosen": -260.230224609375, |
|
"logps/rejected": -151.9573974609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 80.90792846679688, |
|
"learning_rate": 3e-07, |
|
"logits/chosen": 0.008319228887557983, |
|
"logits/rejected": 17.30872917175293, |
|
"logps/chosen": -284.0721435546875, |
|
"logps/rejected": -131.707275390625, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.008991742506623268, |
|
"rewards/margins": 0.007281172554939985, |
|
"rewards/rejected": 0.001710569835267961, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 70.98322296142578, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": 1.6513853073120117, |
|
"logits/rejected": 10.607856750488281, |
|
"logps/chosen": -241.02496337890625, |
|
"logps/rejected": -187.40670776367188, |
|
"loss": 0.6637, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08284933865070343, |
|
"rewards/margins": 0.04663487523794174, |
|
"rewards/rejected": 0.03621446341276169, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.5957446808510638, |
|
"grad_norm": 64.64167022705078, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.655532717704773, |
|
"logits/rejected": 13.186487197875977, |
|
"logps/chosen": -312.20770263671875, |
|
"logps/rejected": -185.97059631347656, |
|
"loss": 0.5792, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 0.3591811954975128, |
|
"rewards/margins": 0.2757205367088318, |
|
"rewards/rejected": 0.08346068859100342, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 50.39228820800781, |
|
"learning_rate": 4.821428571428571e-07, |
|
"logits/chosen": -0.6116840243339539, |
|
"logits/rejected": 14.426715850830078, |
|
"logps/chosen": -278.035888671875, |
|
"logps/rejected": -141.1471710205078, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.6650260090827942, |
|
"rewards/margins": 0.4626970589160919, |
|
"rewards/rejected": 0.20232899487018585, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.7659574468085106, |
|
"grad_norm": 36.09687042236328, |
|
"learning_rate": 4.6428571428571427e-07, |
|
"logits/chosen": 1.258104681968689, |
|
"logits/rejected": 13.34419059753418, |
|
"logps/chosen": -207.1935577392578, |
|
"logps/rejected": -125.97917938232422, |
|
"loss": 0.3753, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.0702375173568726, |
|
"rewards/margins": 0.9072185158729553, |
|
"rewards/rejected": 0.1630191206932068, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 31.1262264251709, |
|
"learning_rate": 4.464285714285714e-07, |
|
"logits/chosen": 1.3978009223937988, |
|
"logits/rejected": 14.55895709991455, |
|
"logps/chosen": -271.84869384765625, |
|
"logps/rejected": -154.90167236328125, |
|
"loss": 0.3268, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.6433621644973755, |
|
"rewards/margins": 1.044425368309021, |
|
"rewards/rejected": 0.5989368557929993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.9361702127659575, |
|
"grad_norm": 36.019447326660156, |
|
"learning_rate": 4.285714285714285e-07, |
|
"logits/chosen": 2.2715587615966797, |
|
"logits/rejected": 11.973756790161133, |
|
"logps/chosen": -254.08721923828125, |
|
"logps/rejected": -192.8157501220703, |
|
"loss": 0.3231, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.2909791469573975, |
|
"rewards/margins": 1.358994483947754, |
|
"rewards/rejected": 0.931984543800354, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.0212765957446808, |
|
"grad_norm": 34.85441207885742, |
|
"learning_rate": 4.1071428571428566e-07, |
|
"logits/chosen": 2.2988595962524414, |
|
"logits/rejected": 11.726805686950684, |
|
"logps/chosen": -290.65106201171875, |
|
"logps/rejected": -231.5535125732422, |
|
"loss": 0.3005, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 2.6145758628845215, |
|
"rewards/margins": 2.101471424102783, |
|
"rewards/rejected": 0.5131043195724487, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.1063829787234043, |
|
"grad_norm": 15.43303394317627, |
|
"learning_rate": 3.928571428571428e-07, |
|
"logits/chosen": 2.161435127258301, |
|
"logits/rejected": 15.173026084899902, |
|
"logps/chosen": -285.2391662597656, |
|
"logps/rejected": -175.35610961914062, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.8329055309295654, |
|
"rewards/margins": 3.6420764923095703, |
|
"rewards/rejected": 0.19082875549793243, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.1914893617021276, |
|
"grad_norm": 22.640640258789062, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": 0.34790876507759094, |
|
"logits/rejected": 16.288211822509766, |
|
"logps/chosen": -258.007080078125, |
|
"logps/rejected": -125.82489013671875, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 3.476907968521118, |
|
"rewards/margins": 3.2935471534729004, |
|
"rewards/rejected": 0.18336114287376404, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 17.350461959838867, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"logits/chosen": 0.2184600532054901, |
|
"logits/rejected": 12.73766040802002, |
|
"logps/chosen": -201.4739532470703, |
|
"logps/rejected": -120.93484497070312, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.4015889167785645, |
|
"rewards/margins": 3.4224205017089844, |
|
"rewards/rejected": -0.020831629633903503, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.3617021276595744, |
|
"grad_norm": 10.290063858032227, |
|
"learning_rate": 3.392857142857143e-07, |
|
"logits/chosen": 1.29380464553833, |
|
"logits/rejected": 17.044889450073242, |
|
"logps/chosen": -255.9779052734375, |
|
"logps/rejected": -129.52340698242188, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.283539295196533, |
|
"rewards/margins": 4.322422504425049, |
|
"rewards/rejected": -0.03888271749019623, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.4468085106382977, |
|
"grad_norm": 12.951630592346191, |
|
"learning_rate": 3.2142857142857145e-07, |
|
"logits/chosen": 0.8126751780509949, |
|
"logits/rejected": 14.440966606140137, |
|
"logps/chosen": -242.29527282714844, |
|
"logps/rejected": -145.05775451660156, |
|
"loss": 0.0923, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 3.8756561279296875, |
|
"rewards/margins": 4.231058597564697, |
|
"rewards/rejected": -0.35540255904197693, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.5319148936170213, |
|
"grad_norm": 19.92337417602539, |
|
"learning_rate": 3.0357142857142855e-07, |
|
"logits/chosen": 1.3031485080718994, |
|
"logits/rejected": 13.797933578491211, |
|
"logps/chosen": -248.54811096191406, |
|
"logps/rejected": -165.89808654785156, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.139030456542969, |
|
"rewards/margins": 4.645418167114258, |
|
"rewards/rejected": -0.5063877701759338, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.6170212765957448, |
|
"grad_norm": 9.488024711608887, |
|
"learning_rate": 2.857142857142857e-07, |
|
"logits/chosen": 2.991806983947754, |
|
"logits/rejected": 14.36280632019043, |
|
"logps/chosen": -278.69769287109375, |
|
"logps/rejected": -145.9281463623047, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.406310558319092, |
|
"rewards/margins": 5.150607109069824, |
|
"rewards/rejected": -0.7442967295646667, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"grad_norm": 12.83969497680664, |
|
"learning_rate": 2.6785714285714284e-07, |
|
"logits/chosen": 3.6287827491760254, |
|
"logits/rejected": 12.436114311218262, |
|
"logps/chosen": -210.47592163085938, |
|
"logps/rejected": -189.6756134033203, |
|
"loss": 0.083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.7564351558685303, |
|
"rewards/margins": 4.538413047790527, |
|
"rewards/rejected": -0.7819780707359314, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.7872340425531914, |
|
"grad_norm": 17.395404815673828, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": 4.2808837890625, |
|
"logits/rejected": 13.777887344360352, |
|
"logps/chosen": -271.89227294921875, |
|
"logps/rejected": -207.44837951660156, |
|
"loss": 0.1051, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 4.251194477081299, |
|
"rewards/margins": 5.237154006958008, |
|
"rewards/rejected": -0.9859597682952881, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.872340425531915, |
|
"grad_norm": 23.122020721435547, |
|
"learning_rate": 2.3214285714285714e-07, |
|
"logits/chosen": 1.1441445350646973, |
|
"logits/rejected": 14.379843711853027, |
|
"logps/chosen": -235.24493408203125, |
|
"logps/rejected": -194.19224548339844, |
|
"loss": 0.1211, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 4.572199821472168, |
|
"rewards/margins": 5.295849800109863, |
|
"rewards/rejected": -0.7236496806144714, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.9574468085106385, |
|
"grad_norm": 13.432173728942871, |
|
"learning_rate": 2.1428571428571426e-07, |
|
"logits/chosen": -0.5294728875160217, |
|
"logits/rejected": 16.71356201171875, |
|
"logps/chosen": -219.78480529785156, |
|
"logps/rejected": -111.86544036865234, |
|
"loss": 0.1006, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 3.635490894317627, |
|
"rewards/margins": 4.1884026527404785, |
|
"rewards/rejected": -0.5529115200042725, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.0425531914893615, |
|
"grad_norm": 8.677014350891113, |
|
"learning_rate": 1.964285714285714e-07, |
|
"logits/chosen": 0.39843907952308655, |
|
"logits/rejected": 13.533981323242188, |
|
"logps/chosen": -216.69137573242188, |
|
"logps/rejected": -143.2519989013672, |
|
"loss": 0.056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.103562355041504, |
|
"rewards/margins": 4.519499778747559, |
|
"rewards/rejected": -0.4159368574619293, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 3.03104305267334, |
|
"learning_rate": 1.7857142857142858e-07, |
|
"logits/chosen": 1.5668433904647827, |
|
"logits/rejected": 14.729446411132812, |
|
"logps/chosen": -191.61404418945312, |
|
"logps/rejected": -183.37631225585938, |
|
"loss": 0.03, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.106316566467285, |
|
"rewards/margins": 5.793395042419434, |
|
"rewards/rejected": -1.6870781183242798, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.2127659574468086, |
|
"grad_norm": 3.136233329772949, |
|
"learning_rate": 1.6071428571428573e-07, |
|
"logits/chosen": 0.05481068789958954, |
|
"logits/rejected": 14.857564926147461, |
|
"logps/chosen": -266.8812561035156, |
|
"logps/rejected": -150.13543701171875, |
|
"loss": 0.0235, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.614926338195801, |
|
"rewards/margins": 5.615174293518066, |
|
"rewards/rejected": -1.0002480745315552, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.297872340425532, |
|
"grad_norm": 2.425645112991333, |
|
"learning_rate": 1.4285714285714285e-07, |
|
"logits/chosen": 1.4185882806777954, |
|
"logits/rejected": 12.52522087097168, |
|
"logps/chosen": -257.33148193359375, |
|
"logps/rejected": -225.2210235595703, |
|
"loss": 0.0226, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.434384822845459, |
|
"rewards/margins": 5.778439044952393, |
|
"rewards/rejected": -1.3440542221069336, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.382978723404255, |
|
"grad_norm": 5.070924282073975, |
|
"learning_rate": 1.25e-07, |
|
"logits/chosen": 1.7797931432724, |
|
"logits/rejected": 12.562594413757324, |
|
"logps/chosen": -289.022216796875, |
|
"logps/rejected": -247.23553466796875, |
|
"loss": 0.0306, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.724697113037109, |
|
"rewards/margins": 6.377914905548096, |
|
"rewards/rejected": -1.6532176733016968, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.4680851063829787, |
|
"grad_norm": 2.1009552478790283, |
|
"learning_rate": 1.0714285714285713e-07, |
|
"logits/chosen": -0.9775732755661011, |
|
"logits/rejected": 14.321802139282227, |
|
"logps/chosen": -209.55625915527344, |
|
"logps/rejected": -115.63282775878906, |
|
"loss": 0.0191, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.2973175048828125, |
|
"rewards/margins": 5.245972156524658, |
|
"rewards/rejected": -0.9486544132232666, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 2.554861545562744, |
|
"learning_rate": 8.928571428571429e-08, |
|
"logits/chosen": 2.66330623626709, |
|
"logits/rejected": 11.6483154296875, |
|
"logps/chosen": -243.45645141601562, |
|
"logps/rejected": -232.2058563232422, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.154801368713379, |
|
"rewards/margins": 6.131631851196289, |
|
"rewards/rejected": -1.9768304824829102, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.6382978723404253, |
|
"grad_norm": 2.7131459712982178, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/chosen": -0.194177508354187, |
|
"logits/rejected": 13.815250396728516, |
|
"logps/chosen": -218.8939208984375, |
|
"logps/rejected": -164.52902221679688, |
|
"loss": 0.0205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.5105462074279785, |
|
"rewards/margins": 5.92357873916626, |
|
"rewards/rejected": -1.413031816482544, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.723404255319149, |
|
"grad_norm": 3.911942481994629, |
|
"learning_rate": 5.3571428571428564e-08, |
|
"logits/chosen": -0.5970292091369629, |
|
"logits/rejected": 16.974449157714844, |
|
"logps/chosen": -247.49411010742188, |
|
"logps/rejected": -128.2796630859375, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.004292011260986, |
|
"rewards/margins": 5.12349271774292, |
|
"rewards/rejected": -1.1192007064819336, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.8085106382978724, |
|
"grad_norm": 3.6512134075164795, |
|
"learning_rate": 3.571428571428571e-08, |
|
"logits/chosen": -1.6241520643234253, |
|
"logits/rejected": 12.453986167907715, |
|
"logps/chosen": -295.5976867675781, |
|
"logps/rejected": -223.0675811767578, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.045155048370361, |
|
"rewards/margins": 6.366570472717285, |
|
"rewards/rejected": -1.321415901184082, |
|
"step": 33 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 33, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9958223708160.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|