|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 250,
  "global_step": 1672,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.050239234449760764,
      "grad_norm": 2.739856243133545,
      "learning_rate": 5e-07,
      "logits/chosen": -0.5215702056884766,
      "logits/rejected": -0.5743793249130249,
      "logps/chosen": -55.2584228515625,
      "logps/rejected": -27.386730194091797,
      "loss": 0.6931,
      "rewards/accuracies": 0.4627976417541504,
      "rewards/chosen": -0.0007224595756269991,
      "rewards/margins": 0.00034199823858216405,
      "rewards/rejected": -0.001064457930624485,
      "step": 84
    },
    {
      "epoch": 0.10047846889952153,
      "grad_norm": 5.56309175491333,
      "learning_rate": 1e-06,
      "logits/chosen": -0.5266280174255371,
      "logits/rejected": -0.5796422362327576,
      "logps/chosen": -61.078453063964844,
      "logps/rejected": -28.424898147583008,
      "loss": 0.677,
      "rewards/accuracies": 0.644345223903656,
      "rewards/chosen": 0.01867148093879223,
      "rewards/margins": 0.034289903938770294,
      "rewards/rejected": -0.015618422068655491,
      "step": 168
    },
    {
      "epoch": 0.14952153110047847,
      "eval_logits/chosen": -0.6673032641410828,
      "eval_logits/rejected": -0.6792050004005432,
      "eval_logps/chosen": -2.12101674079895,
      "eval_logps/rejected": -7.310378074645996,
      "eval_loss": 0.6611143946647644,
      "eval_rewards/accuracies": 0.878000020980835,
      "eval_rewards/chosen": -0.0029554006177932024,
      "eval_rewards/margins": 0.06584452092647552,
      "eval_rewards/rejected": -0.06879992038011551,
      "eval_runtime": 90.3233,
      "eval_samples_per_second": 5.536,
      "eval_steps_per_second": 2.768,
      "step": 250
    },
    {
      "epoch": 0.1507177033492823,
      "grad_norm": 1.9286553859710693,
      "learning_rate": 9.441489361702127e-07,
      "logits/chosen": -0.5305180549621582,
      "logits/rejected": -0.5869216918945312,
      "logps/chosen": -55.35295867919922,
      "logps/rejected": -28.797080993652344,
      "loss": 0.6057,
      "rewards/accuracies": 0.8898809552192688,
      "rewards/chosen": 0.11930066347122192,
      "rewards/margins": 0.22872012853622437,
      "rewards/rejected": -0.10941947251558304,
      "step": 252
    },
    {
      "epoch": 0.20095693779904306,
      "grad_norm": 2.7939305305480957,
      "learning_rate": 8.882978723404255e-07,
      "logits/chosen": -0.5447388887405396,
      "logits/rejected": -0.5977838039398193,
      "logps/chosen": -50.50016784667969,
      "logps/rejected": -30.050025939941406,
      "loss": 0.5101,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": 0.2724921405315399,
      "rewards/margins": 0.6402528285980225,
      "rewards/rejected": -0.36776062846183777,
      "step": 336
    },
    {
      "epoch": 0.2511961722488038,
      "grad_norm": 2.6437010765075684,
      "learning_rate": 8.324468085106383e-07,
      "logits/chosen": -0.5628253817558289,
      "logits/rejected": -0.6165186762809753,
      "logps/chosen": -47.81046676635742,
      "logps/rejected": -33.11960220336914,
      "loss": 0.4141,
      "rewards/accuracies": 0.8913690447807312,
      "rewards/chosen": 0.3376307785511017,
      "rewards/margins": 1.0564574003219604,
      "rewards/rejected": -0.7188266515731812,
      "step": 420
    },
    {
      "epoch": 0.29904306220095694,
      "eval_logits/chosen": -0.7404754161834717,
      "eval_logits/rejected": -0.7604221105575562,
      "eval_logps/chosen": -3.4617385864257812,
      "eval_logps/rejected": -19.556419372558594,
      "eval_loss": 0.3327634930610657,
      "eval_rewards/accuracies": 0.8960000276565552,
      "eval_rewards/chosen": -0.13702762126922607,
      "eval_rewards/margins": 1.1563764810562134,
      "eval_rewards/rejected": -1.2934041023254395,
      "eval_runtime": 90.3284,
      "eval_samples_per_second": 5.535,
      "eval_steps_per_second": 2.768,
      "step": 500
    },
    {
      "epoch": 0.3014354066985646,
      "grad_norm": 2.6485891342163086,
      "learning_rate": 7.765957446808509e-07,
      "logits/chosen": -0.5817141532897949,
      "logits/rejected": -0.6466739177703857,
      "logps/chosen": -46.02522277832031,
      "logps/rejected": -36.878692626953125,
      "loss": 0.3166,
      "rewards/accuracies": 0.9032738208770752,
      "rewards/chosen": 0.3111897110939026,
      "rewards/margins": 1.5069682598114014,
      "rewards/rejected": -1.1957786083221436,
      "step": 504
    },
    {
      "epoch": 0.35167464114832536,
      "grad_norm": 3.9122705459594727,
      "learning_rate": 7.207446808510638e-07,
      "logits/chosen": -0.6006772518157959,
      "logits/rejected": -0.6688662767410278,
      "logps/chosen": -47.613197326660156,
      "logps/rejected": -42.94892120361328,
      "loss": 0.2404,
      "rewards/accuracies": 0.9092261791229248,
      "rewards/chosen": 0.2409990429878235,
      "rewards/margins": 2.0163769721984863,
      "rewards/rejected": -1.7753779888153076,
      "step": 588
    },
    {
      "epoch": 0.4019138755980861,
      "grad_norm": 7.711537837982178,
      "learning_rate": 6.648936170212765e-07,
      "logits/chosen": -0.6143811345100403,
      "logits/rejected": -0.6911322474479675,
      "logps/chosen": -54.04777526855469,
      "logps/rejected": -50.569236755371094,
      "loss": 0.1869,
      "rewards/accuracies": 0.9330357313156128,
      "rewards/chosen": 0.23013024032115936,
      "rewards/margins": 2.570420026779175,
      "rewards/rejected": -2.340289831161499,
      "step": 672
    },
    {
      "epoch": 0.4485645933014354,
      "eval_logits/chosen": -0.827215313911438,
      "eval_logits/rejected": -0.8525059819221497,
      "eval_logps/chosen": -5.357245445251465,
      "eval_logps/rejected": -34.60578918457031,
      "eval_loss": 0.19428196549415588,
      "eval_rewards/accuracies": 0.9280000329017639,
      "eval_rewards/chosen": -0.3265782594680786,
      "eval_rewards/margins": 2.4717628955841064,
      "eval_rewards/rejected": -2.7983410358428955,
      "eval_runtime": 90.3138,
      "eval_samples_per_second": 5.536,
      "eval_steps_per_second": 2.768,
      "step": 750
    },
    {
      "epoch": 0.45215311004784686,
      "grad_norm": 4.005836486816406,
      "learning_rate": 6.090425531914894e-07,
      "logits/chosen": -0.6301328539848328,
      "logits/rejected": -0.7088668346405029,
      "logps/chosen": -42.79747772216797,
      "logps/rejected": -50.45292663574219,
      "loss": 0.1786,
      "rewards/accuracies": 0.9345238208770752,
      "rewards/chosen": 0.04424615204334259,
      "rewards/margins": 2.7416493892669678,
      "rewards/rejected": -2.6974034309387207,
      "step": 756
    },
    {
      "epoch": 0.5023923444976076,
      "grad_norm": 3.9601917266845703,
      "learning_rate": 5.531914893617021e-07,
      "logits/chosen": -0.6196833252906799,
      "logits/rejected": -0.7095603346824646,
      "logps/chosen": -47.29566192626953,
      "logps/rejected": -54.954986572265625,
      "loss": 0.1614,
      "rewards/accuracies": 0.9345238208770752,
      "rewards/chosen": 0.14783993363380432,
      "rewards/margins": 3.119297742843628,
      "rewards/rejected": -2.9714577198028564,
      "step": 840
    },
    {
      "epoch": 0.5526315789473685,
      "grad_norm": 4.151618480682373,
      "learning_rate": 4.973404255319149e-07,
      "logits/chosen": -0.5776461958885193,
      "logits/rejected": -0.6859668493270874,
      "logps/chosen": -56.877925872802734,
      "logps/rejected": -61.5712890625,
      "loss": 0.1234,
      "rewards/accuracies": 0.9508928656578064,
      "rewards/chosen": 0.2916085124015808,
      "rewards/margins": 3.574091672897339,
      "rewards/rejected": -3.2824831008911133,
      "step": 924
    },
    {
      "epoch": 0.5980861244019139,
      "eval_logits/chosen": -0.833606481552124,
      "eval_logits/rejected": -0.8610065579414368,
      "eval_logps/chosen": -5.521287441253662,
      "eval_logps/rejected": -39.60647201538086,
      "eval_loss": 0.1578649878501892,
      "eval_rewards/accuracies": 0.9380000233650208,
      "eval_rewards/chosen": -0.3429825007915497,
      "eval_rewards/margins": 2.9554266929626465,
      "eval_rewards/rejected": -3.2984092235565186,
      "eval_runtime": 90.3107,
      "eval_samples_per_second": 5.536,
      "eval_steps_per_second": 2.768,
      "step": 1000
    },
    {
      "epoch": 0.6028708133971292,
      "grad_norm": 2.2310285568237305,
      "learning_rate": 4.4148936170212766e-07,
      "logits/chosen": -0.6338366866111755,
      "logits/rejected": -0.7232056260108948,
      "logps/chosen": -42.76348114013672,
      "logps/rejected": -56.131858825683594,
      "loss": 0.1267,
      "rewards/accuracies": 0.9449405074119568,
      "rewards/chosen": 0.11895173043012619,
      "rewards/margins": 3.4666144847869873,
      "rewards/rejected": -3.3476624488830566,
      "step": 1008
    },
    {
      "epoch": 0.65311004784689,
      "grad_norm": 0.4909895062446594,
      "learning_rate": 3.856382978723404e-07,
      "logits/chosen": -0.612333357334137,
      "logits/rejected": -0.7189978957176208,
      "logps/chosen": -52.86572265625,
      "logps/rejected": -62.92350387573242,
      "loss": 0.0975,
      "rewards/accuracies": 0.9702380895614624,
      "rewards/chosen": 0.2107280194759369,
      "rewards/margins": 3.751209259033203,
      "rewards/rejected": -3.5404813289642334,
      "step": 1092
    },
    {
      "epoch": 0.7033492822966507,
      "grad_norm": 1.8026127815246582,
      "learning_rate": 3.2978723404255315e-07,
      "logits/chosen": -0.6208314895629883,
      "logits/rejected": -0.7236352562904358,
      "logps/chosen": -48.80342102050781,
      "logps/rejected": -60.991233825683594,
      "loss": 0.122,
      "rewards/accuracies": 0.949404776096344,
      "rewards/chosen": 0.10279477387666702,
      "rewards/margins": 3.68986439704895,
      "rewards/rejected": -3.587069511413574,
      "step": 1176
    },
    {
      "epoch": 0.7476076555023924,
      "eval_logits/chosen": -0.8265290856361389,
      "eval_logits/rejected": -0.8553086519241333,
      "eval_logps/chosen": -5.278414249420166,
      "eval_logps/rejected": -42.231048583984375,
      "eval_loss": 0.143920436501503,
      "eval_rewards/accuracies": 0.9360000491142273,
      "eval_rewards/chosen": -0.31869521737098694,
      "eval_rewards/margins": 3.2421717643737793,
      "eval_rewards/rejected": -3.5608673095703125,
      "eval_runtime": 90.3194,
      "eval_samples_per_second": 5.536,
      "eval_steps_per_second": 2.768,
      "step": 1250
    },
    {
      "epoch": 0.7535885167464115,
      "grad_norm": 0.7733187079429626,
      "learning_rate": 2.7393617021276595e-07,
      "logits/chosen": -0.595526933670044,
      "logits/rejected": -0.7032517790794373,
      "logps/chosen": -53.06210708618164,
      "logps/rejected": -64.58065795898438,
      "loss": 0.0987,
      "rewards/accuracies": 0.9598214626312256,
      "rewards/chosen": 0.2242652177810669,
      "rewards/margins": 3.922701835632324,
      "rewards/rejected": -3.698436975479126,
      "step": 1260
    },
    {
      "epoch": 0.8038277511961722,
      "grad_norm": 13.78078556060791,
      "learning_rate": 2.1808510638297872e-07,
      "logits/chosen": -0.6165376305580139,
      "logits/rejected": -0.7192720174789429,
      "logps/chosen": -43.811336517333984,
      "logps/rejected": -62.044307708740234,
      "loss": 0.0933,
      "rewards/accuracies": 0.9717261791229248,
      "rewards/chosen": 0.13936470448970795,
      "rewards/margins": 3.9077537059783936,
      "rewards/rejected": -3.7683887481689453,
      "step": 1344
    },
    {
      "epoch": 0.854066985645933,
      "grad_norm": 8.148602485656738,
      "learning_rate": 1.622340425531915e-07,
      "logits/chosen": -0.5712071657180786,
      "logits/rejected": -0.6922958493232727,
      "logps/chosen": -53.978843688964844,
      "logps/rejected": -67.05591583251953,
      "loss": 0.0821,
      "rewards/accuracies": 0.973214328289032,
      "rewards/chosen": 0.26229721307754517,
      "rewards/margins": 4.126736164093018,
      "rewards/rejected": -3.864438772201538,
      "step": 1428
    },
    {
      "epoch": 0.8971291866028708,
      "eval_logits/chosen": -0.8240648508071899,
      "eval_logits/rejected": -0.8534601330757141,
      "eval_logps/chosen": -5.354381084442139,
      "eval_logps/rejected": -43.72341537475586,
      "eval_loss": 0.13976776599884033,
      "eval_rewards/accuracies": 0.9340000152587891,
      "eval_rewards/chosen": -0.3262918293476105,
      "eval_rewards/margins": 3.383812189102173,
      "eval_rewards/rejected": -3.71010422706604,
      "eval_runtime": 90.2962,
      "eval_samples_per_second": 5.537,
      "eval_steps_per_second": 2.769,
      "step": 1500
    },
    {
      "epoch": 0.9043062200956937,
      "grad_norm": 0.7219157218933105,
      "learning_rate": 1.0638297872340425e-07,
      "logits/chosen": -0.5762205719947815,
      "logits/rejected": -0.6943542957305908,
      "logps/chosen": -50.31645965576172,
      "logps/rejected": -66.44419860839844,
      "loss": 0.0927,
      "rewards/accuracies": 0.9657738208770752,
      "rewards/chosen": 0.20835421979427338,
      "rewards/margins": 4.124646186828613,
      "rewards/rejected": -3.916292190551758,
      "step": 1512
    },
    {
      "epoch": 0.9545454545454546,
      "grad_norm": 8.767385482788086,
      "learning_rate": 5.053191489361702e-08,
      "logits/chosen": -0.5936585664749146,
      "logits/rejected": -0.6990638971328735,
      "logps/chosen": -51.6839714050293,
      "logps/rejected": -65.34242248535156,
      "loss": 0.1066,
      "rewards/accuracies": 0.9553571343421936,
      "rewards/chosen": 0.15223945677280426,
      "rewards/margins": 4.063188076019287,
      "rewards/rejected": -3.9109482765197754,
      "step": 1596
    },
    {
      "epoch": 1.0,
      "step": 1672,
      "total_flos": 3.5989630324781875e+17,
      "train_loss": 0.25213732730828975,
      "train_runtime": 8848.1085,
      "train_samples_per_second": 1.512,
      "train_steps_per_second": 0.189
    }
  ],
  "logging_steps": 84,
  "max_steps": 1672,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.5989630324781875e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|