|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9885148514851485,
  "eval_steps": 500,
  "global_step": 78,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06336633663366337,
      "grad_norm": 12.625,
      "learning_rate": 3.1249999999999997e-07,
      "log_odds_chosen": 0.9938775897026062,
      "log_odds_ratio": -0.41239112615585327,
      "logps/chosen": -0.6662770509719849,
      "logps/rejected": -1.2300238609313965,
      "loss": 9.4217,
      "nll_loss": 0.9876836538314819,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -0.03331385552883148,
      "rewards/margins": 0.02818734012544155,
      "rewards/rejected": -0.06150120496749878,
      "step": 5
    },
    {
      "epoch": 0.12673267326732673,
      "grad_norm": 16.875,
      "learning_rate": 4.989935734988097e-07,
      "log_odds_chosen": 1.151381492614746,
      "log_odds_ratio": -0.37275490164756775,
      "logps/chosen": -0.5901239514350891,
      "logps/rejected": -1.2277742624282837,
      "loss": 11.2705,
      "nll_loss": 0.896571159362793,
      "rewards/accuracies": 0.856249988079071,
      "rewards/chosen": -0.029506197199225426,
      "rewards/margins": 0.03188251703977585,
      "rewards/rejected": -0.061388712376356125,
      "step": 10
    },
    {
      "epoch": 0.1900990099009901,
      "grad_norm": 15.5,
      "learning_rate": 4.877641290737883e-07,
      "log_odds_chosen": 1.1365224123001099,
      "log_odds_ratio": -0.3716532588005066,
      "logps/chosen": -0.5673869252204895,
      "logps/rejected": -1.1881535053253174,
      "loss": 12.0359,
      "nll_loss": 0.8758338093757629,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.028369342908263206,
      "rewards/margins": 0.031038332730531693,
      "rewards/rejected": -0.05940768122673035,
      "step": 15
    },
    {
      "epoch": 0.25346534653465347,
      "grad_norm": 15.0625,
      "learning_rate": 4.646121984004665e-07,
      "log_odds_chosen": 1.0660278797149658,
      "log_odds_ratio": -0.3872835040092468,
      "logps/chosen": -0.6164752840995789,
      "logps/rejected": -1.207406997680664,
      "loss": 10.6234,
      "nll_loss": 0.9384651184082031,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.030823761597275734,
      "rewards/margins": 0.0295465886592865,
      "rewards/rejected": -0.06037035584449768,
      "step": 20
    },
    {
      "epoch": 0.31683168316831684,
      "grad_norm": 16.875,
      "learning_rate": 4.3069871595684787e-07,
      "log_odds_chosen": 1.2324841022491455,
      "log_odds_ratio": -0.37691444158554077,
      "logps/chosen": -0.6098319888114929,
      "logps/rejected": -1.3082152605056763,
      "loss": 11.208,
      "nll_loss": 0.8900537490844727,
      "rewards/accuracies": 0.862500011920929,
      "rewards/chosen": -0.030491601675748825,
      "rewards/margins": 0.03491916134953499,
      "rewards/rejected": -0.06541076302528381,
      "step": 25
    },
    {
      "epoch": 0.3801980198019802,
      "grad_norm": 16.375,
      "learning_rate": 3.877242453630256e-07,
      "log_odds_chosen": 1.3739392757415771,
      "log_odds_ratio": -0.3297683596611023,
      "logps/chosen": -0.5759536027908325,
      "logps/rejected": -1.337491750717163,
      "loss": 10.5079,
      "nll_loss": 0.8683460354804993,
      "rewards/accuracies": 0.8968750238418579,
      "rewards/chosen": -0.028797682374715805,
      "rewards/margins": 0.038076914846897125,
      "rewards/rejected": -0.06687458604574203,
      "step": 30
    },
    {
      "epoch": 0.44356435643564357,
      "grad_norm": 18.5,
      "learning_rate": 3.378437060203357e-07,
      "log_odds_chosen": 1.5558397769927979,
      "log_odds_ratio": -0.28507766127586365,
      "logps/chosen": -0.5231963992118835,
      "logps/rejected": -1.3864152431488037,
      "loss": 14.1291,
      "nll_loss": 0.7970401048660278,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.026159819215536118,
      "rewards/margins": 0.04316094145178795,
      "rewards/rejected": -0.06932076811790466,
      "step": 35
    },
    {
      "epoch": 0.5069306930693069,
      "grad_norm": 21.625,
      "learning_rate": 2.8355831645441387e-07,
      "log_odds_chosen": 1.7381893396377563,
      "log_odds_ratio": -0.2419164478778839,
      "logps/chosen": -0.4817093014717102,
      "logps/rejected": -1.4321532249450684,
      "loss": 17.0352,
      "nll_loss": 0.7467607259750366,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.02408546581864357,
      "rewards/margins": 0.047522205859422684,
      "rewards/rejected": -0.07160767167806625,
      "step": 40
    },
    {
      "epoch": 0.5702970297029702,
      "grad_norm": 21.375,
      "learning_rate": 2.2759017277414164e-07,
      "log_odds_chosen": 1.6346168518066406,
      "log_odds_ratio": -0.2641943395137787,
      "logps/chosen": -0.5079860091209412,
      "logps/rejected": -1.423356056213379,
      "loss": 14.0588,
      "nll_loss": 0.7725532650947571,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -0.025399303063750267,
      "rewards/margins": 0.04576849564909935,
      "rewards/rejected": -0.07116780430078506,
      "step": 45
    },
    {
      "epoch": 0.6336633663366337,
      "grad_norm": 26.375,
      "learning_rate": 1.7274575140626315e-07,
      "log_odds_chosen": 1.783243179321289,
      "log_odds_ratio": -0.2324325144290924,
      "logps/chosen": -0.46209725737571716,
      "logps/rejected": -1.3920438289642334,
      "loss": 20.5353,
      "nll_loss": 0.7096225023269653,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": -0.023104865103960037,
      "rewards/margins": 0.04649733379483223,
      "rewards/rejected": -0.06960219889879227,
      "step": 50
    },
    {
      "epoch": 0.697029702970297,
      "grad_norm": 24.125,
      "learning_rate": 1.2177518064852348e-07,
      "log_odds_chosen": 1.4938585758209229,
      "log_odds_ratio": -0.30352550745010376,
      "logps/chosen": -0.5551624298095703,
      "logps/rejected": -1.3799673318862915,
      "loss": 15.6133,
      "nll_loss": 0.7854876518249512,
      "rewards/accuracies": 0.9156249761581421,
      "rewards/chosen": -0.027758121490478516,
      "rewards/margins": 0.041240252554416656,
      "rewards/rejected": -0.06899837404489517,
      "step": 55
    },
    {
      "epoch": 0.7603960396039604,
      "grad_norm": 28.125,
      "learning_rate": 7.723433775328384e-08,
      "log_odds_chosen": 1.6778091192245483,
      "log_odds_ratio": -0.26565200090408325,
      "logps/chosen": -0.5478382706642151,
      "logps/rejected": -1.5084162950515747,
      "loss": 17.3363,
      "nll_loss": 0.7867327928543091,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.027391914278268814,
      "rewards/margins": 0.04802890121936798,
      "rewards/rejected": -0.0754208117723465,
      "step": 60
    },
    {
      "epoch": 0.8237623762376237,
      "grad_norm": 26.5,
      "learning_rate": 4.1356686569674335e-08,
      "log_odds_chosen": 1.653925895690918,
      "log_odds_ratio": -0.26407569646835327,
      "logps/chosen": -0.5457277297973633,
      "logps/rejected": -1.4650758504867554,
      "loss": 18.0275,
      "nll_loss": 0.8072822690010071,
      "rewards/accuracies": 0.940625011920929,
      "rewards/chosen": -0.027286384254693985,
      "rewards/margins": 0.04596741124987602,
      "rewards/rejected": -0.07325379550457001,
      "step": 65
    },
    {
      "epoch": 0.8871287128712871,
      "grad_norm": 22.75,
      "learning_rate": 1.5941282340065697e-08,
      "log_odds_chosen": 1.8514635562896729,
      "log_odds_ratio": -0.22955870628356934,
      "logps/chosen": -0.4996102452278137,
      "logps/rejected": -1.5373380184173584,
      "loss": 14.8205,
      "nll_loss": 0.7640124559402466,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": -0.024980511516332626,
      "rewards/margins": 0.05188639834523201,
      "rewards/rejected": -0.07686690986156464,
      "step": 70
    },
    {
      "epoch": 0.9504950495049505,
      "grad_norm": 30.875,
      "learning_rate": 2.2625595580163247e-09,
      "log_odds_chosen": 1.5729515552520752,
      "log_odds_ratio": -0.2771000564098358,
      "logps/chosen": -0.5589004755020142,
      "logps/rejected": -1.4541728496551514,
      "loss": 14.6472,
      "nll_loss": 0.8230066299438477,
      "rewards/accuracies": 0.940625011920929,
      "rewards/chosen": -0.02794502303004265,
      "rewards/margins": 0.04476362094283104,
      "rewards/rejected": -0.07270864397287369,
      "step": 75
    },
    {
      "epoch": 0.9885148514851485,
      "step": 78,
      "total_flos": 0.0,
      "train_loss": 14.124817579220503,
      "train_runtime": 4601.8147,
      "train_samples_per_second": 1.097,
      "train_steps_per_second": 0.017
    }
  ],
  "logging_steps": 5,
  "max_steps": 78,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|