silviasapora's picture
Model save
fabde15 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9885148514851485,
"eval_steps": 500,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06336633663366337,
"grad_norm": 12.625,
"learning_rate": 3.1249999999999997e-07,
"log_odds_chosen": 0.9938775897026062,
"log_odds_ratio": -0.41239112615585327,
"logps/chosen": -0.6662770509719849,
"logps/rejected": -1.2300238609313965,
"loss": 9.4217,
"nll_loss": 0.9876836538314819,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.03331385552883148,
"rewards/margins": 0.02818734012544155,
"rewards/rejected": -0.06150120496749878,
"step": 5
},
{
"epoch": 0.12673267326732673,
"grad_norm": 16.875,
"learning_rate": 4.989935734988097e-07,
"log_odds_chosen": 1.151381492614746,
"log_odds_ratio": -0.37275490164756775,
"logps/chosen": -0.5901239514350891,
"logps/rejected": -1.2277742624282837,
"loss": 11.2705,
"nll_loss": 0.896571159362793,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.029506197199225426,
"rewards/margins": 0.03188251703977585,
"rewards/rejected": -0.061388712376356125,
"step": 10
},
{
"epoch": 0.1900990099009901,
"grad_norm": 15.5,
"learning_rate": 4.877641290737883e-07,
"log_odds_chosen": 1.1365224123001099,
"log_odds_ratio": -0.3716532588005066,
"logps/chosen": -0.5673869252204895,
"logps/rejected": -1.1881535053253174,
"loss": 12.0359,
"nll_loss": 0.8758338093757629,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.028369342908263206,
"rewards/margins": 0.031038332730531693,
"rewards/rejected": -0.05940768122673035,
"step": 15
},
{
"epoch": 0.25346534653465347,
"grad_norm": 15.0625,
"learning_rate": 4.646121984004665e-07,
"log_odds_chosen": 1.0660278797149658,
"log_odds_ratio": -0.3872835040092468,
"logps/chosen": -0.6164752840995789,
"logps/rejected": -1.207406997680664,
"loss": 10.6234,
"nll_loss": 0.9384651184082031,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.030823761597275734,
"rewards/margins": 0.0295465886592865,
"rewards/rejected": -0.06037035584449768,
"step": 20
},
{
"epoch": 0.31683168316831684,
"grad_norm": 16.875,
"learning_rate": 4.3069871595684787e-07,
"log_odds_chosen": 1.2324841022491455,
"log_odds_ratio": -0.37691444158554077,
"logps/chosen": -0.6098319888114929,
"logps/rejected": -1.3082152605056763,
"loss": 11.208,
"nll_loss": 0.8900537490844727,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.030491601675748825,
"rewards/margins": 0.03491916134953499,
"rewards/rejected": -0.06541076302528381,
"step": 25
},
{
"epoch": 0.3801980198019802,
"grad_norm": 16.375,
"learning_rate": 3.877242453630256e-07,
"log_odds_chosen": 1.3739392757415771,
"log_odds_ratio": -0.3297683596611023,
"logps/chosen": -0.5759536027908325,
"logps/rejected": -1.337491750717163,
"loss": 10.5079,
"nll_loss": 0.8683460354804993,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.028797682374715805,
"rewards/margins": 0.038076914846897125,
"rewards/rejected": -0.06687458604574203,
"step": 30
},
{
"epoch": 0.44356435643564357,
"grad_norm": 18.5,
"learning_rate": 3.378437060203357e-07,
"log_odds_chosen": 1.5558397769927979,
"log_odds_ratio": -0.28507766127586365,
"logps/chosen": -0.5231963992118835,
"logps/rejected": -1.3864152431488037,
"loss": 14.1291,
"nll_loss": 0.7970401048660278,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.026159819215536118,
"rewards/margins": 0.04316094145178795,
"rewards/rejected": -0.06932076811790466,
"step": 35
},
{
"epoch": 0.5069306930693069,
"grad_norm": 21.625,
"learning_rate": 2.8355831645441387e-07,
"log_odds_chosen": 1.7381893396377563,
"log_odds_ratio": -0.2419164478778839,
"logps/chosen": -0.4817093014717102,
"logps/rejected": -1.4321532249450684,
"loss": 17.0352,
"nll_loss": 0.7467607259750366,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02408546581864357,
"rewards/margins": 0.047522205859422684,
"rewards/rejected": -0.07160767167806625,
"step": 40
},
{
"epoch": 0.5702970297029702,
"grad_norm": 21.375,
"learning_rate": 2.2759017277414164e-07,
"log_odds_chosen": 1.6346168518066406,
"log_odds_ratio": -0.2641943395137787,
"logps/chosen": -0.5079860091209412,
"logps/rejected": -1.423356056213379,
"loss": 14.0588,
"nll_loss": 0.7725532650947571,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.025399303063750267,
"rewards/margins": 0.04576849564909935,
"rewards/rejected": -0.07116780430078506,
"step": 45
},
{
"epoch": 0.6336633663366337,
"grad_norm": 26.375,
"learning_rate": 1.7274575140626315e-07,
"log_odds_chosen": 1.783243179321289,
"log_odds_ratio": -0.2324325144290924,
"logps/chosen": -0.46209725737571716,
"logps/rejected": -1.3920438289642334,
"loss": 20.5353,
"nll_loss": 0.7096225023269653,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.023104865103960037,
"rewards/margins": 0.04649733379483223,
"rewards/rejected": -0.06960219889879227,
"step": 50
},
{
"epoch": 0.697029702970297,
"grad_norm": 24.125,
"learning_rate": 1.2177518064852348e-07,
"log_odds_chosen": 1.4938585758209229,
"log_odds_ratio": -0.30352550745010376,
"logps/chosen": -0.5551624298095703,
"logps/rejected": -1.3799673318862915,
"loss": 15.6133,
"nll_loss": 0.7854876518249512,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": -0.027758121490478516,
"rewards/margins": 0.041240252554416656,
"rewards/rejected": -0.06899837404489517,
"step": 55
},
{
"epoch": 0.7603960396039604,
"grad_norm": 28.125,
"learning_rate": 7.723433775328384e-08,
"log_odds_chosen": 1.6778091192245483,
"log_odds_ratio": -0.26565200090408325,
"logps/chosen": -0.5478382706642151,
"logps/rejected": -1.5084162950515747,
"loss": 17.3363,
"nll_loss": 0.7867327928543091,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.027391914278268814,
"rewards/margins": 0.04802890121936798,
"rewards/rejected": -0.0754208117723465,
"step": 60
},
{
"epoch": 0.8237623762376237,
"grad_norm": 26.5,
"learning_rate": 4.1356686569674335e-08,
"log_odds_chosen": 1.653925895690918,
"log_odds_ratio": -0.26407569646835327,
"logps/chosen": -0.5457277297973633,
"logps/rejected": -1.4650758504867554,
"loss": 18.0275,
"nll_loss": 0.8072822690010071,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": -0.027286384254693985,
"rewards/margins": 0.04596741124987602,
"rewards/rejected": -0.07325379550457001,
"step": 65
},
{
"epoch": 0.8871287128712871,
"grad_norm": 22.75,
"learning_rate": 1.5941282340065697e-08,
"log_odds_chosen": 1.8514635562896729,
"log_odds_ratio": -0.22955870628356934,
"logps/chosen": -0.4996102452278137,
"logps/rejected": -1.5373380184173584,
"loss": 14.8205,
"nll_loss": 0.7640124559402466,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.024980511516332626,
"rewards/margins": 0.05188639834523201,
"rewards/rejected": -0.07686690986156464,
"step": 70
},
{
"epoch": 0.9504950495049505,
"grad_norm": 30.875,
"learning_rate": 2.2625595580163247e-09,
"log_odds_chosen": 1.5729515552520752,
"log_odds_ratio": -0.2771000564098358,
"logps/chosen": -0.5589004755020142,
"logps/rejected": -1.4541728496551514,
"loss": 14.6472,
"nll_loss": 0.8230066299438477,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": -0.02794502303004265,
"rewards/margins": 0.04476362094283104,
"rewards/rejected": -0.07270864397287369,
"step": 75
},
{
"epoch": 0.9885148514851485,
"step": 78,
"total_flos": 0.0,
"train_loss": 14.124817579220503,
"train_runtime": 4601.8147,
"train_samples_per_second": 1.097,
"train_steps_per_second": 0.017
}
],
"logging_steps": 5,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}