silviasapora's picture
Model save
319b0af verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9885148514851485,
"eval_steps": 500,
"global_step": 78,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06336633663366337,
"grad_norm": 6.15625,
"learning_rate": 3.1249999999999997e-07,
"log_odds_chosen": 0.9902670979499817,
"log_odds_ratio": -0.4136257767677307,
"logps/chosen": -0.6657758951187134,
"logps/rejected": -1.2273353338241577,
"loss": 2.0491,
"nll_loss": 0.9891467094421387,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.03328879922628403,
"rewards/margins": 0.028077969327569008,
"rewards/rejected": -0.061366766691207886,
"step": 5
},
{
"epoch": 0.12673267326732673,
"grad_norm": 8.5625,
"learning_rate": 4.989935734988097e-07,
"log_odds_chosen": 1.1518529653549194,
"log_odds_ratio": -0.37196290493011475,
"logps/chosen": -0.5914251208305359,
"logps/rejected": -1.2302882671356201,
"loss": 2.4679,
"nll_loss": 0.8968847393989563,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": -0.029571253806352615,
"rewards/margins": 0.03194316104054451,
"rewards/rejected": -0.061514418572187424,
"step": 10
},
{
"epoch": 0.1900990099009901,
"grad_norm": 7.15625,
"learning_rate": 4.877641290737883e-07,
"log_odds_chosen": 1.1437830924987793,
"log_odds_ratio": -0.37077194452285767,
"logps/chosen": -0.5667107701301575,
"logps/rejected": -1.1911952495574951,
"loss": 2.4763,
"nll_loss": 0.8755158185958862,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": -0.028335541486740112,
"rewards/margins": 0.031224220991134644,
"rewards/rejected": -0.05955975502729416,
"step": 15
},
{
"epoch": 0.25346534653465347,
"grad_norm": 7.53125,
"learning_rate": 4.646121984004665e-07,
"log_odds_chosen": 1.0641790628433228,
"log_odds_ratio": -0.38870134949684143,
"logps/chosen": -0.6166585683822632,
"logps/rejected": -1.206767201423645,
"loss": 2.3836,
"nll_loss": 0.9371929168701172,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.030832935124635696,
"rewards/margins": 0.029505427926778793,
"rewards/rejected": -0.06033835932612419,
"step": 20
},
{
"epoch": 0.31683168316831684,
"grad_norm": 7.875,
"learning_rate": 4.3069871595684787e-07,
"log_odds_chosen": 1.2286351919174194,
"log_odds_ratio": -0.37730690836906433,
"logps/chosen": -0.6100655198097229,
"logps/rejected": -1.3068325519561768,
"loss": 2.3784,
"nll_loss": 0.8896926641464233,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.030503276735544205,
"rewards/margins": 0.034838344901800156,
"rewards/rejected": -0.06534162163734436,
"step": 25
},
{
"epoch": 0.3801980198019802,
"grad_norm": 7.9375,
"learning_rate": 3.877242453630256e-07,
"log_odds_chosen": 1.3759397268295288,
"log_odds_ratio": -0.3282025456428528,
"logps/chosen": -0.5752947926521301,
"logps/rejected": -1.338653326034546,
"loss": 2.2376,
"nll_loss": 0.8668821454048157,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.028764739632606506,
"rewards/margins": 0.038167934864759445,
"rewards/rejected": -0.06693266332149506,
"step": 30
},
{
"epoch": 0.44356435643564357,
"grad_norm": 8.125,
"learning_rate": 3.378437060203357e-07,
"log_odds_chosen": 1.5571784973144531,
"log_odds_ratio": -0.28482693433761597,
"logps/chosen": -0.5232511758804321,
"logps/rejected": -1.388687014579773,
"loss": 2.8435,
"nll_loss": 0.7976736426353455,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.026162561029195786,
"rewards/margins": 0.04327179118990898,
"rewards/rejected": -0.06943435966968536,
"step": 35
},
{
"epoch": 0.5069306930693069,
"grad_norm": 9.1875,
"learning_rate": 2.8355831645441387e-07,
"log_odds_chosen": 1.7383034229278564,
"log_odds_ratio": -0.24174487590789795,
"logps/chosen": -0.48146629333496094,
"logps/rejected": -1.4316900968551636,
"loss": 2.8827,
"nll_loss": 0.7479132413864136,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": -0.024073313921689987,
"rewards/margins": 0.04751119762659073,
"rewards/rejected": -0.07158450782299042,
"step": 40
},
{
"epoch": 0.5702970297029702,
"grad_norm": 9.0,
"learning_rate": 2.2759017277414164e-07,
"log_odds_chosen": 1.639399766921997,
"log_odds_ratio": -0.26323410868644714,
"logps/chosen": -0.5090625882148743,
"logps/rejected": -1.4268980026245117,
"loss": 2.6135,
"nll_loss": 0.7730764746665955,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.025453131645917892,
"rewards/margins": 0.04589176923036575,
"rewards/rejected": -0.07134490460157394,
"step": 45
},
{
"epoch": 0.6336633663366337,
"grad_norm": 12.5625,
"learning_rate": 1.7274575140626315e-07,
"log_odds_chosen": 1.7857434749603271,
"log_odds_ratio": -0.23216819763183594,
"logps/chosen": -0.4630921483039856,
"logps/rejected": -1.3960620164871216,
"loss": 3.6278,
"nll_loss": 0.7098131775856018,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": -0.0231546089053154,
"rewards/margins": 0.04664849489927292,
"rewards/rejected": -0.06980310380458832,
"step": 50
},
{
"epoch": 0.697029702970297,
"grad_norm": 10.125,
"learning_rate": 1.2177518064852348e-07,
"log_odds_chosen": 1.4938031435012817,
"log_odds_ratio": -0.3022598326206207,
"logps/chosen": -0.5556260347366333,
"logps/rejected": -1.3820416927337646,
"loss": 2.8267,
"nll_loss": 0.7848575115203857,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.027781302109360695,
"rewards/margins": 0.041320785880088806,
"rewards/rejected": -0.06910209357738495,
"step": 55
},
{
"epoch": 0.7603960396039604,
"grad_norm": 11.4375,
"learning_rate": 7.723433775328384e-08,
"log_odds_chosen": 1.6789665222167969,
"log_odds_ratio": -0.26475173234939575,
"logps/chosen": -0.5475651621818542,
"logps/rejected": -1.5100398063659668,
"loss": 2.7577,
"nll_loss": 0.7874841690063477,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.02737826108932495,
"rewards/margins": 0.048123735934495926,
"rewards/rejected": -0.07550199329853058,
"step": 60
},
{
"epoch": 0.8237623762376237,
"grad_norm": 10.4375,
"learning_rate": 4.1356686569674335e-08,
"log_odds_chosen": 1.652230978012085,
"log_odds_ratio": -0.2656940817832947,
"logps/chosen": -0.54820317029953,
"logps/rejected": -1.4693254232406616,
"loss": 2.5008,
"nll_loss": 0.8066957592964172,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.02741015888750553,
"rewards/margins": 0.0460561141371727,
"rewards/rejected": -0.07346627861261368,
"step": 65
},
{
"epoch": 0.8871287128712871,
"grad_norm": 9.8125,
"learning_rate": 1.5941282340065697e-08,
"log_odds_chosen": 1.8457355499267578,
"log_odds_ratio": -0.23030798137187958,
"logps/chosen": -0.5012167692184448,
"logps/rejected": -1.5387108325958252,
"loss": 2.9364,
"nll_loss": 0.7640018463134766,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.02506083808839321,
"rewards/margins": 0.05187469720840454,
"rewards/rejected": -0.0769355446100235,
"step": 70
},
{
"epoch": 0.9504950495049505,
"grad_norm": 13.5,
"learning_rate": 2.2625595580163247e-09,
"log_odds_chosen": 1.5747287273406982,
"log_odds_ratio": -0.2784648537635803,
"logps/chosen": -0.5585237741470337,
"logps/rejected": -1.4540793895721436,
"loss": 2.5767,
"nll_loss": 0.823401153087616,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -0.027926195412874222,
"rewards/margins": 0.044777773320674896,
"rewards/rejected": -0.07270397245883942,
"step": 75
},
{
"epoch": 0.9885148514851485,
"step": 78,
"total_flos": 0.0,
"train_loss": 2.6355448135962853,
"train_runtime": 1528.1787,
"train_samples_per_second": 3.305,
"train_steps_per_second": 0.051
}
],
"logging_steps": 5,
"max_steps": 78,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}