|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9885148514851485, |
|
"eval_steps": 500, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06336633663366337, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"log_odds_chosen": 0.9902670979499817, |
|
"log_odds_ratio": -0.4136257767677307, |
|
"logps/chosen": -0.6657758951187134, |
|
"logps/rejected": -1.2273353338241577, |
|
"loss": 2.0491, |
|
"nll_loss": 0.9891467094421387, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.03328879922628403, |
|
"rewards/margins": 0.028077969327569008, |
|
"rewards/rejected": -0.061366766691207886, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12673267326732673, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 4.989935734988097e-07, |
|
"log_odds_chosen": 1.1518529653549194, |
|
"log_odds_ratio": -0.37196290493011475, |
|
"logps/chosen": -0.5914251208305359, |
|
"logps/rejected": -1.2302882671356201, |
|
"loss": 2.4679, |
|
"nll_loss": 0.8968847393989563, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.029571253806352615, |
|
"rewards/margins": 0.03194316104054451, |
|
"rewards/rejected": -0.061514418572187424, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1900990099009901, |
|
"grad_norm": 7.15625, |
|
"learning_rate": 4.877641290737883e-07, |
|
"log_odds_chosen": 1.1437830924987793, |
|
"log_odds_ratio": -0.37077194452285767, |
|
"logps/chosen": -0.5667107701301575, |
|
"logps/rejected": -1.1911952495574951, |
|
"loss": 2.4763, |
|
"nll_loss": 0.8755158185958862, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.028335541486740112, |
|
"rewards/margins": 0.031224220991134644, |
|
"rewards/rejected": -0.05955975502729416, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.25346534653465347, |
|
"grad_norm": 7.53125, |
|
"learning_rate": 4.646121984004665e-07, |
|
"log_odds_chosen": 1.0641790628433228, |
|
"log_odds_ratio": -0.38870134949684143, |
|
"logps/chosen": -0.6166585683822632, |
|
"logps/rejected": -1.206767201423645, |
|
"loss": 2.3836, |
|
"nll_loss": 0.9371929168701172, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.030832935124635696, |
|
"rewards/margins": 0.029505427926778793, |
|
"rewards/rejected": -0.06033835932612419, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.31683168316831684, |
|
"grad_norm": 7.875, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"log_odds_chosen": 1.2286351919174194, |
|
"log_odds_ratio": -0.37730690836906433, |
|
"logps/chosen": -0.6100655198097229, |
|
"logps/rejected": -1.3068325519561768, |
|
"loss": 2.3784, |
|
"nll_loss": 0.8896926641464233, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.030503276735544205, |
|
"rewards/margins": 0.034838344901800156, |
|
"rewards/rejected": -0.06534162163734436, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3801980198019802, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 3.877242453630256e-07, |
|
"log_odds_chosen": 1.3759397268295288, |
|
"log_odds_ratio": -0.3282025456428528, |
|
"logps/chosen": -0.5752947926521301, |
|
"logps/rejected": -1.338653326034546, |
|
"loss": 2.2376, |
|
"nll_loss": 0.8668821454048157, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.028764739632606506, |
|
"rewards/margins": 0.038167934864759445, |
|
"rewards/rejected": -0.06693266332149506, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.44356435643564357, |
|
"grad_norm": 8.125, |
|
"learning_rate": 3.378437060203357e-07, |
|
"log_odds_chosen": 1.5571784973144531, |
|
"log_odds_ratio": -0.28482693433761597, |
|
"logps/chosen": -0.5232511758804321, |
|
"logps/rejected": -1.388687014579773, |
|
"loss": 2.8435, |
|
"nll_loss": 0.7976736426353455, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.026162561029195786, |
|
"rewards/margins": 0.04327179118990898, |
|
"rewards/rejected": -0.06943435966968536, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5069306930693069, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"log_odds_chosen": 1.7383034229278564, |
|
"log_odds_ratio": -0.24174487590789795, |
|
"logps/chosen": -0.48146629333496094, |
|
"logps/rejected": -1.4316900968551636, |
|
"loss": 2.8827, |
|
"nll_loss": 0.7479132413864136, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.024073313921689987, |
|
"rewards/margins": 0.04751119762659073, |
|
"rewards/rejected": -0.07158450782299042, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5702970297029702, |
|
"grad_norm": 9.0, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"log_odds_chosen": 1.639399766921997, |
|
"log_odds_ratio": -0.26323410868644714, |
|
"logps/chosen": -0.5090625882148743, |
|
"logps/rejected": -1.4268980026245117, |
|
"loss": 2.6135, |
|
"nll_loss": 0.7730764746665955, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.025453131645917892, |
|
"rewards/margins": 0.04589176923036575, |
|
"rewards/rejected": -0.07134490460157394, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6336633663366337, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"log_odds_chosen": 1.7857434749603271, |
|
"log_odds_ratio": -0.23216819763183594, |
|
"logps/chosen": -0.4630921483039856, |
|
"logps/rejected": -1.3960620164871216, |
|
"loss": 3.6278, |
|
"nll_loss": 0.7098131775856018, |
|
"rewards/accuracies": 0.9593750238418579, |
|
"rewards/chosen": -0.0231546089053154, |
|
"rewards/margins": 0.04664849489927292, |
|
"rewards/rejected": -0.06980310380458832, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.697029702970297, |
|
"grad_norm": 10.125, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"log_odds_chosen": 1.4938031435012817, |
|
"log_odds_ratio": -0.3022598326206207, |
|
"logps/chosen": -0.5556260347366333, |
|
"logps/rejected": -1.3820416927337646, |
|
"loss": 2.8267, |
|
"nll_loss": 0.7848575115203857, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.027781302109360695, |
|
"rewards/margins": 0.041320785880088806, |
|
"rewards/rejected": -0.06910209357738495, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7603960396039604, |
|
"grad_norm": 11.4375, |
|
"learning_rate": 7.723433775328384e-08, |
|
"log_odds_chosen": 1.6789665222167969, |
|
"log_odds_ratio": -0.26475173234939575, |
|
"logps/chosen": -0.5475651621818542, |
|
"logps/rejected": -1.5100398063659668, |
|
"loss": 2.7577, |
|
"nll_loss": 0.7874841690063477, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.02737826108932495, |
|
"rewards/margins": 0.048123735934495926, |
|
"rewards/rejected": -0.07550199329853058, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8237623762376237, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"log_odds_chosen": 1.652230978012085, |
|
"log_odds_ratio": -0.2656940817832947, |
|
"logps/chosen": -0.54820317029953, |
|
"logps/rejected": -1.4693254232406616, |
|
"loss": 2.5008, |
|
"nll_loss": 0.8066957592964172, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02741015888750553, |
|
"rewards/margins": 0.0460561141371727, |
|
"rewards/rejected": -0.07346627861261368, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8871287128712871, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"log_odds_chosen": 1.8457355499267578, |
|
"log_odds_ratio": -0.23030798137187958, |
|
"logps/chosen": -0.5012167692184448, |
|
"logps/rejected": -1.5387108325958252, |
|
"loss": 2.9364, |
|
"nll_loss": 0.7640018463134766, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.02506083808839321, |
|
"rewards/margins": 0.05187469720840454, |
|
"rewards/rejected": -0.0769355446100235, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9504950495049505, |
|
"grad_norm": 13.5, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"log_odds_chosen": 1.5747287273406982, |
|
"log_odds_ratio": -0.2784648537635803, |
|
"logps/chosen": -0.5585237741470337, |
|
"logps/rejected": -1.4540793895721436, |
|
"loss": 2.5767, |
|
"nll_loss": 0.823401153087616, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.027926195412874222, |
|
"rewards/margins": 0.044777773320674896, |
|
"rewards/rejected": -0.07270397245883942, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9885148514851485, |
|
"step": 78, |
|
"total_flos": 0.0, |
|
"train_loss": 2.6355448135962853, |
|
"train_runtime": 1528.1787, |
|
"train_samples_per_second": 3.305, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|