|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9885148514851485, |
|
"eval_steps": 500, |
|
"global_step": 78, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06336633663366337, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"log_odds_chosen": 0.9911565780639648, |
|
"log_odds_ratio": -0.41253289580345154, |
|
"logps/chosen": -0.6660582423210144, |
|
"logps/rejected": -1.228592872619629, |
|
"loss": 4.3637, |
|
"nll_loss": 0.9886282086372375, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.0333029143512249, |
|
"rewards/margins": 0.028126735240221024, |
|
"rewards/rejected": -0.061429642140865326, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.12673267326732673, |
|
"grad_norm": 17.875, |
|
"learning_rate": 4.989935734988097e-07, |
|
"log_odds_chosen": 1.1546833515167236, |
|
"log_odds_ratio": -0.37236329913139343, |
|
"logps/chosen": -0.5902147889137268, |
|
"logps/rejected": -1.2310830354690552, |
|
"loss": 4.5589, |
|
"nll_loss": 0.8954709768295288, |
|
"rewards/accuracies": 0.846875011920929, |
|
"rewards/chosen": -0.02951074205338955, |
|
"rewards/margins": 0.03204340860247612, |
|
"rewards/rejected": -0.06155414506793022, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1900990099009901, |
|
"grad_norm": 15.0, |
|
"learning_rate": 4.877641290737883e-07, |
|
"log_odds_chosen": 1.1432024240493774, |
|
"log_odds_ratio": -0.3701472878456116, |
|
"logps/chosen": -0.5654497146606445, |
|
"logps/rejected": -1.1885969638824463, |
|
"loss": 4.5936, |
|
"nll_loss": 0.875917911529541, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.028272485360503197, |
|
"rewards/margins": 0.03115735575556755, |
|
"rewards/rejected": -0.0594298429787159, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.25346534653465347, |
|
"grad_norm": 15.1875, |
|
"learning_rate": 4.646121984004665e-07, |
|
"log_odds_chosen": 1.0618789196014404, |
|
"log_odds_ratio": -0.38934561610221863, |
|
"logps/chosen": -0.6174649000167847, |
|
"logps/rejected": -1.2051403522491455, |
|
"loss": 4.4157, |
|
"nll_loss": 0.937028706073761, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.030873248353600502, |
|
"rewards/margins": 0.029383767396211624, |
|
"rewards/rejected": -0.06025701016187668, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.31683168316831684, |
|
"grad_norm": 16.5, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"log_odds_chosen": 1.2300812005996704, |
|
"log_odds_ratio": -0.3776150345802307, |
|
"logps/chosen": -0.6107009649276733, |
|
"logps/rejected": -1.3069515228271484, |
|
"loss": 4.4461, |
|
"nll_loss": 0.8909252882003784, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.030535047873854637, |
|
"rewards/margins": 0.03481253236532211, |
|
"rewards/rejected": -0.0653475821018219, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.3801980198019802, |
|
"grad_norm": 16.25, |
|
"learning_rate": 3.877242453630256e-07, |
|
"log_odds_chosen": 1.3746024370193481, |
|
"log_odds_ratio": -0.3285817801952362, |
|
"logps/chosen": -0.5757168531417847, |
|
"logps/rejected": -1.3395036458969116, |
|
"loss": 4.232, |
|
"nll_loss": 0.8684650659561157, |
|
"rewards/accuracies": 0.890625, |
|
"rewards/chosen": -0.02878584899008274, |
|
"rewards/margins": 0.03818933293223381, |
|
"rewards/rejected": -0.0669751763343811, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.44356435643564357, |
|
"grad_norm": 17.75, |
|
"learning_rate": 3.378437060203357e-07, |
|
"log_odds_chosen": 1.5565850734710693, |
|
"log_odds_ratio": -0.28478947281837463, |
|
"logps/chosen": -0.5244386196136475, |
|
"logps/rejected": -1.3875908851623535, |
|
"loss": 4.7516, |
|
"nll_loss": 0.796803891658783, |
|
"rewards/accuracies": 0.934374988079071, |
|
"rewards/chosen": -0.026221930980682373, |
|
"rewards/margins": 0.04315761476755142, |
|
"rewards/rejected": -0.0693795457482338, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5069306930693069, |
|
"grad_norm": 21.625, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"log_odds_chosen": 1.7404935359954834, |
|
"log_odds_ratio": -0.24158628284931183, |
|
"logps/chosen": -0.4815496802330017, |
|
"logps/rejected": -1.4325745105743408, |
|
"loss": 5.0552, |
|
"nll_loss": 0.7471240758895874, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.024077486246824265, |
|
"rewards/margins": 0.047551244497299194, |
|
"rewards/rejected": -0.07162873446941376, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5702970297029702, |
|
"grad_norm": 20.125, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"log_odds_chosen": 1.6410064697265625, |
|
"log_odds_ratio": -0.26414045691490173, |
|
"logps/chosen": -0.5090475678443909, |
|
"logps/rejected": -1.4282280206680298, |
|
"loss": 4.6016, |
|
"nll_loss": 0.7733598947525024, |
|
"rewards/accuracies": 0.9468749761581421, |
|
"rewards/chosen": -0.025452375411987305, |
|
"rewards/margins": 0.04595901817083359, |
|
"rewards/rejected": -0.07141139358282089, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6336633663366337, |
|
"grad_norm": 26.5, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"log_odds_chosen": 1.7832257747650146, |
|
"log_odds_ratio": -0.23228943347930908, |
|
"logps/chosen": -0.46333685517311096, |
|
"logps/rejected": -1.3944836854934692, |
|
"loss": 5.7573, |
|
"nll_loss": 0.709271252155304, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.023166844621300697, |
|
"rewards/margins": 0.046557340770959854, |
|
"rewards/rejected": -0.0697241872549057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.697029702970297, |
|
"grad_norm": 24.125, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"log_odds_chosen": 1.5008140802383423, |
|
"log_odds_ratio": -0.30259355902671814, |
|
"logps/chosen": -0.5552427172660828, |
|
"logps/rejected": -1.3832123279571533, |
|
"loss": 5.0102, |
|
"nll_loss": 0.7843067646026611, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.027762139216065407, |
|
"rewards/margins": 0.041398484259843826, |
|
"rewards/rejected": -0.06916062533855438, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.7603960396039604, |
|
"grad_norm": 26.25, |
|
"learning_rate": 7.723433775328384e-08, |
|
"log_odds_chosen": 1.6810848712921143, |
|
"log_odds_ratio": -0.2639027237892151, |
|
"logps/chosen": -0.5468909740447998, |
|
"logps/rejected": -1.5102508068084717, |
|
"loss": 5.0576, |
|
"nll_loss": 0.787597119808197, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02734454534947872, |
|
"rewards/margins": 0.04816799610853195, |
|
"rewards/rejected": -0.07551254332065582, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8237623762376237, |
|
"grad_norm": 26.25, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"log_odds_chosen": 1.6594898700714111, |
|
"log_odds_ratio": -0.2648767828941345, |
|
"logps/chosen": -0.5453223586082458, |
|
"logps/rejected": -1.4675935506820679, |
|
"loss": 5.2911, |
|
"nll_loss": 0.8060859441757202, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.0272661205381155, |
|
"rewards/margins": 0.04611356183886528, |
|
"rewards/rejected": -0.07337968051433563, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.8871287128712871, |
|
"grad_norm": 22.25, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"log_odds_chosen": 1.8507673740386963, |
|
"log_odds_ratio": -0.22900144755840302, |
|
"logps/chosen": -0.500530481338501, |
|
"logps/rejected": -1.5389080047607422, |
|
"loss": 4.521, |
|
"nll_loss": 0.7640606164932251, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.02502652443945408, |
|
"rewards/margins": 0.0519188828766346, |
|
"rewards/rejected": -0.07694540917873383, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9504950495049505, |
|
"grad_norm": 30.375, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"log_odds_chosen": 1.575315237045288, |
|
"log_odds_ratio": -0.2773153781890869, |
|
"logps/chosen": -0.558388352394104, |
|
"logps/rejected": -1.4570810794830322, |
|
"loss": 4.7645, |
|
"nll_loss": 0.8240699768066406, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.02791941724717617, |
|
"rewards/margins": 0.04493463411927223, |
|
"rewards/rejected": -0.07285405695438385, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.9885148514851485, |
|
"step": 78, |
|
"total_flos": 0.0, |
|
"train_loss": 4.754565581297263, |
|
"train_runtime": 2713.8691, |
|
"train_samples_per_second": 1.861, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 78, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|