|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9879931389365352, |
|
"eval_steps": 500, |
|
"global_step": 72, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0686106346483705, |
|
"grad_norm": 19.232778549194336, |
|
"learning_rate": 2.5e-07, |
|
"log_odds_chosen": 1.2014477252960205, |
|
"log_odds_ratio": -0.3934864103794098, |
|
"logps/chosen": -0.419245183467865, |
|
"logps/rejected": -0.8377097845077515, |
|
"loss": 11.1878, |
|
"nll_loss": 0.6684826612472534, |
|
"rewards/accuracies": 0.828125, |
|
"rewards/chosen": -0.02096226066350937, |
|
"rewards/margins": 0.020923227071762085, |
|
"rewards/rejected": -0.041885487735271454, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.137221269296741, |
|
"grad_norm": 16.76073455810547, |
|
"learning_rate": 4.996988640512931e-07, |
|
"log_odds_chosen": 1.282283067703247, |
|
"log_odds_ratio": -0.3863990008831024, |
|
"logps/chosen": -0.3940752148628235, |
|
"logps/rejected": -0.8234782218933105, |
|
"loss": 10.6398, |
|
"nll_loss": 0.6343874335289001, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.019703760743141174, |
|
"rewards/margins": 0.021470148116350174, |
|
"rewards/rejected": -0.04117390885949135, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2058319039451115, |
|
"grad_norm": 18.169845581054688, |
|
"learning_rate": 4.892350839330522e-07, |
|
"log_odds_chosen": 1.4661200046539307, |
|
"log_odds_ratio": -0.3449063301086426, |
|
"logps/chosen": -0.35424643754959106, |
|
"logps/rejected": -0.8525441884994507, |
|
"loss": 9.6632, |
|
"nll_loss": 0.5748022794723511, |
|
"rewards/accuracies": 0.878125011920929, |
|
"rewards/chosen": -0.017712321132421494, |
|
"rewards/margins": 0.02491488680243492, |
|
"rewards/rejected": -0.042627207934856415, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.274442538593482, |
|
"grad_norm": 15.6996488571167, |
|
"learning_rate": 4.64432152500068e-07, |
|
"log_odds_chosen": 1.7303870916366577, |
|
"log_odds_ratio": -0.30832380056381226, |
|
"logps/chosen": -0.3413855731487274, |
|
"logps/rejected": -0.8977711796760559, |
|
"loss": 8.8048, |
|
"nll_loss": 0.5222700238227844, |
|
"rewards/accuracies": 0.8843749761581421, |
|
"rewards/chosen": -0.01706927828490734, |
|
"rewards/margins": 0.027819275856018066, |
|
"rewards/rejected": -0.04488855600357056, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.34305317324185247, |
|
"grad_norm": 18.215797424316406, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"log_odds_chosen": 1.483076810836792, |
|
"log_odds_ratio": -0.32862988114356995, |
|
"logps/chosen": -0.3441302180290222, |
|
"logps/rejected": -0.8243424296379089, |
|
"loss": 9.7759, |
|
"nll_loss": 0.5818092226982117, |
|
"rewards/accuracies": 0.8968750238418579, |
|
"rewards/chosen": -0.01720651052892208, |
|
"rewards/margins": 0.024010611698031425, |
|
"rewards/rejected": -0.041217125952243805, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.411663807890223, |
|
"grad_norm": 16.51038360595703, |
|
"learning_rate": 3.7852568604830535e-07, |
|
"log_odds_chosen": 1.546562910079956, |
|
"log_odds_ratio": -0.30873748660087585, |
|
"logps/chosen": -0.34601905941963196, |
|
"logps/rejected": -0.8720181584358215, |
|
"loss": 9.2217, |
|
"nll_loss": 0.5480064153671265, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.017300952225923538, |
|
"rewards/margins": 0.026299959048628807, |
|
"rewards/rejected": -0.043600909411907196, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.48027444253859347, |
|
"grad_norm": 16.503437042236328, |
|
"learning_rate": 3.2257116931361555e-07, |
|
"log_odds_chosen": 1.6262096166610718, |
|
"log_odds_ratio": -0.29989489912986755, |
|
"logps/chosen": -0.3171854317188263, |
|
"logps/rejected": -0.8487747311592102, |
|
"loss": 8.9613, |
|
"nll_loss": 0.5319884419441223, |
|
"rewards/accuracies": 0.903124988079071, |
|
"rewards/chosen": -0.015859272330999374, |
|
"rewards/margins": 0.026579465717077255, |
|
"rewards/rejected": -0.04243873432278633, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.548885077186964, |
|
"grad_norm": 16.347063064575195, |
|
"learning_rate": 2.6226691858185454e-07, |
|
"log_odds_chosen": 1.7782882452011108, |
|
"log_odds_ratio": -0.28193312883377075, |
|
"logps/chosen": -0.29190748929977417, |
|
"logps/rejected": -0.8516559600830078, |
|
"loss": 8.6856, |
|
"nll_loss": 0.5151814222335815, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.01459537260234356, |
|
"rewards/margins": 0.027987420558929443, |
|
"rewards/rejected": -0.04258279502391815, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6174957118353345, |
|
"grad_norm": 18.475563049316406, |
|
"learning_rate": 2.0122741949596793e-07, |
|
"log_odds_chosen": 1.6845076084136963, |
|
"log_odds_ratio": -0.30887991189956665, |
|
"logps/chosen": -0.31784507632255554, |
|
"logps/rejected": -0.852371096611023, |
|
"loss": 8.6938, |
|
"nll_loss": 0.5149728059768677, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.015892256051301956, |
|
"rewards/margins": 0.02672630548477173, |
|
"rewards/rejected": -0.042618557810783386, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6861063464837049, |
|
"grad_norm": 17.385778427124023, |
|
"learning_rate": 1.4311122664242953e-07, |
|
"log_odds_chosen": 1.7722632884979248, |
|
"log_odds_ratio": -0.2871997356414795, |
|
"logps/chosen": -0.3112233579158783, |
|
"logps/rejected": -0.8770394325256348, |
|
"loss": 8.9052, |
|
"nll_loss": 0.5289064049720764, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.01556116808205843, |
|
"rewards/margins": 0.028290802612900734, |
|
"rewards/rejected": -0.04385197162628174, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7547169811320755, |
|
"grad_norm": 17.232837677001953, |
|
"learning_rate": 9.140167895908865e-08, |
|
"log_odds_chosen": 1.7312713861465454, |
|
"log_odds_ratio": -0.28886836767196655, |
|
"logps/chosen": -0.323580265045166, |
|
"logps/rejected": -0.8828792572021484, |
|
"loss": 8.9703, |
|
"nll_loss": 0.5326738357543945, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.01617901399731636, |
|
"rewards/margins": 0.02796494960784912, |
|
"rewards/rejected": -0.04414396360516548, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.823327615780446, |
|
"grad_norm": 16.895586013793945, |
|
"learning_rate": 4.919811712983879e-08, |
|
"log_odds_chosen": 1.7012507915496826, |
|
"log_odds_ratio": -0.29369616508483887, |
|
"logps/chosen": -0.33875319361686707, |
|
"logps/rejected": -0.8981720805168152, |
|
"loss": 9.0854, |
|
"nll_loss": 0.5401446223258972, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.016937660053372383, |
|
"rewards/margins": 0.027970943599939346, |
|
"rewards/rejected": -0.04490860179066658, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8919382504288165, |
|
"grad_norm": 18.454177856445312, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"log_odds_chosen": 1.8286237716674805, |
|
"log_odds_ratio": -0.2700421214103699, |
|
"logps/chosen": -0.3031145930290222, |
|
"logps/rejected": -0.8966620564460754, |
|
"loss": 8.785, |
|
"nll_loss": 0.5219055414199829, |
|
"rewards/accuracies": 0.9156249761581421, |
|
"rewards/chosen": -0.0151557307690382, |
|
"rewards/margins": 0.0296773761510849, |
|
"rewards/rejected": -0.04483310505747795, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.9605488850771869, |
|
"grad_norm": 17.21074104309082, |
|
"learning_rate": 2.7058725088047464e-09, |
|
"log_odds_chosen": 1.7211929559707642, |
|
"log_odds_ratio": -0.28593194484710693, |
|
"logps/chosen": -0.3278438448905945, |
|
"logps/rejected": -0.8622439503669739, |
|
"loss": 9.2475, |
|
"nll_loss": 0.5499622821807861, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.016392191872000694, |
|
"rewards/margins": 0.02672000229358673, |
|
"rewards/rejected": -0.043112196028232574, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.9879931389365352, |
|
"step": 72, |
|
"total_flos": 0.0, |
|
"train_loss": 9.334032826953464, |
|
"train_runtime": 762.9212, |
|
"train_samples_per_second": 6.111, |
|
"train_steps_per_second": 0.094 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 72, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|