{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9879931389365352,
  "eval_steps": 500,
  "global_step": 72,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0686106346483705,
      "grad_norm": 19.744274139404297,
      "learning_rate": 2.5e-07,
      "log_odds_chosen": 1.201342225074768,
      "log_odds_ratio": -0.3934743106365204,
      "logps/chosen": -0.4193040728569031,
      "logps/rejected": -0.8378152847290039,
      "loss": 12.1235,
      "nll_loss": 0.668493390083313,
      "rewards/accuracies": 0.828125,
      "rewards/chosen": -0.020965203642845154,
      "rewards/margins": 0.02092556282877922,
      "rewards/rejected": -0.041890766471624374,
      "step": 5
    },
    {
      "epoch": 0.137221269296741,
      "grad_norm": 17.17472267150879,
      "learning_rate": 4.996988640512931e-07,
      "log_odds_chosen": 1.2820667028427124,
      "log_odds_ratio": -0.38662344217300415,
      "logps/chosen": -0.3940594494342804,
      "logps/rejected": -0.8234702944755554,
      "loss": 11.5715,
      "nll_loss": 0.6343871355056763,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.0197029747068882,
      "rewards/margins": 0.02147054485976696,
      "rewards/rejected": -0.04117351397871971,
      "step": 10
    },
    {
      "epoch": 0.2058319039451115,
      "grad_norm": 18.691057205200195,
      "learning_rate": 4.892350839330522e-07,
      "log_odds_chosen": 1.4684958457946777,
      "log_odds_ratio": -0.34438949823379517,
      "logps/chosen": -0.35416024923324585,
      "logps/rejected": -0.853132426738739,
      "loss": 10.573,
      "nll_loss": 0.5747630000114441,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.01770801469683647,
      "rewards/margins": 0.024948609992861748,
      "rewards/rejected": -0.04265662282705307,
      "step": 15
    },
    {
      "epoch": 0.274442538593482,
      "grad_norm": 16.21430206298828,
      "learning_rate": 4.64432152500068e-07,
      "log_odds_chosen": 1.7324159145355225,
      "log_odds_ratio": -0.307681143283844,
      "logps/chosen": -0.3413434624671936,
      "logps/rejected": -0.8989057540893555,
      "loss": 9.6949,
      "nll_loss": 0.5221809148788452,
      "rewards/accuracies": 0.8843749761581421,
      "rewards/chosen": -0.01706717535853386,
      "rewards/margins": 0.027878114953637123,
      "rewards/rejected": -0.04494528844952583,
      "step": 20
    },
    {
      "epoch": 0.34305317324185247,
      "grad_norm": 18.64670181274414,
      "learning_rate": 4.2677669529663686e-07,
      "log_odds_chosen": 1.4884607791900635,
      "log_odds_ratio": -0.32770583033561707,
      "logps/chosen": -0.3440612554550171,
      "logps/rejected": -0.826368510723114,
      "loss": 10.6914,
      "nll_loss": 0.5817240476608276,
      "rewards/accuracies": 0.8968750238418579,
      "rewards/chosen": -0.017203064635396004,
      "rewards/margins": 0.024115364998579025,
      "rewards/rejected": -0.04131842777132988,
      "step": 25
    },
    {
      "epoch": 0.411663807890223,
      "grad_norm": 17.069456100463867,
      "learning_rate": 3.7852568604830535e-07,
      "log_odds_chosen": 1.5529407262802124,
      "log_odds_ratio": -0.30767717957496643,
      "logps/chosen": -0.34597113728523254,
      "logps/rejected": -0.8747011423110962,
      "loss": 10.1225,
      "nll_loss": 0.5479608774185181,
      "rewards/accuracies": 0.890625,
      "rewards/chosen": -0.017298556864261627,
      "rewards/margins": 0.02643650211393833,
      "rewards/rejected": -0.04373506084084511,
      "step": 30
    },
    {
      "epoch": 0.48027444253859347,
      "grad_norm": 16.975975036621094,
      "learning_rate": 3.2257116931361555e-07,
      "log_odds_chosen": 1.6338489055633545,
      "log_odds_ratio": -0.2982104420661926,
      "logps/chosen": -0.317135751247406,
      "logps/rejected": -0.8524333834648132,
      "loss": 9.8603,
      "nll_loss": 0.5319342613220215,
      "rewards/accuracies": 0.903124988079071,
      "rewards/chosen": -0.0158567875623703,
      "rewards/margins": 0.0267648808658123,
      "rewards/rejected": -0.0426216684281826,
      "step": 35
    },
    {
      "epoch": 0.548885077186964,
      "grad_norm": 16.777755737304688,
      "learning_rate": 2.6226691858185454e-07,
      "log_odds_chosen": 1.7881925106048584,
      "log_odds_ratio": -0.2799718976020813,
      "logps/chosen": -0.2918943166732788,
      "logps/rejected": -0.8554312586784363,
      "loss": 9.5767,
      "nll_loss": 0.5152069330215454,
      "rewards/accuracies": 0.9156249761581421,
      "rewards/chosen": -0.014594716019928455,
      "rewards/margins": 0.028176847845315933,
      "rewards/rejected": -0.042771559208631516,
      "step": 40
    },
    {
      "epoch": 0.6174957118353345,
      "grad_norm": 19.050371170043945,
      "learning_rate": 2.0122741949596793e-07,
      "log_odds_chosen": 1.693018913269043,
      "log_odds_ratio": -0.30690881609916687,
      "logps/chosen": -0.31784263253211975,
      "logps/rejected": -0.8562124371528625,
      "loss": 9.5933,
      "nll_loss": 0.5150309205055237,
      "rewards/accuracies": 0.8843749761581421,
      "rewards/chosen": -0.015892133116722107,
      "rewards/margins": 0.026918485760688782,
      "rewards/rejected": -0.04281061887741089,
      "step": 45
    },
    {
      "epoch": 0.6861063464837049,
      "grad_norm": 17.89000129699707,
      "learning_rate": 1.4311122664242953e-07,
      "log_odds_chosen": 1.7827718257904053,
      "log_odds_ratio": -0.2854536771774292,
      "logps/chosen": -0.3112075924873352,
      "logps/rejected": -0.8817294836044312,
      "loss": 9.7932,
      "nll_loss": 0.5288890600204468,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.01556038111448288,
      "rewards/margins": 0.028526093810796738,
      "rewards/rejected": -0.044086478650569916,
      "step": 50
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 17.733768463134766,
      "learning_rate": 9.140167895908865e-08,
      "log_odds_chosen": 1.7418218851089478,
      "log_odds_ratio": -0.28721481561660767,
      "logps/chosen": -0.3235136866569519,
      "logps/rejected": -0.8874639272689819,
      "loss": 9.8656,
      "nll_loss": 0.5327537655830383,
      "rewards/accuracies": 0.909375011920929,
      "rewards/chosen": -0.016175683587789536,
      "rewards/margins": 0.028197508305311203,
      "rewards/rejected": -0.04437319189310074,
      "step": 55
    },
    {
      "epoch": 0.823327615780446,
      "grad_norm": 17.464258193969727,
      "learning_rate": 4.919811712983879e-08,
      "log_odds_chosen": 1.7085039615631104,
      "log_odds_ratio": -0.2921965420246124,
      "logps/chosen": -0.33900579810142517,
      "logps/rejected": -0.9029625654220581,
      "loss": 9.9775,
      "nll_loss": 0.5402873754501343,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -0.016950294375419617,
      "rewards/margins": 0.028197839856147766,
      "rewards/rejected": -0.045148126780986786,
      "step": 60
    },
    {
      "epoch": 0.8919382504288165,
      "grad_norm": 19.061491012573242,
      "learning_rate": 1.9030116872178314e-08,
      "log_odds_chosen": 1.8424354791641235,
      "log_odds_ratio": -0.26801738142967224,
      "logps/chosen": -0.30308759212493896,
      "logps/rejected": -0.9023351669311523,
      "loss": 9.665,
      "nll_loss": 0.5219208598136902,
      "rewards/accuracies": 0.9156249761581421,
      "rewards/chosen": -0.015154379419982433,
      "rewards/margins": 0.02996237948536873,
      "rewards/rejected": -0.04511675983667374,
      "step": 65
    },
    {
      "epoch": 0.9605488850771869,
      "grad_norm": 17.77915382385254,
      "learning_rate": 2.7058725088047464e-09,
      "log_odds_chosen": 1.7313966751098633,
      "log_odds_ratio": -0.28424638509750366,
      "logps/chosen": -0.3278297483921051,
      "logps/rejected": -0.8673001527786255,
      "loss": 10.1477,
      "nll_loss": 0.5500699877738953,
      "rewards/accuracies": 0.9156249761581421,
      "rewards/chosen": -0.016391487792134285,
      "rewards/margins": 0.02697352133691311,
      "rewards/rejected": -0.043365009129047394,
      "step": 70
    },
    {
      "epoch": 0.9879931389365352,
      "step": 72,
      "total_flos": 0.0,
      "train_loss": 10.236132118437025,
      "train_runtime": 760.2628,
      "train_samples_per_second": 6.132,
      "train_steps_per_second": 0.095
    }
  ],
  "logging_steps": 5,
  "max_steps": 72,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}