|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 96, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 534.9856766765029, |
|
"learning_rate": 3.6623701904189776e-08, |
|
"logits/chosen": -2.590585231781006, |
|
"logits/rejected": -2.5664222240448, |
|
"logps/chosen": -80.29847717285156, |
|
"logps/rejected": -53.10200881958008, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 522.7071434903489, |
|
"learning_rate": 3.662370190418977e-07, |
|
"logits/chosen": -2.5560922622680664, |
|
"logits/rejected": -2.5382773876190186, |
|
"logps/chosen": -87.9105224609375, |
|
"logps/rejected": -81.0152587890625, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.2152777761220932, |
|
"rewards/chosen": 0.0037625303957611322, |
|
"rewards/margins": -0.015120850875973701, |
|
"rewards/rejected": 0.01888338290154934, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 330.8070947624322, |
|
"learning_rate": 3.5415414722060956e-07, |
|
"logits/chosen": -2.6099281311035156, |
|
"logits/rejected": -2.5609025955200195, |
|
"logps/chosen": -102.80690002441406, |
|
"logps/rejected": -89.45201110839844, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": 0.4419492781162262, |
|
"rewards/margins": 0.09075690805912018, |
|
"rewards/rejected": 0.3511923849582672, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 561.0026948523158, |
|
"learning_rate": 3.1950008155765393e-07, |
|
"logits/chosen": -2.510303020477295, |
|
"logits/rejected": -2.5249061584472656, |
|
"logps/chosen": -67.08929443359375, |
|
"logps/rejected": -75.05818176269531, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": 0.2826586067676544, |
|
"rewards/margins": 0.39012494683265686, |
|
"rewards/rejected": -0.10746632516384125, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 402.3919923341537, |
|
"learning_rate": 2.6684804225439007e-07, |
|
"logits/chosen": -2.5768940448760986, |
|
"logits/rejected": -2.5649542808532715, |
|
"logps/chosen": -72.13874053955078, |
|
"logps/rejected": -70.94956970214844, |
|
"loss": 0.6997, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.00632839510217309, |
|
"rewards/margins": 0.19826939702033997, |
|
"rewards/rejected": -0.19194099307060242, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 382.89112989307444, |
|
"learning_rate": 2.0314640218361037e-07, |
|
"logits/chosen": -2.494668960571289, |
|
"logits/rejected": -2.5067570209503174, |
|
"logps/chosen": -50.37248229980469, |
|
"logps/rejected": -58.68071746826172, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.24375000596046448, |
|
"rewards/chosen": 0.4294072091579437, |
|
"rewards/margins": 0.24388039112091064, |
|
"rewards/rejected": 0.1855268031358719, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 356.73514211856235, |
|
"learning_rate": 1.3680172552929206e-07, |
|
"logits/chosen": -2.58482027053833, |
|
"logits/rejected": -2.566230535507202, |
|
"logps/chosen": -78.02713775634766, |
|
"logps/rejected": -78.90872955322266, |
|
"loss": 0.6665, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.8464384078979492, |
|
"rewards/margins": 0.4981551766395569, |
|
"rewards/rejected": 0.3482832610607147, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 347.90887844338835, |
|
"learning_rate": 7.656937215250093e-08, |
|
"logits/chosen": -2.5671563148498535, |
|
"logits/rejected": -2.558472156524658, |
|
"logps/chosen": -95.0243148803711, |
|
"logps/rejected": -84.78661346435547, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": 1.0726714134216309, |
|
"rewards/margins": 0.5336906313896179, |
|
"rewards/rejected": 0.5389808416366577, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 625.7311819069191, |
|
"learning_rate": 3.039807216523069e-08, |
|
"logits/chosen": -2.569451332092285, |
|
"logits/rejected": -2.516343593597412, |
|
"logps/chosen": -84.30049133300781, |
|
"logps/rejected": -78.89418029785156, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 1.0922737121582031, |
|
"rewards/margins": 0.6956304311752319, |
|
"rewards/rejected": 0.3966432213783264, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 530.9459931532258, |
|
"learning_rate": 4.380949643555316e-09, |
|
"logits/chosen": -2.502892017364502, |
|
"logits/rejected": -2.4977681636810303, |
|
"logps/chosen": -53.26112747192383, |
|
"logps/rejected": -63.1404914855957, |
|
"loss": 0.6758, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": 0.58380526304245, |
|
"rewards/margins": 0.31763672828674316, |
|
"rewards/rejected": 0.2661685645580292, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 96, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6724727004766464, |
|
"train_runtime": 961.1077, |
|
"train_samples_per_second": 6.36, |
|
"train_steps_per_second": 0.1 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 96, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|