{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9947089947089947, "eval_steps": 1000, "global_step": 94, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010582010582010581, "grad_norm": 3.9034635996758364, "learning_rate": 5e-08, "logits/chosen": -2.8740313053131104, "logits/rejected": -2.909637928009033, "logps/chosen": -495.3936462402344, "logps/rejected": -468.7409973144531, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.10582010582010581, "grad_norm": 3.740412497210408, "learning_rate": 5e-07, "logits/chosen": -2.8803439140319824, "logits/rejected": -2.933382987976074, "logps/chosen": -489.9436340332031, "logps/rejected": -471.76068115234375, "loss": 0.6926, "rewards/accuracies": 0.4765625, "rewards/chosen": 0.0005175346159376204, "rewards/margins": 0.001013587461784482, "rewards/rejected": -0.0004960527876392007, "step": 10 }, { "epoch": 0.21164021164021163, "grad_norm": 4.0026509714343526, "learning_rate": 4.82718437161051e-07, "logits/chosen": -2.9089906215667725, "logits/rejected": -2.9724087715148926, "logps/chosen": -490.58831787109375, "logps/rejected": -484.2608947753906, "loss": 0.677, "rewards/accuracies": 0.8101562261581421, "rewards/chosen": 0.017761804163455963, "rewards/margins": 0.033221058547496796, "rewards/rejected": -0.015459256246685982, "step": 20 }, { "epoch": 0.31746031746031744, "grad_norm": 5.045224654085605, "learning_rate": 4.332629679574565e-07, "logits/chosen": -2.9424614906311035, "logits/rejected": -2.994748830795288, "logps/chosen": -486.4335021972656, "logps/rejected": -484.3814392089844, "loss": 0.623, "rewards/accuracies": 0.8492187261581421, "rewards/chosen": 0.045325733721256256, "rewards/margins": 0.14735476672649384, "rewards/rejected": -0.10202904045581818, "step": 30 }, { "epoch": 0.42328042328042326, "grad_norm": 3.94438220021588, "learning_rate": 3.584709347793895e-07, "logits/chosen": -2.92952036857605, "logits/rejected": -3.001981019973755, "logps/chosen": -487.70849609375, "logps/rejected": -510.1280212402344, "loss": 0.5749, "rewards/accuracies": 0.85546875, "rewards/chosen": 0.017966564744710922, "rewards/margins": 0.2901422381401062, "rewards/rejected": -0.2721756398677826, "step": 40 }, { "epoch": 0.5291005291005291, "grad_norm": 3.0093221435481667, "learning_rate": 2.6868252339660607e-07, "logits/chosen": -2.92526912689209, "logits/rejected": -2.995861768722534, "logps/chosen": -514.0173950195312, "logps/rejected": -578.7903442382812, "loss": 0.4668, "rewards/accuracies": 0.8609374761581421, "rewards/chosen": -0.239375501871109, "rewards/margins": 0.7664871215820312, "rewards/rejected": -1.0058627128601074, "step": 50 }, { "epoch": 0.6349206349206349, "grad_norm": 3.4147008885194587, "learning_rate": 1.763112063972739e-07, "logits/chosen": -2.9192681312561035, "logits/rejected": -2.9830739498138428, "logps/chosen": -526.348876953125, "logps/rejected": -614.0721435546875, "loss": 0.421, "rewards/accuracies": 0.858593761920929, "rewards/chosen": -0.4103693962097168, "rewards/margins": 0.9783406257629395, "rewards/rejected": -1.3887102603912354, "step": 60 }, { "epoch": 0.7407407407407407, "grad_norm": 3.0922082498626846, "learning_rate": 9.412754953531663e-08, "logits/chosen": -2.918205976486206, "logits/rejected": -2.970673084259033, "logps/chosen": -554.398681640625, "logps/rejected": -667.9301147460938, "loss": 0.4035, "rewards/accuracies": 0.8492187261581421, "rewards/chosen": -0.6199524998664856, "rewards/margins": 1.1833505630493164, "rewards/rejected": -1.8033031225204468, "step": 70 }, { "epoch": 0.8465608465608465, "grad_norm": 3.5694688653613595, "learning_rate": 3.349364905389032e-08, "logits/chosen": -2.8878700733184814, "logits/rejected": -2.932638645172119, "logps/chosen": -557.0501708984375, "logps/rejected": -678.7019653320312, "loss": 0.3889, "rewards/accuracies": 0.8578125238418579, "rewards/chosen": -0.7133009433746338, "rewards/margins": 1.2904046773910522, "rewards/rejected": -2.0037055015563965, "step": 80 }, { "epoch": 0.9523809523809523, "grad_norm": 3.149374311366939, "learning_rate": 2.7922934437178692e-09, "logits/chosen": -2.89034366607666, "logits/rejected": -2.936624765396118, "logps/chosen": -562.55322265625, "logps/rejected": -680.3013305664062, "loss": 0.3864, "rewards/accuracies": 0.836718738079071, "rewards/chosen": -0.7662609815597534, "rewards/margins": 1.2681959867477417, "rewards/rejected": -2.034456968307495, "step": 90 }, { "epoch": 0.9947089947089947, "step": 94, "total_flos": 0.0, "train_loss": 0.5087684798747936, "train_runtime": 2521.2346, "train_samples_per_second": 38.335, "train_steps_per_second": 0.037 } ], "logging_steps": 10, "max_steps": 94, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }