{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.138086062941555, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 6.844920635223389, "learning_rate": 5e-05, "logits/chosen": -3.09375, "logits/rejected": -3.0625, "logps/chosen": -258.0, "logps/rejected": -322.0, "loss": 1.0697, "max_memory_allocated (GB)": 94.25, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.30078125, "rewards/chosen": 0.67578125, "rewards/margins": -0.4140625, "rewards/rejected": 1.0859375, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 4.300484657287598, "learning_rate": 0.0001, "logits/chosen": -3.046875, "logits/rejected": -3.03125, "logps/chosen": -286.0, "logps/rejected": -336.0, "loss": 0.5736, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.69140625, "rewards/chosen": -0.150390625, "rewards/margins": 0.8046875, "rewards/rejected": -0.95703125, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 1.9487425088882446, "learning_rate": 0.00015, "logits/chosen": -2.859375, "logits/rejected": -2.84375, "logps/chosen": -294.0, "logps/rejected": -394.0, "loss": 0.2394, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.90625, "rewards/chosen": -1.984375, "rewards/margins": 4.8125, "rewards/rejected": -6.8125, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 1.0650818347930908, "learning_rate": 0.0002, "logits/chosen": -2.875, "logits/rejected": -2.828125, "logps/chosen": -284.0, "logps/rejected": -388.0, "loss": 0.1181, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.95703125, "rewards/chosen": -0.62109375, "rewards/margins": 5.0625, "rewards/rejected": -5.6875, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5032941699028015, "learning_rate": 0.00025, "logits/chosen": -2.890625, "logits/rejected": -2.875, "logps/chosen": -278.0, "logps/rejected": -412.0, "loss": 0.0583, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -0.8828125, "rewards/margins": 6.96875, "rewards/rejected": -7.84375, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.8771920800209045, "learning_rate": 0.0003, "logits/chosen": -2.828125, "logits/rejected": -2.828125, "logps/chosen": -312.0, "logps/rejected": -474.0, "loss": 0.0469, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -2.609375, "rewards/margins": 11.5, "rewards/rejected": -14.125, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.42379140853881836, "learning_rate": 0.00035, "logits/chosen": -2.84375, "logits/rejected": -2.84375, "logps/chosen": -280.0, "logps/rejected": -436.0, "loss": 0.0285, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -0.9609375, "rewards/margins": 9.9375, "rewards/rejected": -10.9375, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 1.5195761919021606, "learning_rate": 0.0004, "logits/chosen": -2.78125, "logits/rejected": -2.765625, "logps/chosen": -292.0, "logps/rejected": -450.0, "loss": 0.0266, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -1.4453125, "rewards/margins": 10.75, "rewards/rejected": -12.1875, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 2.770358085632324, "learning_rate": 0.00045000000000000004, "logits/chosen": -2.921875, "logits/rejected": -2.9375, "logps/chosen": -276.0, "logps/rejected": -478.0, "loss": 0.0486, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -1.5703125, "rewards/margins": 13.375, "rewards/rejected": -15.0, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.1556715965270996, "learning_rate": 0.0005, "logits/chosen": -2.75, "logits/rejected": -2.71875, "logps/chosen": -284.0, "logps/rejected": -482.0, "loss": 0.053, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9765625, "rewards/chosen": -1.3125, "rewards/margins": 13.0, "rewards/rejected": -14.375, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.40476056933403015, "learning_rate": 0.0004998477067547739, "logits/chosen": -2.875, "logits/rejected": -2.875, "logps/chosen": -296.0, "logps/rejected": -444.0, "loss": 0.0384, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -1.2109375, "rewards/margins": 10.5, "rewards/rejected": -11.75, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.8060222864151001, "learning_rate": 0.0004993910125649561, "logits/chosen": -3.234375, "logits/rejected": -3.234375, "logps/chosen": -282.0, "logps/rejected": -456.0, "loss": 0.0183, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -0.76171875, "rewards/margins": 12.4375, "rewards/rejected": -13.1875, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.5584326982498169, "learning_rate": 0.0004986304738420684, "logits/chosen": -3.140625, "logits/rejected": -3.125, "logps/chosen": -296.0, "logps/rejected": -496.0, "loss": 0.0294, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -1.875, "rewards/margins": 14.8125, "rewards/rejected": -16.75, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 1.2188811302185059, "learning_rate": 0.0004975670171853926, "logits/chosen": -3.21875, "logits/rejected": -3.21875, "logps/chosen": -300.0, "logps/rejected": -478.0, "loss": 0.0312, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -1.3515625, "rewards/margins": 13.0, "rewards/rejected": -14.375, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 1.9386507272720337, "learning_rate": 0.000496201938253052, "logits/chosen": -3.1875, "logits/rejected": -3.140625, "logps/chosen": -288.0, "logps/rejected": -488.0, "loss": 0.0504, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -1.6171875, "rewards/margins": 13.25, "rewards/rejected": -14.8125, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.11973602324724197, "learning_rate": 0.0004945369001834514, "logits/chosen": -2.9375, "logits/rejected": -2.84375, "logps/chosen": -298.0, "logps/rejected": -486.0, "loss": 0.0395, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -1.9296875, "rewards/margins": 14.5625, "rewards/rejected": -16.5, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.0885164737701416, "learning_rate": 0.0004925739315689991, "logits/chosen": -2.828125, "logits/rejected": -2.78125, "logps/chosen": -292.0, "logps/rejected": -490.0, "loss": 0.0511, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.984375, "rewards/chosen": -1.890625, "rewards/margins": 14.25, "rewards/rejected": -16.125, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 5.371945381164551, "learning_rate": 0.0004903154239845797, "logits/chosen": -2.71875, "logits/rejected": -2.671875, "logps/chosen": -304.0, "logps/rejected": -474.0, "loss": 0.028, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -2.03125, "rewards/margins": 12.75, "rewards/rejected": -14.75, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 1.7487589120864868, "learning_rate": 0.0004877641290737884, "logits/chosen": -2.90625, "logits/rejected": -2.875, "logps/chosen": -288.0, "logps/rejected": -484.0, "loss": 0.053, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98046875, "rewards/chosen": -1.71875, "rewards/margins": 13.3125, "rewards/rejected": -15.0, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.1586950570344925, "learning_rate": 0.0004849231551964771, "logits/chosen": -3.0, "logits/rejected": -2.96875, "logps/chosen": -296.0, "logps/rejected": -516.0, "loss": 0.0173, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -2.375, "rewards/margins": 16.75, "rewards/rejected": -19.125, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.385784387588501, "learning_rate": 0.00048179596364169685, "logits/chosen": -3.015625, "logits/rejected": -3.0, "logps/chosen": -312.0, "logps/rejected": -528.0, "loss": 0.0372, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -2.28125, "rewards/margins": 17.25, "rewards/rejected": -19.625, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.10692762583494186, "learning_rate": 0.0004783863644106502, "logits/chosen": -3.125, "logits/rejected": -3.09375, "logps/chosen": -320.0, "logps/rejected": -572.0, "loss": 0.012, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -3.65625, "rewards/margins": 20.25, "rewards/rejected": -23.875, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 1.407893419265747, "learning_rate": 0.00047469851157479177, "logits/chosen": -3.171875, "logits/rejected": -3.109375, "logps/chosen": -308.0, "logps/rejected": -568.0, "loss": 0.008, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.234375, "rewards/margins": 20.625, "rewards/rejected": -23.875, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.15119591355323792, "learning_rate": 0.00047073689821473173, "logits/chosen": -3.203125, "logits/rejected": -3.1875, "logps/chosen": -290.0, "logps/rejected": -588.0, "loss": 0.0236, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.328125, "rewards/margins": 22.125, "rewards/rejected": -25.375, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 1.0850694179534912, "learning_rate": 0.00046650635094610973, "logits/chosen": -3.25, "logits/rejected": -3.265625, "logps/chosen": -286.0, "logps/rejected": -484.0, "loss": 0.0246, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -1.921875, "rewards/margins": 13.75, "rewards/rejected": -15.6875, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 1.0764814615249634, "learning_rate": 0.00046201202403910646, "logits/chosen": -3.234375, "logits/rejected": -3.21875, "logps/chosen": -298.0, "logps/rejected": -498.0, "loss": 0.0307, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -2.421875, "rewards/margins": 15.5, "rewards/rejected": -17.875, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.04254325106739998, "learning_rate": 0.00045725939313876043, "logits/chosen": -3.078125, "logits/rejected": -3.0625, "logps/chosen": -304.0, "logps/rejected": -548.0, "loss": 0.0161, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -2.703125, "rewards/margins": 18.75, "rewards/rejected": -21.5, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.2640644311904907, "learning_rate": 0.0004522542485937369, "logits/chosen": -3.078125, "logits/rejected": -3.078125, "logps/chosen": -294.0, "logps/rejected": -552.0, "loss": 0.0116, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -2.203125, "rewards/margins": 19.5, "rewards/rejected": -21.75, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.49, "grad_norm": 0.6378334164619446, "learning_rate": 0.00044700268840168044, "logits/chosen": -3.046875, "logits/rejected": -3.046875, "logps/chosen": -296.0, "logps/rejected": -600.0, "loss": 0.0159, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.53125, "rewards/margins": 24.0, "rewards/rejected": -27.625, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.54, "grad_norm": 0.9890326261520386, "learning_rate": 0.0004415111107797445, "logits/chosen": -3.078125, "logits/rejected": -3.0625, "logps/chosen": -320.0, "logps/rejected": -588.0, "loss": 0.0083, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.640625, "rewards/margins": 22.375, "rewards/rejected": -26.0, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.59, "grad_norm": 2.6053104400634766, "learning_rate": 0.00043578620636934855, "logits/chosen": -3.109375, "logits/rejected": -3.09375, "logps/chosen": -338.0, "logps/rejected": -628.0, "loss": 0.0038, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -5.0, "rewards/margins": 24.75, "rewards/rejected": -29.75, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.64, "grad_norm": 1.0414825677871704, "learning_rate": 0.0004298349500846628, "logits/chosen": -3.25, "logits/rejected": -3.21875, "logps/chosen": -302.0, "logps/rejected": -540.0, "loss": 0.0174, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.9921875, "rewards/chosen": -3.015625, "rewards/margins": 18.25, "rewards/rejected": -21.25, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7, "grad_norm": 1.3290166854858398, "learning_rate": 0.00042366459261474935, "logits/chosen": -3.171875, "logits/rejected": -3.140625, "logps/chosen": -334.0, "logps/rejected": -568.0, "loss": 0.0209, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.98828125, "rewards/chosen": -4.1875, "rewards/margins": 19.875, "rewards/rejected": -24.125, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.75, "grad_norm": 3.274353504180908, "learning_rate": 0.0004172826515897146, "logits/chosen": -3.203125, "logits/rejected": -3.171875, "logps/chosen": -316.0, "logps/rejected": -604.0, "loss": 0.0095, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.96875, "rewards/margins": 23.625, "rewards/rejected": -26.625, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8, "grad_norm": 1.1814924478530884, "learning_rate": 0.0004106969024216348, "logits/chosen": -3.125, "logits/rejected": -3.09375, "logps/chosen": -294.0, "logps/rejected": -612.0, "loss": 0.0129, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.0625, "rewards/margins": 25.625, "rewards/rejected": -28.625, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.85, "grad_norm": 0.10757168382406235, "learning_rate": 0.00040391536883141455, "logits/chosen": -3.046875, "logits/rejected": -3.046875, "logps/chosen": -306.0, "logps/rejected": -544.0, "loss": 0.0201, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.078125, "rewards/margins": 17.875, "rewards/rejected": -21.0, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9, "grad_norm": 0.026544878259301186, "learning_rate": 0.0003969463130731183, "logits/chosen": -2.953125, "logits/rejected": -2.921875, "logps/chosen": -304.0, "logps/rejected": -548.0, "loss": 0.0068, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.171875, "rewards/margins": 18.625, "rewards/rejected": -21.75, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.95, "grad_norm": 0.012133469805121422, "learning_rate": 0.0003897982258676867, "logits/chosen": -2.875, "logits/rejected": -2.828125, "logps/chosen": -316.0, "logps/rejected": -540.0, "loss": 0.0172, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.203125, "rewards/margins": 18.0, "rewards/rejected": -21.25, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 0.05422056466341019, "learning_rate": 0.00038247981605830125, "logits/chosen": -2.9375, "logits/rejected": -2.859375, "logps/chosen": -306.0, "logps/rejected": -548.0, "loss": 0.0067, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.25, "rewards/margins": 19.875, "rewards/rejected": -23.125, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 2.06, "grad_norm": 0.5953335762023926, "learning_rate": 0.000375, "logits/chosen": -3.0625, "logits/rejected": -3.015625, "logps/chosen": -300.0, "logps/rejected": -588.0, "loss": 0.0196, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.6875, "rewards/margins": 21.625, "rewards/rejected": -25.25, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 2.11, "grad_norm": 4.781039714813232, "learning_rate": 0.0003673678906964727, "logits/chosen": -3.015625, "logits/rejected": -3.0, "logps/chosen": -300.0, "logps/rejected": -564.0, "loss": 0.0067, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.03125, "rewards/margins": 19.25, "rewards/rejected": -22.375, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 2.16, "grad_norm": 0.0018408839823678136, "learning_rate": 0.00035959278669726934, "logits/chosen": -2.953125, "logits/rejected": -2.921875, "logps/chosen": -300.0, "logps/rejected": -600.0, "loss": 0.0012, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.5625, "rewards/margins": 22.75, "rewards/rejected": -26.25, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 2.21, "grad_norm": 0.0082088652998209, "learning_rate": 0.0003516841607689501, "logits/chosen": -3.0625, "logits/rejected": -3.015625, "logps/chosen": -308.0, "logps/rejected": -560.0, "loss": 0.0058, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -2.828125, "rewards/margins": 21.0, "rewards/rejected": -23.875, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 2.26, "grad_norm": 0.0047104801051318645, "learning_rate": 0.00034365164835397803, "logits/chosen": -3.15625, "logits/rejected": -3.140625, "logps/chosen": -290.0, "logps/rejected": -536.0, "loss": 0.0025, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.171875, "rewards/margins": 18.375, "rewards/rejected": -20.5, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 2.31, "grad_norm": 0.10709760338068008, "learning_rate": 0.0003355050358314172, "logits/chosen": -3.171875, "logits/rejected": -3.140625, "logps/chosen": -310.0, "logps/rejected": -560.0, "loss": 0.0015, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 21.25, "rewards/rejected": -24.0, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 2.36, "grad_norm": 0.016811428591609, "learning_rate": 0.00032725424859373687, "logits/chosen": -3.125, "logits/rejected": -3.078125, "logps/chosen": -304.0, "logps/rejected": -596.0, "loss": 0.0003, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 22.875, "rewards/rejected": -26.25, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 2.41, "grad_norm": 0.05604538321495056, "learning_rate": 0.0003189093389542498, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -318.0, "logps/rejected": -612.0, "loss": 0.0003, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 24.25, "rewards/rejected": -28.0, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 2.47, "grad_norm": 0.10262610763311386, "learning_rate": 0.0003104804738999169, "logits/chosen": -3.0, "logits/rejected": -2.9375, "logps/chosen": -304.0, "logps/rejected": -608.0, "loss": 0.0063, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -4.0, "rewards/margins": 23.125, "rewards/rejected": -27.125, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 2.52, "grad_norm": 2.782456874847412, "learning_rate": 0.0003019779227044398, "logits/chosen": -2.859375, "logits/rejected": -2.75, "logps/chosen": -322.0, "logps/rejected": -544.0, "loss": 0.0095, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 0.99609375, "rewards/chosen": -3.203125, "rewards/margins": 18.875, "rewards/rejected": -22.125, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 2.57, "grad_norm": 0.1368335485458374, "learning_rate": 0.00029341204441673266, "logits/chosen": -3.03125, "logits/rejected": -2.9375, "logps/chosen": -296.0, "logps/rejected": -556.0, "loss": 0.0029, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.359375, "rewards/margins": 19.875, "rewards/rejected": -22.125, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.62, "grad_norm": 0.009250438772141933, "learning_rate": 0.00028479327524001636, "logits/chosen": -3.171875, "logits/rejected": -3.109375, "logps/chosen": -318.0, "logps/rejected": -552.0, "loss": 0.0044, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.796875, "rewards/margins": 19.75, "rewards/rejected": -22.625, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 2.67, "grad_norm": 0.1493176519870758, "learning_rate": 0.0002761321158169134, "logits/chosen": -3.21875, "logits/rejected": -3.1875, "logps/chosen": -298.0, "logps/rejected": -524.0, "loss": 0.0048, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.46875, "rewards/margins": 17.5, "rewards/rejected": -20.0, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 2.72, "grad_norm": 0.06779959052801132, "learning_rate": 0.0002674391184360313, "logits/chosen": -3.234375, "logits/rejected": -3.21875, "logps/chosen": -298.0, "logps/rejected": -528.0, "loss": 0.0038, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.4375, "rewards/margins": 17.625, "rewards/rejected": -20.125, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 2.77, "grad_norm": 0.3520822525024414, "learning_rate": 0.0002587248741756253, "logits/chosen": -3.25, "logits/rejected": -3.1875, "logps/chosen": -302.0, "logps/rejected": -536.0, "loss": 0.0021, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.734375, "rewards/margins": 19.5, "rewards/rejected": -22.25, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 2.83, "grad_norm": 0.020363105461001396, "learning_rate": 0.00025, "logits/chosen": -3.15625, "logits/rejected": -3.125, "logps/chosen": -310.0, "logps/rejected": -572.0, "loss": 0.0016, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.40625, "rewards/margins": 21.625, "rewards/rejected": -25.0, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 2.88, "grad_norm": 0.01094722468405962, "learning_rate": 0.00024127512582437484, "logits/chosen": -3.109375, "logits/rejected": -3.0625, "logps/chosen": -316.0, "logps/rejected": -640.0, "loss": 0.0016, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.90625, "rewards/margins": 26.125, "rewards/rejected": -30.0, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 2.93, "grad_norm": 0.10121448338031769, "learning_rate": 0.00023256088156396867, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -332.0, "logps/rejected": -628.0, "loss": 0.0016, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.875, "rewards/margins": 26.375, "rewards/rejected": -30.25, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 2.98, "grad_norm": 0.01434832252562046, "learning_rate": 0.00022386788418308668, "logits/chosen": -3.0, "logits/rejected": -2.90625, "logps/chosen": -328.0, "logps/rejected": -640.0, "loss": 0.0004, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 26.75, "rewards/rejected": -31.125, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 3.03, "grad_norm": 0.0004497322079259902, "learning_rate": 0.0002152067247599837, "logits/chosen": -2.953125, "logits/rejected": -2.875, "logps/chosen": -304.0, "logps/rejected": -656.0, "loss": 0.0005, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -4.375, "rewards/margins": 28.25, "rewards/rejected": -32.75, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 3.08, "grad_norm": 0.010737881064414978, "learning_rate": 0.00020658795558326743, "logits/chosen": -2.984375, "logits/rejected": -2.859375, "logps/chosen": -330.0, "logps/rejected": -676.0, "loss": 0.0008, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -5.03125, "rewards/margins": 29.375, "rewards/rejected": -34.5, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 3.13, "grad_norm": 0.3118731379508972, "learning_rate": 0.0001980220772955602, "logits/chosen": -3.015625, "logits/rejected": -2.9375, "logps/chosen": -310.0, "logps/rejected": -648.0, "loss": 0.0011, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.828125, "rewards/margins": 27.75, "rewards/rejected": -31.5, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 3.19, "grad_norm": 0.04152989387512207, "learning_rate": 0.0001895195261000831, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -302.0, "logps/rejected": -604.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.75, "rewards/margins": 24.75, "rewards/rejected": -27.5, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 3.24, "grad_norm": 0.00012200848141219467, "learning_rate": 0.00018109066104575022, "logits/chosen": -3.125, "logits/rejected": -3.0625, "logps/chosen": -324.0, "logps/rejected": -584.0, "loss": 0.0013, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 23.375, "rewards/rejected": -26.25, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 3.29, "grad_norm": 0.037591688334941864, "learning_rate": 0.00017274575140626317, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -308.0, "logps/rejected": -608.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.9375, "rewards/margins": 24.375, "rewards/rejected": -27.25, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 3.34, "grad_norm": 0.0018848556792363524, "learning_rate": 0.00016449496416858284, "logits/chosen": -3.109375, "logits/rejected": -3.03125, "logps/chosen": -308.0, "logps/rejected": -600.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 24.25, "rewards/rejected": -27.5, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 3.39, "grad_norm": 0.0032229481730610132, "learning_rate": 0.00015634835164602198, "logits/chosen": -3.078125, "logits/rejected": -3.046875, "logps/chosen": -312.0, "logps/rejected": -612.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 24.75, "rewards/rejected": -28.0, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 3.44, "grad_norm": 0.0005074635264463723, "learning_rate": 0.00014831583923105, "logits/chosen": -3.09375, "logits/rejected": -3.015625, "logps/chosen": -316.0, "logps/rejected": -596.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 24.625, "rewards/rejected": -27.875, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 3.49, "grad_norm": 0.007239128462970257, "learning_rate": 0.00014040721330273062, "logits/chosen": -3.109375, "logits/rejected": -3.046875, "logps/chosen": -310.0, "logps/rejected": -604.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 24.75, "rewards/rejected": -28.0, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 3.55, "grad_norm": 0.0034740234259516, "learning_rate": 0.00013263210930352737, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -300.0, "logps/rejected": -624.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 25.375, "rewards/rejected": -28.5, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 3.6, "grad_norm": 0.0001397097803419456, "learning_rate": 0.00012500000000000006, "logits/chosen": -3.078125, "logits/rejected": -3.046875, "logps/chosen": -304.0, "logps/rejected": -620.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.1875, "rewards/margins": 24.875, "rewards/rejected": -28.125, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 3.65, "grad_norm": 0.0016049661207944155, "learning_rate": 0.0001175201839416988, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -324.0, "logps/rejected": -608.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 24.75, "rewards/rejected": -28.0, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7, "grad_norm": 0.006374781485646963, "learning_rate": 0.00011020177413231333, "logits/chosen": -3.109375, "logits/rejected": -3.03125, "logps/chosen": -306.0, "logps/rejected": -608.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 25.125, "rewards/rejected": -28.375, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 3.75, "grad_norm": 0.0068184914998710155, "learning_rate": 0.00010305368692688174, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -296.0, "logps/rejected": -616.0, "loss": 0.0004, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 25.125, "rewards/rejected": -28.25, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 3.8, "grad_norm": 0.008304049260914326, "learning_rate": 9.608463116858542e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -300.0, "logps/rejected": -592.0, "loss": 0.0002, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.046875, "rewards/margins": 24.0, "rewards/rejected": -27.0, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 3.85, "grad_norm": 0.03573792800307274, "learning_rate": 8.930309757836516e-05, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -296.0, "logps/rejected": -604.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.015625, "rewards/margins": 24.75, "rewards/rejected": -27.75, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9, "grad_norm": 0.0008675418794155121, "learning_rate": 8.271734841028553e-05, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -296.0, "logps/rejected": -612.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.984375, "rewards/margins": 25.125, "rewards/rejected": -28.0, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 3.96, "grad_norm": 4.6364646550500765e-05, "learning_rate": 7.633540738525066e-05, "logits/chosen": -3.109375, "logits/rejected": -3.046875, "logps/chosen": -320.0, "logps/rejected": -616.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.25, "rewards/margins": 24.875, "rewards/rejected": -28.125, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 4.01, "grad_norm": 0.0028735387604683638, "learning_rate": 7.016504991533726e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -304.0, "logps/rejected": -616.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.921875, "rewards/margins": 25.25, "rewards/rejected": -28.125, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 4.06, "grad_norm": 0.00028936524176970124, "learning_rate": 6.421379363065141e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -302.0, "logps/rejected": -596.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.875, "rewards/margins": 25.0, "rewards/rejected": -27.875, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 4.11, "grad_norm": 4.6098506572889164e-05, "learning_rate": 5.848888922025553e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -308.0, "logps/rejected": -616.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 25.5, "rewards/rejected": -28.625, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 4.16, "grad_norm": 0.0014158826088532805, "learning_rate": 5.299731159831953e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -298.0, "logps/rejected": -616.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 25.5, "rewards/rejected": -28.5, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 4.21, "grad_norm": 0.0009728537406772375, "learning_rate": 4.7745751406263163e-05, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -300.0, "logps/rejected": -620.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.125, "rewards/margins": 25.875, "rewards/rejected": -29.0, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 4.26, "grad_norm": 0.0001809960085665807, "learning_rate": 4.274060686123959e-05, "logits/chosen": -3.109375, "logits/rejected": -3.03125, "logps/chosen": -300.0, "logps/rejected": -616.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -2.84375, "rewards/margins": 25.375, "rewards/rejected": -28.25, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 4.32, "grad_norm": 0.0019057235913351178, "learning_rate": 3.798797596089351e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -310.0, "logps/rejected": -620.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.09375, "rewards/margins": 25.5, "rewards/rejected": -28.5, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 4.37, "grad_norm": 0.007396820466965437, "learning_rate": 3.3493649053890325e-05, "logits/chosen": -3.078125, "logits/rejected": -3.015625, "logps/chosen": -310.0, "logps/rejected": -600.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 24.625, "rewards/rejected": -27.75, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 4.42, "grad_norm": 0.007374617271125317, "learning_rate": 2.9263101785268254e-05, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -300.0, "logps/rejected": -616.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.03125, "rewards/margins": 25.5, "rewards/rejected": -28.5, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 4.47, "grad_norm": 0.004346287343651056, "learning_rate": 2.5301488425208295e-05, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -310.0, "logps/rejected": -624.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.359375, "rewards/margins": 25.5, "rewards/rejected": -28.75, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 4.52, "grad_norm": 0.0016928453696891665, "learning_rate": 2.1613635589349755e-05, "logits/chosen": -3.078125, "logits/rejected": -3.015625, "logps/chosen": -318.0, "logps/rejected": -620.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 25.625, "rewards/rejected": -28.75, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 4.57, "grad_norm": 0.0004315236583352089, "learning_rate": 1.8204036358303172e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -302.0, "logps/rejected": -608.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 25.125, "rewards/rejected": -28.375, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 4.62, "grad_norm": 0.0036792384926229715, "learning_rate": 1.5076844803522921e-05, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -322.0, "logps/rejected": -624.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 25.375, "rewards/rejected": -28.75, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 4.68, "grad_norm": 0.0032357031013816595, "learning_rate": 1.2235870926211617e-05, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -308.0, "logps/rejected": -608.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.0, "rewards/margins": 25.125, "rewards/rejected": -28.125, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 4.73, "grad_norm": 0.00263414834626019, "learning_rate": 9.684576015420277e-06, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -308.0, "logps/rejected": -608.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 25.375, "rewards/rejected": -28.625, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 4.78, "grad_norm": 0.0006815909291617572, "learning_rate": 7.426068431000882e-06, "logits/chosen": -3.09375, "logits/rejected": -3.03125, "logps/chosen": -300.0, "logps/rejected": -612.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.171875, "rewards/margins": 25.0, "rewards/rejected": -28.25, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 4.83, "grad_norm": 0.007029928732663393, "learning_rate": 5.463099816548578e-06, "logits/chosen": -3.109375, "logits/rejected": -3.046875, "logps/chosen": -314.0, "logps/rejected": -624.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.28125, "rewards/margins": 26.0, "rewards/rejected": -29.25, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 4.88, "grad_norm": 0.0016696957172825933, "learning_rate": 3.798061746947995e-06, "logits/chosen": -3.078125, "logits/rejected": -3.03125, "logps/chosen": -292.0, "logps/rejected": -616.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.21875, "rewards/margins": 25.25, "rewards/rejected": -28.375, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 4.93, "grad_norm": 0.004225951619446278, "learning_rate": 2.4329828146074094e-06, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -308.0, "logps/rejected": -624.0, "loss": 0.0, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.203125, "rewards/margins": 26.0, "rewards/rejected": -29.125, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 4.98, "grad_norm": 0.00014331191778182983, "learning_rate": 1.3695261579316775e-06, "logits/chosen": -3.09375, "logits/rejected": -3.015625, "logps/chosen": -312.0, "logps/rejected": -604.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.15625, "rewards/margins": 25.5, "rewards/rejected": -28.75, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 5.04, "grad_norm": 0.0036776724737137556, "learning_rate": 6.089874350439506e-07, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -330.0, "logps/rejected": -616.0, "loss": 0.0001, "max_memory_allocated (GB)": 94.39, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.3125, "rewards/margins": 25.25, "rewards/rejected": -28.5, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 5.09, "grad_norm": 1.2631918252736796e-05, "learning_rate": 1.5229324522605948e-07, "logits/chosen": -3.09375, "logits/rejected": -3.046875, "logps/chosen": -300.0, "logps/rejected": -608.0, "loss": 0.0, "max_memory_allocated (GB)": 94.42, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.328125, "rewards/margins": 25.0, "rewards/rejected": -28.375, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 5.14, "grad_norm": 0.0053689004853367805, "learning_rate": 0.0, "logits/chosen": -3.09375, "logits/rejected": -3.0625, "logps/chosen": -304.0, "logps/rejected": -624.0, "loss": 0.0, "max_memory_allocated (GB)": 94.42, "memory_allocated (GB)": 51.7, "rewards/accuracies": 1.0, "rewards/chosen": -3.265625, "rewards/margins": 25.25, "rewards/rejected": -28.625, "step": 1000, "total_memory_available (GB)": 94.62 } ], "logging_steps": 10, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 10, "total_flos": 8.39920851615744e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }