{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9884169884169884, "eval_steps": 500, "global_step": 144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06864006864006864, "grad_norm": 117.0, "learning_rate": 1.6666666666666667e-05, "log_odds_chosen": 0.04570715129375458, "log_odds_ratio": -0.7358157634735107, "logps/chosen": -0.9796704053878784, "logps/rejected": -1.0139487981796265, "loss": 48.3858, "nll_loss": 1.5120567083358765, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 5 }, { "epoch": 0.13728013728013727, "grad_norm": 43.0, "learning_rate": 3.3333333333333335e-05, "log_odds_chosen": 0.12318801879882812, "log_odds_ratio": -0.7144898176193237, "logps/chosen": -0.7773754000663757, "logps/rejected": -0.8364827036857605, "loss": 40.1791, "nll_loss": 1.2555955648422241, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 10 }, { "epoch": 0.2059202059202059, "grad_norm": 42.5, "learning_rate": 5e-05, "log_odds_chosen": 0.1416315734386444, "log_odds_ratio": -0.7184887528419495, "logps/chosen": -0.7499162554740906, "logps/rejected": -0.8260188102722168, "loss": 37.8788, "nll_loss": 1.183712363243103, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 15 }, { "epoch": 0.27456027456027454, "grad_norm": 36.0, "learning_rate": 4.981488838730066e-05, "log_odds_chosen": 0.1828576922416687, "log_odds_ratio": -0.7147494554519653, "logps/chosen": -0.7645934820175171, "logps/rejected": -0.8450287580490112, "loss": 36.3979, "nll_loss": 1.1374342441558838, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 20 }, { "epoch": 0.3432003432003432, "grad_norm": 40.5, "learning_rate": 4.926229485393513e-05, "log_odds_chosen": 0.1581808477640152, "log_odds_ratio": -0.7021191716194153, "logps/chosen": -0.7081555724143982, "logps/rejected": -0.7702702283859253, "loss": 35.2689, "nll_loss": 1.1021528244018555, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 25 }, { "epoch": 0.4118404118404118, "grad_norm": 31.875, "learning_rate": 4.83504027183137e-05, "log_odds_chosen": 0.09541453421115875, "log_odds_ratio": -0.7330835461616516, "logps/chosen": -0.7472664713859558, "logps/rejected": -0.7989091277122498, "loss": 34.0556, "nll_loss": 1.0642387866973877, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 30 }, { "epoch": 0.4804804804804805, "grad_norm": 31.5, "learning_rate": 4.709271612634298e-05, "log_odds_chosen": 0.09454293549060822, "log_odds_ratio": -0.7360396981239319, "logps/chosen": -0.7235242128372192, "logps/rejected": -0.7580782771110535, "loss": 34.7533, "nll_loss": 1.0860421657562256, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 35 }, { "epoch": 0.5491205491205491, "grad_norm": 36.5, "learning_rate": 4.550786006948777e-05, "log_odds_chosen": 0.18448153138160706, "log_odds_ratio": -0.7088391184806824, "logps/chosen": -0.6769216060638428, "logps/rejected": -0.7544242143630981, "loss": 33.5798, "nll_loss": 1.0493675470352173, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 40 }, { "epoch": 0.6177606177606177, "grad_norm": 30.375, "learning_rate": 4.361930456859455e-05, "log_odds_chosen": 0.2148957997560501, "log_odds_ratio": -0.7018598318099976, "logps/chosen": -0.6819745898246765, "logps/rejected": -0.7681012153625488, "loss": 32.7666, "nll_loss": 1.0239557027816772, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 45 }, { "epoch": 0.6864006864006864, "grad_norm": 29.375, "learning_rate": 4.14550171080187e-05, "log_odds_chosen": 0.1911373734474182, "log_odds_ratio": -0.7087734937667847, "logps/chosen": -0.699134349822998, "logps/rejected": -0.7818752527236938, "loss": 33.5611, "nll_loss": 1.0487844944000244, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 50 }, { "epoch": 0.7550407550407551, "grad_norm": 32.5, "learning_rate": 3.9047048467134014e-05, "log_odds_chosen": 0.1783633977174759, "log_odds_ratio": -0.7113261818885803, "logps/chosen": -0.7090396881103516, "logps/rejected": -0.7853409051895142, "loss": 33.1202, "nll_loss": 1.035004734992981, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 55 }, { "epoch": 0.8236808236808236, "grad_norm": 30.0, "learning_rate": 3.6431058082615964e-05, "log_odds_chosen": 0.18033447861671448, "log_odds_ratio": -0.7068595290184021, "logps/chosen": -0.7181381583213806, "logps/rejected": -0.7968712449073792, "loss": 33.0624, "nll_loss": 1.0332015752792358, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 60 }, { "epoch": 0.8923208923208923, "grad_norm": 33.75, "learning_rate": 3.3645785970375284e-05, "log_odds_chosen": 0.15356490015983582, "log_odds_ratio": -0.7268341779708862, "logps/chosen": -0.6993820071220398, "logps/rejected": -0.779438853263855, "loss": 33.6264, "nll_loss": 1.050825834274292, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 65 }, { "epoch": 0.960960960960961, "grad_norm": 29.0, "learning_rate": 3.0732479027412256e-05, "log_odds_chosen": 0.181875079870224, "log_odds_ratio": -0.7036765813827515, "logps/chosen": -0.6884555220603943, "logps/rejected": -0.763231635093689, "loss": 33.2219, "nll_loss": 1.0381855964660645, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 70 }, { "epoch": 1.0411840411840412, "grad_norm": 24.5, "learning_rate": 2.7734280209446865e-05, "log_odds_chosen": 0.3063170909881592, "log_odds_ratio": -0.6562883853912354, "logps/chosen": -0.6133887767791748, "logps/rejected": -0.7328479290008545, "loss": 33.71, "nll_loss": 0.9260991811752319, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 75 }, { "epoch": 1.1098241098241097, "grad_norm": 30.5, "learning_rate": 2.4695589629950063e-05, "log_odds_chosen": 0.6145213842391968, "log_odds_ratio": -0.5515514612197876, "logps/chosen": -0.5365720391273499, "logps/rejected": -0.7887281179428101, "loss": 26.4267, "nll_loss": 0.8258360028266907, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 80 }, { "epoch": 1.1784641784641785, "grad_norm": 27.75, "learning_rate": 2.1661407042015055e-05, "log_odds_chosen": 0.6053102016448975, "log_odds_ratio": -0.5604658126831055, "logps/chosen": -0.5426899790763855, "logps/rejected": -0.7706879377365112, "loss": 26.4214, "nll_loss": 0.8256683349609375, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 85 }, { "epoch": 1.247104247104247, "grad_norm": 28.125, "learning_rate": 1.867666544020798e-05, "log_odds_chosen": 0.6666785478591919, "log_odds_ratio": -0.5315438508987427, "logps/chosen": -0.49580830335617065, "logps/rejected": -0.7630711793899536, "loss": 25.8852, "nll_loss": 0.808911919593811, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 90 }, { "epoch": 1.3157443157443156, "grad_norm": 30.375, "learning_rate": 1.5785565651040956e-05, "log_odds_chosen": 0.6142023801803589, "log_odds_ratio": -0.5481556057929993, "logps/chosen": -0.5331859588623047, "logps/rejected": -0.7694956660270691, "loss": 25.3047, "nll_loss": 0.7907715439796448, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 95 }, { "epoch": 1.3843843843843844, "grad_norm": 27.5, "learning_rate": 1.303092176606978e-05, "log_odds_chosen": 0.7389415502548218, "log_odds_ratio": -0.49965333938598633, "logps/chosen": -0.5179293751716614, "logps/rejected": -0.8284605145454407, "loss": 26.6145, "nll_loss": 0.8317017555236816, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 100 }, { "epoch": 1.4530244530244532, "grad_norm": 27.625, "learning_rate": 1.0453527111051184e-05, "log_odds_chosen": 0.7348083853721619, "log_odds_ratio": -0.517388641834259, "logps/chosen": -0.5103100538253784, "logps/rejected": -0.7935900688171387, "loss": 25.6598, "nll_loss": 0.8018671870231628, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 105 }, { "epoch": 1.5216645216645217, "grad_norm": 28.5, "learning_rate": 8.09155014047743e-06, "log_odds_chosen": 0.6674805879592896, "log_odds_ratio": -0.5333995819091797, "logps/chosen": -0.5412867665290833, "logps/rejected": -0.8199647068977356, "loss": 26.3129, "nll_loss": 0.8222776651382446, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 110 }, { "epoch": 1.5903045903045903, "grad_norm": 27.625, "learning_rate": 5.9799692036430485e-06, "log_odds_chosen": 0.5439990758895874, "log_odds_ratio": -0.5787786841392517, "logps/chosen": -0.5497544407844543, "logps/rejected": -0.7601868510246277, "loss": 26.8215, "nll_loss": 0.8381711840629578, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 115 }, { "epoch": 1.6589446589446588, "grad_norm": 28.875, "learning_rate": 4.150054552753055e-06, "log_odds_chosen": 0.6353312730789185, "log_odds_ratio": -0.5571193695068359, "logps/chosen": -0.5275944471359253, "logps/rejected": -0.7502118349075317, "loss": 26.0757, "nll_loss": 0.8148647546768188, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 120 }, { "epoch": 1.7275847275847276, "grad_norm": 28.75, "learning_rate": 2.6289052639777044e-06, "log_odds_chosen": 0.5769211053848267, "log_odds_ratio": -0.550868809223175, "logps/chosen": -0.5242232084274292, "logps/rejected": -0.7588512301445007, "loss": 25.6315, "nll_loss": 0.8009853363037109, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 125 }, { "epoch": 1.7962247962247964, "grad_norm": 33.75, "learning_rate": 1.439047929157139e-06, "log_odds_chosen": 0.689731776714325, "log_odds_ratio": -0.5165315866470337, "logps/chosen": -0.5240885019302368, "logps/rejected": -0.8086684942245483, "loss": 25.9948, "nll_loss": 0.812338650226593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 130 }, { "epoch": 1.864864864864865, "grad_norm": 27.0, "learning_rate": 5.981030611018234e-07, "log_odds_chosen": 0.6484060883522034, "log_odds_ratio": -0.5438445806503296, "logps/chosen": -0.551119863986969, "logps/rejected": -0.7980315089225769, "loss": 27.2419, "nll_loss": 0.8513097763061523, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 135 }, { "epoch": 1.9335049335049335, "grad_norm": 28.625, "learning_rate": 1.1852415266910554e-07, "log_odds_chosen": 0.7377719879150391, "log_odds_ratio": -0.502840518951416, "logps/chosen": -0.5086525082588196, "logps/rejected": -0.8028281331062317, "loss": 24.634, "nll_loss": 0.7698137760162354, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 140 }, { "epoch": 1.9884169884169884, "step": 144, "total_flos": 0.0, "train_loss": 30.958674271901447, "train_runtime": 2313.4467, "train_samples_per_second": 4.03, "train_steps_per_second": 0.062 } ], "logging_steps": 5, "max_steps": 144, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }