|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9884169884169884, |
|
"eval_steps": 500, |
|
"global_step": 144, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06864006864006864, |
|
"grad_norm": 117.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"log_odds_chosen": 0.04570715129375458, |
|
"log_odds_ratio": -0.7358157634735107, |
|
"logps/chosen": -0.9796704053878784, |
|
"logps/rejected": -1.0139487981796265, |
|
"loss": 48.3858, |
|
"nll_loss": 1.5120567083358765, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.13728013728013727, |
|
"grad_norm": 43.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"log_odds_chosen": 0.12318801879882812, |
|
"log_odds_ratio": -0.7144898176193237, |
|
"logps/chosen": -0.7773754000663757, |
|
"logps/rejected": -0.8364827036857605, |
|
"loss": 40.1791, |
|
"nll_loss": 1.2555955648422241, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2059202059202059, |
|
"grad_norm": 42.5, |
|
"learning_rate": 5e-05, |
|
"log_odds_chosen": 0.1416315734386444, |
|
"log_odds_ratio": -0.7184887528419495, |
|
"logps/chosen": -0.7499162554740906, |
|
"logps/rejected": -0.8260188102722168, |
|
"loss": 37.8788, |
|
"nll_loss": 1.183712363243103, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.27456027456027454, |
|
"grad_norm": 36.0, |
|
"learning_rate": 4.981488838730066e-05, |
|
"log_odds_chosen": 0.1828576922416687, |
|
"log_odds_ratio": -0.7147494554519653, |
|
"logps/chosen": -0.7645934820175171, |
|
"logps/rejected": -0.8450287580490112, |
|
"loss": 36.3979, |
|
"nll_loss": 1.1374342441558838, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3432003432003432, |
|
"grad_norm": 40.5, |
|
"learning_rate": 4.926229485393513e-05, |
|
"log_odds_chosen": 0.1581808477640152, |
|
"log_odds_ratio": -0.7021191716194153, |
|
"logps/chosen": -0.7081555724143982, |
|
"logps/rejected": -0.7702702283859253, |
|
"loss": 35.2689, |
|
"nll_loss": 1.1021528244018555, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.4118404118404118, |
|
"grad_norm": 31.875, |
|
"learning_rate": 4.83504027183137e-05, |
|
"log_odds_chosen": 0.09541453421115875, |
|
"log_odds_ratio": -0.7330835461616516, |
|
"logps/chosen": -0.7472664713859558, |
|
"logps/rejected": -0.7989091277122498, |
|
"loss": 34.0556, |
|
"nll_loss": 1.0642387866973877, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4804804804804805, |
|
"grad_norm": 31.5, |
|
"learning_rate": 4.709271612634298e-05, |
|
"log_odds_chosen": 0.09454293549060822, |
|
"log_odds_ratio": -0.7360396981239319, |
|
"logps/chosen": -0.7235242128372192, |
|
"logps/rejected": -0.7580782771110535, |
|
"loss": 34.7533, |
|
"nll_loss": 1.0860421657562256, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.5491205491205491, |
|
"grad_norm": 36.5, |
|
"learning_rate": 4.550786006948777e-05, |
|
"log_odds_chosen": 0.18448153138160706, |
|
"log_odds_ratio": -0.7088391184806824, |
|
"logps/chosen": -0.6769216060638428, |
|
"logps/rejected": -0.7544242143630981, |
|
"loss": 33.5798, |
|
"nll_loss": 1.0493675470352173, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.6177606177606177, |
|
"grad_norm": 30.375, |
|
"learning_rate": 4.361930456859455e-05, |
|
"log_odds_chosen": 0.2148957997560501, |
|
"log_odds_ratio": -0.7018598318099976, |
|
"logps/chosen": -0.6819745898246765, |
|
"logps/rejected": -0.7681012153625488, |
|
"loss": 32.7666, |
|
"nll_loss": 1.0239557027816772, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.6864006864006864, |
|
"grad_norm": 29.375, |
|
"learning_rate": 4.14550171080187e-05, |
|
"log_odds_chosen": 0.1911373734474182, |
|
"log_odds_ratio": -0.7087734937667847, |
|
"logps/chosen": -0.699134349822998, |
|
"logps/rejected": -0.7818752527236938, |
|
"loss": 33.5611, |
|
"nll_loss": 1.0487844944000244, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7550407550407551, |
|
"grad_norm": 32.5, |
|
"learning_rate": 3.9047048467134014e-05, |
|
"log_odds_chosen": 0.1783633977174759, |
|
"log_odds_ratio": -0.7113261818885803, |
|
"logps/chosen": -0.7090396881103516, |
|
"logps/rejected": -0.7853409051895142, |
|
"loss": 33.1202, |
|
"nll_loss": 1.035004734992981, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.8236808236808236, |
|
"grad_norm": 30.0, |
|
"learning_rate": 3.6431058082615964e-05, |
|
"log_odds_chosen": 0.18033447861671448, |
|
"log_odds_ratio": -0.7068595290184021, |
|
"logps/chosen": -0.7181381583213806, |
|
"logps/rejected": -0.7968712449073792, |
|
"loss": 33.0624, |
|
"nll_loss": 1.0332015752792358, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8923208923208923, |
|
"grad_norm": 33.75, |
|
"learning_rate": 3.3645785970375284e-05, |
|
"log_odds_chosen": 0.15356490015983582, |
|
"log_odds_ratio": -0.7268341779708862, |
|
"logps/chosen": -0.6993820071220398, |
|
"logps/rejected": -0.779438853263855, |
|
"loss": 33.6264, |
|
"nll_loss": 1.050825834274292, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.960960960960961, |
|
"grad_norm": 29.0, |
|
"learning_rate": 3.0732479027412256e-05, |
|
"log_odds_chosen": 0.181875079870224, |
|
"log_odds_ratio": -0.7036765813827515, |
|
"logps/chosen": -0.6884555220603943, |
|
"logps/rejected": -0.763231635093689, |
|
"loss": 33.2219, |
|
"nll_loss": 1.0381855964660645, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0411840411840412, |
|
"grad_norm": 24.5, |
|
"learning_rate": 2.7734280209446865e-05, |
|
"log_odds_chosen": 0.3063170909881592, |
|
"log_odds_ratio": -0.6562883853912354, |
|
"logps/chosen": -0.6133887767791748, |
|
"logps/rejected": -0.7328479290008545, |
|
"loss": 33.71, |
|
"nll_loss": 0.9260991811752319, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.1098241098241097, |
|
"grad_norm": 30.5, |
|
"learning_rate": 2.4695589629950063e-05, |
|
"log_odds_chosen": 0.6145213842391968, |
|
"log_odds_ratio": -0.5515514612197876, |
|
"logps/chosen": -0.5365720391273499, |
|
"logps/rejected": -0.7887281179428101, |
|
"loss": 26.4267, |
|
"nll_loss": 0.8258360028266907, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.1784641784641785, |
|
"grad_norm": 27.75, |
|
"learning_rate": 2.1661407042015055e-05, |
|
"log_odds_chosen": 0.6053102016448975, |
|
"log_odds_ratio": -0.5604658126831055, |
|
"logps/chosen": -0.5426899790763855, |
|
"logps/rejected": -0.7706879377365112, |
|
"loss": 26.4214, |
|
"nll_loss": 0.8256683349609375, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.247104247104247, |
|
"grad_norm": 28.125, |
|
"learning_rate": 1.867666544020798e-05, |
|
"log_odds_chosen": 0.6666785478591919, |
|
"log_odds_ratio": -0.5315438508987427, |
|
"logps/chosen": -0.49580830335617065, |
|
"logps/rejected": -0.7630711793899536, |
|
"loss": 25.8852, |
|
"nll_loss": 0.808911919593811, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.3157443157443156, |
|
"grad_norm": 30.375, |
|
"learning_rate": 1.5785565651040956e-05, |
|
"log_odds_chosen": 0.6142023801803589, |
|
"log_odds_ratio": -0.5481556057929993, |
|
"logps/chosen": -0.5331859588623047, |
|
"logps/rejected": -0.7694956660270691, |
|
"loss": 25.3047, |
|
"nll_loss": 0.7907715439796448, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.3843843843843844, |
|
"grad_norm": 27.5, |
|
"learning_rate": 1.303092176606978e-05, |
|
"log_odds_chosen": 0.7389415502548218, |
|
"log_odds_ratio": -0.49965333938598633, |
|
"logps/chosen": -0.5179293751716614, |
|
"logps/rejected": -0.8284605145454407, |
|
"loss": 26.6145, |
|
"nll_loss": 0.8317017555236816, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.4530244530244532, |
|
"grad_norm": 27.625, |
|
"learning_rate": 1.0453527111051184e-05, |
|
"log_odds_chosen": 0.7348083853721619, |
|
"log_odds_ratio": -0.517388641834259, |
|
"logps/chosen": -0.5103100538253784, |
|
"logps/rejected": -0.7935900688171387, |
|
"loss": 25.6598, |
|
"nll_loss": 0.8018671870231628, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.5216645216645217, |
|
"grad_norm": 28.5, |
|
"learning_rate": 8.09155014047743e-06, |
|
"log_odds_chosen": 0.6674805879592896, |
|
"log_odds_ratio": -0.5333995819091797, |
|
"logps/chosen": -0.5412867665290833, |
|
"logps/rejected": -0.8199647068977356, |
|
"loss": 26.3129, |
|
"nll_loss": 0.8222776651382446, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.5903045903045903, |
|
"grad_norm": 27.625, |
|
"learning_rate": 5.9799692036430485e-06, |
|
"log_odds_chosen": 0.5439990758895874, |
|
"log_odds_ratio": -0.5787786841392517, |
|
"logps/chosen": -0.5497544407844543, |
|
"logps/rejected": -0.7601868510246277, |
|
"loss": 26.8215, |
|
"nll_loss": 0.8381711840629578, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.6589446589446588, |
|
"grad_norm": 28.875, |
|
"learning_rate": 4.150054552753055e-06, |
|
"log_odds_chosen": 0.6353312730789185, |
|
"log_odds_ratio": -0.5571193695068359, |
|
"logps/chosen": -0.5275944471359253, |
|
"logps/rejected": -0.7502118349075317, |
|
"loss": 26.0757, |
|
"nll_loss": 0.8148647546768188, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.7275847275847276, |
|
"grad_norm": 28.75, |
|
"learning_rate": 2.6289052639777044e-06, |
|
"log_odds_chosen": 0.5769211053848267, |
|
"log_odds_ratio": -0.550868809223175, |
|
"logps/chosen": -0.5242232084274292, |
|
"logps/rejected": -0.7588512301445007, |
|
"loss": 25.6315, |
|
"nll_loss": 0.8009853363037109, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.7962247962247964, |
|
"grad_norm": 33.75, |
|
"learning_rate": 1.439047929157139e-06, |
|
"log_odds_chosen": 0.689731776714325, |
|
"log_odds_ratio": -0.5165315866470337, |
|
"logps/chosen": -0.5240885019302368, |
|
"logps/rejected": -0.8086684942245483, |
|
"loss": 25.9948, |
|
"nll_loss": 0.812338650226593, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.864864864864865, |
|
"grad_norm": 27.0, |
|
"learning_rate": 5.981030611018234e-07, |
|
"log_odds_chosen": 0.6484060883522034, |
|
"log_odds_ratio": -0.5438445806503296, |
|
"logps/chosen": -0.551119863986969, |
|
"logps/rejected": -0.7980315089225769, |
|
"loss": 27.2419, |
|
"nll_loss": 0.8513097763061523, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.9335049335049335, |
|
"grad_norm": 28.625, |
|
"learning_rate": 1.1852415266910554e-07, |
|
"log_odds_chosen": 0.7377719879150391, |
|
"log_odds_ratio": -0.502840518951416, |
|
"logps/chosen": -0.5086525082588196, |
|
"logps/rejected": -0.8028281331062317, |
|
"loss": 24.634, |
|
"nll_loss": 0.7698137760162354, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.9884169884169884, |
|
"step": 144, |
|
"total_flos": 0.0, |
|
"train_loss": 30.958674271901447, |
|
"train_runtime": 2313.4467, |
|
"train_samples_per_second": 4.03, |
|
"train_steps_per_second": 0.062 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 144, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|