silviasapora's picture
Model save
03a7947 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9884169884169884,
"eval_steps": 500,
"global_step": 144,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06864006864006864,
"grad_norm": 117.0,
"learning_rate": 1.6666666666666667e-05,
"log_odds_chosen": 0.04570715129375458,
"log_odds_ratio": -0.7358157634735107,
"logps/chosen": -0.9796704053878784,
"logps/rejected": -1.0139487981796265,
"loss": 48.3858,
"nll_loss": 1.5120567083358765,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 5
},
{
"epoch": 0.13728013728013727,
"grad_norm": 43.0,
"learning_rate": 3.3333333333333335e-05,
"log_odds_chosen": 0.12318801879882812,
"log_odds_ratio": -0.7144898176193237,
"logps/chosen": -0.7773754000663757,
"logps/rejected": -0.8364827036857605,
"loss": 40.1791,
"nll_loss": 1.2555955648422241,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 10
},
{
"epoch": 0.2059202059202059,
"grad_norm": 42.5,
"learning_rate": 5e-05,
"log_odds_chosen": 0.1416315734386444,
"log_odds_ratio": -0.7184887528419495,
"logps/chosen": -0.7499162554740906,
"logps/rejected": -0.8260188102722168,
"loss": 37.8788,
"nll_loss": 1.183712363243103,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 15
},
{
"epoch": 0.27456027456027454,
"grad_norm": 36.0,
"learning_rate": 4.981488838730066e-05,
"log_odds_chosen": 0.1828576922416687,
"log_odds_ratio": -0.7147494554519653,
"logps/chosen": -0.7645934820175171,
"logps/rejected": -0.8450287580490112,
"loss": 36.3979,
"nll_loss": 1.1374342441558838,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 20
},
{
"epoch": 0.3432003432003432,
"grad_norm": 40.5,
"learning_rate": 4.926229485393513e-05,
"log_odds_chosen": 0.1581808477640152,
"log_odds_ratio": -0.7021191716194153,
"logps/chosen": -0.7081555724143982,
"logps/rejected": -0.7702702283859253,
"loss": 35.2689,
"nll_loss": 1.1021528244018555,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 25
},
{
"epoch": 0.4118404118404118,
"grad_norm": 31.875,
"learning_rate": 4.83504027183137e-05,
"log_odds_chosen": 0.09541453421115875,
"log_odds_ratio": -0.7330835461616516,
"logps/chosen": -0.7472664713859558,
"logps/rejected": -0.7989091277122498,
"loss": 34.0556,
"nll_loss": 1.0642387866973877,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 30
},
{
"epoch": 0.4804804804804805,
"grad_norm": 31.5,
"learning_rate": 4.709271612634298e-05,
"log_odds_chosen": 0.09454293549060822,
"log_odds_ratio": -0.7360396981239319,
"logps/chosen": -0.7235242128372192,
"logps/rejected": -0.7580782771110535,
"loss": 34.7533,
"nll_loss": 1.0860421657562256,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 35
},
{
"epoch": 0.5491205491205491,
"grad_norm": 36.5,
"learning_rate": 4.550786006948777e-05,
"log_odds_chosen": 0.18448153138160706,
"log_odds_ratio": -0.7088391184806824,
"logps/chosen": -0.6769216060638428,
"logps/rejected": -0.7544242143630981,
"loss": 33.5798,
"nll_loss": 1.0493675470352173,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 40
},
{
"epoch": 0.6177606177606177,
"grad_norm": 30.375,
"learning_rate": 4.361930456859455e-05,
"log_odds_chosen": 0.2148957997560501,
"log_odds_ratio": -0.7018598318099976,
"logps/chosen": -0.6819745898246765,
"logps/rejected": -0.7681012153625488,
"loss": 32.7666,
"nll_loss": 1.0239557027816772,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 45
},
{
"epoch": 0.6864006864006864,
"grad_norm": 29.375,
"learning_rate": 4.14550171080187e-05,
"log_odds_chosen": 0.1911373734474182,
"log_odds_ratio": -0.7087734937667847,
"logps/chosen": -0.699134349822998,
"logps/rejected": -0.7818752527236938,
"loss": 33.5611,
"nll_loss": 1.0487844944000244,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 50
},
{
"epoch": 0.7550407550407551,
"grad_norm": 32.5,
"learning_rate": 3.9047048467134014e-05,
"log_odds_chosen": 0.1783633977174759,
"log_odds_ratio": -0.7113261818885803,
"logps/chosen": -0.7090396881103516,
"logps/rejected": -0.7853409051895142,
"loss": 33.1202,
"nll_loss": 1.035004734992981,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 55
},
{
"epoch": 0.8236808236808236,
"grad_norm": 30.0,
"learning_rate": 3.6431058082615964e-05,
"log_odds_chosen": 0.18033447861671448,
"log_odds_ratio": -0.7068595290184021,
"logps/chosen": -0.7181381583213806,
"logps/rejected": -0.7968712449073792,
"loss": 33.0624,
"nll_loss": 1.0332015752792358,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 60
},
{
"epoch": 0.8923208923208923,
"grad_norm": 33.75,
"learning_rate": 3.3645785970375284e-05,
"log_odds_chosen": 0.15356490015983582,
"log_odds_ratio": -0.7268341779708862,
"logps/chosen": -0.6993820071220398,
"logps/rejected": -0.779438853263855,
"loss": 33.6264,
"nll_loss": 1.050825834274292,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 65
},
{
"epoch": 0.960960960960961,
"grad_norm": 29.0,
"learning_rate": 3.0732479027412256e-05,
"log_odds_chosen": 0.181875079870224,
"log_odds_ratio": -0.7036765813827515,
"logps/chosen": -0.6884555220603943,
"logps/rejected": -0.763231635093689,
"loss": 33.2219,
"nll_loss": 1.0381855964660645,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 70
},
{
"epoch": 1.0411840411840412,
"grad_norm": 24.5,
"learning_rate": 2.7734280209446865e-05,
"log_odds_chosen": 0.3063170909881592,
"log_odds_ratio": -0.6562883853912354,
"logps/chosen": -0.6133887767791748,
"logps/rejected": -0.7328479290008545,
"loss": 33.71,
"nll_loss": 0.9260991811752319,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 75
},
{
"epoch": 1.1098241098241097,
"grad_norm": 30.5,
"learning_rate": 2.4695589629950063e-05,
"log_odds_chosen": 0.6145213842391968,
"log_odds_ratio": -0.5515514612197876,
"logps/chosen": -0.5365720391273499,
"logps/rejected": -0.7887281179428101,
"loss": 26.4267,
"nll_loss": 0.8258360028266907,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 80
},
{
"epoch": 1.1784641784641785,
"grad_norm": 27.75,
"learning_rate": 2.1661407042015055e-05,
"log_odds_chosen": 0.6053102016448975,
"log_odds_ratio": -0.5604658126831055,
"logps/chosen": -0.5426899790763855,
"logps/rejected": -0.7706879377365112,
"loss": 26.4214,
"nll_loss": 0.8256683349609375,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 85
},
{
"epoch": 1.247104247104247,
"grad_norm": 28.125,
"learning_rate": 1.867666544020798e-05,
"log_odds_chosen": 0.6666785478591919,
"log_odds_ratio": -0.5315438508987427,
"logps/chosen": -0.49580830335617065,
"logps/rejected": -0.7630711793899536,
"loss": 25.8852,
"nll_loss": 0.808911919593811,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 90
},
{
"epoch": 1.3157443157443156,
"grad_norm": 30.375,
"learning_rate": 1.5785565651040956e-05,
"log_odds_chosen": 0.6142023801803589,
"log_odds_ratio": -0.5481556057929993,
"logps/chosen": -0.5331859588623047,
"logps/rejected": -0.7694956660270691,
"loss": 25.3047,
"nll_loss": 0.7907715439796448,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 95
},
{
"epoch": 1.3843843843843844,
"grad_norm": 27.5,
"learning_rate": 1.303092176606978e-05,
"log_odds_chosen": 0.7389415502548218,
"log_odds_ratio": -0.49965333938598633,
"logps/chosen": -0.5179293751716614,
"logps/rejected": -0.8284605145454407,
"loss": 26.6145,
"nll_loss": 0.8317017555236816,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 100
},
{
"epoch": 1.4530244530244532,
"grad_norm": 27.625,
"learning_rate": 1.0453527111051184e-05,
"log_odds_chosen": 0.7348083853721619,
"log_odds_ratio": -0.517388641834259,
"logps/chosen": -0.5103100538253784,
"logps/rejected": -0.7935900688171387,
"loss": 25.6598,
"nll_loss": 0.8018671870231628,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 105
},
{
"epoch": 1.5216645216645217,
"grad_norm": 28.5,
"learning_rate": 8.09155014047743e-06,
"log_odds_chosen": 0.6674805879592896,
"log_odds_ratio": -0.5333995819091797,
"logps/chosen": -0.5412867665290833,
"logps/rejected": -0.8199647068977356,
"loss": 26.3129,
"nll_loss": 0.8222776651382446,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 110
},
{
"epoch": 1.5903045903045903,
"grad_norm": 27.625,
"learning_rate": 5.9799692036430485e-06,
"log_odds_chosen": 0.5439990758895874,
"log_odds_ratio": -0.5787786841392517,
"logps/chosen": -0.5497544407844543,
"logps/rejected": -0.7601868510246277,
"loss": 26.8215,
"nll_loss": 0.8381711840629578,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 115
},
{
"epoch": 1.6589446589446588,
"grad_norm": 28.875,
"learning_rate": 4.150054552753055e-06,
"log_odds_chosen": 0.6353312730789185,
"log_odds_ratio": -0.5571193695068359,
"logps/chosen": -0.5275944471359253,
"logps/rejected": -0.7502118349075317,
"loss": 26.0757,
"nll_loss": 0.8148647546768188,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 120
},
{
"epoch": 1.7275847275847276,
"grad_norm": 28.75,
"learning_rate": 2.6289052639777044e-06,
"log_odds_chosen": 0.5769211053848267,
"log_odds_ratio": -0.550868809223175,
"logps/chosen": -0.5242232084274292,
"logps/rejected": -0.7588512301445007,
"loss": 25.6315,
"nll_loss": 0.8009853363037109,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 125
},
{
"epoch": 1.7962247962247964,
"grad_norm": 33.75,
"learning_rate": 1.439047929157139e-06,
"log_odds_chosen": 0.689731776714325,
"log_odds_ratio": -0.5165315866470337,
"logps/chosen": -0.5240885019302368,
"logps/rejected": -0.8086684942245483,
"loss": 25.9948,
"nll_loss": 0.812338650226593,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 130
},
{
"epoch": 1.864864864864865,
"grad_norm": 27.0,
"learning_rate": 5.981030611018234e-07,
"log_odds_chosen": 0.6484060883522034,
"log_odds_ratio": -0.5438445806503296,
"logps/chosen": -0.551119863986969,
"logps/rejected": -0.7980315089225769,
"loss": 27.2419,
"nll_loss": 0.8513097763061523,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 135
},
{
"epoch": 1.9335049335049335,
"grad_norm": 28.625,
"learning_rate": 1.1852415266910554e-07,
"log_odds_chosen": 0.7377719879150391,
"log_odds_ratio": -0.502840518951416,
"logps/chosen": -0.5086525082588196,
"logps/rejected": -0.8028281331062317,
"loss": 24.634,
"nll_loss": 0.7698137760162354,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 140
},
{
"epoch": 1.9884169884169884,
"step": 144,
"total_flos": 0.0,
"train_loss": 30.958674271901447,
"train_runtime": 2313.4467,
"train_samples_per_second": 4.03,
"train_steps_per_second": 0.062
}
],
"logging_steps": 5,
"max_steps": 144,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}