silviasapora's picture
Model save
65abb0d verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9879931389365352,
"eval_steps": 500,
"global_step": 72,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0686106346483705,
"grad_norm": 19.247772216796875,
"learning_rate": 2.5e-07,
"log_odds_chosen": 1.202063798904419,
"log_odds_ratio": -0.3932521939277649,
"logps/chosen": -0.4192759096622467,
"logps/rejected": -0.8378359079360962,
"loss": 11.1199,
"nll_loss": 0.6684929728507996,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.020963797345757484,
"rewards/margins": 0.020928001031279564,
"rewards/rejected": -0.04189179837703705,
"step": 5
},
{
"epoch": 0.137221269296741,
"grad_norm": 16.6769962310791,
"learning_rate": 4.996988640512931e-07,
"log_odds_chosen": 1.2801547050476074,
"log_odds_ratio": -0.3870637118816376,
"logps/chosen": -0.39408302307128906,
"logps/rejected": -0.8230869174003601,
"loss": 10.5716,
"nll_loss": 0.6343531608581543,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.019704151898622513,
"rewards/margins": 0.021450195461511612,
"rewards/rejected": -0.041154347360134125,
"step": 10
},
{
"epoch": 0.2058319039451115,
"grad_norm": 18.085586547851562,
"learning_rate": 4.892350839330522e-07,
"log_odds_chosen": 1.465559959411621,
"log_odds_ratio": -0.3448963165283203,
"logps/chosen": -0.3543047606945038,
"logps/rejected": -0.8525373339653015,
"loss": 9.6005,
"nll_loss": 0.5749582052230835,
"rewards/accuracies": 0.878125011920929,
"rewards/chosen": -0.01771523989737034,
"rewards/margins": 0.024911630898714066,
"rewards/rejected": -0.04262687265872955,
"step": 15
},
{
"epoch": 0.274442538593482,
"grad_norm": 15.663309097290039,
"learning_rate": 4.64432152500068e-07,
"log_odds_chosen": 1.7299188375473022,
"log_odds_ratio": -0.30832645297050476,
"logps/chosen": -0.3413788974285126,
"logps/rejected": -0.897726833820343,
"loss": 8.7406,
"nll_loss": 0.5222011804580688,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.017068946734070778,
"rewards/margins": 0.027817392721772194,
"rewards/rejected": -0.04488633945584297,
"step": 20
},
{
"epoch": 0.34305317324185247,
"grad_norm": 18.158308029174805,
"learning_rate": 4.2677669529663686e-07,
"log_odds_chosen": 1.4837188720703125,
"log_odds_ratio": -0.3287079334259033,
"logps/chosen": -0.3441501259803772,
"logps/rejected": -0.8242276906967163,
"loss": 9.7102,
"nll_loss": 0.5818053483963013,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": -0.01720750704407692,
"rewards/margins": 0.024003881961107254,
"rewards/rejected": -0.041211389005184174,
"step": 25
},
{
"epoch": 0.411663807890223,
"grad_norm": 16.503480911254883,
"learning_rate": 3.7852568604830535e-07,
"log_odds_chosen": 1.5456057786941528,
"log_odds_ratio": -0.3088548183441162,
"logps/chosen": -0.3460735082626343,
"logps/rejected": -0.8717023134231567,
"loss": 9.158,
"nll_loss": 0.5480188727378845,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.017303675413131714,
"rewards/margins": 0.026281436905264854,
"rewards/rejected": -0.04358511418104172,
"step": 30
},
{
"epoch": 0.48027444253859347,
"grad_norm": 16.41570472717285,
"learning_rate": 3.2257116931361555e-07,
"log_odds_chosen": 1.6255455017089844,
"log_odds_ratio": -0.2997768521308899,
"logps/chosen": -0.3172294795513153,
"logps/rejected": -0.8485333323478699,
"loss": 8.8971,
"nll_loss": 0.531952977180481,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": -0.015861475840210915,
"rewards/margins": 0.026565194129943848,
"rewards/rejected": -0.04242666810750961,
"step": 35
},
{
"epoch": 0.548885077186964,
"grad_norm": 16.295194625854492,
"learning_rate": 2.6226691858185454e-07,
"log_odds_chosen": 1.7775678634643555,
"log_odds_ratio": -0.2819564938545227,
"logps/chosen": -0.29198122024536133,
"logps/rejected": -0.8509271740913391,
"loss": 8.6232,
"nll_loss": 0.5151973962783813,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.014599060639739037,
"rewards/margins": 0.027947301045060158,
"rewards/rejected": -0.042546361684799194,
"step": 40
},
{
"epoch": 0.6174957118353345,
"grad_norm": 18.461917877197266,
"learning_rate": 2.0122741949596793e-07,
"log_odds_chosen": 1.683835744857788,
"log_odds_ratio": -0.30903160572052,
"logps/chosen": -0.31781280040740967,
"logps/rejected": -0.8516524434089661,
"loss": 8.6307,
"nll_loss": 0.5150163769721985,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.015890639275312424,
"rewards/margins": 0.02669198252260685,
"rewards/rejected": -0.04258262366056442,
"step": 45
},
{
"epoch": 0.6861063464837049,
"grad_norm": 17.289121627807617,
"learning_rate": 1.4311122664242953e-07,
"log_odds_chosen": 1.7723395824432373,
"log_odds_ratio": -0.28731250762939453,
"logps/chosen": -0.31122511625289917,
"logps/rejected": -0.876534640789032,
"loss": 8.8422,
"nll_loss": 0.5288792848587036,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.015561257489025593,
"rewards/margins": 0.02826547622680664,
"rewards/rejected": -0.04382672905921936,
"step": 50
},
{
"epoch": 0.7547169811320755,
"grad_norm": 17.134496688842773,
"learning_rate": 9.140167895908865e-08,
"log_odds_chosen": 1.7291252613067627,
"log_odds_ratio": -0.28900426626205444,
"logps/chosen": -0.3234766721725464,
"logps/rejected": -0.8822552561759949,
"loss": 8.9071,
"nll_loss": 0.5326792001724243,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": -0.0161738358438015,
"rewards/margins": 0.027938928455114365,
"rewards/rejected": -0.04411276429891586,
"step": 55
},
{
"epoch": 0.823327615780446,
"grad_norm": 16.877124786376953,
"learning_rate": 4.919811712983879e-08,
"log_odds_chosen": 1.698340654373169,
"log_odds_ratio": -0.29392507672309875,
"logps/chosen": -0.3388321101665497,
"logps/rejected": -0.8975754976272583,
"loss": 9.0244,
"nll_loss": 0.5402450561523438,
"rewards/accuracies": 0.893750011920929,
"rewards/chosen": -0.016941606998443604,
"rewards/margins": 0.02793716825544834,
"rewards/rejected": -0.044878773391246796,
"step": 60
},
{
"epoch": 0.8919382504288165,
"grad_norm": 18.37203598022461,
"learning_rate": 1.9030116872178314e-08,
"log_odds_chosen": 1.82688307762146,
"log_odds_ratio": -0.2703757882118225,
"logps/chosen": -0.30324336886405945,
"logps/rejected": -0.8961852788925171,
"loss": 8.723,
"nll_loss": 0.5218873023986816,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": -0.015162169933319092,
"rewards/margins": 0.029647093266248703,
"rewards/rejected": -0.044809263199567795,
"step": 65
},
{
"epoch": 0.9605488850771869,
"grad_norm": 17.195167541503906,
"learning_rate": 2.7058725088047464e-09,
"log_odds_chosen": 1.7199735641479492,
"log_odds_ratio": -0.28608742356300354,
"logps/chosen": -0.32787787914276123,
"logps/rejected": -0.8620197176933289,
"loss": 9.1848,
"nll_loss": 0.5500085949897766,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -0.01639389432966709,
"rewards/margins": 0.02670709416270256,
"rewards/rejected": -0.0431009940803051,
"step": 70
},
{
"epoch": 0.9879931389365352,
"step": 72,
"total_flos": 0.0,
"train_loss": 9.270023425420126,
"train_runtime": 759.6158,
"train_samples_per_second": 6.137,
"train_steps_per_second": 0.095
}
],
"logging_steps": 5,
"max_steps": 72,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}