{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9879931389365352,
  "eval_steps": 500,
  "global_step": 72,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0686106346483705,
      "grad_norm": 19.247772216796875,
      "learning_rate": 2.5e-07,
      "log_odds_chosen": 1.202063798904419,
      "log_odds_ratio": -0.3932521939277649,
      "logps/chosen": -0.4192759096622467,
      "logps/rejected": -0.8378359079360962,
      "loss": 11.1199,
      "nll_loss": 0.6684929728507996,
      "rewards/accuracies": 0.828125,
      "rewards/chosen": -0.020963797345757484,
      "rewards/margins": 0.020928001031279564,
      "rewards/rejected": -0.04189179837703705,
      "step": 5
    },
    {
      "epoch": 0.137221269296741,
      "grad_norm": 16.6769962310791,
      "learning_rate": 4.996988640512931e-07,
      "log_odds_chosen": 1.2801547050476074,
      "log_odds_ratio": -0.3870637118816376,
      "logps/chosen": -0.39408302307128906,
      "logps/rejected": -0.8230869174003601,
      "loss": 10.5716,
      "nll_loss": 0.6343531608581543,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -0.019704151898622513,
      "rewards/margins": 0.021450195461511612,
      "rewards/rejected": -0.041154347360134125,
      "step": 10
    },
    {
      "epoch": 0.2058319039451115,
      "grad_norm": 18.085586547851562,
      "learning_rate": 4.892350839330522e-07,
      "log_odds_chosen": 1.465559959411621,
      "log_odds_ratio": -0.3448963165283203,
      "logps/chosen": -0.3543047606945038,
      "logps/rejected": -0.8525373339653015,
      "loss": 9.6005,
      "nll_loss": 0.5749582052230835,
      "rewards/accuracies": 0.878125011920929,
      "rewards/chosen": -0.01771523989737034,
      "rewards/margins": 0.024911630898714066,
      "rewards/rejected": -0.04262687265872955,
      "step": 15
    },
    {
      "epoch": 0.274442538593482,
      "grad_norm": 15.663309097290039,
      "learning_rate": 4.64432152500068e-07,
      "log_odds_chosen": 1.7299188375473022,
      "log_odds_ratio": -0.30832645297050476,
      "logps/chosen": -0.3413788974285126,
      "logps/rejected": -0.897726833820343,
      "loss": 8.7406,
      "nll_loss": 0.5222011804580688,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.017068946734070778,
      "rewards/margins": 0.027817392721772194,
      "rewards/rejected": -0.04488633945584297,
      "step": 20
    },
    {
      "epoch": 0.34305317324185247,
      "grad_norm": 18.158308029174805,
      "learning_rate": 4.2677669529663686e-07,
      "log_odds_chosen": 1.4837188720703125,
      "log_odds_ratio": -0.3287079334259033,
      "logps/chosen": -0.3441501259803772,
      "logps/rejected": -0.8242276906967163,
      "loss": 9.7102,
      "nll_loss": 0.5818053483963013,
      "rewards/accuracies": 0.8968750238418579,
      "rewards/chosen": -0.01720750704407692,
      "rewards/margins": 0.024003881961107254,
      "rewards/rejected": -0.041211389005184174,
      "step": 25
    },
    {
      "epoch": 0.411663807890223,
      "grad_norm": 16.503480911254883,
      "learning_rate": 3.7852568604830535e-07,
      "log_odds_chosen": 1.5456057786941528,
      "log_odds_ratio": -0.3088548183441162,
      "logps/chosen": -0.3460735082626343,
      "logps/rejected": -0.8717023134231567,
      "loss": 9.158,
      "nll_loss": 0.5480188727378845,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.017303675413131714,
      "rewards/margins": 0.026281436905264854,
      "rewards/rejected": -0.04358511418104172,
      "step": 30
    },
    {
      "epoch": 0.48027444253859347,
      "grad_norm": 16.41570472717285,
      "learning_rate": 3.2257116931361555e-07,
      "log_odds_chosen": 1.6255455017089844,
      "log_odds_ratio": -0.2997768521308899,
      "logps/chosen": -0.3172294795513153,
      "logps/rejected": -0.8485333323478699,
      "loss": 8.8971,
      "nll_loss": 0.531952977180481,
      "rewards/accuracies": 0.903124988079071,
      "rewards/chosen": -0.015861475840210915,
      "rewards/margins": 0.026565194129943848,
      "rewards/rejected": -0.04242666810750961,
      "step": 35
    },
    {
      "epoch": 0.548885077186964,
      "grad_norm": 16.295194625854492,
      "learning_rate": 2.6226691858185454e-07,
      "log_odds_chosen": 1.7775678634643555,
      "log_odds_ratio": -0.2819564938545227,
      "logps/chosen": -0.29198122024536133,
      "logps/rejected": -0.8509271740913391,
      "loss": 8.6232,
      "nll_loss": 0.5151973962783813,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.014599060639739037,
      "rewards/margins": 0.027947301045060158,
      "rewards/rejected": -0.042546361684799194,
      "step": 40
    },
    {
      "epoch": 0.6174957118353345,
      "grad_norm": 18.461917877197266,
      "learning_rate": 2.0122741949596793e-07,
      "log_odds_chosen": 1.683835744857788,
      "log_odds_ratio": -0.30903160572052,
      "logps/chosen": -0.31781280040740967,
      "logps/rejected": -0.8516524434089661,
      "loss": 8.6307,
      "nll_loss": 0.5150163769721985,
      "rewards/accuracies": 0.8812500238418579,
      "rewards/chosen": -0.015890639275312424,
      "rewards/margins": 0.02669198252260685,
      "rewards/rejected": -0.04258262366056442,
      "step": 45
    },
    {
      "epoch": 0.6861063464837049,
      "grad_norm": 17.289121627807617,
      "learning_rate": 1.4311122664242953e-07,
      "log_odds_chosen": 1.7723395824432373,
      "log_odds_ratio": -0.28731250762939453,
      "logps/chosen": -0.31122511625289917,
      "logps/rejected": -0.876534640789032,
      "loss": 8.8422,
      "nll_loss": 0.5288792848587036,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.015561257489025593,
      "rewards/margins": 0.02826547622680664,
      "rewards/rejected": -0.04382672905921936,
      "step": 50
    },
    {
      "epoch": 0.7547169811320755,
      "grad_norm": 17.134496688842773,
      "learning_rate": 9.140167895908865e-08,
      "log_odds_chosen": 1.7291252613067627,
      "log_odds_ratio": -0.28900426626205444,
      "logps/chosen": -0.3234766721725464,
      "logps/rejected": -0.8822552561759949,
      "loss": 8.9071,
      "nll_loss": 0.5326792001724243,
      "rewards/accuracies": 0.909375011920929,
      "rewards/chosen": -0.0161738358438015,
      "rewards/margins": 0.027938928455114365,
      "rewards/rejected": -0.04411276429891586,
      "step": 55
    },
    {
      "epoch": 0.823327615780446,
      "grad_norm": 16.877124786376953,
      "learning_rate": 4.919811712983879e-08,
      "log_odds_chosen": 1.698340654373169,
      "log_odds_ratio": -0.29392507672309875,
      "logps/chosen": -0.3388321101665497,
      "logps/rejected": -0.8975754976272583,
      "loss": 9.0244,
      "nll_loss": 0.5402450561523438,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -0.016941606998443604,
      "rewards/margins": 0.02793716825544834,
      "rewards/rejected": -0.044878773391246796,
      "step": 60
    },
    {
      "epoch": 0.8919382504288165,
      "grad_norm": 18.37203598022461,
      "learning_rate": 1.9030116872178314e-08,
      "log_odds_chosen": 1.82688307762146,
      "log_odds_ratio": -0.2703757882118225,
      "logps/chosen": -0.30324336886405945,
      "logps/rejected": -0.8961852788925171,
      "loss": 8.723,
      "nll_loss": 0.5218873023986816,
      "rewards/accuracies": 0.9156249761581421,
      "rewards/chosen": -0.015162169933319092,
      "rewards/margins": 0.029647093266248703,
      "rewards/rejected": -0.044809263199567795,
      "step": 65
    },
    {
      "epoch": 0.9605488850771869,
      "grad_norm": 17.195167541503906,
      "learning_rate": 2.7058725088047464e-09,
      "log_odds_chosen": 1.7199735641479492,
      "log_odds_ratio": -0.28608742356300354,
      "logps/chosen": -0.32787787914276123,
      "logps/rejected": -0.8620197176933289,
      "loss": 9.1848,
      "nll_loss": 0.5500085949897766,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": -0.01639389432966709,
      "rewards/margins": 0.02670709416270256,
      "rewards/rejected": -0.0431009940803051,
      "step": 70
    },
    {
      "epoch": 0.9879931389365352,
      "step": 72,
      "total_flos": 0.0,
      "train_loss": 9.270023425420126,
      "train_runtime": 759.6158,
      "train_samples_per_second": 6.137,
      "train_steps_per_second": 0.095
    }
  ],
  "logging_steps": 5,
  "max_steps": 72,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}